You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

277 lines
9.0 KiB

3 months ago
4 years ago
3 years ago
3 years ago
3 years ago
  1. package shell
  2. import (
  3. "context"
  4. "flag"
  5. "fmt"
  6. "io"
  7. "github.com/seaweedfs/seaweedfs/weed/pb"
  8. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  9. "google.golang.org/grpc"
  10. "github.com/seaweedfs/seaweedfs/weed/operation"
  11. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  12. "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
  13. "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
  14. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  15. )
  16. func init() {
  17. Commands = append(Commands, &commandEcDecode{})
  18. }
  // commandEcDecode implements the "ec.decode" shell command. It has no fields;
  // everything it needs is passed to its methods.
  type commandEcDecode struct{}
  21. func (c *commandEcDecode) Name() string {
  22. return "ec.decode"
  23. }
  24. func (c *commandEcDecode) Help() string {
  25. return `decode a erasure coded volume into a normal volume
  26. ec.decode [-collection=""] [-volumeId=<volume_id>]
  27. `
  28. }
  29. func (c *commandEcDecode) HasTag(CommandTag) bool {
  30. return false
  31. }
  32. func (c *commandEcDecode) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
  33. decodeCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
  34. volumeId := decodeCommand.Int("volumeId", 0, "the volume id")
  35. collection := decodeCommand.String("collection", "", "the collection name")
  36. if err = decodeCommand.Parse(args); err != nil {
  37. return nil
  38. }
  39. if err = commandEnv.confirmIsLocked(args); err != nil {
  40. return
  41. }
  42. vid := needle.VolumeId(*volumeId)
  43. // collect topology information
  44. topologyInfo, _, err := collectTopologyInfo(commandEnv, 0)
  45. if err != nil {
  46. return err
  47. }
  48. // volumeId is provided
  49. if vid != 0 {
  50. return doEcDecode(commandEnv, topologyInfo, *collection, vid)
  51. }
  52. // apply to all volumes in the collection
  53. volumeIds := collectEcShardIds(topologyInfo, *collection)
  54. fmt.Printf("ec encode volumes: %v\n", volumeIds)
  55. for _, vid := range volumeIds {
  56. if err = doEcDecode(commandEnv, topologyInfo, *collection, vid); err != nil {
  57. return err
  58. }
  59. }
  60. return nil
  61. }
  62. func doEcDecode(commandEnv *CommandEnv, topoInfo *master_pb.TopologyInfo, collection string, vid needle.VolumeId) (err error) {
  63. if !commandEnv.isLocked() {
  64. return fmt.Errorf("lock is lost")
  65. }
  66. // find volume location
  67. nodeToEcIndexBits := collectEcNodeShardBits(topoInfo, vid)
  68. fmt.Printf("ec volume %d shard locations: %+v\n", vid, nodeToEcIndexBits)
  69. // collect ec shards to the server with most space
  70. targetNodeLocation, err := collectEcShards(commandEnv, nodeToEcIndexBits, collection, vid)
  71. if err != nil {
  72. return fmt.Errorf("collectEcShards for volume %d: %v", vid, err)
  73. }
  74. // generate a normal volume
  75. err = generateNormalVolume(commandEnv.option.GrpcDialOption, vid, collection, targetNodeLocation)
  76. if err != nil {
  77. return fmt.Errorf("generate normal volume %d on %s: %v", vid, targetNodeLocation, err)
  78. }
  79. // delete the previous ec shards
  80. err = mountVolumeAndDeleteEcShards(commandEnv.option.GrpcDialOption, collection, targetNodeLocation, nodeToEcIndexBits, vid)
  81. if err != nil {
  82. return fmt.Errorf("delete ec shards for volume %d: %v", vid, err)
  83. }
  84. return nil
  85. }
  86. func mountVolumeAndDeleteEcShards(grpcDialOption grpc.DialOption, collection string, targetNodeLocation pb.ServerAddress, nodeToEcIndexBits map[pb.ServerAddress]erasure_coding.ShardBits, vid needle.VolumeId) error {
  87. // mount volume
  88. if err := operation.WithVolumeServerClient(false, targetNodeLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  89. _, mountErr := volumeServerClient.VolumeMount(context.Background(), &volume_server_pb.VolumeMountRequest{
  90. VolumeId: uint32(vid),
  91. })
  92. return mountErr
  93. }); err != nil {
  94. return fmt.Errorf("mountVolumeAndDeleteEcShards mount volume %d on %s: %v", vid, targetNodeLocation, err)
  95. }
  96. // unmount ec shards
  97. for location, ecIndexBits := range nodeToEcIndexBits {
  98. fmt.Printf("unmount ec volume %d on %s has shards: %+v\n", vid, location, ecIndexBits.ShardIds())
  99. err := unmountEcShards(grpcDialOption, vid, location, ecIndexBits.ToUint32Slice())
  100. if err != nil {
  101. return fmt.Errorf("mountVolumeAndDeleteEcShards unmount ec volume %d on %s: %v", vid, location, err)
  102. }
  103. }
  104. // delete ec shards
  105. for location, ecIndexBits := range nodeToEcIndexBits {
  106. fmt.Printf("delete ec volume %d on %s has shards: %+v\n", vid, location, ecIndexBits.ShardIds())
  107. err := sourceServerDeleteEcShards(grpcDialOption, collection, vid, location, ecIndexBits.ToUint32Slice())
  108. if err != nil {
  109. return fmt.Errorf("mountVolumeAndDeleteEcShards delete ec volume %d on %s: %v", vid, location, err)
  110. }
  111. }
  112. return nil
  113. }
  114. func generateNormalVolume(grpcDialOption grpc.DialOption, vid needle.VolumeId, collection string, sourceVolumeServer pb.ServerAddress) error {
  115. fmt.Printf("generateNormalVolume from ec volume %d on %s\n", vid, sourceVolumeServer)
  116. err := operation.WithVolumeServerClient(false, sourceVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  117. _, genErr := volumeServerClient.VolumeEcShardsToVolume(context.Background(), &volume_server_pb.VolumeEcShardsToVolumeRequest{
  118. VolumeId: uint32(vid),
  119. Collection: collection,
  120. })
  121. return genErr
  122. })
  123. return err
  124. }
  125. func collectEcShards(commandEnv *CommandEnv, nodeToEcIndexBits map[pb.ServerAddress]erasure_coding.ShardBits, collection string, vid needle.VolumeId) (targetNodeLocation pb.ServerAddress, err error) {
  126. maxShardCount := 0
  127. var existingEcIndexBits erasure_coding.ShardBits
  128. for loc, ecIndexBits := range nodeToEcIndexBits {
  129. toBeCopiedShardCount := ecIndexBits.MinusParityShards().ShardIdCount()
  130. if toBeCopiedShardCount > maxShardCount {
  131. maxShardCount = toBeCopiedShardCount
  132. targetNodeLocation = loc
  133. existingEcIndexBits = ecIndexBits
  134. }
  135. }
  136. fmt.Printf("collectEcShards: ec volume %d collect shards to %s from: %+v\n", vid, targetNodeLocation, nodeToEcIndexBits)
  137. var copiedEcIndexBits erasure_coding.ShardBits
  138. for loc, ecIndexBits := range nodeToEcIndexBits {
  139. if loc == targetNodeLocation {
  140. continue
  141. }
  142. needToCopyEcIndexBits := ecIndexBits.Minus(existingEcIndexBits).MinusParityShards()
  143. if needToCopyEcIndexBits.ShardIdCount() == 0 {
  144. continue
  145. }
  146. err = operation.WithVolumeServerClient(false, targetNodeLocation, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  147. fmt.Printf("copy %d.%v %s => %s\n", vid, needToCopyEcIndexBits.ShardIds(), loc, targetNodeLocation)
  148. _, copyErr := volumeServerClient.VolumeEcShardsCopy(context.Background(), &volume_server_pb.VolumeEcShardsCopyRequest{
  149. VolumeId: uint32(vid),
  150. Collection: collection,
  151. ShardIds: needToCopyEcIndexBits.ToUint32Slice(),
  152. CopyEcxFile: false,
  153. CopyEcjFile: true,
  154. CopyVifFile: true,
  155. SourceDataNode: string(loc),
  156. })
  157. if copyErr != nil {
  158. return fmt.Errorf("copy %d.%v %s => %s : %v\n", vid, needToCopyEcIndexBits.ShardIds(), loc, targetNodeLocation, copyErr)
  159. }
  160. fmt.Printf("mount %d.%v on %s\n", vid, needToCopyEcIndexBits.ShardIds(), targetNodeLocation)
  161. _, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{
  162. VolumeId: uint32(vid),
  163. Collection: collection,
  164. ShardIds: needToCopyEcIndexBits.ToUint32Slice(),
  165. })
  166. if mountErr != nil {
  167. return fmt.Errorf("mount %d.%v on %s : %v\n", vid, needToCopyEcIndexBits.ShardIds(), targetNodeLocation, mountErr)
  168. }
  169. return nil
  170. })
  171. if err != nil {
  172. break
  173. }
  174. copiedEcIndexBits = copiedEcIndexBits.Plus(needToCopyEcIndexBits)
  175. }
  176. nodeToEcIndexBits[targetNodeLocation] = existingEcIndexBits.Plus(copiedEcIndexBits)
  177. return targetNodeLocation, err
  178. }
  179. func lookupVolumeIds(commandEnv *CommandEnv, volumeIds []string) (volumeIdLocations []*master_pb.LookupVolumeResponse_VolumeIdLocation, err error) {
  180. var resp *master_pb.LookupVolumeResponse
  181. err = commandEnv.MasterClient.WithClient(false, func(client master_pb.SeaweedClient) error {
  182. resp, err = client.LookupVolume(context.Background(), &master_pb.LookupVolumeRequest{VolumeOrFileIds: volumeIds})
  183. return err
  184. })
  185. if err != nil {
  186. return nil, err
  187. }
  188. return resp.VolumeIdLocations, nil
  189. }
  190. func collectEcShardIds(topoInfo *master_pb.TopologyInfo, selectedCollection string) (vids []needle.VolumeId) {
  191. vidMap := make(map[uint32]bool)
  192. eachDataNode(topoInfo, func(dc DataCenterId, rack RackId, dn *master_pb.DataNodeInfo) {
  193. if diskInfo, found := dn.DiskInfos[string(types.HardDriveType)]; found {
  194. for _, v := range diskInfo.EcShardInfos {
  195. if v.Collection == selectedCollection {
  196. vidMap[v.Id] = true
  197. }
  198. }
  199. }
  200. })
  201. for vid := range vidMap {
  202. vids = append(vids, needle.VolumeId(vid))
  203. }
  204. return
  205. }
  206. func collectEcNodeShardBits(topoInfo *master_pb.TopologyInfo, vid needle.VolumeId) map[pb.ServerAddress]erasure_coding.ShardBits {
  207. nodeToEcIndexBits := make(map[pb.ServerAddress]erasure_coding.ShardBits)
  208. eachDataNode(topoInfo, func(dc DataCenterId, rack RackId, dn *master_pb.DataNodeInfo) {
  209. if diskInfo, found := dn.DiskInfos[string(types.HardDriveType)]; found {
  210. for _, v := range diskInfo.EcShardInfos {
  211. if v.Id == uint32(vid) {
  212. nodeToEcIndexBits[pb.NewServerAddressFromDataNode(dn)] = erasure_coding.ShardBits(v.EcIndexBits)
  213. }
  214. }
  215. }
  216. })
  217. return nodeToEcIndexBits
  218. }