You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

198 lines
6.8 KiB

  1. package shell
  2. import (
  3. "context"
  4. "fmt"
  5. "sort"
  6. "github.com/chrislusf/seaweedfs/weed/glog"
  7. "github.com/chrislusf/seaweedfs/weed/operation"
  8. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  9. "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
  10. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  11. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  12. "google.golang.org/grpc"
  13. )
  14. func moveMountedShardToEcNode(ctx context.Context, commandEnv *commandEnv, existingLocation *EcNode, collection string, vid needle.VolumeId, shardId erasure_coding.ShardId, destinationEcNode *EcNode, applyBalancing bool) error {
  15. fmt.Printf("moved ec shard %d.%d %s => %s\n", vid, shardId, existingLocation.info.Id, destinationEcNode.info.Id)
  16. if !applyBalancing {
  17. return nil
  18. }
  19. // ask destination node to copy shard and the ecx file from source node, and mount it
  20. copiedShardIds, err := oneServerCopyAndMountEcShardsFromSource(ctx, commandEnv.option.GrpcDialOption, destinationEcNode, uint32(shardId), 1, vid, collection, existingLocation.info.Id)
  21. if err != nil {
  22. return err
  23. }
  24. // unmount the to be deleted shards
  25. err = unmountEcShards(ctx, commandEnv.option.GrpcDialOption, vid, existingLocation.info.Id, copiedShardIds)
  26. if err != nil {
  27. return err
  28. }
  29. // ask source node to delete the shard, and maybe the ecx file
  30. return sourceServerDeleteEcShards(ctx, commandEnv.option.GrpcDialOption, collection, vid, existingLocation.info.Id, copiedShardIds)
  31. }
  32. func oneServerCopyAndMountEcShardsFromSource(ctx context.Context, grpcDialOption grpc.DialOption,
  33. targetServer *EcNode, startFromShardId uint32, shardCount int,
  34. volumeId needle.VolumeId, collection string, existingLocation string) (copiedShardIds []uint32, err error) {
  35. var shardIdsToCopy []uint32
  36. for shardId := startFromShardId; shardId < startFromShardId+uint32(shardCount); shardId++ {
  37. shardIdsToCopy = append(shardIdsToCopy, shardId)
  38. }
  39. fmt.Printf("allocate %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id)
  40. err = operation.WithVolumeServerClient(targetServer.info.Id, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  41. if targetServer.info.Id != existingLocation {
  42. fmt.Printf("copy %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id)
  43. _, copyErr := volumeServerClient.VolumeEcShardsCopy(ctx, &volume_server_pb.VolumeEcShardsCopyRequest{
  44. VolumeId: uint32(volumeId),
  45. Collection: collection,
  46. ShardIds: shardIdsToCopy,
  47. CopyEcxFile: true,
  48. SourceDataNode: existingLocation,
  49. })
  50. if copyErr != nil {
  51. return fmt.Errorf("copy %d.%v %s => %s : %v\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id, copyErr)
  52. }
  53. }
  54. fmt.Printf("mount %d.%v on %s\n", volumeId, shardIdsToCopy, targetServer.info.Id)
  55. _, mountErr := volumeServerClient.VolumeEcShardsMount(ctx, &volume_server_pb.VolumeEcShardsMountRequest{
  56. VolumeId: uint32(volumeId),
  57. Collection: collection,
  58. ShardIds: shardIdsToCopy,
  59. })
  60. if mountErr != nil {
  61. return fmt.Errorf("mount %d.%v on %s : %v\n", volumeId, shardIdsToCopy, targetServer.info.Id, mountErr)
  62. }
  63. if targetServer.info.Id != existingLocation {
  64. copiedShardIds = shardIdsToCopy
  65. glog.V(0).Infof("%s ec volume %d deletes shards %+v", existingLocation, volumeId, copiedShardIds)
  66. }
  67. return nil
  68. })
  69. if err != nil {
  70. return
  71. }
  72. return
  73. }
  74. func eachDataNode(topo *master_pb.TopologyInfo, fn func(*master_pb.DataNodeInfo)) {
  75. for _, dc := range topo.DataCenterInfos {
  76. for _, rack := range dc.RackInfos {
  77. for _, dn := range rack.DataNodeInfos {
  78. fn(dn)
  79. }
  80. }
  81. }
  82. }
  83. func sortEcNodes(ecNodes []*EcNode) {
  84. sort.Slice(ecNodes, func(i, j int) bool {
  85. return ecNodes[i].freeEcSlot > ecNodes[j].freeEcSlot
  86. })
  87. }
  88. func countShards(ecShardInfos []*master_pb.VolumeEcShardInformationMessage) (count int) {
  89. for _, ecShardInfo := range ecShardInfos {
  90. shardBits := erasure_coding.ShardBits(ecShardInfo.EcIndexBits)
  91. count += shardBits.ShardIdCount()
  92. }
  93. return
  94. }
  95. func countFreeShardSlots(dn *master_pb.DataNodeInfo) (count int) {
  96. return int(dn.FreeVolumeCount)*10 - countShards(dn.EcShardInfos)
  97. }
  98. type EcNode struct {
  99. info *master_pb.DataNodeInfo
  100. freeEcSlot int
  101. }
  102. func collectEcNodes(ctx context.Context, commandEnv *commandEnv) (ecNodes []*EcNode, totalFreeEcSlots int, err error) {
  103. // list all possible locations
  104. var resp *master_pb.VolumeListResponse
  105. err = commandEnv.masterClient.WithClient(ctx, func(client master_pb.SeaweedClient) error {
  106. resp, err = client.VolumeList(ctx, &master_pb.VolumeListRequest{})
  107. return err
  108. })
  109. if err != nil {
  110. return nil, 0, err
  111. }
  112. // find out all volume servers with one slot left.
  113. eachDataNode(resp.TopologyInfo, func(dn *master_pb.DataNodeInfo) {
  114. if freeEcSlots := countFreeShardSlots(dn); freeEcSlots > 0 {
  115. ecNodes = append(ecNodes, &EcNode{
  116. info: dn,
  117. freeEcSlot: int(freeEcSlots),
  118. })
  119. totalFreeEcSlots += freeEcSlots
  120. }
  121. })
  122. sortEcNodes(ecNodes)
  123. return
  124. }
  125. func sourceServerDeleteEcShards(ctx context.Context, grpcDialOption grpc.DialOption,
  126. collection string, volumeId needle.VolumeId, sourceLocation string, toBeDeletedShardIds []uint32) error {
  127. fmt.Printf("delete %d.%v from %s\n", volumeId, toBeDeletedShardIds, sourceLocation)
  128. return operation.WithVolumeServerClient(sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  129. _, deleteErr := volumeServerClient.VolumeEcShardsDelete(ctx, &volume_server_pb.VolumeEcShardsDeleteRequest{
  130. VolumeId: uint32(volumeId),
  131. Collection: collection,
  132. ShardIds: toBeDeletedShardIds,
  133. })
  134. return deleteErr
  135. })
  136. }
  137. func unmountEcShards(ctx context.Context, grpcDialOption grpc.DialOption,
  138. volumeId needle.VolumeId, sourceLocation string, toBeUnmountedhardIds []uint32) error {
  139. fmt.Printf("unmount %d.%v from %s\n", volumeId, toBeUnmountedhardIds, sourceLocation)
  140. return operation.WithVolumeServerClient(sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  141. _, deleteErr := volumeServerClient.VolumeEcShardsUnmount(ctx, &volume_server_pb.VolumeEcShardsUnmountRequest{
  142. VolumeId: uint32(volumeId),
  143. ShardIds: toBeUnmountedhardIds,
  144. })
  145. return deleteErr
  146. })
  147. }
  148. func mountEcShards(ctx context.Context, grpcDialOption grpc.DialOption,
  149. collection string, volumeId needle.VolumeId, sourceLocation string, toBeMountedhardIds []uint32) error {
  150. fmt.Printf("mount %d.%v on %s\n", volumeId, toBeMountedhardIds, sourceLocation)
  151. return operation.WithVolumeServerClient(sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  152. _, mountErr := volumeServerClient.VolumeEcShardsMount(ctx, &volume_server_pb.VolumeEcShardsMountRequest{
  153. VolumeId: uint32(volumeId),
  154. Collection: collection,
  155. ShardIds: toBeMountedhardIds,
  156. })
  157. return mountErr
  158. })
  159. }