356 lines
11 KiB

  1. package shell
  2. import (
  3. "context"
  4. "fmt"
  5. "math"
  6. "sort"
  7. "github.com/chrislusf/seaweedfs/weed/glog"
  8. "github.com/chrislusf/seaweedfs/weed/operation"
  9. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  10. "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
  11. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  12. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  13. "google.golang.org/grpc"
  14. )
  15. func moveMountedShardToEcNode(commandEnv *CommandEnv, existingLocation *EcNode, collection string, vid needle.VolumeId, shardId erasure_coding.ShardId, destinationEcNode *EcNode, applyBalancing bool) (err error) {
  16. copiedShardIds := []uint32{uint32(shardId)}
  17. if applyBalancing {
  18. // ask destination node to copy shard and the ecx file from source node, and mount it
  19. copiedShardIds, err = oneServerCopyAndMountEcShardsFromSource(commandEnv.option.GrpcDialOption, destinationEcNode, []uint32{uint32(shardId)}, vid, collection, existingLocation.info.Id)
  20. if err != nil {
  21. return err
  22. }
  23. // unmount the to be deleted shards
  24. err = unmountEcShards(commandEnv.option.GrpcDialOption, vid, existingLocation.info.Id, copiedShardIds)
  25. if err != nil {
  26. return err
  27. }
  28. // ask source node to delete the shard, and maybe the ecx file
  29. err = sourceServerDeleteEcShards(commandEnv.option.GrpcDialOption, collection, vid, existingLocation.info.Id, copiedShardIds)
  30. if err != nil {
  31. return err
  32. }
  33. fmt.Printf("moved ec shard %d.%d %s => %s\n", vid, shardId, existingLocation.info.Id, destinationEcNode.info.Id)
  34. }
  35. destinationEcNode.addEcVolumeShards(vid, collection, copiedShardIds)
  36. existingLocation.deleteEcVolumeShards(vid, copiedShardIds)
  37. return nil
  38. }
  39. func oneServerCopyAndMountEcShardsFromSource(grpcDialOption grpc.DialOption,
  40. targetServer *EcNode, shardIdsToCopy []uint32,
  41. volumeId needle.VolumeId, collection string, existingLocation string) (copiedShardIds []uint32, err error) {
  42. fmt.Printf("allocate %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id)
  43. err = operation.WithVolumeServerClient(targetServer.info.Id, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  44. if targetServer.info.Id != existingLocation {
  45. fmt.Printf("copy %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id)
  46. _, copyErr := volumeServerClient.VolumeEcShardsCopy(context.Background(), &volume_server_pb.VolumeEcShardsCopyRequest{
  47. VolumeId: uint32(volumeId),
  48. Collection: collection,
  49. ShardIds: shardIdsToCopy,
  50. CopyEcxFile: true,
  51. CopyEcjFile: true,
  52. CopyVifFile: true,
  53. SourceDataNode: existingLocation,
  54. })
  55. if copyErr != nil {
  56. return fmt.Errorf("copy %d.%v %s => %s : %v\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id, copyErr)
  57. }
  58. }
  59. fmt.Printf("mount %d.%v on %s\n", volumeId, shardIdsToCopy, targetServer.info.Id)
  60. _, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{
  61. VolumeId: uint32(volumeId),
  62. Collection: collection,
  63. ShardIds: shardIdsToCopy,
  64. })
  65. if mountErr != nil {
  66. return fmt.Errorf("mount %d.%v on %s : %v\n", volumeId, shardIdsToCopy, targetServer.info.Id, mountErr)
  67. }
  68. if targetServer.info.Id != existingLocation {
  69. copiedShardIds = shardIdsToCopy
  70. glog.V(0).Infof("%s ec volume %d deletes shards %+v", existingLocation, volumeId, copiedShardIds)
  71. }
  72. return nil
  73. })
  74. if err != nil {
  75. return
  76. }
  77. return
  78. }
  79. func eachDataNode(topo *master_pb.TopologyInfo, fn func(dc string, rack RackId, dn *master_pb.DataNodeInfo)) {
  80. for _, dc := range topo.DataCenterInfos {
  81. for _, rack := range dc.RackInfos {
  82. for _, dn := range rack.DataNodeInfos {
  83. fn(dc.Id, RackId(rack.Id), dn)
  84. }
  85. }
  86. }
  87. }
  88. func sortEcNodesByFreeslotsDecending(ecNodes []*EcNode) {
  89. sort.Slice(ecNodes, func(i, j int) bool {
  90. return ecNodes[i].freeEcSlot > ecNodes[j].freeEcSlot
  91. })
  92. }
  93. func sortEcNodesByFreeslotsAscending(ecNodes []*EcNode) {
  94. sort.Slice(ecNodes, func(i, j int) bool {
  95. return ecNodes[i].freeEcSlot < ecNodes[j].freeEcSlot
  96. })
  97. }
  // CandidateEcNode pairs an EcNode with a shard count; slices of it are kept
  // ordered by ensureSortedEcNodes.
  type CandidateEcNode struct {
  	ecNode *EcNode
  	// NOTE(review): presumably the number of shards of the volume under
  	// consideration that ecNode holds; confirm against the callers.
  	shardCount int
  }
  102. // if the index node changed the freeEcSlot, need to keep every EcNode still sorted
  103. func ensureSortedEcNodes(data []*CandidateEcNode, index int, lessThan func(i, j int) bool) {
  104. for i := index - 1; i >= 0; i-- {
  105. if lessThan(i+1, i) {
  106. swap(data, i, i+1)
  107. } else {
  108. break
  109. }
  110. }
  111. for i := index + 1; i < len(data); i++ {
  112. if lessThan(i, i-1) {
  113. swap(data, i, i-1)
  114. } else {
  115. break
  116. }
  117. }
  118. }
  119. func swap(data []*CandidateEcNode, i, j int) {
  120. t := data[i]
  121. data[i] = data[j]
  122. data[j] = t
  123. }
  124. func countShards(ecShardInfos []*master_pb.VolumeEcShardInformationMessage) (count int) {
  125. for _, ecShardInfo := range ecShardInfos {
  126. shardBits := erasure_coding.ShardBits(ecShardInfo.EcIndexBits)
  127. count += shardBits.ShardIdCount()
  128. }
  129. return
  130. }
  131. func countFreeShardSlots(dn *master_pb.DataNodeInfo) (count int) {
  132. return int(dn.MaxVolumeCount-dn.ActiveVolumeCount)*erasure_coding.DataShardsCount - countShards(dn.EcShardInfos)
  133. }
  // RackId is the id of a rack as found in the topology.
  type RackId string

  // EcNodeId identifies an EcNode; it is the key type of EcRack.ecNodes.
  type EcNodeId string
  // EcNode is the in-memory view of one data node used for EC shard placement.
  type EcNode struct {
  	// info is the data node entry taken from the topology, including the EC
  	// shards the node holds.
  	info *master_pb.DataNodeInfo
  	// dc is the id of the data center that contains the node.
  	dc string
  	// rack is the id of the rack that contains the node.
  	rack RackId
  	// freeEcSlot is the number of EC shards the node can still take. It is
  	// adjusted when shards are added to or deleted from this view.
  	freeEcSlot int
  }
  142. func (ecNode *EcNode) localShardIdCount(vid uint32) int {
  143. for _, ecShardInfo := range ecNode.info.EcShardInfos {
  144. if vid == ecShardInfo.Id {
  145. shardBits := erasure_coding.ShardBits(ecShardInfo.EcIndexBits)
  146. return shardBits.ShardIdCount()
  147. }
  148. }
  149. return 0
  150. }
  // EcRack groups EC nodes, keyed by node id, together with a free slot count.
  type EcRack struct {
  	ecNodes map[EcNodeId]*EcNode
  	// NOTE(review): presumably the sum of freeEcSlot over ecNodes; this file
  	// does not populate it, so confirm against the code that builds EcRack.
  	freeEcSlot int
  }
  155. func collectEcNodes(commandEnv *CommandEnv, selectedDataCenter string) (ecNodes []*EcNode, totalFreeEcSlots int, err error) {
  156. // list all possible locations
  157. var resp *master_pb.VolumeListResponse
  158. err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error {
  159. resp, err = client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
  160. return err
  161. })
  162. if err != nil {
  163. return nil, 0, err
  164. }
  165. // find out all volume servers with one slot left.
  166. ecNodes, totalFreeEcSlots = collectEcVolumeServersByDc(resp.TopologyInfo, selectedDataCenter)
  167. sortEcNodesByFreeslotsDecending(ecNodes)
  168. return
  169. }
  170. func collectEcVolumeServersByDc(topo *master_pb.TopologyInfo, selectedDataCenter string) (ecNodes []*EcNode, totalFreeEcSlots int) {
  171. eachDataNode(topo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
  172. if selectedDataCenter != "" && selectedDataCenter != dc {
  173. return
  174. }
  175. freeEcSlots := countFreeShardSlots(dn)
  176. ecNodes = append(ecNodes, &EcNode{
  177. info: dn,
  178. dc: dc,
  179. rack: rack,
  180. freeEcSlot: int(freeEcSlots),
  181. })
  182. totalFreeEcSlots += freeEcSlots
  183. })
  184. return
  185. }
  186. func sourceServerDeleteEcShards(grpcDialOption grpc.DialOption, collection string, volumeId needle.VolumeId, sourceLocation string, toBeDeletedShardIds []uint32) error {
  187. fmt.Printf("delete %d.%v from %s\n", volumeId, toBeDeletedShardIds, sourceLocation)
  188. return operation.WithVolumeServerClient(sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  189. _, deleteErr := volumeServerClient.VolumeEcShardsDelete(context.Background(), &volume_server_pb.VolumeEcShardsDeleteRequest{
  190. VolumeId: uint32(volumeId),
  191. Collection: collection,
  192. ShardIds: toBeDeletedShardIds,
  193. })
  194. return deleteErr
  195. })
  196. }
  197. func unmountEcShards(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, sourceLocation string, toBeUnmountedhardIds []uint32) error {
  198. fmt.Printf("unmount %d.%v from %s\n", volumeId, toBeUnmountedhardIds, sourceLocation)
  199. return operation.WithVolumeServerClient(sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  200. _, deleteErr := volumeServerClient.VolumeEcShardsUnmount(context.Background(), &volume_server_pb.VolumeEcShardsUnmountRequest{
  201. VolumeId: uint32(volumeId),
  202. ShardIds: toBeUnmountedhardIds,
  203. })
  204. return deleteErr
  205. })
  206. }
  207. func mountEcShards(grpcDialOption grpc.DialOption, collection string, volumeId needle.VolumeId, sourceLocation string, toBeMountedhardIds []uint32) error {
  208. fmt.Printf("mount %d.%v on %s\n", volumeId, toBeMountedhardIds, sourceLocation)
  209. return operation.WithVolumeServerClient(sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  210. _, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{
  211. VolumeId: uint32(volumeId),
  212. Collection: collection,
  213. ShardIds: toBeMountedhardIds,
  214. })
  215. return mountErr
  216. })
  217. }
  // divide returns total / n as a floating point quotient. Both operands are
  // converted to float64 first, so n == 0 yields an infinity or NaN rather
  // than a panic.
  func divide(total, n int) float64 {
  	numerator, denominator := float64(total), float64(n)
  	return numerator / denominator
  }
  // ceilDivide returns total / n rounded up to the nearest integer. The
  // quotient is computed in floating point before rounding.
  func ceilDivide(total, n int) int {
  	quotient := float64(total) / float64(n)
  	roundedUp := math.Ceil(quotient)
  	return int(roundedUp)
  }
  224. func findEcVolumeShards(ecNode *EcNode, vid needle.VolumeId) erasure_coding.ShardBits {
  225. for _, shardInfo := range ecNode.info.EcShardInfos {
  226. if needle.VolumeId(shardInfo.Id) == vid {
  227. return erasure_coding.ShardBits(shardInfo.EcIndexBits)
  228. }
  229. }
  230. return 0
  231. }
  232. func (ecNode *EcNode) addEcVolumeShards(vid needle.VolumeId, collection string, shardIds []uint32) *EcNode {
  233. foundVolume := false
  234. for _, shardInfo := range ecNode.info.EcShardInfos {
  235. if needle.VolumeId(shardInfo.Id) == vid {
  236. oldShardBits := erasure_coding.ShardBits(shardInfo.EcIndexBits)
  237. newShardBits := oldShardBits
  238. for _, shardId := range shardIds {
  239. newShardBits = newShardBits.AddShardId(erasure_coding.ShardId(shardId))
  240. }
  241. shardInfo.EcIndexBits = uint32(newShardBits)
  242. ecNode.freeEcSlot -= newShardBits.ShardIdCount() - oldShardBits.ShardIdCount()
  243. foundVolume = true
  244. break
  245. }
  246. }
  247. if !foundVolume {
  248. var newShardBits erasure_coding.ShardBits
  249. for _, shardId := range shardIds {
  250. newShardBits = newShardBits.AddShardId(erasure_coding.ShardId(shardId))
  251. }
  252. ecNode.info.EcShardInfos = append(ecNode.info.EcShardInfos, &master_pb.VolumeEcShardInformationMessage{
  253. Id: uint32(vid),
  254. Collection: collection,
  255. EcIndexBits: uint32(newShardBits),
  256. })
  257. ecNode.freeEcSlot -= len(shardIds)
  258. }
  259. return ecNode
  260. }
  261. func (ecNode *EcNode) deleteEcVolumeShards(vid needle.VolumeId, shardIds []uint32) *EcNode {
  262. for _, shardInfo := range ecNode.info.EcShardInfos {
  263. if needle.VolumeId(shardInfo.Id) == vid {
  264. oldShardBits := erasure_coding.ShardBits(shardInfo.EcIndexBits)
  265. newShardBits := oldShardBits
  266. for _, shardId := range shardIds {
  267. newShardBits = newShardBits.RemoveShardId(erasure_coding.ShardId(shardId))
  268. }
  269. shardInfo.EcIndexBits = uint32(newShardBits)
  270. ecNode.freeEcSlot -= newShardBits.ShardIdCount() - oldShardBits.ShardIdCount()
  271. }
  272. }
  273. return ecNode
  274. }
  275. func groupByCount(data []*EcNode, identifierFn func(*EcNode) (id string, count int)) map[string]int {
  276. countMap := make(map[string]int)
  277. for _, d := range data {
  278. id, count := identifierFn(d)
  279. countMap[id] += count
  280. }
  281. return countMap
  282. }
  283. func groupBy(data []*EcNode, identifierFn func(*EcNode) (id string)) map[string][]*EcNode {
  284. groupMap := make(map[string][]*EcNode)
  285. for _, d := range data {
  286. id := identifierFn(d)
  287. groupMap[id] = append(groupMap[id], d)
  288. }
  289. return groupMap
  290. }