package shell

import (
	"context"
	"fmt"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
	"github.com/seaweedfs/seaweedfs/weed/storage/types"
	"golang.org/x/exp/slices"
	"google.golang.org/grpc"
	"math"
)
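
// moveMountedShardToEcNode moves one mounted EC shard from existingLocation to destinationEcNode:
// the destination copies and mounts the shard, the source unmounts and deletes it, and the
// in-memory shard bookkeeping is updated. When applyBalancing is false, only the bookkeeping changes.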
func moveMountedShardToEcNode(commandEnv *CommandEnv, existingLocation *EcNode, collection string, vid needle.VolumeId, shardId erasure_coding.ShardId, destinationEcNode *EcNode, applyBalancing bool) (err error) {

	if !commandEnv.isLocked() {
		return fmt.Errorf("lock is lost")
	}

	copiedShardIds := []uint32{uint32(shardId)}

	if applyBalancing {

		existingServerAddress := pb.NewServerAddressFromDataNode(existingLocation.info)

		// ask destination node to copy shard and the ecx file from source node, and mount it
		copiedShardIds, err = oneServerCopyAndMountEcShardsFromSource(commandEnv.option.GrpcDialOption, destinationEcNode, []uint32{uint32(shardId)}, vid, collection, existingServerAddress)
		if err != nil {
			return err
		}

		// unmount the to be deleted shards
		err = unmountEcShards(commandEnv.option.GrpcDialOption, vid, existingServerAddress, copiedShardIds)
		if err != nil {
			return err
		}

		// ask source node to delete the shard, and maybe the ecx file
		err = sourceServerDeleteEcShards(commandEnv.option.GrpcDialOption, collection, vid, existingServerAddress, copiedShardIds)
		if err != nil {
			return err
		}

		fmt.Printf("moved ec shard %d.%d %s => %s\n", vid, shardId, existingLocation.info.Id, destinationEcNode.info.Id)

	}

	destinationEcNode.addEcVolumeShards(vid, collection, copiedShardIds)
	existingLocation.deleteEcVolumeShards(vid, copiedShardIds)

	return nil
}
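
// oneServerCopyAndMountEcShardsFromSource asks targetServer to copy the given shards (plus the
// .ecx/.ecj/.vif files) from existingLocation and mount them. It returns the shard ids that were
// actually copied, which is empty when source and target are the same server.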
func oneServerCopyAndMountEcShardsFromSource(grpcDialOption grpc.DialOption,
	targetServer *EcNode, shardIdsToCopy []uint32,
	volumeId needle.VolumeId, collection string, existingLocation pb.ServerAddress) (copiedShardIds []uint32, err error) {

	fmt.Printf("allocate %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id)

	targetAddress := pb.NewServerAddressFromDataNode(targetServer.info)
	err = operation.WithVolumeServerClient(false, targetAddress, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {

		if targetAddress != existingLocation {

			fmt.Printf("copy %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id)
			_, copyErr := volumeServerClient.VolumeEcShardsCopy(context.Background(), &volume_server_pb.VolumeEcShardsCopyRequest{
				VolumeId:       uint32(volumeId),
				Collection:     collection,
				ShardIds:       shardIdsToCopy,
				CopyEcxFile:    true,
				CopyEcjFile:    true,
				CopyVifFile:    true,
				SourceDataNode: string(existingLocation),
			})
			if copyErr != nil {
				return fmt.Errorf("copy %d.%v %s => %s : %v\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id, copyErr)
			}
		}

		fmt.Printf("mount %d.%v on %s\n", volumeId, shardIdsToCopy, targetServer.info.Id)
		_, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{
			VolumeId:   uint32(volumeId),
			Collection: collection,
			ShardIds:   shardIdsToCopy,
		})
		if mountErr != nil {
			return fmt.Errorf("mount %d.%v on %s : %v\n", volumeId, shardIdsToCopy, targetServer.info.Id, mountErr)
		}

		if targetAddress != existingLocation {
			copiedShardIds = shardIdsToCopy
			glog.V(0).Infof("%s ec volume %d deletes shards %+v", existingLocation, volumeId, copiedShardIds)
		}

		return nil
	})

	if err != nil {
		return
	}

	return
}
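
// eachDataNode walks the topology and invokes fn for every data node, passing its data center and rack.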
func eachDataNode(topo *master_pb.TopologyInfo, fn func(dc string, rack RackId, dn *master_pb.DataNodeInfo)) {
	for _, dc := range topo.DataCenterInfos {
		for _, rack := range dc.RackInfos {
			for _, dn := range rack.DataNodeInfos {
				fn(dc.Id, RackId(rack.Id), dn)
			}
		}
	}
}

func sortEcNodesByFreeslotsDecending(ecNodes []*EcNode) {
	slices.SortFunc(ecNodes, func(a, b *EcNode) bool {
		return a.freeEcSlot > b.freeEcSlot
	})
}

func sortEcNodesByFreeslotsAscending(ecNodes []*EcNode) {
	slices.SortFunc(ecNodes, func(a, b *EcNode) bool {
		return a.freeEcSlot < b.freeEcSlot
	})
}

type CandidateEcNode struct {
	ecNode     *EcNode
	shardCount int
}

// ensureSortedEcNodes keeps data sorted after the element at index has changed its sort key
// (for example its freeEcSlot), bubbling it towards the front or back as needed.
func ensureSortedEcNodes(data []*CandidateEcNode, index int, lessThan func(i, j int) bool) {
	for i := index - 1; i >= 0; i-- {
		if lessThan(i+1, i) {
			swap(data, i, i+1)
		} else {
			break
		}
	}
	for i := index + 1; i < len(data); i++ {
		if lessThan(i, i-1) {
			swap(data, i, i-1)
		} else {
			break
		}
	}
}

func swap(data []*CandidateEcNode, i, j int) {
	t := data[i]
	data[i] = data[j]
	data[j] = t
}

func countShards(ecShardInfos []*master_pb.VolumeEcShardInformationMessage) (count int) {
	for _, ecShardInfo := range ecShardInfos {
		shardBits := erasure_coding.ShardBits(ecShardInfo.EcIndexBits)
		count += shardBits.ShardIdCount()
	}
	return
}
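
// countFreeShardSlots estimates how many more EC shards this node can hold on the given disk type:
// each unused volume slot counts as erasure_coding.DataShardsCount shard slots, minus the EC shards
// already stored on that disk.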
func countFreeShardSlots(dn *master_pb.DataNodeInfo, diskType types.DiskType) (count int) {
	if dn.DiskInfos == nil {
		return 0
	}
	diskInfo := dn.DiskInfos[string(diskType)]
	if diskInfo == nil {
		return 0
	}
	return int(diskInfo.MaxVolumeCount-diskInfo.VolumeCount)*erasure_coding.DataShardsCount - countShards(diskInfo.EcShardInfos)
}

type RackId string
type EcNodeId string
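
// EcNode wraps a data node from the master topology together with its location and the number of
// free EC shard slots it has left.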
type EcNode struct {
	info       *master_pb.DataNodeInfo
	dc         string
	rack       RackId
	freeEcSlot int
}
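
// localShardIdCount returns how many shards of EC volume vid are stored on this node.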
func (ecNode *EcNode) localShardIdCount(vid uint32) int {
	for _, diskInfo := range ecNode.info.DiskInfos {
		for _, ecShardInfo := range diskInfo.EcShardInfos {
			if vid == ecShardInfo.Id {
				shardBits := erasure_coding.ShardBits(ecShardInfo.EcIndexBits)
				return shardBits.ShardIdCount()
			}
		}
	}
	return 0
}

type EcRack struct {
	ecNodes    map[EcNodeId]*EcNode
	freeEcSlot int
}

func collectEcNodes(commandEnv *CommandEnv, selectedDataCenter string) (ecNodes []*EcNode, totalFreeEcSlots int, err error) {

	// list all possible locations
	// collect topology information
	topologyInfo, _, err := collectTopologyInfo(commandEnv, 0)
	if err != nil {
		return
	}

	// find all volume servers and their free EC shard slots
	ecNodes, totalFreeEcSlots = collectEcVolumeServersByDc(topologyInfo, selectedDataCenter)

	sortEcNodesByFreeslotsDecending(ecNodes)

	return
}

func collectEcVolumeServersByDc(topo *master_pb.TopologyInfo, selectedDataCenter string) (ecNodes []*EcNode, totalFreeEcSlots int) {
	eachDataNode(topo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
		if selectedDataCenter != "" && selectedDataCenter != dc {
			return
		}

		freeEcSlots := countFreeShardSlots(dn, types.HardDriveType)
		ecNodes = append(ecNodes, &EcNode{
			info:       dn,
			dc:         dc,
			rack:       rack,
			freeEcSlot: int(freeEcSlots),
		})
		totalFreeEcSlots += freeEcSlots
	})
	return
}

func sourceServerDeleteEcShards(grpcDialOption grpc.DialOption, collection string, volumeId needle.VolumeId, sourceLocation pb.ServerAddress, toBeDeletedShardIds []uint32) error {

	fmt.Printf("delete %d.%v from %s\n", volumeId, toBeDeletedShardIds, sourceLocation)

	return operation.WithVolumeServerClient(false, sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
		_, deleteErr := volumeServerClient.VolumeEcShardsDelete(context.Background(), &volume_server_pb.VolumeEcShardsDeleteRequest{
			VolumeId:   uint32(volumeId),
			Collection: collection,
			ShardIds:   toBeDeletedShardIds,
		})
		return deleteErr
	})
}

func unmountEcShards(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, sourceLocation pb.ServerAddress, toBeUnmountedhardIds []uint32) error {

	fmt.Printf("unmount %d.%v from %s\n", volumeId, toBeUnmountedhardIds, sourceLocation)

	return operation.WithVolumeServerClient(false, sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
		_, deleteErr := volumeServerClient.VolumeEcShardsUnmount(context.Background(), &volume_server_pb.VolumeEcShardsUnmountRequest{
			VolumeId: uint32(volumeId),
			ShardIds: toBeUnmountedhardIds,
		})
		return deleteErr
	})
}

func mountEcShards(grpcDialOption grpc.DialOption, collection string, volumeId needle.VolumeId, sourceLocation pb.ServerAddress, toBeMountedhardIds []uint32) error {

	fmt.Printf("mount %d.%v on %s\n", volumeId, toBeMountedhardIds, sourceLocation)

	return operation.WithVolumeServerClient(false, sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
		_, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{
			VolumeId:   uint32(volumeId),
			Collection: collection,
			ShardIds:   toBeMountedhardIds,
		})
		return mountErr
	})
}

func divide(total, n int) float64 {
	return float64(total) / float64(n)
}

func ceilDivide(total, n int) int {
	return int(math.Ceil(float64(total) / float64(n)))
}
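
// findEcVolumeShards returns the bitmap of shards of EC volume vid held on this node's hard-drive disk, or 0 if none.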
func findEcVolumeShards(ecNode *EcNode, vid needle.VolumeId) erasure_coding.ShardBits {

	if diskInfo, found := ecNode.info.DiskInfos[string(types.HardDriveType)]; found {
		for _, shardInfo := range diskInfo.EcShardInfos {
			if needle.VolumeId(shardInfo.Id) == vid {
				return erasure_coding.ShardBits(shardInfo.EcIndexBits)
			}
		}
	}

	return 0
}
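
// addEcVolumeShards records the given shard ids for EC volume vid in this node's in-memory disk
// info and decreases freeEcSlot accordingly. It only updates local bookkeeping; no RPC is made.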
func (ecNode *EcNode) addEcVolumeShards(vid needle.VolumeId, collection string, shardIds []uint32) *EcNode {

	foundVolume := false
	diskInfo, found := ecNode.info.DiskInfos[string(types.HardDriveType)]
	if found {
		for _, shardInfo := range diskInfo.EcShardInfos {
			if needle.VolumeId(shardInfo.Id) == vid {
				oldShardBits := erasure_coding.ShardBits(shardInfo.EcIndexBits)
				newShardBits := oldShardBits
				for _, shardId := range shardIds {
					newShardBits = newShardBits.AddShardId(erasure_coding.ShardId(shardId))
				}
				shardInfo.EcIndexBits = uint32(newShardBits)
				ecNode.freeEcSlot -= newShardBits.ShardIdCount() - oldShardBits.ShardIdCount()
				foundVolume = true
				break
			}
		}
	} else {
		diskInfo = &master_pb.DiskInfo{
			Type: string(types.HardDriveType),
		}
		ecNode.info.DiskInfos[string(types.HardDriveType)] = diskInfo
	}

	if !foundVolume {
		var newShardBits erasure_coding.ShardBits
		for _, shardId := range shardIds {
			newShardBits = newShardBits.AddShardId(erasure_coding.ShardId(shardId))
		}
		diskInfo.EcShardInfos = append(diskInfo.EcShardInfos, &master_pb.VolumeEcShardInformationMessage{
			Id:          uint32(vid),
			Collection:  collection,
			EcIndexBits: uint32(newShardBits),
			DiskType:    string(types.HardDriveType),
		})
		ecNode.freeEcSlot -= len(shardIds)
	}

	return ecNode
}
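
// deleteEcVolumeShards removes the given shard ids for EC volume vid from this node's in-memory
// disk info and returns the freed slots to freeEcSlot. Like addEcVolumeShards, it is bookkeeping only.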
func (ecNode *EcNode) deleteEcVolumeShards(vid needle.VolumeId, shardIds []uint32) *EcNode {

	if diskInfo, found := ecNode.info.DiskInfos[string(types.HardDriveType)]; found {
		for _, shardInfo := range diskInfo.EcShardInfos {
			if needle.VolumeId(shardInfo.Id) == vid {
				oldShardBits := erasure_coding.ShardBits(shardInfo.EcIndexBits)
				newShardBits := oldShardBits
				for _, shardId := range shardIds {
					newShardBits = newShardBits.RemoveShardId(erasure_coding.ShardId(shardId))
				}
				shardInfo.EcIndexBits = uint32(newShardBits)
				ecNode.freeEcSlot -= newShardBits.ShardIdCount() - oldShardBits.ShardIdCount()
			}
		}
	}

	return ecNode
}

func groupByCount(data []*EcNode, identifierFn func(*EcNode) (id string, count int)) map[string]int {
	countMap := make(map[string]int)
	for _, d := range data {
		id, count := identifierFn(d)
		countMap[id] += count
	}
	return countMap
}

func groupBy(data []*EcNode, identifierFn func(*EcNode) (id string)) map[string][]*EcNode {
	groupMap := make(map[string][]*EcNode)
	for _, d := range data {
		id := identifierFn(d)
		groupMap[id] = append(groupMap[id], d)
	}
	return groupMap
}