You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

291 lines
9.4 KiB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
  1. package shell
  2. import (
  3. "flag"
  4. "fmt"
  5. "github.com/seaweedfs/seaweedfs/weed/glog"
  6. "github.com/seaweedfs/seaweedfs/weed/pb"
  7. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  8. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  9. "github.com/seaweedfs/seaweedfs/weed/wdclient"
  10. "io"
  11. "path/filepath"
  12. "sync"
  13. "time"
  14. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  15. )
  16. func init() {
  17. Commands = append(Commands, &commandVolumeTierMove{})
  18. }
  // volumeTierMoveJob is one unit of work sent through a destination server's
  // queue: which volume to move and which server to copy it from.
  type volumeTierMoveJob struct {
  	// src is the volume server the volume is moved away from.
  	src pb.ServerAddress
  	// vid is the id of the volume being moved.
  	vid needle.VolumeId
  }
  // commandVolumeTierMove implements the "volume.tier.move" shell command.
  type commandVolumeTierMove struct {
  	// activeServers maps a server address to the *sync.Mutex handed out by
  	// Lock; Do locks on the source server of each job.
  	activeServers sync.Map
  	// queues holds one buffered job channel per destination server. Do creates
  	// the channels and starts one consuming goroutine for each of them.
  	queues map[pb.ServerAddress]chan volumeTierMoveJob
  	// NOTE(review): the commented-out fields below look like an earlier
  	// locking design that sync.Map replaced; confirm before deleting.
  	//activeServers map[pb.ServerAddress]struct{}
  	//activeServersLock sync.Mutex
  	//activeServersCond *sync.Cond
  }
  30. func (c *commandVolumeTierMove) Name() string {
  31. return "volume.tier.move"
  32. }
  // Help returns the usage text for volume.tier.move.
  //
  // The usage line does not list every flag that Do parses: -force and
  // -ioBytePerSecond are accepted but not shown here.
  func (c *commandVolumeTierMove) Help() string {
  	return `change a volume from one disk type to another
volume.tier.move -fromDiskType=hdd -toDiskType=ssd [-collectionPattern=""] [-fullPercent=95] [-quietFor=1h] [-parallelLimit=4]
Even if the volume is replicated, only one replica will be changed and the rest replicas will be dropped.
So "volume.fix.replication" and "volume.balance" should be followed.
`
  }
  40. func (c *commandVolumeTierMove) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
  41. tierCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
  42. collectionPattern := tierCommand.String("collectionPattern", "", "match with wildcard characters '*' and '?'")
  43. fullPercentage := tierCommand.Float64("fullPercent", 95, "the volume reaches the percentage of max volume size")
  44. quietPeriod := tierCommand.Duration("quietFor", 24*time.Hour, "select volumes without no writes for this period")
  45. source := tierCommand.String("fromDiskType", "", "the source disk type")
  46. target := tierCommand.String("toDiskType", "", "the target disk type")
  47. parallelLimit := tierCommand.Int("parallelLimit", 0, "limit the number of parallel copying jobs")
  48. applyChange := tierCommand.Bool("force", false, "actually apply the changes")
  49. ioBytePerSecond := tierCommand.Int64("ioBytePerSecond", 0, "limit the speed of move")
  50. if err = tierCommand.Parse(args); err != nil {
  51. return nil
  52. }
  53. infoAboutSimulationMode(writer, *applyChange, "-force")
  54. if err = commandEnv.confirmIsLocked(args); err != nil {
  55. return
  56. }
  57. fromDiskType := types.ToDiskType(*source)
  58. toDiskType := types.ToDiskType(*target)
  59. if fromDiskType == toDiskType {
  60. return fmt.Errorf("source tier %s is the same as target tier %s", fromDiskType, toDiskType)
  61. }
  62. // collect topology information
  63. topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
  64. if err != nil {
  65. return err
  66. }
  67. // collect all volumes that should change
  68. volumeIds, err := collectVolumeIdsForTierChange(commandEnv, topologyInfo, volumeSizeLimitMb, fromDiskType, *collectionPattern, *fullPercentage, *quietPeriod)
  69. if err != nil {
  70. return err
  71. }
  72. fmt.Printf("tier move volumes: %v\n", volumeIds)
  73. _, allLocations := collectVolumeReplicaLocations(topologyInfo)
  74. allLocations = filterLocationsByDiskType(allLocations, toDiskType)
  75. keepDataNodesSorted(allLocations, toDiskType)
  76. if len(allLocations) > 0 && *parallelLimit > 0 && *parallelLimit < len(allLocations) {
  77. allLocations = allLocations[:*parallelLimit]
  78. }
  79. wg := sync.WaitGroup{}
  80. bufferLen := len(allLocations)
  81. c.queues = make(map[pb.ServerAddress]chan volumeTierMoveJob)
  82. for _, dst := range allLocations {
  83. destServerAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
  84. c.queues[destServerAddress] = make(chan volumeTierMoveJob, bufferLen)
  85. wg.Add(1)
  86. go func(dst location, jobs <-chan volumeTierMoveJob, applyChanges bool) {
  87. defer wg.Done()
  88. for job := range jobs {
  89. fmt.Fprintf(writer, "moving volume %d from %s to %s with disk type %s ...\n", job.vid, job.src, dst.dataNode.Id, toDiskType.ReadableString())
  90. locations, found := commandEnv.MasterClient.GetLocations(uint32(job.vid))
  91. if !found {
  92. fmt.Printf("volume %d not found", job.vid)
  93. continue
  94. }
  95. unlock := c.Lock(job.src)
  96. if applyChanges {
  97. if err := c.doMoveOneVolume(commandEnv, writer, job.vid, toDiskType, locations, job.src, dst, *ioBytePerSecond); err != nil {
  98. fmt.Fprintf(writer, "move volume %d %s => %s: %v\n", job.vid, job.src, dst.dataNode.Id, err)
  99. }
  100. }
  101. unlock()
  102. }
  103. }(dst, c.queues[destServerAddress], *applyChange)
  104. }
  105. for _, vid := range volumeIds {
  106. if err = c.doVolumeTierMove(commandEnv, writer, vid, toDiskType, allLocations); err != nil {
  107. fmt.Printf("tier move volume %d: %v\n", vid, err)
  108. }
  109. allLocations = rotateDataNodes(allLocations)
  110. }
  111. for key, _ := range c.queues {
  112. close(c.queues[key])
  113. }
  114. wg.Wait()
  115. return nil
  116. }
  117. func (c *commandVolumeTierMove) Lock(key pb.ServerAddress) func() {
  118. value, _ := c.activeServers.LoadOrStore(key, &sync.Mutex{})
  119. mtx := value.(*sync.Mutex)
  120. mtx.Lock()
  121. return func() { mtx.Unlock() }
  122. }
  123. func filterLocationsByDiskType(dataNodes []location, diskType types.DiskType) (ret []location) {
  124. for _, loc := range dataNodes {
  125. _, found := loc.dataNode.DiskInfos[string(diskType)]
  126. if found {
  127. ret = append(ret, loc)
  128. }
  129. }
  130. return
  131. }
  132. func rotateDataNodes(dataNodes []location) []location {
  133. if len(dataNodes) > 0 {
  134. return append(dataNodes[1:], dataNodes[0])
  135. } else {
  136. return dataNodes
  137. }
  138. }
  139. func isOneOf(server string, locations []wdclient.Location) bool {
  140. for _, loc := range locations {
  141. if server == loc.Url {
  142. return true
  143. }
  144. }
  145. return false
  146. }
  147. func (c *commandVolumeTierMove) doVolumeTierMove(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, allLocations []location) (err error) {
  148. // find volume location
  149. locations, found := commandEnv.MasterClient.GetLocations(uint32(vid))
  150. if !found {
  151. return fmt.Errorf("volume %d not found", vid)
  152. }
  153. // find one server with the most empty volume slots with target disk type
  154. hasFoundTarget := false
  155. fn := capacityByFreeVolumeCount(toDiskType)
  156. for _, dst := range allLocations {
  157. if fn(dst.dataNode) > 0 && !hasFoundTarget {
  158. // ask the volume server to replicate the volume
  159. if isOneOf(dst.dataNode.Id, locations) {
  160. continue
  161. }
  162. var sourceVolumeServer pb.ServerAddress
  163. for _, loc := range locations {
  164. if loc.Url != dst.dataNode.Id {
  165. sourceVolumeServer = loc.ServerAddress()
  166. }
  167. }
  168. if sourceVolumeServer == "" {
  169. continue
  170. }
  171. hasFoundTarget = true
  172. // adjust volume count
  173. dst.dataNode.DiskInfos[string(toDiskType)].VolumeCount++
  174. destServerAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
  175. c.queues[destServerAddress] <- volumeTierMoveJob{sourceVolumeServer, vid}
  176. }
  177. }
  178. if !hasFoundTarget {
  179. fmt.Fprintf(writer, "can not find disk type %s for volume %d\n", toDiskType.ReadableString(), vid)
  180. }
  181. return nil
  182. }
  183. func (c *commandVolumeTierMove) doMoveOneVolume(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, locations []wdclient.Location, sourceVolumeServer pb.ServerAddress, dst location, ioBytePerSecond int64) (err error) {
  184. if !commandEnv.isLocked() {
  185. return fmt.Errorf("lock is lost")
  186. }
  187. // mark all replicas as read only
  188. if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, false); err != nil {
  189. return fmt.Errorf("mark volume %d as readonly on %s: %v", vid, locations[0].Url, err)
  190. }
  191. if err = LiveMoveVolume(commandEnv.option.GrpcDialOption, writer, vid, sourceVolumeServer, pb.NewServerAddressFromDataNode(dst.dataNode), 5*time.Second, toDiskType.ReadableString(), ioBytePerSecond, true); err != nil {
  192. // mark all replicas as writable
  193. if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, true); err != nil {
  194. glog.Errorf("mark volume %d as writable on %s: %v", vid, locations[0].Url, err)
  195. }
  196. return fmt.Errorf("move volume %d %s => %s : %v", vid, locations[0].Url, dst.dataNode.Id, err)
  197. }
  198. // remove the remaining replicas
  199. for _, loc := range locations {
  200. if loc.Url != dst.dataNode.Id && loc.ServerAddress() != sourceVolumeServer {
  201. if err = deleteVolume(commandEnv.option.GrpcDialOption, vid, loc.ServerAddress()); err != nil {
  202. fmt.Fprintf(writer, "failed to delete volume %d on %s: %v\n", vid, loc.Url, err)
  203. }
  204. // reduce volume count? Not really necessary since they are "more" full and will not be a candidate to move to
  205. }
  206. }
  207. return nil
  208. }
  209. func collectVolumeIdsForTierChange(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeSizeLimitMb uint64, sourceTier types.DiskType, collectionPattern string, fullPercentage float64, quietPeriod time.Duration) (vids []needle.VolumeId, err error) {
  210. quietSeconds := int64(quietPeriod / time.Second)
  211. nowUnixSeconds := time.Now().Unix()
  212. fmt.Printf("collect %s volumes quiet for: %d seconds\n", sourceTier, quietSeconds)
  213. vidMap := make(map[uint32]bool)
  214. eachDataNode(topologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
  215. for _, diskInfo := range dn.DiskInfos {
  216. for _, v := range diskInfo.VolumeInfos {
  217. // check collection name pattern
  218. if collectionPattern != "" {
  219. matched, err := filepath.Match(collectionPattern, v.Collection)
  220. if err != nil {
  221. return
  222. }
  223. if !matched {
  224. continue
  225. }
  226. }
  227. if v.ModifiedAtSecond+quietSeconds < nowUnixSeconds && types.ToDiskType(v.DiskType) == sourceTier {
  228. if float64(v.Size) > fullPercentage/100*float64(volumeSizeLimitMb)*1024*1024 {
  229. vidMap[v.Id] = true
  230. }
  231. }
  232. }
  233. }
  234. })
  235. for vid := range vidMap {
  236. vids = append(vids, needle.VolumeId(vid))
  237. }
  238. return
  239. }