You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

361 lines
10 KiB

  1. package shell
  2. import (
  3. "context"
  4. "errors"
  5. "flag"
  6. "fmt"
  7. "io"
  8. "net/http"
  9. "sort"
  10. "strings"
  11. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  12. "github.com/seaweedfs/seaweedfs/weed/wdclient"
  13. "golang.org/x/exp/maps"
  14. "golang.org/x/exp/slices"
  15. "github.com/seaweedfs/seaweedfs/weed/operation"
  16. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  17. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  18. "github.com/seaweedfs/seaweedfs/weed/util"
  19. )
var (
	// client is a package-wide HTTP client shared by all chunk download and
	// upload requests issued by fs.mergeVolumes; Do closes its idle
	// connections when the command finishes.
	// NOTE(review): no Timeout is configured — a stalled volume server can
	// block a chunk transfer indefinitely; confirm whether that is intended.
	client *http.Client
)
// init creates the shared HTTP client and registers the fs.mergeVolumes
// command with the shell's command table.
func init() {
	client = &http.Client{}
	Commands = append(Commands, &commandFsMergeVolumes{})
}
// commandFsMergeVolumes relocates chunks out of lightly-filled volumes into
// heavier compatible volumes, so the emptied source volumes can later be
// reclaimed by the vacuum process.
type commandFsMergeVolumes struct {
	// volumes caches volume information keyed by volume id; populated by
	// reloadVolumesInfo before any planning happens.
	volumes map[needle.VolumeId]*master_pb.VolumeInformationMessage
	// volumeSizeLimit is the master's per-volume size limit, in bytes.
	volumeSizeLimit uint64
}
  31. func (c *commandFsMergeVolumes) Name() string {
  32. return "fs.mergeVolumes"
  33. }
// Help returns the usage text displayed by the shell's help system,
// including the supported flags and the optional target directory.
func (c *commandFsMergeVolumes) Help() string {
	return `re-locate chunks into target volumes and try to clear lighter volumes.
This would help clear half-full volumes and let vacuum system to delete them later.
fs.mergeVolumes [-toVolumeId=y] [-fromVolumeId=x] [-collection="*"] [-apply] [/dir/]
`
}
  40. func (c *commandFsMergeVolumes) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
  41. dir, err := commandEnv.parseUrl(findInputDirectory(args))
  42. if err != nil {
  43. return err
  44. }
  45. if dir != "/" {
  46. dir = strings.TrimRight(dir, "/")
  47. }
  48. fsMergeVolumesCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
  49. fromVolumeArg := fsMergeVolumesCommand.Uint("fromVolumeId", 0, "move chunks with this volume id")
  50. toVolumeArg := fsMergeVolumesCommand.Uint("toVolumeId", 0, "change chunks to this volume id")
  51. collectionArg := fsMergeVolumesCommand.String("collection", "*", "Name of collection to merge")
  52. apply := fsMergeVolumesCommand.Bool("apply", false, "applying the metadata changes")
  53. if err = fsMergeVolumesCommand.Parse(args); err != nil {
  54. return err
  55. }
  56. fromVolumeId := needle.VolumeId(*fromVolumeArg)
  57. toVolumeId := needle.VolumeId(*toVolumeArg)
  58. c.reloadVolumesInfo(commandEnv.MasterClient)
  59. if fromVolumeId != 0 && toVolumeId != 0 {
  60. if fromVolumeId == toVolumeId {
  61. return fmt.Errorf("no volume id changes, %d == %d", fromVolumeId, toVolumeId)
  62. }
  63. compatible, err := c.volumesAreCompatible(fromVolumeId, toVolumeId)
  64. if err != nil {
  65. return fmt.Errorf("cannot determine volumes are compatible: %d and %d", fromVolumeId, toVolumeId)
  66. }
  67. if !compatible {
  68. return fmt.Errorf("volume %d is not compatible with volume %d", fromVolumeId, toVolumeId)
  69. }
  70. fromSize := c.getVolumeSizeById(fromVolumeId)
  71. toSize := c.getVolumeSizeById(toVolumeId)
  72. if fromSize+toSize > c.volumeSizeLimit {
  73. return fmt.Errorf(
  74. "volume %d (%d MB) cannot merge into volume %d (%d MB_ due to volume size limit (%d MB)",
  75. fromVolumeId, fromSize/1024/1024,
  76. toVolumeId, toSize/1024/1024,
  77. c.volumeSizeLimit/1024/102,
  78. )
  79. }
  80. }
  81. plan, err := c.createMergePlan(*collectionArg, toVolumeId, fromVolumeId)
  82. if err != nil {
  83. return err
  84. }
  85. c.printPlan(plan)
  86. if len(plan) == 0 {
  87. return nil
  88. }
  89. defer client.CloseIdleConnections()
  90. return commandEnv.WithFilerClient(false, func(filerClient filer_pb.SeaweedFilerClient) error {
  91. return filer_pb.TraverseBfs(commandEnv, util.FullPath(dir), func(parentPath util.FullPath, entry *filer_pb.Entry) {
  92. if entry.IsDirectory {
  93. return
  94. }
  95. for _, chunk := range entry.Chunks {
  96. if chunk.IsChunkManifest {
  97. fmt.Printf("Change volume id for large file is not implemented yet: %s/%s\n", parentPath, entry.Name)
  98. continue
  99. }
  100. chunkVolumeId := needle.VolumeId(chunk.Fid.VolumeId)
  101. toVolumeId, found := plan[chunkVolumeId]
  102. if !found {
  103. continue
  104. }
  105. path := parentPath.Child(entry.Name)
  106. fmt.Printf("move %s(%s)\n", path, chunk.GetFileIdString())
  107. if !*apply {
  108. continue
  109. }
  110. if err = moveChunk(chunk, toVolumeId, commandEnv.MasterClient); err != nil {
  111. fmt.Printf("failed to move %s/%s: %v\n", path, chunk.GetFileIdString(), err)
  112. continue
  113. }
  114. if err = filer_pb.UpdateEntry(filerClient, &filer_pb.UpdateEntryRequest{
  115. Directory: string(parentPath),
  116. Entry: entry,
  117. }); err != nil {
  118. fmt.Printf("failed to update %s: %v\n", path, err)
  119. }
  120. }
  121. })
  122. })
  123. }
  124. func (c *commandFsMergeVolumes) getVolumeInfoById(vid needle.VolumeId) (*master_pb.VolumeInformationMessage, error) {
  125. info := c.volumes[vid]
  126. var err error
  127. if info == nil {
  128. err = errors.New("cannot find volume")
  129. }
  130. return info, err
  131. }
  132. func (c *commandFsMergeVolumes) volumesAreCompatible(src needle.VolumeId, dest needle.VolumeId) (bool, error) {
  133. srcInfo, err := c.getVolumeInfoById(src)
  134. if err != nil {
  135. return false, err
  136. }
  137. destInfo, err := c.getVolumeInfoById(dest)
  138. if err != nil {
  139. return false, err
  140. }
  141. return (srcInfo.Collection == destInfo.Collection &&
  142. srcInfo.Ttl == destInfo.Ttl &&
  143. srcInfo.ReplicaPlacement == destInfo.ReplicaPlacement), nil
  144. }
  145. func (c *commandFsMergeVolumes) reloadVolumesInfo(masterClient *wdclient.MasterClient) error {
  146. c.volumes = make(map[needle.VolumeId]*master_pb.VolumeInformationMessage)
  147. return masterClient.WithClient(false, func(client master_pb.SeaweedClient) error {
  148. volumes, err := client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
  149. if err != nil {
  150. return err
  151. }
  152. c.volumeSizeLimit = volumes.GetVolumeSizeLimitMb() * 1024 * 1024
  153. for _, dc := range volumes.TopologyInfo.DataCenterInfos {
  154. for _, rack := range dc.RackInfos {
  155. for _, node := range rack.DataNodeInfos {
  156. for _, disk := range node.DiskInfos {
  157. for _, volume := range disk.VolumeInfos {
  158. vid := needle.VolumeId(volume.Id)
  159. if found := c.volumes[vid]; found == nil {
  160. c.volumes[vid] = volume
  161. }
  162. }
  163. }
  164. }
  165. }
  166. }
  167. return nil
  168. })
  169. }
  170. func (c *commandFsMergeVolumes) createMergePlan(collection string, toVolumeId needle.VolumeId, fromVolumeId needle.VolumeId) (map[needle.VolumeId]needle.VolumeId, error) {
  171. plan := make(map[needle.VolumeId]needle.VolumeId)
  172. volumes := maps.Keys(c.volumes)
  173. sort.Slice(volumes, func(a, b int) bool {
  174. return c.volumes[volumes[b]].Size < c.volumes[volumes[a]].Size
  175. })
  176. l := len(volumes)
  177. for i := 0; i < l; i++ {
  178. volume := c.volumes[volumes[i]]
  179. if volume.GetReadOnly() || c.getVolumeSize(volume) == 0 || (collection != "*" && collection != volume.GetCollection()) {
  180. volumes = slices.Delete(volumes, i, i+1)
  181. i--
  182. l--
  183. }
  184. }
  185. for i := l - 1; i >= 0; i-- {
  186. src := volumes[i]
  187. if fromVolumeId != 0 && src != fromVolumeId {
  188. continue
  189. }
  190. for j := 0; j < i; j++ {
  191. condidate := volumes[j]
  192. if toVolumeId != 0 && condidate != toVolumeId {
  193. continue
  194. }
  195. if _, moving := plan[condidate]; moving {
  196. continue
  197. }
  198. compatible, err := c.volumesAreCompatible(src, condidate)
  199. if err != nil {
  200. return nil, err
  201. }
  202. if !compatible {
  203. continue
  204. }
  205. if c.getVolumeSizeBasedOnPlan(plan, condidate)+c.getVolumeSizeById(src) > c.volumeSizeLimit {
  206. continue
  207. }
  208. plan[src] = condidate
  209. break
  210. }
  211. }
  212. return plan, nil
  213. }
  214. func (c *commandFsMergeVolumes) getVolumeSizeBasedOnPlan(plan map[needle.VolumeId]needle.VolumeId, vid needle.VolumeId) uint64 {
  215. size := c.getVolumeSizeById(vid)
  216. for src, dist := range plan {
  217. if dist == vid {
  218. size += c.getVolumeSizeById(src)
  219. }
  220. }
  221. return size
  222. }
// getVolumeSize returns the live (non-deleted) byte size of a volume.
// NOTE(review): assumes DeletedByteCount <= Size; if the master ever
// reports otherwise, this unsigned subtraction would wrap — confirm.
func (c *commandFsMergeVolumes) getVolumeSize(volume *master_pb.VolumeInformationMessage) uint64 {
	return volume.Size - volume.DeletedByteCount
}
  226. func (c *commandFsMergeVolumes) getVolumeSizeById(vid needle.VolumeId) uint64 {
  227. return c.getVolumeSize(c.volumes[vid])
  228. }
  229. func (c *commandFsMergeVolumes) printPlan(plan map[needle.VolumeId]needle.VolumeId) {
  230. fmt.Printf("max volume size: %d MB\n", c.volumeSizeLimit/1024/1024)
  231. reversePlan := make(map[needle.VolumeId][]needle.VolumeId)
  232. for src, dist := range plan {
  233. reversePlan[dist] = append(reversePlan[dist], src)
  234. }
  235. for dist, srcs := range reversePlan {
  236. currentSize := c.getVolumeSizeById(dist)
  237. for _, src := range srcs {
  238. srcSize := c.getVolumeSizeById(src)
  239. newSize := currentSize + srcSize
  240. fmt.Printf(
  241. "volume %d (%d MB) merge into volume %d (%d MB => %d MB)\n",
  242. src, srcSize/1024/1024,
  243. dist, currentSize/1024/1024, newSize/1024/1024,
  244. )
  245. currentSize = newSize
  246. }
  247. fmt.Println()
  248. }
  249. }
// moveChunk copies one chunk from a volume server hosting its current
// volume to a server hosting toVolumeId, then rewrites the chunk's volume
// id in place so the caller can persist the updated entry. The chunk keeps
// its file key and cookie; only the volume id changes.
func moveChunk(chunk *filer_pb.FileChunk, toVolumeId needle.VolumeId, masterClient *wdclient.MasterClient) error {
	fromFid := needle.NewFileId(needle.VolumeId(chunk.Fid.VolumeId), chunk.Fid.FileKey, chunk.Fid.Cookie)
	toFid := needle.NewFileId(toVolumeId, chunk.Fid.FileKey, chunk.Fid.Cookie)

	// Resolve a volume server for the source volume. readDeleted=true lets
	// the download succeed even if the needle was already marked deleted.
	downloadURLs, err := masterClient.LookupVolumeServerUrl(fromFid.VolumeId.String())
	if err != nil {
		return err
	}
	downloadURL := fmt.Sprintf("http://%s/%s?readDeleted=true", downloadURLs[0], fromFid.String())

	uploadURLs, err := masterClient.LookupVolumeServerUrl(toVolumeId.String())
	if err != nil {
		return err
	}
	uploadURL := fmt.Sprintf("http://%s/%s", uploadURLs[0], toFid.String())

	resp, reader, err := readUrl(downloadURL)
	if err != nil {
		return err
	}
	defer util.CloseResponse(resp)
	defer reader.Close()

	// Recover the original filename from the Content-Disposition header,
	// stripping surrounding double quotes if present.
	var filename string
	contentDisposition := resp.Header.Get("Content-Disposition")
	if len(contentDisposition) > 0 {
		idx := strings.Index(contentDisposition, "filename=")
		if idx != -1 {
			filename = contentDisposition[idx+len("filename="):]
			filename = strings.Trim(filename, "\"")
		}
	}
	contentType := resp.Header.Get("Content-Type")
	isCompressed := resp.Header.Get("Content-Encoding") == "gzip"
	md5 := resp.Header.Get("Content-MD5")

	// Stream the chunk to the destination volume server, preserving
	// compression state, content type, and MD5.
	_, err, _ = operation.Upload(reader, &operation.UploadOption{
		UploadUrl:         uploadURL,
		Filename:          filename,
		IsInputCompressed: isCompressed,
		Cipher:            false,
		MimeType:          contentType,
		PairMap:           nil,
		Md5:               md5,
	})
	if err != nil {
		return err
	}

	// Point the chunk at its new volume and clear the cached file id string
	// so it is regenerated from Fid on next use.
	chunk.Fid.VolumeId = uint32(toVolumeId)
	chunk.FileId = ""
	return nil
}
  297. func readUrl(fileUrl string) (*http.Response, io.ReadCloser, error) {
  298. req, err := http.NewRequest("GET", fileUrl, nil)
  299. if err != nil {
  300. return nil, nil, err
  301. }
  302. req.Header.Add("Accept-Encoding", "gzip")
  303. r, err := client.Do(req)
  304. if err != nil {
  305. return nil, nil, err
  306. }
  307. if r.StatusCode >= 400 {
  308. util.CloseResponse(r)
  309. return nil, nil, fmt.Errorf("%s: %s", fileUrl, r.Status)
  310. }
  311. return r, r.Body, nil
  312. }