You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

308 lines
8.7 KiB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
  1. package shell
  2. import (
  3. "context"
  4. "flag"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "math"
  9. "os"
  10. "path/filepath"
  11. "time"
  12. "github.com/chrislusf/seaweedfs/weed/operation"
  13. "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
  14. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  15. "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
  16. "github.com/chrislusf/seaweedfs/weed/storage/needle_map"
  17. "github.com/chrislusf/seaweedfs/weed/storage/types"
  18. "github.com/chrislusf/seaweedfs/weed/util"
  19. )
  20. func init() {
  21. Commands = append(Commands, &commandVolumeFsck{})
  22. }
  // commandVolumeFsck is the receiver type for the "volume.fsck" command.
  type commandVolumeFsck struct {
  	// env is the command environment handed to Do; Do stores it here so the
  	// helper methods can reach the master client and gRPC options through it.
  	env *CommandEnv
  }
  26. func (c *commandVolumeFsck) Name() string {
  27. return "volume.fsck"
  28. }
  29. func (c *commandVolumeFsck) Help() string {
  30. return `check all volumes to find entries not used by the filer
  31. Important assumption!!!
  32. the system is all used by one filer.
  33. This command works this way:
  34. 1. collect all file ids from all volumes, as set A
  35. 2. collect all file ids from the filer, as set B
  36. 3. find out the set A subtract B
  37. `
  38. }
  39. func (c *commandVolumeFsck) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
  40. fsckCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
  41. verbose := fsckCommand.Bool("v", false, "verbose mode")
  42. applyPurging := fsckCommand.Bool("reallyDeleteFromVolume", false, "<expert only> delete data not referenced by the filer")
  43. if err = fsckCommand.Parse(args); err != nil {
  44. return nil
  45. }
  46. c.env = commandEnv
  47. // create a temp folder
  48. tempFolder, err := ioutil.TempDir("", "sw_fsck")
  49. if err != nil {
  50. return fmt.Errorf("failed to create temp folder: %v", err)
  51. }
  52. if *verbose {
  53. fmt.Fprintf(writer, "working directory: %s\n", tempFolder)
  54. }
  55. defer os.RemoveAll(tempFolder)
  56. // collect all volume id locations
  57. volumeIdToServer, err := c.collectVolumeIds(*verbose, writer)
  58. if err != nil {
  59. return fmt.Errorf("failed to collect all volume locations: %v", err)
  60. }
  61. // collect each volume file ids
  62. for volumeId, vinfo := range volumeIdToServer {
  63. err = c.collectOneVolumeFileIds(tempFolder, volumeId, vinfo, *verbose, writer)
  64. if err != nil {
  65. return fmt.Errorf("failed to collect file ids from volume %d on %s: %v", volumeId, vinfo.server, err)
  66. }
  67. }
  68. // collect all filer file ids
  69. if err = c.collectFilerFileIds(tempFolder, volumeIdToServer, *verbose, writer); err != nil {
  70. return fmt.Errorf("failed to collect file ids from filer: %v", err)
  71. }
  72. // volume file ids substract filer file ids
  73. var totalInUseCount, totalOrphanChunkCount, totalOrphanDataSize uint64
  74. for volumeId, vinfo := range volumeIdToServer {
  75. inUseCount, orphanChunkCount, orphanDataSize, checkErr := c.oneVolumeFileIdsSubtractFilerFileIds(tempFolder, volumeId, writer, *verbose)
  76. if checkErr != nil {
  77. return fmt.Errorf("failed to collect file ids from volume %d on %s: %v", volumeId, vinfo.server, checkErr)
  78. }
  79. totalInUseCount += inUseCount
  80. totalOrphanChunkCount += orphanChunkCount
  81. totalOrphanDataSize += orphanDataSize
  82. }
  83. if totalOrphanChunkCount == 0 {
  84. fmt.Fprintf(writer, "no orphan data\n")
  85. }
  86. pct := float64(totalOrphanChunkCount*100) / (float64(totalOrphanChunkCount + totalInUseCount))
  87. fmt.Fprintf(writer, "\nTotal\t\tentries:%d\torphan:%d\t%.2f%%\t%dB\n",
  88. totalOrphanChunkCount+totalInUseCount, totalOrphanChunkCount, pct, totalOrphanDataSize)
  89. fmt.Fprintf(writer, "This could be normal if multiple filers or no filers are used.\n")
  90. if *applyPurging {
  91. fmt.Fprintf(writer, "\nstarting to destroy your data ...\n")
  92. time.Sleep(30 * time.Second)
  93. fmt.Fprintf(writer, "just kidding. Not implemented yet.\n")
  94. }
  95. return nil
  96. }
  97. func (c *commandVolumeFsck) collectOneVolumeFileIds(tempFolder string, volumeId uint32, vinfo VInfo, verbose bool, writer io.Writer) error {
  98. if verbose {
  99. fmt.Fprintf(writer, "collecting volume %d file ids from %s ...\n", volumeId, vinfo.server)
  100. }
  101. return operation.WithVolumeServerClient(vinfo.server, c.env.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  102. copyFileClient, err := volumeServerClient.CopyFile(context.Background(), &volume_server_pb.CopyFileRequest{
  103. VolumeId: volumeId,
  104. Ext: ".idx",
  105. CompactionRevision: math.MaxUint32,
  106. StopOffset: math.MaxInt64,
  107. Collection: vinfo.collection,
  108. IsEcVolume: vinfo.isEcVolume,
  109. IgnoreSourceFileNotFound: false,
  110. })
  111. if err != nil {
  112. return fmt.Errorf("failed to start copying volume %d.idx: %v", volumeId, err)
  113. }
  114. err = writeToFile(copyFileClient, getVolumeFileIdFile(tempFolder, volumeId))
  115. if err != nil {
  116. return fmt.Errorf("failed to copy %d.idx from %s: %v", volumeId, vinfo.server, err)
  117. }
  118. return nil
  119. })
  120. }
  121. func (c *commandVolumeFsck) collectFilerFileIds(tempFolder string, volumeIdToServer map[uint32]VInfo, verbose bool, writer io.Writer) error {
  122. if verbose {
  123. fmt.Fprintf(writer, "collecting file ids from filer ...\n")
  124. }
  125. files := make(map[uint32]*os.File)
  126. for vid := range volumeIdToServer {
  127. dst, openErr := os.OpenFile(getFilerFileIdFile(tempFolder, vid), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
  128. if openErr != nil {
  129. return fmt.Errorf("failed to create file %s: %v", getFilerFileIdFile(tempFolder, vid), openErr)
  130. }
  131. files[vid] = dst
  132. }
  133. defer func() {
  134. for _, f := range files {
  135. f.Close()
  136. }
  137. }()
  138. type Item struct {
  139. vid uint32
  140. fileKey uint64
  141. }
  142. return doTraverseBfsAndSaving(c.env, nil, "/", false, func(outputChan chan interface{}) {
  143. buffer := make([]byte, 8)
  144. for item := range outputChan {
  145. i := item.(*Item)
  146. util.Uint64toBytes(buffer, i.fileKey)
  147. files[i.vid].Write(buffer)
  148. }
  149. }, func(entry *filer_pb.FullEntry, outputChan chan interface{}) (err error) {
  150. for _, chunk := range entry.Entry.Chunks {
  151. outputChan <- &Item{
  152. vid: chunk.Fid.VolumeId,
  153. fileKey: chunk.Fid.FileKey,
  154. }
  155. }
  156. return nil
  157. })
  158. }
  159. func (c *commandVolumeFsck) oneVolumeFileIdsSubtractFilerFileIds(tempFolder string, volumeId uint32, writer io.Writer, verbose bool) (inUseCount, orphanChunkCount, orphanDataSize uint64, err error) {
  160. db := needle_map.NewMemDb()
  161. defer db.Close()
  162. if err = db.LoadFromIdx(getVolumeFileIdFile(tempFolder, volumeId)); err != nil {
  163. return
  164. }
  165. filerFileIdsData, err := ioutil.ReadFile(getFilerFileIdFile(tempFolder, volumeId))
  166. if err != nil {
  167. return
  168. }
  169. dataLen := len(filerFileIdsData)
  170. if dataLen%8 != 0 {
  171. return 0, 0, 0, fmt.Errorf("filer data is corrupted")
  172. }
  173. for i := 0; i < len(filerFileIdsData); i += 8 {
  174. fileKey := util.BytesToUint64(filerFileIdsData[i : i+8])
  175. db.Delete(types.NeedleId(fileKey))
  176. inUseCount++
  177. }
  178. db.AscendingVisit(func(n needle_map.NeedleValue) error {
  179. // fmt.Printf("%d,%x\n", volumeId, n.Key)
  180. orphanChunkCount++
  181. orphanDataSize += uint64(n.Size)
  182. return nil
  183. })
  184. if orphanChunkCount > 0 {
  185. pct := float64(orphanChunkCount*100) / (float64(orphanChunkCount + inUseCount))
  186. fmt.Fprintf(writer, "volume:%d\tentries:%d\torphan:%d\t%.2f%%\t%dB\n",
  187. volumeId, orphanChunkCount+inUseCount, orphanChunkCount, pct, orphanDataSize)
  188. }
  189. return
  190. }
  // VInfo describes where one volume can be reached, as recorded by
  // collectVolumeIds while walking the master's topology.
  type VInfo struct {
  	// server is the id of the data node that reported the volume; it is the
  	// address passed to operation.WithVolumeServerClient when copying the index.
  	server string
  	// collection is the collection name reported for the volume.
  	collection string
  	// isEcVolume is true when the entry came from the node's EcShardInfos
  	// rather than its VolumeInfos.
  	isEcVolume bool
  }
  196. func (c *commandVolumeFsck) collectVolumeIds(verbose bool, writer io.Writer) (volumeIdToServer map[uint32]VInfo, err error) {
  197. if verbose {
  198. fmt.Fprintf(writer, "collecting volume id and locations from master ...\n")
  199. }
  200. volumeIdToServer = make(map[uint32]VInfo)
  201. var resp *master_pb.VolumeListResponse
  202. err = c.env.MasterClient.WithClient(func(client master_pb.SeaweedClient) error {
  203. resp, err = client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
  204. return err
  205. })
  206. if err != nil {
  207. return
  208. }
  209. eachDataNode(resp.TopologyInfo, func(dc string, rack RackId, t *master_pb.DataNodeInfo) {
  210. for _, vi := range t.VolumeInfos {
  211. volumeIdToServer[vi.Id] = VInfo{
  212. server: t.Id,
  213. collection: vi.Collection,
  214. isEcVolume: false,
  215. }
  216. }
  217. for _, ecShardInfo := range t.EcShardInfos {
  218. volumeIdToServer[ecShardInfo.Id] = VInfo{
  219. server: t.Id,
  220. collection: ecShardInfo.Collection,
  221. isEcVolume: true,
  222. }
  223. }
  224. })
  225. if verbose {
  226. fmt.Fprintf(writer, "collected %d volumes and locations.\n", len(volumeIdToServer))
  227. }
  228. return
  229. }
  // getVolumeFileIdFile returns the path inside tempFolder of the copied index
  // file for volume vid: "<vid>.idx", with vid in decimal.
  func getVolumeFileIdFile(tempFolder string, vid uint32) string {
  	idxName := fmt.Sprint(vid) + ".idx"
  	return filepath.Join(tempFolder, idxName)
  }
  // getFilerFileIdFile returns the path inside tempFolder of the file holding
  // the filer-referenced file keys for volume vid: "<vid>.fid", with vid in
  // decimal.
  func getFilerFileIdFile(tempFolder string, vid uint32) string {
  	fidName := fmt.Sprint(vid) + ".fid"
  	return filepath.Join(tempFolder, fidName)
  }
  236. func writeToFile(client volume_server_pb.VolumeServer_CopyFileClient, fileName string) error {
  237. flags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC
  238. dst, err := os.OpenFile(fileName, flags, 0644)
  239. if err != nil {
  240. return nil
  241. }
  242. defer dst.Close()
  243. for {
  244. resp, receiveErr := client.Recv()
  245. if receiveErr == io.EOF {
  246. break
  247. }
  248. if receiveErr != nil {
  249. return fmt.Errorf("receiving %s: %v", fileName, receiveErr)
  250. }
  251. dst.Write(resp.FileContent)
  252. }
  253. return nil
  254. }