You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

695 lines
22 KiB

4 years ago
3 years ago
3 years ago
3 years ago
5 years ago
3 years ago
5 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
5 years ago
3 years ago
3 years ago
5 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
5 years ago
3 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
5 years ago
3 years ago
  1. package shell
  2. import (
  3. "bufio"
  4. "context"
  5. "flag"
  6. "fmt"
  7. "github.com/chrislusf/seaweedfs/weed/filer"
  8. "github.com/chrislusf/seaweedfs/weed/operation"
  9. "github.com/chrislusf/seaweedfs/weed/pb"
  10. "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
  11. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  12. "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
  13. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  14. "github.com/chrislusf/seaweedfs/weed/storage/needle_map"
  15. "github.com/chrislusf/seaweedfs/weed/storage/types"
  16. "github.com/chrislusf/seaweedfs/weed/util"
  17. "io"
  18. "io/ioutil"
  19. "math"
  20. "net/http"
  21. "net/url"
  22. "os"
  23. "path"
  24. "path/filepath"
  25. "strings"
  26. "sync"
  27. "time"
  28. )
  29. func init() {
  30. Commands = append(Commands, &commandVolumeFsck{})
  31. }
  32. type commandVolumeFsck struct {
  33. env *CommandEnv
  34. forcePurging bool
  35. }
  36. func (c *commandVolumeFsck) Name() string {
  37. return "volume.fsck"
  38. }
  39. func (c *commandVolumeFsck) Help() string {
  40. return `check all volumes to find entries not used by the filer
  41. Important assumption!!!
  42. the system is all used by one filer.
  43. This command works this way:
  44. 1. collect all file ids from all volumes, as set A
  45. 2. collect all file ids from the filer, as set B
  46. 3. find out the set A subtract B
  47. If -findMissingChunksInFiler is enabled, this works
  48. in a reverse way:
  49. 1. collect all file ids from all volumes, as set A
  50. 2. collect all file ids from the filer, as set B
  51. 3. find out the set B subtract A
  52. `
  53. }
  54. func (c *commandVolumeFsck) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
  55. fsckCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
  56. verbose := fsckCommand.Bool("v", false, "verbose mode")
  57. findMissingChunksInFiler := fsckCommand.Bool("findMissingChunksInFiler", false, "see \"help volume.fsck\"")
  58. findMissingChunksInFilerPath := fsckCommand.String("findMissingChunksInFilerPath", "/", "used together with findMissingChunksInFiler")
  59. findMissingChunksInVolumeId := fsckCommand.Int("findMissingChunksInVolumeId", 0, "used together with findMissingChunksInFiler")
  60. applyPurging := fsckCommand.Bool("reallyDeleteFromVolume", false, "<expert only!> after detection, delete missing data from volumes / delete missing file entries from filer")
  61. c.forcePurging = *fsckCommand.Bool("forcePurging", false, "delete missing data from volumes in one replica")
  62. purgeAbsent := fsckCommand.Bool("reallyDeleteFilerEntries", false, "<expert only!> delete missing file entries from filer if the corresponding volume is missing for any reason, please ensure all still existing/expected volumes are connected! used together with findMissingChunksInFiler")
  63. tempPath := fsckCommand.String("tempPath", path.Join(os.TempDir()), "path for temporary idx files")
  64. if err = fsckCommand.Parse(args); err != nil {
  65. return nil
  66. }
  67. if err = commandEnv.confirmIsLocked(args); err != nil {
  68. return
  69. }
  70. c.env = commandEnv
  71. // create a temp folder
  72. tempFolder, err := os.MkdirTemp(*tempPath, "sw_fsck")
  73. if err != nil {
  74. return fmt.Errorf("failed to create temp folder: %v", err)
  75. }
  76. if *verbose {
  77. fmt.Fprintf(writer, "working directory: %s\n", tempFolder)
  78. }
  79. defer os.RemoveAll(tempFolder)
  80. // collect all volume id locations
  81. dataNodeVolumeIdToVInfo, err := c.collectVolumeIds(commandEnv, *verbose, writer)
  82. if err != nil {
  83. return fmt.Errorf("failed to collect all volume locations: %v", err)
  84. }
  85. isBucketsPath := false
  86. var fillerBucketsPath string
  87. if *findMissingChunksInFiler && *findMissingChunksInFilerPath != "/" {
  88. fillerBucketsPath, err = readFilerBucketsPath(commandEnv)
  89. if err != nil {
  90. return fmt.Errorf("read filer buckets path: %v", err)
  91. }
  92. if strings.HasPrefix(*findMissingChunksInFilerPath, fillerBucketsPath) {
  93. isBucketsPath = true
  94. }
  95. }
  96. if err != nil {
  97. return fmt.Errorf("read filer buckets path: %v", err)
  98. }
  99. collectMtime := time.Now().Unix()
  100. // collect each volume file ids
  101. for dataNodeId, volumeIdToVInfo := range dataNodeVolumeIdToVInfo {
  102. for volumeId, vinfo := range volumeIdToVInfo {
  103. if *findMissingChunksInVolumeId > 0 && uint32(*findMissingChunksInVolumeId) != volumeId {
  104. delete(volumeIdToVInfo, volumeId)
  105. continue
  106. }
  107. if isBucketsPath && !strings.HasPrefix(*findMissingChunksInFilerPath, fillerBucketsPath+"/"+vinfo.collection) {
  108. delete(volumeIdToVInfo, volumeId)
  109. continue
  110. }
  111. err = c.collectOneVolumeFileIds(tempFolder, dataNodeId, volumeId, vinfo, *verbose, writer)
  112. if err != nil {
  113. return fmt.Errorf("failed to collect file ids from volume %d on %s: %v", volumeId, vinfo.server, err)
  114. }
  115. }
  116. }
  117. if *findMissingChunksInFiler {
  118. // collect all filer file ids and paths
  119. if err = c.collectFilerFileIdAndPaths(dataNodeVolumeIdToVInfo, tempFolder, writer, *findMissingChunksInFilerPath, *verbose, *purgeAbsent, collectMtime); err != nil {
  120. return fmt.Errorf("collectFilerFileIdAndPaths: %v", err)
  121. }
  122. for dataNodeId, volumeIdToVInfo := range dataNodeVolumeIdToVInfo {
  123. // for each volume, check filer file ids
  124. if err = c.findFilerChunksMissingInVolumeServers(volumeIdToVInfo, tempFolder, dataNodeId, writer, *verbose, *applyPurging); err != nil {
  125. return fmt.Errorf("findFilerChunksMissingInVolumeServers: %v", err)
  126. }
  127. }
  128. } else {
  129. // collect all filer file ids
  130. if err = c.collectFilerFileIds(dataNodeVolumeIdToVInfo, tempFolder, writer, *verbose); err != nil {
  131. return fmt.Errorf("failed to collect file ids from filer: %v", err)
  132. }
  133. // volume file ids subtract filer file ids
  134. if err = c.findExtraChunksInVolumeServers(dataNodeVolumeIdToVInfo, tempFolder, writer, *verbose, *applyPurging); err != nil {
  135. return fmt.Errorf("findExtraChunksInVolumeServers: %v", err)
  136. }
  137. }
  138. return nil
  139. }
  140. func (c *commandVolumeFsck) collectFilerFileIdAndPaths(dataNodeVolumeIdToVInfo map[string]map[uint32]VInfo, tempFolder string, writer io.Writer, filerPath string, verbose bool, purgeAbsent bool, collectMtime int64) error {
  141. if verbose {
  142. fmt.Fprintf(writer, "checking each file from filer ...\n")
  143. }
  144. files := make(map[uint32]*os.File)
  145. for _, volumeIdToServer := range dataNodeVolumeIdToVInfo {
  146. for vid := range volumeIdToServer {
  147. if _, ok := files[vid]; ok {
  148. continue
  149. }
  150. dst, openErr := os.OpenFile(getFilerFileIdFile(tempFolder, vid), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
  151. if openErr != nil {
  152. return fmt.Errorf("failed to create file %s: %v", getFilerFileIdFile(tempFolder, vid), openErr)
  153. }
  154. files[vid] = dst
  155. }
  156. }
  157. defer func() {
  158. for _, f := range files {
  159. f.Close()
  160. }
  161. }()
  162. type Item struct {
  163. vid uint32
  164. fileKey uint64
  165. cookie uint32
  166. path util.FullPath
  167. }
  168. return doTraverseBfsAndSaving(c.env, nil, filerPath, false, func(entry *filer_pb.FullEntry, outputChan chan interface{}) (err error) {
  169. if verbose && entry.Entry.IsDirectory {
  170. fmt.Fprintf(writer, "checking directory %s\n", util.NewFullPath(entry.Dir, entry.Entry.Name))
  171. }
  172. dataChunks, manifestChunks, resolveErr := filer.ResolveChunkManifest(filer.LookupFn(c.env), entry.Entry.Chunks, 0, math.MaxInt64)
  173. if resolveErr != nil {
  174. return nil
  175. }
  176. dataChunks = append(dataChunks, manifestChunks...)
  177. for _, chunk := range dataChunks {
  178. if chunk.Mtime > collectMtime {
  179. continue
  180. }
  181. outputChan <- &Item{
  182. vid: chunk.Fid.VolumeId,
  183. fileKey: chunk.Fid.FileKey,
  184. cookie: chunk.Fid.Cookie,
  185. path: util.NewFullPath(entry.Dir, entry.Entry.Name),
  186. }
  187. }
  188. return nil
  189. }, func(outputChan chan interface{}) {
  190. buffer := make([]byte, 16)
  191. for item := range outputChan {
  192. i := item.(*Item)
  193. if f, ok := files[i.vid]; ok {
  194. util.Uint64toBytes(buffer, i.fileKey)
  195. util.Uint32toBytes(buffer[8:], i.cookie)
  196. util.Uint32toBytes(buffer[12:], uint32(len(i.path)))
  197. f.Write(buffer)
  198. f.Write([]byte(i.path))
  199. // fmt.Fprintf(writer, "%d,%x%08x %d %s\n", i.vid, i.fileKey, i.cookie, len(i.path), i.path)
  200. } else {
  201. fmt.Fprintf(writer, "%d,%x%08x %s volume not found\n", i.vid, i.fileKey, i.cookie, i.path)
  202. if purgeAbsent {
  203. fmt.Printf("deleting path %s after volume not found", i.path)
  204. c.httpDelete(i.path, verbose)
  205. }
  206. }
  207. }
  208. })
  209. }
  210. func (c *commandVolumeFsck) findFilerChunksMissingInVolumeServers(volumeIdToVInfo map[uint32]VInfo, tempFolder string, dataNodeId string, writer io.Writer, verbose bool, applyPurging bool) error {
  211. for volumeId, vinfo := range volumeIdToVInfo {
  212. checkErr := c.oneVolumeFileIdsCheckOneVolume(tempFolder, dataNodeId, volumeId, writer, verbose, applyPurging)
  213. if checkErr != nil {
  214. return fmt.Errorf("failed to collect file ids from volume %d on %s: %v", volumeId, vinfo.server, checkErr)
  215. }
  216. }
  217. return nil
  218. }
  219. func (c *commandVolumeFsck) findExtraChunksInVolumeServers(dataNodeVolumeIdToVInfo map[string]map[uint32]VInfo, tempFolder string, writer io.Writer, verbose bool, applyPurging bool) error {
  220. var totalInUseCount, totalOrphanChunkCount, totalOrphanDataSize uint64
  221. volumeIdOrphanFileIds := make(map[uint32]map[string]bool)
  222. isSeveralReplicas := make(map[uint32]bool)
  223. isEcVolumeReplicas := make(map[uint32]bool)
  224. isReadOnlyReplicas := make(map[uint32]bool)
  225. serverReplicas := make(map[uint32][]pb.ServerAddress)
  226. for dataNodeId, volumeIdToVInfo := range dataNodeVolumeIdToVInfo {
  227. for volumeId, vinfo := range volumeIdToVInfo {
  228. inUseCount, orphanFileIds, orphanDataSize, checkErr := c.oneVolumeFileIdsSubtractFilerFileIds(tempFolder, dataNodeId, volumeId, writer, verbose)
  229. if checkErr != nil {
  230. return fmt.Errorf("failed to collect file ids from volume %d on %s: %v", volumeId, vinfo.server, checkErr)
  231. }
  232. isSeveralReplicas[volumeId] = false
  233. if _, found := volumeIdOrphanFileIds[volumeId]; !found {
  234. volumeIdOrphanFileIds[volumeId] = make(map[string]bool)
  235. } else {
  236. isSeveralReplicas[volumeId] = true
  237. }
  238. for _, fid := range orphanFileIds {
  239. if isSeveralReplicas[volumeId] {
  240. if _, found := volumeIdOrphanFileIds[volumeId][fid]; !found {
  241. continue
  242. }
  243. }
  244. volumeIdOrphanFileIds[volumeId][fid] = isSeveralReplicas[volumeId]
  245. }
  246. totalInUseCount += inUseCount
  247. totalOrphanChunkCount += uint64(len(orphanFileIds))
  248. totalOrphanDataSize += orphanDataSize
  249. if verbose {
  250. for _, fid := range orphanFileIds {
  251. fmt.Fprintf(writer, "%s\n", fid)
  252. }
  253. }
  254. isEcVolumeReplicas[volumeId] = vinfo.isEcVolume
  255. if isReadOnly, found := isReadOnlyReplicas[volumeId]; !(found && isReadOnly) {
  256. isReadOnlyReplicas[volumeId] = vinfo.isReadOnly
  257. }
  258. serverReplicas[volumeId] = append(serverReplicas[volumeId], vinfo.server)
  259. }
  260. for volumeId, orphanReplicaFileIds := range volumeIdOrphanFileIds {
  261. if !(applyPurging && len(orphanReplicaFileIds) > 0) {
  262. continue
  263. }
  264. orphanFileIds := []string{}
  265. for fid, foundInAllReplicas := range orphanReplicaFileIds {
  266. if !isSeveralReplicas[volumeId] || c.forcePurging || (isSeveralReplicas[volumeId] && foundInAllReplicas) {
  267. orphanFileIds = append(orphanFileIds, fid)
  268. }
  269. }
  270. if !(len(orphanFileIds) > 0) {
  271. continue
  272. }
  273. if verbose {
  274. fmt.Fprintf(writer, "purging process for volume %d.\n", volumeId)
  275. }
  276. if isEcVolumeReplicas[volumeId] {
  277. fmt.Fprintf(writer, "skip purging for Erasure Coded volume %d.\n", volumeId)
  278. continue
  279. }
  280. for _, server := range serverReplicas[volumeId] {
  281. needleVID := needle.VolumeId(volumeId)
  282. if isReadOnlyReplicas[volumeId] {
  283. err := markVolumeWritable(c.env.option.GrpcDialOption, needleVID, server, true)
  284. if err != nil {
  285. return fmt.Errorf("mark volume %d read/write: %v", volumeId, err)
  286. }
  287. fmt.Fprintf(writer, "temporarily marked %d on server %v writable for forced purge\n", volumeId, server)
  288. defer markVolumeWritable(c.env.option.GrpcDialOption, needleVID, server, false)
  289. fmt.Fprintf(writer, "marked %d on server %v writable for forced purge\n", volumeId, server)
  290. }
  291. if verbose {
  292. fmt.Fprintf(writer, "purging files from volume %d\n", volumeId)
  293. }
  294. if err := c.purgeFileIdsForOneVolume(volumeId, orphanFileIds, writer); err != nil {
  295. return fmt.Errorf("purging volume %d: %v", volumeId, err)
  296. }
  297. }
  298. }
  299. }
  300. if !applyPurging {
  301. pct := float64(totalOrphanChunkCount*100) / (float64(totalOrphanChunkCount + totalInUseCount))
  302. fmt.Fprintf(writer, "\nTotal\t\tentries:%d\torphan:%d\t%.2f%%\t%dB\n",
  303. totalOrphanChunkCount+totalInUseCount, totalOrphanChunkCount, pct, totalOrphanDataSize)
  304. fmt.Fprintf(writer, "This could be normal if multiple filers or no filers are used.\n")
  305. }
  306. if totalOrphanChunkCount == 0 {
  307. fmt.Fprintf(writer, "no orphan data\n")
  308. //return nil
  309. }
  310. return nil
  311. }
  312. func (c *commandVolumeFsck) collectOneVolumeFileIds(tempFolder string, dataNodeId string, volumeId uint32, vinfo VInfo, verbose bool, writer io.Writer) error {
  313. if verbose {
  314. fmt.Fprintf(writer, "collecting volume %d file ids from %s ...\n", volumeId, vinfo.server)
  315. }
  316. return operation.WithVolumeServerClient(false, vinfo.server, c.env.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  317. ext := ".idx"
  318. if vinfo.isEcVolume {
  319. ext = ".ecx"
  320. }
  321. copyFileClient, err := volumeServerClient.CopyFile(context.Background(), &volume_server_pb.CopyFileRequest{
  322. VolumeId: volumeId,
  323. Ext: ext,
  324. CompactionRevision: math.MaxUint32,
  325. StopOffset: math.MaxInt64,
  326. Collection: vinfo.collection,
  327. IsEcVolume: vinfo.isEcVolume,
  328. IgnoreSourceFileNotFound: false,
  329. })
  330. if err != nil {
  331. return fmt.Errorf("failed to start copying volume %d%s: %v", volumeId, ext, err)
  332. }
  333. err = writeToFile(copyFileClient, getVolumeFileIdFile(tempFolder, dataNodeId, volumeId))
  334. if err != nil {
  335. return fmt.Errorf("failed to copy %d%s from %s: %v", volumeId, ext, vinfo.server, err)
  336. }
  337. return nil
  338. })
  339. }
  340. func (c *commandVolumeFsck) collectFilerFileIds(dataNodeVolumeIdToVInfo map[string]map[uint32]VInfo, tempFolder string, writer io.Writer, verbose bool) error {
  341. if verbose {
  342. fmt.Fprintf(writer, "collecting file ids from filer ...\n")
  343. }
  344. files := make(map[uint32]*os.File)
  345. for _, volumeIdToServer := range dataNodeVolumeIdToVInfo {
  346. for vid := range volumeIdToServer {
  347. dst, openErr := os.OpenFile(getFilerFileIdFile(tempFolder, vid), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
  348. if openErr != nil {
  349. return fmt.Errorf("failed to create file %s: %v", getFilerFileIdFile(tempFolder, vid), openErr)
  350. }
  351. files[vid] = dst
  352. }
  353. }
  354. defer func() {
  355. for _, f := range files {
  356. f.Close()
  357. }
  358. }()
  359. type Item struct {
  360. vid uint32
  361. fileKey uint64
  362. }
  363. return doTraverseBfsAndSaving(c.env, nil, "/", false, func(entry *filer_pb.FullEntry, outputChan chan interface{}) (err error) {
  364. dataChunks, manifestChunks, resolveErr := filer.ResolveChunkManifest(filer.LookupFn(c.env), entry.Entry.Chunks, 0, math.MaxInt64)
  365. if resolveErr != nil {
  366. if verbose {
  367. fmt.Fprintf(writer, "resolving manifest chunks in %s: %v\n", util.NewFullPath(entry.Dir, entry.Entry.Name), resolveErr)
  368. }
  369. return nil
  370. }
  371. dataChunks = append(dataChunks, manifestChunks...)
  372. for _, chunk := range dataChunks {
  373. outputChan <- &Item{
  374. vid: chunk.Fid.VolumeId,
  375. fileKey: chunk.Fid.FileKey,
  376. }
  377. }
  378. return nil
  379. }, func(outputChan chan interface{}) {
  380. buffer := make([]byte, 8)
  381. for item := range outputChan {
  382. i := item.(*Item)
  383. util.Uint64toBytes(buffer, i.fileKey)
  384. files[i.vid].Write(buffer)
  385. }
  386. })
  387. }
  388. func (c *commandVolumeFsck) oneVolumeFileIdsCheckOneVolume(tempFolder string, dataNodeId string, volumeId uint32, writer io.Writer, verbose bool, applyPurging bool) (err error) {
  389. if verbose {
  390. fmt.Fprintf(writer, "find missing file chunks in dataNodeId %s volume %d ...\n", dataNodeId, volumeId)
  391. }
  392. db := needle_map.NewMemDb()
  393. defer db.Close()
  394. if err = db.LoadFromIdx(getVolumeFileIdFile(tempFolder, dataNodeId, volumeId)); err != nil {
  395. return
  396. }
  397. file := getFilerFileIdFile(tempFolder, volumeId)
  398. fp, err := os.Open(file)
  399. if err != nil {
  400. return
  401. }
  402. defer fp.Close()
  403. type Item struct {
  404. fileKey uint64
  405. cookie uint32
  406. path util.FullPath
  407. }
  408. br := bufio.NewReader(fp)
  409. buffer := make([]byte, 16)
  410. item := &Item{}
  411. var readSize int
  412. for {
  413. readSize, err = io.ReadFull(br, buffer)
  414. if err != nil || readSize != 16 {
  415. break
  416. }
  417. item.fileKey = util.BytesToUint64(buffer[:8])
  418. item.cookie = util.BytesToUint32(buffer[8:12])
  419. pathSize := util.BytesToUint32(buffer[12:16])
  420. pathBytes := make([]byte, int(pathSize))
  421. n, err := io.ReadFull(br, pathBytes)
  422. if err != nil {
  423. fmt.Fprintf(writer, "%d,%x%08x in unexpected error: %v\n", volumeId, item.fileKey, item.cookie, err)
  424. }
  425. if n != int(pathSize) {
  426. fmt.Fprintf(writer, "%d,%x%08x %d unexpected file name size %d\n", volumeId, item.fileKey, item.cookie, pathSize, n)
  427. }
  428. item.path = util.FullPath(string(pathBytes))
  429. needleId := types.NeedleId(item.fileKey)
  430. if _, found := db.Get(needleId); !found {
  431. fmt.Fprintf(writer, "%s\n", item.path)
  432. if applyPurging {
  433. // defining the URL this way automatically escapes complex path names
  434. c.httpDelete(item.path, verbose)
  435. }
  436. }
  437. }
  438. return nil
  439. }
  440. func (c *commandVolumeFsck) httpDelete(path util.FullPath, verbose bool) {
  441. req, err := http.NewRequest(http.MethodDelete, "", nil)
  442. req.URL = &url.URL{
  443. Scheme: "http",
  444. Host: c.env.option.FilerAddress.ToHttpAddress(),
  445. Path: string(path),
  446. }
  447. if verbose {
  448. fmt.Printf("full HTTP delete request to be sent: %v\n", req)
  449. }
  450. if err != nil {
  451. fmt.Errorf("HTTP delete request error: %v\n", err)
  452. }
  453. client := &http.Client{}
  454. resp, err := client.Do(req)
  455. if err != nil {
  456. fmt.Errorf("DELETE fetch error: %v\n", err)
  457. }
  458. defer resp.Body.Close()
  459. _, err = ioutil.ReadAll(resp.Body)
  460. if err != nil {
  461. fmt.Errorf("DELETE response error: %v\n", err)
  462. }
  463. if verbose {
  464. fmt.Println("delete response Status : ", resp.Status)
  465. fmt.Println("delete response Headers : ", resp.Header)
  466. }
  467. }
  468. func (c *commandVolumeFsck) oneVolumeFileIdsSubtractFilerFileIds(tempFolder string, dataNodeId string, volumeId uint32, writer io.Writer, verbose bool) (inUseCount uint64, orphanFileIds []string, orphanDataSize uint64, err error) {
  469. db := needle_map.NewMemDb()
  470. defer db.Close()
  471. if err = db.LoadFromIdx(getVolumeFileIdFile(tempFolder, dataNodeId, volumeId)); err != nil {
  472. return
  473. }
  474. filerFileIdsData, err := os.ReadFile(getFilerFileIdFile(tempFolder, volumeId))
  475. if err != nil {
  476. return
  477. }
  478. dataLen := len(filerFileIdsData)
  479. if dataLen%8 != 0 {
  480. return 0, nil, 0, fmt.Errorf("filer data is corrupted")
  481. }
  482. for i := 0; i < len(filerFileIdsData); i += 8 {
  483. fileKey := util.BytesToUint64(filerFileIdsData[i : i+8])
  484. db.Delete(types.NeedleId(fileKey))
  485. inUseCount++
  486. }
  487. var orphanFileCount uint64
  488. db.AscendingVisit(func(n needle_map.NeedleValue) error {
  489. // fmt.Printf("%d,%x\n", volumeId, n.Key)
  490. orphanFileIds = append(orphanFileIds, fmt.Sprintf("%d,%s00000000", volumeId, n.Key.String()))
  491. orphanFileCount++
  492. orphanDataSize += uint64(n.Size)
  493. return nil
  494. })
  495. if orphanFileCount > 0 {
  496. pct := float64(orphanFileCount*100) / (float64(orphanFileCount + inUseCount))
  497. fmt.Fprintf(writer, "dataNode:%s\tvolume:%d\tentries:%d\torphan:%d\t%.2f%%\t%dB\n",
  498. dataNodeId, volumeId, orphanFileCount+inUseCount, orphanFileCount, pct, orphanDataSize)
  499. }
  500. return
  501. }
  502. type VInfo struct {
  503. server pb.ServerAddress
  504. collection string
  505. isEcVolume bool
  506. isReadOnly bool
  507. }
  508. func (c *commandVolumeFsck) collectVolumeIds(commandEnv *CommandEnv, verbose bool, writer io.Writer) (volumeIdToServer map[string]map[uint32]VInfo, err error) {
  509. if verbose {
  510. fmt.Fprintf(writer, "collecting volume id and locations from master ...\n")
  511. }
  512. volumeIdToServer = make(map[string]map[uint32]VInfo)
  513. // collect topology information
  514. topologyInfo, _, err := collectTopologyInfo(commandEnv, 0)
  515. if err != nil {
  516. return
  517. }
  518. eachDataNode(topologyInfo, func(dc string, rack RackId, t *master_pb.DataNodeInfo) {
  519. for _, diskInfo := range t.DiskInfos {
  520. dataNodeId := t.GetId()
  521. volumeIdToServer[dataNodeId] = make(map[uint32]VInfo)
  522. for _, vi := range diskInfo.VolumeInfos {
  523. volumeIdToServer[dataNodeId][vi.Id] = VInfo{
  524. server: pb.NewServerAddressFromDataNode(t),
  525. collection: vi.Collection,
  526. isEcVolume: false,
  527. isReadOnly: vi.ReadOnly,
  528. }
  529. }
  530. for _, ecShardInfo := range diskInfo.EcShardInfos {
  531. volumeIdToServer[dataNodeId][ecShardInfo.Id] = VInfo{
  532. server: pb.NewServerAddressFromDataNode(t),
  533. collection: ecShardInfo.Collection,
  534. isEcVolume: true,
  535. isReadOnly: true,
  536. }
  537. }
  538. }
  539. })
  540. if verbose {
  541. fmt.Fprintf(writer, "collected %d volumes and locations.\n", len(volumeIdToServer))
  542. }
  543. return
  544. }
  545. func (c *commandVolumeFsck) purgeFileIdsForOneVolume(volumeId uint32, fileIds []string, writer io.Writer) (err error) {
  546. fmt.Fprintf(writer, "purging orphan data for volume %d...\n", volumeId)
  547. locations, found := c.env.MasterClient.GetLocations(volumeId)
  548. if !found {
  549. return fmt.Errorf("failed to find volume %d locations", volumeId)
  550. }
  551. resultChan := make(chan []*volume_server_pb.DeleteResult, len(locations))
  552. var wg sync.WaitGroup
  553. for _, location := range locations {
  554. wg.Add(1)
  555. go func(server pb.ServerAddress, fidList []string) {
  556. defer wg.Done()
  557. if deleteResults, deleteErr := operation.DeleteFilesAtOneVolumeServer(server, c.env.option.GrpcDialOption, fidList, false); deleteErr != nil {
  558. err = deleteErr
  559. } else if deleteResults != nil {
  560. resultChan <- deleteResults
  561. }
  562. }(location.ServerAddress(), fileIds)
  563. }
  564. wg.Wait()
  565. close(resultChan)
  566. for results := range resultChan {
  567. for _, result := range results {
  568. if result.Error != "" {
  569. fmt.Fprintf(writer, "purge error: %s\n", result.Error)
  570. }
  571. }
  572. }
  573. return
  574. }
  575. func getVolumeFileIdFile(tempFolder string, dataNodeid string, vid uint32) string {
  576. return filepath.Join(tempFolder, fmt.Sprintf("%s_%d.idx", dataNodeid, vid))
  577. }
  578. func getFilerFileIdFile(tempFolder string, vid uint32) string {
  579. return filepath.Join(tempFolder, fmt.Sprintf("%d.fid", vid))
  580. }
  581. func writeToFile(client volume_server_pb.VolumeServer_CopyFileClient, fileName string) error {
  582. flags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC
  583. dst, err := os.OpenFile(fileName, flags, 0644)
  584. if err != nil {
  585. return nil
  586. }
  587. defer dst.Close()
  588. for {
  589. resp, receiveErr := client.Recv()
  590. if receiveErr == io.EOF {
  591. break
  592. }
  593. if receiveErr != nil {
  594. return fmt.Errorf("receiving %s: %v", fileName, receiveErr)
  595. }
  596. dst.Write(resp.FileContent)
  597. }
  598. return nil
  599. }