From f84b70c3625e0c45132da9fb72542b3809413c2f Mon Sep 17 00:00:00 2001 From: Lisandro Pin Date: Thu, 5 Feb 2026 20:27:03 +0100 Subject: [PATCH] Implement index (fast) scrubbing for regular/EC volumes. (#8207) Implement index (fast) scrubbing for regular/EC volumes via `ScrubVolume()`/`ScrubEcVolume()`. Also rearranges existing index test files for reuse across unit tests for different modules. --- weed/server/volume_grpc_scrub.go | 25 ++-- weed/storage/erasure_coding/ec_volume.go | 11 ++ weed/storage/erasure_coding/ec_volume_test.go | 2 +- weed/storage/idx/check.go | 103 +++++++++++++++++ weed/storage/idx/check_test.go | 108 ++++++++++++++++++ .../test_files}/389.ecx | Bin weed/storage/idx/test_files/deleted_files.ecx | Bin 0 -> 1856 bytes weed/storage/idx/test_files/deleted_files.idx | Bin 0 -> 3680 bytes .../idx/test_files/deleted_files_bitrot.ecx | Bin 0 -> 1857 bytes weed/storage/idx/test_files/simple_index.idx | Bin 0 -> 2576 bytes .../idx/test_files/simple_index_bitrot.idx | Bin 0 -> 2577 bytes .../idx/test_files/simple_index_truncated.idx | Bin 0 -> 2540 bytes weed/storage/volume_checking.go | 22 ++++ 13 files changed, 254 insertions(+), 17 deletions(-) create mode 100644 weed/storage/idx/check.go create mode 100644 weed/storage/idx/check_test.go rename weed/storage/{erasure_coding => idx/test_files}/389.ecx (100%) create mode 100644 weed/storage/idx/test_files/deleted_files.ecx create mode 100644 weed/storage/idx/test_files/deleted_files.idx create mode 100644 weed/storage/idx/test_files/deleted_files_bitrot.ecx create mode 100644 weed/storage/idx/test_files/simple_index.idx create mode 100644 weed/storage/idx/test_files/simple_index_bitrot.idx create mode 100644 weed/storage/idx/test_files/simple_index_truncated.idx diff --git a/weed/server/volume_grpc_scrub.go b/weed/server/volume_grpc_scrub.go index 5899241da..9d8d42582 100644 --- a/weed/server/volume_grpc_scrub.go +++ b/weed/server/volume_grpc_scrub.go @@ -31,11 +31,11 @@ func (vs *VolumeServer) ScrubVolume(ctx context.Context, req *volume_server_pb.S return nil, fmt.Errorf("volume id %d not found", vid) } - var files uint64 + var files int64 var serrs []error switch m := req.GetMode(); m { case volume_server_pb.VolumeScrubMode_INDEX: - files, serrs = scrubVolumeIndex(ctx, v) + files, serrs = v.CheckIndex() case volume_server_pb.VolumeScrubMode_FULL: files, serrs = scrubVolumeFull(ctx, v) default: @@ -43,7 +43,7 @@ func (vs *VolumeServer) ScrubVolume(ctx context.Context, req *volume_server_pb.S } totalVolumes += 1 - totalFiles += files + totalFiles += uint64(files) if len(serrs) != 0 { brokenVolumeIds = append(brokenVolumeIds, uint32(vid)) for _, err := range serrs { @@ -61,11 +61,7 @@ func (vs *VolumeServer) ScrubVolume(ctx context.Context, req *volume_server_pb.S return res, nil } -func scrubVolumeIndex(ctx context.Context, v *storage.Volume) (uint64, []error) { - return 0, []error{fmt.Errorf("scrubVolumeIndex(): not implemented")} -} - -func scrubVolumeFull(ctx context.Context, v *storage.Volume) (uint64, []error) { +func scrubVolumeFull(ctx context.Context, v *storage.Volume) (int64, []error) { return 0, []error{fmt.Errorf("scrubVolumeFull(): not implemented")} } @@ -91,12 +87,13 @@ func (vs *VolumeServer) ScrubEcVolume(ctx context.Context, req *volume_server_pb return nil, fmt.Errorf("EC volume id %d not found", vid) } - var files uint64 + var files int64 var shardInfos []*volume_server_pb.EcShardInfo var serrs []error switch m := req.GetMode(); m { case volume_server_pb.VolumeScrubMode_INDEX: - files, shardInfos, serrs = scrubEcVolumeIndex(v) + // index scrubs do not verify individual EC shards + files, serrs = v.CheckIndex() case volume_server_pb.VolumeScrubMode_FULL: files, shardInfos, serrs = scrubEcVolumeFull(ctx, v) default: @@ -104,7 +101,7 @@ func (vs *VolumeServer) ScrubEcVolume(ctx context.Context, req *volume_server_pb } totalVolumes += 1 - totalFiles += files + totalFiles += uint64(files) if len(serrs) != 0 || len(shardInfos) != 0 { brokenVolumeIds = append(brokenVolumeIds, uint32(vid)) brokenShardInfos = append(brokenShardInfos, shardInfos...) @@ -124,10 +121,6 @@ func (vs *VolumeServer) ScrubEcVolume(ctx context.Context, req *volume_server_pb return res, nil } -func scrubEcVolumeIndex(ecv *erasure_coding.EcVolume) (uint64, []*volume_server_pb.EcShardInfo, []error) { - return 0, nil, []error{fmt.Errorf("scrubEcVolumeIndex(): not implemented")} -} - -func scrubEcVolumeFull(ctx context.Context, v *erasure_coding.EcVolume) (uint64, []*volume_server_pb.EcShardInfo, []error) { +func scrubEcVolumeFull(ctx context.Context, ecv *erasure_coding.EcVolume) (int64, []*volume_server_pb.EcShardInfo, []error) { return 0, nil, []error{fmt.Errorf("scrubEcVolumeFull(): not implemented")} } diff --git a/weed/storage/erasure_coding/ec_volume.go b/weed/storage/erasure_coding/ec_volume.go index fb585e5ab..7581fb67f 100644 --- a/weed/storage/erasure_coding/ec_volume.go +++ b/weed/storage/erasure_coding/ec_volume.go @@ -332,3 +332,14 @@ func SearchNeedleFromSortedIndex(ecxFile *os.File, ecxFileSize int64, needleId t func (ev *EcVolume) IsTimeToDestroy() bool { return ev.ExpireAtSec > 0 && time.Now().Unix() > (int64(ev.ExpireAtSec)+destroyDelaySeconds) } + +func (ev *EcVolume) CheckIndex() (int64, []error) { + if ev.ecxFile == nil { + return 0, []error{fmt.Errorf("no ECX file associated with EC volume %v", ev.VolumeId)} + } + if ev.ecxFileSize == 0 { + return 0, []error{fmt.Errorf("zero-size ECX file for EC volume %v", ev.VolumeId)} + } + + return idx.CheckIndexFile(ev.ecxFile, ev.ecxFileSize, ev.Version) +} diff --git a/weed/storage/erasure_coding/ec_volume_test.go b/weed/storage/erasure_coding/ec_volume_test.go index 82df0b8b1..323972c1a 100644 --- a/weed/storage/erasure_coding/ec_volume_test.go +++ b/weed/storage/erasure_coding/ec_volume_test.go @@ -13,7 +13,7 @@ import ( func TestPositioning(t *testing.T) { - ecxFile, err := os.OpenFile("389.ecx", os.O_RDONLY, 0) + ecxFile, err := os.OpenFile("../idx/test_files/389.ecx", os.O_RDONLY, 0) if err != nil { t.Errorf("failed to open ecx file: %v", err) } diff --git a/weed/storage/idx/check.go b/weed/storage/idx/check.go new file mode 100644 index 000000000..c0ff3267d --- /dev/null +++ b/weed/storage/idx/check.go @@ -0,0 +1,103 @@ +package idx + +import ( + "fmt" + "io" + "sort" + + "github.com/seaweedfs/seaweedfs/weed/storage/needle" + "github.com/seaweedfs/seaweedfs/weed/storage/types" +) + +type indexEntry struct { + index int + id types.NeedleId + offset int64 + size types.Size +} + +func (ie *indexEntry) Compare(other *indexEntry) int { + if ie.offset < other.offset { + return -1 + } + if ie.offset > other.offset { + return 1 + } + if ie.size < other.size { + return -1 + } + if ie.size > other.size { + return 1 + } + return 0 +} + +// CheckIndexFile verifies the integrity of a IDX/ECX index file. Returns a count of processed file entries, and slice of found errors. +func CheckIndexFile(r io.ReaderAt, indexFileSize int64, version needle.Version) (int64, []error) { + errs := []error{} + + entries := []*indexEntry{} + var i int + err := WalkIndexFile(r, 0, func(id types.NeedleId, offset types.Offset, size types.Size) error { + entries = append(entries, &indexEntry{ + index: i, + id: id, + offset: offset.ToActualOffset(), + size: size, + }) + i++ + return nil + }) + if err != nil { + errs = append(errs, err) + } + + sort.Slice(entries, func(i, j int) bool { + return entries[i].Compare(entries[j]) < 0 + }) + + for i, e := range entries { + if i == 0 { + // nothing to check for the first entry + continue + } + + start, end := e.offset, e.offset + if size := needle.GetActualSize(e.size, version); size != 0 { + end += size - 1 + } + + last := entries[i-1] + lastStart, lastEnd := last.offset, last.offset + if lastSize := needle.GetActualSize(last.size, version); lastSize != 0 { + lastEnd += lastSize - 1 + } + + // check if needles overlap + if start <= lastEnd { + errs = append(errs, fmt.Errorf( + "needle %d (#%d) at [%d-%d] overlaps needle %d at [%d-%d]", + e.id, e.index+1, + start, end, + last.id, + lastStart, lastEnd)) + } + + // The check below is intended to ensure all index entries are contiguous; unfortunately, Seaweed + // can delete index entries for files while keeping their data, so volumes with deleted files + // will fail this test :( + // See https://github.com/seaweedfs/seaweedfs/issues/8204 for details. + /* + if e.offset != lastEnd + 1 { + errs = append(errs, fmt.Errorf("offset %d for needle %d (#%d) doesn't match end of needle %d at %d", e.offset, e.id, e.index+1, last.id, lastEnd)) + } + */ + } + + count := int64(len(entries)) + if got, want := count*types.NeedleMapEntrySize, indexFileSize; got != want { + errs = append(errs, fmt.Errorf("expected an index file of size %d, got %d", want, got)) + } + + return count, errs +} diff --git a/weed/storage/idx/check_test.go b/weed/storage/idx/check_test.go new file mode 100644 index 000000000..c397d8e56 --- /dev/null +++ b/weed/storage/idx/check_test.go @@ -0,0 +1,108 @@ +package idx + +import ( + "fmt" + "os" + "reflect" + "testing" + + "github.com/seaweedfs/seaweedfs/weed/storage/needle" +) + +func TestCheckIndexFile(t *testing.T) { + testCases := []struct { + name string + indexPath string + version needle.Version + want int64 + wantErrs []error + }{ + { + name: "healthy index", + indexPath: "./test_files/simple_index.idx", + version: needle.Version3, + want: 161, + wantErrs: []error{}, + }, + { + name: "healthy index with deleted files", + indexPath: "./test_files/deleted_files.idx", + version: needle.Version3, + want: 230, + wantErrs: []error{}, + }, + { + name: "damaged index (bitrot)", + indexPath: "./test_files/simple_index_bitrot.idx", + version: needle.Version3, + want: 161, + wantErrs: []error{ + fmt.Errorf("needle 3544668469065756977 (#2) at [6602459528-7427766999] overlaps needle 49 at [6602459528-7427766999]"), + fmt.Errorf("expected an index file of size 2577, got 2576"), + }, + }, + { + name: "damaged index (truncated)", + indexPath: "./test_files/simple_index_truncated.idx", + version: needle.Version3, + want: 158, + wantErrs: []error{ + fmt.Errorf("expected an index file of size 2540, got 2528"), + }, + }, + { + name: "healthy EC index", + indexPath: "./test_files/389.ecx", + version: needle.Version3, + want: 485098, + wantErrs: []error{}, + }, + { + name: "healthy EC index with deleted files", + indexPath: "./test_files/deleted_files.ecx", + version: needle.Version3, + want: 116, + wantErrs: []error{}, + }, + { + name: "damaged EC index (bitrot)", + indexPath: "./test_files/deleted_files_bitrot.ecx", + version: needle.Version3, + want: 116, + wantErrs: []error{ + fmt.Errorf("needle 3223857 (#110) at [6602459528-7427767055] overlaps needle 12593 at [6601933184-7407907279]"), + fmt.Errorf("needle 3544668469065757234 (#43) at [6737203600-7579354079] overlaps needle 3223857 at [6602459528-7427767055]"), + fmt.Errorf("needle 3421236 (#112) at [7006693800-7899362591] overlaps needle 3544668469065757234 at [6737203600-7579354079]"), + fmt.Errorf("needle 310 (#113) at [7276179888-8185702583] overlaps needle 3421236 at [7006693800-7899362591]"), + fmt.Errorf("needle 7089336938131513954 (#52) at [13204919056-13205053935] overlaps needle 27410143614427489 at [13070174984-14703946887]"), + fmt.Errorf("needle 25186 (#50) at [13204919056-14855533967] overlaps needle 7089336938131513954 at [13204919056-13205053935]"), + fmt.Errorf("needle 7089336938131513954 (#51) at [13204919056-14855533967] overlaps needle 25186 at [13204919056-14855533967]"), + fmt.Errorf("expected an index file of size 1857, got 1856"), + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + idx, err := os.OpenFile(tc.indexPath, os.O_RDONLY, 0) + if err != nil { + t.Fatalf("failed to open index file: %v", err) + } + defer idx.Close() + + idxStat, err := idx.Stat() + if err != nil { + t.Fatalf("failed to stat index file: %v", err) + } + + got, gotErrs := CheckIndexFile(idx, idxStat.Size(), tc.version) + + if got != tc.want { + t.Errorf("expected %d files processed, got %d", tc.want, got) + } + if !reflect.DeepEqual(gotErrs, tc.wantErrs) { + t.Errorf("expected errors %v, got %v", tc.wantErrs, gotErrs) + } + }) + } +} diff --git a/weed/storage/erasure_coding/389.ecx b/weed/storage/idx/test_files/389.ecx similarity index 100% rename from weed/storage/erasure_coding/389.ecx rename to weed/storage/idx/test_files/389.ecx diff --git a/weed/storage/idx/test_files/deleted_files.ecx b/weed/storage/idx/test_files/deleted_files.ecx new file mode 100644 index 0000000000000000000000000000000000000000..e8b29d7a7e41f7da1b2883faf506e40a5bad6129 GIT binary patch literal 1856 zcmXxkc~Dhl7{~GNx!bwi?MUvFnuHeV6&EN3K_xFLN-mV63`hzoiU6Xtuf1H`m{GRu`^SMzFVqsTn!h8 z(K+WP2K9kBoX#D8p+DoTA3>KTHRwJVD0u{3zTS<`&}YPvbiIAovkO(?D7sI!Vadu8 zN7K`ljuMa~9#7BtplUyfC*oHSjnAn&;#i(OHV*Bn{^CjW+O{5D(RlGxdbLMlI6hq* zNAIE{3}ws3Gw}bT9D8DicqaX`@1xtjL!813@kVSX)5NI^Xi>2@hluClEGfkj^1e8Y zK`k+;kEe;#32GVc8SgJ%$dHUH=*zwPm%)&$Cox4lCwV4Acc!BcTqIu1Fl{U5!H31m z3GR9ba(%=(1ovw|A9YQfOK@>tl)0P4c?2Koff_eU{4OE21z2*u>n|oWGYs{#w_nN# zFZ9PWBv|Up88I~$_1GnG1raetnC`wPt|T(B8;)YVcnc%%w_@%xS6t1ghiN$8d`-NE zF|N-rH*FX1#g)_vbLoEZM?^20kIr#aTu=1=Xl%V>#h()sEs=MjwvtKpw^F){70rbzrj3mf%pP(Ar0v2z4Koo?wlX?n+GMo z#q_{N%vrm{cbE}68Z~LOxP$lrC!`gN9})k_hiHZFCsuSM)J0%YCy0&A9Q`Ri+oW7E zk+ifY>JRcg#mpO-wP*wSO1+)g)9k2Yq+gZJBuDPU@a;0myOL7+6`K2q*pE5xD6~6Q z#RExQn~J(e6FZrg>p&a)t2l_XIfKx*%Kj66Muj#A8`fpM>?q0r5DN1>Q#;={fZOJ<52N z``$onN)%5Z#~O~V_B-)Ja+3Z*uU{-qV8x~1p-TEqNoGY`1*)@7@&&9+h(p_bSG*W^ zXdJo(KXDdrcPnZ^nD{MLWnagTBIiTNBmeLNl(5y3uOk226ztB+;&l|P^~I-hl(>*J zM4sQoewUHISP V+tgL4D0$<=ZPcB9-t&KF{tKZrKY9QF literal 0 HcmV?d00001 diff --git a/weed/storage/idx/test_files/deleted_files.idx b/weed/storage/idx/test_files/deleted_files.idx new file mode 100644 index 0000000000000000000000000000000000000000..28e0122fe575457c42d9c78b41b8b04b75a8a5ee GIT binary patch literal 3680 zcmYk-3s9D2766an3|B7iePdRzQr3Wg1CrC+@!C> z5^XU>3I}nFamobR4I$k$%$6KBDZvW@YiheIp_3+iu=CCHJ~Q7p^PBU$^S|3@c)Kfl!+Wv?= z3F?DIz_72`UQi#p1!G4kdonbSf?(Qno;?-1v`pw*p0K^)^yXt2$6RMmgZ_L4)aAF> zelP?dgmFR>I}pa5S_5Pv) z_6iJlegIu^7kd?k|M&o+`X}~kjBxoLrqhmm$6;jAU09rUaeNI%d0mH-*Hm^qo^v+A z)Dy@~!sy-YFa`#(ZE*e06V@kN*=ZPavLE_~QS5Y#ZBs!z!=0T0x6D+iQyux-4!2up zAaWjXJQwaGKZL1zEV~FEC4Yi7hR*!yD#o~I0qcsz>=KOk^nu#x2Kyb1&-0-gI>X+J z@#k&O`sJ|yiWfEv!hE=ieFzgZ&jZo)d%6x|qV)#!;cv2!VA7}uFfMV_&xi1OgKE*$ z@qRV%N?ZWl<5=Ebi^=n@z})mR`vj)^_z1>=V)n=I_Wl;Shcnry@S^z#ShBp>_3-&z z1661(yAi&ctI!U#u)lzBOFb-4-eWgm`mT3i9kHB!27aOLP?zE@9}`? z`6FykEa=}4t?M7yFCe1)8dN*3u_qy-_Zsx(RqQEP`0ouc%{|VZibYFnplLbF_Qs;y z6QGW&X8U0AYlookc4JS&l1&p~I^M{hfmeH)VGN-eHnfC4B7Mfg@R5ogh^Y9l0L_2d zL5NC7g;~w#A!g&X?%U9Hq;Y&MmNf;#7!<^wkL6BBVa*!B4#n&3-@tO!S z!~G#5u&T}g@AKjLtI=xRQEdAs}T3M@1S+zc`M?u zCTw|E|iHF)EnU%)cMk>#XHJA>z8_zohy=93d4T&#Bq4E-@@x0ycnXsCq(gf; zm~BIH_zvhMInFx`>n!_W80~nU^+@sUfadBn-oFtkt@$wA_>{c~HhmXV9dYc@2KrONaTT-`LsMxN;`6hjZC^*koM@ zZDa+z5Sva{!5nvqU4-;`mC%@HvWt;^j%NQ0dX%RKJ?zZ(~cyZHTLm`Y6Me zoa4|hEaUjQ$S4{IOU6$2K4kh2KzQ@~7Ujr{sf9Yn@jeyEY_-GE`l;x9g*K)>@P_HpFQz6Mj*0rm;(X#5u}F%4D<1K9;oK0L`7zumt+Cf^pe;7GQ8a3op zX|asFUg1^b%?jTnXGkNCyiHo9lXDcdk@FO`k_)8KNG_2UE@ZpH-Q+T9tS0Z17K!Bd zrIAbiK;fb1IU+4u$<@;6As>^b`nUI?vY~^ZXmB!xQd)4je2sb!uQGR6&^aT%?ckPXGn{8$=ehT ZBu1VJQI$|>U^loUgR1k+)q>^emS zhCGIZh9<4lCRv;2Lui^rSm|as%1pDZrfE!BCO&kLFI!vx^!elLzV`Y3c6NUEoG(0z z!F^RQ&ui@?Rus!mSZdqFc66yfVlA+XhoEm?i1pM-u{Xw;kr+nI5|6~#`UjTKP_YlH z?-pzq{KdYQFIC}raE90)%g#!aFIF zPb~TQ;#h{oN#isKpfDyZ5I;#qhX$KrANmN=0oj>n-rK1Mv7;k&k?D~%V=Wq5Za zh79OV^Q zLf#Q)Gp=JM>ce^B90EHgyZ8HumlK?O8GTLv`Q;LPu{EhMp?2=T!i!RP}HQ1J_iQ}2f1Sja{uVf_xlFpjJ-@R-p_x()UN*f9U;LHiRu1B>03y2m@&n?E1Fi%}3zCcn)3%ch1`(GyMoHw@Xhory3eE(L=`Fq5-STHFH zH9bn)L$a>}vdhE|NdDvlv=UdJ6&)!}5t!5jv5~Z>Px06V8UKxYbeMTLZ#UZ5(X!sg z!g)5->GHlR16UNfAH%n+q#s0P#g}NVqhfCsyQ0x4HYEvd?``o4T$7T}rFe_;ak)BCi^Ifku)g3| z44E=dl_EAAxrY+AQTp|4xH<=$u- z)X5)<4^Z=wdp35-d{Ykbw(&iTOM|3urT)!)Ja#M+pQWMw4z{+w`PMfydas$%U!*ZB w16%!qM|Som#2xEP(od9J6JKNZ7h&ka{}SJ%sa8cPiW7Iy^!3y3|2NI@4`Yuf19C)EG$8?BXrh=Yfr<)f zE)|oBMb7eK>Ll3;b5@Pc#HcmcXjW>A(LqJ0aRyE2ru%pPxNCjZKIg3SyJzox;PEhf ze--weNl!>O)Ph@>j^>JcV~G16c8w7~g&{o;WzG;kjiGo3+WJ_rKZbwCV+`6N4n!+I zfxBvmI0)nFcwC{$;(nON6{0GJi-R#2XW_1H6%WKZe--ZhAH_qkPrZz3!xZr_9D`gq z#~H+-IM1EMmD?o_!@ai`%6?HCj=N0NBaJPMj7WS%mG~Hqcqt#Z6&1{PH5<^uNVZ@GCoxZIeZu&eLt9upQqeo`HYJF}U}V_%-||)}b}b z63@o}U;g#2a~j?>X#Otm1Nlf8*V2QXg?8!NIFBuD>hZO7Mj>nD_1%Z)3n;j^mvFxp+H+ zmPca${bcbD2Hzf!Ehk63lc6z@IB&lzevgp*6VdXX6Ti=}x&Ogz)6ZW|Xkse%efbjq zfUpyF*phdP|GuD?sX zg@~rv7$0^MpCPhfAf(R}w-QH|HN;rdSI^461Nkx!GO=ABJp)b_Wl_amn6Qy$k=Suo!7-3j9lr#RjcR6 zO-7|!u`GL8;+?#lnT9bmReX=prc&?uYs9}07vZgyE$=+BH%;RbDsXRT7W*(Z`ZSiQ zQeVo%xUmm#-jw=OX5zI=`1I6dzJ>U*EG!cbh;1ZPdiS@-i=8BFuE*jp`K{a}ZhIEP zUpGs`GZboGHnl;_^tpP?2N=xp6miN<4~bev7^RMDj;PGp+m- z)@gwfk7fEFy}iyiOgxI|$BVGWNqwp~W?Z?9)4fXK3Cvn|4yWT2aUz*7EW(oXj5wLB zVc%dpugBBKx{;6NLwPS$2Cu6=I2-l*p2h4V2T_g?S^pZd+aht?@D*n?{@RVG(nI2v%}#!jBLdkCH+F}r6kjgt819V_pv%Q73=wGaUE|hZO1%jhWLHf zWEW#7mHwd)u*TDl^OM^WuV?MN2GsSp#SN^je}wI(UQY)pGhW7+*DCQtlr_DFJK`I0 z6K~h)92hJN3zRQ0`% z)}+rrOVy6~IF3G9jyliwp(*&h`lal5iS5VE;P5nxf1o<$dCXU1#Mh`v@?h(w_lFK@ d-<=7!Wkc0TZDTQ({8KXjE;~Q|8f{txA_17 literal 0 HcmV?d00001 diff --git a/weed/storage/idx/test_files/simple_index_bitrot.idx b/weed/storage/idx/test_files/simple_index_bitrot.idx new file mode 100644 index 0000000000000000000000000000000000000000..4f821da893ae85a4d97946c9cf66d762c53ecbd3 GIT binary patch literal 2577 zcmZ|QdsLLw8prYPJ1;Zu++P(n3JeP^ydqF?@gOE%xQr4eg4cjdL_-r2@P;OenG&d| zfaX#$DHeH_gQ+LUR+wki(TdbM*wL)i7Do>zT3Kj%CVl45_J5!Cd-h&?X7ByHaDRmU z^|+t+#1Qu#Y#;TYoQE)^<)Ucm;)gL5O+#H1EB43m-*}8c8^wXBWk=B~2Z@6)u87AO z>Js-s8&iN%K2#ixR+Nce)gtbXW$tqHydT7auuZ;zY3(HO5bOh-IK~*np*T*T!kN=4 z4nyD71I2bu9FE?+8QguuG3c$|qJ}*u9!Zzjfv7cA;yAjvOVIQV@mRW6uEz3Cf;fS0 zpSm#*_)+Y__lt0p5ATV`(Orwc)Zq}1r~BpUSp0It$@DbO!+s!I{45Xc_#M-Nuf!?% zb!>uE*9@#FQhJVOmxb?L7dHl!MqSnt4&&2=OP1Hq4#n}Xe zhu|}IzjzJ-P5oe&B7TX0Puejox+9)XFTYUqZ**}!fxd+}_q;1Eq<33a)QB4K5*|x7 zV>Rp)FXi#_3`|dYe$)L?LeRt{oN1mt-yf^#8#o4IpJC!P^gZ?^rk{Qiuj7ebr?FkM zh|36m)U(!vUg8RZgI8c&b5p#D;Ik{ycI^>wrr&2raLoN!yoCWvBC)+aQM{Fb*T-Vb z&K7TDa7-kQ>u-qPB;@vZ)ZE9#Z!u){uV_~9{_6-$Ou@E0Px5yNJ6em?wL|=Oo-z-^ zeAZuln5ToXaK87H_(a$P^0&%v zXmy$5He%Kq@R?sIzQXXHzo5h=iLWv|HVfs(3*vT$FLUFp@!pSXj7YIyUi^&Y9Xykf ziZL`re2bB$63_mt#eWbN;dv_?Uw>fFHH}LsM_=0{_F;7N3Cxq_eJLi!jJ|{8n!Ha% zBVIj^Pj^-7&BT{xVjjO&Y$c(>v%W1}>>y!79cF*I--=G+=EpF6zCrS?Bwo7(y=~%d zB;L4#p+)+Q;!D!F08DM(`X0FYhk4$MoUh`~xVs-=yL&_C2a;S|i|yPMaS+dTj>7(j z^e3e+6RL0EJn7lvpF`=-#QbHL_DvQKBsIZ>@}l$)WiY7)Hq;*So|F($TMt5voR<<# z>UTM)SrsyWD3e?t;&e;DP$EeW($VWXB#&Z>-vUoRk^7@WGo|b}mMMXf$1?RFo?ho0 zCLY1mBZXMvAS$k!Opc=J>;ujXNX zSH2fYIxi@_a5Q+&dj>NP?nAMM$o%J-*&2!cs;@YcSqpdIQ-4A{i)`y{9B%LV&1UvT zBRv0?CV37yL6@=b^zLU4a}NCv{g;c9&*i0q4Vdm_i06^JWjoIBC~-dXr)yY?UluRq z<%Y9pQ%8ymSx_|^Ez2%mOx~(>C?)&F%gB4r=(*qC^DpL=z2Vpcy!U501^t>a7EF-( zl@zpA;7I(Jcojvjwqq|kAYRYnu4e3UTg7E8sa}pc>=W?@mU0KB`9I<+mPSoQyS+wS z&9cZAEK$-glwA~OXgE8ENWPmDu_;*2REcYObx|AIsA=N2SeaFXp+x$JvX_?i_2L7p%}>St zZM*m&>+}e$zj@a`#QJ$Ie0IwBL^({EAq90sPpNOBEYH*XW|WGLQC2(%=fdyBA5-q~ zd>@B)6aSO)O^sNLm&7NiIPeO(&HH=5VB_XX*wek|-9lyWE2xd$`cqVHos0d@gXt(| z*fKa7pXWZ6_0F^9@Ja0M2J!b)B|m|7DMoym>LfST9^U@YPR$$B;kqoSbWqb!ggNiH P)Zb*=`(L5%j<^0F+&a0? literal 0 HcmV?d00001 diff --git a/weed/storage/idx/test_files/simple_index_truncated.idx b/weed/storage/idx/test_files/simple_index_truncated.idx new file mode 100644 index 0000000000000000000000000000000000000000..fcf238305c0fc3c37d5b28635da9f8073d879726 GIT binary patch literal 2540 zcmXxldsNj`7QpfExtIHX{8ZE^Ff6q2i9pH2K}>w`xJsA^J_B+@R5T$0UudG3DS?U# zWG)SpVwo*3rj}$YtX8EnG3qqfXjW>A(ZN?!HfY*S_wW31*ZQn|&ROSo&))mM{bBL_ zDjc~J9*{1mdDk!>$`kj*knlZhA0>VWLql8M##hz+!?+z-ydt}8$1S6pJC#)^gZ?u z%-61o*Ym`#Q#dZz#1(`->f38VFL4#2p(`=1{aL)3&@-#B?Ak5fLchNq!8P|&@m2;b zjmGizMDaETULT7+H&^^RgX5xcU4Kpd24T0yW6XbC{1!uI|BA(~pTB|d1Ykj8pC;n314NF6|xShCl2K*Kji!U?0=P#&)6!8^?$LFAKz98;k_;N4qIz2zG zG9t}}b;&alzrizE>6pUP#J3n}F7utgR{T2&QNCK)^y&lq(ljBd5^Y_x*pJb%$FWY9 z`ch`bjJ|{Gs??{lkZ3%IUw5O-w~|<%jdlDUv7MwU-~Nt7v5TaQ4Ojyuzm-Pvmd7ys zeWS#?l6>_R^md55k$m$GhF0k}%Ab^RL73b1`91LTkMPxt+^-5`+}%%b+`TF5gGnu| z$8q+uID}_AN8$Wk`jhI*gxZ_9Px#LG=TQBbShyVXzRBW&q$hb$f0h2B29sXoz}Q3T zNrjQ#b`av^zEmXX-{)b>sgm_WndJExw^#awiY7BeLun6c&*F6T$$WU`)IfHmb2v4`v- z-(Whe$J5EaQh@bcc`sBZFQ{I)_UiXNgP8~Sp`2l|{&{A$MdQ5UFV1Gxq8<1(9v9Cd z*M1w9SHHj6%>HDA@BjMqUGxo=M_$M!oICaN%wf)<|DyeJLE>|ncW^J}ds*W7?4>V?7xCiWGgzjM6c@9wW;B)@r+5hktJkB-_KTNO@V?2H-}?QR^3tA2 zoI!g2te~i03#Ot8GJh3CZB@9EzY?#eWB?rVCSkl#sGhv&!f~B=9Fb?}dypd(x zLACr_T*I=MsaS5W71y#nx)ocD^b56%(ku(^&LI+ila=vl*iP4o>v?%`JC;$?#BZ@G zrvyWp^bfU%Ro-@7A6=Jt1FPpWqAtH8Ze(@CJ?vNYdfG?1={%wPoI z#mA^99fW()58_X$^!VP#q20uvQ@Obbo9UwXI8_H;LUZWf`!$=kT*R5F-*+q3y)R>I z(&wL~dfQx_haN0Poo4IcRQ#U%QuaH?*25=odiRQdpeFSREEnU%m#9tgV(+2%hYspq Gv;P6@x~>HP literal 0 HcmV?d00001 diff --git a/weed/storage/volume_checking.go b/weed/storage/volume_checking.go index 7ac63d6f0..e178e6b1a 100644 --- a/weed/storage/volume_checking.go +++ b/weed/storage/volume_checking.go @@ -14,6 +14,28 @@ import ( "github.com/seaweedfs/seaweedfs/weed/util" ) +func (v *Volume) CheckIndex() (int64, []error) { + v.dataFileAccessLock.RLock() + defer v.dataFileAccessLock.RUnlock() + + idxFileName := v.FileName(".idx") + idxFile, err := os.OpenFile(idxFileName, os.O_RDONLY, 0644) + if err != nil { + return 0, []error{fmt.Errorf("failed to open IDX file %s for volume %v: %v", idxFileName, v.Id, err)} + } + defer idxFile.Close() + + idxStat, err := idxFile.Stat() + if err != nil { + return 0, []error{fmt.Errorf("failed to stat IDX file %s for volume %v: %v", idxFileName, v.Id, err)} + } + if idxStat.Size() == 0 { + return 0, []error{fmt.Errorf("zero-size IDX file for volume %v at %s", v.Id, idxFileName)} + } + + return idx.CheckIndexFile(idxFile, idxStat.Size(), v.Version()) +} + func CheckVolumeDataIntegrity(v *Volume, indexFile *os.File) (lastAppendAtNs uint64, err error) { var indexSize int64 if indexSize, err = verifyIndexFileIntegrity(indexFile); err != nil {