Browse Source
Implement index (fast) scrubbing for regular/EC volumes. (#8207)
Implement index (fast) scrubbing for regular/EC volumes. (#8207)
Implement index (fast) scrubbing for regular/EC volumes via `ScrubVolume()`/`ScrubEcVolume()`. Also rearranges existing index test files for reuse across unit tests for different modules. (pull/8226/head)
committed by
GitHub
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 254 additions and 17 deletions
-
25weed/server/volume_grpc_scrub.go
-
11weed/storage/erasure_coding/ec_volume.go
-
2weed/storage/erasure_coding/ec_volume_test.go
-
103weed/storage/idx/check.go
-
108weed/storage/idx/check_test.go
-
0weed/storage/idx/test_files/389.ecx
-
BINweed/storage/idx/test_files/deleted_files.ecx
-
BINweed/storage/idx/test_files/deleted_files.idx
-
BINweed/storage/idx/test_files/deleted_files_bitrot.ecx
-
BINweed/storage/idx/test_files/simple_index.idx
-
BINweed/storage/idx/test_files/simple_index_bitrot.idx
-
BINweed/storage/idx/test_files/simple_index_truncated.idx
-
22weed/storage/volume_checking.go
@ -0,0 +1,103 @@ |
|||
package idx |
|||
|
|||
import ( |
|||
"fmt" |
|||
"io" |
|||
"sort" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/storage/needle" |
|||
"github.com/seaweedfs/seaweedfs/weed/storage/types" |
|||
) |
|||
|
|||
type indexEntry struct { |
|||
index int |
|||
id types.NeedleId |
|||
offset int64 |
|||
size types.Size |
|||
} |
|||
|
|||
func (ie *indexEntry) Compare(other *indexEntry) int { |
|||
if ie.offset < other.offset { |
|||
return -1 |
|||
} |
|||
if ie.offset > other.offset { |
|||
return 1 |
|||
} |
|||
if ie.size < other.size { |
|||
return -1 |
|||
} |
|||
if ie.size > other.size { |
|||
return 1 |
|||
} |
|||
return 0 |
|||
} |
|||
|
|||
// CheckIndexFile verifies the integrity of a IDX/ECX index file. Returns a count of processed file entries, and slice of found errors.
|
|||
func CheckIndexFile(r io.ReaderAt, indexFileSize int64, version needle.Version) (int64, []error) { |
|||
errs := []error{} |
|||
|
|||
entries := []*indexEntry{} |
|||
var i int |
|||
err := WalkIndexFile(r, 0, func(id types.NeedleId, offset types.Offset, size types.Size) error { |
|||
entries = append(entries, &indexEntry{ |
|||
index: i, |
|||
id: id, |
|||
offset: offset.ToActualOffset(), |
|||
size: size, |
|||
}) |
|||
i++ |
|||
return nil |
|||
}) |
|||
if err != nil { |
|||
errs = append(errs, err) |
|||
} |
|||
|
|||
sort.Slice(entries, func(i, j int) bool { |
|||
return entries[i].Compare(entries[j]) < 0 |
|||
}) |
|||
|
|||
for i, e := range entries { |
|||
if i == 0 { |
|||
// nothing to check for the first entry
|
|||
continue |
|||
} |
|||
|
|||
start, end := e.offset, e.offset |
|||
if size := needle.GetActualSize(e.size, version); size != 0 { |
|||
end += size - 1 |
|||
} |
|||
|
|||
last := entries[i-1] |
|||
lastStart, lastEnd := last.offset, last.offset |
|||
if lastSize := needle.GetActualSize(last.size, version); lastSize != 0 { |
|||
lastEnd += lastSize - 1 |
|||
} |
|||
|
|||
// check if needles overlap
|
|||
if start <= lastEnd { |
|||
errs = append(errs, fmt.Errorf( |
|||
"needle %d (#%d) at [%d-%d] overlaps needle %d at [%d-%d]", |
|||
e.id, e.index+1, |
|||
start, end, |
|||
last.id, |
|||
lastStart, lastEnd)) |
|||
} |
|||
|
|||
// The check below is intended to ensure all index entries are contiguous; unfortunately, Seaweed
|
|||
// can delete index entries for files while keeping their data, so volumes with deleted files
|
|||
// will fail this test :(
|
|||
// See https://github.com/seaweedfs/seaweedfs/issues/8204 for details.
|
|||
/* |
|||
if e.offset != lastEnd + 1 { |
|||
errs = append(errs, fmt.Errorf("offset %d for needle %d (#%d) doesn't match end of needle %d at %d", e.offset, e.id, e.index+1, last.id, lastEnd)) |
|||
} |
|||
*/ |
|||
} |
|||
|
|||
count := int64(len(entries)) |
|||
if got, want := count*types.NeedleMapEntrySize, indexFileSize; got != want { |
|||
errs = append(errs, fmt.Errorf("expected an index file of size %d, got %d", want, got)) |
|||
} |
|||
|
|||
return count, errs |
|||
} |
|||
@ -0,0 +1,108 @@ |
|||
package idx |
|||
|
|||
import ( |
|||
"fmt" |
|||
"os" |
|||
"reflect" |
|||
"testing" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/storage/needle" |
|||
) |
|||
|
|||
func TestCheckIndexFile(t *testing.T) { |
|||
testCases := []struct { |
|||
name string |
|||
indexPath string |
|||
version needle.Version |
|||
want int64 |
|||
wantErrs []error |
|||
}{ |
|||
{ |
|||
name: "healthy index", |
|||
indexPath: "./test_files/simple_index.idx", |
|||
version: needle.Version3, |
|||
want: 161, |
|||
wantErrs: []error{}, |
|||
}, |
|||
{ |
|||
name: "healthy index with deleted files", |
|||
indexPath: "./test_files/deleted_files.idx", |
|||
version: needle.Version3, |
|||
want: 230, |
|||
wantErrs: []error{}, |
|||
}, |
|||
{ |
|||
name: "damaged index (bitrot)", |
|||
indexPath: "./test_files/simple_index_bitrot.idx", |
|||
version: needle.Version3, |
|||
want: 161, |
|||
wantErrs: []error{ |
|||
fmt.Errorf("needle 3544668469065756977 (#2) at [6602459528-7427766999] overlaps needle 49 at [6602459528-7427766999]"), |
|||
fmt.Errorf("expected an index file of size 2577, got 2576"), |
|||
}, |
|||
}, |
|||
{ |
|||
name: "damaged index (truncated)", |
|||
indexPath: "./test_files/simple_index_truncated.idx", |
|||
version: needle.Version3, |
|||
want: 158, |
|||
wantErrs: []error{ |
|||
fmt.Errorf("expected an index file of size 2540, got 2528"), |
|||
}, |
|||
}, |
|||
{ |
|||
name: "healthy EC index", |
|||
indexPath: "./test_files/389.ecx", |
|||
version: needle.Version3, |
|||
want: 485098, |
|||
wantErrs: []error{}, |
|||
}, |
|||
{ |
|||
name: "healthy EC index with deleted files", |
|||
indexPath: "./test_files/deleted_files.ecx", |
|||
version: needle.Version3, |
|||
want: 116, |
|||
wantErrs: []error{}, |
|||
}, |
|||
{ |
|||
name: "damaged EC index (bitrot)", |
|||
indexPath: "./test_files/deleted_files_bitrot.ecx", |
|||
version: needle.Version3, |
|||
want: 116, |
|||
wantErrs: []error{ |
|||
fmt.Errorf("needle 3223857 (#110) at [6602459528-7427767055] overlaps needle 12593 at [6601933184-7407907279]"), |
|||
fmt.Errorf("needle 3544668469065757234 (#43) at [6737203600-7579354079] overlaps needle 3223857 at [6602459528-7427767055]"), |
|||
fmt.Errorf("needle 3421236 (#112) at [7006693800-7899362591] overlaps needle 3544668469065757234 at [6737203600-7579354079]"), |
|||
fmt.Errorf("needle 310 (#113) at [7276179888-8185702583] overlaps needle 3421236 at [7006693800-7899362591]"), |
|||
fmt.Errorf("needle 7089336938131513954 (#52) at [13204919056-13205053935] overlaps needle 27410143614427489 at [13070174984-14703946887]"), |
|||
fmt.Errorf("needle 25186 (#50) at [13204919056-14855533967] overlaps needle 7089336938131513954 at [13204919056-13205053935]"), |
|||
fmt.Errorf("needle 7089336938131513954 (#51) at [13204919056-14855533967] overlaps needle 25186 at [13204919056-14855533967]"), |
|||
fmt.Errorf("expected an index file of size 1857, got 1856"), |
|||
}, |
|||
}, |
|||
} |
|||
|
|||
for _, tc := range testCases { |
|||
t.Run(tc.name, func(t *testing.T) { |
|||
idx, err := os.OpenFile(tc.indexPath, os.O_RDONLY, 0) |
|||
if err != nil { |
|||
t.Fatalf("failed to open index file: %v", err) |
|||
} |
|||
defer idx.Close() |
|||
|
|||
idxStat, err := idx.Stat() |
|||
if err != nil { |
|||
t.Fatalf("failed to stat index file: %v", err) |
|||
} |
|||
|
|||
got, gotErrs := CheckIndexFile(idx, idxStat.Size(), tc.version) |
|||
|
|||
if got != tc.want { |
|||
t.Errorf("expected %d files processed, got %d", tc.want, got) |
|||
} |
|||
if !reflect.DeepEqual(gotErrs, tc.wantErrs) { |
|||
t.Errorf("expected errors %v, got %v", tc.wantErrs, gotErrs) |
|||
} |
|||
}) |
|||
} |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue