From 00d0274fd7c829f5d26c051f5832e0f602929b08 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Wed, 23 May 2018 22:28:54 -0700 Subject: [PATCH] prepare to read from multiple file chunks --- weed/filer2/filechunks.go | 21 +++++ weed/filer2/filechunks_test.go | 140 ++++++++++++++++++++++++++++++++- 2 files changed, 157 insertions(+), 4 deletions(-) diff --git a/weed/filer2/filechunks.go b/weed/filer2/filechunks.go index 65197d471..93cee81de 100644 --- a/weed/filer2/filechunks.go +++ b/weed/filer2/filechunks.go @@ -52,6 +52,27 @@ func FindUnusedFileChunks(oldChunks, newChunks []*filer_pb.FileChunk) (unused [] return } +func ReadFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views []*filer_pb.FileChunk) { + + visibles := nonOverlappingVisibleIntervals(chunks) + + stop := offset + int64(size) + + for _, chunk := range visibles { + if chunk.start <= offset && offset < chunk.stop { + views = append(views, &filer_pb.FileChunk{ + FileId: chunk.fileId, + Offset: offset - chunk.start, // offset is the data starting location in this file id + Size: uint64(min(chunk.stop, stop) - offset), + }) + offset = min(chunk.stop, stop) + } + } + + return views + +} + func logPrintf(name string, visibles []*visibleInterval) { return diff --git a/weed/filer2/filechunks_test.go b/weed/filer2/filechunks_test.go index b87b61d3b..9e39477be 100644 --- a/weed/filer2/filechunks_test.go +++ b/weed/filer2/filechunks_test.go @@ -9,10 +9,10 @@ import ( func TestCompactFileChunks(t *testing.T) { chunks := []*filer_pb.FileChunk{ - {Offset:10, Size:100, FileId:"abc", Mtime:50}, - {Offset:100, Size:100, FileId:"def", Mtime:100}, - {Offset:200, Size:100, FileId:"ghi", Mtime:200}, - {Offset:110, Size:200, FileId:"jkl", Mtime:300}, + {Offset: 10, Size: 100, FileId: "abc", Mtime: 50}, + {Offset: 100, Size: 100, FileId: "def", Mtime: 100}, + {Offset: 200, Size: 100, FileId: "ghi", Mtime: 200}, + {Offset: 110, Size: 200, FileId: "jkl", Mtime: 300}, } compacted, garbarge := CompactFileChunks(chunks) @@ -144,3 +144,135 @@ func TestIntervalMerging(t *testing.T) { } } + +func TestChunksReading(t *testing.T) { + + testcases := []struct { + Chunks []*filer_pb.FileChunk + Offset int64 + Size int + Expected []*filer_pb.FileChunk + }{ + // case 0: normal + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 100, Size: 100, FileId: "asdf", Mtime: 134}, + {Offset: 200, Size: 100, FileId: "fsad", Mtime: 353}, + }, + Offset: 0, + Size: 250, + Expected: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc"}, + {Offset: 0, Size: 100, FileId: "asdf"}, + {Offset: 0, Size: 50, FileId: "fsad"}, + }, + }, + // case 1: updates overwrite full chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, + }, + Offset: 50, + Size: 100, + Expected: []*filer_pb.FileChunk{ + {Offset: 50, Size: 100, FileId: "asdf"}, + }, + }, + // case 2: updates overwrite part of previous chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 50, FileId: "asdf", Mtime: 134}, + }, + Offset: 25, + Size: 50, + Expected: []*filer_pb.FileChunk{ + {Offset: 25, Size: 25, FileId: "asdf"}, + {Offset: 0, Size: 25, FileId: "abc"}, + }, + }, + // case 3: updates overwrite full chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, + {Offset: 50, Size: 250, FileId: "xxxx", Mtime: 154}, + }, + Offset: 0, + Size: 200, + Expected: []*filer_pb.FileChunk{ + {Offset: 0, Size: 50, FileId: "asdf"}, + {Offset: 0, Size: 150, FileId: "xxxx"}, + }, + }, + // case 4: updates far away from prev chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, + {Offset: 250, Size: 250, FileId: "xxxx", Mtime: 154}, + }, + Offset: 0, + Size: 400, + Expected: []*filer_pb.FileChunk{ + {Offset: 0, Size: 200, FileId: "asdf"}, + // {Offset: 0, Size: 150, FileId: "xxxx"}, // missing intervals should not happen + }, + }, + // case 5: updates overwrite full chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "asdf", Mtime: 184}, + {Offset: 70, Size: 150, FileId: "abc", Mtime: 143}, + {Offset: 80, Size: 100, FileId: "xxxx", Mtime: 134}, + }, + Offset: 0, + Size: 220, + Expected: []*filer_pb.FileChunk{ + {Offset: 0, Size: 200, FileId: "asdf"}, + {Offset: 0, Size: 20, FileId: "abc"}, + }, + }, + // case 6: same updates + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + }, + Offset: 0, + Size: 100, + Expected: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc"}, + }, + }, + } + + for i, testcase := range testcases { + log.Printf("++++++++++ read test case %d ++++++++++++++++++++", i) + chunks := ReadFromChunks(testcase.Chunks, testcase.Offset, testcase.Size) + for x, chunk := range chunks { + log.Printf("read case %d, chunk %d, offset=%d, size=%d, fileId=%s", + i, x, chunk.Offset, chunk.Size, chunk.FileId) + if chunk.Offset != testcase.Expected[x].Offset { + t.Fatalf("failed on read case %d, chunk %d, Offset %d, expect %d", + i, x, chunk.Offset, testcase.Expected[x].Offset) + } + if chunk.Size != testcase.Expected[x].Size { + t.Fatalf("failed on read case %d, chunk %d, Size %d, expect %d", + i, x, chunk.Size, testcase.Expected[x].Size) + } + if chunk.FileId != testcase.Expected[x].FileId { + t.Fatalf("failed on read case %d, chunk %d, FileId %s, expect %s", + i, x, chunk.FileId, testcase.Expected[x].FileId) + } + } + if len(chunks) != len(testcase.Expected) { + t.Fatalf("failed to read test case %d, len %d expected %d", i, len(chunks), len(testcase.Expected)) + } + } + +}