chrislu
3 years ago
9 changed files with 288 additions and 86 deletions
2    weed/filesys/dirty_pages_continuous.go
94   weed/filesys/dirty_pages_temp_file.go
1    weed/filesys/filehandle.go
28   weed/filesys/page_writer.go
152  weed/filesys/page_writer/chunked_file_writer.go
60   weed/filesys/page_writer/chunked_file_writer_test.go
20   weed/filesys/page_writer/dirty_pages.go
13   weed/filesys/page_writer/dirty_pages_temp_interval.go
4    weed/filesys/page_writer/page_chunk_interval_list.go
weed/filesys/page_writer/chunked_file_writer.go
@@ -0,0 +1,152 @@

package page_writer

import (
	"github.com/chrislusf/seaweedfs/weed/glog"
	"io"
	"os"
	"sync"
)

// ChunkedFileWriter assumes the write requests will come in within chunks
type ChunkedFileWriter struct {
	dir                     string
	file                    *os.File
	logicToActualChunkIndex map[int]int
	chunkUsages             []*PageChunkWrittenIntervalList
	ChunkSize               int64
	sync.Mutex
}

var _ = io.WriterAt(&ChunkedFileWriter{})

func NewChunkedFileWriter(dir string, chunkSize int64) *ChunkedFileWriter {
	return &ChunkedFileWriter{
		dir:                     dir,
		file:                    nil,
		logicToActualChunkIndex: make(map[int]int),
		ChunkSize:               chunkSize,
	}
}

func (cw *ChunkedFileWriter) WriteAt(p []byte, off int64) (n int, err error) {
	cw.Lock()
	defer cw.Unlock()

	if cw.file == nil {
		cw.file, err = os.CreateTemp(cw.dir, "")
		if err != nil {
			glog.Errorf("create temp file: %v", err)
			return
		}
	}

	actualOffset, chunkUsage := cw.toActualWriteOffset(off)
	n, err = cw.file.WriteAt(p, actualOffset)
	if err == nil {
		startOffset := off % cw.ChunkSize
		chunkUsage.MarkWritten(startOffset, startOffset+int64(n))
	}
	return
}

func (cw *ChunkedFileWriter) ReadDataAt(p []byte, off int64) (maxStop int64) {
	cw.Lock()
	defer cw.Unlock()

	if cw.file == nil {
		return
	}

	logicChunkIndex := off / cw.ChunkSize
	actualChunkIndex, chunkUsage := cw.toActualReadOffset(off)
	if chunkUsage != nil {
		for t := chunkUsage.head.next; t != chunkUsage.tail; t = t.next {
			logicStart := max(off, logicChunkIndex*cw.ChunkSize+t.startOffset)
			logicStop := min(off+int64(len(p)), logicChunkIndex*cw.ChunkSize+t.stopOffset)
			if logicStart < logicStop {
				actualStart := logicStart - logicChunkIndex*cw.ChunkSize + int64(actualChunkIndex)*cw.ChunkSize
				_, err := cw.file.ReadAt(p[logicStart-off:logicStop-off], actualStart)
				if err != nil {
					glog.Errorf("reading temp file: %v", err)
					break
				}
				maxStop = max(maxStop, logicStop)
			}
		}
	}
	return
}

func (cw *ChunkedFileWriter) toActualWriteOffset(logicOffset int64) (actualOffset int64, chunkUsage *PageChunkWrittenIntervalList) {
	logicChunkIndex := int(logicOffset / cw.ChunkSize)
	offsetRemainder := logicOffset % cw.ChunkSize
	existingActualChunkIndex, found := cw.logicToActualChunkIndex[logicChunkIndex]
	if found {
		return int64(existingActualChunkIndex)*cw.ChunkSize + offsetRemainder, cw.chunkUsages[existingActualChunkIndex]
	}
	cw.logicToActualChunkIndex[logicChunkIndex] = len(cw.chunkUsages)
	chunkUsage = newPageChunkWrittenIntervalList()
	cw.chunkUsages = append(cw.chunkUsages, chunkUsage)
	return int64(len(cw.chunkUsages)-1)*cw.ChunkSize + offsetRemainder, chunkUsage
}

func (cw *ChunkedFileWriter) toActualReadOffset(logicOffset int64) (actualChunkIndex int, chunkUsage *PageChunkWrittenIntervalList) {
	logicChunkIndex := int(logicOffset / cw.ChunkSize)
	existingActualChunkIndex, found := cw.logicToActualChunkIndex[logicChunkIndex]
	if found {
		return existingActualChunkIndex, cw.chunkUsages[existingActualChunkIndex]
	}
	return 0, nil
}

func (cw *ChunkedFileWriter) ProcessEachInterval(process func(file *os.File, logicChunkIndex int, interval *PageChunkWrittenInterval)) {
	for logicChunkIndex, actualChunkIndex := range cw.logicToActualChunkIndex {
		chunkUsage := cw.chunkUsages[actualChunkIndex]
		for t := chunkUsage.head.next; t != chunkUsage.tail; t = t.next {
			process(cw.file, logicChunkIndex, t)
		}
	}
}

func (cw *ChunkedFileWriter) Destroy() {
	if cw.file != nil {
		cw.file.Close()
		os.Remove(cw.file.Name())
	}
}

type FileIntervalReader struct {
	f           *os.File
	startOffset int64
	stopOffset  int64
	position    int64
}

var _ = io.Reader(&FileIntervalReader{})

func NewFileIntervalReader(cw *ChunkedFileWriter, logicChunkIndex int, interval *PageChunkWrittenInterval) *FileIntervalReader {
	actualChunkIndex, found := cw.logicToActualChunkIndex[logicChunkIndex]
	if !found {
		// this should never happen
		return nil
	}
	return &FileIntervalReader{
		f:           cw.file,
		startOffset: int64(actualChunkIndex)*cw.ChunkSize + interval.startOffset,
		stopOffset:  int64(actualChunkIndex)*cw.ChunkSize + interval.stopOffset,
		position:    0,
	}
}

func (fr *FileIntervalReader) Read(p []byte) (n int, err error) {
	readSize := minInt(len(p), int(fr.stopOffset-fr.startOffset-fr.position))
	n, err = fr.f.ReadAt(p[:readSize], fr.startOffset+fr.position)
	if err == nil || err == io.EOF {
		fr.position += int64(n)
		if fr.stopOffset-fr.startOffset-fr.position == 0 {
			// return a tiny bit faster
			err = io.EOF
			return
		}
	}
	return
}
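The new ChunkedFileWriter lazily creates one temp file, assigns each logical chunk to the next free slot in that file the first time the chunk is written, and records the written byte ranges per chunk. Below is a minimal usage sketch, not part of this commit; it assumes it sits inside the page_writer package so the unexported helpers above are in scope.

package page_writer

// Hypothetical usage sketch (not from this commit): write one range into a
// ChunkedFileWriter and read it back.

import "fmt"

func exampleChunkedFileWriterUsage() {
	cw := NewChunkedFileWriter("", 1024*1024) // 1 MiB chunks; the temp file is created lazily on first WriteAt
	defer cw.Destroy()                        // closes and removes the temp file

	data := []byte("hello")
	// Offset 3 MiB + 10 falls into logical chunk 3, which becomes actual chunk 0
	// in the temp file because it is the first chunk ever touched.
	if _, err := cw.WriteAt(data, 3*1024*1024+10); err != nil {
		fmt.Println("write failed:", err)
		return
	}

	buf := make([]byte, len(data))
	// ReadDataAt fills buf from the recorded written intervals and returns the
	// highest logical offset it managed to cover.
	maxStop := cw.ReadDataAt(buf, 3*1024*1024+10)
	fmt.Printf("read %q, covered up to logical offset %d\n", buf, maxStop)
}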
weed/filesys/page_writer/chunked_file_writer_test.go
@@ -0,0 +1,60 @@

package page_writer

import (
	"github.com/stretchr/testify/assert"
	"os"
	"testing"
)

func TestChunkedFileWriter_toActualOffset(t *testing.T) {
	cw := NewChunkedFileWriter("", 16)

	writeToFile(cw, 50, 60)
	writeToFile(cw, 60, 64)

	writeToFile(cw, 32, 40)
	writeToFile(cw, 42, 48)

	writeToFile(cw, 48, 50)

	assert.Equal(t, 1, cw.chunkUsages[0].size(), "fully covered")
	assert.Equal(t, 2, cw.chunkUsages[1].size(), "2 intervals")

}

func writeToFile(cw *ChunkedFileWriter, startOffset int64, stopOffset int64) {

	_, chunkUsage := cw.toActualWriteOffset(startOffset)

	// skip doing actual writing

	innerOffset := startOffset % cw.ChunkSize
	chunkUsage.MarkWritten(innerOffset, innerOffset+stopOffset-startOffset)

}

func TestWriteChunkedFile(t *testing.T) {
	x := NewChunkedFileWriter(os.TempDir(), 20)
	defer x.Destroy()
	y := NewChunkedFileWriter(os.TempDir(), 12)
	defer y.Destroy()

	batchSize := 4
	buf := make([]byte, batchSize)
	for i := 0; i < 256; i++ {
		for x := 0; x < batchSize; x++ {
			buf[x] = byte(i)
		}
		x.WriteAt(buf, int64(i*batchSize))
		y.WriteAt(buf, int64((255-i)*batchSize))
	}

	a := make([]byte, 1)
	b := make([]byte, 1)
	for i := 0; i < 256*batchSize; i++ {
		x.ReadDataAt(a, int64(i))
		y.ReadDataAt(b, int64(256*batchSize-1-i))
		assert.Equal(t, a[0], b[0], "same read")
	}

}
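The tests above exercise WriteAt and ReadDataAt but not ProcessEachInterval and FileIntervalReader, which the rest of this change presumably uses to flush written ranges upstream. A hedged sketch of such a flush loop follows; the saveFunc callback is an illustrative assumption, not an API from this PR, and the code again assumes it lives inside the page_writer package.

package page_writer

// Hypothetical flush sketch (not part of this commit): stream every written
// interval out through a FileIntervalReader.

import (
	"io"
	"os"
)

func flushAllIntervals(cw *ChunkedFileWriter, saveFunc func(r io.Reader, logicChunkIndex int, size int64)) {
	cw.ProcessEachInterval(func(file *os.File, logicChunkIndex int, interval *PageChunkWrittenInterval) {
		// Wrap the interval's byte range of the temp file in an io.Reader.
		reader := NewFileIntervalReader(cw, logicChunkIndex, interval)
		if reader == nil {
			return
		}
		saveFunc(reader, logicChunkIndex, interval.stopOffset-interval.startOffset)
	})
}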