Browse Source

refactoring

pull/1278/head
Chris Lu 5 years ago
parent
commit
f282ed444b
  1. 21
      weed/filer2/filechunks.go
  2. 2
      weed/filer2/reader_at.go
  3. 4
      weed/filer2/stream.go
  4. 2
      weed/replication/sink/azuresink/azure_sink.go
  5. 2
      weed/replication/sink/b2sink/b2_sink.go
  6. 2
      weed/replication/sink/gcssink/gcs_sink.go
  7. 78
      weed/util/chunk_cache/chunk_cache.go
  8. 6
      weed/util/chunk_cache/chunk_cache_on_disk_test.go
  9. 83
      weed/util/chunk_cache/on_disk_cache_layer.go

21
weed/filer2/filechunks.go

@ -85,11 +85,15 @@ type ChunkView struct {
Offset int64 Offset int64
Size uint64 Size uint64
LogicOffset int64 LogicOffset int64
IsFullChunk bool
ChunkSize uint64
CipherKey []byte CipherKey []byte
IsGzipped bool IsGzipped bool
} }
func (cv *ChunkView) IsFullChunk() bool {
return cv.Size == cv.ChunkSize
}
func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int64) (views []*ChunkView) { func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int64) (views []*ChunkView) {
visibles := NonOverlappingVisibleIntervals(chunks) visibles := NonOverlappingVisibleIntervals(chunks)
@ -111,13 +115,12 @@ func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int
for _, chunk := range visibles { for _, chunk := range visibles {
if chunk.start <= offset && offset < chunk.stop && offset < stop { if chunk.start <= offset && offset < chunk.stop && offset < stop {
isFullChunk := chunk.isFullChunk && chunk.start == offset && chunk.stop <= stop
views = append(views, &ChunkView{ views = append(views, &ChunkView{
FileId: chunk.fileId, FileId: chunk.fileId,
Offset: offset - chunk.start, // offset is the data starting location in this file id Offset: offset - chunk.start, // offset is the data starting location in this file id
Size: uint64(min(chunk.stop, stop) - offset), Size: uint64(min(chunk.stop, stop) - offset),
LogicOffset: offset, LogicOffset: offset,
IsFullChunk: isFullChunk,
ChunkSize: chunk.chunkSize,
CipherKey: chunk.cipherKey, CipherKey: chunk.cipherKey,
IsGzipped: chunk.isGzipped, IsGzipped: chunk.isGzipped,
}) })
@ -146,7 +149,7 @@ var bufPool = sync.Pool{
func MergeIntoVisibles(visibles, newVisibles []VisibleInterval, chunk *filer_pb.FileChunk) []VisibleInterval { func MergeIntoVisibles(visibles, newVisibles []VisibleInterval, chunk *filer_pb.FileChunk) []VisibleInterval {
newV := newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Mtime, true, chunk.CipherKey, chunk.IsGzipped)
newV := newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Mtime, chunk.Size, chunk.CipherKey, chunk.IsGzipped)
length := len(visibles) length := len(visibles)
if length == 0 { if length == 0 {
@ -160,11 +163,11 @@ func MergeIntoVisibles(visibles, newVisibles []VisibleInterval, chunk *filer_pb.
logPrintf(" before", visibles) logPrintf(" before", visibles)
for _, v := range visibles { for _, v := range visibles {
if v.start < chunk.Offset && chunk.Offset < v.stop { if v.start < chunk.Offset && chunk.Offset < v.stop {
newVisibles = append(newVisibles, newVisibleInterval(v.start, chunk.Offset, v.fileId, v.modifiedTime, false, v.cipherKey, v.isGzipped))
newVisibles = append(newVisibles, newVisibleInterval(v.start, chunk.Offset, v.fileId, v.modifiedTime, chunk.Size, v.cipherKey, v.isGzipped))
} }
chunkStop := chunk.Offset + int64(chunk.Size) chunkStop := chunk.Offset + int64(chunk.Size)
if v.start < chunkStop && chunkStop < v.stop { if v.start < chunkStop && chunkStop < v.stop {
newVisibles = append(newVisibles, newVisibleInterval(chunkStop, v.stop, v.fileId, v.modifiedTime, false, v.cipherKey, v.isGzipped))
newVisibles = append(newVisibles, newVisibleInterval(chunkStop, v.stop, v.fileId, v.modifiedTime, chunk.Size, v.cipherKey, v.isGzipped))
} }
if chunkStop <= v.start || v.stop <= chunk.Offset { if chunkStop <= v.start || v.stop <= chunk.Offset {
newVisibles = append(newVisibles, v) newVisibles = append(newVisibles, v)
@ -216,18 +219,18 @@ type VisibleInterval struct {
stop int64 stop int64
modifiedTime int64 modifiedTime int64
fileId string fileId string
isFullChunk bool
chunkSize uint64
cipherKey []byte cipherKey []byte
isGzipped bool isGzipped bool
} }
func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, isFullChunk bool, cipherKey []byte, isGzipped bool) VisibleInterval {
func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, chunkSize uint64, cipherKey []byte, isGzipped bool) VisibleInterval {
return VisibleInterval{ return VisibleInterval{
start: start, start: start,
stop: stop, stop: stop,
fileId: fileId, fileId: fileId,
modifiedTime: modifiedTime, modifiedTime: modifiedTime,
isFullChunk: isFullChunk,
chunkSize: chunkSize,
cipherKey: cipherKey, cipherKey: cipherKey,
isGzipped: isGzipped, isGzipped: isGzipped,
} }

2
weed/filer2/reader_at.go

@ -106,7 +106,7 @@ func (c *ChunkReadAt) fetchChunkData(chunkView *ChunkView) (data []byte, err err
// fmt.Printf("fetching %s [%d,%d)\n", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size)) // fmt.Printf("fetching %s [%d,%d)\n", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
hasDataInCache := false hasDataInCache := false
chunkData := c.chunkCache.GetChunk(chunkView.FileId)
chunkData := c.chunkCache.GetChunk(chunkView.FileId, chunkView.ChunkSize)
if chunkData != nil { if chunkData != nil {
glog.V(3).Infof("cache hit %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size)) glog.V(3).Infof("cache hit %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
hasDataInCache = true hasDataInCache = true

4
weed/filer2/stream.go

@ -31,7 +31,7 @@ func StreamContent(masterClient *wdclient.MasterClient, w io.Writer, chunks []*f
for _, chunkView := range chunkViews { for _, chunkView := range chunkViews {
urlString := fileId2Url[chunkView.FileId] urlString := fileId2Url[chunkView.FileId]
err := util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk, chunkView.Offset, int(chunkView.Size), func(data []byte) {
err := util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
w.Write(data) w.Write(data)
}) })
if err != nil { if err != nil {
@ -128,7 +128,7 @@ func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
return err return err
} }
var buffer bytes.Buffer var buffer bytes.Buffer
err = util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk, chunkView.Offset, int(chunkView.Size), func(data []byte) {
err = util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
buffer.Write(data) buffer.Write(data)
}) })
if err != nil { if err != nil {

2
weed/replication/sink/azuresink/azure_sink.go

@ -115,7 +115,7 @@ func (g *AzureSink) CreateEntry(key string, entry *filer_pb.Entry) error {
} }
var writeErr error var writeErr error
readErr := util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk, chunk.Offset, int(chunk.Size), func(data []byte) {
readErr := util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
_, writeErr = appendBlobURL.AppendBlock(context.Background(), bytes.NewReader(data), azblob.AppendBlobAccessConditions{}, nil) _, writeErr = appendBlobURL.AppendBlock(context.Background(), bytes.NewReader(data), azblob.AppendBlobAccessConditions{}, nil)
}) })

2
weed/replication/sink/b2sink/b2_sink.go

@ -103,7 +103,7 @@ func (g *B2Sink) CreateEntry(key string, entry *filer_pb.Entry) error {
} }
var writeErr error var writeErr error
readErr := util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk, chunk.Offset, int(chunk.Size), func(data []byte) {
readErr := util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
_, err := writer.Write(data) _, err := writer.Write(data)
if err != nil { if err != nil {
writeErr = err writeErr = err

2
weed/replication/sink/gcssink/gcs_sink.go

@ -101,7 +101,7 @@ func (g *GcsSink) CreateEntry(key string, entry *filer_pb.Entry) error {
return err return err
} }
err = util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk, chunk.Offset, int(chunk.Size), func(data []byte) {
err = util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk(), chunk.Offset, int(chunk.Size), func(data []byte) {
wc.Write(data) wc.Write(data)
}) })

78
weed/util/chunk_cache/chunk_cache.go

@ -1,52 +1,39 @@
package chunk_cache package chunk_cache
import ( import (
"fmt"
"path"
"sort"
"sync" "sync"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/storage"
"github.com/chrislusf/seaweedfs/weed/storage/needle" "github.com/chrislusf/seaweedfs/weed/storage/needle"
) )
const (
memCacheSizeLimit = 1024 * 1024
)
// a global cache for recently accessed file chunks // a global cache for recently accessed file chunks
type ChunkCache struct { type ChunkCache struct {
memCache *ChunkCacheInMemory memCache *ChunkCacheInMemory
diskCaches []*ChunkCacheVolume
diskCache *OnDiskCacheLayer
sync.RWMutex sync.RWMutex
} }
func NewChunkCache(maxEntries int64, dir string, diskSizeMB int64, segmentCount int) *ChunkCache { func NewChunkCache(maxEntries int64, dir string, diskSizeMB int64, segmentCount int) *ChunkCache {
c := &ChunkCache{
memCache: NewChunkCacheInMemory(maxEntries),
}
volumeCount, volumeSize := int(diskSizeMB/30000), int64(30000) volumeCount, volumeSize := int(diskSizeMB/30000), int64(30000)
if volumeCount < segmentCount { if volumeCount < segmentCount {
volumeCount, volumeSize = segmentCount, diskSizeMB/int64(segmentCount) volumeCount, volumeSize = segmentCount, diskSizeMB/int64(segmentCount)
} }
for i := 0; i < volumeCount; i++ {
fileName := path.Join(dir, fmt.Sprintf("cache_%d", i))
diskCache, err := LoadOrCreateChunkCacheVolume(fileName, volumeSize*1024*1024)
if err != nil {
glog.Errorf("failed to add cache %s : %v", fileName, err)
} else {
c.diskCaches = append(c.diskCaches, diskCache)
}
c := &ChunkCache{
memCache: NewChunkCacheInMemory(maxEntries),
diskCache: NewOnDiskCacheLayer(dir, "cache", volumeCount, volumeSize),
} }
// keep newest cache to the front
sort.Slice(c.diskCaches, func(i, j int) bool {
return c.diskCaches[i].lastModTime.After(c.diskCaches[j].lastModTime)
})
return c return c
} }
func (c *ChunkCache) GetChunk(fileId string) (data []byte) {
func (c *ChunkCache) GetChunk(fileId string, chunkSize uint64) (data []byte) {
if c == nil { if c == nil {
return return
} }
@ -54,33 +41,25 @@ func (c *ChunkCache) GetChunk(fileId string) (data []byte) {
c.RLock() c.RLock()
defer c.RUnlock() defer c.RUnlock()
return c.doGetChunk(fileId)
return c.doGetChunk(fileId, chunkSize)
} }
func (c *ChunkCache) doGetChunk(fileId string) (data []byte) {
func (c *ChunkCache) doGetChunk(fileId string, chunkSize uint64) (data []byte) {
if chunkSize < memCacheSizeLimit {
if data = c.memCache.GetChunk(fileId); data != nil { if data = c.memCache.GetChunk(fileId); data != nil {
return data return data
} }
}
fid, err := needle.ParseFileIdFromString(fileId) fid, err := needle.ParseFileIdFromString(fileId)
if err != nil { if err != nil {
glog.Errorf("failed to parse file id %s", fileId) glog.Errorf("failed to parse file id %s", fileId)
return nil return nil
} }
for _, diskCache := range c.diskCaches {
data, err = diskCache.GetNeedle(fid.Key)
if err == storage.ErrorNotFound {
continue
}
if err != nil {
glog.Errorf("failed to read cache file %s id %s", diskCache.fileName, fileId)
continue
}
if len(data) != 0 {
return
}
}
return nil
return c.diskCache.getChunk(fid.Key)
} }
func (c *ChunkCache) SetChunk(fileId string, data []byte) { func (c *ChunkCache) SetChunk(fileId string, data []byte) {
@ -95,22 +74,8 @@ func (c *ChunkCache) SetChunk(fileId string, data []byte) {
func (c *ChunkCache) doSetChunk(fileId string, data []byte) { func (c *ChunkCache) doSetChunk(fileId string, data []byte) {
if len(data) < memCacheSizeLimit {
c.memCache.SetChunk(fileId, data) c.memCache.SetChunk(fileId, data)
if len(c.diskCaches) == 0 {
return
}
if c.diskCaches[0].fileSize+int64(len(data)) > c.diskCaches[0].sizeLimit {
t, resetErr := c.diskCaches[len(c.diskCaches)-1].Reset()
if resetErr != nil {
glog.Errorf("failed to reset cache file %s", c.diskCaches[len(c.diskCaches)-1].fileName)
return
}
for i := len(c.diskCaches) - 1; i > 0; i-- {
c.diskCaches[i] = c.diskCaches[i-1]
}
c.diskCaches[0] = t
} }
fid, err := needle.ParseFileIdFromString(fileId) fid, err := needle.ParseFileIdFromString(fileId)
@ -118,7 +83,8 @@ func (c *ChunkCache) doSetChunk(fileId string, data []byte) {
glog.Errorf("failed to parse file id %s", fileId) glog.Errorf("failed to parse file id %s", fileId)
return return
} }
c.diskCaches[0].WriteNeedle(fid.Key, data)
c.diskCache.setChunk(fid.Key, data)
} }
@ -128,7 +94,5 @@ func (c *ChunkCache) Shutdown() {
} }
c.Lock() c.Lock()
defer c.Unlock() defer c.Unlock()
for _, diskCache := range c.diskCaches {
diskCache.Shutdown()
}
c.diskCache.shutdown()
} }

6
weed/util/chunk_cache/chunk_cache_on_disk_test.go

@ -23,6 +23,7 @@ func TestOnDisk(t *testing.T) {
type test_data struct { type test_data struct {
data []byte data []byte
fileId string fileId string
size uint64
} }
testData := make([]*test_data, writeCount) testData := make([]*test_data, writeCount)
for i := 0; i < writeCount; i++ { for i := 0; i < writeCount; i++ {
@ -31,12 +32,13 @@ func TestOnDisk(t *testing.T) {
testData[i] = &test_data{ testData[i] = &test_data{
data: buff, data: buff,
fileId: fmt.Sprintf("1,%daabbccdd", i+1), fileId: fmt.Sprintf("1,%daabbccdd", i+1),
size: uint64(len(buff)),
} }
cache.SetChunk(testData[i].fileId, testData[i].data) cache.SetChunk(testData[i].fileId, testData[i].data)
} }
for i := 0; i < writeCount; i++ { for i := 0; i < writeCount; i++ {
data := cache.GetChunk(testData[i].fileId)
data := cache.GetChunk(testData[i].fileId, testData[i].size)
if bytes.Compare(data, testData[i].data) != 0 { if bytes.Compare(data, testData[i].data) != 0 {
t.Errorf("failed to write to and read from cache: %d", i) t.Errorf("failed to write to and read from cache: %d", i)
} }
@ -47,7 +49,7 @@ func TestOnDisk(t *testing.T) {
cache = NewChunkCache(0, tmpDir, totalDiskSizeMb, segmentCount) cache = NewChunkCache(0, tmpDir, totalDiskSizeMb, segmentCount)
for i := 0; i < writeCount; i++ { for i := 0; i < writeCount; i++ {
data := cache.GetChunk(testData[i].fileId)
data := cache.GetChunk(testData[i].fileId, testData[i].size)
if bytes.Compare(data, testData[i].data) != 0 { if bytes.Compare(data, testData[i].data) != 0 {
t.Errorf("failed to write to and read from cache: %d", i) t.Errorf("failed to write to and read from cache: %d", i)
} }

83
weed/util/chunk_cache/on_disk_cache_layer.go

@ -0,0 +1,83 @@
package chunk_cache
import (
"fmt"
"path"
"sort"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/storage"
"github.com/chrislusf/seaweedfs/weed/storage/types"
)
// OnDiskCacheLayer groups several on-disk cache volumes and treats them as
// one chunk cache keyed by needle id.
type OnDiskCacheLayer struct {
// diskCaches is ordered newest first: NewOnDiskCacheLayer sorts it by
// lastModTime descending, setChunk writes to index 0 and recycles the
// last element when index 0 is full, and getChunk searches front to back.
diskCaches []*ChunkCacheVolume
}
// NewOnDiskCacheLayer loads or creates volumeCount cache volumes inside dir,
// named "<namePrefix>_<index>", and returns a layer holding every volume that
// could be opened.
//
// volumeSize is multiplied by 1024*1024 before being handed to
// LoadOrCreateChunkCacheVolume (presumably a size in MB converted to bytes;
// confirm against that function). A volume that fails to load is logged and
// left out, so the returned layer may hold fewer than volumeCount volumes,
// possibly none.
func NewOnDiskCacheLayer(dir, namePrefix string, volumeCount int, volumeSize int64) *OnDiskCacheLayer {
	layer := &OnDiskCacheLayer{}
	volumeLimit := volumeSize * 1024 * 1024

	for idx := 0; idx < volumeCount; idx++ {
		volumePath := path.Join(dir, fmt.Sprintf("%s_%d", namePrefix, idx))
		volume, loadErr := LoadOrCreateChunkCacheVolume(volumePath, volumeLimit)
		if loadErr != nil {
			glog.Errorf("failed to add cache %s : %v", volumePath, loadErr)
			continue
		}
		layer.diskCaches = append(layer.diskCaches, volume)
	}

	// Most recently modified volume goes first; it is the one written to.
	newerFirst := func(a, b int) bool {
		return layer.diskCaches[a].lastModTime.After(layer.diskCaches[b].lastModTime)
	}
	sort.Slice(layer.diskCaches, newerFirst)

	return layer
}
// setChunk stores data under needleId in the front (newest) cache volume.
//
// When the write would push the front volume past its sizeLimit, the volume
// at the back of diskCaches is reset, every other volume moves one slot
// towards the back, and the freshly reset volume becomes the new front. If
// the reset fails the error is logged and the chunk is not cached.
//
// A layer without any volume (volumeCount of 0, or every volume failed to
// load in NewOnDiskCacheLayer) silently ignores the write.
func (c *OnDiskCacheLayer) setChunk(needleId types.NeedleId, data []byte) {
	// Nothing to write to; indexing diskCaches[0] below would panic.
	if len(c.diskCaches) == 0 {
		return
	}

	if c.diskCaches[0].fileSize+int64(len(data)) > c.diskCaches[0].sizeLimit {
		last := len(c.diskCaches) - 1
		t, resetErr := c.diskCaches[last].Reset()
		if resetErr != nil {
			glog.Errorf("failed to reset cache file %s", c.diskCaches[last].fileName)
			return
		}
		// Shift volumes [0, last) into [1, last]; copy handles the overlap.
		copy(c.diskCaches[1:], c.diskCaches[:last])
		c.diskCaches[0] = t
	}

	// NOTE(review): any result returned by WriteNeedle is discarded here,
	// as in the original; confirm whether a write failure should be logged.
	c.diskCaches[0].WriteNeedle(needleId, data)
}
// getChunk looks needleId up in each cache volume, front to back, and returns
// the first non-empty payload found.
//
// A volume reporting storage.ErrorNotFound is skipped quietly; any other read
// error is logged and the search moves on to the next volume. The result is
// nil when no volume yields data.
func (c *OnDiskCacheLayer) getChunk(needleId types.NeedleId) (data []byte) {
	for _, volume := range c.diskCaches {
		found, readErr := volume.GetNeedle(needleId)
		switch {
		case readErr == storage.ErrorNotFound:
			// Plain miss in this volume; try the next one.
		case readErr != nil:
			glog.Errorf("failed to read cache file %s id %d", volume.fileName, needleId)
		case len(found) != 0:
			return found
		}
	}
	return nil
}
// shutdown calls Shutdown on every cache volume held by the layer, in slice
// order.
func (c *OnDiskCacheLayer) shutdown() {
	for i := range c.diskCaches {
		c.diskCaches[i].Shutdown()
	}
}
Loading…
Cancel
Save