Browse Source

add dat file size into vif for EC

pull/5894/head
chrislu 4 months ago
parent
commit
07f4998188
  1. 1
      weed/pb/volume_server.proto
  2. 1002
      weed/pb/volume_server_pb/volume_server.pb.go
  3. 6
      weed/server/volume_grpc_erasure_coding.go
  4. 8
      weed/storage/erasure_coding/ec_locate.go
  5. 26
      weed/storage/erasure_coding/ec_test.go
  6. 13
      weed/storage/erasure_coding/ec_volume.go
  7. 2
      weed/storage/erasure_coding/ec_volume_test.go

1
weed/pb/volume_server.proto

@@ -478,6 +478,7 @@ message VolumeInfo {
   uint32 version = 2;
   string replication = 3;
   uint32 BytesOffset = 4;
+  int64 file_size = 5; // used for EC encoded volumes to store the original file size
 }
 // tiered storage

1002
weed/pb/volume_server_pb/volume_server.pb.go
File diff suppressed because it is too large
View File

6
weed/server/volume_grpc_erasure_coding.go

@@ -71,7 +71,11 @@ func (vs *VolumeServer) VolumeEcShardsGenerate(ctx context.Context, req *volume_
 	}
 	// write .vif files
-	if err := volume_info.SaveVolumeInfo(baseFileName+".vif", &volume_server_pb.VolumeInfo{Version: uint32(v.Version())}); err != nil {
+	datSize, _, _ := v.FileStat()
+	if err := volume_info.SaveVolumeInfo(baseFileName+".vif", &volume_server_pb.VolumeInfo{
+		Version:  uint32(v.Version()),
+		FileSize: int64(datSize),
+	}); err != nil {
 		return nil, fmt.Errorf("SaveVolumeInfo %s: %v", baseFileName, err)
 	}

8
weed/storage/erasure_coding/ec_locate.go

@@ -12,8 +12,8 @@ type Interval struct {
 	LargeBlockRowsCount int
 }
-func LocateData(largeBlockLength, smallBlockLength int64, datSize int64, offset int64, size types.Size) (intervals []Interval) {
-	blockIndex, isLargeBlock, nLargeBlockRows, innerBlockOffset := locateOffset(largeBlockLength, smallBlockLength, datSize, offset)
+func LocateData(largeBlockLength, smallBlockLength int64, shardDatSize int64, offset int64, size types.Size) (intervals []Interval) {
+	blockIndex, isLargeBlock, nLargeBlockRows, innerBlockOffset := locateOffset(largeBlockLength, smallBlockLength, shardDatSize, offset)
 	for size > 0 {
 		interval := Interval{
@@ -48,9 +48,9 @@ func LocateData(largeBlockLength, smallBlockLength int64, datSize int64, offset
 	return
 }
-func locateOffset(largeBlockLength, smallBlockLength int64, datSize int64, offset int64) (blockIndex int, isLargeBlock bool, nLargeBlockRows int64, innerBlockOffset int64) {
+func locateOffset(largeBlockLength, smallBlockLength int64, shardDatSize int64, offset int64) (blockIndex int, isLargeBlock bool, nLargeBlockRows int64, innerBlockOffset int64) {
 	largeRowSize := largeBlockLength * DataShardsCount
-	nLargeBlockRows = datSize / (largeBlockLength * DataShardsCount)
+	nLargeBlockRows = (shardDatSize - 1) / largeBlockLength
 	// if offset is within the large block area
 	if offset < nLargeBlockRows*largeRowSize {

26
weed/storage/erasure_coding/ec_test.go

@@ -82,7 +82,9 @@ func assertSame(datFile *os.File, datSize int64, ecFiles []*os.File, offset type
 		return fmt.Errorf("failed to read dat file: %v", err)
 	}
-	ecData, err := readEcFile(datSize, ecFiles, offset, size)
+	ecFileStat, _ := ecFiles[0].Stat()
+	ecData, err := readEcFile(ecFileStat.Size(), ecFiles, offset, size)
 	if err != nil {
 		return fmt.Errorf("failed to read ec file: %v", err)
 	}
@@ -107,9 +109,9 @@ func readDatFile(datFile *os.File, offset types.Offset, size types.Size) ([]byte
 	return data, nil
 }
-func readEcFile(datSize int64, ecFiles []*os.File, offset types.Offset, size types.Size) (data []byte, err error) {
-	intervals := LocateData(largeBlockSize, smallBlockSize, datSize, offset.ToActualOffset(), size)
+func readEcFile(shardDatSize int64, ecFiles []*os.File, offset types.Offset, size types.Size) (data []byte, err error) {
+	intervals := LocateData(largeBlockSize, smallBlockSize, shardDatSize, offset.ToActualOffset(), size)
 	for i, interval := range intervals {
 		if d, e := readOneInterval(interval, ecFiles); e != nil {
@@ -132,7 +134,7 @@ func readOneInterval(interval Interval, ecFiles []*os.File) (data []byte, err er
 	data = make([]byte, interval.Size)
 	err = readFromFile(ecFiles[ecFileIndex], data, ecFileOffset)
-	{ // do some ec testing
+	if false { // do some ec testing
 		ecData, err := readFromOtherEcFiles(ecFiles, int(ecFileIndex), ecFileOffset, interval.Size)
 		if err != nil {
 			return nil, fmt.Errorf("ec reconstruct error: %v", err)
@@ -191,7 +193,7 @@ func removeGeneratedFiles(baseFileName string) {
 }
 func TestLocateData(t *testing.T) {
-	intervals := LocateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize, 1)
+	intervals := LocateData(largeBlockSize, smallBlockSize, largeBlockSize+1, DataShardsCount*largeBlockSize, 1)
 	if len(intervals) != 1 {
 		t.Errorf("unexpected interval size %d", len(intervals))
 	}
@@ -199,7 +201,7 @@ func TestLocateData(t *testing.T) {
 		t.Errorf("unexpected interval %+v", intervals[0])
 	}
-	intervals = LocateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize/2+100, DataShardsCount*largeBlockSize+1-DataShardsCount*largeBlockSize/2-100)
+	intervals = LocateData(largeBlockSize, smallBlockSize, largeBlockSize+1, DataShardsCount*largeBlockSize/2+100, DataShardsCount*largeBlockSize+1-DataShardsCount*largeBlockSize/2-100)
 	fmt.Printf("%+v\n", intervals)
 }
@@ -211,7 +213,7 @@ func (this Interval) sameAs(that Interval) bool {
 }
 func TestLocateData2(t *testing.T) {
-	intervals := LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, 32205678320, 21479557912, 4194339)
+	intervals := LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, 3221225472, 21479557912, 4194339)
 	assert.Equal(t, intervals, []Interval{
 		{BlockIndex: 4, InnerBlockOffset: 527128, Size: 521448, IsLargeBlock: false, LargeBlockRowsCount: 2},
 		{BlockIndex: 5, InnerBlockOffset: 0, Size: 1048576, IsLargeBlock: false, LargeBlockRowsCount: 2},
@@ -220,3 +222,13 @@ func TestLocateData2(t *testing.T) {
 		{BlockIndex: 8, InnerBlockOffset: 0, Size: 527163, IsLargeBlock: false, LargeBlockRowsCount: 2},
 	})
 }
+func TestLocateData3(t *testing.T) {
+	intervals := LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, 3221225472, 30782909808, 112568)
+	for _, interval := range intervals {
+		fmt.Printf("%+v\n", interval)
+	}
+	assert.Equal(t, intervals, []Interval{
+		{BlockIndex: 8876, InnerBlockOffset: 912752, Size: 112568, IsLargeBlock: false, LargeBlockRowsCount: 2},
+	})
+}

13
weed/storage/erasure_coding/ec_volume.go

@@ -39,6 +39,7 @@ type EcVolume struct {
 	ecjFile           *os.File
 	ecjFileAccessLock sync.Mutex
 	diskType          types.DiskType
+	datFileSize       int64
 }
 func NewEcVolume(diskType types.DiskType, dir string, dirIdx string, collection string, vid needle.VolumeId) (ev *EcVolume, err error) {
@@ -68,6 +69,7 @@ func NewEcVolume(diskType types.DiskType, dir string, dirIdx string, collection
 	ev.Version = needle.Version3
 	if volumeInfo, _, found, _ := volume_info.MaybeLoadVolumeInfo(dataBaseFileName + ".vif"); found {
 		ev.Version = needle.Version(volumeInfo.Version)
+		ev.datFileSize = volumeInfo.FileSize
 	} else {
 		volume_info.SaveVolumeInfo(dataBaseFileName+".vif", &volume_server_pb.VolumeInfo{Version: uint32(ev.Version)})
 	}
@@ -222,8 +224,17 @@ func (ev *EcVolume) LocateEcShardNeedle(needleId types.NeedleId, version needle.
 func (ev *EcVolume) LocateEcShardNeedleInterval(version needle.Version, offset int64, size types.Size) (intervals []Interval) {
 	shard := ev.Shards[0]
+	// Usually shard will be padded to round of ErasureCodingSmallBlockSize.
+	// So in most cases, if shardSize equals to n * ErasureCodingLargeBlockSize,
+	// the data would be in small blocks.
+	shardSize := shard.ecdFileSize - 1
+	if ev.datFileSize > 0 {
+		// To get the correct LargeBlockRowsCount
+		// use datFileSize to calculate the shardSize to match the EC encoding logic.
+		shardSize = ev.datFileSize / DataShardsCount
+	}
 	// calculate the locations in the ec shards
-	intervals = LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, DataShardsCount*shard.ecdFileSize, offset, types.Size(needle.GetActualSize(size, version)))
+	intervals = LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, shardSize, offset, types.Size(needle.GetActualSize(size, version)))
 	return
 }

2
weed/storage/erasure_coding/ec_volume_test.go

@@ -44,7 +44,7 @@ func TestPositioning(t *testing.T) {
 		fmt.Printf("offset: %d size: %d\n", offset.ToActualOffset(), size)
 		var shardEcdFileSize int64 = 1118830592 // 1024*1024*1024*3
-		intervals := LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, DataShardsCount*shardEcdFileSize, offset.ToActualOffset(), types.Size(needle.GetActualSize(size, needle.CurrentVersion)))
+		intervals := LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, shardEcdFileSize, offset.ToActualOffset(), types.Size(needle.GetActualSize(size, needle.CurrentVersion)))
 		for _, interval := range intervals {
 			shardId, shardOffset := interval.ToShardIdAndOffset(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)

Loading…
Cancel
Save