You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
225 lines
7.3 KiB
225 lines
7.3 KiB
package storage
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"google.golang.org/grpc"
|
|
"io"
|
|
"os"
|
|
"sort"
|
|
|
|
"github.com/chrislusf/seaweedfs/weed/glog"
|
|
"github.com/chrislusf/seaweedfs/weed/operation"
|
|
"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
|
|
"github.com/chrislusf/seaweedfs/weed/storage/needle"
|
|
. "github.com/chrislusf/seaweedfs/weed/storage/types"
|
|
)
|
|
|
|
// The volume sync with a master volume via 2 steps:
|
|
// 1. The slave checks master side to find subscription checkpoint
|
|
// to setup the replication.
|
|
// 2. The slave receives the updates from master
|
|
|
|
/*
|
|
Assume the slave volume needs to follow the master volume.
|
|
|
|
The master volume could be compacted, and could be many files ahead of
|
|
slave volume.
|
|
|
|
Step 1:
|
|
The slave volume will ask the master volume for a snapshot
|
|
of (existing file entries, last offset, number of compacted times).
|
|
|
|
For each entry x in master existing file entries:
|
|
if x does not exist locally:
|
|
add x locally
|
|
|
|
For each entry y in local slave existing file entries:
|
|
if y does not exist on master:
|
|
delete y locally
|
|
|
|
Step 2:
|
|
After this, use the last offset and number of compacted times to request
|
|
the master volume to send a new file, and keep looping. If the number of
|
|
compacted times is changed, go back to step 1 (very likely this can be
|
|
optimized more later).
|
|
|
|
*/
|
|
|
|
func (v *Volume) Synchronize(volumeServer string, grpcDialOption grpc.DialOption) (err error) {
|
|
var lastCompactRevision uint16 = 0
|
|
var compactRevision uint16 = 0
|
|
var masterMap *needle.CompactMap
|
|
for i := 0; i < 3; i++ {
|
|
if masterMap, _, compactRevision, err = fetchVolumeFileEntries(volumeServer, grpcDialOption, v.Id); err != nil {
|
|
return fmt.Errorf("Failed to sync volume %d entries with %s: %v", v.Id, volumeServer, err)
|
|
}
|
|
if lastCompactRevision != compactRevision && lastCompactRevision != 0 {
|
|
if err = v.Compact(0); err != nil {
|
|
return fmt.Errorf("Compact Volume before synchronizing %v", err)
|
|
}
|
|
if err = v.commitCompact(); err != nil {
|
|
return fmt.Errorf("Commit Compact before synchronizing %v", err)
|
|
}
|
|
}
|
|
lastCompactRevision = compactRevision
|
|
if err = v.trySynchronizing(volumeServer, grpcDialOption, masterMap, compactRevision); err == nil {
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
type ByOffset []needle.NeedleValue
|
|
|
|
func (a ByOffset) Len() int { return len(a) }
|
|
func (a ByOffset) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
|
func (a ByOffset) Less(i, j int) bool { return a[i].Offset < a[j].Offset }
|
|
|
|
// trySynchronizing sync with remote volume server incrementally by
|
|
// make up the local and remote delta.
|
|
func (v *Volume) trySynchronizing(volumeServer string, grpcDialOption grpc.DialOption, masterMap *needle.CompactMap, compactRevision uint16) error {
|
|
slaveIdxFile, err := os.Open(v.nm.IndexFileName())
|
|
if err != nil {
|
|
return fmt.Errorf("Open volume %d index file: %v", v.Id, err)
|
|
}
|
|
defer slaveIdxFile.Close()
|
|
slaveMap, err := LoadBtreeNeedleMap(slaveIdxFile)
|
|
if err != nil {
|
|
return fmt.Errorf("Load volume %d index file: %v", v.Id, err)
|
|
}
|
|
var delta []needle.NeedleValue
|
|
if err := masterMap.Visit(func(needleValue needle.NeedleValue) error {
|
|
if needleValue.Key == NeedleIdEmpty {
|
|
return nil
|
|
}
|
|
if _, ok := slaveMap.Get(needleValue.Key); ok {
|
|
return nil // skip intersection
|
|
}
|
|
delta = append(delta, needleValue)
|
|
return nil
|
|
}); err != nil {
|
|
return fmt.Errorf("Add master entry: %v", err)
|
|
}
|
|
if err := slaveMap.m.Visit(func(needleValue needle.NeedleValue) error {
|
|
if needleValue.Key == NeedleIdEmpty {
|
|
return nil
|
|
}
|
|
if _, ok := masterMap.Get(needleValue.Key); ok {
|
|
return nil // skip intersection
|
|
}
|
|
needleValue.Size = 0
|
|
delta = append(delta, needleValue)
|
|
return nil
|
|
}); err != nil {
|
|
return fmt.Errorf("Remove local entry: %v", err)
|
|
}
|
|
|
|
// simulate to same ordering of remote .dat file needle entries
|
|
sort.Sort(ByOffset(delta))
|
|
|
|
// make up the delta
|
|
fetchCount := 0
|
|
for _, needleValue := range delta {
|
|
if needleValue.Size == 0 {
|
|
// remove file entry from local
|
|
v.removeNeedle(needleValue.Key)
|
|
continue
|
|
}
|
|
// add master file entry to local data file
|
|
if err := v.fetchNeedle(volumeServer, grpcDialOption, needleValue, compactRevision); err != nil {
|
|
glog.V(0).Infof("Fetch needle %v from %s: %v", needleValue, volumeServer, err)
|
|
return err
|
|
}
|
|
fetchCount++
|
|
}
|
|
glog.V(1).Infof("Fetched %d needles from %s", fetchCount, volumeServer)
|
|
return nil
|
|
}
|
|
|
|
func fetchVolumeFileEntries(volumeServer string, grpcDialOption grpc.DialOption, vid VolumeId) (m *needle.CompactMap, lastOffset uint64, compactRevision uint16, err error) {
|
|
m = needle.NewCompactMap()
|
|
|
|
syncStatus, err := operation.GetVolumeSyncStatus(volumeServer, grpcDialOption, uint32(vid))
|
|
if err != nil {
|
|
return m, 0, 0, err
|
|
}
|
|
|
|
total := 0
|
|
err = operation.GetVolumeIdxEntries(volumeServer, grpcDialOption, uint32(vid), func(key NeedleId, offset Offset, size uint32) {
|
|
// println("remote key", key, "offset", offset*NeedlePaddingSize, "size", size)
|
|
if offset > 0 && size != TombstoneFileSize {
|
|
m.Set(NeedleId(key), offset, size)
|
|
} else {
|
|
m.Delete(NeedleId(key))
|
|
}
|
|
total++
|
|
})
|
|
|
|
glog.V(2).Infof("server %s volume %d, entries %d, last offset %d, revision %d", volumeServer, vid, total, syncStatus.TailOffset, syncStatus.CompactRevision)
|
|
return m, syncStatus.TailOffset, uint16(syncStatus.CompactRevision), err
|
|
|
|
}
|
|
|
|
func (v *Volume) GetVolumeSyncStatus() *volume_server_pb.VolumeSyncStatusResponse {
|
|
var syncStatus = &volume_server_pb.VolumeSyncStatusResponse{}
|
|
if stat, err := v.dataFile.Stat(); err == nil {
|
|
syncStatus.TailOffset = uint64(stat.Size())
|
|
}
|
|
syncStatus.Collection = v.Collection
|
|
syncStatus.IdxFileSize = v.nm.IndexFileSize()
|
|
syncStatus.CompactRevision = uint32(v.SuperBlock.CompactRevision)
|
|
syncStatus.Ttl = v.SuperBlock.Ttl.String()
|
|
syncStatus.Replication = v.SuperBlock.ReplicaPlacement.String()
|
|
return syncStatus
|
|
}
|
|
|
|
func (v *Volume) IndexFileContent() ([]byte, error) {
|
|
return v.nm.IndexFileContent()
|
|
}
|
|
|
|
// removeNeedle removes one needle by needle key
|
|
func (v *Volume) removeNeedle(key NeedleId) {
|
|
n := new(Needle)
|
|
n.Id = key
|
|
v.deleteNeedle(n)
|
|
}
|
|
|
|
// fetchNeedle fetches a remote volume needle by vid, id, offset
|
|
// The compact revision is checked first in case the remote volume
|
|
// is compacted and the offset is invalid any more.
|
|
func (v *Volume) fetchNeedle(volumeServer string, grpcDialOption grpc.DialOption, needleValue needle.NeedleValue, compactRevision uint16) error {
|
|
|
|
return operation.WithVolumeServerClient(volumeServer, grpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
|
|
stream, err := client.VolumeSyncData(context.Background(), &volume_server_pb.VolumeSyncDataRequest{
|
|
VolumeId: uint32(v.Id),
|
|
Revision: uint32(compactRevision),
|
|
Offset: uint32(needleValue.Offset),
|
|
Size: uint32(needleValue.Size),
|
|
NeedleId: needleValue.Key.String(),
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var fileContent []byte
|
|
for {
|
|
resp, err := stream.Recv()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("read needle %v: %v", needleValue.Key.String(), err)
|
|
}
|
|
fileContent = append(fileContent, resp.FileContent...)
|
|
}
|
|
|
|
offset, err := v.AppendBlob(fileContent)
|
|
if err != nil {
|
|
return fmt.Errorf("Appending volume %d error: %v", v.Id, err)
|
|
}
|
|
// println("add key", needleValue.Key, "offset", offset, "size", needleValue.Size)
|
|
v.nm.Put(needleValue.Key, Offset(offset/NeedlePaddingSize), needleValue.Size)
|
|
return nil
|
|
})
|
|
|
|
}
|