Browse Source
Add "weed backup" command.
Add "weed backup" command.
This is a precursor to asynchronous replication. pull/174/head
chrislusf
10 years ago
17 changed files with 632 additions and 128 deletions
-
54go/operation/sync_volume.go
-
77go/storage/needle_map.go
-
26go/storage/needle_map_boltdb.go
-
26go/storage/needle_map_leveldb.go
-
30go/storage/needle_map_memory.go
-
56go/storage/needle_read_write.go
-
4go/storage/store.go
-
39go/storage/volume.go
-
213go/storage/volume_sync.go
-
38go/util/http_util.go
-
90go/weed/backup.go
-
1go/weed/compact.go
-
1go/weed/weed.go
-
3go/weed/weed_server/volume_server.go
-
2go/weed/weed_server/volume_server_handlers_read.go
-
86go/weed/weed_server/volume_server_handlers_sync.go
-
4go/weed/weed_server/volume_server_handlers_write.go
@ -0,0 +1,54 @@ |
|||||
|
package operation |
||||
|
|
||||
|
import ( |
||||
|
"encoding/json" |
||||
|
"fmt" |
||||
|
"net/url" |
||||
|
|
||||
|
"github.com/chrislusf/seaweedfs/go/glog" |
||||
|
"github.com/chrislusf/seaweedfs/go/util" |
||||
|
) |
||||
|
|
||||
|
// SyncVolumeResponse is the JSON payload describing a volume's sync status.
//
// Replication and Ttl are the string forms of the volume's replica placement
// and TTL. TailOffset is the size of the volume data file, IdxFileSize the
// size reported for its index file, and CompactRevision the compaction
// revision recorded in the volume super block. Error, when non-empty, carries
// a failure message; note it is serialized under the lowercase key "error".
type SyncVolumeResponse struct {
	Replication     string `json:"Replication,omitempty"`
	Ttl             string `json:"Ttl,omitempty"`
	TailOffset      uint64 `json:"TailOffset,omitempty"`
	CompactRevision uint16 `json:"CompactRevision,omitempty"`
	IdxFileSize     uint64 `json:"IdxFileSize,omitempty"`
	Error           string `json:"error,omitempty"`
}
||||
|
|
||||
|
func GetVolumeSyncStatus(server string, vid string) (*SyncVolumeResponse, error) { |
||||
|
values := make(url.Values) |
||||
|
values.Add("volume", vid) |
||||
|
jsonBlob, err := util.Post("http://"+server+"/admin/sync/status", values) |
||||
|
glog.V(2).Info("sync volume result :", string(jsonBlob)) |
||||
|
if err != nil { |
||||
|
return nil, err |
||||
|
} |
||||
|
var ret SyncVolumeResponse |
||||
|
err = json.Unmarshal(jsonBlob, &ret) |
||||
|
if err != nil { |
||||
|
return nil, err |
||||
|
} |
||||
|
if ret.Error != "" { |
||||
|
return nil, fmt.Errorf("Volume %s get sync status error: %s", vid, ret.Error) |
||||
|
} |
||||
|
return &ret, nil |
||||
|
} |
||||
|
|
||||
|
func GetVolumeIdxEntries(server string, vid string, eachEntryFn func(key uint64, offset, size uint32)) error { |
||||
|
values := make(url.Values) |
||||
|
values.Add("volume", vid) |
||||
|
line := make([]byte, 16) |
||||
|
err := util.GetBufferStream("http://"+server+"/admin/sync/index", values, line, func(bytes []byte) { |
||||
|
key := util.BytesToUint64(bytes[:8]) |
||||
|
offset := util.BytesToUint32(bytes[8:12]) |
||||
|
size := util.BytesToUint32(bytes[12:16]) |
||||
|
eachEntryFn(key, offset, size) |
||||
|
}) |
||||
|
if err != nil { |
||||
|
return err |
||||
|
} |
||||
|
return nil |
||||
|
} |
@ -0,0 +1,213 @@ |
|||||
|
package storage |
||||
|
|
||||
|
import ( |
||||
|
"fmt" |
||||
|
"io" |
||||
|
"io/ioutil" |
||||
|
"net/url" |
||||
|
"os" |
||||
|
"sort" |
||||
|
"strconv" |
||||
|
|
||||
|
"github.com/chrislusf/seaweedfs/go/glog" |
||||
|
"github.com/chrislusf/seaweedfs/go/operation" |
||||
|
"github.com/chrislusf/seaweedfs/go/util" |
||||
|
) |
||||
|
|
||||
|
// A volume syncs with a master volume in 2 steps:
|
||||
|
// 1. The slave checks master side to find subscription checkpoint
|
||||
|
// to setup the replication.
|
||||
|
// 2. The slave receives the updates from master
|
||||
|
|
||||
|
/* |
||||
|
Assume the slave volume needs to follow the master volume. |
||||
|
|
||||
|
The master volume could be compacted, and could be many files ahead of |
||||
|
slave volume. |
||||
|
|
||||
|
Step 1: |
||||
|
The slave volume will ask the master volume for a snapshot |
||||
|
of (existing file entries, last offset, number of compacted times). |
||||
|
|
||||
|
For each entry x in master existing file entries: |
||||
|
if x does not exist locally: |
||||
|
add x locally |
||||
|
|
||||
|
For each entry y in local slave existing file entries: |
||||
|
if y does not exist on master: |
||||
|
delete y locally |
||||
|
|
||||
|
Step 2: |
||||
|
After this, use the last offset and number of compacted times to request |
||||
|
the master volume to send a new file, and keep looping. If the number of |
||||
|
compacted times is changed, go back to step 1 (very likely this can be |
||||
|
optimized more later). |
||||
|
|
||||
|
*/ |
||||
|
|
||||
|
func (v *Volume) Synchronize(volumeServer string) (err error) { |
||||
|
var lastCompactRevision uint16 = 0 |
||||
|
var compactRevision uint16 = 0 |
||||
|
var masterMap CompactMap |
||||
|
for i := 0; i < 3; i++ { |
||||
|
if masterMap, _, compactRevision, err = fetchVolumeFileEntries(volumeServer, v.Id); err != nil { |
||||
|
return fmt.Errorf("Failed to sync volume %d entries with %s: %v", v.Id, volumeServer, err) |
||||
|
} |
||||
|
if lastCompactRevision != compactRevision && lastCompactRevision != 0 { |
||||
|
if err = v.Compact(); err != nil { |
||||
|
return fmt.Errorf("Compact Volume before synchronizing %v", err) |
||||
|
} |
||||
|
if err = v.commitCompact(); err != nil { |
||||
|
return fmt.Errorf("Commit Compact before synchronizing %v", err) |
||||
|
} |
||||
|
} |
||||
|
lastCompactRevision = compactRevision |
||||
|
if err = v.trySynchronizing(volumeServer, masterMap, compactRevision); err == nil { |
||||
|
return |
||||
|
} |
||||
|
} |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
type ByOffset []NeedleValue |
||||
|
|
||||
|
func (a ByOffset) Len() int { return len(a) } |
||||
|
func (a ByOffset) Swap(i, j int) { a[i], a[j] = a[j], a[i] } |
||||
|
func (a ByOffset) Less(i, j int) bool { return a[i].Offset < a[j].Offset } |
||||
|
|
||||
|
// trySynchronizing sync with remote volume server incrementally by
|
||||
|
// make up the local and remote delta.
|
||||
|
func (v *Volume) trySynchronizing(volumeServer string, masterMap CompactMap, compactRevision uint16) error { |
||||
|
slaveIdxFile, err := os.Open(v.nm.IndexFileName()) |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("Open volume %d index file: %v", v.Id, err) |
||||
|
} |
||||
|
defer slaveIdxFile.Close() |
||||
|
slaveMap, err := LoadNeedleMap(slaveIdxFile) |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("Load volume %d index file: %v", v.Id, err) |
||||
|
} |
||||
|
var delta []NeedleValue |
||||
|
if err := masterMap.Visit(func(needleValue NeedleValue) error { |
||||
|
if needleValue.Key == 0 { |
||||
|
return nil |
||||
|
} |
||||
|
if _, ok := slaveMap.Get(uint64(needleValue.Key)); ok { |
||||
|
return nil // skip intersection
|
||||
|
} |
||||
|
delta = append(delta, needleValue) |
||||
|
return nil |
||||
|
}); err != nil { |
||||
|
return fmt.Errorf("Add master entry: %v", err) |
||||
|
} |
||||
|
if err := slaveMap.m.Visit(func(needleValue NeedleValue) error { |
||||
|
if needleValue.Key == 0 { |
||||
|
return nil |
||||
|
} |
||||
|
if _, ok := masterMap.Get(needleValue.Key); ok { |
||||
|
return nil // skip intersection
|
||||
|
} |
||||
|
needleValue.Size = 0 |
||||
|
delta = append(delta, needleValue) |
||||
|
return nil |
||||
|
}); err != nil { |
||||
|
return fmt.Errorf("Remove local entry: %v", err) |
||||
|
} |
||||
|
|
||||
|
// simulate to same ordering of remote .dat file needle entries
|
||||
|
sort.Sort(ByOffset(delta)) |
||||
|
|
||||
|
// make up the delta
|
||||
|
fetchCount := 0 |
||||
|
volumeDataContentHandlerUrl := "http://" + volumeServer + "/admin/sync/data" |
||||
|
for _, needleValue := range delta { |
||||
|
if needleValue.Size == 0 { |
||||
|
// remove file entry from local
|
||||
|
v.removeNeedle(needleValue.Key) |
||||
|
continue |
||||
|
} |
||||
|
// add master file entry to local data file
|
||||
|
if err := v.fetchNeedle(volumeDataContentHandlerUrl, needleValue, compactRevision); err != nil { |
||||
|
glog.V(0).Infof("Fetch needle %v from %s: %v", needleValue, volumeServer, err) |
||||
|
return err |
||||
|
} |
||||
|
fetchCount++ |
||||
|
} |
||||
|
glog.V(1).Infof("Fetched %d needles from %s", fetchCount, volumeServer) |
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
func fetchVolumeFileEntries(volumeServer string, vid VolumeId) (m CompactMap, lastOffset uint64, compactRevision uint16, err error) { |
||||
|
m = NewCompactMap() |
||||
|
|
||||
|
syncStatus, err := operation.GetVolumeSyncStatus(volumeServer, vid.String()) |
||||
|
if err != nil { |
||||
|
return m, 0, 0, err |
||||
|
} |
||||
|
|
||||
|
total := 0 |
||||
|
err = operation.GetVolumeIdxEntries(volumeServer, vid.String(), func(key uint64, offset, size uint32) { |
||||
|
// println("remote key", key, "offset", offset*NeedlePaddingSize, "size", size)
|
||||
|
if offset != 0 && size != 0 { |
||||
|
m.Set(Key(key), offset, size) |
||||
|
} else { |
||||
|
m.Delete(Key(key)) |
||||
|
} |
||||
|
total++ |
||||
|
}) |
||||
|
|
||||
|
glog.V(2).Infof("server %s volume %d, entries %d, last offset %d, revision %d", volumeServer, vid, total, syncStatus.TailOffset, syncStatus.CompactRevision) |
||||
|
return m, syncStatus.TailOffset, syncStatus.CompactRevision, err |
||||
|
|
||||
|
} |
||||
|
|
||||
|
func (v *Volume) GetVolumeSyncStatus() operation.SyncVolumeResponse { |
||||
|
var syncStatus = operation.SyncVolumeResponse{} |
||||
|
if stat, err := v.dataFile.Stat(); err == nil { |
||||
|
syncStatus.TailOffset = uint64(stat.Size()) |
||||
|
} |
||||
|
syncStatus.IdxFileSize = v.nm.IndexFileSize() |
||||
|
syncStatus.CompactRevision = v.SuperBlock.CompactRevision |
||||
|
syncStatus.Ttl = v.SuperBlock.Ttl.String() |
||||
|
syncStatus.Replication = v.SuperBlock.ReplicaPlacement.String() |
||||
|
return syncStatus |
||||
|
} |
||||
|
|
||||
|
func (v *Volume) IndexFileContent() ([]byte, error) { |
||||
|
return v.nm.IndexFileContent() |
||||
|
} |
||||
|
|
||||
|
// removeNeedle removes one needle by needle key
|
||||
|
func (v *Volume) removeNeedle(key Key) { |
||||
|
n := new(Needle) |
||||
|
n.Id = uint64(key) |
||||
|
v.delete(n) |
||||
|
} |
||||
|
|
||||
|
// fetchNeedle fetches a remote volume needle by vid, id, offset
|
||||
|
// The compact revision is checked first in case the remote volume
|
||||
|
// is compacted and the offset is invalid any more.
|
||||
|
func (v *Volume) fetchNeedle(volumeDataContentHandlerUrl string, |
||||
|
needleValue NeedleValue, compactRevision uint16) error { |
||||
|
// add master file entry to local data file
|
||||
|
values := make(url.Values) |
||||
|
values.Add("revision", strconv.Itoa(int(compactRevision))) |
||||
|
values.Add("volume", v.Id.String()) |
||||
|
values.Add("id", needleValue.Key.String()) |
||||
|
values.Add("offset", strconv.FormatUint(uint64(needleValue.Offset), 10)) |
||||
|
values.Add("size", strconv.FormatUint(uint64(needleValue.Size), 10)) |
||||
|
glog.V(4).Infof("Fetch %+v", needleValue) |
||||
|
return util.GetUrlStream(volumeDataContentHandlerUrl, values, func(r io.Reader) error { |
||||
|
b, err := ioutil.ReadAll(r) |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("Reading from %s error: %v", volumeDataContentHandlerUrl, err) |
||||
|
} |
||||
|
offset, err := v.AppendBlob(b) |
||||
|
if err != nil { |
||||
|
return fmt.Errorf("Appending volume %d error: %v", v.Id, err) |
||||
|
} |
||||
|
// println("add key", needleValue.Key, "offset", offset, "size", needleValue.Size)
|
||||
|
v.nm.Put(uint64(needleValue.Key), uint32(offset/NeedlePaddingSize), needleValue.Size) |
||||
|
return nil |
||||
|
}) |
||||
|
} |
@ -0,0 +1,90 @@ |
|||||
|
package main |
||||
|
|
||||
|
import ( |
||||
|
"fmt" |
||||
|
|
||||
|
"github.com/chrislusf/seaweedfs/go/operation" |
||||
|
"github.com/chrislusf/seaweedfs/go/storage" |
||||
|
) |
||||
|
|
||||
|
// s holds the flag values of the "backup" command; its fields are bound to
// the command's flag set in init.
var s BackupOptions

// BackupOptions groups the command-line options of the "backup" command.
// Each field points at the value of the corresponding registered flag.
type BackupOptions struct {
	master     *string // "server" flag
	collection *string // "collection" flag
	dir        *string // "dir" flag
	volumeId   *int    // "volumeId" flag; -1 when not provided
}
||||
|
|
||||
|
func init() { |
||||
|
cmdBackup.Run = runBackup // break init cycle
|
||||
|
s.master = cmdBackup.Flag.String("server", "localhost:9333", "SeaweedFS master location") |
||||
|
s.collection = cmdBackup.Flag.String("collection", "", "collection name") |
||||
|
s.dir = cmdBackup.Flag.String("dir", ".", "directory to store volume data files") |
||||
|
s.volumeId = cmdBackup.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.") |
||||
|
} |
||||
|
|
||||
|
var cmdBackup = &Command{ |
||||
|
UsageLine: "backup -dir=. -volumeId=234 -server=localhost:9333", |
||||
|
Short: "incrementally backup a volume to local folder", |
||||
|
Long: `Incrementally backup volume data. |
||||
|
|
||||
|
It is expected that you use this inside a script, to loop through |
||||
|
all possible volume ids that needs to be backup to local folder. |
||||
|
|
||||
|
The volume id does not need to exist locally or even remotely. |
||||
|
This will help to backup future new volumes. |
||||
|
|
||||
|
Usually backing up is just copying the .dat (and .idx) files. |
||||
|
But it's tricky to incremententally copy the differences. |
||||
|
|
||||
|
The complexity comes when there are multiple addition, deletion and compaction. |
||||
|
This tool will handle them correctly and efficiently, avoiding unnecessary data transporation. |
||||
|
`, |
||||
|
} |
||||
|
|
||||
|
func runBackup(cmd *Command, args []string) bool { |
||||
|
if *s.volumeId == -1 { |
||||
|
return false |
||||
|
} |
||||
|
vid := storage.VolumeId(*s.volumeId) |
||||
|
|
||||
|
// find volume location, replication, ttl info
|
||||
|
lookup, err := operation.Lookup(*s.master, vid.String()) |
||||
|
if err != nil { |
||||
|
fmt.Printf("Error looking up volume %d: %v\n", vid, err) |
||||
|
return true |
||||
|
} |
||||
|
volumeServer := lookup.Locations[0].Url |
||||
|
|
||||
|
stats, err := operation.GetVolumeSyncStatus(volumeServer, vid.String()) |
||||
|
if err != nil { |
||||
|
fmt.Printf("Error get volume %d status: %v\n", vid, err) |
||||
|
return true |
||||
|
} |
||||
|
ttl, err := storage.ReadTTL(stats.Ttl) |
||||
|
if err != nil { |
||||
|
fmt.Printf("Error get volume %d ttl %s: %v\n", vid, stats.Ttl, err) |
||||
|
return true |
||||
|
} |
||||
|
replication, err := storage.NewReplicaPlacementFromString(stats.Replication) |
||||
|
if err != nil { |
||||
|
fmt.Printf("Error get volume %d replication %s : %v\n", vid, stats.Replication, err) |
||||
|
return true |
||||
|
} |
||||
|
|
||||
|
v, err := storage.NewVolume(*s.dir, *s.collection, vid, storage.NeedleMapInMemory, replication, ttl) |
||||
|
if err != nil { |
||||
|
fmt.Printf("Error creating or reading from volume %d: %v\n", vid, err) |
||||
|
return true |
||||
|
} |
||||
|
|
||||
|
if err := v.Synchronize(volumeServer); err != nil { |
||||
|
fmt.Printf("Error synchronizing volume %d: %v\n", vid, err) |
||||
|
return true |
||||
|
} |
||||
|
|
||||
|
return true |
||||
|
} |
@ -0,0 +1,86 @@ |
|||||
|
package weed_server |
||||
|
|
||||
|
import ( |
||||
|
"fmt" |
||||
|
"net/http" |
||||
|
|
||||
|
"github.com/chrislusf/seaweedfs/go/glog" |
||||
|
"github.com/chrislusf/seaweedfs/go/storage" |
||||
|
"github.com/chrislusf/seaweedfs/go/util" |
||||
|
) |
||||
|
|
||||
|
func (vs *VolumeServer) getVolumeSyncStatusHandler(w http.ResponseWriter, r *http.Request) { |
||||
|
v, err := vs.getVolume("volume", r) |
||||
|
if v == nil { |
||||
|
writeJsonError(w, r, http.StatusBadRequest, err) |
||||
|
return |
||||
|
} |
||||
|
syncStat := v.GetVolumeSyncStatus() |
||||
|
if syncStat.Error != "" { |
||||
|
writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Get Volume %d status error: %s", v.Id, syncStat.Error)) |
||||
|
glog.V(2).Infoln("getVolumeSyncStatusHandler volume =", r.FormValue("volume"), ", error =", err) |
||||
|
} else { |
||||
|
writeJsonQuiet(w, r, http.StatusOK, syncStat) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func (vs *VolumeServer) getVolumeIndexContentHandler(w http.ResponseWriter, r *http.Request) { |
||||
|
v, err := vs.getVolume("volume", r) |
||||
|
if v == nil { |
||||
|
writeJsonError(w, r, http.StatusBadRequest, err) |
||||
|
return |
||||
|
} |
||||
|
content, err := v.IndexFileContent() |
||||
|
if err != nil { |
||||
|
writeJsonError(w, r, http.StatusInternalServerError, err) |
||||
|
return |
||||
|
} |
||||
|
w.Write(content) |
||||
|
} |
||||
|
|
||||
|
func (vs *VolumeServer) getVolumeDataContentHandler(w http.ResponseWriter, r *http.Request) { |
||||
|
v, err := vs.getVolume("volume", r) |
||||
|
if v == nil { |
||||
|
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("Not Found volume: %v", err)) |
||||
|
return |
||||
|
} |
||||
|
if int(v.SuperBlock.CompactRevision) != util.ParseInt(r.FormValue("revision"), 0) { |
||||
|
writeJsonError(w, r, http.StatusExpectationFailed, fmt.Errorf("Requested Volume Revision is %s, but current revision is %d", r.FormValue("revision"), v.SuperBlock.CompactRevision)) |
||||
|
return |
||||
|
} |
||||
|
offset := uint32(util.ParseUint64(r.FormValue("offset"), 0)) |
||||
|
size := uint32(util.ParseUint64(r.FormValue("size"), 0)) |
||||
|
content, err := storage.ReadNeedleBlob(v.DataFile(), int64(offset)*storage.NeedlePaddingSize, size) |
||||
|
if err != nil { |
||||
|
writeJsonError(w, r, http.StatusInternalServerError, err) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
id := util.ParseUint64(r.FormValue("id"), 0) |
||||
|
n := new(storage.Needle) |
||||
|
n.ParseNeedleHeader(content) |
||||
|
if id != n.Id { |
||||
|
writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("Expected file entry id %d, but found %d", id, n.Id)) |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
w.Write(content) |
||||
|
} |
||||
|
|
||||
|
func (vs *VolumeServer) getVolume(volumeParameterName string, r *http.Request) (*storage.Volume, error) { |
||||
|
volumeIdString := r.FormValue(volumeParameterName) |
||||
|
if volumeIdString == "" { |
||||
|
err := fmt.Errorf("Empty Volume Id: Need to pass in %s=the_volume_id.", volumeParameterName) |
||||
|
return nil, err |
||||
|
} |
||||
|
vid, err := storage.NewVolumeId(volumeIdString) |
||||
|
if err != nil { |
||||
|
err = fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString) |
||||
|
return nil, err |
||||
|
} |
||||
|
v := vs.store.GetVolume(vid) |
||||
|
if v == nil { |
||||
|
return nil, fmt.Errorf("Not Found Volume Id %s: %d", volumeIdString, vid) |
||||
|
} |
||||
|
return v, nil |
||||
|
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue