Browse Source
Add "weed backup" command.
Add "weed backup" command.
This is a precursor for asynchronous replication. pull/174/head
chrislusf
10 years ago
17 changed files with 632 additions and 128 deletions
-
54go/operation/sync_volume.go
-
77go/storage/needle_map.go
-
26go/storage/needle_map_boltdb.go
-
26go/storage/needle_map_leveldb.go
-
30go/storage/needle_map_memory.go
-
56go/storage/needle_read_write.go
-
4go/storage/store.go
-
39go/storage/volume.go
-
213go/storage/volume_sync.go
-
38go/util/http_util.go
-
90go/weed/backup.go
-
1go/weed/compact.go
-
1go/weed/weed.go
-
3go/weed/weed_server/volume_server.go
-
2go/weed/weed_server/volume_server_handlers_read.go
-
86go/weed/weed_server/volume_server_handlers_sync.go
-
4go/weed/weed_server/volume_server_handlers_write.go
@ -0,0 +1,54 @@ |
|||
package operation |
|||
|
|||
import ( |
|||
"encoding/json" |
|||
"fmt" |
|||
"net/url" |
|||
|
|||
"github.com/chrislusf/seaweedfs/go/glog" |
|||
"github.com/chrislusf/seaweedfs/go/util" |
|||
) |
|||
|
|||
// SyncVolumeResponse is the JSON document describing the sync status of a
// volume. Every field is omitted from the encoded form when it holds its
// zero value.
type SyncVolumeResponse struct {
	// Replication is the volume's replica placement in string form.
	Replication string `json:"Replication,omitempty"`
	// Ttl is the volume's time-to-live setting in string form.
	Ttl string `json:"Ttl,omitempty"`
	// TailOffset is the current size of the volume data file.
	TailOffset uint64 `json:"TailOffset,omitempty"`
	// CompactRevision is the compact revision recorded in the volume super block.
	CompactRevision uint16 `json:"CompactRevision,omitempty"`
	// IdxFileSize is the size of the volume index file.
	IdxFileSize uint64 `json:"IdxFileSize,omitempty"`
	// Error, when non-empty, marks the response as a failure.
	Error string `json:"error,omitempty"`
}
|||
|
|||
func GetVolumeSyncStatus(server string, vid string) (*SyncVolumeResponse, error) { |
|||
values := make(url.Values) |
|||
values.Add("volume", vid) |
|||
jsonBlob, err := util.Post("http://"+server+"/admin/sync/status", values) |
|||
glog.V(2).Info("sync volume result :", string(jsonBlob)) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
var ret SyncVolumeResponse |
|||
err = json.Unmarshal(jsonBlob, &ret) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
if ret.Error != "" { |
|||
return nil, fmt.Errorf("Volume %s get sync status error: %s", vid, ret.Error) |
|||
} |
|||
return &ret, nil |
|||
} |
|||
|
|||
func GetVolumeIdxEntries(server string, vid string, eachEntryFn func(key uint64, offset, size uint32)) error { |
|||
values := make(url.Values) |
|||
values.Add("volume", vid) |
|||
line := make([]byte, 16) |
|||
err := util.GetBufferStream("http://"+server+"/admin/sync/index", values, line, func(bytes []byte) { |
|||
key := util.BytesToUint64(bytes[:8]) |
|||
offset := util.BytesToUint32(bytes[8:12]) |
|||
size := util.BytesToUint32(bytes[12:16]) |
|||
eachEntryFn(key, offset, size) |
|||
}) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
return nil |
|||
} |
@ -0,0 +1,213 @@ |
|||
package storage |
|||
|
|||
import ( |
|||
"fmt" |
|||
"io" |
|||
"io/ioutil" |
|||
"net/url" |
|||
"os" |
|||
"sort" |
|||
"strconv" |
|||
|
|||
"github.com/chrislusf/seaweedfs/go/glog" |
|||
"github.com/chrislusf/seaweedfs/go/operation" |
|||
"github.com/chrislusf/seaweedfs/go/util" |
|||
) |
|||
|
|||
// The volume syncs with a master volume in 2 steps:
|
|||
// 1. The slave checks master side to find subscription checkpoint
|
|||
// to setup the replication.
|
|||
// 2. The slave receives the updates from master
|
|||
|
|||
/* |
|||
Assume the slave volume needs to follow the master volume. |
|||
|
|||
The master volume could be compacted, and could be many files ahead of |
|||
slave volume. |
|||
|
|||
Step 1: |
|||
The slave volume will ask the master volume for a snapshot |
|||
of (existing file entries, last offset, number of compacted times). |
|||
|
|||
For each entry x in master existing file entries: |
|||
if x does not exist locally: |
|||
add x locally |
|||
|
|||
For each entry y in local slave existing file entries: |
|||
if y does not exist on master: |
|||
delete y locally |
|||
|
|||
Step 2: |
|||
After this, use the last offset and number of compacted times to request |
|||
the master volume to send a new file, and keep looping. If the number of |
|||
compacted times is changed, go back to step 1 (very likely this can be |
|||
optimized more later). |
|||
|
|||
*/ |
|||
|
|||
func (v *Volume) Synchronize(volumeServer string) (err error) { |
|||
var lastCompactRevision uint16 = 0 |
|||
var compactRevision uint16 = 0 |
|||
var masterMap CompactMap |
|||
for i := 0; i < 3; i++ { |
|||
if masterMap, _, compactRevision, err = fetchVolumeFileEntries(volumeServer, v.Id); err != nil { |
|||
return fmt.Errorf("Failed to sync volume %d entries with %s: %v", v.Id, volumeServer, err) |
|||
} |
|||
if lastCompactRevision != compactRevision && lastCompactRevision != 0 { |
|||
if err = v.Compact(); err != nil { |
|||
return fmt.Errorf("Compact Volume before synchronizing %v", err) |
|||
} |
|||
if err = v.commitCompact(); err != nil { |
|||
return fmt.Errorf("Commit Compact before synchronizing %v", err) |
|||
} |
|||
} |
|||
lastCompactRevision = compactRevision |
|||
if err = v.trySynchronizing(volumeServer, masterMap, compactRevision); err == nil { |
|||
return |
|||
} |
|||
} |
|||
return |
|||
} |
|||
|
|||
type ByOffset []NeedleValue |
|||
|
|||
func (a ByOffset) Len() int { return len(a) } |
|||
func (a ByOffset) Swap(i, j int) { a[i], a[j] = a[j], a[i] } |
|||
func (a ByOffset) Less(i, j int) bool { return a[i].Offset < a[j].Offset } |
|||
|
|||
// trySynchronizing sync with remote volume server incrementally by
|
|||
// make up the local and remote delta.
|
|||
func (v *Volume) trySynchronizing(volumeServer string, masterMap CompactMap, compactRevision uint16) error { |
|||
slaveIdxFile, err := os.Open(v.nm.IndexFileName()) |
|||
if err != nil { |
|||
return fmt.Errorf("Open volume %d index file: %v", v.Id, err) |
|||
} |
|||
defer slaveIdxFile.Close() |
|||
slaveMap, err := LoadNeedleMap(slaveIdxFile) |
|||
if err != nil { |
|||
return fmt.Errorf("Load volume %d index file: %v", v.Id, err) |
|||
} |
|||
var delta []NeedleValue |
|||
if err := masterMap.Visit(func(needleValue NeedleValue) error { |
|||
if needleValue.Key == 0 { |
|||
return nil |
|||
} |
|||
if _, ok := slaveMap.Get(uint64(needleValue.Key)); ok { |
|||
return nil // skip intersection
|
|||
} |
|||
delta = append(delta, needleValue) |
|||
return nil |
|||
}); err != nil { |
|||
return fmt.Errorf("Add master entry: %v", err) |
|||
} |
|||
if err := slaveMap.m.Visit(func(needleValue NeedleValue) error { |
|||
if needleValue.Key == 0 { |
|||
return nil |
|||
} |
|||
if _, ok := masterMap.Get(needleValue.Key); ok { |
|||
return nil // skip intersection
|
|||
} |
|||
needleValue.Size = 0 |
|||
delta = append(delta, needleValue) |
|||
return nil |
|||
}); err != nil { |
|||
return fmt.Errorf("Remove local entry: %v", err) |
|||
} |
|||
|
|||
// simulate to same ordering of remote .dat file needle entries
|
|||
sort.Sort(ByOffset(delta)) |
|||
|
|||
// make up the delta
|
|||
fetchCount := 0 |
|||
volumeDataContentHandlerUrl := "http://" + volumeServer + "/admin/sync/data" |
|||
for _, needleValue := range delta { |
|||
if needleValue.Size == 0 { |
|||
// remove file entry from local
|
|||
v.removeNeedle(needleValue.Key) |
|||
continue |
|||
} |
|||
// add master file entry to local data file
|
|||
if err := v.fetchNeedle(volumeDataContentHandlerUrl, needleValue, compactRevision); err != nil { |
|||
glog.V(0).Infof("Fetch needle %v from %s: %v", needleValue, volumeServer, err) |
|||
return err |
|||
} |
|||
fetchCount++ |
|||
} |
|||
glog.V(1).Infof("Fetched %d needles from %s", fetchCount, volumeServer) |
|||
return nil |
|||
} |
|||
|
|||
func fetchVolumeFileEntries(volumeServer string, vid VolumeId) (m CompactMap, lastOffset uint64, compactRevision uint16, err error) { |
|||
m = NewCompactMap() |
|||
|
|||
syncStatus, err := operation.GetVolumeSyncStatus(volumeServer, vid.String()) |
|||
if err != nil { |
|||
return m, 0, 0, err |
|||
} |
|||
|
|||
total := 0 |
|||
err = operation.GetVolumeIdxEntries(volumeServer, vid.String(), func(key uint64, offset, size uint32) { |
|||
// println("remote key", key, "offset", offset*NeedlePaddingSize, "size", size)
|
|||
if offset != 0 && size != 0 { |
|||
m.Set(Key(key), offset, size) |
|||
} else { |
|||
m.Delete(Key(key)) |
|||
} |
|||
total++ |
|||
}) |
|||
|
|||
glog.V(2).Infof("server %s volume %d, entries %d, last offset %d, revision %d", volumeServer, vid, total, syncStatus.TailOffset, syncStatus.CompactRevision) |
|||
return m, syncStatus.TailOffset, syncStatus.CompactRevision, err |
|||
|
|||
} |
|||
|
|||
func (v *Volume) GetVolumeSyncStatus() operation.SyncVolumeResponse { |
|||
var syncStatus = operation.SyncVolumeResponse{} |
|||
if stat, err := v.dataFile.Stat(); err == nil { |
|||
syncStatus.TailOffset = uint64(stat.Size()) |
|||
} |
|||
syncStatus.IdxFileSize = v.nm.IndexFileSize() |
|||
syncStatus.CompactRevision = v.SuperBlock.CompactRevision |
|||
syncStatus.Ttl = v.SuperBlock.Ttl.String() |
|||
syncStatus.Replication = v.SuperBlock.ReplicaPlacement.String() |
|||
return syncStatus |
|||
} |
|||
|
|||
func (v *Volume) IndexFileContent() ([]byte, error) { |
|||
return v.nm.IndexFileContent() |
|||
} |
|||
|
|||
// removeNeedle removes one needle by needle key
|
|||
func (v *Volume) removeNeedle(key Key) { |
|||
n := new(Needle) |
|||
n.Id = uint64(key) |
|||
v.delete(n) |
|||
} |
|||
|
|||
// fetchNeedle fetches a remote volume needle by vid, id, offset
|
|||
// The compact revision is checked first in case the remote volume
|
|||
// is compacted and the offset is invalid any more.
|
|||
func (v *Volume) fetchNeedle(volumeDataContentHandlerUrl string, |
|||
needleValue NeedleValue, compactRevision uint16) error { |
|||
// add master file entry to local data file
|
|||
values := make(url.Values) |
|||
values.Add("revision", strconv.Itoa(int(compactRevision))) |
|||
values.Add("volume", v.Id.String()) |
|||
values.Add("id", needleValue.Key.String()) |
|||
values.Add("offset", strconv.FormatUint(uint64(needleValue.Offset), 10)) |
|||
values.Add("size", strconv.FormatUint(uint64(needleValue.Size), 10)) |
|||
glog.V(4).Infof("Fetch %+v", needleValue) |
|||
return util.GetUrlStream(volumeDataContentHandlerUrl, values, func(r io.Reader) error { |
|||
b, err := ioutil.ReadAll(r) |
|||
if err != nil { |
|||
return fmt.Errorf("Reading from %s error: %v", volumeDataContentHandlerUrl, err) |
|||
} |
|||
offset, err := v.AppendBlob(b) |
|||
if err != nil { |
|||
return fmt.Errorf("Appending volume %d error: %v", v.Id, err) |
|||
} |
|||
// println("add key", needleValue.Key, "offset", offset, "size", needleValue.Size)
|
|||
v.nm.Put(uint64(needleValue.Key), uint32(offset/NeedlePaddingSize), needleValue.Size) |
|||
return nil |
|||
}) |
|||
} |
@ -0,0 +1,90 @@ |
|||
package main |
|||
|
|||
import ( |
|||
"fmt" |
|||
|
|||
"github.com/chrislusf/seaweedfs/go/operation" |
|||
"github.com/chrislusf/seaweedfs/go/storage" |
|||
) |
|||
|
|||
var ( |
|||
s BackupOptions |
|||
) |
|||
|
|||
// BackupOptions groups the command-line flag values of the "backup" command.
// Each field points at the value registered for the matching flag.
type BackupOptions struct {
	// master is the value of the -server flag.
	master *string
	// collection is the value of the -collection flag.
	collection *string
	// dir is the value of the -dir flag.
	dir *string
	// volumeId is the value of the -volumeId flag.
	volumeId *int
}
|||
|
|||
func init() { |
|||
cmdBackup.Run = runBackup // break init cycle
|
|||
s.master = cmdBackup.Flag.String("server", "localhost:9333", "SeaweedFS master location") |
|||
s.collection = cmdBackup.Flag.String("collection", "", "collection name") |
|||
s.dir = cmdBackup.Flag.String("dir", ".", "directory to store volume data files") |
|||
s.volumeId = cmdBackup.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.") |
|||
} |
|||
|
|||
var cmdBackup = &Command{ |
|||
UsageLine: "backup -dir=. -volumeId=234 -server=localhost:9333", |
|||
Short: "incrementally backup a volume to local folder", |
|||
Long: `Incrementally backup volume data. |
|||
|
|||
It is expected that you use this inside a script, to loop through |
|||
all possible volume ids that needs to be backup to local folder. |
|||
|
|||
The volume id does not need to exist locally or even remotely. |
|||
This will help to backup future new volumes. |
|||
|
|||
Usually backing up is just copying the .dat (and .idx) files. |
|||
But it's tricky to incremententally copy the differences. |
|||
|
|||
The complexity comes when there are multiple addition, deletion and compaction. |
|||
This tool will handle them correctly and efficiently, avoiding unnecessary data transporation. |
|||
`, |
|||
} |
|||
|
|||
func runBackup(cmd *Command, args []string) bool { |
|||
if *s.volumeId == -1 { |
|||
return false |
|||
} |
|||
vid := storage.VolumeId(*s.volumeId) |
|||
|
|||
// find volume location, replication, ttl info
|
|||
lookup, err := operation.Lookup(*s.master, vid.String()) |
|||
if err != nil { |
|||
fmt.Printf("Error looking up volume %d: %v\n", vid, err) |
|||
return true |
|||
} |
|||
volumeServer := lookup.Locations[0].Url |
|||
|
|||
stats, err := operation.GetVolumeSyncStatus(volumeServer, vid.String()) |
|||
if err != nil { |
|||
fmt.Printf("Error get volume %d status: %v\n", vid, err) |
|||
return true |
|||
} |
|||
ttl, err := storage.ReadTTL(stats.Ttl) |
|||
if err != nil { |
|||
fmt.Printf("Error get volume %d ttl %s: %v\n", vid, stats.Ttl, err) |
|||
return true |
|||
} |
|||
replication, err := storage.NewReplicaPlacementFromString(stats.Replication) |
|||
if err != nil { |
|||
fmt.Printf("Error get volume %d replication %s : %v\n", vid, stats.Replication, err) |
|||
return true |
|||
} |
|||
|
|||
v, err := storage.NewVolume(*s.dir, *s.collection, vid, storage.NeedleMapInMemory, replication, ttl) |
|||
if err != nil { |
|||
fmt.Printf("Error creating or reading from volume %d: %v\n", vid, err) |
|||
return true |
|||
} |
|||
|
|||
if err := v.Synchronize(volumeServer); err != nil { |
|||
fmt.Printf("Error synchronizing volume %d: %v\n", vid, err) |
|||
return true |
|||
} |
|||
|
|||
return true |
|||
} |
@ -0,0 +1,86 @@ |
|||
package weed_server |
|||
|
|||
import ( |
|||
"fmt" |
|||
"net/http" |
|||
|
|||
"github.com/chrislusf/seaweedfs/go/glog" |
|||
"github.com/chrislusf/seaweedfs/go/storage" |
|||
"github.com/chrislusf/seaweedfs/go/util" |
|||
) |
|||
|
|||
func (vs *VolumeServer) getVolumeSyncStatusHandler(w http.ResponseWriter, r *http.Request) { |
|||
v, err := vs.getVolume("volume", r) |
|||
if v == nil { |
|||
writeJsonError(w, r, http.StatusBadRequest, err) |
|||
return |
|||
} |
|||
syncStat := v.GetVolumeSyncStatus() |
|||
if syncStat.Error != "" { |
|||
writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Get Volume %d status error: %s", v.Id, syncStat.Error)) |
|||
glog.V(2).Infoln("getVolumeSyncStatusHandler volume =", r.FormValue("volume"), ", error =", err) |
|||
} else { |
|||
writeJsonQuiet(w, r, http.StatusOK, syncStat) |
|||
} |
|||
} |
|||
|
|||
func (vs *VolumeServer) getVolumeIndexContentHandler(w http.ResponseWriter, r *http.Request) { |
|||
v, err := vs.getVolume("volume", r) |
|||
if v == nil { |
|||
writeJsonError(w, r, http.StatusBadRequest, err) |
|||
return |
|||
} |
|||
content, err := v.IndexFileContent() |
|||
if err != nil { |
|||
writeJsonError(w, r, http.StatusInternalServerError, err) |
|||
return |
|||
} |
|||
w.Write(content) |
|||
} |
|||
|
|||
func (vs *VolumeServer) getVolumeDataContentHandler(w http.ResponseWriter, r *http.Request) { |
|||
v, err := vs.getVolume("volume", r) |
|||
if v == nil { |
|||
writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("Not Found volume: %v", err)) |
|||
return |
|||
} |
|||
if int(v.SuperBlock.CompactRevision) != util.ParseInt(r.FormValue("revision"), 0) { |
|||
writeJsonError(w, r, http.StatusExpectationFailed, fmt.Errorf("Requested Volume Revision is %s, but current revision is %d", r.FormValue("revision"), v.SuperBlock.CompactRevision)) |
|||
return |
|||
} |
|||
offset := uint32(util.ParseUint64(r.FormValue("offset"), 0)) |
|||
size := uint32(util.ParseUint64(r.FormValue("size"), 0)) |
|||
content, err := storage.ReadNeedleBlob(v.DataFile(), int64(offset)*storage.NeedlePaddingSize, size) |
|||
if err != nil { |
|||
writeJsonError(w, r, http.StatusInternalServerError, err) |
|||
return |
|||
} |
|||
|
|||
id := util.ParseUint64(r.FormValue("id"), 0) |
|||
n := new(storage.Needle) |
|||
n.ParseNeedleHeader(content) |
|||
if id != n.Id { |
|||
writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("Expected file entry id %d, but found %d", id, n.Id)) |
|||
return |
|||
} |
|||
|
|||
w.Write(content) |
|||
} |
|||
|
|||
func (vs *VolumeServer) getVolume(volumeParameterName string, r *http.Request) (*storage.Volume, error) { |
|||
volumeIdString := r.FormValue(volumeParameterName) |
|||
if volumeIdString == "" { |
|||
err := fmt.Errorf("Empty Volume Id: Need to pass in %s=the_volume_id.", volumeParameterName) |
|||
return nil, err |
|||
} |
|||
vid, err := storage.NewVolumeId(volumeIdString) |
|||
if err != nil { |
|||
err = fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString) |
|||
return nil, err |
|||
} |
|||
v := vs.store.GetVolume(vid) |
|||
if v == nil { |
|||
return nil, fmt.Errorf("Not Found Volume Id %s: %d", volumeIdString, vid) |
|||
} |
|||
return v, nil |
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue