66 changed files with 2122 additions and 355 deletions (lines changed per file):

- Makefile (5)
- README.md (6)
- go/filer/client_operations.go (2)
- go/filer/flat_namespace/flat_namespace_store.go (2)
- go/glog/glog.go (2)
- go/operation/assign_file_id.go (2)
- go/operation/chunked_file.go (2)
- go/operation/delete_content.go (4)
- go/operation/list_masters.go (2)
- go/operation/lookup.go (38)
- go/operation/lookup_vid_cache.go (6)
- go/operation/lookup_vid_cache_test.go (2)
- go/operation/submit.go (2)
- go/operation/sync_volume.go (3)
- go/operation/system_message.pb.go (77)
- go/proto/system_message.proto (58)
- go/storage/collection_settings.go (73)
- go/storage/needle_read_write.go (12)
- go/storage/replica_placement.go (14)
- go/storage/store.go (49)
- go/storage/store_task.go (147)
- go/storage/store_task_cli.go (87)
- go/storage/store_task_replication.go (115)
- go/storage/store_task_vacuum.go (40)
- go/storage/volume.go (24)
- go/storage/volume_info.go (13)
- go/storage/volume_info_test.go (6)
- go/storage/volume_pure_reader.go (212)
- go/storage/volume_replicate_test.go (22)
- go/storage/volume_super_block.go (25)
- go/storage/volume_sync.go (4)
- go/storage/volume_ttl.go (4)
- go/storage/volume_vacuum.go (7)
- go/storage/volume_version.go (1)
- go/topology/allocate_volume.go (3)
- go/topology/batch_operation.go (43)
- go/topology/collection.go (13)
- go/topology/data_node.go (1)
- go/topology/node.go (119)
- go/topology/store_replicate.go (60)
- go/topology/topology.go (42)
- go/topology/topology_event_handling.go (9)
- go/topology/topology_replicate.go (153)
- go/topology/topology_vacuum.go (18)
- go/topology/volume_growth.go (256)
- go/topology/volume_growth_test.go (117)
- go/topology/volume_layout.go (10)
- go/topology/volume_location_list.go (111)
- go/util/concurrent_read_map.go (17)
- go/util/http_util.go (110)
- go/weed/backup.go (9)
- go/weed/benchmark.go (7)
- go/weed/compact.go (2)
- go/weed/download.go (4)
- go/weed/shell.go (2)
- go/weed/signal_handling.go (2)
- go/weed/weed_server/common.go (7)
- go/weed/weed_server/filer_server_handlers.go (9)
- go/weed/weed_server/master_server.go (7)
- go/weed/weed_server/master_server_handlers.go (8)
- go/weed/weed_server/master_server_handlers_admin.go (60)
- go/weed/weed_server/volume_server.go (11)
- go/weed/weed_server/volume_server_handlers_admin.go (73)
- go/weed/weed_server/volume_server_handlers_read.go (2)
- go/weed/weed_server/volume_server_handlers_sync.go (58)
- go/weed/weed_server/volume_server_handlers_task.go (66)
go/proto/system_message.proto
@@ -1,27 +1,45 @@
 package operation;
 
 message VolumeInformationMessage {
-  required uint32 id = 1;
-  required uint64 size = 2;
-  optional string collection = 3;
-  required uint64 file_count = 4;
-  required uint64 delete_count = 5;
-  required uint64 deleted_byte_count = 6;
-  optional bool read_only = 7;
-  required uint32 replica_placement = 8;
-  optional uint32 version = 9 [default=2];
-  optional uint32 ttl = 10;
+  required uint32 id = 1;
+  required uint64 size = 2;
+  optional string collection = 3;
+  required uint64 file_count = 4;
+  required uint64 delete_count = 5;
+  required uint64 deleted_byte_count = 6;
+  optional bool read_only = 7;
+  optional uint32 replica_placement = 8;
+  optional uint32 version = 9 [default=2];
+  optional uint32 ttl = 10;
 }
 
 message JoinMessage {
-  optional bool is_init = 1;
-  required string ip = 2;
-  required uint32 port = 3;
-  optional string public_url = 4;
-  required uint32 max_volume_count = 5;
-  required uint64 max_file_key = 6;
-  optional string data_center = 7;
-  optional string rack = 8;
-  repeated VolumeInformationMessage volumes = 9;
-  optional uint32 admin_port = 10;
+  optional bool is_init = 1;
+  required string ip = 2;
+  required uint32 port = 3;
+  optional string public_url = 4;
+  required uint32 max_volume_count = 5;
+  required uint64 max_file_key = 6;
+  optional string data_center = 7;
+  optional string rack = 8;
+  repeated VolumeInformationMessage volumes = 9;
+  optional uint32 admin_port = 10;
 }
+
+message CollectionSetting {
+  optional string collection = 1;
+  optional string replica_placement = 2;
+  optional float vacuum_garbage_threshold = 3;
+}
+
+message GlobalSetting {
+  repeated CollectionSetting settings = 1;
+  repeated string master_peers = 2;
+}
+
+message JoinResponse {
+  optional GlobalSetting settings = 1;
+}
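The new CollectionSetting/GlobalSetting/JoinResponse messages let the master push per-collection replica placement and vacuum thresholds back to a volume server when it joins. A rough sketch of constructing such a response with the generated Go types; the struct field names follow the usual protoc-gen-go naming and are assumptions, not copied from system_message.pb.go:

package example

import "github.com/chrislusf/seaweedfs/go/operation"

func strPtr(s string) *string   { return &s }
func f32Ptr(f float32) *float32 { return &f }

// buildJoinResponse sketches a master answering a volume-server join with
// global settings; the collection name and values are made up for illustration.
func buildJoinResponse() *operation.JoinResponse {
	return &operation.JoinResponse{
		Settings: &operation.GlobalSetting{
			Settings: []*operation.CollectionSetting{
				{
					Collection:             strPtr("pictures"),
					ReplicaPlacement:       strPtr("001"),
					VacuumGarbageThreshold: f32Ptr(0.3),
				},
			},
			MasterPeers: []string{"localhost:9333"},
		},
	}
}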
go/storage/collection_settings.go
@@ -0,0 +1,73 @@
package storage

type SettingKey int

const (
	keyReplicatePlacement SettingKey = iota
	keyGarbageThreshold
)

type CollectionSettings struct {
	settings map[string]map[SettingKey]interface{}
}

func NewCollectionSettings(defaultReplicatePlacement, defaultGarbageThreshold string) *CollectionSettings {
	rp, e := NewReplicaPlacementFromString(defaultReplicatePlacement)
	if e != nil {
		rp, _ = NewReplicaPlacementFromString("000")
	}
	c := &CollectionSettings{
		settings: make(map[string]map[SettingKey]interface{}),
	}
	c.set("", keyReplicatePlacement, rp)
	c.set("", keyGarbageThreshold, defaultGarbageThreshold)
	return c
}

func (c *CollectionSettings) get(collection string, key SettingKey) interface{} {
	if m, ok := c.settings[collection]; ok {
		if v, ok := m[key]; ok {
			return v
		}
	}
	if m, ok := c.settings[""]; ok {
		return m[key]
	}
	return nil
}

func (c *CollectionSettings) set(collection string, key SettingKey, value interface{}) {
	m := c.settings[collection]
	if m == nil {
		m = make(map[SettingKey]interface{})
		c.settings[collection] = m
	}
	if value == nil {
		// must not delete the default (collection == "") settings
		if collection != "" {
			delete(m, key)
		}
	} else {
		m[key] = value
	}
}

func (c *CollectionSettings) GetGarbageThreshold(collection string) string {
	return c.get(collection, keyGarbageThreshold).(string)
}

func (c *CollectionSettings) SetGarbageThreshold(collection string, gt string) {
	c.set(collection, keyGarbageThreshold, gt)
}

func (c *CollectionSettings) GetReplicaPlacement(collection string) *ReplicaPlacement {
	return c.get(collection, keyReplicatePlacement).(*ReplicaPlacement)
}

func (c *CollectionSettings) SetReplicaPlacement(collection, t string) error {
	rp, e := NewReplicaPlacementFromString(t)
	if e == nil {
		c.set(collection, keyReplicatePlacement, rp)
	}
	return e
}
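CollectionSettings stores per-collection overrides keyed by collection name, with the "" entry acting as the global default that lookups fall back to. A small usage sketch (the collection names and values below are illustrative):

package example

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/go/storage"
)

func ExampleCollectionSettings() {
	// Defaults: replica placement "000", vacuum garbage threshold "0.3".
	cs := storage.NewCollectionSettings("000", "0.3")

	// Per-collection override; collections without one fall back to the default.
	if err := cs.SetReplicaPlacement("pictures", "001"); err != nil {
		fmt.Println("bad placement:", err)
		return
	}

	fmt.Println(cs.GetReplicaPlacement("pictures")) // parsed from "001"
	fmt.Println(cs.GetReplicaPlacement("logs"))     // falls back to the default "000"
	fmt.Println(cs.GetGarbageThreshold("logs"))     // "0.3"
}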
go/storage/store_task.go
@@ -0,0 +1,147 @@
package storage

import (
	"errors"
	"net/url"
	"time"

	"github.com/chrislusf/seaweedfs/go/glog"
)

const (
	TaskVacuum    = "vacuum"
	TaskReplicate = "replicate"
	TaskBalance   = "balance"
)

var (
	ErrTaskNotFinish = errors.New("TaskNotFinish")
	ErrTaskNotFound  = errors.New("TaskNotFound")
	ErrTaskInvalid   = errors.New("TaskInvalid")
	ErrTaskExists    = errors.New("TaskExists")
)

type TaskWorker interface {
	Run() error
	Commit() error
	Clean() error
	Info() url.Values
}

type Task struct {
	Id              string
	startTime       time.Time
	worker          TaskWorker
	ch              chan bool
	result          error
	cleanWhenFinish bool
}

type TaskManager struct {
	TaskList map[string]*Task
}

func NewTask(worker TaskWorker, id string) *Task {
	t := &Task{
		Id:        id,
		worker:    worker,
		startTime: time.Now(),
		result:    ErrTaskNotFinish,
		ch:        make(chan bool, 1),
	}
	go func(t *Task) {
		t.result = t.worker.Run()
		if t.cleanWhenFinish {
			glog.V(0).Infof("cleaning task (%s) now that it has finished.", t.Id)
			t.worker.Clean()
		}
		t.ch <- true
	}(t)
	return t
}

func (t *Task) QueryResult(waitDuration time.Duration) error {
	if t.result == ErrTaskNotFinish && waitDuration > 0 {
		select {
		case <-time.After(waitDuration):
		case <-t.ch:
		}
	}
	return t.result
}

func NewTaskManager() *TaskManager {
	return &TaskManager{
		TaskList: make(map[string]*Task),
	}
}

func (tm *TaskManager) NewTask(s *Store, args url.Values) (tid string, e error) {
	tt := args.Get("task")
	vid := args.Get("volume")
	tid = tt + "-" + vid
	if _, ok := tm.TaskList[tid]; ok {
		return tid, ErrTaskExists
	}

	var tw TaskWorker
	switch tt {
	case TaskVacuum:
		tw, e = NewVacuumTask(s, args)
	case TaskReplicate:
		tw, e = NewReplicaTask(s, args)
	case TaskBalance:
	}
	if e != nil {
		return
	}
	if tw == nil {
		return "", ErrTaskInvalid
	}
	tm.TaskList[tid] = NewTask(tw, tid)
	return tid, nil
}

func (tm *TaskManager) QueryResult(tid string, waitDuration time.Duration) (e error) {
	t, ok := tm.TaskList[tid]
	if !ok {
		return ErrTaskNotFound
	}
	return t.QueryResult(waitDuration)
}

func (tm *TaskManager) Commit(tid string) (e error) {
	t, ok := tm.TaskList[tid]
	if !ok {
		return ErrTaskNotFound
	}
	if t.QueryResult(time.Second*30) == ErrTaskNotFinish {
		return ErrTaskNotFinish
	}
	delete(tm.TaskList, tid)
	return t.worker.Commit()
}

func (tm *TaskManager) Clean(tid string) error {
	t, ok := tm.TaskList[tid]
	if !ok {
		return ErrTaskNotFound
	}
	delete(tm.TaskList, tid)
	if t.QueryResult(time.Second*30) == ErrTaskNotFinish {
		t.cleanWhenFinish = true
		glog.V(0).Infof("task (%s) has not finished yet, it will be cleaned when it does.", tid)
	} else {
		t.worker.Clean()
	}
	return nil
}

func (tm *TaskManager) ElapsedDuration(tid string) (time.Duration, error) {
	t, ok := tm.TaskList[tid]
	if !ok {
		return 0, ErrTaskNotFound
	}
	return time.Since(t.startTime), nil
}
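TaskManager keeps one Task per "type-volume" id and runs its worker in a goroutine; ErrTaskNotFinish doubles as the "still running" result, and callers later Commit or Clean through the manager. A lifecycle sketch with a stub worker standing in for the real vacuum/replicate workers (the stub and its timings are made up):

package main

import (
	"fmt"
	"net/url"
	"time"

	"github.com/chrislusf/seaweedfs/go/storage"
)

// stubWorker is a hypothetical storage.TaskWorker used only to illustrate the
// Run -> QueryResult lifecycle; real workers are the vacuum and replicate tasks.
type stubWorker struct{}

func (stubWorker) Run() error       { time.Sleep(100 * time.Millisecond); return nil }
func (stubWorker) Commit() error    { fmt.Println("committed"); return nil }
func (stubWorker) Clean() error     { fmt.Println("cleaned"); return nil }
func (stubWorker) Info() url.Values { return url.Values{} }

func main() {
	t := storage.NewTask(stubWorker{}, "vacuum-42")

	// Poll in short slices; ErrTaskNotFinish means "try again later".
	for t.QueryResult(50*time.Millisecond) == storage.ErrTaskNotFinish {
		fmt.Println("still running...")
	}
	fmt.Println("result:", t.QueryResult(0))
}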
go/storage/store_task_cli.go
@@ -0,0 +1,87 @@
package storage

import (
	"errors"
	"fmt"
	"net/url"
	"time"

	"github.com/chrislusf/seaweedfs/go/glog"
	"github.com/chrislusf/seaweedfs/go/util"
)

type TaskParams map[string]string

var (
	ErrTaskTimeout = errors.New("TaskTimeout")
)

type TaskCli struct {
	TID      string
	DataNode string
}

func NewTaskCli(dataNode string, taskType string, params TaskParams) (*TaskCli, error) {
	args := url.Values{}
	args.Set("task", taskType)
	for k, v := range params {
		args.Set(k, v)
	}
	m, e := util.RemoteApiCall(dataNode, "/admin/task/new", args)
	if e != nil {
		return nil, e
	}
	tid := m["tid"].(string)
	if tid == "" {
		return nil, fmt.Errorf("Empty %s task", taskType)
	}
	return &TaskCli{
		TID:      tid,
		DataNode: dataNode,
	}, nil
}

func (c *TaskCli) WaitAndQueryResult(timeout time.Duration) error {
	startTime := time.Now()
	args := url.Values{}
	args.Set("tid", c.TID)
	args.Set("timeout", time.Minute.String())
	tryTimes := 0
	for time.Since(startTime) < timeout {
		_, e := util.RemoteApiCall(c.DataNode, "/admin/task/query", args)
		if e == nil {
			// the task has finished without an error
			return nil
		}
		if util.IsRemoteApiError(e) {
			if e.Error() == ErrTaskNotFinish.Error() {
				tryTimes = 0
				continue
			}
			return e
		} else {
			tryTimes++
			if tryTimes >= 10 {
				return e
			}
			glog.V(0).Infof("query task (%s) error %v, wait 1 minute then retry (%d tries so far)", c.TID, e, tryTimes)
			time.Sleep(time.Minute)
		}
	}
	return ErrTaskTimeout
}

func (c *TaskCli) Commit() error {
	args := url.Values{}
	args.Set("tid", c.TID)
	_, e := util.RemoteApiCall(c.DataNode, "/admin/task/commit", args)
	return e
}

func (c *TaskCli) Clean() error {
	args := url.Values{}
	args.Set("tid", c.TID)
	_, e := util.RemoteApiCall(c.DataNode, "/admin/task/clean", args)
	return e
}
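TaskCli is the master-side client for these endpoints: create the task on the data node, poll /admin/task/query until it finishes, then commit, or clean up on failure. A sketch of that flow (the data node address and volume id are placeholders):

package main

import (
	"log"
	"time"

	"github.com/chrislusf/seaweedfs/go/storage"
)

func main() {
	// "127.0.0.1:8080" stands in for a volume server's admin address.
	tc, err := storage.NewTaskCli("127.0.0.1:8080", storage.TaskVacuum, storage.TaskParams{
		"volume": "42",
	})
	if err != nil {
		log.Fatal(err)
	}
	if err := tc.WaitAndQueryResult(time.Hour); err != nil {
		tc.Clean() // give up: remove the half-done task on the data node
		log.Fatal(err)
	}
	if err := tc.Commit(); err != nil { // make the finished task's result live
		log.Fatal(err)
	}
}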
go/storage/store_task_replication.go
@@ -0,0 +1,115 @@
package storage

import (
	"errors"
	"fmt"
	"net/url"
	"os"
	"path"

	"github.com/chrislusf/seaweedfs/go/util"
)

type ReplicaTask struct {
	VID         VolumeId
	Collection  string
	SrcDataNode string
	s           *Store
	location    *DiskLocation
}

func NewReplicaTask(s *Store, args url.Values) (*ReplicaTask, error) {
	volumeIdString := args.Get("volume")
	vid, err := NewVolumeId(volumeIdString)
	if err != nil {
		return nil, fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString)
	}
	source := args.Get("source")
	if source == "" {
		return nil, errors.New("Invalid source data node.")
	}
	location := s.findFreeLocation()
	if location == nil {
		return nil, errors.New("No more free space left")
	}
	collection := args.Get("collection")
	return &ReplicaTask{
		VID:         vid,
		Collection:  collection,
		SrcDataNode: source,
		s:           s,
		location:    location,
	}, nil
}

func (t *ReplicaTask) Run() error {
	ch := make(chan error)
	go func() {
		idxUrl := util.MkUrl(t.SrcDataNode, "/admin/sync/index", url.Values{"volume": {t.VID.String()}})
		e := util.DownloadToFile(idxUrl, t.FileName()+".repx")
		if e != nil {
			e = fmt.Errorf("Replicate error: %s, %v", idxUrl, e)
		}
		ch <- e
	}()
	go func() {
		datUrl := util.MkUrl(t.SrcDataNode, "/admin/sync/vol_data", url.Values{"volume": {t.VID.String()}})
		e := util.DownloadToFile(datUrl, t.FileName()+".repd")
		if e != nil {
			e = fmt.Errorf("Replicate error: %s, %v", datUrl, e)
		}
		ch <- e
	}()
	errs := make([]error, 0)
	for i := 0; i < 2; i++ {
		if e := <-ch; e != nil {
			errs = append(errs, e)
		}
	}
	if len(errs) == 0 {
		return nil
	} else {
		return fmt.Errorf("%v", errs)
	}
}

func (t *ReplicaTask) Commit() error {
	var (
		volume *Volume
		e      error
	)

	if e = os.Rename(t.FileName()+".repd", t.FileName()+".dat"); e != nil {
		return e
	}
	if e = os.Rename(t.FileName()+".repx", t.FileName()+".idx"); e != nil {
		return e
	}
	volume, e = NewVolume(t.location.Directory, t.Collection, t.VID, t.s.needleMapKind, nil)
	if e == nil {
		t.location.volumes[t.VID] = volume
		t.s.SendHeartbeatToMaster()
	}
	return e
}

func (t *ReplicaTask) Clean() error {
	os.Remove(t.FileName() + ".repx")
	os.Remove(t.FileName() + ".repd")
	return nil
}

func (t *ReplicaTask) Info() url.Values {
	//TODO
	return url.Values{}
}

func (t *ReplicaTask) FileName() (fileName string) {
	if t.Collection == "" {
		fileName = path.Join(t.location.Directory, t.VID.String())
	} else {
		fileName = path.Join(t.location.Directory, t.Collection+"_"+t.VID.String())
	}
	return
}
go/storage/store_task_vacuum.go
@@ -0,0 +1,40 @@
package storage

import (
	"fmt"
	"net/url"
)

type VacuumTask struct {
	V *Volume
}

func NewVacuumTask(s *Store, args url.Values) (*VacuumTask, error) {
	volumeIdString := args.Get("volume")
	vid, err := NewVolumeId(volumeIdString)
	if err != nil {
		return nil, fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString)
	}
	v := s.findVolume(vid)
	if v == nil {
		return nil, fmt.Errorf("volume id %d is not found", vid)
	}
	return &VacuumTask{V: v}, nil
}

func (t *VacuumTask) Run() error {
	return t.V.Compact()
}

func (t *VacuumTask) Commit() error {
	return t.V.commitCompact()
}

func (t *VacuumTask) Clean() error {
	return t.V.cleanCompact()
}

func (t *VacuumTask) Info() url.Values {
	//TODO
	return url.Values{}
}
go/storage/volume_pure_reader.go
@@ -0,0 +1,212 @@
package storage

import (
	"io"
	"os"
	"sort"
	"sync"

	"github.com/chrislusf/seaweedfs/go/util"
)

type DirtyData struct {
	Offset int64  `comment:"Dirty data start offset"`
	Size   uint32 `comment:"Size of the dirty data"`
}

type DirtyDatas []DirtyData

func (s DirtyDatas) Len() int           { return len(s) }
func (s DirtyDatas) Less(i, j int) bool { return s[i].Offset < s[j].Offset }
func (s DirtyDatas) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
func (s DirtyDatas) Sort()              { sort.Sort(s) }
func (s DirtyDatas) Search(offset int64) int {
	return sort.Search(len(s), func(i int) bool {
		v := &s[i]
		return v.Offset+int64(v.Size) > offset
	})
}

type PureReader struct {
	Dirtys   DirtyDatas
	DataFile *os.File
	pr       *io.PipeReader
	pw       *io.PipeWriter
	mutex    sync.Mutex
}

func ScanDirtyData(indexFileContent []byte) (dirtys DirtyDatas) {
	m := NewCompactMap()
	for i := 0; i+16 <= len(indexFileContent); i += 16 {
		bytes := indexFileContent[i : i+16]
		key := util.BytesToUint64(bytes[:8])
		offset := util.BytesToUint32(bytes[8:12])
		size := util.BytesToUint32(bytes[12:16])
		k := Key(key)
		if offset != 0 && size != 0 {
			m.Set(k, offset, size)
		} else {
			if nv, ok := m.Get(k); ok {
				// mark the overwritten needle's old data as dirty
				if int64(nv.Size)-NeedleHeaderSize > 0 {
					dirtys = append(dirtys, DirtyData{
						Offset: int64(nv.Offset)*8 + NeedleHeaderSize,
						Size:   nv.Size,
					})
				}
			}
			m.Delete(k)
		}
	}
	dirtys.Sort()
	return dirtys
}

func (cr *PureReader) Seek(offset int64, whence int) (int64, error) {
	oldOff, e := cr.DataFile.Seek(0, 1)
	if e != nil {
		return 0, e
	}
	newOff, e := cr.DataFile.Seek(offset, whence)
	if e != nil {
		return 0, e
	}
	if oldOff != newOff {
		cr.closePipe(true)
	}
	return newOff, nil
}

func (cr *PureReader) Size() (int64, error) {
	fi, e := cr.DataFile.Stat()
	if e != nil {
		return 0, e
	}
	return fi.Size(), nil
}

func (cdr *PureReader) WriteTo(w io.Writer) (written int64, e error) {
	off, e := cdr.DataFile.Seek(0, 1)
	if e != nil {
		return 0, e
	}
	const ZeroBufSize = 32 * 1024
	zeroBuf := make([]byte, ZeroBufSize)
	dirtyIndex := cdr.Dirtys.Search(off)
	var nextDirty *DirtyData
	if dirtyIndex < len(cdr.Dirtys) {
		nextDirty = &cdr.Dirtys[dirtyIndex]
	}
	for {
		if nextDirty != nil && off >= nextDirty.Offset && off < nextDirty.Offset+int64(nextDirty.Size) {
			// inside a dirty range: emit zeros instead of the stale bytes
			sz := nextDirty.Offset + int64(nextDirty.Size) - off
			for sz > 0 {
				mn := int64(ZeroBufSize)
				if mn > sz {
					mn = sz
				}
				var n int
				if n, e = w.Write(zeroBuf[:mn]); e != nil {
					return
				}
				written += int64(n)
				sz -= int64(n)
				off += int64(n)
			}
			dirtyIndex++
			if dirtyIndex < len(cdr.Dirtys) {
				nextDirty = &cdr.Dirtys[dirtyIndex]
			} else {
				nextDirty = nil
			}
			if _, e = cdr.DataFile.Seek(off, 0); e != nil {
				return
			}
		} else {
			var n, sz int64
			if nextDirty != nil {
				sz = nextDirty.Offset - off
			}
			if sz <= 0 {
				// no more dirty ranges: copy until EOF
				n, e = io.Copy(w, cdr.DataFile)
				written += n
				return
			}
			if n, e = io.CopyN(w, cdr.DataFile, sz); e != nil {
				return
			}
			off += n
			written += n
		}
	}
}

func (cr *PureReader) ReadAt(p []byte, off int64) (n int, err error) {
	cr.Seek(off, 0)
	return cr.Read(p)
}

func (cr *PureReader) Read(p []byte) (int, error) {
	return cr.getPipeReader().Read(p)
}

func (cr *PureReader) Close() (e error) {
	cr.closePipe(true)
	return cr.DataFile.Close()
}

func (cr *PureReader) closePipe(lock bool) (e error) {
	if lock {
		cr.mutex.Lock()
		defer cr.mutex.Unlock()
	}
	if cr.pr != nil {
		if err := cr.pr.Close(); err != nil {
			e = err
		}
	}
	cr.pr = nil
	if cr.pw != nil {
		if err := cr.pw.Close(); err != nil {
			e = err
		}
	}
	cr.pw = nil
	return e
}

func (cr *PureReader) getPipeReader() io.Reader {
	cr.mutex.Lock()
	defer cr.mutex.Unlock()
	if cr.pr != nil && cr.pw != nil {
		return cr.pr
	}
	cr.closePipe(false)
	cr.pr, cr.pw = io.Pipe()
	go func(pw *io.PipeWriter) {
		_, e := cr.WriteTo(pw)
		pw.CloseWithError(e)
	}(cr.pw)
	return cr.pr
}

func (v *Volume) GetVolumeCleanReader() (cr *PureReader, err error) {
	var dirtys DirtyDatas
	if indexData, e := v.nm.IndexFileContent(); e != nil {
		return nil, e
	} else {
		dirtys = ScanDirtyData(indexData)
	}
	dataFile, e := os.Open(v.FileName() + ".dat")
	if e != nil {
		return nil, e
	}
	cr = &PureReader{
		Dirtys:   dirtys,
		DataFile: dataFile,
	}
	return
}
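PureReader presents a volume's .dat file with deleted or overwritten needle bodies zeroed out, which is what the replication task downloads. A sketch of streaming that clean view to a local file; the helper name and output path are made up, and io.Copy delegates to PureReader.WriteTo:

package example

import (
	"io"
	"os"

	"github.com/chrislusf/seaweedfs/go/storage"
)

// dumpCleanVolume streams the "clean" view of a volume's .dat file (dirty
// needle ranges replaced with zeros) to a local file.
func dumpCleanVolume(v *storage.Volume, outPath string) error {
	cr, err := v.GetVolumeCleanReader()
	if err != nil {
		return err
	}
	defer cr.Close()

	out, err := os.Create(outPath)
	if err != nil {
		return err
	}
	defer out.Close()

	// io.Copy picks up PureReader.WriteTo, which zero-fills the dirty ranges.
	_, err = io.Copy(out, cr)
	return err
}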
go/storage/volume_replicate_test.go
@@ -0,0 +1,22 @@
package storage

import "testing"

func TestDirtyDataSearch(t *testing.T) {
	testData := DirtyDatas{
		{30, 20}, {106, 200}, {5, 20}, {512, 68}, {412, 50},
	}
	testOffset := []int64{
		0, 150, 480, 1024,
	}
	testData.Sort()
	t.Logf("TestData = %v", testData)
	for _, off := range testOffset {
		i := testData.Search(off)
		if i < testData.Len() {
			t.Logf("(%d) nearest chunk[%d]: %v", off, i, testData[i])
		} else {
			t.Logf("Search %d return %d", off, i)
		}
	}
}
go/topology/batch_operation.go
@@ -0,0 +1,43 @@
package topology

import (
	"net/url"
	"strconv"
	"time"

	"github.com/chrislusf/seaweedfs/go/glog"
	"github.com/chrislusf/seaweedfs/go/util"
)

func BatchOperation(locationList *VolumeLocationList, path string, values url.Values) (isSuccess bool) {
	ch := make(chan bool, locationList.Length())
	for _, dn := range locationList.list {
		go func(url string, path string, values url.Values) {
			_, e := util.RemoteApiCall(url, path, values)
			if e != nil {
				glog.V(0).Infoln("RemoteApiCall:", util.MkUrl(url, path, values), "error =", e)
			}
			ch <- e == nil
		}(dn.Url(), path, values)
	}
	isSuccess = true
	for range locationList.list {
		select {
		case success := <-ch:
			isSuccess = isSuccess && success
		case <-time.After(30 * time.Minute):
			// waited too long for this location; count it as a failure
			isSuccess = false
		}
	}
	return isSuccess
}

func SetVolumeReadonly(locationList *VolumeLocationList, volume string, isReadonly bool) (isSuccess bool) {
	forms := url.Values{}
	forms.Set("key", "readonly")
	forms.Set("value", strconv.FormatBool(isReadonly))
	forms.Set("volume", volume)
	return BatchOperation(locationList, "/admin/setting", forms)
}
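BatchOperation fans a single admin call out to every location of a volume and waits for all replies; SetVolumeReadonly is its first caller. A sketch of wrapping a maintenance step between readonly on/off calls, mirroring how ReplicateTask.Run uses it below (the helper name is made up, and the location list would come from Topology.Lookup):

package example

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/go/topology"
)

// withVolumeFrozen freezes writes to a volume on all of its locations, runs
// the given work, then lifts the freeze again.
func withVolumeFrozen(locationList *topology.VolumeLocationList, vid string, work func() error) error {
	if !topology.SetVolumeReadonly(locationList, vid, true) {
		return fmt.Errorf("could not mark volume %s readonly on all locations", vid)
	}
	defer topology.SetVolumeReadonly(locationList, vid, false)
	return work()
}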
go/topology/topology_replicate.go
@@ -0,0 +1,153 @@
package topology

import (
	"container/list"
	"fmt"
	"time"

	"github.com/chrislusf/seaweedfs/go/glog"
	"github.com/chrislusf/seaweedfs/go/storage"
)

var (
	isReplicateCheckerRunning = false
)

const ReplicateTaskTimeout = time.Hour

type ReplicateTask struct {
	Vid        storage.VolumeId
	Collection string
	SrcDN      *DataNode
	DstDN      *DataNode
}

func (t *ReplicateTask) Run(topo *Topology) error {
	// is lookup thread safe?
	locationList := topo.Lookup(t.Collection, t.Vid)
	rp := topo.CollectionSettings.GetReplicaPlacement(t.Collection)
	if locationList.CalcReplicaPlacement().Compare(rp) >= 0 {
		glog.V(0).Infof("volume [%v] already has the right replica placement, rp: %s", t.Vid, rp.String())
		return nil
	}
	if !SetVolumeReadonly(locationList, t.Vid.String(), true) {
		return fmt.Errorf("set volume readonly failed, vid=%v", t.Vid)
	}
	defer SetVolumeReadonly(locationList, t.Vid.String(), false)
	tc, e := storage.NewTaskCli(t.DstDN.Url(), storage.TaskReplicate, storage.TaskParams{
		"volume":     t.Vid.String(),
		"source":     t.SrcDN.Url(),
		"collection": t.Collection,
	})
	if e != nil {
		return e
	}
	if e = tc.WaitAndQueryResult(ReplicateTaskTimeout); e != nil {
		tc.Clean()
		return e
	}
	e = tc.Commit()
	return e
}

func (t *ReplicateTask) WorkingDataNodes() []*DataNode {
	return []*DataNode{
		t.SrcDN,
		t.DstDN,
	}
}

func planReplicateTasks(t *Topology) (tasks []*ReplicateTask) {
	for _, col := range t.collectionMap.Items {
		c := col.(*Collection)
		glog.V(0).Infoln("checking replicate on collection:", c.Name)
		growOption := &VolumeGrowOption{ReplicaPlacement: c.rp}
		for _, vl := range c.storageType2VolumeLayout.Items {
			if vl != nil {
				volumeLayout := vl.(*VolumeLayout)
				for vid, locationList := range volumeLayout.vid2location {
					rp1 := locationList.CalcReplicaPlacement()
					if rp1.Compare(volumeLayout.rp) >= 0 {
						continue
					}
					if additionServers, e := FindEmptySlotsForOneVolume(t, growOption, locationList); e == nil {
						for _, s := range additionServers {
							s.UpAdjustPlannedVolumeCountDelta(1)
							rt := &ReplicateTask{
								Vid:        vid,
								Collection: c.Name,
								SrcDN:      locationList.PickForRead(),
								DstDN:      s,
							}
							tasks = append(tasks, rt)
							glog.V(0).Infof("add replicate task, vid: %v, src: %s, dst: %s", vid, rt.SrcDN.Url(), rt.DstDN.Url())
						}
					} else {
						glog.V(0).Infof("find empty slots error, vid: %v, rp: %s => %s, %v", vid, rp1.String(), volumeLayout.rp.String(), e)
					}
				}
			}
		}
	}
	return
}

func (topo *Topology) CheckReplicate() {
	isReplicateCheckerRunning = true
	defer func() {
		isReplicateCheckerRunning = false
	}()
	glog.V(1).Infoln("Start replicate checker on demand")
	busyDataNodes := make(map[*DataNode]int)
	taskCount := 0
	taskQueue := list.New()
	for _, t := range planReplicateTasks(topo) {
		taskQueue.PushBack(t)
		taskCount++
	}
	taskChan := make(chan *ReplicateTask)
	for taskCount > 0 {
	TaskQueueLoop:
		for e := taskQueue.Front(); e != nil; e = e.Next() {
			task := e.Value.(*ReplicateTask)
			// only one task at a time may run on any given data node
			dns := task.WorkingDataNodes()
			for _, dn := range dns {
				if busyDataNodes[dn] > 0 {
					continue TaskQueueLoop
				}
			}
			for _, dn := range dns {
				busyDataNodes[dn]++
			}
			go func(t *ReplicateTask) {
				if e := t.Run(topo); e != nil {
					glog.V(0).Infof("ReplicateTask run error, vid: %v, dst: %s. %v", t.Vid, t.DstDN.Url(), e)
				} else {
					glog.V(2).Infof("ReplicateTask finished, vid: %v, dst: %s", t.Vid, t.DstDN.Url())
				}
				taskChan <- t
			}(task)
			taskQueue.Remove(e)
		}

		finishedTask := <-taskChan
		for _, dn := range finishedTask.WorkingDataNodes() {
			if busyDataNodes[dn] > 0 {
				busyDataNodes[dn]--
			}
		}
		taskCount--
		finishedTask.DstDN.UpAdjustPlannedVolumeCountDelta(-1)
	}
	glog.V(0).Infoln("finished replicate check.")
}

func (topo *Topology) StartCheckReplicate() {
	if isReplicateCheckerRunning {
		return
	}
	go topo.CheckReplicate()
}
go/weed/weed_server/volume_server_handlers_task.go
@@ -0,0 +1,66 @@
package weed_server

import (
	"net/http"
	"strings"
	"time"

	"github.com/chrislusf/seaweedfs/go/glog"
	"github.com/chrislusf/seaweedfs/go/storage"
)

func (vs *VolumeServer) newTaskHandler(w http.ResponseWriter, r *http.Request) {
	r.ParseForm()
	tid, e := vs.store.TaskManager.NewTask(vs.store, r.Form)
	if e == nil {
		writeJsonQuiet(w, r, http.StatusOK, map[string]string{"tid": tid})
	} else {
		writeJsonError(w, r, http.StatusInternalServerError, e)
	}
	glog.V(2).Infoln("new store task =", tid, ", error =", e)
}

func (vs *VolumeServer) queryTaskHandler(w http.ResponseWriter, r *http.Request) {
	tid := r.FormValue("tid")
	timeoutStr := strings.TrimSpace(r.FormValue("timeout"))
	d := time.Minute
	if td, e := time.ParseDuration(timeoutStr); e == nil {
		d = td
	}
	err := vs.store.TaskManager.QueryResult(tid, d)
	if err == storage.ErrTaskNotFinish {
		writeJsonError(w, r, http.StatusRequestTimeout, err)
	} else if err == nil {
		writeJsonError(w, r, http.StatusOK, err)
	} else {
		writeJsonError(w, r, http.StatusInternalServerError, err)
	}
	glog.V(2).Infoln("query task =", tid, ", error =", err)
}

func (vs *VolumeServer) commitTaskHandler(w http.ResponseWriter, r *http.Request) {
	tid := r.FormValue("tid")
	err := vs.store.TaskManager.Commit(tid)
	if err == storage.ErrTaskNotFinish {
		writeJsonError(w, r, http.StatusRequestTimeout, err)
	} else if err == nil {
		writeJsonError(w, r, http.StatusOK, err)
	}
	glog.V(2).Infoln("commit task =", tid, ", error =", err)
}

func (vs *VolumeServer) cleanTaskHandler(w http.ResponseWriter, r *http.Request) {
	tid := r.FormValue("tid")
	err := vs.store.TaskManager.Clean(tid)
	if err == storage.ErrTaskNotFinish {
		writeJsonError(w, r, http.StatusRequestTimeout, err)
	} else if err == nil {
		writeJsonError(w, r, http.StatusOK, err)
	}
	glog.V(2).Infoln("clean task =", tid, ", error =", err)
}

func (vs *VolumeServer) allTaskHandler(w http.ResponseWriter, r *http.Request) {
	//TODO: return all tasks
	glog.V(2).Infoln("TODO: get all tasks")
}