66 changed files with 2122 additions and 355 deletions

Makefile | 5
README.md | 6
go/filer/client_operations.go | 2
go/filer/flat_namespace/flat_namespace_store.go | 2
go/glog/glog.go | 2
go/operation/assign_file_id.go | 2
go/operation/chunked_file.go | 2
go/operation/delete_content.go | 4
go/operation/list_masters.go | 2
go/operation/lookup.go | 38
go/operation/lookup_vid_cache.go | 6
go/operation/lookup_vid_cache_test.go | 2
go/operation/submit.go | 2
go/operation/sync_volume.go | 3
go/operation/system_message.pb.go | 77
go/proto/system_message.proto | 58
go/storage/collection_settings.go | 73
go/storage/needle_read_write.go | 12
go/storage/replica_placement.go | 14
go/storage/store.go | 49
go/storage/store_task.go | 147
go/storage/store_task_cli.go | 87
go/storage/store_task_replication.go | 115
go/storage/store_task_vacuum.go | 40
go/storage/volume.go | 24
go/storage/volume_info.go | 13
go/storage/volume_info_test.go | 6
go/storage/volume_pure_reader.go | 212
go/storage/volume_replicate_test.go | 22
go/storage/volume_super_block.go | 25
go/storage/volume_sync.go | 4
go/storage/volume_ttl.go | 4
go/storage/volume_vacuum.go | 7
go/storage/volume_version.go | 1
go/topology/allocate_volume.go | 3
go/topology/batch_operation.go | 43
go/topology/collection.go | 13
go/topology/data_node.go | 1
go/topology/node.go | 119
go/topology/store_replicate.go | 60
go/topology/topology.go | 42
go/topology/topology_event_handling.go | 9
go/topology/topology_replicate.go | 153
go/topology/topology_vacuum.go | 18
go/topology/volume_growth.go | 256
go/topology/volume_growth_test.go | 117
go/topology/volume_layout.go | 10
go/topology/volume_location_list.go | 111
go/util/concurrent_read_map.go | 17
go/util/http_util.go | 110
go/weed/backup.go | 9
go/weed/benchmark.go | 7
go/weed/compact.go | 2
go/weed/download.go | 4
go/weed/shell.go | 2
go/weed/signal_handling.go | 2
go/weed/weed_server/common.go | 7
go/weed/weed_server/filer_server_handlers.go | 9
go/weed/weed_server/master_server.go | 7
go/weed/weed_server/master_server_handlers.go | 8
go/weed/weed_server/master_server_handlers_admin.go | 60
go/weed/weed_server/volume_server.go | 11
go/weed/weed_server/volume_server_handlers_admin.go | 73
go/weed/weed_server/volume_server_handlers_read.go | 2
go/weed/weed_server/volume_server_handlers_sync.go | 58
go/weed/weed_server/volume_server_handlers_task.go | 66
go/proto/system_message.proto
@@ -1,27 +1,45 @@
 package operation;

 message VolumeInformationMessage {
   required uint32 id = 1;
   required uint64 size = 2;
   optional string collection = 3;
   required uint64 file_count = 4;
   required uint64 delete_count = 5;
   required uint64 deleted_byte_count = 6;
   optional bool read_only = 7;
-  required uint32 replica_placement = 8;
+  optional uint32 replica_placement = 8;
   optional uint32 version = 9 [default=2];
   optional uint32 ttl = 10;
 }

 message JoinMessage {
   optional bool is_init = 1;
   required string ip = 2;
   required uint32 port = 3;
   optional string public_url = 4;
   required uint32 max_volume_count = 5;
   required uint64 max_file_key = 6;
   optional string data_center = 7;
   optional string rack = 8;
   repeated VolumeInformationMessage volumes = 9;
   optional uint32 admin_port = 10;
 }
+
+message CollectionSetting {
+  optional string collection = 1;
+  optional string replica_placement = 2;
+  optional float vacuum_garbage_threshold = 3;
+}
+
+message GlobalSetting {
+  repeated CollectionSetting settings = 1;
+  repeated string master_peers = 2;
+}
+
+message JoinResponse {
+  optional GlobalSetting settings = 1;
+}
go/storage/collection_settings.go
@@ -0,0 +1,73 @@
package storage

type SettingKey int

const (
    keyReplicatePlacement SettingKey = iota
    keyGarbageThreshold
)

type CollectionSettings struct {
    settings map[string]map[SettingKey]interface{}
}

func NewCollectionSettings(defaultReplicatePlacement, defaultGarbageThreshold string) *CollectionSettings {
    rp, e := NewReplicaPlacementFromString(defaultReplicatePlacement)
    if e != nil {
        rp, _ = NewReplicaPlacementFromString("000")
    }
    c := &CollectionSettings{
        settings: make(map[string]map[SettingKey]interface{}),
    }
    c.set("", keyReplicatePlacement, rp)
    c.set("", keyGarbageThreshold, defaultGarbageThreshold)
    return c
}

func (c *CollectionSettings) get(collection string, key SettingKey) interface{} {
    if m, ok := c.settings[collection]; ok {
        if v, ok := m[key]; ok {
            return v
        }
    }
    if m, ok := c.settings[""]; ok {
        return m[key]
    }
    return nil
}

func (c *CollectionSettings) set(collection string, key SettingKey, value interface{}) {
    m := c.settings[collection]
    if m == nil {
        m = make(map[SettingKey]interface{})
        c.settings[collection] = m
    }
    if value == nil {
        // mustn't delete default setting
        if collection != "" {
            delete(m, key)
        }
    } else {
        m[key] = value
    }
}

func (c *CollectionSettings) GetGarbageThreshold(collection string) string {
    return c.get(collection, keyGarbageThreshold).(string)
}

func (c *CollectionSettings) SetGarbageThreshold(collection string, gt string) {
    c.set(collection, keyGarbageThreshold, gt)
}

func (c *CollectionSettings) GetReplicaPlacement(collection string) *ReplicaPlacement {
    return c.get(collection, keyReplicatePlacement).(*ReplicaPlacement)
}

func (c *CollectionSettings) SetReplicaPlacement(collection, t string) error {
    rp, e := NewReplicaPlacementFromString(t)
    if e == nil {
        c.set(collection, keyReplicatePlacement, rp)
    }
    return e
}
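To illustrate the lookup convention used above, where the empty collection name holds the defaults and named collections override them, here is a minimal self-contained sketch; the `Settings` type and `garbageThreshold` key are hypothetical stand-ins, not part of this change:

package main

import "fmt"

type key int

const garbageThreshold key = iota // hypothetical setting key

// Settings resolves per-collection values with a fallback to the
// defaults stored under the empty collection name "".
type Settings map[string]map[key]interface{}

func (s Settings) get(collection string, k key) interface{} {
    if m, ok := s[collection]; ok {
        if v, ok := m[k]; ok {
            return v // collection-specific override
        }
    }
    return s[""][k] // fall back to the default
}

func main() {
    s := Settings{
        "":       {garbageThreshold: "0.3"}, // defaults
        "photos": {garbageThreshold: "0.1"}, // per-collection override
    }
    fmt.Println(s.get("photos", garbageThreshold)) // 0.1
    fmt.Println(s.get("videos", garbageThreshold)) // 0.3 (default)
}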
go/storage/store_task.go
@@ -0,0 +1,147 @@
package storage

import (
    "errors"
    "net/url"
    "time"

    "github.com/chrislusf/seaweedfs/go/glog"
)

const (
    TaskVacuum    = "vacuum"
    TaskReplicate = "replicate"
    TaskBalance   = "balance"
)

var (
    ErrTaskNotFinish = errors.New("TaskNotFinish")
    ErrTaskNotFound  = errors.New("TaskNotFound")
    ErrTaskInvalid   = errors.New("TaskInvalid")
    ErrTaskExists    = errors.New("TaskExists")
)

type TaskWorker interface {
    Run() error
    Commit() error
    Clean() error
    Info() url.Values
}

type Task struct {
    Id              string
    startTime       time.Time
    worker          TaskWorker
    ch              chan bool
    result          error
    cleanWhenFinish bool
}

type TaskManager struct {
    TaskList map[string]*Task
}

func NewTask(worker TaskWorker, id string) *Task {
    t := &Task{
        Id:        id,
        worker:    worker,
        startTime: time.Now(),
        result:    ErrTaskNotFinish,
        ch:        make(chan bool, 1),
    }
    go func(t *Task) {
        t.result = t.worker.Run()
        if t.cleanWhenFinish {
            glog.V(0).Infof("clean task (%s) when finished.", t.Id)
            t.worker.Clean()
        }
        t.ch <- true
    }(t)
    return t
}

func (t *Task) QueryResult(waitDuration time.Duration) error {
    if t.result == ErrTaskNotFinish && waitDuration > 0 {
        select {
        case <-time.After(waitDuration):
        case <-t.ch:
        }
    }
    return t.result
}

func NewTaskManager() *TaskManager {
    return &TaskManager{
        TaskList: make(map[string]*Task),
    }
}

func (tm *TaskManager) NewTask(s *Store, args url.Values) (tid string, e error) {
    tt := args.Get("task")
    vid := args.Get("volume")
    tid = tt + "-" + vid
    if _, ok := tm.TaskList[tid]; ok {
        return tid, ErrTaskExists
    }

    var tw TaskWorker
    switch tt {
    case TaskVacuum:
        tw, e = NewVacuumTask(s, args)
    case TaskReplicate:
        tw, e = NewReplicaTask(s, args)
    case TaskBalance:
        // balance tasks are not implemented yet; tw stays nil
    }
    if e != nil {
        return
    }
    if tw == nil {
        return "", ErrTaskInvalid
    }
    tm.TaskList[tid] = NewTask(tw, tid)
    return tid, nil
}

func (tm *TaskManager) QueryResult(tid string, waitDuration time.Duration) (e error) {
    t, ok := tm.TaskList[tid]
    if !ok {
        return ErrTaskNotFound
    }
    return t.QueryResult(waitDuration)
}

func (tm *TaskManager) Commit(tid string) (e error) {
    t, ok := tm.TaskList[tid]
    if !ok {
        return ErrTaskNotFound
    }
    if t.QueryResult(time.Second*30) == ErrTaskNotFinish {
        return ErrTaskNotFinish
    }
    delete(tm.TaskList, tid)
    return t.worker.Commit()
}

func (tm *TaskManager) Clean(tid string) error {
    t, ok := tm.TaskList[tid]
    if !ok {
        return ErrTaskNotFound
    }
    delete(tm.TaskList, tid)
    if t.QueryResult(time.Second*30) == ErrTaskNotFinish {
        t.cleanWhenFinish = true
        glog.V(0).Infof("task (%s) is not finished; will clean it later.", tid)
    } else {
        t.worker.Clean()
    }
    return nil
}

func (tm *TaskManager) ElapsedDuration(tid string) (time.Duration, error) {
    t, ok := tm.TaskList[tid]
    if !ok {
        return 0, ErrTaskNotFound
    }
    return time.Since(t.startTime), nil
}
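The Task lifecycle above boils down to: run the worker in a goroutine, start the result at a "not finished" sentinel, and let QueryResult race a timer against a buffered completion channel. A self-contained sketch of just that pattern (names here are illustrative, not the PR's API):

package main

import (
    "errors"
    "fmt"
    "time"
)

var errNotFinished = errors.New("not finished")

type task struct {
    result error
    done   chan bool
}

func newTask(run func() error) *task {
    t := &task{result: errNotFinished, done: make(chan bool, 1)}
    go func() {
        t.result = run()
        t.done <- true // buffered: never blocks, even if nobody is waiting
    }()
    return t
}

// queryResult waits up to wait for completion, then reports the result,
// which may still be the "not finished" sentinel. Note: as in the
// original, the timeout path reads result without synchronization;
// only the done path is synchronized by the channel.
func (t *task) queryResult(wait time.Duration) error {
    if t.result == errNotFinished && wait > 0 {
        select {
        case <-time.After(wait):
        case <-t.done:
        }
    }
    return t.result
}

func main() {
    t := newTask(func() error { time.Sleep(50 * time.Millisecond); return nil })
    fmt.Println(t.queryResult(10 * time.Millisecond)) // not finished
    fmt.Println(t.queryResult(time.Second))           // <nil>
}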
go/storage/store_task_cli.go
@@ -0,0 +1,87 @@
package storage

import (
    "errors"
    "fmt"
    "net/url"
    "time"

    "github.com/chrislusf/seaweedfs/go/glog"
    "github.com/chrislusf/seaweedfs/go/util"
)

type TaskParams map[string]string

var (
    ErrTaskTimeout = errors.New("TaskTimeout")
)

type TaskCli struct {
    TID      string
    DataNode string
}

func NewTaskCli(dataNode string, taskType string, params TaskParams) (*TaskCli, error) {
    args := url.Values{}
    args.Set("task", taskType)
    for k, v := range params {
        args.Set(k, v)
    }
    m, e := util.RemoteApiCall(dataNode, "/admin/task/new", args)
    if e != nil {
        return nil, e
    }
    tid := m["tid"].(string)
    if tid == "" {
        return nil, fmt.Errorf("Empty %s task", taskType)
    }
    return &TaskCli{
        TID:      tid,
        DataNode: dataNode,
    }, nil
}

func (c *TaskCli) WaitAndQueryResult(timeout time.Duration) error {
    startTime := time.Now()
    args := url.Values{}
    args.Set("tid", c.TID)
    args.Set("timeout", time.Minute.String())
    tryTimes := 0
    for time.Since(startTime) < timeout {
        _, e := util.RemoteApiCall(c.DataNode, "/admin/task/query", args)
        if e == nil {
            // the task has finished with no error
            return nil
        }
        if util.IsRemoteApiError(e) {
            if e.Error() == ErrTaskNotFinish.Error() {
                tryTimes = 0
                continue
            }
            return e
        } else {
            tryTimes++
            if tryTimes >= 10 {
                return e
            }
            glog.V(0).Infof("query task (%s) error %v, wait 1 minute then retry (attempt %d)", c.TID, e, tryTimes)
            time.Sleep(time.Minute)
        }
    }
    return ErrTaskTimeout
}

func (c *TaskCli) Commit() error {
    args := url.Values{}
    args.Set("tid", c.TID)
    _, e := util.RemoteApiCall(c.DataNode, "/admin/task/commit", args)
    return e
}

func (c *TaskCli) Clean() error {
    args := url.Values{}
    args.Set("tid", c.TID)
    _, e := util.RemoteApiCall(c.DataNode, "/admin/task/clean", args)
    return e
}
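WaitAndQueryResult encodes a specific retry policy: a "not finished" reply is the expected long-poll outcome and resets the failure counter, while ten consecutive transport errors abort the wait. A reduced sketch of that policy, with plain functions standing in for util.RemoteApiCall and util.IsRemoteApiError:

package main

import (
    "errors"
    "fmt"
    "time"
)

var (
    errNotFinished = errors.New("TaskNotFinish")
    errTimeout     = errors.New("TaskTimeout")
)

// waitForTask distills the retry policy above. query and isRemoteErr
// are stand-ins: query long-polls the task, isRemoteErr reports
// whether an error came from the remote API (vs. the transport).
func waitForTask(query func() error, isRemoteErr func(error) bool, timeout, pause time.Duration) error {
    start := time.Now()
    failures := 0
    for time.Since(start) < timeout {
        e := query()
        if e == nil {
            return nil // finished without error
        }
        if isRemoteErr(e) {
            if e.Error() == errNotFinished.Error() {
                failures = 0 // the server is alive, the task just isn't done
                continue
            }
            return e // the task itself failed
        }
        if failures++; failures >= 10 {
            return e // give up after repeated transport errors
        }
        time.Sleep(pause)
    }
    return errTimeout
}

func main() {
    calls := 0
    query := func() error {
        if calls++; calls < 3 {
            return errNotFinished
        }
        return nil
    }
    isRemoteErr := func(error) bool { return true }
    fmt.Println(waitForTask(query, isRemoteErr, time.Second, time.Millisecond)) // <nil>
}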
go/storage/store_task_replication.go
@@ -0,0 +1,115 @@
package storage

import (
    "errors"
    "fmt"
    "net/url"
    "os"
    "path"

    "github.com/chrislusf/seaweedfs/go/util"
)

type ReplicaTask struct {
    VID         VolumeId
    Collection  string
    SrcDataNode string
    s           *Store
    location    *DiskLocation
}

func NewReplicaTask(s *Store, args url.Values) (*ReplicaTask, error) {
    volumeIdString := args.Get("volume")
    vid, err := NewVolumeId(volumeIdString)
    if err != nil {
        return nil, fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString)
    }
    source := args.Get("source")
    if source == "" {
        return nil, errors.New("Invalid source data node.")
    }
    location := s.findFreeLocation()
    if location == nil {
        return nil, errors.New("No more free space left")
    }
    collection := args.Get("collection")
    return &ReplicaTask{
        VID:         vid,
        Collection:  collection,
        SrcDataNode: source,
        s:           s,
        location:    location,
    }, nil
}

func (t *ReplicaTask) Run() error {
    ch := make(chan error)
    go func() {
        idxUrl := util.MkUrl(t.SrcDataNode, "/admin/sync/index", url.Values{"volume": {t.VID.String()}})
        e := util.DownloadToFile(idxUrl, t.FileName()+".repx")
        if e != nil {
            e = fmt.Errorf("Replicate error: %s, %v", idxUrl, e)
        }
        ch <- e
    }()
    go func() {
        datUrl := util.MkUrl(t.SrcDataNode, "/admin/sync/vol_data", url.Values{"volume": {t.VID.String()}})
        e := util.DownloadToFile(datUrl, t.FileName()+".repd")
        if e != nil {
            e = fmt.Errorf("Replicate error: %s, %v", datUrl, e)
        }
        ch <- e
    }()
    errs := make([]error, 0)
    for i := 0; i < 2; i++ {
        if e := <-ch; e != nil {
            errs = append(errs, e)
        }
    }
    if len(errs) == 0 {
        return nil
    }
    return fmt.Errorf("%v", errs)
}

func (t *ReplicaTask) Commit() error {
    var (
        volume *Volume
        e      error
    )

    if e = os.Rename(t.FileName()+".repd", t.FileName()+".dat"); e != nil {
        return e
    }
    if e = os.Rename(t.FileName()+".repx", t.FileName()+".idx"); e != nil {
        return e
    }
    volume, e = NewVolume(t.location.Directory, t.Collection, t.VID, t.s.needleMapKind, nil)
    if e == nil {
        t.location.volumes[t.VID] = volume
        t.s.SendHeartbeatToMaster()
    }
    return e
}

func (t *ReplicaTask) Clean() error {
    os.Remove(t.FileName() + ".repx")
    os.Remove(t.FileName() + ".repd")
    return nil
}

func (t *ReplicaTask) Info() url.Values {
    // TODO
    return url.Values{}
}

func (t *ReplicaTask) FileName() (fileName string) {
    if t.Collection == "" {
        fileName = path.Join(t.location.Directory, t.VID.String())
    } else {
        fileName = path.Join(t.location.Directory, t.Collection+"_"+t.VID.String())
    }
    return
}
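Commit depends on the stage-then-rename split: the volume data and index are downloaded into .repd/.repx staging files, and os.Rename, which is atomic within a single filesystem, promotes them to the live .dat/.idx names, so a half-finished download can never be mistaken for a real volume. A small sketch of the same pattern with hypothetical file names:

package main

import (
    "fmt"
    "os"
    "path/filepath"
)

// stageAndCommit writes content to a staging file first, then atomically
// renames it into place, so readers never observe a half-written file.
func stageAndCommit(finalPath string, content []byte) error {
    staging := finalPath + ".tmp" // analogous to the .repd/.repx staging names
    if err := os.WriteFile(staging, content, 0644); err != nil {
        return err
    }
    // os.Rename is atomic on POSIX filesystems when source and
    // destination live on the same filesystem.
    return os.Rename(staging, finalPath)
}

func main() {
    dir, _ := os.MkdirTemp("", "stage")
    defer os.RemoveAll(dir)
    p := filepath.Join(dir, "1.dat")
    if err := stageAndCommit(p, []byte("volume data")); err != nil {
        fmt.Println("commit failed:", err)
        return
    }
    fmt.Println("committed", p)
}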
go/storage/store_task_vacuum.go
@@ -0,0 +1,40 @@
package storage

import (
    "fmt"
    "net/url"
)

type VacuumTask struct {
    V *Volume
}

func NewVacuumTask(s *Store, args url.Values) (*VacuumTask, error) {
    volumeIdString := args.Get("volume")
    vid, err := NewVolumeId(volumeIdString)
    if err != nil {
        return nil, fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString)
    }
    v := s.findVolume(vid)
    if v == nil {
        return nil, fmt.Errorf("volume id %d is not found", vid)
    }
    return &VacuumTask{V: v}, nil
}

func (t *VacuumTask) Run() error {
    return t.V.Compact()
}

func (t *VacuumTask) Commit() error {
    return t.V.commitCompact()
}

func (t *VacuumTask) Clean() error {
    return t.V.cleanCompact()
}

func (t *VacuumTask) Info() url.Values {
    // TODO
    return url.Values{}
}
go/storage/volume_pure_reader.go
@@ -0,0 +1,212 @@
package storage

import (
    "io"
    "os"
    "sort"
    "sync"

    "github.com/chrislusf/seaweedfs/go/util"
)

type DirtyData struct {
    Offset int64  `comment:"Dirty data start offset"`
    Size   uint32 `comment:"Size of the dirty data"`
}

type DirtyDatas []DirtyData

func (s DirtyDatas) Len() int           { return len(s) }
func (s DirtyDatas) Less(i, j int) bool { return s[i].Offset < s[j].Offset }
func (s DirtyDatas) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
func (s DirtyDatas) Sort()              { sort.Sort(s) }

// Search returns the index of the first dirty range that ends after offset.
func (s DirtyDatas) Search(offset int64) int {
    return sort.Search(len(s), func(i int) bool {
        v := &s[i]
        return v.Offset+int64(v.Size) > offset
    })
}

type PureReader struct {
    Dirtys   DirtyDatas
    DataFile *os.File
    pr       *io.PipeReader
    pw       *io.PipeWriter
    mutex    sync.Mutex
}

func ScanDirtyData(indexFileContent []byte) (dirtys DirtyDatas) {
    m := NewCompactMap()
    for i := 0; i+16 <= len(indexFileContent); i += 16 {
        bytes := indexFileContent[i : i+16]
        key := util.BytesToUint64(bytes[:8])
        offset := util.BytesToUint32(bytes[8:12])
        size := util.BytesToUint32(bytes[12:16])
        k := Key(key)
        if offset != 0 && size != 0 {
            m.Set(k, offset, size)
        } else {
            if nv, ok := m.Get(k); ok {
                // mark the old needle data as dirty
                if int64(nv.Size)-NeedleHeaderSize > 0 {
                    dirtys = append(dirtys, DirtyData{
                        Offset: int64(nv.Offset)*8 + NeedleHeaderSize,
                        Size:   nv.Size,
                    })
                }
            }
            m.Delete(k)
        }
    }
    dirtys.Sort()
    return dirtys
}

func (cr *PureReader) Seek(offset int64, whence int) (int64, error) {
    oldOff, e := cr.DataFile.Seek(0, 1)
    if e != nil {
        return 0, e
    }
    newOff, e := cr.DataFile.Seek(offset, whence)
    if e != nil {
        return 0, e
    }
    if oldOff != newOff {
        cr.closePipe(true)
    }
    return newOff, nil
}

func (cr *PureReader) Size() (int64, error) {
    fi, e := cr.DataFile.Stat()
    if e != nil {
        return 0, e
    }
    return fi.Size(), nil
}

func (cdr *PureReader) WriteTo(w io.Writer) (written int64, err error) {
    off, err := cdr.DataFile.Seek(0, 1)
    if err != nil {
        return 0, err
    }
    const ZeroBufSize = 32 * 1024
    zeroBuf := make([]byte, ZeroBufSize)
    dirtyIndex := cdr.Dirtys.Search(off)
    var nextDirty *DirtyData
    if dirtyIndex < len(cdr.Dirtys) {
        nextDirty = &cdr.Dirtys[dirtyIndex]
    }
    for {
        if nextDirty != nil && off >= nextDirty.Offset && off < nextDirty.Offset+int64(nextDirty.Size) {
            // inside a dirty range: emit zeros instead of the stale bytes
            sz := nextDirty.Offset + int64(nextDirty.Size) - off
            for sz > 0 {
                mn := int64(ZeroBufSize)
                if mn > sz {
                    mn = sz
                }
                var n int
                if n, err = w.Write(zeroBuf[:mn]); err != nil {
                    return
                }
                written += int64(n)
                sz -= int64(n)
                off += int64(n)
            }
            dirtyIndex++
            if dirtyIndex < len(cdr.Dirtys) {
                nextDirty = &cdr.Dirtys[dirtyIndex]
            } else {
                nextDirty = nil
            }
            if _, err = cdr.DataFile.Seek(off, 0); err != nil {
                return
            }
        } else {
            var n, sz int64
            if nextDirty != nil {
                sz = nextDirty.Offset - off
            }
            if sz <= 0 {
                // no more dirty ranges: copy until EOF
                n, err = io.Copy(w, cdr.DataFile)
                written += n
                return
            }
            if n, err = io.CopyN(w, cdr.DataFile, sz); err != nil {
                return
            }
            off += n
            written += n
        }
    }
}

func (cr *PureReader) ReadAt(p []byte, off int64) (n int, err error) {
    cr.Seek(off, 0)
    return cr.Read(p)
}

func (cr *PureReader) Read(p []byte) (int, error) {
    return cr.getPipeReader().Read(p)
}

func (cr *PureReader) Close() (e error) {
    cr.closePipe(true)
    return cr.DataFile.Close()
}

func (cr *PureReader) closePipe(lock bool) (e error) {
    if lock {
        cr.mutex.Lock()
        defer cr.mutex.Unlock()
    }
    if cr.pr != nil {
        if err := cr.pr.Close(); err != nil {
            e = err
        }
    }
    cr.pr = nil
    if cr.pw != nil {
        if err := cr.pw.Close(); err != nil {
            e = err
        }
    }
    cr.pw = nil
    return e
}

func (cr *PureReader) getPipeReader() io.Reader {
    cr.mutex.Lock()
    defer cr.mutex.Unlock()
    if cr.pr != nil && cr.pw != nil {
        return cr.pr
    }
    cr.closePipe(false)
    cr.pr, cr.pw = io.Pipe()
    go func(pw *io.PipeWriter) {
        _, e := cr.WriteTo(pw)
        pw.CloseWithError(e)
    }(cr.pw)
    return cr.pr
}

func (v *Volume) GetVolumeCleanReader() (cr *PureReader, err error) {
    var dirtys DirtyDatas
    if indexData, e := v.nm.IndexFileContent(); e != nil {
        return nil, e
    } else {
        dirtys = ScanDirtyData(indexData)
    }
    dataFile, e := os.Open(v.FileName() + ".dat")
    if e != nil {
        return nil, e
    }
    cr = &PureReader{
        Dirtys:   dirtys,
        DataFile: dataFile,
    }
    return
}
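WriteTo streams the .dat file but substitutes zeros wherever a sorted dirty range overlaps the current offset, so stale needle data never leaves the server. The same zero-masking idea over an in-memory reader, reduced to its essentials (illustrative only, not the PR's code):

package main

import (
    "bytes"
    "fmt"
    "io"
)

type span struct{ off, size int64 }

// copyMasked copies r to w, writing zeros over the given sorted,
// non-overlapping dirty spans instead of the underlying bytes.
func copyMasked(w io.Writer, r io.ReadSeeker, dirty []span) error {
    var off int64
    for _, d := range dirty {
        // copy the clean bytes before this dirty span
        if _, err := io.CopyN(w, r, d.off-off); err != nil {
            return err
        }
        // emit zeros in place of the dirty bytes and skip them in r
        if _, err := w.Write(make([]byte, d.size)); err != nil {
            return err
        }
        if _, err := r.Seek(d.size, io.SeekCurrent); err != nil {
            return err
        }
        off = d.off + d.size
    }
    // copy the remainder to EOF
    _, err := io.Copy(w, r)
    return err
}

func main() {
    data := bytes.NewReader([]byte("aaaaBBBBcccc"))
    var out bytes.Buffer
    if err := copyMasked(&out, data, []span{{4, 4}}); err != nil {
        panic(err)
    }
    fmt.Printf("%q\n", out.String()) // "aaaa\x00\x00\x00\x00cccc"
}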
go/storage/volume_replicate_test.go
@@ -0,0 +1,22 @@
package storage

import "testing"

func TestDirtyDataSearch(t *testing.T) {
    testData := DirtyDatas{
        {30, 20}, {106, 200}, {5, 20}, {512, 68}, {412, 50},
    }
    testOffset := []int64{
        0, 150, 480, 1024,
    }
    testData.Sort()
    t.Logf("TestData = %v", testData)
    for _, off := range testOffset {
        i := testData.Search(off)
        if i < testData.Len() {
            t.Logf("(%d) nearest chunk[%d]: %v", off, i, testData[i])
        } else {
            t.Logf("Search %d returned %d", off, i)
        }
    }
}
go/topology/batch_operation.go
@@ -0,0 +1,43 @@
package topology

import (
    "net/url"
    "strconv"
    "time"

    "github.com/chrislusf/seaweedfs/go/glog"
    "github.com/chrislusf/seaweedfs/go/util"
)

func BatchOperation(locationList *VolumeLocationList, path string, values url.Values) (isSuccess bool) {
    ch := make(chan bool, locationList.Length())
    for _, dn := range locationList.list {
        go func(url string, path string, values url.Values) {
            _, e := util.RemoteApiCall(url, path, values)
            if e != nil {
                glog.V(0).Infoln("RemoteApiCall:", util.MkUrl(url, path, values), "error =", e)
            }
            ch <- e == nil
        }(dn.Url(), path, values)
    }
    isSuccess = true
    for range locationList.list {
        select {
        case canVacuum := <-ch:
            isSuccess = isSuccess && canVacuum
        case <-time.After(30 * time.Minute):
            isSuccess = false
            break // note: this only exits the select, not the for loop
        }
    }
    return isSuccess
}

func SetVolumeReadonly(locationList *VolumeLocationList, volume string, isReadonly bool) (isSuccess bool) {
    forms := url.Values{}
    forms.Set("key", "readonly")
    forms.Set("value", strconv.FormatBool(isReadonly))
    forms.Set("volume", volume)
    return BatchOperation(locationList, "/admin/setting", forms)
}
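BatchOperation fans one admin call out to every replica location and reports success only if every call succeeds within the window. The same fan-out/collect shape in miniature, with a stand-in work function and a shorter timeout:

package main

import (
    "fmt"
    "time"
)

// all fans f out over targets and returns true only if every call
// reports success before the per-result timeout, mirroring the
// BatchOperation shape above.
func all(targets []string, timeout time.Duration, f func(string) bool) bool {
    ch := make(chan bool, len(targets)) // buffered so goroutines never block
    for _, t := range targets {
        go func(t string) { ch <- f(t) }(t)
    }
    ok := true
    for range targets {
        select {
        case r := <-ch:
            ok = ok && r
        case <-time.After(timeout):
            ok = false
        }
    }
    return ok
}

func main() {
    targets := []string{"dn1:8080", "dn2:8080", "dn3:8080"}
    fmt.Println(all(targets, time.Second, func(t string) bool {
        return t != "dn3:8080" // pretend one replica fails
    })) // false
}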
go/topology/topology_replicate.go
@@ -0,0 +1,153 @@
package topology

import (
    "container/list"
    "fmt"
    "time"

    "github.com/chrislusf/seaweedfs/go/glog"
    "github.com/chrislusf/seaweedfs/go/storage"
)

var (
    isReplicateCheckerRunning = false
)

const ReplicateTaskTimeout = time.Hour

type ReplicateTask struct {
    Vid        storage.VolumeId
    Collection string
    SrcDN      *DataNode
    DstDN      *DataNode
}

func (t *ReplicateTask) Run(topo *Topology) error {
    // is Lookup thread safe?
    locationList := topo.Lookup(t.Collection, t.Vid)
    rp := topo.CollectionSettings.GetReplicaPlacement(t.Collection)
    if locationList.CalcReplicaPlacement().Compare(rp) >= 0 {
        glog.V(0).Infof("volume [%v] already has the right replica placement, rp: %s", t.Vid, rp.String())
        return nil
    }
    if !SetVolumeReadonly(locationList, t.Vid.String(), true) {
        return fmt.Errorf("set volume readonly failed, vid=%v", t.Vid)
    }
    defer SetVolumeReadonly(locationList, t.Vid.String(), false)
    tc, e := storage.NewTaskCli(t.DstDN.Url(), storage.TaskReplicate, storage.TaskParams{
        "volume":     t.Vid.String(),
        "source":     t.SrcDN.Url(),
        "collection": t.Collection,
    })
    if e != nil {
        return e
    }
    if e = tc.WaitAndQueryResult(ReplicateTaskTimeout); e != nil {
        tc.Clean()
        return e
    }
    e = tc.Commit()
    return e
}

func (t *ReplicateTask) WorkingDataNodes() []*DataNode {
    return []*DataNode{
        t.SrcDN,
        t.DstDN,
    }
}

func planReplicateTasks(t *Topology) (tasks []*ReplicateTask) {
    for _, col := range t.collectionMap.Items {
        c := col.(*Collection)
        glog.V(0).Infoln("checking replicate on collection:", c.Name)
        growOption := &VolumeGrowOption{ReplicaPlacement: c.rp}
        for _, vl := range c.storageType2VolumeLayout.Items {
            if vl != nil {
                volumeLayout := vl.(*VolumeLayout)
                for vid, locationList := range volumeLayout.vid2location {
                    rp1 := locationList.CalcReplicaPlacement()
                    if rp1.Compare(volumeLayout.rp) >= 0 {
                        continue
                    }
                    if additionServers, e := FindEmptySlotsForOneVolume(t, growOption, locationList); e == nil {
                        for _, s := range additionServers {
                            s.UpAdjustPlannedVolumeCountDelta(1)
                            rt := &ReplicateTask{
                                Vid:        vid,
                                Collection: c.Name,
                                SrcDN:      locationList.PickForRead(),
                                DstDN:      s,
                            }
                            tasks = append(tasks, rt)
                            glog.V(0).Infof("add replicate task, vid: %v, src: %s, dst: %s", vid, rt.SrcDN.Url(), rt.DstDN.Url())
                        }
                    } else {
                        glog.V(0).Infof("find empty slots error, vid: %v, rp: %s => %s, %v", vid, rp1.String(), volumeLayout.rp.String(), e)
                    }
                }
            }
        }
    }
    return
}

func (topo *Topology) CheckReplicate() {
    isReplicateCheckerRunning = true
    defer func() {
        isReplicateCheckerRunning = false
    }()
    glog.V(1).Infoln("Start replicate checker on demand")
    busyDataNodes := make(map[*DataNode]int)
    taskCount := 0
    taskQueue := list.New()
    for _, t := range planReplicateTasks(topo) {
        taskQueue.PushBack(t)
        taskCount++
    }
    taskChan := make(chan *ReplicateTask)
    for taskCount > 0 {
    TaskQueueLoop:
        for e := taskQueue.Front(); e != nil; {
            // capture the next element first: list.Remove clears e's links
            next := e.Next()
            task := e.Value.(*ReplicateTask)
            // only one task may run on a given data node at a time
            dns := task.WorkingDataNodes()
            for _, dn := range dns {
                if busyDataNodes[dn] > 0 {
                    e = next
                    continue TaskQueueLoop
                }
            }
            for _, dn := range dns {
                busyDataNodes[dn]++
            }
            go func(t *ReplicateTask) {
                if e := t.Run(topo); e != nil {
                    glog.V(0).Infof("ReplicateTask run error, vid: %v, dst: %s. %v", t.Vid, t.DstDN.Url(), e)
                } else {
                    glog.V(2).Infof("ReplicateTask finished, vid: %v, dst: %s", t.Vid, t.DstDN.Url())
                }
                taskChan <- t
            }(task)
            taskQueue.Remove(e)
            e = next
        }

        finishedTask := <-taskChan
        for _, dn := range finishedTask.WorkingDataNodes() {
            if busyDataNodes[dn] > 0 {
                busyDataNodes[dn]--
            }
        }
        taskCount--
        finishedTask.DstDN.UpAdjustPlannedVolumeCountDelta(-1)
    }
    glog.V(0).Infoln("finished replicate check.")
}

func (topo *Topology) StartCheckReplicate() {
    if isReplicateCheckerRunning {
        return
    }
    go topo.CheckReplicate()
}
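The scheduling rule in CheckReplicate is per-node mutual exclusion: a task may start only when none of its data nodes is already running a task, and finishing a task releases its nodes. A stripped-down sketch of just the admission check (the job and node types are simplified stand-ins):

package main

import "fmt"

type job struct{ nodes []string }

// schedulable reports whether none of the job's nodes is busy,
// mirroring the busyDataNodes check above.
func schedulable(j job, busy map[string]int) bool {
    for _, n := range j.nodes {
        if busy[n] > 0 {
            return false
        }
    }
    return true
}

func main() {
    busy := map[string]int{}
    queue := []job{
        {nodes: []string{"dn1", "dn2"}},
        {nodes: []string{"dn2", "dn3"}}, // blocked: shares dn2 with the first
        {nodes: []string{"dn4", "dn5"}},
    }
    for i, j := range queue {
        if schedulable(j, busy) {
            for _, n := range j.nodes {
                busy[n]++
            }
            fmt.Printf("start job %d on %v\n", i, j.nodes)
        } else {
            fmt.Printf("defer job %d, a node is busy\n", i)
        }
    }
}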
go/weed/weed_server/volume_server_handlers_task.go
@@ -0,0 +1,66 @@
package weed_server

import (
    "net/http"
    "strings"
    "time"

    "github.com/chrislusf/seaweedfs/go/glog"
    "github.com/chrislusf/seaweedfs/go/storage"
)

func (vs *VolumeServer) newTaskHandler(w http.ResponseWriter, r *http.Request) {
    r.ParseForm()
    tid, e := vs.store.TaskManager.NewTask(vs.store, r.Form)
    if e == nil {
        writeJsonQuiet(w, r, http.StatusOK, map[string]string{"tid": tid})
    } else {
        writeJsonError(w, r, http.StatusInternalServerError, e)
    }
    glog.V(2).Infoln("new store task =", tid, ", error =", e)
}

func (vs *VolumeServer) queryTaskHandler(w http.ResponseWriter, r *http.Request) {
    tid := r.FormValue("tid")
    timeoutStr := strings.TrimSpace(r.FormValue("timeout"))
    d := time.Minute
    if td, e := time.ParseDuration(timeoutStr); e == nil {
        d = td
    }
    err := vs.store.TaskManager.QueryResult(tid, d)
    if err == storage.ErrTaskNotFinish {
        writeJsonError(w, r, http.StatusRequestTimeout, err)
    } else if err == nil {
        writeJsonError(w, r, http.StatusOK, err)
    } else {
        writeJsonError(w, r, http.StatusInternalServerError, err)
    }
    glog.V(2).Infoln("query task =", tid, ", error =", err)
}

func (vs *VolumeServer) commitTaskHandler(w http.ResponseWriter, r *http.Request) {
    tid := r.FormValue("tid")
    err := vs.store.TaskManager.Commit(tid)
    if err == storage.ErrTaskNotFinish {
        writeJsonError(w, r, http.StatusRequestTimeout, err)
    } else if err == nil {
        writeJsonError(w, r, http.StatusOK, err)
    } else {
        writeJsonError(w, r, http.StatusInternalServerError, err)
    }
    glog.V(2).Infoln("commit task =", tid, ", error =", err)
}

func (vs *VolumeServer) cleanTaskHandler(w http.ResponseWriter, r *http.Request) {
    tid := r.FormValue("tid")
    err := vs.store.TaskManager.Clean(tid)
    if err == storage.ErrTaskNotFinish {
        writeJsonError(w, r, http.StatusRequestTimeout, err)
    } else if err == nil {
        writeJsonError(w, r, http.StatusOK, err)
    } else {
        writeJsonError(w, r, http.StatusInternalServerError, err)
    }
    glog.V(2).Infoln("clean task =", tid, ", error =", err)
}

func (vs *VolumeServer) allTaskHandler(w http.ResponseWriter, r *http.Request) {
    // TODO: list all tasks
    glog.V(2).Infoln("TODO: get all task")
}
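For reference, a toy round trip against the /admin/task/new contract above, using httptest in place of a real volume server; the handler and JSON body here are simplified stand-ins, but the tid naming (task type + "-" + volume id) matches TaskManager.NewTask:

package main

import (
    "fmt"
    "io"
    "net/http"
    "net/http/httptest"
    "net/url"
)

func main() {
    // fake volume server implementing just the /admin/task/new path
    mux := http.NewServeMux()
    mux.HandleFunc("/admin/task/new", func(w http.ResponseWriter, r *http.Request) {
        r.ParseForm()
        fmt.Fprintf(w, `{"tid":%q}`, r.Form.Get("task")+"-"+r.Form.Get("volume"))
    })
    srv := httptest.NewServer(mux)
    defer srv.Close()

    // client side: submit a task and read back its tid
    args := url.Values{"task": {"vacuum"}, "volume": {"7"}}
    resp, err := http.PostForm(srv.URL+"/admin/task/new", args)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    body, _ := io.ReadAll(resp.Body)
    fmt.Println(string(body)) // {"tid":"vacuum-7"}
}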