From a74e2bed2cb1b98c2791c2a9ea5819e482ff7cb0 Mon Sep 17 00:00:00 2001 From: tnextday Date: Sat, 5 Dec 2015 21:59:05 +0800 Subject: [PATCH 01/46] go imports `Needle.ReadNeedleBody` add CRC check and warning --- go/filer/flat_namespace/flat_namespace_store.go | 2 -- go/storage/needle.go | 2 +- go/storage/needle_read_write.go | 8 ++++++++ go/storage/volume_info.go | 3 ++- go/storage/volume_sync.go | 3 +++ 5 files changed, 14 insertions(+), 4 deletions(-) diff --git a/go/filer/flat_namespace/flat_namespace_store.go b/go/filer/flat_namespace/flat_namespace_store.go index 832b70e40..068201adf 100644 --- a/go/filer/flat_namespace/flat_namespace_store.go +++ b/go/filer/flat_namespace/flat_namespace_store.go @@ -1,7 +1,5 @@ package flat_namespace -import () - type FlatNamespaceStore interface { Put(fullFileName string, fid string) (err error) Get(fullFileName string) (fid string, err error) diff --git a/go/storage/needle.go b/go/storage/needle.go index 32ebdae7d..c9124a681 100644 --- a/go/storage/needle.go +++ b/go/storage/needle.go @@ -14,8 +14,8 @@ import ( "github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/images" - "github.com/chrislusf/seaweedfs/go/util" "github.com/chrislusf/seaweedfs/go/operation" + "github.com/chrislusf/seaweedfs/go/util" ) const ( diff --git a/go/storage/needle_read_write.go b/go/storage/needle_read_write.go index 9d7af600a..eb7989884 100644 --- a/go/storage/needle_read_write.go +++ b/go/storage/needle_read_write.go @@ -238,6 +238,10 @@ func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyL } n.Data = bytes[:n.Size] n.Checksum = NewCRC(n.Data) + checksum := util.BytesToUint32(bytes[n.Size : n.Size+NeedleChecksumSize]) + if n.Checksum.Value() != checksum { + glog.V(0).Infof("CRC error! 
Data On Disk Corrupted, needle id = %x", n.Id) + } case Version2: bytes := make([]byte, bodyLength) if _, err = r.ReadAt(bytes, offset); err != nil { @@ -245,6 +249,10 @@ func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyL } n.readNeedleDataVersion2(bytes[0:n.Size]) n.Checksum = NewCRC(n.Data) + checksum := util.BytesToUint32(bytes[n.Size : n.Size+NeedleChecksumSize]) + if n.Checksum.Value() != checksum { + glog.V(0).Infof("CRC error! Data On Disk Corrupted, needle id = %x", n.Id) + } default: err = fmt.Errorf("Unsupported Version! (%d)", version) } diff --git a/go/storage/volume_info.go b/go/storage/volume_info.go index a2f139c89..e4979c790 100644 --- a/go/storage/volume_info.go +++ b/go/storage/volume_info.go @@ -2,8 +2,9 @@ package storage import ( "fmt" - "github.com/chrislusf/seaweedfs/go/operation" "sort" + + "github.com/chrislusf/seaweedfs/go/operation" ) type VolumeInfo struct { diff --git a/go/storage/volume_sync.go b/go/storage/volume_sync.go index 2c72d62f0..01d59d6ae 100644 --- a/go/storage/volume_sync.go +++ b/go/storage/volume_sync.go @@ -202,6 +202,9 @@ func (v *Volume) fetchNeedle(volumeDataContentHandlerUrl string, if err != nil { return fmt.Errorf("Reading from %s error: %v", volumeDataContentHandlerUrl, err) } + if needleValue.Size != uint32(len(b)) { + return fmt.Errorf("Reading from %s error: size incorrect", volumeDataContentHandlerUrl) + } offset, err := v.AppendBlob(b) if err != nil { return fmt.Errorf("Appending volume %d error: %v", v.Id, err) From 77e3581a52efecbc6eb3e0bbc57563a61147564a Mon Sep 17 00:00:00 2001 From: tnextday Date: Mon, 7 Dec 2015 21:14:57 +0800 Subject: [PATCH 02/46] Add CleanDataReader, auto fill zero in deleted needle --- Makefile | 3 + go/storage/volume_replicate.go | 185 +++++++++++++++++++++++++++++++++ 2 files changed, 188 insertions(+) create mode 100644 go/storage/volume_replicate.go diff --git a/Makefile b/Makefile index 6719a7bdd..68bd916be 100644 --- a/Makefile +++ b/Makefile @@ 
-20,3 +20,6 @@ build: deps linux: deps mkdir -p linux GOOS=linux GOARCH=amd64 go build $(GO_FLAGS) -o linux/$(BINARY) $(SOURCE_DIR) + +imports: + goimports -w $(SOURCE_DIR) \ No newline at end of file diff --git a/go/storage/volume_replicate.go b/go/storage/volume_replicate.go new file mode 100644 index 000000000..93c52e55c --- /dev/null +++ b/go/storage/volume_replicate.go @@ -0,0 +1,185 @@ +package storage + +import ( + "sort" + + "io" + "os" + + "sync" + + "github.com/chrislusf/seaweedfs/go/util" + "io/ioutil" +) + +type DirtyData struct { + Offset int64 `comment:"Dirty data start offset"` + Size uint32 `comment:"Size of the dirty data"` +} + +type DirtyDatas []DirtyData + +func (s DirtyDatas) Len() int { return len(s) } +func (s DirtyDatas) Less(i, j int) bool { return s[i].Offset < s[j].Offset } +func (s DirtyDatas) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s DirtyDatas) Sort() { sort.Sort(s) } +func (s DirtyDatas) Search(offset int64) int { + return sort.Search(len(s), func(i int) bool { + v := &s[i] + return /*v.Offset <= offset &&*/ v.Offset+int64(v.Size) > offset + }) +} + +type CleanDataReader struct { + Dirtys DirtyDatas + DataFile *os.File + pr *io.PipeReader + pw *io.PipeWriter + mutex sync.Mutex +} + +func ScanDirtyData(indexFileContent []byte) (dirtys DirtyDatas) { + m := NewCompactMap() + for i := 0; i+16 <= len(indexFileContent); i += 16 { + bytes := indexFileContent[i : i+16] + key := util.BytesToUint64(bytes[:8]) + offset := util.BytesToUint32(bytes[8:12]) + size := util.BytesToUint32(bytes[12:16]) + k := Key(key) + if offset != 0 && size != 0 { + m.Set(k, offset, size) + } else { + if nv, ok := m.Get(k); ok { + //mark old needle file as dirty data + if int64(nv.Size)-NeedleHeaderSize > 0 { + dirtys = append(dirtys, DirtyData{ + Offset: int64(nv.Offset)*8 + NeedleHeaderSize, + Size: nv.Size - NeedleHeaderSize, + }) + } + } + m.Delete(k) + } + } + dirtys.Sort() + return dirtys +} + +func (cf *CleanDataReader) Seek(offset int64, whence int) 
(int64, error) { + off, e := cf.DataFile.Seek(0, 1) + if e != nil { + return 0, nil + } + if off != offset { + cf.Close() + } + return cf.DataFile.Seek(offset, whence) +} + +func (cf *CleanDataReader) WriteTo(w io.Writer) (written int64, err error) { + off, e := cf.DataFile.Seek(0, 1) + if e != nil { + return 0, nil + } + const ZeroBufSize = 32 * 1024 + zeroBuf := make([]byte, ZeroBufSize) + dirtyIndex := cf.Dirtys.Search(off) + var nextDirty *DirtyData + if dirtyIndex < len(cf.Dirtys) { + nextDirty = &cf.Dirtys[dirtyIndex] + if nextDirty.Offset+int64(nextDirty.Size) < off { + nextDirty = nil + } + } + for { + if nextDirty != nil && off >= nextDirty.Offset && off < nextDirty.Offset+int64(nextDirty.Size) { + sz := nextDirty.Offset + int64(nextDirty.Size) - off + for sz > 0 { + mn := int64(ZeroBufSize) + if mn > sz { + mn = sz + } + var n int + if n, e = w.Write(zeroBuf[:mn]); e != nil { + return + } + written += int64(n) + sz -= int64(n) + off += int64(n) + } + dirtyIndex++ + if dirtyIndex < len(cf.Dirtys) { + nextDirty = &cf.Dirtys[dirtyIndex] + } else { + nextDirty = nil + } + if _, e = cf.DataFile.Seek(off, 0); e != nil { + return + } + } else { + var n, sz int64 + if nextDirty != nil { + sz = nextDirty.Offset - off + } + if sz > 0 { + if n, e = io.CopyN(w, cf.DataFile, sz); e != nil { + return + } + } else { + if n, e = io.Copy(w, cf.DataFile); e != nil { + return + } + } + off += n + written += n + } + } + return +} + +func (cf *CleanDataReader) ReadAt(p []byte, off int64) (n int, err error) { + cf.Seek(off, 0) + return cf.Read(p) +} + +func (cf *CleanDataReader) Read(p []byte) (int, error) { + return cf.getPipeReader().Read(p) +} + +func (cf *CleanDataReader) Close() (e error) { + cf.mutex.Lock() + defer cf.mutex.Unlock() + cf.closePipe() + return cf.DataFile.Close() +} + +func (cf *CleanDataReader) closePipe() (e error) { + if cf.pr != nil { + if err := cf.pr.Close(); err != nil { + e = err + } + } + cf.pr = nil + if cf.pw != nil { + if err := cf.pw.Close(); 
err != nil { + e = err + } + } + cf.pw = nil + return e +} + +func (cf *CleanDataReader) getPipeReader() io.Reader { + cf.mutex.Lock() + defer cf.mutex.Unlock() + if cf.pr != nil && cf.pw != nil { + return cf.pr + } + cf.closePipe() + cf.pr, cf.pw = io.Pipe() + go func(pw *io.PipeWriter) { + _, e := cf.WriteTo(pw) + pw.CloseWithError(e) + }(cf.pw) + return cf.pr +} From 864e92550bfd476c4b91b840d8cd980ffb04cde9 Mon Sep 17 00:00:00 2001 From: tnextday Date: Tue, 8 Dec 2015 13:07:03 +0800 Subject: [PATCH 03/46] test DirtyDatas.Search --- go/storage/volume_replicate.go | 111 ++++++++++++++++------------ go/storage/volume_replicate_test.go | 22 ++++++ 2 files changed, 85 insertions(+), 48 deletions(-) create mode 100644 go/storage/volume_replicate_test.go diff --git a/go/storage/volume_replicate.go b/go/storage/volume_replicate.go index 93c52e55c..2fc12fc9e 100644 --- a/go/storage/volume_replicate.go +++ b/go/storage/volume_replicate.go @@ -1,15 +1,12 @@ package storage import ( - "sort" - "io" "os" - + "sort" "sync" "github.com/chrislusf/seaweedfs/go/util" - "io/ioutil" ) type DirtyData struct { @@ -26,11 +23,11 @@ func (s DirtyDatas) Sort() { sort.Sort(s) } func (s DirtyDatas) Search(offset int64) int { return sort.Search(len(s), func(i int) bool { v := &s[i] - return /*v.Offset <= offset &&*/ v.Offset+int64(v.Size) > offset + return v.Offset+int64(v.Size) > offset }) } -type CleanDataReader struct { +type CleanReader struct { Dirtys DirtyDatas DataFile *os.File pr *io.PipeReader @@ -50,7 +47,7 @@ func ScanDirtyData(indexFileContent []byte) (dirtys DirtyDatas) { m.Set(k, offset, size) } else { if nv, ok := m.Get(k); ok { - //mark old needle file as dirty data + //mark old needle data as dirty data if int64(nv.Size)-NeedleHeaderSize > 0 { dirtys = append(dirtys, DirtyData{ Offset: int64(nv.Offset)*8 + NeedleHeaderSize, @@ -65,28 +62,28 @@ func ScanDirtyData(indexFileContent []byte) (dirtys DirtyDatas) { return dirtys } -func (cf *CleanDataReader) Seek(offset int64, 
whence int) (int64, error) { - off, e := cf.DataFile.Seek(0, 1) +func (cr *CleanReader) Seek(offset int64, whence int) (int64, error) { + off, e := cr.DataFile.Seek(0, 1) if e != nil { return 0, nil } if off != offset { - cf.Close() + cr.Close() } - return cf.DataFile.Seek(offset, whence) + return cr.DataFile.Seek(offset, whence) } -func (cf *CleanDataReader) WriteTo(w io.Writer) (written int64, err error) { - off, e := cf.DataFile.Seek(0, 1) +func (cdr *CleanReader) WriteTo(w io.Writer) (written int64, err error) { + off, e := cdr.DataFile.Seek(0, 1) if e != nil { return 0, nil } const ZeroBufSize = 32 * 1024 zeroBuf := make([]byte, ZeroBufSize) - dirtyIndex := cf.Dirtys.Search(off) + dirtyIndex := cdr.Dirtys.Search(off) var nextDirty *DirtyData - if dirtyIndex < len(cf.Dirtys) { - nextDirty = &cf.Dirtys[dirtyIndex] + if dirtyIndex < len(cdr.Dirtys) { + nextDirty = &cdr.Dirtys[dirtyIndex] if nextDirty.Offset+int64(nextDirty.Size) < off { nextDirty = nil } @@ -108,12 +105,12 @@ func (cf *CleanDataReader) WriteTo(w io.Writer) (written int64, err error) { off += int64(n) } dirtyIndex++ - if dirtyIndex < len(cf.Dirtys) { - nextDirty = &cf.Dirtys[dirtyIndex] + if dirtyIndex < len(cdr.Dirtys) { + nextDirty = &cdr.Dirtys[dirtyIndex] } else { nextDirty = nil } - if _, e = cf.DataFile.Seek(off, 0); e != nil { + if _, e = cdr.DataFile.Seek(off, 0); e != nil { return } } else { @@ -122,11 +119,11 @@ func (cf *CleanDataReader) WriteTo(w io.Writer) (written int64, err error) { sz = nextDirty.Offset - off } if sz > 0 { - if n, e = io.CopyN(w, cf.DataFile, sz); e != nil { + if n, e = io.CopyN(w, cdr.DataFile, sz); e != nil { return } } else { - if n, e = io.Copy(w, cf.DataFile); e != nil { + if n, e = io.Copy(w, cdr.DataFile); e != nil { return } } @@ -137,49 +134,67 @@ func (cf *CleanDataReader) WriteTo(w io.Writer) (written int64, err error) { return } -func (cf *CleanDataReader) ReadAt(p []byte, off int64) (n int, err error) { - cf.Seek(off, 0) - return cf.Read(p) +func (cr 
*CleanReader) ReadAt(p []byte, off int64) (n int, err error) { + cr.Seek(off, 0) + return cr.Read(p) } -func (cf *CleanDataReader) Read(p []byte) (int, error) { - return cf.getPipeReader().Read(p) +func (cr *CleanReader) Read(p []byte) (int, error) { + return cr.getPipeReader().Read(p) } -func (cf *CleanDataReader) Close() (e error) { - cf.mutex.Lock() - defer cf.mutex.Unlock() - cf.closePipe() - return cf.DataFile.Close() +func (cr *CleanReader) Close() (e error) { + cr.mutex.Lock() + defer cr.mutex.Unlock() + cr.closePipe() + return cr.DataFile.Close() } -func (cf *CleanDataReader) closePipe() (e error) { - if cf.pr != nil { - if err := cf.pr.Close(); err != nil { +func (cr *CleanReader) closePipe() (e error) { + if cr.pr != nil { + if err := cr.pr.Close(); err != nil { e = err } } - cf.pr = nil - if cf.pw != nil { - if err := cf.pw.Close(); err != nil { + cr.pr = nil + if cr.pw != nil { + if err := cr.pw.Close(); err != nil { e = err } } - cf.pw = nil + cr.pw = nil return e } -func (cf *CleanDataReader) getPipeReader() io.Reader { - cf.mutex.Lock() - defer cf.mutex.Unlock() - if cf.pr != nil && cf.pw != nil { - return cf.pr +func (cr *CleanReader) getPipeReader() io.Reader { + cr.mutex.Lock() + defer cr.mutex.Unlock() + if cr.pr != nil && cr.pw != nil { + return cr.pr } - cf.closePipe() - cf.pr, cf.pw = io.Pipe() + cr.closePipe() + cr.pr, cr.pw = io.Pipe() go func(pw *io.PipeWriter) { - _, e := cf.WriteTo(pw) + _, e := cr.WriteTo(pw) pw.CloseWithError(e) - }(cf.pw) - return cf.pr + }(cr.pw) + return cr.pr +} + +func (v *Volume) GetVolumeCleanReader() (cr *CleanReader, err error) { + var dirtys DirtyDatas + if indexData, e := v.nm.IndexFileContent(); e != nil { + return nil, err + } else { + dirtys = ScanDirtyData(indexData) + } + dataFile, e := os.Open(v.FileName()) + if e != nil { + return nil, e + } + cr = &CleanReader{ + Dirtys: dirtys, + DataFile: dataFile, + } + return } diff --git a/go/storage/volume_replicate_test.go b/go/storage/volume_replicate_test.go 
new file mode 100644 index 000000000..d1da211c3 --- /dev/null +++ b/go/storage/volume_replicate_test.go @@ -0,0 +1,22 @@ +package storage + +import "testing" + +func TestDirtyDataSearch(t *testing.T) { + testData := DirtyDatas{ + {30, 20}, {106, 200}, {5, 20}, {512, 68}, {412, 50}, + } + testOffset := []int64{ + 0, 150, 480, 1024, + } + testData.Sort() + t.Logf("TestData = %v", testData) + for _, off := range testOffset { + i := testData.Search(off) + if i < testData.Len() { + t.Logf("(%d) nearest chunk[%d]: %v", off, i, testData[i]) + } else { + t.Logf("Search %d return %d ", off, i) + } + } +} From 9d03f763dc3cb05ae2805b9ebbf10c5e61fc7d03 Mon Sep 17 00:00:00 2001 From: tnextday Date: Tue, 8 Dec 2015 16:47:55 +0800 Subject: [PATCH 04/46] Volume server add `/admin/sync/vol_data` handler to serve cleaned volume date --- go/storage/volume_replicate.go | 12 +++- go/weed/weed_server/volume_server.go | 1 + .../volume_server_handlers_replicate.go | 60 +++++++++++++++++++ 3 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 go/weed/weed_server/volume_server_handlers_replicate.go diff --git a/go/storage/volume_replicate.go b/go/storage/volume_replicate.go index 2fc12fc9e..1ea6a8765 100644 --- a/go/storage/volume_replicate.go +++ b/go/storage/volume_replicate.go @@ -73,6 +73,14 @@ func (cr *CleanReader) Seek(offset int64, whence int) (int64, error) { return cr.DataFile.Seek(offset, whence) } +func (cr *CleanReader) Size() (int64, error) { + fi, e := cr.DataFile.Stat() + if e != nil { + return 0, e + } + return fi.Size(), nil +} + func (cdr *CleanReader) WriteTo(w io.Writer) (written int64, err error) { off, e := cdr.DataFile.Seek(0, 1) if e != nil { @@ -84,9 +92,6 @@ func (cdr *CleanReader) WriteTo(w io.Writer) (written int64, err error) { var nextDirty *DirtyData if dirtyIndex < len(cdr.Dirtys) { nextDirty = &cdr.Dirtys[dirtyIndex] - if nextDirty.Offset+int64(nextDirty.Size) < off { - nextDirty = nil - } } for { if nextDirty != nil && off >= 
nextDirty.Offset && off < nextDirty.Offset+int64(nextDirty.Size) { @@ -189,6 +194,7 @@ func (v *Volume) GetVolumeCleanReader() (cr *CleanReader, err error) { dirtys = ScanDirtyData(indexData) } dataFile, e := os.Open(v.FileName()) + if e != nil { return nil, e } diff --git a/go/weed/weed_server/volume_server.go b/go/weed/weed_server/volume_server.go index 8becdd0f1..b8472235e 100644 --- a/go/weed/weed_server/volume_server.go +++ b/go/weed/weed_server/volume_server.go @@ -57,6 +57,7 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string, adminMux.HandleFunc("/admin/sync/status", vs.guard.WhiteList(vs.getVolumeSyncStatusHandler)) adminMux.HandleFunc("/admin/sync/index", vs.guard.WhiteList(vs.getVolumeIndexContentHandler)) adminMux.HandleFunc("/admin/sync/data", vs.guard.WhiteList(vs.getVolumeDataContentHandler)) + adminMux.HandleFunc("/admin/sync/vol_data", vs.guard.WhiteList(vs.getVolumeCleanDataHandler)) adminMux.HandleFunc("/stats/counter", vs.guard.WhiteList(statsCounterHandler)) adminMux.HandleFunc("/stats/memory", vs.guard.WhiteList(statsMemoryHandler)) adminMux.HandleFunc("/stats/disk", vs.guard.WhiteList(vs.statsDiskHandler)) diff --git a/go/weed/weed_server/volume_server_handlers_replicate.go b/go/weed/weed_server/volume_server_handlers_replicate.go new file mode 100644 index 000000000..c8b807ffb --- /dev/null +++ b/go/weed/weed_server/volume_server_handlers_replicate.go @@ -0,0 +1,60 @@ +package weed_server + +import ( + "fmt" + "net/http" + "strconv" + "github.com/chrislusf/seaweedfs/go/glog" + "io" + "github.com/pierrec/lz4" +) + +func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http.Request) { + v, err := vs.getVolume("volume", r) + if v == nil { + http.Error(w, fmt.Sprintf("Not Found volume: %v", err), http.StatusBadRequest) + return + } + cr, e := v.GetVolumeCleanReader() + if e != nil { + http.Error(w, fmt.Sprintf("Get volume clean reader: %v", err), http.StatusInternalServerError) + return + } + totalSize, e 
:= cr.Size() + if e != nil { + http.Error(w, fmt.Sprintf("Get volume size: %v", err), http.StatusInternalServerError) + return + } + w.Header().Set("Accept-Ranges", "bytes") + w.Header().Set("Content-Encoding", "lz4") + lz4w := lz4.NewWriter(w) + defer lz4w.Close() + rangeReq := r.Header.Get("Range") + if rangeReq == "" { + w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10)) + if _, e = io.Copy(lz4w, cr); e != nil { + glog.V(4).Infoln("response write error:", e) + } + return + } + ranges, err := parseRange(rangeReq, totalSize) + if err != nil { + http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable) + return + } + if len(ranges) != 1 { + http.Error(w, "Only support one range", http.StatusNotImplemented) + return + } + ra := ranges[0] + if _, e := cr.Seek(ra.start, 0); e != nil { + http.Error(w, fmt.Sprintf("Seek: %v", err), http.StatusInternalServerError) + return + } + w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10)) + w.Header().Set("Content-Range", ra.contentRange(totalSize)) + w.WriteHeader(http.StatusPartialContent) + if _, e = io.CopyN(lz4w, cr, ra.length); e != nil { + glog.V(2).Infoln("response write error:", e) + } +} From 8454b053196e75d1a7ec78a1892e2c96c77aeb56 Mon Sep 17 00:00:00 2001 From: tnextday Date: Tue, 8 Dec 2015 17:33:58 +0800 Subject: [PATCH 05/46] fix ScanDirtyData bug --- go/storage/volume_replicate.go | 4 ++-- .../volume_server_handlers_replicate.go | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/go/storage/volume_replicate.go b/go/storage/volume_replicate.go index 1ea6a8765..4ca21f105 100644 --- a/go/storage/volume_replicate.go +++ b/go/storage/volume_replicate.go @@ -51,7 +51,7 @@ func ScanDirtyData(indexFileContent []byte) (dirtys DirtyDatas) { if int64(nv.Size)-NeedleHeaderSize > 0 { dirtys = append(dirtys, DirtyData{ Offset: int64(nv.Offset)*8 + NeedleHeaderSize, - Size: nv.Size - NeedleHeaderSize, + Size: nv.Size, }) } } @@ -193,7 +193,7 @@ func (v 
*Volume) GetVolumeCleanReader() (cr *CleanReader, err error) { } else { dirtys = ScanDirtyData(indexData) } - dataFile, e := os.Open(v.FileName()) + dataFile, e := os.Open(v.FileName()+".dat") if e != nil { return nil, e diff --git a/go/weed/weed_server/volume_server_handlers_replicate.go b/go/weed/weed_server/volume_server_handlers_replicate.go index c8b807ffb..c8b429a7b 100644 --- a/go/weed/weed_server/volume_server_handlers_replicate.go +++ b/go/weed/weed_server/volume_server_handlers_replicate.go @@ -10,23 +10,24 @@ import ( ) func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http.Request) { - v, err := vs.getVolume("volume", r) + v, e := vs.getVolume("volume", r) if v == nil { - http.Error(w, fmt.Sprintf("Not Found volume: %v", err), http.StatusBadRequest) + http.Error(w, fmt.Sprintf("Not Found volume: %v", e), http.StatusBadRequest) return } cr, e := v.GetVolumeCleanReader() if e != nil { - http.Error(w, fmt.Sprintf("Get volume clean reader: %v", err), http.StatusInternalServerError) + http.Error(w, fmt.Sprintf("Get volume clean reader: %v", e), http.StatusInternalServerError) return } totalSize, e := cr.Size() if e != nil { - http.Error(w, fmt.Sprintf("Get volume size: %v", err), http.StatusInternalServerError) + http.Error(w, fmt.Sprintf("Get volume size: %v", e), http.StatusInternalServerError) return } w.Header().Set("Accept-Ranges", "bytes") w.Header().Set("Content-Encoding", "lz4") + w.Header().Set("Content-Disposition", fmt.Sprintf(`filename="%d.dat.lz4"`, v.Id)) lz4w := lz4.NewWriter(w) defer lz4w.Close() rangeReq := r.Header.Get("Range") @@ -37,9 +38,9 @@ func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http } return } - ranges, err := parseRange(rangeReq, totalSize) - if err != nil { - http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable) + ranges, e := parseRange(rangeReq, totalSize) + if e != nil { + http.Error(w, e.Error(), http.StatusRequestedRangeNotSatisfiable) return } if 
len(ranges) != 1 { @@ -48,7 +49,7 @@ func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http } ra := ranges[0] if _, e := cr.Seek(ra.start, 0); e != nil { - http.Error(w, fmt.Sprintf("Seek: %v", err), http.StatusInternalServerError) + http.Error(w, fmt.Sprintf("Seek: %v", e), http.StatusInternalServerError) return } w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10)) From 848930ab5a9ec2a99ae5250723dc2c175390a09d Mon Sep 17 00:00:00 2001 From: tnextday Date: Tue, 8 Dec 2015 17:43:52 +0800 Subject: [PATCH 06/46] fix forever loop problem --- go/storage/volume_replicate.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/go/storage/volume_replicate.go b/go/storage/volume_replicate.go index 4ca21f105..f40cf60da 100644 --- a/go/storage/volume_replicate.go +++ b/go/storage/volume_replicate.go @@ -123,14 +123,14 @@ func (cdr *CleanReader) WriteTo(w io.Writer) (written int64, err error) { if nextDirty != nil { sz = nextDirty.Offset - off } - if sz > 0 { - if n, e = io.CopyN(w, cdr.DataFile, sz); e != nil { - return - } - } else { - if n, e = io.Copy(w, cdr.DataFile); e != nil { - return - } + if sz <= 0 { + // copy until eof + n, e = io.Copy(w, cdr.DataFile); + written += n + return + } + if n, e = io.CopyN(w, cdr.DataFile, sz); e != nil { + return } off += n written += n From acf4d44f7e41544201545ee23493b5a49e800024 Mon Sep 17 00:00:00 2001 From: tnextday Date: Tue, 8 Dec 2015 17:56:57 +0800 Subject: [PATCH 07/46] update --- go/storage/needle_read_write.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/go/storage/needle_read_write.go b/go/storage/needle_read_write.go index eb7989884..adb5058be 100644 --- a/go/storage/needle_read_write.go +++ b/go/storage/needle_read_write.go @@ -248,6 +248,9 @@ func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyL return } n.readNeedleDataVersion2(bytes[0:n.Size]) + if n.DataSize == 0 { + return + } n.Checksum = NewCRC(n.Data) 
checksum := util.BytesToUint32(bytes[n.Size : n.Size+NeedleChecksumSize]) if n.Checksum.Value() != checksum { From 39c97f8955f71a0ba06ac51929236f9b66b97aa9 Mon Sep 17 00:00:00 2001 From: tnextday Date: Tue, 8 Dec 2015 20:12:12 +0800 Subject: [PATCH 08/46] update --- go/storage/volume_replicate.go | 30 +++++++++++-------- .../volume_server_handlers_replicate.go | 15 ++++++---- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/go/storage/volume_replicate.go b/go/storage/volume_replicate.go index f40cf60da..00b9cd14e 100644 --- a/go/storage/volume_replicate.go +++ b/go/storage/volume_replicate.go @@ -63,14 +63,18 @@ func ScanDirtyData(indexFileContent []byte) (dirtys DirtyDatas) { } func (cr *CleanReader) Seek(offset int64, whence int) (int64, error) { - off, e := cr.DataFile.Seek(0, 1) + oldOff, e := cr.DataFile.Seek(0, 1) if e != nil { - return 0, nil + return 0, e + } + newOff, e := cr.DataFile.Seek(offset, whence) + if e != nil { + return 0, e } - if off != offset { - cr.Close() + if oldOff != newOff { + cr.closePipe(true) } - return cr.DataFile.Seek(offset, whence) + return newOff, nil } func (cr *CleanReader) Size() (int64, error) { @@ -125,7 +129,7 @@ func (cdr *CleanReader) WriteTo(w io.Writer) (written int64, err error) { } if sz <= 0 { // copy until eof - n, e = io.Copy(w, cdr.DataFile); + n, e = io.Copy(w, cdr.DataFile) written += n return } @@ -149,13 +153,15 @@ func (cr *CleanReader) Read(p []byte) (int, error) { } func (cr *CleanReader) Close() (e error) { - cr.mutex.Lock() - defer cr.mutex.Unlock() - cr.closePipe() + cr.closePipe(true) return cr.DataFile.Close() } -func (cr *CleanReader) closePipe() (e error) { +func (cr *CleanReader) closePipe(lock bool) (e error) { + if lock { + cr.mutex.Lock() + defer cr.mutex.Unlock() + } if cr.pr != nil { if err := cr.pr.Close(); err != nil { e = err @@ -177,7 +183,7 @@ func (cr *CleanReader) getPipeReader() io.Reader { if cr.pr != nil && cr.pw != nil { return cr.pr } - cr.closePipe() + 
cr.closePipe(false) cr.pr, cr.pw = io.Pipe() go func(pw *io.PipeWriter) { _, e := cr.WriteTo(pw) @@ -193,7 +199,7 @@ func (v *Volume) GetVolumeCleanReader() (cr *CleanReader, err error) { } else { dirtys = ScanDirtyData(indexData) } - dataFile, e := os.Open(v.FileName()+".dat") + dataFile, e := os.Open(v.FileName() + ".dat") if e != nil { return nil, e diff --git a/go/weed/weed_server/volume_server_handlers_replicate.go b/go/weed/weed_server/volume_server_handlers_replicate.go index c8b429a7b..411967326 100644 --- a/go/weed/weed_server/volume_server_handlers_replicate.go +++ b/go/weed/weed_server/volume_server_handlers_replicate.go @@ -2,10 +2,11 @@ package weed_server import ( "fmt" + "io" "net/http" "strconv" + "github.com/chrislusf/seaweedfs/go/glog" - "io" "github.com/pierrec/lz4" ) @@ -26,16 +27,17 @@ func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http return } w.Header().Set("Accept-Ranges", "bytes") - w.Header().Set("Content-Encoding", "lz4") w.Header().Set("Content-Disposition", fmt.Sprintf(`filename="%d.dat.lz4"`, v.Id)) - lz4w := lz4.NewWriter(w) - defer lz4w.Close() + rangeReq := r.Header.Get("Range") if rangeReq == "" { w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10)) + w.Header().Set("Content-Encoding", "lz4") + lz4w := lz4.NewWriter(w) if _, e = io.Copy(lz4w, cr); e != nil { glog.V(4).Infoln("response write error:", e) } + lz4w.Close() return } ranges, e := parseRange(rangeReq, totalSize) @@ -49,13 +51,16 @@ func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http } ra := ranges[0] if _, e := cr.Seek(ra.start, 0); e != nil { - http.Error(w, fmt.Sprintf("Seek: %v", e), http.StatusInternalServerError) + http.Error(w, e.Error(), http.StatusInternalServerError) return } w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10)) w.Header().Set("Content-Range", ra.contentRange(totalSize)) + w.Header().Set("Content-Encoding", "lz4") w.WriteHeader(http.StatusPartialContent) + lz4w 
:= lz4.NewWriter(w) if _, e = io.CopyN(lz4w, cr, ra.length); e != nil { glog.V(2).Infoln("response write error:", e) } + lz4w.Close() } From 3bfb2d09cb7cd56f0b1299efd658018c09e16d08 Mon Sep 17 00:00:00 2001 From: tnextday Date: Wed, 9 Dec 2015 20:27:35 +0800 Subject: [PATCH 09/46] Volume server add "/admin/set_replica" handle to update volume replica placement setting --- go/storage/volume.go | 9 +++++ go/storage/volume_super_block.go | 11 +++++ go/weed/weed_server/volume_server.go | 1 + .../volume_server_handlers_replicate.go | 40 +++++++++++++++++++ 4 files changed, 61 insertions(+) diff --git a/go/storage/volume.go b/go/storage/volume.go index 5c6b12e9b..bae2e4aee 100644 --- a/go/storage/volume.go +++ b/go/storage/volume.go @@ -426,3 +426,12 @@ func (v *Volume) exiredLongEnough(maxDelayMinutes uint32) bool { } return false } + + +func (v *Volume) SetReplica(replica *ReplicaPlacement) error{ + if v.ReplicaPlacement.String() == replica.String(){ + return nil + } + v.ReplicaPlacement = replica + return v.writeSuperBlock() +} \ No newline at end of file diff --git a/go/storage/volume_super_block.go b/go/storage/volume_super_block.go index e37360075..e0fbd9e9a 100644 --- a/go/storage/volume_super_block.go +++ b/go/storage/volume_super_block.go @@ -59,6 +59,7 @@ func (v *Volume) maybeWriteSuperBlock() error { } return e } + func (v *Volume) readSuperBlock() (err error) { if _, err = v.dataFile.Seek(0, 0); err != nil { return fmt.Errorf("cannot seek to the beginning of %s: %v", v.dataFile.Name(), err) @@ -70,6 +71,16 @@ func (v *Volume) readSuperBlock() (err error) { v.SuperBlock, err = ParseSuperBlock(header) return err } + +func (v *Volume) writeSuperBlock() (err error) { + v.dataFileAccessLock.Lock() + defer v.dataFileAccessLock.Unlock() + if _, e := v.dataFile.WriteAt(v.SuperBlock.Bytes(), 0); e != nil { + return fmt.Errorf("cannot write volume %d super block: %v", v.Id, e) + } + return nil +} + func ParseSuperBlock(header []byte) (superBlock SuperBlock, err error) 
{ superBlock.version = Version(header[0]) if superBlock.ReplicaPlacement, err = NewReplicaPlacementFromByte(header[1]); err != nil { diff --git a/go/weed/weed_server/volume_server.go b/go/weed/weed_server/volume_server.go index b8472235e..3480ad09b 100644 --- a/go/weed/weed_server/volume_server.go +++ b/go/weed/weed_server/volume_server.go @@ -58,6 +58,7 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string, adminMux.HandleFunc("/admin/sync/index", vs.guard.WhiteList(vs.getVolumeIndexContentHandler)) adminMux.HandleFunc("/admin/sync/data", vs.guard.WhiteList(vs.getVolumeDataContentHandler)) adminMux.HandleFunc("/admin/sync/vol_data", vs.guard.WhiteList(vs.getVolumeCleanDataHandler)) + adminMux.HandleFunc("/admin/set_replica", vs.guard.WhiteList(vs.setVolumeReplicaHandler)) adminMux.HandleFunc("/stats/counter", vs.guard.WhiteList(statsCounterHandler)) adminMux.HandleFunc("/stats/memory", vs.guard.WhiteList(statsMemoryHandler)) adminMux.HandleFunc("/stats/disk", vs.guard.WhiteList(vs.statsDiskHandler)) diff --git a/go/weed/weed_server/volume_server_handlers_replicate.go b/go/weed/weed_server/volume_server_handlers_replicate.go index 411967326..866c808f8 100644 --- a/go/weed/weed_server/volume_server_handlers_replicate.go +++ b/go/weed/weed_server/volume_server_handlers_replicate.go @@ -7,6 +7,7 @@ import ( "strconv" "github.com/chrislusf/seaweedfs/go/glog" + "github.com/chrislusf/seaweedfs/go/storage" "github.com/pierrec/lz4" ) @@ -64,3 +65,42 @@ func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http } lz4w.Close() } + +type VolumeOptError struct { + Volume string `json:"volume"` + Err string `json:"err"` +} + +func (vs *VolumeServer) setVolumeReplicaHandler(w http.ResponseWriter, r *http.Request) { + r.ParseForm() + replica, e := storage.NewReplicaPlacementFromString(r.FormValue("replica")) + if e != nil { + writeJsonError(w, r, http.StatusBadRequest, e) + return + } + errs := []VolumeOptError{} + for _, volume := range 
r.Form["volume"] { + if vid, e := storage.NewVolumeId(volume); e == nil { + if v := vs.store.GetVolume(vid); v != nil { + if e := v.SetReplica(replica); e != nil { + errs = append(errs, VolumeOptError{ + Volume: volume, + Err: e.Error(), + }) + } + } + } else { + errs = append(errs, VolumeOptError{ + Volume: volume, + Err: e.Error(), + }) + } + } + result := make(map[string]interface{}) + if len(errs) > 0 { + result["error"] = "set volume replica error." + result["errors"] = errs + } + + writeJson(w, r, http.StatusAccepted, result) +} From 30746b7ec6228f13e0158b240d5df1d0e5f14a28 Mon Sep 17 00:00:00 2001 From: tnextday Date: Thu, 10 Dec 2015 11:43:00 +0800 Subject: [PATCH 10/46] set_replica allow set all or by collection --- go/storage/store.go | 13 +++++++ go/storage/volume.go | 10 +++-- .../volume_server_handlers_replicate.go | 39 ++++++++++++++----- 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/go/storage/store.go b/go/storage/store.go index ebf01d09f..6c7871084 100644 --- a/go/storage/store.go +++ b/go/storage/store.go @@ -387,3 +387,16 @@ func (s *Store) HasVolume(i VolumeId) bool { v := s.findVolume(i) return v != nil } + +type VolumeWalker func(v *Volume) (e error) + +func (s *Store) WalkVolume(walker VolumeWalker) error{ + for _, location := range s.Locations { + for _, v := range location.volumes { + if e := walker(v); e != nil { + return e + } + } + } + return nil +} diff --git a/go/storage/volume.go b/go/storage/volume.go index bae2e4aee..124990341 100644 --- a/go/storage/volume.go +++ b/go/storage/volume.go @@ -427,11 +427,13 @@ func (v *Volume) exiredLongEnough(maxDelayMinutes uint32) bool { return false } - -func (v *Volume) SetReplica(replica *ReplicaPlacement) error{ - if v.ReplicaPlacement.String() == replica.String(){ +func (v *Volume) SetReplica(replica *ReplicaPlacement) error { + if replica == nil { + replica, _ = NewReplicaPlacementFromString("000") + } + if v.ReplicaPlacement.String() == replica.String() { return nil } 
v.ReplicaPlacement = replica return v.writeSuperBlock() -} \ No newline at end of file +} diff --git a/go/weed/weed_server/volume_server_handlers_replicate.go b/go/weed/weed_server/volume_server_handlers_replicate.go index 866c808f8..6efd2546c 100644 --- a/go/weed/weed_server/volume_server_handlers_replicate.go +++ b/go/weed/weed_server/volume_server_handlers_replicate.go @@ -9,6 +9,7 @@ import ( "github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/storage" "github.com/pierrec/lz4" + "strings" ) func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http.Request) { @@ -79,23 +80,43 @@ func (vs *VolumeServer) setVolumeReplicaHandler(w http.ResponseWriter, r *http.R return } errs := []VolumeOptError{} - for _, volume := range r.Form["volume"] { - if vid, e := storage.NewVolumeId(volume); e == nil { - if v := vs.store.GetVolume(vid); v != nil { + all, _ := strconv.ParseBool(r.FormValue("all")) + if all { + vs.store.WalkVolume(func(v *storage.Volume) (e error) { + if e := v.SetReplica(replica); e != nil { + errs = append(errs, VolumeOptError{ + Volume: v.Id.String(), + Err: e.Error(), + }) + } + return nil + }) + } else { + volumesSet := make(map[string]bool) + for _, volume := range r.Form["volume"] { + volumesSet[strings.TrimSpace(volume)] = true + } + collectionsSet := make(map[string]bool) + for _, c := range r.Form["collection"] { + collectionsSet[strings.TrimSpace(c)] = true + } + if len(collectionsSet) > 0 || len(volumesSet) > 0 { + vs.store.WalkVolume(func(v *storage.Volume) (e error) { + if !collectionsSet[v.Collection] && !volumesSet[v.Id.String()] { + return nil + } if e := v.SetReplica(replica); e != nil { errs = append(errs, VolumeOptError{ - Volume: volume, + Volume: v.Id.String(), Err: e.Error(), }) } - } - } else { - errs = append(errs, VolumeOptError{ - Volume: volume, - Err: e.Error(), + return nil }) } + } + result := make(map[string]interface{}) if len(errs) > 0 { result["error"] = "set volume replica 
error." From 3e304e51e9b2872e19f87b9d2e89772a488c1212 Mon Sep 17 00:00:00 2001 From: tnextday Date: Sat, 12 Dec 2015 21:41:15 +0800 Subject: [PATCH 11/46] master server can reset the replicate replacement --- go/storage/volume.go | 14 ++++++ go/storage/volume_vacuum.go | 1 + go/topology/topology.go | 15 +++++++ go/weed/weed_server/master_server.go | 1 + .../master_server_handlers_admin.go | 43 +++++++++++++++++++ .../volume_server_handlers_replicate.go | 5 ++- 6 files changed, 78 insertions(+), 1 deletion(-) diff --git a/go/storage/volume.go b/go/storage/volume.go index 124990341..44d80a6be 100644 --- a/go/storage/volume.go +++ b/go/storage/volume.go @@ -437,3 +437,17 @@ func (v *Volume) SetReplica(replica *ReplicaPlacement) error { v.ReplicaPlacement = replica return v.writeSuperBlock() } + +func (v *Volume) SetReadOnly(isReadOnly bool) error { + if isReadOnly == false { + if fi, e := v.dataFile.Stat(); e != nil { + return e + } else { + if fi.Mode()&0200 == 0 { + return errors.New(v.FileName() + ".dat is READONLY") + } + } + } + v.readOnly = isReadOnly + return nil +} diff --git a/go/storage/volume_vacuum.go b/go/storage/volume_vacuum.go index 7377afdc9..a2b7cdf76 100644 --- a/go/storage/volume_vacuum.go +++ b/go/storage/volume_vacuum.go @@ -30,6 +30,7 @@ func (v *Volume) commitCompact() error { glog.V(3).Infof("Got Committing lock...") _ = v.dataFile.Close() var e error + if e = os.Rename(v.FileName()+".cpd", v.FileName()+".dat"); e != nil { return e } diff --git a/go/topology/topology.go b/go/topology/topology.go index ee1477cd2..c329b5837 100644 --- a/go/topology/topology.go +++ b/go/topology/topology.go @@ -187,3 +187,18 @@ func (t *Topology) GetOrCreateDataCenter(dcName string) *DataCenter { t.LinkChildNode(dc) return dc } + +type DataNodeWalker func(dn *DataNode) (e error) + +func (t *Topology) WalkDataNode(walker DataNodeWalker) error { + for _, c := range t.Children() { + for _, rack := range c.(*DataCenter).Children() { + for _, dn := range 
rack.(*Rack).Children() { + if e := walker(dn.(*DataNode)); e != nil { + return e + } + } + } + } + return nil +} diff --git a/go/weed/weed_server/master_server.go b/go/weed/weed_server/master_server.go index db70ca6b1..3cac1873a 100644 --- a/go/weed/weed_server/master_server.go +++ b/go/weed/weed_server/master_server.go @@ -71,6 +71,7 @@ func NewMasterServer(r *mux.Router, port int, metaFolder string, r.HandleFunc("/vol/grow", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeGrowHandler))) r.HandleFunc("/vol/status", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeStatusHandler))) r.HandleFunc("/vol/vacuum", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeVacuumHandler))) + r.HandleFunc("/replica/set", ms.proxyToLeader(ms.guard.WhiteList(ms.setReplicaHandler))) r.HandleFunc("/submit", ms.guard.WhiteList(ms.submitFromMasterServerHandler)) r.HandleFunc("/delete", ms.guard.WhiteList(ms.deleteFromMasterServerHandler)) r.HandleFunc("/{fileId}", ms.proxyToLeader(ms.redirectHandler)) diff --git a/go/weed/weed_server/master_server_handlers_admin.go b/go/weed/weed_server/master_server_handlers_admin.go index fb2b18983..72962517d 100644 --- a/go/weed/weed_server/master_server_handlers_admin.go +++ b/go/weed/weed_server/master_server_handlers_admin.go @@ -10,6 +10,9 @@ import ( "strconv" "strings" + "net/url" + "sync" + "github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/operation" "github.com/chrislusf/seaweedfs/go/storage" @@ -184,3 +187,43 @@ func (ms *MasterServer) getVolumeGrowOption(r *http.Request) (*topology.VolumeGr } return volumeGrowOption, nil } + +//only proxy to each volume server +func (ms *MasterServer) setReplicaHandler(w http.ResponseWriter, r *http.Request) { + r.ParseForm() + if _, e := storage.NewReplicaPlacementFromString(r.FormValue("replication")); e != nil { + writeJsonError(w, r, http.StatusBadRequest, e) + return + } + all, _ := strconv.ParseBool(r.FormValue("all")) + if !all && len(r.Form["volume"]) == 0 && len(r.Form["collection"]) 
== 0 { + writeJsonError(w, r, http.StatusBadRequest, errors.New("No available agrs found.")) + return + } + result := make(map[string]interface{}) + forms := r.Form + var wg sync.WaitGroup + ms.Topo.WalkDataNode(func(dn *topology.DataNode) (e error) { + wg.Add(1) + go func(server string, values url.Values) { + defer wg.Done() + jsonBlob, e := util.Post("http://"+server+"/admin/set_replica", values) + if e != nil { + result[server] = map[string]interface{}{ + "error": e.Error() + " " + string(jsonBlob), + } + } + var ret interface{} + if e := json.Unmarshal(jsonBlob, ret); e == nil { + result[server] = ret + } else { + result[server] = map[string]interface{}{ + "error": e.Error() + " " + string(jsonBlob), + } + } + }(dn.Url(), forms) + return nil + }) + wg.Wait() + writeJson(w, r, http.StatusOK, result) +} diff --git a/go/weed/weed_server/volume_server_handlers_replicate.go b/go/weed/weed_server/volume_server_handlers_replicate.go index 6efd2546c..cc421dc88 100644 --- a/go/weed/weed_server/volume_server_handlers_replicate.go +++ b/go/weed/weed_server/volume_server_handlers_replicate.go @@ -18,6 +18,9 @@ func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http http.Error(w, fmt.Sprintf("Not Found volume: %v", e), http.StatusBadRequest) return } + //set read only when replicating + v.SetReadOnly(true) + defer v.SetReadOnly(false) cr, e := v.GetVolumeCleanReader() if e != nil { http.Error(w, fmt.Sprintf("Get volume clean reader: %v", e), http.StatusInternalServerError) @@ -74,7 +77,7 @@ type VolumeOptError struct { func (vs *VolumeServer) setVolumeReplicaHandler(w http.ResponseWriter, r *http.Request) { r.ParseForm() - replica, e := storage.NewReplicaPlacementFromString(r.FormValue("replica")) + replica, e := storage.NewReplicaPlacementFromString(r.FormValue("replication")) if e != nil { writeJsonError(w, r, http.StatusBadRequest, e) return From 9600d78969ac4ca9362726cc7fdfe41841e7309a Mon Sep 17 00:00:00 2001 From: tnextday Date: Mon, 14 Dec 
2015 21:57:15 +0800 Subject: [PATCH 12/46] temp save --- go/proto/system_message.proto | 59 +++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/go/proto/system_message.proto b/go/proto/system_message.proto index 548360b27..703b1f4a0 100644 --- a/go/proto/system_message.proto +++ b/go/proto/system_message.proto @@ -1,27 +1,46 @@ package operation; message VolumeInformationMessage { - required uint32 id = 1; - required uint64 size = 2; - optional string collection = 3; - required uint64 file_count = 4; - required uint64 delete_count = 5; - required uint64 deleted_byte_count = 6; - optional bool read_only = 7; - required uint32 replica_placement = 8; - optional uint32 version = 9 [default=2]; - optional uint32 ttl = 10; + required uint32 id = 1; + required uint64 size = 2; + optional string collection = 3; + required uint64 file_count = 4; + required uint64 delete_count = 5; + required uint64 deleted_byte_count = 6; + optional bool read_only = 7; + required uint32 replica_placement = 8; + optional uint32 version = 9 [default=2]; + optional uint32 ttl = 10; +} + +enum VolumeTask { + Vacuum = 0; + Replicate = 1; +} + +enum VolumeTaskStatus { + Working = 0; + Completed = 1; + Failed = 2; +} + +message BusyVolume { + required uint32 id = 1; + required VolumeTask task = 2; + required VolumeTaskStatus status = 3; + required int64 start_time = 4; } message JoinMessage { - optional bool is_init = 1; - required string ip = 2; - required uint32 port = 3; - optional string public_url = 4; - required uint32 max_volume_count = 5; - required uint64 max_file_key = 6; - optional string data_center = 7; - optional string rack = 8; - repeated VolumeInformationMessage volumes = 9; - optional uint32 admin_port = 10; + optional bool is_init = 1; + required string ip = 2; + required uint32 port = 3; + optional string public_url = 4; + required uint32 max_volume_count = 5; + required uint64 max_file_key = 6; + optional string data_center = 7; + 
optional string rack = 8; + repeated VolumeInformationMessage volumes = 9; + optional uint32 admin_port = 10; + repeated BusyVolume busy_volumes = 11; } From 1c4b0fd9a0e4442d88b62ef726870ac0d892d374 Mon Sep 17 00:00:00 2001 From: tnextday Date: Tue, 15 Dec 2015 22:13:06 +0800 Subject: [PATCH 13/46] lookup support query `Head` and `readonly` location --- go/operation/chunked_file.go | 2 +- go/operation/delete_content.go | 2 +- go/operation/lookup.go | 28 +++++++++++++++---- go/operation/lookup_vid_cache.go | 6 ++-- go/operation/lookup_vid_cache_test.go | 2 +- go/weed/backup.go | 2 +- go/weed/benchmark.go | 2 +- go/weed/download.go | 4 +-- go/weed/weed_server/filer_server_handlers.go | 3 +- go/weed/weed_server/master_server_handlers.go | 2 +- .../volume_server_handlers_read.go | 2 +- 11 files changed, 36 insertions(+), 19 deletions(-) diff --git a/go/operation/chunked_file.go b/go/operation/chunked_file.go index 70564cbd2..c42da903a 100644 --- a/go/operation/chunked_file.go +++ b/go/operation/chunked_file.go @@ -150,7 +150,7 @@ func (cf *ChunkedFileReader) WriteTo(w io.Writer) (n int64, err error) { for ; chunkIndex < cm.Chunks.Len(); chunkIndex++ { ci := cm.Chunks[chunkIndex] // if we need read date from local volume server first? 
- fileUrl, lookupError := LookupFileId(cf.Master, ci.Fid) + fileUrl, lookupError := LookupFileId(cf.Master, ci.Fid, true) if lookupError != nil { return n, lookupError } diff --git a/go/operation/delete_content.go b/go/operation/delete_content.go index 32ad69b17..3a9ea28e3 100644 --- a/go/operation/delete_content.go +++ b/go/operation/delete_content.go @@ -21,7 +21,7 @@ type DeleteResult struct { } func DeleteFile(master string, fileId string, jwt security.EncodedJwt) error { - fileUrl, err := LookupFileId(master, fileId) + fileUrl, err := LookupFileId(master, fileId, false) if err != nil { return err } diff --git a/go/operation/lookup.go b/go/operation/lookup.go index f77d1ec9b..7719690ec 100644 --- a/go/operation/lookup.go +++ b/go/operation/lookup.go @@ -16,21 +16,33 @@ type Location struct { Url string `json:"url,omitempty"` PublicUrl string `json:"publicUrl,omitempty"` } + +type Locations []Location + type LookupResult struct { - VolumeId string `json:"volumeId,omitempty"` - Locations []Location `json:"locations,omitempty"` - Error string `json:"error,omitempty"` + VolumeId string `json:"volumeId,omitempty"` + Locations Locations `json:"locations,omitempty"` + Error string `json:"error,omitempty"` } func (lr *LookupResult) String() string { return fmt.Sprintf("VolumeId:%s, Locations:%v, Error:%s", lr.VolumeId, lr.Locations, lr.Error) } +func (ls Locations) Head() *Location { + return &ls[0] +} + +func (ls Locations) PickForRead() *Location { + return &ls[rand.Intn(len(ls))] +} + var ( vc VidCache // caching of volume locations, re-check if after 10 minutes ) func Lookup(server string, vid string) (ret *LookupResult, err error) { + //Maybe we should fetch from master when lookup location for write locations, cache_err := vc.Get(vid) if cache_err != nil { if ret, err = do_lookup(server, vid); err == nil { @@ -60,7 +72,7 @@ func do_lookup(server string, vid string) (*LookupResult, error) { return &ret, nil } -func LookupFileId(server string, fileId string) 
(fullUrl string, err error) { +func LookupFileId(server string, fileId string, readonly bool) (fullUrl string, err error) { parts := strings.Split(fileId, ",") if len(parts) != 2 { return "", errors.New("Invalid fileId " + fileId) @@ -72,7 +84,13 @@ func LookupFileId(server string, fileId string) (fullUrl string, err error) { if len(lookup.Locations) == 0 { return "", errors.New("File Not Found") } - return "http://" + lookup.Locations[rand.Intn(len(lookup.Locations))].Url + "/" + fileId, nil + var u string + if readonly{ + u = lookup.Locations.PickForRead().Url + }else{ + u = lookup.Locations.Head().Url + } + return "http://" + u + "/" + fileId, nil } // LookupVolumeIds find volume locations by cache and actual lookup diff --git a/go/operation/lookup_vid_cache.go b/go/operation/lookup_vid_cache.go index ac4240102..ecbfbfade 100644 --- a/go/operation/lookup_vid_cache.go +++ b/go/operation/lookup_vid_cache.go @@ -9,14 +9,14 @@ import ( ) type VidInfo struct { - Locations []Location + Locations Locations NextRefreshTime time.Time } type VidCache struct { cache []VidInfo } -func (vc *VidCache) Get(vid string) ([]Location, error) { +func (vc *VidCache) Get(vid string) (Locations, error) { id, err := strconv.Atoi(vid) if err != nil { glog.V(1).Infof("Unknown volume id %s", vid) @@ -33,7 +33,7 @@ func (vc *VidCache) Get(vid string) ([]Location, error) { } return nil, errors.New("Not Found") } -func (vc *VidCache) Set(vid string, locations []Location, duration time.Duration) { +func (vc *VidCache) Set(vid string, locations Locations, duration time.Duration) { id, err := strconv.Atoi(vid) if err != nil { glog.V(1).Infof("Unknown volume id %s", vid) diff --git a/go/operation/lookup_vid_cache_test.go b/go/operation/lookup_vid_cache_test.go index 9c9e2affb..e3e24e37e 100644 --- a/go/operation/lookup_vid_cache_test.go +++ b/go/operation/lookup_vid_cache_test.go @@ -10,7 +10,7 @@ func TestCaching(t *testing.T) { var ( vc VidCache ) - var locations []Location + var locations 
Locations locations = append(locations, Location{Url: "a.com:8080"}) vc.Set("123", locations, time.Second) ret, _ := vc.Get("123") diff --git a/go/weed/backup.go b/go/weed/backup.go index 5e51a8b03..0e78f2e2b 100644 --- a/go/weed/backup.go +++ b/go/weed/backup.go @@ -57,7 +57,7 @@ func runBackup(cmd *Command, args []string) bool { fmt.Printf("Error looking up volume %d: %v\n", vid, err) return true } - volumeServer := lookup.Locations[0].Url + volumeServer := lookup.Locations.Head().Url stats, err := operation.GetVolumeSyncStatus(volumeServer, vid.String()) if err != nil { diff --git a/go/weed/benchmark.go b/go/weed/benchmark.go index b63f0008e..51652b1ae 100644 --- a/go/weed/benchmark.go +++ b/go/weed/benchmark.go @@ -254,7 +254,7 @@ func readFiles(fileIdLineChan chan string, s *stat) { println("!!!! volume id ", vid, " location not found!!!!!") continue } - server := ret.Locations[rand.Intn(len(ret.Locations))].Url + server := ret.Locations.PickForRead().Url url := "http://" + server + "/" + fid if bytesRead, err := util.Get(url); err == nil { s.completed++ diff --git a/go/weed/download.go b/go/weed/download.go index dfe4f88b4..df7d1a470 100644 --- a/go/weed/download.go +++ b/go/weed/download.go @@ -53,7 +53,7 @@ func runDownload(cmd *Command, args []string) bool { } func downloadToFile(server, fileId, saveDir string) error { - fileUrl, lookupError := operation.LookupFileId(server, fileId) + fileUrl, lookupError := operation.LookupFileId(server, fileId, true) if lookupError != nil { return lookupError } @@ -105,7 +105,7 @@ func downloadToFile(server, fileId, saveDir string) error { } func fetchContent(server string, fileId string) (filename string, content []byte, e error) { - fileUrl, lookupError := operation.LookupFileId(server, fileId) + fileUrl, lookupError := operation.LookupFileId(server, fileId, true) if lookupError != nil { return "", nil, lookupError } diff --git a/go/weed/weed_server/filer_server_handlers.go 
b/go/weed/weed_server/filer_server_handlers.go index 1695296d4..24bbbcf26 100644 --- a/go/weed/weed_server/filer_server_handlers.go +++ b/go/weed/weed_server/filer_server_handlers.go @@ -5,7 +5,6 @@ import ( "errors" "io" "io/ioutil" - "math/rand" "net/http" "net/url" "strconv" @@ -91,7 +90,7 @@ func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request, w.WriteHeader(http.StatusNotFound) return } - urlLocation := lookup.Locations[rand.Intn(len(lookup.Locations))].Url + urlLocation := lookup.Locations.PickForRead().Url urlString := "http://" + urlLocation + "/" + fileId if fs.redirectOnRead { http.Redirect(w, r, urlString, http.StatusFound) diff --git a/go/weed/weed_server/master_server_handlers.go b/go/weed/weed_server/master_server_handlers.go index 2be5d9524..6a5b06c3c 100644 --- a/go/weed/weed_server/master_server_handlers.go +++ b/go/weed/weed_server/master_server_handlers.go @@ -25,7 +25,7 @@ func (ms *MasterServer) lookupVolumeId(vids []string, collection string) (volume if err == nil { machines := ms.Topo.Lookup(collection, volumeId) if machines != nil { - var ret []operation.Location + var ret operation.Locations for _, dn := range machines { ret = append(ret, operation.Location{Url: dn.Url(), PublicUrl: dn.PublicUrl}) } diff --git a/go/weed/weed_server/volume_server_handlers_read.go b/go/weed/weed_server/volume_server_handlers_read.go index 2aa0fc656..eac26d151 100644 --- a/go/weed/weed_server/volume_server_handlers_read.go +++ b/go/weed/weed_server/volume_server_handlers_read.go @@ -48,7 +48,7 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) lookupResult, err := operation.Lookup(vs.GetMasterNode(), volumeId.String()) glog.V(2).Infoln("volume", volumeId, "found on", lookupResult, "error", err) if err == nil && len(lookupResult.Locations) > 0 { - http.Redirect(w, r, util.NormalizeUrl(lookupResult.Locations[0].PublicUrl)+r.URL.Path, http.StatusMovedPermanently) + http.Redirect(w, r, 
util.NormalizeUrl(lookupResult.Locations.Head().PublicUrl)+r.URL.Path, http.StatusMovedPermanently) } else { glog.V(2).Infoln("lookup error:", err, r.URL.Path) w.WriteHeader(http.StatusNotFound) From f85acd7dea9a73efac7481bc3d7324473b6aff35 Mon Sep 17 00:00:00 2001 From: tnextday Date: Wed, 16 Dec 2015 17:44:14 +0800 Subject: [PATCH 14/46] Volume server add `/admin/setting` interface, support set volume replication and readonly --- .../master_server_handlers_admin.go | 25 +++- go/weed/weed_server/volume_server.go | 2 +- .../volume_server_handlers_admin.go | 86 ++++++++++++ .../volume_server_handlers_replicate.go | 130 ------------------ .../volume_server_handlers_sync.go | 61 ++++++++ 5 files changed, 168 insertions(+), 136 deletions(-) delete mode 100644 go/weed/weed_server/volume_server_handlers_replicate.go diff --git a/go/weed/weed_server/master_server_handlers_admin.go b/go/weed/weed_server/master_server_handlers_admin.go index 72962517d..b5eecc55b 100644 --- a/go/weed/weed_server/master_server_handlers_admin.go +++ b/go/weed/weed_server/master_server_handlers_admin.go @@ -191,7 +191,8 @@ func (ms *MasterServer) getVolumeGrowOption(r *http.Request) (*topology.VolumeGr //only proxy to each volume server func (ms *MasterServer) setReplicaHandler(w http.ResponseWriter, r *http.Request) { r.ParseForm() - if _, e := storage.NewReplicaPlacementFromString(r.FormValue("replication")); e != nil { + replicationValue := r.FormValue("replication") + if _, e := storage.NewReplicaPlacementFromString(replicationValue); e != nil { writeJsonError(w, r, http.StatusBadRequest, e) return } @@ -200,14 +201,27 @@ func (ms *MasterServer) setReplicaHandler(w http.ResponseWriter, r *http.Request writeJsonError(w, r, http.StatusBadRequest, errors.New("No available agrs found.")) return } - result := make(map[string]interface{}) - forms := r.Form + result := ms.batchSetVolumeOption("replication", replicationValue, r.Form["volume"], r.Form["collection"]) + writeJson(w, r, http.StatusOK, 
result) +} + +func (ms *MasterServer) batchSetVolumeOption(settingKey, settingValue string, volumes, collections []string)(result map[string]interface{}){ + forms := url.Values{} + forms.Set("key", settingKey) + forms.Set("value", settingValue) + if len(volumes) == 0 && len(collections) == 0 { + forms.Set("all", "true") + }else{ + forms["volume"] = volumes + forms["collection"] = collections + } + var wg sync.WaitGroup ms.Topo.WalkDataNode(func(dn *topology.DataNode) (e error) { wg.Add(1) go func(server string, values url.Values) { defer wg.Done() - jsonBlob, e := util.Post("http://"+server+"/admin/set_replica", values) + jsonBlob, e := util.Post("http://"+server+"/admin/setting", values) if e != nil { result[server] = map[string]interface{}{ "error": e.Error() + " " + string(jsonBlob), @@ -225,5 +239,6 @@ func (ms *MasterServer) setReplicaHandler(w http.ResponseWriter, r *http.Request return nil }) wg.Wait() - writeJson(w, r, http.StatusOK, result) + return } + diff --git a/go/weed/weed_server/volume_server.go b/go/weed/weed_server/volume_server.go index 3480ad09b..c1f5acb5a 100644 --- a/go/weed/weed_server/volume_server.go +++ b/go/weed/weed_server/volume_server.go @@ -53,12 +53,12 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string, adminMux.HandleFunc("/admin/vacuum/check", vs.guard.WhiteList(vs.vacuumVolumeCheckHandler)) adminMux.HandleFunc("/admin/vacuum/compact", vs.guard.WhiteList(vs.vacuumVolumeCompactHandler)) adminMux.HandleFunc("/admin/vacuum/commit", vs.guard.WhiteList(vs.vacuumVolumeCommitHandler)) + adminMux.HandleFunc("/admin/setting", vs.guard.WhiteList(vs.setVolumeOptionHandler)) adminMux.HandleFunc("/admin/delete_collection", vs.guard.WhiteList(vs.deleteCollectionHandler)) adminMux.HandleFunc("/admin/sync/status", vs.guard.WhiteList(vs.getVolumeSyncStatusHandler)) adminMux.HandleFunc("/admin/sync/index", vs.guard.WhiteList(vs.getVolumeIndexContentHandler)) adminMux.HandleFunc("/admin/sync/data", 
vs.guard.WhiteList(vs.getVolumeDataContentHandler)) adminMux.HandleFunc("/admin/sync/vol_data", vs.guard.WhiteList(vs.getVolumeCleanDataHandler)) - adminMux.HandleFunc("/admin/set_replica", vs.guard.WhiteList(vs.setVolumeReplicaHandler)) adminMux.HandleFunc("/stats/counter", vs.guard.WhiteList(statsCounterHandler)) adminMux.HandleFunc("/stats/memory", vs.guard.WhiteList(statsMemoryHandler)) adminMux.HandleFunc("/stats/disk", vs.guard.WhiteList(vs.statsDiskHandler)) diff --git a/go/weed/weed_server/volume_server_handlers_admin.go b/go/weed/weed_server/volume_server_handlers_admin.go index 80aeb3f1d..15dccfb24 100644 --- a/go/weed/weed_server/volume_server_handlers_admin.go +++ b/go/weed/weed_server/volume_server_handlers_admin.go @@ -1,14 +1,23 @@ package weed_server import ( + "errors" "net/http" "path/filepath" + "strconv" + "strings" "github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/stats" + "github.com/chrislusf/seaweedfs/go/storage" "github.com/chrislusf/seaweedfs/go/util" ) +type VolumeOptError struct { + Volume string `json:"volume"` + Err string `json:"err"` +} + func (vs *VolumeServer) statusHandler(w http.ResponseWriter, r *http.Request) { m := make(map[string]interface{}) m["Version"] = util.VERSION @@ -48,3 +57,80 @@ func (vs *VolumeServer) statsDiskHandler(w http.ResponseWriter, r *http.Request) m["DiskStatuses"] = ds writeJsonQuiet(w, r, http.StatusOK, m) } + +func (vs *VolumeServer) setVolumeOptionHandler(w http.ResponseWriter, r *http.Request) { + r.ParseForm() + errs := []VolumeOptError{} + var ( + setter storage.VolumeWalker + ) + + key := r.FormValue("key") + value := r.FormValue("value") + if key == "readonly" { + isReadOnly, e := strconv.ParseBool(value) + if e != nil { + writeJsonError(w, r, http.StatusBadRequest, e) + return + } + setter = func(v *storage.Volume) error { + if e := v.SetReadOnly(isReadOnly); e != nil { + errs = append(errs, VolumeOptError{ + Volume: v.Id.String(), + Err: e.Error(), + }) + } + return 
nil + } + } else if key == "replication" { + replica, e := storage.NewReplicaPlacementFromString(r.FormValue(value)) + if e != nil { + writeJsonError(w, r, http.StatusBadRequest, e) + return + } + setter = func(v *storage.Volume) error { + if e := v.SetReplica(replica); e != nil { + errs = append(errs, VolumeOptError{ + Volume: v.Id.String(), + Err: e.Error(), + }) + } + return nil + } + } else { + writeJsonError(w, r, http.StatusBadRequest, errors.New("Unkonw setting: "+key)) + return + } + + all, _ := strconv.ParseBool(r.FormValue("all")) + if all { + vs.store.WalkVolume(setter) + } else { + volumesSet := make(map[string]bool) + for _, volume := range r.Form["volume"] { + volumesSet[strings.TrimSpace(volume)] = true + } + collectionsSet := make(map[string]bool) + for _, c := range r.Form["collection"] { + collectionsSet[strings.TrimSpace(c)] = true + } + if len(collectionsSet) > 0 || len(volumesSet) > 0 { + vs.store.WalkVolume(func(v *storage.Volume) (e error) { + if !collectionsSet[v.Collection] && !volumesSet[v.Id.String()] { + return nil + } + setter(v) + return nil + }) + } + + } + + result := make(map[string]interface{}) + if len(errs) > 0 { + result["error"] = "set volume replica error." 
+ result["errors"] = errs + } + + writeJson(w, r, http.StatusAccepted, result) +} diff --git a/go/weed/weed_server/volume_server_handlers_replicate.go b/go/weed/weed_server/volume_server_handlers_replicate.go deleted file mode 100644 index cc421dc88..000000000 --- a/go/weed/weed_server/volume_server_handlers_replicate.go +++ /dev/null @@ -1,130 +0,0 @@ -package weed_server - -import ( - "fmt" - "io" - "net/http" - "strconv" - - "github.com/chrislusf/seaweedfs/go/glog" - "github.com/chrislusf/seaweedfs/go/storage" - "github.com/pierrec/lz4" - "strings" -) - -func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http.Request) { - v, e := vs.getVolume("volume", r) - if v == nil { - http.Error(w, fmt.Sprintf("Not Found volume: %v", e), http.StatusBadRequest) - return - } - //set read only when replicating - v.SetReadOnly(true) - defer v.SetReadOnly(false) - cr, e := v.GetVolumeCleanReader() - if e != nil { - http.Error(w, fmt.Sprintf("Get volume clean reader: %v", e), http.StatusInternalServerError) - return - } - totalSize, e := cr.Size() - if e != nil { - http.Error(w, fmt.Sprintf("Get volume size: %v", e), http.StatusInternalServerError) - return - } - w.Header().Set("Accept-Ranges", "bytes") - w.Header().Set("Content-Disposition", fmt.Sprintf(`filename="%d.dat.lz4"`, v.Id)) - - rangeReq := r.Header.Get("Range") - if rangeReq == "" { - w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10)) - w.Header().Set("Content-Encoding", "lz4") - lz4w := lz4.NewWriter(w) - if _, e = io.Copy(lz4w, cr); e != nil { - glog.V(4).Infoln("response write error:", e) - } - lz4w.Close() - return - } - ranges, e := parseRange(rangeReq, totalSize) - if e != nil { - http.Error(w, e.Error(), http.StatusRequestedRangeNotSatisfiable) - return - } - if len(ranges) != 1 { - http.Error(w, "Only support one range", http.StatusNotImplemented) - return - } - ra := ranges[0] - if _, e := cr.Seek(ra.start, 0); e != nil { - http.Error(w, e.Error(), 
http.StatusInternalServerError) - return - } - w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10)) - w.Header().Set("Content-Range", ra.contentRange(totalSize)) - w.Header().Set("Content-Encoding", "lz4") - w.WriteHeader(http.StatusPartialContent) - lz4w := lz4.NewWriter(w) - if _, e = io.CopyN(lz4w, cr, ra.length); e != nil { - glog.V(2).Infoln("response write error:", e) - } - lz4w.Close() -} - -type VolumeOptError struct { - Volume string `json:"volume"` - Err string `json:"err"` -} - -func (vs *VolumeServer) setVolumeReplicaHandler(w http.ResponseWriter, r *http.Request) { - r.ParseForm() - replica, e := storage.NewReplicaPlacementFromString(r.FormValue("replication")) - if e != nil { - writeJsonError(w, r, http.StatusBadRequest, e) - return - } - errs := []VolumeOptError{} - all, _ := strconv.ParseBool(r.FormValue("all")) - if all { - vs.store.WalkVolume(func(v *storage.Volume) (e error) { - if e := v.SetReplica(replica); e != nil { - errs = append(errs, VolumeOptError{ - Volume: v.Id.String(), - Err: e.Error(), - }) - } - return nil - }) - } else { - volumesSet := make(map[string]bool) - for _, volume := range r.Form["volume"] { - volumesSet[strings.TrimSpace(volume)] = true - } - collectionsSet := make(map[string]bool) - for _, c := range r.Form["collection"] { - collectionsSet[strings.TrimSpace(c)] = true - } - if len(collectionsSet) > 0 || len(volumesSet) > 0 { - vs.store.WalkVolume(func(v *storage.Volume) (e error) { - if !collectionsSet[v.Collection] && !volumesSet[v.Id.String()] { - return nil - } - if e := v.SetReplica(replica); e != nil { - errs = append(errs, VolumeOptError{ - Volume: v.Id.String(), - Err: e.Error(), - }) - } - return nil - }) - } - - } - - result := make(map[string]interface{}) - if len(errs) > 0 { - result["error"] = "set volume replica error." 
- result["errors"] = errs - } - - writeJson(w, r, http.StatusAccepted, result) -} diff --git a/go/weed/weed_server/volume_server_handlers_sync.go b/go/weed/weed_server/volume_server_handlers_sync.go index c650e5f53..859bff563 100644 --- a/go/weed/weed_server/volume_server_handlers_sync.go +++ b/go/weed/weed_server/volume_server_handlers_sync.go @@ -2,11 +2,14 @@ package weed_server import ( "fmt" + "io" "net/http" + "strconv" "github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/storage" "github.com/chrislusf/seaweedfs/go/util" + "github.com/pierrec/lz4" ) func (vs *VolumeServer) getVolumeSyncStatusHandler(w http.ResponseWriter, r *http.Request) { @@ -84,3 +87,61 @@ func (vs *VolumeServer) getVolume(volumeParameterName string, r *http.Request) ( } return v, nil } + +func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http.Request) { + v, e := vs.getVolume("volume", r) + if v == nil { + http.Error(w, fmt.Sprintf("Not Found volume: %v", e), http.StatusBadRequest) + return + } + //set read only when replicating + v.SetReadOnly(true) + defer v.SetReadOnly(false) + cr, e := v.GetVolumeCleanReader() + if e != nil { + http.Error(w, fmt.Sprintf("Get volume clean reader: %v", e), http.StatusInternalServerError) + return + } + totalSize, e := cr.Size() + if e != nil { + http.Error(w, fmt.Sprintf("Get volume size: %v", e), http.StatusInternalServerError) + return + } + w.Header().Set("Accept-Ranges", "bytes") + w.Header().Set("Content-Disposition", fmt.Sprintf(`filename="%d.dat.lz4"`, v.Id)) + + rangeReq := r.Header.Get("Range") + if rangeReq == "" { + w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10)) + w.Header().Set("Content-Encoding", "lz4") + lz4w := lz4.NewWriter(w) + if _, e = io.Copy(lz4w, cr); e != nil { + glog.V(4).Infoln("response write error:", e) + } + lz4w.Close() + return + } + ranges, e := parseRange(rangeReq, totalSize) + if e != nil { + http.Error(w, e.Error(), 
http.StatusRequestedRangeNotSatisfiable) + return + } + if len(ranges) != 1 { + http.Error(w, "Only support one range", http.StatusNotImplemented) + return + } + ra := ranges[0] + if _, e := cr.Seek(ra.start, 0); e != nil { + http.Error(w, e.Error(), http.StatusInternalServerError) + return + } + w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10)) + w.Header().Set("Content-Range", ra.contentRange(totalSize)) + w.Header().Set("Content-Encoding", "lz4") + w.WriteHeader(http.StatusPartialContent) + lz4w := lz4.NewWriter(w) + if _, e = io.CopyN(lz4w, cr, ra.length); e != nil { + glog.V(2).Infoln("response write error:", e) + } + lz4w.Close() +} From 0260a9c03040715307971cba8656430cb46bb3b1 Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 18 Dec 2015 21:27:11 +0800 Subject: [PATCH 15/46] Update volume layout when volume option have changed --- go/storage/replica_placement.go | 6 ++++++ go/storage/volume_ttl.go | 4 ++++ go/topology/data_node.go | 15 ++++++++++----- go/topology/topology.go | 18 +++++++++++------- .../weed_server/volume_server_handlers_sync.go | 3 --- 5 files changed, 31 insertions(+), 15 deletions(-) diff --git a/go/storage/replica_placement.go b/go/storage/replica_placement.go index c1aca52eb..31f8f464a 100644 --- a/go/storage/replica_placement.go +++ b/go/storage/replica_placement.go @@ -51,3 +51,9 @@ func (rp *ReplicaPlacement) String() string { func (rp *ReplicaPlacement) GetCopyCount() int { return rp.DiffDataCenterCount + rp.DiffRackCount + rp.SameRackCount + 1 } + +func (rp *ReplicaPlacement) Equal(rp1 *ReplicaPlacement) bool { + return rp.SameRackCount == rp1.SameRackCount && + rp.DiffRackCount == rp1.DiffRackCount && + rp.DiffDataCenterCount == rp1.DiffDataCenterCount +} diff --git a/go/storage/volume_ttl.go b/go/storage/volume_ttl.go index 4318bb048..676479cfb 100644 --- a/go/storage/volume_ttl.go +++ b/go/storage/volume_ttl.go @@ -114,6 +114,10 @@ func toStoredByte(readableUnitByte byte) byte { return 0 } +func (t *TTL) 
Equal(t1 *TTL) bool { + return t.count == t1.count && t.unit == t1.unit +} + func (t TTL) Minutes() uint32 { switch t.unit { case Empty: diff --git a/go/topology/data_node.go b/go/topology/data_node.go index fe0926e85..378a7486f 100644 --- a/go/topology/data_node.go +++ b/go/topology/data_node.go @@ -31,8 +31,9 @@ func (dn *DataNode) String() string { return fmt.Sprintf("Node:%s, volumes:%v, Ip:%s, Port:%d, PublicUrl:%s, Dead:%v", dn.NodeImpl.String(), dn.volumes, dn.Ip, dn.Port, dn.PublicUrl, dn.Dead) } -func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo) { - if _, ok := dn.volumes[v.Id]; !ok { +func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo) (optionChanged bool) { + optionChanged = false + if v1, ok := dn.volumes[v.Id]; !ok { dn.volumes[v.Id] = v dn.UpAdjustVolumeCountDelta(1) if !v.ReadOnly { @@ -40,11 +41,13 @@ func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo) { } dn.UpAdjustMaxVolumeId(v.Id) } else { + optionChanged = !v1.Ttl.Equal(v.Ttl) || v1.Collection != v.Collection || !v1.ReplicaPlacement.Equal(v.ReplicaPlacement) dn.volumes[v.Id] = v } + return } -func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (deletedVolumes []storage.VolumeInfo) { +func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (needToDeleteVolumes []storage.VolumeInfo) { actualVolumeMap := make(map[storage.VolumeId]storage.VolumeInfo) for _, v := range actualVolumes { actualVolumeMap[v.Id] = v @@ -53,13 +56,15 @@ func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (deletedVo if _, ok := actualVolumeMap[vid]; !ok { glog.V(0).Infoln("Deleting volume id:", vid) delete(dn.volumes, vid) - deletedVolumes = append(deletedVolumes, v) + needToDeleteVolumes = append(needToDeleteVolumes, v) dn.UpAdjustVolumeCountDelta(-1) dn.UpAdjustActiveVolumeCountDelta(-1) } } //TODO: adjust max volume id, if need to reclaim volume ids for _, v := range actualVolumes { - dn.AddOrUpdateVolume(v) + if dn.AddOrUpdateVolume(v) { + 
needToDeleteVolumes = append(needToDeleteVolumes, v) + } } return } diff --git a/go/topology/topology.go b/go/topology/topology.go index c329b5837..a2131ae3a 100644 --- a/go/topology/topology.go +++ b/go/topology/topology.go @@ -116,12 +116,12 @@ func (t *Topology) HasWritableVolume(option *VolumeGrowOption) bool { } func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string, uint64, *DataNode, error) { - vid, count, datanodes, err := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl).PickForWrite(count, option) - if err != nil || datanodes.Length() == 0 { + vid, count, dataNodes, err := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl).PickForWrite(count, option) + if err != nil || dataNodes.Length() == 0 { return "", 0, nil, errors.New("No writable volumes available!") } fileId, count := t.Sequence.NextFileId(count) - return storage.NewFileId(*vid, fileId, rand.Uint32()).String(), count, datanodes.Head(), nil + return storage.NewFileId(*vid, fileId, rand.Uint32()).String(), count, dataNodes.Head(), nil } func (t *Topology) GetVolumeLayout(collectionName string, rp *storage.ReplicaPlacement, ttl *storage.TTL) *VolumeLayout { @@ -167,13 +167,17 @@ func (t *Topology) ProcessJoinMessage(joinMessage *operation.JoinMessage) { glog.V(0).Infoln("Fail to convert joined volume information:", err.Error()) } } - deletedVolumes := dn.UpdateVolumes(volumeInfos) + + // If volume options(replica placement, ttl or collection) have changed, + // we need update its volume layout. 
+ needToDeleteVolumes := dn.UpdateVolumes(volumeInfos) + for _, v := range needToDeleteVolumes { + t.UnRegisterVolumeLayout(v, dn) + } for _, v := range volumeInfos { t.RegisterVolumeLayout(v, dn) } - for _, v := range deletedVolumes { - t.UnRegisterVolumeLayout(v, dn) - } + } func (t *Topology) GetOrCreateDataCenter(dcName string) *DataCenter { diff --git a/go/weed/weed_server/volume_server_handlers_sync.go b/go/weed/weed_server/volume_server_handlers_sync.go index 859bff563..fef434d28 100644 --- a/go/weed/weed_server/volume_server_handlers_sync.go +++ b/go/weed/weed_server/volume_server_handlers_sync.go @@ -94,9 +94,6 @@ func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http http.Error(w, fmt.Sprintf("Not Found volume: %v", e), http.StatusBadRequest) return } - //set read only when replicating - v.SetReadOnly(true) - defer v.SetReadOnly(false) cr, e := v.GetVolumeCleanReader() if e != nil { http.Error(w, fmt.Sprintf("Get volume clean reader: %v", e), http.StatusInternalServerError) From c12e45f9b2b6159a1c6255e6f5cdd9ba65c07cb8 Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 18 Dec 2015 21:28:43 +0800 Subject: [PATCH 16/46] format code --- go/weed/weed_server/master_server_handlers_admin.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/go/weed/weed_server/master_server_handlers_admin.go b/go/weed/weed_server/master_server_handlers_admin.go index b5eecc55b..5738aaef8 100644 --- a/go/weed/weed_server/master_server_handlers_admin.go +++ b/go/weed/weed_server/master_server_handlers_admin.go @@ -205,13 +205,13 @@ func (ms *MasterServer) setReplicaHandler(w http.ResponseWriter, r *http.Request writeJson(w, r, http.StatusOK, result) } -func (ms *MasterServer) batchSetVolumeOption(settingKey, settingValue string, volumes, collections []string)(result map[string]interface{}){ +func (ms *MasterServer) batchSetVolumeOption(settingKey, settingValue string, volumes, collections []string) (result map[string]interface{}) { 
forms := url.Values{}
	forms.Set("key", settingKey)
	forms.Set("value", settingValue)
	if len(volumes) == 0 && len(collections) == 0 {
		forms.Set("all", "true")
-	}else{
+	} else {
		forms["volume"] = volumes
		forms["collection"] = collections
	}
@@ -241,4 +241,3 @@ func (ms *MasterServer) batchSetVolumeOption(settingKey, settingValue string, vo
 	wg.Wait()
 	return
 }
-

From a76d4051617639cfa2f2c3bf9cc6cfb970865498 Mon Sep 17 00:00:00 2001
From: tnextday <fw2k4@163.com>
Date: Fri, 18 Dec 2015 21:29:55 +0800
Subject: [PATCH 17/46] Format code when make build

---
 Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 68bd916be..0f5bc89d7 100644
--- a/Makefile
+++ b/Makefile
@@ -14,12 +14,12 @@ clean:
 deps:
 	go get $(GO_FLAGS) -d $(SOURCE_DIR)
 
-build: deps
+imports:
+	goimports -w $(SOURCE_DIR)
+
+build: deps imports
 	go build $(GO_FLAGS) -o $(BINARY) $(SOURCE_DIR)
 
 linux: deps
 	mkdir -p linux
 	GOOS=linux GOARCH=amd64 go build $(GO_FLAGS) -o linux/$(BINARY) $(SOURCE_DIR)
-
-imports:
-	goimports -w $(SOURCE_DIR)
\ No newline at end of file

From 83c8bd4e8bab98129177b80b8f112f26c3508057 Mon Sep 17 00:00:00 2001
From: tnextday <fw2k4@163.com>
Date: Sat, 19 Dec 2015 17:48:28 +0800
Subject: [PATCH 18/46] Replica placement is now set by a global or
 per-collection setting; it can be changed at any time, and the cluster will
 then automatically replicate the volume (in development).
--- go/operation/sync_volume.go | 1 - go/proto/system_message.proto | 2 +- go/storage/replica_placement.go | 43 ++++++++++++++++ go/storage/store.go | 24 ++++----- go/storage/volume.go | 21 ++------ go/storage/volume_info.go | 10 +--- go/storage/volume_super_block.go | 8 +-- go/storage/volume_sync.go | 1 - go/storage/volume_version.go | 1 + go/topology/allocate_volume.go | 1 - go/topology/collection.go | 9 ++-- go/topology/data_node.go | 14 ++---- go/topology/store_replicate.go | 50 +++++++------------ go/topology/topology.go | 29 +++++------ go/topology/topology_event_handling.go | 6 +-- go/topology/volume_growth.go | 1 - go/topology/volume_growth_test.go | 1 + go/topology/volume_layout.go | 5 +- go/topology/volume_location_list.go | 7 +++ go/util/concurrent_read_map.go | 17 +++---- go/weed/backup.go | 7 +-- go/weed/compact.go | 2 +- go/weed/weed_server/master_server.go | 6 +-- .../master_server_handlers_admin.go | 19 +------ .../volume_server_handlers_admin.go | 17 +------ 25 files changed, 134 insertions(+), 168 deletions(-) diff --git a/go/operation/sync_volume.go b/go/operation/sync_volume.go index 54944a64e..713cf33c1 100644 --- a/go/operation/sync_volume.go +++ b/go/operation/sync_volume.go @@ -10,7 +10,6 @@ import ( ) type SyncVolumeResponse struct { - Replication string `json:"Replication,omitempty"` Ttl string `json:"Ttl,omitempty"` TailOffset uint64 `json:"TailOffset,omitempty"` CompactRevision uint16 `json:"CompactRevision,omitempty"` diff --git a/go/proto/system_message.proto b/go/proto/system_message.proto index 703b1f4a0..f89eaf180 100644 --- a/go/proto/system_message.proto +++ b/go/proto/system_message.proto @@ -8,7 +8,7 @@ message VolumeInformationMessage { required uint64 delete_count = 5; required uint64 deleted_byte_count = 6; optional bool read_only = 7; - required uint32 replica_placement = 8; +// required uint32 replica_placement = 8; optional uint32 version = 9 [default=2]; optional uint32 ttl = 10; } diff --git 
a/go/storage/replica_placement.go b/go/storage/replica_placement.go index 31f8f464a..e53bffba8 100644 --- a/go/storage/replica_placement.go +++ b/go/storage/replica_placement.go @@ -1,6 +1,7 @@ package storage import ( + "encoding/json" "errors" "fmt" ) @@ -11,6 +12,10 @@ type ReplicaPlacement struct { DiffDataCenterCount int } +type ReplicaPlacements struct { + settings map[string]*ReplicaPlacement +} + func NewReplicaPlacementFromString(t string) (*ReplicaPlacement, error) { rp := &ReplicaPlacement{} for i, c := range t { @@ -57,3 +62,41 @@ func (rp *ReplicaPlacement) Equal(rp1 *ReplicaPlacement) bool { rp.DiffRackCount == rp1.DiffRackCount && rp.DiffDataCenterCount == rp1.DiffDataCenterCount } + +func NewReplicaPlacements(defaultRP string) *ReplicaPlacements { + rp, e := NewReplicaPlacementFromString(defaultRP) + if e != nil { + rp, _ = NewReplicaPlacementFromString("000") + } + rps := &ReplicaPlacements{settings: make(map[string]*ReplicaPlacement)} + rps.settings[""] = rp + return rps +} + +func NewReplicaPlacementsFromJson(s string) *ReplicaPlacements { + m := make(map[string]*ReplicaPlacement) + if json.Unmarshal([]byte(s), m) == nil { + m[""], _ = NewReplicaPlacementFromString("000") + } + return &ReplicaPlacements{settings: m} +} + +func (rps *ReplicaPlacements) Get(collection string) *ReplicaPlacement { + if rp, ok := rps.settings[collection]; ok { + return rp + } + return rps.settings[""] +} + +func (rps *ReplicaPlacements) Set(collection, t string) error { + rp, e := NewReplicaPlacementFromString(t) + if e == nil { + rps.settings[collection] = rp + } + return e +} + +func (rps *ReplicaPlacements) Marshal() string { + buf, _ := json.Marshal(rps.settings) + return string(buf) +} diff --git a/go/storage/store.go b/go/storage/store.go index 6c7871084..9b077737d 100644 --- a/go/storage/store.go +++ b/go/storage/store.go @@ -106,11 +106,7 @@ func NewStore(port int, ip, publicUrl string, dirnames []string, maxVolumeCounts } return } -func (s *Store) 
AddVolume(volumeListString string, collection string, needleMapKind NeedleMapType, replicaPlacement string, ttlString string) error { - rt, e := NewReplicaPlacementFromString(replicaPlacement) - if e != nil { - return e - } +func (s *Store) AddVolume(volumeListString string, collection string, needleMapKind NeedleMapType, ttlString string) error { ttl, e := ReadTTL(ttlString) if e != nil { return e @@ -122,7 +118,7 @@ func (s *Store) AddVolume(volumeListString string, collection string, needleMapK if err != nil { return fmt.Errorf("Volume Id %s is not a valid unsigned integer!", id_string) } - e = s.addVolume(VolumeId(id), collection, needleMapKind, rt, ttl) + e = s.addVolume(VolumeId(id), collection, needleMapKind, ttl) } else { pair := strings.Split(range_string, "-") start, start_err := strconv.ParseUint(pair[0], 10, 64) @@ -134,7 +130,7 @@ func (s *Store) AddVolume(volumeListString string, collection string, needleMapK return fmt.Errorf("Volume End Id %s is not a valid unsigned integer!", pair[1]) } for id := start; id <= end; id++ { - if err := s.addVolume(VolumeId(id), collection, needleMapKind, rt, ttl); err != nil { + if err := s.addVolume(VolumeId(id), collection, needleMapKind, ttl); err != nil { e = err } } @@ -183,14 +179,14 @@ func (s *Store) findFreeLocation() (ret *DiskLocation) { } return ret } -func (s *Store) addVolume(vid VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement *ReplicaPlacement, ttl *TTL) error { +func (s *Store) addVolume(vid VolumeId, collection string, needleMapKind NeedleMapType, ttl *TTL) error { if s.findVolume(vid) != nil { return fmt.Errorf("Volume Id %d already exists!", vid) } if location := s.findFreeLocation(); location != nil { - glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v", - location.Directory, vid, collection, replicaPlacement, ttl) - if volume, err := NewVolume(location.Directory, collection, vid, needleMapKind, replicaPlacement, ttl); err == nil { + 
glog.V(0).Infof("In dir %s adds volume:%v collection:%s ttl:%v", + location.Directory, vid, collection, ttl) + if volume, err := NewVolume(location.Directory, collection, vid, needleMapKind, ttl); err == nil { location.volumes[vid] = volume return nil } else { @@ -213,9 +209,9 @@ func (l *DiskLocation) loadExistingVolumes(needleMapKind NeedleMapType) { } if vid, err := NewVolumeId(base); err == nil { if l.volumes[vid] == nil { - if v, e := NewVolume(l.Directory, collection, vid, needleMapKind, nil, nil); e == nil { + if v, e := NewVolume(l.Directory, collection, vid, needleMapKind, nil); e == nil { l.volumes[vid] = v - glog.V(0).Infof("data file %s, replicaPlacement=%s v=%d size=%d ttl=%s", l.Directory+"/"+name, v.ReplicaPlacement, v.Version(), v.Size(), v.Ttl.String()) + glog.V(0).Infof("data file %s, v=%d size=%d ttl=%s", l.Directory+"/"+name, v.Version(), v.Size(), v.Ttl.String()) } else { glog.V(0).Infof("new volume %s error %s", name, e) } @@ -234,7 +230,6 @@ func (s *Store) Status() []*VolumeInfo { Id: VolumeId(k), Size: v.ContentSize(), Collection: v.Collection, - ReplicaPlacement: v.ReplicaPlacement, Version: v.Version(), FileCount: v.nm.FileCount(), DeleteCount: v.nm.DeletedCount(), @@ -281,7 +276,6 @@ func (s *Store) SendHeartbeatToMaster() (masterNode string, secretKey security.S DeleteCount: proto.Uint64(uint64(v.nm.DeletedCount())), DeletedByteCount: proto.Uint64(v.nm.DeletedSize()), ReadOnly: proto.Bool(v.readOnly), - ReplicaPlacement: proto.Uint32(uint32(v.ReplicaPlacement.Byte())), Version: proto.Uint32(uint32(v.Version())), Ttl: proto.Uint32(v.Ttl.ToUint32()), } diff --git a/go/storage/volume.go b/go/storage/volume.go index 44d80a6be..b7b492b9d 100644 --- a/go/storage/volume.go +++ b/go/storage/volume.go @@ -28,9 +28,9 @@ type Volume struct { lastModifiedTime uint64 //unix time in seconds } -func NewVolume(dirname string, collection string, id VolumeId, needleMapKind NeedleMapType, replicaPlacement *ReplicaPlacement, ttl *TTL) (v *Volume, e error) 
{ +func NewVolume(dirname string, collection string, id VolumeId, needleMapKind NeedleMapType, ttl *TTL) (v *Volume, e error) { v = &Volume{dir: dirname, Collection: collection, Id: id} - v.SuperBlock = SuperBlock{ReplicaPlacement: replicaPlacement, Ttl: ttl} + v.SuperBlock = SuperBlock{Ttl: ttl} v.needleMapKind = needleMapKind e = v.load(true, true, needleMapKind) return @@ -87,7 +87,7 @@ func (v *Volume) load(alsoLoadIndex bool, createDatIfMissing bool, needleMapKind } } - if v.ReplicaPlacement == nil { + if v.version == NoneVersion { e = v.readSuperBlock() } else { e = v.maybeWriteSuperBlock() @@ -145,10 +145,6 @@ func (v *Volume) Close() { _ = v.dataFile.Close() } -func (v *Volume) NeedToReplicate() bool { - return v.ReplicaPlacement.GetCopyCount() > 1 -} - // isFileUnchanged checks whether this needle to write is same as last one. // It requires serialized access in the same volume. func (v *Volume) isFileUnchanged(n *Needle) bool { @@ -427,17 +423,6 @@ func (v *Volume) exiredLongEnough(maxDelayMinutes uint32) bool { return false } -func (v *Volume) SetReplica(replica *ReplicaPlacement) error { - if replica == nil { - replica, _ = NewReplicaPlacementFromString("000") - } - if v.ReplicaPlacement.String() == replica.String() { - return nil - } - v.ReplicaPlacement = replica - return v.writeSuperBlock() -} - func (v *Volume) SetReadOnly(isReadOnly bool) error { if isReadOnly == false { if fi, e := v.dataFile.Stat(); e != nil { diff --git a/go/storage/volume_info.go b/go/storage/volume_info.go index e4979c790..659faf213 100644 --- a/go/storage/volume_info.go +++ b/go/storage/volume_info.go @@ -10,7 +10,6 @@ import ( type VolumeInfo struct { Id VolumeId Size uint64 - ReplicaPlacement *ReplicaPlacement Ttl *TTL Collection string Version Version @@ -31,18 +30,13 @@ func NewVolumeInfo(m *operation.VolumeInformationMessage) (vi VolumeInfo, err er ReadOnly: *m.ReadOnly, Version: Version(*m.Version), } - rp, e := NewReplicaPlacementFromByte(byte(*m.ReplicaPlacement)) - 
if e != nil { - return vi, e - } - vi.ReplicaPlacement = rp vi.Ttl = LoadTTLFromUint32(*m.Ttl) return vi, nil } func (vi VolumeInfo) String() string { - return fmt.Sprintf("Id:%d, Size:%d, ReplicaPlacement:%s, Collection:%s, Version:%v, FileCount:%d, DeleteCount:%d, DeletedByteCount:%d, ReadOnly:%v", - vi.Id, vi.Size, vi.ReplicaPlacement, vi.Collection, vi.Version, vi.FileCount, vi.DeleteCount, vi.DeletedByteCount, vi.ReadOnly) + return fmt.Sprintf("Id:%d, Size:%d, Collection:%s, Version:%v, FileCount:%d, DeleteCount:%d, DeletedByteCount:%d, ReadOnly:%v", + vi.Id, vi.Size, vi.Collection, vi.Version, vi.FileCount, vi.DeleteCount, vi.DeletedByteCount, vi.ReadOnly) } /*VolumesInfo sorting*/ diff --git a/go/storage/volume_super_block.go b/go/storage/volume_super_block.go index e0fbd9e9a..5c7d01c21 100644 --- a/go/storage/volume_super_block.go +++ b/go/storage/volume_super_block.go @@ -15,14 +15,13 @@ const ( /* * Super block currently has 8 bytes allocated for each volume. * Byte 0: version, 1 or 2 -* Byte 1: Replica Placement strategy, 000, 001, 002, 010, etc +* Byte 1: Replica Placement strategy, 000, 001, 002, 010, etc (Deprecated!) * Byte 2 and byte 3: Time to live. See TTL for definition * Byte 4 and byte 5: The number of times the volume has been compacted. 
* Rest bytes: Reserved */ type SuperBlock struct { version Version - ReplicaPlacement *ReplicaPlacement Ttl *TTL CompactRevision uint16 } @@ -33,7 +32,7 @@ func (s *SuperBlock) Version() Version { func (s *SuperBlock) Bytes() []byte { header := make([]byte, SuperBlockSize) header[0] = byte(s.version) - header[1] = s.ReplicaPlacement.Byte() + header[1] = 0 s.Ttl.ToBytes(header[2:4]) util.Uint16toBytes(header[4:6], s.CompactRevision) return header @@ -83,9 +82,6 @@ func (v *Volume) writeSuperBlock() (err error) { func ParseSuperBlock(header []byte) (superBlock SuperBlock, err error) { superBlock.version = Version(header[0]) - if superBlock.ReplicaPlacement, err = NewReplicaPlacementFromByte(header[1]); err != nil { - err = fmt.Errorf("cannot read replica type: %s", err.Error()) - } superBlock.Ttl = LoadTTLFromBytes(header[2:4]) superBlock.CompactRevision = util.BytesToUint16(header[4:6]) return diff --git a/go/storage/volume_sync.go b/go/storage/volume_sync.go index 01d59d6ae..7d09c873d 100644 --- a/go/storage/volume_sync.go +++ b/go/storage/volume_sync.go @@ -169,7 +169,6 @@ func (v *Volume) GetVolumeSyncStatus() operation.SyncVolumeResponse { syncStatus.IdxFileSize = v.nm.IndexFileSize() syncStatus.CompactRevision = v.SuperBlock.CompactRevision syncStatus.Ttl = v.SuperBlock.Ttl.String() - syncStatus.Replication = v.SuperBlock.ReplicaPlacement.String() return syncStatus } diff --git a/go/storage/volume_version.go b/go/storage/volume_version.go index 2e9f58aa2..8cd132c58 100644 --- a/go/storage/volume_version.go +++ b/go/storage/volume_version.go @@ -3,6 +3,7 @@ package storage type Version uint8 const ( + NoneVersion = Version(0) Version1 = Version(1) Version2 = Version(2) CurrentVersion = Version2 diff --git a/go/topology/allocate_volume.go b/go/topology/allocate_volume.go index f014c3527..6de3130b1 100644 --- a/go/topology/allocate_volume.go +++ b/go/topology/allocate_volume.go @@ -18,7 +18,6 @@ func AllocateVolume(dn *DataNode, vid storage.VolumeId, option 
*VolumeGrowOption values := make(url.Values) values.Add("volume", vid.String()) values.Add("collection", option.Collection) - values.Add("replication", option.ReplicaPlacement.String()) values.Add("ttl", option.Ttl.String()) jsonBlob, err := util.Post("http://"+dn.Url()+"/admin/assign_volume", values) if err != nil { diff --git a/go/topology/collection.go b/go/topology/collection.go index 376b62405..3d7bb7e0e 100644 --- a/go/topology/collection.go +++ b/go/topology/collection.go @@ -10,10 +10,11 @@ import ( type Collection struct { Name string volumeSizeLimit uint64 + rp *storage.ReplicaPlacement storageType2VolumeLayout *util.ConcurrentReadMap } -func NewCollection(name string, volumeSizeLimit uint64) *Collection { +func NewCollection(name string, rp *storage.ReplicaPlacement, volumeSizeLimit uint64) *Collection { c := &Collection{Name: name, volumeSizeLimit: volumeSizeLimit} c.storageType2VolumeLayout = util.NewConcurrentReadMap() return c @@ -23,13 +24,13 @@ func (c *Collection) String() string { return fmt.Sprintf("Name:%s, volumeSizeLimit:%d, storageType2VolumeLayout:%v", c.Name, c.volumeSizeLimit, c.storageType2VolumeLayout) } -func (c *Collection) GetOrCreateVolumeLayout(rp *storage.ReplicaPlacement, ttl *storage.TTL) *VolumeLayout { - keyString := rp.String() +func (c *Collection) GetOrCreateVolumeLayout(ttl *storage.TTL) *VolumeLayout { + keyString := "" if ttl != nil { keyString += ttl.String() } vl := c.storageType2VolumeLayout.Get(keyString, func() interface{} { - return NewVolumeLayout(rp, ttl, c.volumeSizeLimit) + return NewVolumeLayout(c.rp, ttl, c.volumeSizeLimit) }) return vl.(*VolumeLayout) } diff --git a/go/topology/data_node.go b/go/topology/data_node.go index 378a7486f..72ced1b73 100644 --- a/go/topology/data_node.go +++ b/go/topology/data_node.go @@ -31,9 +31,8 @@ func (dn *DataNode) String() string { return fmt.Sprintf("Node:%s, volumes:%v, Ip:%s, Port:%d, PublicUrl:%s, Dead:%v", dn.NodeImpl.String(), dn.volumes, dn.Ip, dn.Port, dn.PublicUrl, 
dn.Dead) } -func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo) (optionChanged bool) { - optionChanged = false - if v1, ok := dn.volumes[v.Id]; !ok { +func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo){ + if _, ok := dn.volumes[v.Id]; !ok { dn.volumes[v.Id] = v dn.UpAdjustVolumeCountDelta(1) if !v.ReadOnly { @@ -41,13 +40,12 @@ func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo) (optionChanged bool) } dn.UpAdjustMaxVolumeId(v.Id) } else { - optionChanged = !v1.Ttl.Equal(v.Ttl) || v1.Collection != v.Collection || !v1.ReplicaPlacement.Equal(v.ReplicaPlacement) dn.volumes[v.Id] = v } return } -func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (needToDeleteVolumes []storage.VolumeInfo) { +func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (deletedVolumes []storage.VolumeInfo) { actualVolumeMap := make(map[storage.VolumeId]storage.VolumeInfo) for _, v := range actualVolumes { actualVolumeMap[v.Id] = v @@ -56,15 +54,13 @@ func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (needToDel if _, ok := actualVolumeMap[vid]; !ok { glog.V(0).Infoln("Deleting volume id:", vid) delete(dn.volumes, vid) - needToDeleteVolumes = append(needToDeleteVolumes, v) + deletedVolumes = append(deletedVolumes, v) dn.UpAdjustVolumeCountDelta(-1) dn.UpAdjustActiveVolumeCountDelta(-1) } } //TODO: adjust max volume id, if need to reclaim volume ids for _, v := range actualVolumes { - if dn.AddOrUpdateVolume(v) { - needToDeleteVolumes = append(needToDeleteVolumes, v) - } + dn.AddOrUpdateVolume(v) } return } diff --git a/go/topology/store_replicate.go b/go/topology/store_replicate.go index dc26dade0..89ada0a69 100644 --- a/go/topology/store_replicate.go +++ b/go/topology/store_replicate.go @@ -20,31 +20,25 @@ func ReplicatedWrite(masterNode string, s *storage.Store, jwt := security.GetJwt(r) ret, err := s.Write(volumeId, needle) - needToReplicate := !s.HasVolume(volumeId) if err != nil { errorStatus = "Failed to write to 
local disk (" + err.Error() + ")" - } else if ret > 0 { - needToReplicate = needToReplicate || s.GetVolume(volumeId).NeedToReplicate() - } else { + } else if ret <= 0 { errorStatus = "Failed to write to local disk" } - if !needToReplicate && ret > 0 { - needToReplicate = s.GetVolume(volumeId).NeedToReplicate() - } - if needToReplicate { //send to other replica locations - if r.FormValue("type") != "replicate" { - if !distributedOperation(masterNode, s, volumeId, func(location operation.Location) bool { - _, err := operation.Upload( - "http://"+location.Url+r.URL.Path+"?type=replicate&ts="+strconv.FormatUint(needle.LastModified, 10), - string(needle.Name), bytes.NewReader(needle.Data), needle.IsGzipped(), string(needle.Mime), - jwt) - return err == nil - }) { - ret = 0 - errorStatus = "Failed to write to replicas for volume " + volumeId.String() - } + //send to other replica locations + if r.FormValue("type") != "replicate" { + if !distributedOperation(masterNode, s, volumeId, func(location operation.Location) bool { + _, err := operation.Upload( + "http://"+location.Url+r.URL.Path+"?type=replicate&ts="+strconv.FormatUint(needle.LastModified, 10), + string(needle.Name), bytes.NewReader(needle.Data), needle.IsGzipped(), string(needle.Mime), + jwt) + return err == nil + }) { + ret = 0 + errorStatus = "Failed to write to replicas for volume " + volumeId.String() } } + size = ret return } @@ -61,18 +55,12 @@ func ReplicatedDelete(masterNode string, store *storage.Store, glog.V(0).Infoln("delete error:", err) return } - - needToReplicate := !store.HasVolume(volumeId) - if !needToReplicate && ret > 0 { - needToReplicate = store.GetVolume(volumeId).NeedToReplicate() - } - if needToReplicate { //send to other replica locations - if r.FormValue("type") != "replicate" { - if !distributedOperation(masterNode, store, volumeId, func(location operation.Location) bool { - return nil == util.Delete("http://"+location.Url+r.URL.Path+"?type=replicate", jwt) - }) { - ret = 0 - } + 
//send to other replica locations + if r.FormValue("type") != "replicate" { + if !distributedOperation(masterNode, store, volumeId, func(location operation.Location) bool { + return nil == util.Delete("http://"+location.Url+r.URL.Path+"?type=replicate", jwt) + }) { + ret = 0 } } return diff --git a/go/topology/topology.go b/go/topology/topology.go index a2131ae3a..b0324d73a 100644 --- a/go/topology/topology.go +++ b/go/topology/topology.go @@ -28,12 +28,14 @@ type Topology struct { chanRecoveredDataNodes chan *DataNode chanFullVolumes chan storage.VolumeInfo + ReplicaPlacements *storage.ReplicaPlacements + configuration *Configuration RaftServer raft.Server } -func NewTopology(id string, confFile string, seq sequence.Sequencer, volumeSizeLimit uint64, pulse int) (*Topology, error) { +func NewTopology(id string, confFile string, rp *storage.ReplicaPlacements, seq sequence.Sequencer, volumeSizeLimit uint64, pulse int) (*Topology, error) { t := &Topology{} t.id = NodeId(id) t.nodeType = "Topology" @@ -42,6 +44,7 @@ func NewTopology(id string, confFile string, seq sequence.Sequencer, volumeSizeL t.collectionMap = util.NewConcurrentReadMap() t.pulse = int64(pulse) t.volumeSizeLimit = volumeSizeLimit + t.ReplicaPlacements = rp t.Sequence = seq @@ -111,12 +114,12 @@ func (t *Topology) NextVolumeId() storage.VolumeId { } func (t *Topology) HasWritableVolume(option *VolumeGrowOption) bool { - vl := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl) + vl := t.GetVolumeLayout(option.Collection, option.Ttl) return vl.GetActiveVolumeCount(option) > 0 } func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string, uint64, *DataNode, error) { - vid, count, dataNodes, err := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl).PickForWrite(count, option) + vid, count, dataNodes, err := t.GetVolumeLayout(option.Collection, option.Ttl).PickForWrite(count, option) if err != nil || dataNodes.Length() == 0 { return "", 0, 
nil, errors.New("No writable volumes available!") } @@ -124,10 +127,10 @@ func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string, return storage.NewFileId(*vid, fileId, rand.Uint32()).String(), count, dataNodes.Head(), nil } -func (t *Topology) GetVolumeLayout(collectionName string, rp *storage.ReplicaPlacement, ttl *storage.TTL) *VolumeLayout { +func (t *Topology) GetVolumeLayout(collectionName string, ttl *storage.TTL) *VolumeLayout { return t.collectionMap.Get(collectionName, func() interface{} { - return NewCollection(collectionName, t.volumeSizeLimit) - }).(*Collection).GetOrCreateVolumeLayout(rp, ttl) + return NewCollection(collectionName, t.ReplicaPlacements.Get(collectionName), t.volumeSizeLimit) + }).(*Collection).GetOrCreateVolumeLayout(ttl) } func (t *Topology) GetCollection(collectionName string) (*Collection, bool) { @@ -140,11 +143,11 @@ func (t *Topology) DeleteCollection(collectionName string) { } func (t *Topology) RegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) { - t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl).RegisterVolume(&v, dn) + t.GetVolumeLayout(v.Collection, v.Ttl).RegisterVolume(&v, dn) } func (t *Topology) UnRegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) { glog.Infof("removing volume info:%+v", v) - t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl).UnRegisterVolume(&v, dn) + t.GetVolumeLayout(v.Collection, v.Ttl).UnRegisterVolume(&v, dn) } func (t *Topology) ProcessJoinMessage(joinMessage *operation.JoinMessage) { @@ -168,15 +171,13 @@ func (t *Topology) ProcessJoinMessage(joinMessage *operation.JoinMessage) { } } - // If volume options(replica placement, ttl or collection) have changed, - // we need update its volume layout. 
- needToDeleteVolumes := dn.UpdateVolumes(volumeInfos) - for _, v := range needToDeleteVolumes { - t.UnRegisterVolumeLayout(v, dn) - } + deletedVolumes := dn.UpdateVolumes(volumeInfos) for _, v := range volumeInfos { t.RegisterVolumeLayout(v, dn) } + for _, v := range deletedVolumes { + t.UnRegisterVolumeLayout(v, dn) + } } diff --git a/go/topology/topology_event_handling.go b/go/topology/topology_event_handling.go index 5f5faf04e..6dfa07487 100644 --- a/go/topology/topology_event_handling.go +++ b/go/topology/topology_event_handling.go @@ -42,7 +42,7 @@ func (t *Topology) StartRefreshWritableVolumes(garbageThreshold string) { }() } func (t *Topology) SetVolumeCapacityFull(volumeInfo storage.VolumeInfo) bool { - vl := t.GetVolumeLayout(volumeInfo.Collection, volumeInfo.ReplicaPlacement, volumeInfo.Ttl) + vl := t.GetVolumeLayout(volumeInfo.Collection, volumeInfo.Ttl) if !vl.SetVolumeCapacityFull(volumeInfo.Id) { return false } @@ -56,7 +56,7 @@ func (t *Topology) SetVolumeCapacityFull(volumeInfo storage.VolumeInfo) bool { func (t *Topology) UnRegisterDataNode(dn *DataNode) { for _, v := range dn.volumes { glog.V(0).Infoln("Removing Volume", v.Id, "from the dead volume server", dn) - vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl) + vl := t.GetVolumeLayout(v.Collection, v.Ttl) vl.SetVolumeUnavailable(dn, v.Id) } dn.UpAdjustVolumeCountDelta(-dn.GetVolumeCount()) @@ -66,7 +66,7 @@ func (t *Topology) UnRegisterDataNode(dn *DataNode) { } func (t *Topology) RegisterRecoveredDataNode(dn *DataNode) { for _, v := range dn.volumes { - vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl) + vl := t.GetVolumeLayout(v.Collection, v.Ttl) if vl.isWritable(&v) { vl.SetVolumeAvailable(dn, v.Id) } diff --git a/go/topology/volume_growth.go b/go/topology/volume_growth.go index a25ba116b..3de1a771f 100644 --- a/go/topology/volume_growth.go +++ b/go/topology/volume_growth.go @@ -195,7 +195,6 @@ func (vg *VolumeGrowth) grow(topo *Topology, vid storage.VolumeId, 
option *Volum Id: vid, Size: 0, Collection: option.Collection, - ReplicaPlacement: option.ReplicaPlacement, Ttl: option.Ttl, Version: storage.CurrentVersion, } diff --git a/go/topology/volume_growth_test.go b/go/topology/volume_growth_test.go index 15abfcc73..df464e47e 100644 --- a/go/topology/volume_growth_test.go +++ b/go/topology/volume_growth_test.go @@ -80,6 +80,7 @@ func setup(topologyLayout string) *Topology { //need to connect all nodes first before server adding volumes topo, err := NewTopology("weedfs", "/etc/weedfs/weedfs.conf", + storage.NewReplicaPlacements("000"), sequence.NewMemorySequencer(), 32*1024, 5) if err != nil { panic("error: " + err.Error()) diff --git a/go/topology/volume_layout.go b/go/topology/volume_layout.go index 3c1dd9503..050f576ce 100644 --- a/go/topology/volume_layout.go +++ b/go/topology/volume_layout.go @@ -42,7 +42,8 @@ func (vl *VolumeLayout) RegisterVolume(v *storage.VolumeInfo, dn *DataNode) { vl.vid2location[v.Id] = NewVolumeLocationList() } vl.vid2location[v.Id].Set(dn) - glog.V(4).Infoln("volume", v.Id, "added to dn", dn.Id(), "len", vl.vid2location[v.Id].Length(), "copy", v.ReplicaPlacement.GetCopyCount()) + glog.V(4).Infoln("volume", v.Id, "added to dn", dn.Id(), "len", vl.vid2location[v.Id].Length()) + //TODO if vl.vid2location[v.Id].Length() == vl.rp.GetCopyCount() && vl.isWritable(v) { vl.AddToWritable(v.Id) } else { @@ -53,7 +54,7 @@ func (vl *VolumeLayout) RegisterVolume(v *storage.VolumeInfo, dn *DataNode) { func (vl *VolumeLayout) UnRegisterVolume(v *storage.VolumeInfo, dn *DataNode) { vl.accessLock.Lock() defer vl.accessLock.Unlock() - + //TODO only delete data node from locations? 
vl.removeFromWritable(v.Id) delete(vl.vid2location, v.Id) } diff --git a/go/topology/volume_location_list.go b/go/topology/volume_location_list.go index d5eaf5e92..7166a4add 100644 --- a/go/topology/volume_location_list.go +++ b/go/topology/volume_location_list.go @@ -2,6 +2,8 @@ package topology import ( "fmt" + + "github.com/chrislusf/seaweedfs/go/storage" ) type VolumeLocationList struct { @@ -25,6 +27,11 @@ func (dnll *VolumeLocationList) Length() int { return len(dnll.list) } +func (dnll *VolumeLocationList) CalcReplicaPlacement() (rp *storage.ReplicaPlacement) { + //TODO CalcReplicaPlacement + return nil +} + func (dnll *VolumeLocationList) Set(loc *DataNode) { for i := 0; i < len(dnll.list); i++ { if loc.Ip == dnll.list[i].Ip && loc.Port == dnll.list[i].Port { diff --git a/go/util/concurrent_read_map.go b/go/util/concurrent_read_map.go index 41cce8b82..9e9e7f438 100644 --- a/go/util/concurrent_read_map.go +++ b/go/util/concurrent_read_map.go @@ -7,9 +7,8 @@ import ( // A mostly for read map, which can thread-safely // initialize the map entries. 
type ConcurrentReadMap struct { - rmutex sync.RWMutex - mutex sync.Mutex - Items map[string]interface{} + rwmutex sync.RWMutex + Items map[string]interface{} } func NewConcurrentReadMap() *ConcurrentReadMap { @@ -17,8 +16,8 @@ func NewConcurrentReadMap() *ConcurrentReadMap { } func (m *ConcurrentReadMap) initMapEntry(key string, newEntry func() interface{}) (value interface{}) { - m.mutex.Lock() - defer m.mutex.Unlock() + m.rwmutex.Lock() + defer m.rwmutex.Unlock() if value, ok := m.Items[key]; ok { return value } @@ -28,11 +27,11 @@ func (m *ConcurrentReadMap) initMapEntry(key string, newEntry func() interface{} } func (m *ConcurrentReadMap) Get(key string, newEntry func() interface{}) interface{} { - m.rmutex.RLock() - if value, ok := m.Items[key]; ok { - m.rmutex.RUnlock() + m.rwmutex.RLock() + value, ok := m.Items[key] + m.rwmutex.RUnlock() + if ok { return value } - m.rmutex.RUnlock() return m.initMapEntry(key, newEntry) } diff --git a/go/weed/backup.go b/go/weed/backup.go index 0e78f2e2b..2f97751f8 100644 --- a/go/weed/backup.go +++ b/go/weed/backup.go @@ -69,13 +69,8 @@ func runBackup(cmd *Command, args []string) bool { fmt.Printf("Error get volume %d ttl %s: %v\n", vid, stats.Ttl, err) return true } - replication, err := storage.NewReplicaPlacementFromString(stats.Replication) - if err != nil { - fmt.Printf("Error get volume %d replication %s : %v\n", vid, stats.Replication, err) - return true - } - v, err := storage.NewVolume(*s.dir, *s.collection, vid, storage.NeedleMapInMemory, replication, ttl) + v, err := storage.NewVolume(*s.dir, *s.collection, vid, storage.NeedleMapInMemory, ttl) if err != nil { fmt.Printf("Error creating or reading from volume %d: %v\n", vid, err) return true diff --git a/go/weed/compact.go b/go/weed/compact.go index 673b96901..b51879f97 100644 --- a/go/weed/compact.go +++ b/go/weed/compact.go @@ -33,7 +33,7 @@ func runCompact(cmd *Command, args []string) bool { vid := storage.VolumeId(*compactVolumeId) v, err := 
storage.NewVolume(*compactVolumePath, *compactVolumeCollection, vid, - storage.NeedleMapInMemory, nil, nil) + storage.NeedleMapInMemory, nil) if err != nil { glog.Fatalf("Load Volume [ERROR] %s\n", err) } diff --git a/go/weed/weed_server/master_server.go b/go/weed/weed_server/master_server.go index 3cac1873a..37fb44c74 100644 --- a/go/weed/weed_server/master_server.go +++ b/go/weed/weed_server/master_server.go @@ -11,6 +11,7 @@ import ( "github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/security" "github.com/chrislusf/seaweedfs/go/sequence" + "github.com/chrislusf/seaweedfs/go/storage" "github.com/chrislusf/seaweedfs/go/topology" "github.com/chrislusf/seaweedfs/go/util" "github.com/gorilla/mux" @@ -51,8 +52,8 @@ func NewMasterServer(r *mux.Router, port int, metaFolder string, ms.bounedLeaderChan = make(chan int, 16) seq := sequence.NewMemorySequencer() var e error - if ms.Topo, e = topology.NewTopology("topo", confFile, seq, - uint64(volumeSizeLimitMB)*1024*1024, pulseSeconds); e != nil { + if ms.Topo, e = topology.NewTopology("topo", confFile, storage.NewReplicaPlacements(defaultReplicaPlacement), + seq, uint64(volumeSizeLimitMB)*1024*1024, pulseSeconds); e != nil { glog.Fatalf("cannot create topology:%s", e) } ms.vg = topology.NewDefaultVolumeGrowth() @@ -71,7 +72,6 @@ func NewMasterServer(r *mux.Router, port int, metaFolder string, r.HandleFunc("/vol/grow", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeGrowHandler))) r.HandleFunc("/vol/status", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeStatusHandler))) r.HandleFunc("/vol/vacuum", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeVacuumHandler))) - r.HandleFunc("/replica/set", ms.proxyToLeader(ms.guard.WhiteList(ms.setReplicaHandler))) r.HandleFunc("/submit", ms.guard.WhiteList(ms.submitFromMasterServerHandler)) r.HandleFunc("/delete", ms.guard.WhiteList(ms.deleteFromMasterServerHandler)) r.HandleFunc("/{fileId}", ms.proxyToLeader(ms.redirectHandler)) diff --git 
a/go/weed/weed_server/master_server_handlers_admin.go b/go/weed/weed_server/master_server_handlers_admin.go index 5738aaef8..89c373ec7 100644 --- a/go/weed/weed_server/master_server_handlers_admin.go +++ b/go/weed/weed_server/master_server_handlers_admin.go @@ -160,7 +160,7 @@ func (ms *MasterServer) deleteFromMasterServerHandler(w http.ResponseWriter, r * } func (ms *MasterServer) HasWritableVolume(option *topology.VolumeGrowOption) bool { - vl := ms.Topo.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl) + vl := ms.Topo.GetVolumeLayout(option.Collection, option.Ttl) return vl.GetActiveVolumeCount(option) > 0 } @@ -188,23 +188,6 @@ func (ms *MasterServer) getVolumeGrowOption(r *http.Request) (*topology.VolumeGr return volumeGrowOption, nil } -//only proxy to each volume server -func (ms *MasterServer) setReplicaHandler(w http.ResponseWriter, r *http.Request) { - r.ParseForm() - replicationValue := r.FormValue("replication") - if _, e := storage.NewReplicaPlacementFromString(replicationValue); e != nil { - writeJsonError(w, r, http.StatusBadRequest, e) - return - } - all, _ := strconv.ParseBool(r.FormValue("all")) - if !all && len(r.Form["volume"]) == 0 && len(r.Form["collection"]) == 0 { - writeJsonError(w, r, http.StatusBadRequest, errors.New("No available agrs found.")) - return - } - result := ms.batchSetVolumeOption("replication", replicationValue, r.Form["volume"], r.Form["collection"]) - writeJson(w, r, http.StatusOK, result) -} - func (ms *MasterServer) batchSetVolumeOption(settingKey, settingValue string, volumes, collections []string) (result map[string]interface{}) { forms := url.Values{} forms.Set("key", settingKey) diff --git a/go/weed/weed_server/volume_server_handlers_admin.go b/go/weed/weed_server/volume_server_handlers_admin.go index 15dccfb24..9a304d895 100644 --- a/go/weed/weed_server/volume_server_handlers_admin.go +++ b/go/weed/weed_server/volume_server_handlers_admin.go @@ -26,7 +26,7 @@ func (vs *VolumeServer) 
statusHandler(w http.ResponseWriter, r *http.Request) { } func (vs *VolumeServer) assignVolumeHandler(w http.ResponseWriter, r *http.Request) { - err := vs.store.AddVolume(r.FormValue("volume"), r.FormValue("collection"), vs.needleMapKind, r.FormValue("replication"), r.FormValue("ttl")) + err := vs.store.AddVolume(r.FormValue("volume"), r.FormValue("collection"), vs.needleMapKind, r.FormValue("ttl")) if err == nil { writeJsonQuiet(w, r, http.StatusAccepted, map[string]string{"error": ""}) } else { @@ -82,21 +82,6 @@ func (vs *VolumeServer) setVolumeOptionHandler(w http.ResponseWriter, r *http.Re } return nil } - } else if key == "replication" { - replica, e := storage.NewReplicaPlacementFromString(r.FormValue(value)) - if e != nil { - writeJsonError(w, r, http.StatusBadRequest, e) - return - } - setter = func(v *storage.Volume) error { - if e := v.SetReplica(replica); e != nil { - errs = append(errs, VolumeOptError{ - Volume: v.Id.String(), - Err: e.Error(), - }) - } - return nil - } } else { writeJsonError(w, r, http.StatusBadRequest, errors.New("Unkonw setting: "+key)) return From f9ddb6598c758c496cd02191b7ba0273bb6cb7b4 Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 25 Dec 2015 00:38:04 +0800 Subject: [PATCH 19/46] Store: add a task manager to handler some volume task, such as replication, vacuum, balancing data. 
master can query task status using http long polling Store: save NeedleMapKind value in Store VolumeServer: add store task manager interface Volume: rename `CleanReader` to `PureReader` Makefile: format code before build joinmessage: add `GolobalSetting` in join response *: simplify code use `gofmt -s` --- Makefile | 6 +- go/glog/glog.go | 2 +- go/operation/lookup.go | 4 +- go/operation/submit.go | 2 +- go/proto/system_message.proto | 36 +++-- go/storage/store.go | 24 ++-- go/storage/store_task.go | 133 ++++++++++++++++++ go/storage/store_task_replication.go | 114 +++++++++++++++ go/storage/store_task_vacuum.go | 40 ++++++ go/storage/volume_info_test.go | 6 +- ...ume_replicate.go => volume_pure_reader.go} | 22 +-- go/storage/volume_super_block.go | 6 +- go/storage/volume_vacuum.go | 6 + go/topology/data_node.go | 2 +- go/topology/topology_event_handling.go | 2 +- go/topology/topology_replicate.go | 25 ++++ go/topology/topology_vacuum.go | 4 +- go/topology/volume_growth.go | 10 +- go/topology/volume_layout.go | 3 +- go/util/http_util.go | 21 +++ go/util/url_util.go | 13 ++ go/weed/shell.go | 2 - go/weed/signal_handling.go | 2 +- go/weed/weed_server/common.go | 7 +- go/weed/weed_server/volume_server.go | 9 +- .../volume_server_handlers_admin.go | 2 +- .../volume_server_handlers_task.go | 63 +++++++++ 27 files changed, 496 insertions(+), 70 deletions(-) create mode 100644 go/storage/store_task.go create mode 100644 go/storage/store_task_replication.go create mode 100644 go/storage/store_task_vacuum.go rename go/storage/{volume_replicate.go => volume_pure_reader.go} (87%) create mode 100644 go/topology/topology_replicate.go create mode 100644 go/util/url_util.go create mode 100644 go/weed/weed_server/volume_server_handlers_task.go diff --git a/Makefile b/Makefile index 0f5bc89d7..52c2ef8ca 100644 --- a/Makefile +++ b/Makefile @@ -14,10 +14,10 @@ clean: deps: go get $(GO_FLAGS) -d $(SOURCE_DIR) -imports: - goimports -w $(SOURCE_DIR) +fmt: + gofmt -w -s ./go/ -build: 
deps imports +build: deps fmt go build $(GO_FLAGS) -o $(BINARY) $(SOURCE_DIR) linux: deps diff --git a/go/glog/glog.go b/go/glog/glog.go index abd5678d4..6f6c96518 100644 --- a/go/glog/glog.go +++ b/go/glog/glog.go @@ -880,7 +880,7 @@ const flushInterval = 30 * time.Second // flushDaemon periodically flushes the log file buffers. func (l *loggingT) flushDaemon() { - for _ = range time.NewTicker(flushInterval).C { + for range time.NewTicker(flushInterval).C { l.lockAndFlushAll() } } diff --git a/go/operation/lookup.go b/go/operation/lookup.go index 7719690ec..e929a34bc 100644 --- a/go/operation/lookup.go +++ b/go/operation/lookup.go @@ -85,9 +85,9 @@ func LookupFileId(server string, fileId string, readonly bool) (fullUrl string, return "", errors.New("File Not Found") } var u string - if readonly{ + if readonly { u = lookup.Locations.PickForRead().Url - }else{ + } else { u = lookup.Locations.Head().Url } return "http://" + u + "/" + fileId, nil diff --git a/go/operation/submit.go b/go/operation/submit.go index 18484680a..8f5239f16 100644 --- a/go/operation/submit.go +++ b/go/operation/submit.go @@ -46,7 +46,7 @@ func SubmitFiles(master string, files []FilePart, } ret, err := Assign(master, uint64(len(files)), replication, collection, ttl) if err != nil { - for index, _ := range files { + for index := range files { results[index].Error = err.Error() } return results, err diff --git a/go/proto/system_message.proto b/go/proto/system_message.proto index f89eaf180..31154f8f0 100644 --- a/go/proto/system_message.proto +++ b/go/proto/system_message.proto @@ -13,24 +13,6 @@ message VolumeInformationMessage { optional uint32 ttl = 10; } -enum VolumeTask { - Vacuum = 0; - Replicate = 1; -} - -enum VolumeTaskStatus { - Working = 0; - Completed = 1; - Failed = 2; -} - -message BusyVolume { - required uint32 id = 1; - required VolumeTask task = 2; - required VolumeTaskStatus status = 3; - required int64 start_time = 4; -} - message JoinMessage { optional bool is_init = 1; 
required string ip = 2; @@ -42,5 +24,21 @@ message JoinMessage { optional string rack = 8; repeated VolumeInformationMessage volumes = 9; optional uint32 admin_port = 10; - repeated BusyVolume busy_volumes = 11; } + +message CollectionReplicaPlacement { + optional string collection = 1; + required string replica_placement = 2; +} + +message GlobalSetting { + repeated CollectionReplicaPlacement replica_placements = 1; + repeated string master_peers = 2; +} + +message JoinResponse { + optional GlobalSetting settings = 1; +} + + + diff --git a/go/storage/store.go b/go/storage/store.go index 9b077737d..434c81b0a 100644 --- a/go/storage/store.go +++ b/go/storage/store.go @@ -88,6 +88,8 @@ type Store struct { connected bool volumeSizeLimit uint64 //read from the master masterNodes *MasterNodes + needleMapKind NeedleMapType + TaskManager *TaskManager } func (s *Store) String() (str string) { @@ -96,7 +98,13 @@ func (s *Store) String() (str string) { } func NewStore(port int, ip, publicUrl string, dirnames []string, maxVolumeCounts []int, needleMapKind NeedleMapType) (s *Store) { - s = &Store{Port: port, Ip: ip, PublicUrl: publicUrl} + s = &Store{ + Port: port, + Ip: ip, + PublicUrl: publicUrl, + TaskManager: NewTaskManager(), + needleMapKind: needleMapKind, + } s.Locations = make([]*DiskLocation, 0) for i := 0; i < len(dirnames); i++ { location := &DiskLocation{Directory: dirnames[i], MaxVolumeCount: maxVolumeCounts[i]} @@ -106,7 +114,7 @@ func NewStore(port int, ip, publicUrl string, dirnames []string, maxVolumeCounts } return } -func (s *Store) AddVolume(volumeListString string, collection string, needleMapKind NeedleMapType, ttlString string) error { +func (s *Store) AddVolume(volumeListString string, collection string, ttlString string) error { ttl, e := ReadTTL(ttlString) if e != nil { return e @@ -118,7 +126,7 @@ func (s *Store) AddVolume(volumeListString string, collection string, needleMapK if err != nil { return fmt.Errorf("Volume Id %s is not a valid unsigned 
integer!", id_string) } - e = s.addVolume(VolumeId(id), collection, needleMapKind, ttl) + e = s.addVolume(VolumeId(id), collection, ttl) } else { pair := strings.Split(range_string, "-") start, start_err := strconv.ParseUint(pair[0], 10, 64) @@ -130,7 +138,7 @@ func (s *Store) AddVolume(volumeListString string, collection string, needleMapK return fmt.Errorf("Volume End Id %s is not a valid unsigned integer!", pair[1]) } for id := start; id <= end; id++ { - if err := s.addVolume(VolumeId(id), collection, needleMapKind, ttl); err != nil { + if err := s.addVolume(VolumeId(id), collection, ttl); err != nil { e = err } } @@ -179,14 +187,14 @@ func (s *Store) findFreeLocation() (ret *DiskLocation) { } return ret } -func (s *Store) addVolume(vid VolumeId, collection string, needleMapKind NeedleMapType, ttl *TTL) error { +func (s *Store) addVolume(vid VolumeId, collection string, ttl *TTL) error { if s.findVolume(vid) != nil { return fmt.Errorf("Volume Id %d already exists!", vid) } if location := s.findFreeLocation(); location != nil { glog.V(0).Infof("In dir %s adds volume:%v collection:%s ttl:%v", - location.Directory, vid, collection, ttl) - if volume, err := NewVolume(location.Directory, collection, vid, needleMapKind, ttl); err == nil { + location.Directory, vid, collection, ttl) + if volume, err := NewVolume(location.Directory, collection, vid, s.needleMapKind, ttl); err == nil { location.volumes[vid] = volume return nil } else { @@ -384,7 +392,7 @@ func (s *Store) HasVolume(i VolumeId) bool { type VolumeWalker func(v *Volume) (e error) -func (s *Store) WalkVolume(walker VolumeWalker) error{ +func (s *Store) WalkVolume(walker VolumeWalker) error { for _, location := range s.Locations { for _, v := range location.volumes { if e := walker(v); e != nil { diff --git a/go/storage/store_task.go b/go/storage/store_task.go new file mode 100644 index 000000000..2f8c0515f --- /dev/null +++ b/go/storage/store_task.go @@ -0,0 +1,133 @@ +package storage + +import ( + "errors" + 
"net/url" + "time" +) + +const ( + TaskVacuum = "VACUUM" + TaskReplica = "REPLICA" + TaskBalance = "BALANCE" +) + +var ( + ErrTaskNotFinish = errors.New("TaskNotFinish") + ErrTaskNotFound = errors.New("TaskNotFound") + ErrTaskInvalid = errors.New("TaskInvalid") + ErrTaskExists = errors.New("TaskExists") +) + +type TaskWorker interface { + Run() error + Commit() error + Clean() error + Info() url.Values +} + +type Task struct { + startTime time.Time + worker TaskWorker + ch chan bool + result error +} + +type TaskManager struct { + TaskList map[string]*Task +} + +func NewTask(worker TaskWorker) *Task { + t := &Task{ + worker: worker, + startTime: time.Now(), + result: ErrTaskNotFinish, + ch: make(chan bool, 1), + } + go func(t *Task) { + t.result = t.worker.Run() + t.ch <- true + }(t) + return t +} + +func (t *Task) QueryResult(waitDuration time.Duration) error { + if t.result == ErrTaskNotFinish && waitDuration > 0 { + select { + case <-time.After(waitDuration): + case <-t.ch: + } + } + return t.result +} + +func NewTaskManager() *TaskManager { + return &TaskManager{ + TaskList: make(map[string]*Task), + } +} + +func (tm *TaskManager) NewTask(s *Store, args url.Values) (tid string, e error) { + tt := args.Get("task") + vid := args.Get("volumme") + tid = tt + "-" + vid + if _, ok := tm.TaskList[tid]; ok { + return tid, ErrTaskExists + } + var tw TaskWorker + switch tt { + case TaskVacuum: + tw, e = NewVacuumTask(s, args) + case TaskReplica: + tw, e = NewReplicaTask(s, args) + case TaskBalance: + } + if e != nil { + return + } + if tw == nil { + return "", ErrTaskInvalid + } + tm.TaskList[tid] = NewTask(tw) + return tid, nil +} + +func (tm *TaskManager) QueryResult(tid string, waitDuration time.Duration) (e error) { + t, ok := tm.TaskList[tid] + if !ok { + return ErrTaskNotFound + } + return t.QueryResult(waitDuration) +} + +func (tm *TaskManager) Commit(tid string) (e error) { + t, ok := tm.TaskList[tid] + if !ok { + return ErrTaskNotFound + } + if 
t.QueryResult(time.Second*30) == ErrTaskNotFinish { + return ErrTaskNotFinish + } + delete(tm.TaskList, tid) + return t.worker.Commit() +} + +func (tm *TaskManager) Clean(tid string) (e error) { + t, ok := tm.TaskList[tid] + if !ok { + return ErrTaskNotFound + } + if t.QueryResult(time.Second*30) == ErrTaskNotFinish { + return ErrTaskNotFinish + } + delete(tm.TaskList, tid) + return t.worker.Clean() +} + +func (tm *TaskManager) ElapsedDuration(tid string) (time.Duration, error) { + t, ok := tm.TaskList[tid] + if !ok { + return 0, ErrTaskNotFound + } + return time.Since(t.startTime), nil +} diff --git a/go/storage/store_task_replication.go b/go/storage/store_task_replication.go new file mode 100644 index 000000000..6cad71ac4 --- /dev/null +++ b/go/storage/store_task_replication.go @@ -0,0 +1,114 @@ +package storage + +import ( + "errors" + "fmt" + "net/url" + "os" + "path" + + "github.com/chrislusf/seaweedfs/go/util" +) + +type ReplicaTask struct { + VID VolumeId + Collection string + SrcDataNode string + s *Store + location *DiskLocation +} + +func NewReplicaTask(s *Store, args url.Values) (*ReplicaTask, error) { + volumeIdString := args.Get("volume") + vid, err := NewVolumeId(volumeIdString) + if err != nil { + return nil, fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString) + } + source := args.Get("source") + if source == "" { + return nil, errors.New("Invalid source data node.") + + } + location := s.findFreeLocation() + if location == nil { + return nil, errors.New("No more free space left") + } + collection := args.Get("collection") + return &ReplicaTask{ + VID: vid, + Collection: collection, + SrcDataNode: source, + s: s, + location: location, + }, nil +} + +func (t *ReplicaTask) Run() error { + ch := make(chan error) + go func() { + idxUrl := util.MkUrl(t.SrcDataNode, "/admin/sync/index", url.Values{"volume": {t.VID.String()}}) + e := util.DownloadToFile(idxUrl, t.FileName()+".repx") + if e != nil { + e = fmt.Errorf("Replicat error: 
%s, %v", idxUrl, e) + } + ch <- e + }() + go func() { + datUrl := util.MkUrl(t.SrcDataNode, "/admin/sync/vol_data", url.Values{"volume": {t.VID.String()}}) + e := util.DownloadToFile(datUrl, t.FileName()+".repd") + if e != nil { + e = fmt.Errorf("Replicat error: %s, %v", datUrl, e) + } + ch <- e + }() + errs := make([]error, 0, 2) + for i := 0; i < 2; i++ { + if e := <-ch; e != nil { + errs = append(errs, e) + } + } + if len(errs) == 0 { + return nil + } else { + return fmt.Errorf("%v", errs) + } +} + +func (t *ReplicaTask) Commit() error { + var ( + volume *Volume + e error + ) + + if e = os.Rename(t.FileName()+".repd", t.FileName()+".dat"); e != nil { + return e + } + if e = os.Rename(t.FileName()+".repx", t.FileName()+".idx"); e != nil { + return e + } + volume, e = NewVolume(t.location.Directory, t.Collection, t.VID, t.s.needleMapKind, nil) + if e == nil { + t.location.volumes[t.VID] = volume + } + return e +} + +func (t *ReplicaTask) Clean() error { + os.Remove(t.FileName() + ".repx") + os.Remove(t.FileName() + ".repd") + return nil +} + +func (t *ReplicaTask) Info() url.Values { + //TODO + return url.Values{} +} + +func (t *ReplicaTask) FileName() (fileName string) { + if t.Collection == "" { + fileName = path.Join(t.location.Directory, t.VID.String()) + } else { + fileName = path.Join(t.location.Directory, t.Collection+"_"+t.VID.String()) + } + return +} diff --git a/go/storage/store_task_vacuum.go b/go/storage/store_task_vacuum.go new file mode 100644 index 000000000..1da789960 --- /dev/null +++ b/go/storage/store_task_vacuum.go @@ -0,0 +1,40 @@ +package storage + +import ( + "fmt" + "net/url" +) + +type VacuumTask struct { + V *Volume +} + +func NewVacuumTask(s *Store, args url.Values) (*VacuumTask, error) { + volumeIdString := args.Get("volumme") + vid, err := NewVolumeId(volumeIdString) + if err != nil { + return nil, fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString) + } + v := s.findVolume(vid) + if v == nil { + return nil, 
fmt.Errorf("volume id %d is not found", vid) + } + return &VacuumTask{V: v}, nil +} + +func (t *VacuumTask) Run() error { + return t.V.Compact() +} + +func (t *VacuumTask) Commit() error { + return t.V.commitCompact() +} + +func (t *VacuumTask) Clean() error { + return t.V.cleanCompact() +} + +func (t *VacuumTask) Info() url.Values { + //TODO + return url.Values{} +} diff --git a/go/storage/volume_info_test.go b/go/storage/volume_info_test.go index 9a9c43ad2..c90ca2336 100644 --- a/go/storage/volume_info_test.go +++ b/go/storage/volume_info_test.go @@ -4,13 +4,13 @@ import "testing" func TestSortVolumeInfos(t *testing.T) { vis := []*VolumeInfo{ - &VolumeInfo{ + { Id: 2, }, - &VolumeInfo{ + { Id: 1, }, - &VolumeInfo{ + { Id: 3, }, } diff --git a/go/storage/volume_replicate.go b/go/storage/volume_pure_reader.go similarity index 87% rename from go/storage/volume_replicate.go rename to go/storage/volume_pure_reader.go index 00b9cd14e..a03c88327 100644 --- a/go/storage/volume_replicate.go +++ b/go/storage/volume_pure_reader.go @@ -27,7 +27,7 @@ func (s DirtyDatas) Search(offset int64) int { }) } -type CleanReader struct { +type PureReader struct { Dirtys DirtyDatas DataFile *os.File pr *io.PipeReader @@ -62,7 +62,7 @@ func ScanDirtyData(indexFileContent []byte) (dirtys DirtyDatas) { return dirtys } -func (cr *CleanReader) Seek(offset int64, whence int) (int64, error) { +func (cr *PureReader) Seek(offset int64, whence int) (int64, error) { oldOff, e := cr.DataFile.Seek(0, 1) if e != nil { return 0, e @@ -77,7 +77,7 @@ func (cr *CleanReader) Seek(offset int64, whence int) (int64, error) { return newOff, nil } -func (cr *CleanReader) Size() (int64, error) { +func (cr *PureReader) Size() (int64, error) { fi, e := cr.DataFile.Stat() if e != nil { return 0, e @@ -85,7 +85,7 @@ func (cr *CleanReader) Size() (int64, error) { return fi.Size(), nil } -func (cdr *CleanReader) WriteTo(w io.Writer) (written int64, err error) { +func (cdr *PureReader) WriteTo(w io.Writer) (written 
int64, err error) { off, e := cdr.DataFile.Seek(0, 1) if e != nil { return 0, nil @@ -143,21 +143,21 @@ func (cdr *CleanReader) WriteTo(w io.Writer) (written int64, err error) { return } -func (cr *CleanReader) ReadAt(p []byte, off int64) (n int, err error) { +func (cr *PureReader) ReadAt(p []byte, off int64) (n int, err error) { cr.Seek(off, 0) return cr.Read(p) } -func (cr *CleanReader) Read(p []byte) (int, error) { +func (cr *PureReader) Read(p []byte) (int, error) { return cr.getPipeReader().Read(p) } -func (cr *CleanReader) Close() (e error) { +func (cr *PureReader) Close() (e error) { cr.closePipe(true) return cr.DataFile.Close() } -func (cr *CleanReader) closePipe(lock bool) (e error) { +func (cr *PureReader) closePipe(lock bool) (e error) { if lock { cr.mutex.Lock() defer cr.mutex.Unlock() @@ -177,7 +177,7 @@ func (cr *CleanReader) closePipe(lock bool) (e error) { return e } -func (cr *CleanReader) getPipeReader() io.Reader { +func (cr *PureReader) getPipeReader() io.Reader { cr.mutex.Lock() defer cr.mutex.Unlock() if cr.pr != nil && cr.pw != nil { @@ -192,7 +192,7 @@ func (cr *CleanReader) getPipeReader() io.Reader { return cr.pr } -func (v *Volume) GetVolumeCleanReader() (cr *CleanReader, err error) { +func (v *Volume) GetVolumeCleanReader() (cr *PureReader, err error) { var dirtys DirtyDatas if indexData, e := v.nm.IndexFileContent(); e != nil { return nil, err @@ -204,7 +204,7 @@ func (v *Volume) GetVolumeCleanReader() (cr *CleanReader, err error) { if e != nil { return nil, e } - cr = &CleanReader{ + cr = &PureReader{ Dirtys: dirtys, DataFile: dataFile, } diff --git a/go/storage/volume_super_block.go b/go/storage/volume_super_block.go index 5c7d01c21..fc8c33900 100644 --- a/go/storage/volume_super_block.go +++ b/go/storage/volume_super_block.go @@ -21,9 +21,9 @@ const ( * Rest bytes: Reserved */ type SuperBlock struct { - version Version - Ttl *TTL - CompactRevision uint16 + version Version + Ttl *TTL + CompactRevision uint16 } func (s *SuperBlock) 
Version() Version { diff --git a/go/storage/volume_vacuum.go b/go/storage/volume_vacuum.go index a2b7cdf76..3941a568f 100644 --- a/go/storage/volume_vacuum.go +++ b/go/storage/volume_vacuum.go @@ -46,6 +46,12 @@ func (v *Volume) commitCompact() error { return nil } +func (v *Volume) cleanCompact() error { + os.Remove(v.FileName() + ".cpd") + os.Remove(v.FileName() + ".cpx") + return nil +} + func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string) (err error) { var ( dst, idx *os.File diff --git a/go/topology/data_node.go b/go/topology/data_node.go index 72ced1b73..19f3870de 100644 --- a/go/topology/data_node.go +++ b/go/topology/data_node.go @@ -31,7 +31,7 @@ func (dn *DataNode) String() string { return fmt.Sprintf("Node:%s, volumes:%v, Ip:%s, Port:%d, PublicUrl:%s, Dead:%v", dn.NodeImpl.String(), dn.volumes, dn.Ip, dn.Port, dn.PublicUrl, dn.Dead) } -func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo){ +func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo) { if _, ok := dn.volumes[v.Id]; !ok { dn.volumes[v.Id] = v dn.UpAdjustVolumeCountDelta(1) diff --git a/go/topology/topology_event_handling.go b/go/topology/topology_event_handling.go index 6dfa07487..2bb2a9d66 100644 --- a/go/topology/topology_event_handling.go +++ b/go/topology/topology_event_handling.go @@ -21,7 +21,7 @@ func (t *Topology) StartRefreshWritableVolumes(garbageThreshold string) { go func(garbageThreshold string) { c := time.Tick(15 * time.Minute) if t.IsLeader() { - for _ = range c { + for range c { t.Vacuum(garbageThreshold) } } diff --git a/go/topology/topology_replicate.go b/go/topology/topology_replicate.go new file mode 100644 index 000000000..8c9281390 --- /dev/null +++ b/go/topology/topology_replicate.go @@ -0,0 +1,25 @@ +package topology + +import "github.com/chrislusf/seaweedfs/go/glog" + +func (t *Topology) Replicate(garbageThreshold string) int { + glog.V(0).Infoln("Start replicate on demand") + for _, col := range t.collectionMap.Items { + c := 
col.(*Collection) + glog.V(0).Infoln("replicate on collection:", c.Name) + for _, vl := range c.storageType2VolumeLayout.Items { + if vl != nil { + volumeLayout := vl.(*VolumeLayout) + copyCount := volumeLayout.rp.GetCopyCount() + for vid, locationList := range volumeLayout.vid2location { + if locationList.Length() < copyCount { + //set volume readonly + glog.V(0).Infoln("replicate volume :", vid) + + } + } + } + } + } + return 0 +} diff --git a/go/topology/topology_vacuum.go b/go/topology/topology_vacuum.go index 48bc8311d..cba3e8a16 100644 --- a/go/topology/topology_vacuum.go +++ b/go/topology/topology_vacuum.go @@ -26,7 +26,7 @@ func batchVacuumVolumeCheck(vl *VolumeLayout, vid storage.VolumeId, locationlist }(index, dn.Url(), vid) } isCheckSuccess := true - for _ = range locationlist.list { + for range locationlist.list { select { case canVacuum := <-ch: isCheckSuccess = isCheckSuccess && canVacuum @@ -53,7 +53,7 @@ func batchVacuumVolumeCompact(vl *VolumeLayout, vid storage.VolumeId, locationli }(index, dn.Url(), vid) } isVacuumSuccess := true - for _ = range locationlist.list { + for range locationlist.list { select { case _ = <-ch: case <-time.After(30 * time.Minute): diff --git a/go/topology/volume_growth.go b/go/topology/volume_growth.go index 3de1a771f..31307ffe0 100644 --- a/go/topology/volume_growth.go +++ b/go/topology/volume_growth.go @@ -192,11 +192,11 @@ func (vg *VolumeGrowth) grow(topo *Topology, vid storage.VolumeId, option *Volum for _, server := range servers { if err := AllocateVolume(server, vid, option); err == nil { vi := storage.VolumeInfo{ - Id: vid, - Size: 0, - Collection: option.Collection, - Ttl: option.Ttl, - Version: storage.CurrentVersion, + Id: vid, + Size: 0, + Collection: option.Collection, + Ttl: option.Ttl, + Version: storage.CurrentVersion, } server.AddOrUpdateVolume(vi) topo.RegisterVolumeLayout(vi, server) diff --git a/go/topology/volume_layout.go b/go/topology/volume_layout.go index 050f576ce..8a922f945 100644 --- 
a/go/topology/volume_layout.go +++ b/go/topology/volume_layout.go @@ -25,7 +25,6 @@ func NewVolumeLayout(rp *storage.ReplicaPlacement, ttl *storage.TTL, volumeSizeL rp: rp, ttl: ttl, vid2location: make(map[storage.VolumeId]*VolumeLocationList), - writables: *new([]storage.VolumeId), volumeSizeLimit: volumeSizeLimit, } } @@ -43,7 +42,7 @@ func (vl *VolumeLayout) RegisterVolume(v *storage.VolumeInfo, dn *DataNode) { } vl.vid2location[v.Id].Set(dn) glog.V(4).Infoln("volume", v.Id, "added to dn", dn.Id(), "len", vl.vid2location[v.Id].Length()) - //TODO + //TODO balancing data when have more replications if vl.vid2location[v.Id].Length() == vl.rp.GetCopyCount() && vl.isWritable(v) { vl.AddToWritable(v.Id) } else { diff --git a/go/util/http_util.go b/go/util/http_util.go index 29b2043ee..5060c77ad 100644 --- a/go/util/http_util.go +++ b/go/util/http_util.go @@ -11,6 +11,8 @@ import ( "net/url" "strings" + "os" + "github.com/chrislusf/seaweedfs/go/security" ) @@ -140,6 +142,10 @@ func DownloadUrl(fileUrl string) (filename string, rc io.ReadCloser, e error) { if err != nil { return "", nil, err } + if response.StatusCode != http.StatusOK { + response.Body.Close() + return "", nil, fmt.Errorf("%s: %s", fileUrl, response.Status) + } contentDisposition := response.Header["Content-Disposition"] if len(contentDisposition) > 0 { if strings.HasPrefix(contentDisposition[0], "filename=") { @@ -151,6 +157,21 @@ func DownloadUrl(fileUrl string) (filename string, rc io.ReadCloser, e error) { return } +func DownloadToFile(fileUrl, savePath string) (e error) { + _, rc, err := DownloadUrl(fileUrl) + if err != nil { + return err + } + defer rc.Close() + var f *os.File + if f, e = os.OpenFile(savePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm); e != nil { + return + } + _, e = io.Copy(f, rc) + f.Close() + return +} + func Do(req *http.Request) (resp *http.Response, err error) { return client.Do(req) } diff --git a/go/util/url_util.go b/go/util/url_util.go new file mode 100644 index 
000000000..7204d08b0 --- /dev/null +++ b/go/util/url_util.go @@ -0,0 +1,13 @@ +package util + +import "net/url" + +func MkUrl(host, path string, args url.Values) string { + u := url.URL{ + Scheme: "http", + Host: host, + Path: path, + } + u.RawQuery = args.Encode() + return u.String() +} diff --git a/go/weed/shell.go b/go/weed/shell.go index 144621b09..feac5ddd4 100644 --- a/go/weed/shell.go +++ b/go/weed/shell.go @@ -20,8 +20,6 @@ var cmdShell = &Command{ `, } -var () - func runShell(command *Command, args []string) bool { r := bufio.NewReader(os.Stdin) o := bufio.NewWriter(os.Stdout) diff --git a/go/weed/signal_handling.go b/go/weed/signal_handling.go index 2004bb088..9c3908ce3 100644 --- a/go/weed/signal_handling.go +++ b/go/weed/signal_handling.go @@ -22,7 +22,7 @@ func OnInterrupt(fn func()) { syscall.SIGTERM, syscall.SIGQUIT) go func() { - for _ = range signalChan { + for range signalChan { fn() os.Exit(0) } diff --git a/go/weed/weed_server/common.go b/go/weed/weed_server/common.go index a7fa2de53..89499af40 100644 --- a/go/weed/weed_server/common.go +++ b/go/weed/weed_server/common.go @@ -69,7 +69,12 @@ func writeJsonQuiet(w http.ResponseWriter, r *http.Request, httpStatus int, obj } func writeJsonError(w http.ResponseWriter, r *http.Request, httpStatus int, err error) { m := make(map[string]interface{}) - m["error"] = err.Error() + if err == nil { + m["error"] = "" + } else { + m["error"] = err.Error() + + } writeJsonQuiet(w, r, httpStatus, m) } diff --git a/go/weed/weed_server/volume_server.go b/go/weed/weed_server/volume_server.go index c1f5acb5a..fbf0339e3 100644 --- a/go/weed/weed_server/volume_server.go +++ b/go/weed/weed_server/volume_server.go @@ -20,7 +20,6 @@ type VolumeServer struct { store *storage.Store guard *security.Guard - needleMapKind storage.NeedleMapType FixJpgOrientation bool ReadRedirect bool } @@ -38,12 +37,11 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string, pulseSeconds: pulseSeconds, dataCenter: dataCenter, 
rack: rack, - needleMapKind: needleMapKind, FixJpgOrientation: fixJpgOrientation, ReadRedirect: readRedirect, } vs.SetMasterNode(masterNode) - vs.store = storage.NewStore(port, ip, publicUrl, folders, maxCounts, vs.needleMapKind) + vs.store = storage.NewStore(port, ip, publicUrl, folders, maxCounts, needleMapKind) vs.guard = security.NewGuard(whiteList, "") @@ -59,6 +57,11 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string, adminMux.HandleFunc("/admin/sync/index", vs.guard.WhiteList(vs.getVolumeIndexContentHandler)) adminMux.HandleFunc("/admin/sync/data", vs.guard.WhiteList(vs.getVolumeDataContentHandler)) adminMux.HandleFunc("/admin/sync/vol_data", vs.guard.WhiteList(vs.getVolumeCleanDataHandler)) + adminMux.HandleFunc("/admin/task/new", vs.guard.WhiteList(vs.newTaskHandler)) + adminMux.HandleFunc("/admin/task/query", vs.guard.WhiteList(vs.queryTaskHandler)) + adminMux.HandleFunc("/admin/task/commit", vs.guard.WhiteList(vs.commitTaskHandler)) + adminMux.HandleFunc("/admin/task/clean", vs.guard.WhiteList(vs.cleanTaskHandler)) + adminMux.HandleFunc("/admin/task/all", vs.guard.WhiteList(vs.allTaskHandler)) adminMux.HandleFunc("/stats/counter", vs.guard.WhiteList(statsCounterHandler)) adminMux.HandleFunc("/stats/memory", vs.guard.WhiteList(statsMemoryHandler)) adminMux.HandleFunc("/stats/disk", vs.guard.WhiteList(vs.statsDiskHandler)) diff --git a/go/weed/weed_server/volume_server_handlers_admin.go b/go/weed/weed_server/volume_server_handlers_admin.go index 9a304d895..779d6f99d 100644 --- a/go/weed/weed_server/volume_server_handlers_admin.go +++ b/go/weed/weed_server/volume_server_handlers_admin.go @@ -26,7 +26,7 @@ func (vs *VolumeServer) statusHandler(w http.ResponseWriter, r *http.Request) { } func (vs *VolumeServer) assignVolumeHandler(w http.ResponseWriter, r *http.Request) { - err := vs.store.AddVolume(r.FormValue("volume"), r.FormValue("collection"), vs.needleMapKind, r.FormValue("ttl")) + err := vs.store.AddVolume(r.FormValue("volume"), 
r.FormValue("collection"), r.FormValue("ttl")) if err == nil { writeJsonQuiet(w, r, http.StatusAccepted, map[string]string{"error": ""}) } else { diff --git a/go/weed/weed_server/volume_server_handlers_task.go b/go/weed/weed_server/volume_server_handlers_task.go new file mode 100644 index 000000000..cd0319660 --- /dev/null +++ b/go/weed/weed_server/volume_server_handlers_task.go @@ -0,0 +1,63 @@ +package weed_server + +import ( + "net/http" + + "time" + + "strings" + + "github.com/chrislusf/seaweedfs/go/glog" + "github.com/chrislusf/seaweedfs/go/storage" +) + +func (vs *VolumeServer) newTaskHandler(w http.ResponseWriter, r *http.Request) { + tid, e := vs.store.TaskManager.NewTask(vs.store, r.Form) + if e == nil { + writeJsonQuiet(w, r, http.StatusOK, map[string]string{"tid": tid}) + } else { + writeJsonError(w, r, http.StatusInternalServerError, e) + } + glog.V(2).Infoln("new store task =", tid, ", error =", e) +} + +func (vs *VolumeServer) queryTaskHandler(w http.ResponseWriter, r *http.Request) { + tid := r.Form.Get("tid") + timeoutStr := strings.TrimSpace(r.Form.Get("timeout")) + d := time.Minute + if td, e := time.ParseDuration(timeoutStr); e == nil { + d = td + } + err := vs.store.TaskManager.QueryResult(tid, d) + if err == storage.ErrTaskNotFinish { + writeJsonError(w, r, http.StatusRequestTimeout, err) + } else if err == nil { + writeJsonError(w, r, http.StatusOK, err) + } + glog.V(2).Infoln("query task =", tid, ", error =", err) +} +func (vs *VolumeServer) commitTaskHandler(w http.ResponseWriter, r *http.Request) { + tid := r.Form.Get("tid") + err := vs.store.TaskManager.Commit(tid) + if err == storage.ErrTaskNotFinish { + writeJsonError(w, r, http.StatusRequestTimeout, err) + } else if err == nil { + writeJsonError(w, r, http.StatusOK, err) + } + glog.V(2).Infoln("query task =", tid, ", error =", err) +} +func (vs *VolumeServer) cleanTaskHandler(w http.ResponseWriter, r *http.Request) { + tid := r.Form.Get("tid") + err := vs.store.TaskManager.Clean(tid) + 
if err == storage.ErrTaskNotFinish { + writeJsonError(w, r, http.StatusRequestTimeout, err) + } else if err == nil { + writeJsonError(w, r, http.StatusOK, err) + } + glog.V(2).Infoln("clean task =", tid, ", error =", err) +} + +func (vs *VolumeServer) allTaskHandler(w http.ResponseWriter, r *http.Request) { + //TODO get all task + glog.V(2).Infoln("TODO: get all task") +} From bb5bfef1077562833b9caac8423f6024968572b7 Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 25 Dec 2015 00:52:31 +0800 Subject: [PATCH 20/46] StoreTask: send heartbeat when commit the replicat task --- go/storage/store_task_replication.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/storage/store_task_replication.go b/go/storage/store_task_replication.go index 6cad71ac4..b6b8e230a 100644 --- a/go/storage/store_task_replication.go +++ b/go/storage/store_task_replication.go @@ -89,6 +89,7 @@ func (t *ReplicaTask) Commit() error { volume, e = NewVolume(t.location.Directory, t.Collection, t.VID, t.s.needleMapKind, nil) if e == nil { t.location.volumes[t.VID] = volume + t.s.SendHeartbeatToMaster() } return e } From 986de5c743c99cdda4459f82ffd808d4b361134e Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 25 Dec 2015 00:59:04 +0800 Subject: [PATCH 21/46] proto: comment addtion --- go/proto/system_message.proto | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/go/proto/system_message.proto b/go/proto/system_message.proto index 31154f8f0..da378cbab 100644 --- a/go/proto/system_message.proto +++ b/go/proto/system_message.proto @@ -8,7 +8,7 @@ message VolumeInformationMessage { required uint64 delete_count = 5; required uint64 deleted_byte_count = 6; optional bool read_only = 7; -// required uint32 replica_placement = 8; + required uint32 replica_placement = 8; optional uint32 version = 9 [default=2]; optional uint32 ttl = 10; } @@ -26,6 +26,7 @@ message JoinMessage { optional uint32 admin_port = 10; } +/* message CollectionReplicaPlacement { optional string collection = 
1; required string replica_placement = 2; @@ -39,6 +40,5 @@ message GlobalSetting { message JoinResponse { optional GlobalSetting settings = 1; } - - +*/ From fb2a2322fcfa2c817b5e5a1b5e7e87882bc00817 Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 25 Dec 2015 01:06:45 +0800 Subject: [PATCH 22/46] store: always lookup form master when process write operation --- go/operation/lookup.go | 8 +++++++- go/topology/store_replicate.go | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/go/operation/lookup.go b/go/operation/lookup.go index e929a34bc..83ef55752 100644 --- a/go/operation/lookup.go +++ b/go/operation/lookup.go @@ -42,7 +42,6 @@ var ( ) func Lookup(server string, vid string) (ret *LookupResult, err error) { - //Maybe we should fetch from master when lookup location for write locations, cache_err := vc.Get(vid) if cache_err != nil { if ret, err = do_lookup(server, vid); err == nil { @@ -54,6 +53,13 @@ func Lookup(server string, vid string) (ret *LookupResult, err error) { return } +func LookupNoCache(server string, vid string) (ret *LookupResult, err error) { + if ret, err = do_lookup(server, vid); err == nil { + vc.Set(vid, ret.Locations, 10*time.Minute) + } + return +} + func do_lookup(server string, vid string) (*LookupResult, error) { values := make(url.Values) values.Add("volumeId", vid) diff --git a/go/topology/store_replicate.go b/go/topology/store_replicate.go index 89ada0a69..ec5e4ba93 100644 --- a/go/topology/store_replicate.go +++ b/go/topology/store_replicate.go @@ -67,7 +67,7 @@ func ReplicatedDelete(masterNode string, store *storage.Store, } func distributedOperation(masterNode string, store *storage.Store, volumeId storage.VolumeId, op func(location operation.Location) bool) bool { - if lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String()); lookupErr == nil { + if lookupResult, lookupErr := operation.LookupNoCache(masterNode, volumeId.String()); lookupErr == nil { length := 0 selfUrl := (store.Ip + ":" + 
strconv.Itoa(store.Port)) results := make(chan bool) From 2f25bfd0d0832571701ce414177e875b21fac7ad Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 25 Dec 2015 02:00:16 +0800 Subject: [PATCH 23/46] Needle: add a reserve flag `FlagIsExtendNeedle` --- go/storage/needle_read_write.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/storage/needle_read_write.go b/go/storage/needle_read_write.go index cf1b3f94e..813f537df 100644 --- a/go/storage/needle_read_write.go +++ b/go/storage/needle_read_write.go @@ -16,6 +16,7 @@ const ( FlagHasMime = 0x04 FlagHasLastModifiedDate = 0x08 FlagHasTtl = 0x10 + FlagIsExtendNeedle = 0x40 // TODO Reserve: use extent file to save big needle FlagIsChunkManifest = 0x80 LastModifiedBytesLength = 5 TtlBytesLength = 2 From 3578628e48e94143bc24c349b8f126eebf0e55f8 Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 25 Dec 2015 11:04:17 +0800 Subject: [PATCH 24/46] *: update --- go/proto/system_message.proto | 9 +++++---- go/storage/needle_read_write.go | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/go/proto/system_message.proto b/go/proto/system_message.proto index da378cbab..dc429f0c6 100644 --- a/go/proto/system_message.proto +++ b/go/proto/system_message.proto @@ -26,19 +26,20 @@ message JoinMessage { optional uint32 admin_port = 10; } -/* -message CollectionReplicaPlacement { + +message CollectionSetting { optional string collection = 1; required string replica_placement = 2; + optional string vacuum_garbage_threshold = 3; } message GlobalSetting { - repeated CollectionReplicaPlacement replica_placements = 1; + repeated CollectionSetting settings = 1; repeated string master_peers = 2; } message JoinResponse { optional GlobalSetting settings = 1; } -*/ + diff --git a/go/storage/needle_read_write.go b/go/storage/needle_read_write.go index 813f537df..df4989329 100644 --- a/go/storage/needle_read_write.go +++ b/go/storage/needle_read_write.go @@ -16,7 +16,7 @@ const ( FlagHasMime = 0x04 FlagHasLastModifiedDate = 
0x08 FlagHasTtl = 0x10 - FlagIsExtendNeedle = 0x40 // TODO Reserve: use extent file to save big needle + FlagIsExtendNeedle = 0x40 // TODO: Reserve flag, use extent file to save big needle FlagIsChunkManifest = 0x80 LastModifiedBytesLength = 5 TtlBytesLength = 2 From 7b2e53461fb27448a9a83cd9a935b7ce4634680a Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 25 Dec 2015 12:30:23 +0800 Subject: [PATCH 25/46] Add `CollectionSetttings` util --- go/proto/system_message.proto | 4 +- go/storage/collection_settings.go | 68 ++++++++++++++++++++++++++++ go/storage/needle_read_write.go | 2 +- go/storage/replica_placement.go | 43 ------------------ go/topology/topology.go | 8 ++-- go/topology/topology_replicate.go | 4 +- go/topology/volume_growth_test.go | 2 +- go/weed/weed_server/master_server.go | 3 +- 8 files changed, 80 insertions(+), 54 deletions(-) create mode 100644 go/storage/collection_settings.go diff --git a/go/proto/system_message.proto b/go/proto/system_message.proto index dc429f0c6..b7a2456f6 100644 --- a/go/proto/system_message.proto +++ b/go/proto/system_message.proto @@ -29,8 +29,8 @@ message JoinMessage { message CollectionSetting { optional string collection = 1; - required string replica_placement = 2; - optional string vacuum_garbage_threshold = 3; + optional string replica_placement = 2; + optional float vacuum_garbage_threshold = 3; } message GlobalSetting { diff --git a/go/storage/collection_settings.go b/go/storage/collection_settings.go new file mode 100644 index 000000000..89fc89d28 --- /dev/null +++ b/go/storage/collection_settings.go @@ -0,0 +1,68 @@ +package storage + +type SettingKey int + +const ( + KeyReplicatePlacement SettingKey = iota + KeyGarbageThreshold +) + +type CollectionSettings struct { + settings map[string]map[SettingKey]interface{} +} + +func NewCollectionSettings(defaultReplicatePlacement, defaultGarbageThreshold string) *CollectionSettings { + rp, e := NewReplicaPlacementFromString(defaultReplicatePlacement) + if e != nil { + 
rp, _ = NewReplicaPlacementFromString("000") + } + c := &CollectionSettings{ + settings: make(map[string]map[SettingKey]interface{}), + } + c.Set("", KeyReplicatePlacement, rp) + c.Set("", KeyGarbageThreshold, defaultGarbageThreshold) + return c +} + +func (c *CollectionSettings) Get(collection string, key SettingKey) interface{} { + if m, ok := c.settings[collection]; ok { + if v, ok := m[key]; ok { + return v + } + } + if m, ok := c.settings[""]; ok { + if v, ok := m[key]; ok { + return v + } + } + return nil +} + +func (c *CollectionSettings) Set(collection string, key SettingKey, value interface{}) { + if _, ok := c.settings[collection]; !ok { + c.settings[collection] = make(map[SettingKey]interface{}) + } + if value == nil { + delete(c.settings[collection], key) + } +} + +func (c *CollectionSettings) GetGarbageThreshold(collection string) float32 { + return c.Get(collection, KeyGarbageThreshold).(float32) +} + +func (c *CollectionSettings) SetGarbageThreshold(collection string, gt float32) { + c.Set(collection, KeyGarbageThreshold, gt) +} + +func (c *CollectionSettings) GetReplicaPlacement(collection string) *ReplicaPlacement { + return c.Get(collection, KeyReplicatePlacement).(*ReplicaPlacement) +} + +func (c *CollectionSettings) SetReplicaPlacement(collection, t string) error { + rp, e := NewReplicaPlacementFromString(t) + if e == nil { + c.Set(collection, KeyReplicatePlacement, rp) + } + return e +} diff --git a/go/storage/needle_read_write.go b/go/storage/needle_read_write.go index df4989329..073b2899a 100644 --- a/go/storage/needle_read_write.go +++ b/go/storage/needle_read_write.go @@ -16,7 +16,7 @@ const ( FlagHasMime = 0x04 FlagHasLastModifiedDate = 0x08 FlagHasTtl = 0x10 - FlagIsExtendNeedle = 0x40 // TODO: Reserve flag, use extent file to save big needle + FlagIsExtendNeedle = 0x40 // TODO: Reserve flag, use extent file to save big needle FlagIsChunkManifest = 0x80 LastModifiedBytesLength = 5 TtlBytesLength = 2 diff --git 
a/go/storage/replica_placement.go b/go/storage/replica_placement.go index e53bffba8..31f8f464a 100644 --- a/go/storage/replica_placement.go +++ b/go/storage/replica_placement.go @@ -1,7 +1,6 @@ package storage import ( - "encoding/json" "errors" "fmt" ) @@ -12,10 +11,6 @@ type ReplicaPlacement struct { DiffDataCenterCount int } -type ReplicaPlacements struct { - settings map[string]*ReplicaPlacement -} - func NewReplicaPlacementFromString(t string) (*ReplicaPlacement, error) { rp := &ReplicaPlacement{} for i, c := range t { @@ -62,41 +57,3 @@ func (rp *ReplicaPlacement) Equal(rp1 *ReplicaPlacement) bool { rp.DiffRackCount == rp1.DiffRackCount && rp.DiffDataCenterCount == rp1.DiffDataCenterCount } - -func NewReplicaPlacements(defaultRP string) *ReplicaPlacements { - rp, e := NewReplicaPlacementFromString(defaultRP) - if e != nil { - rp, _ = NewReplicaPlacementFromString("000") - } - rps := &ReplicaPlacements{settings: make(map[string]*ReplicaPlacement)} - rps.settings[""] = rp - return rps -} - -func NewReplicaPlacementsFromJson(s string) *ReplicaPlacements { - m := make(map[string]*ReplicaPlacement) - if json.Unmarshal([]byte(s), m) == nil { - m[""], _ = NewReplicaPlacementFromString("000") - } - return &ReplicaPlacements{settings: m} -} - -func (rps *ReplicaPlacements) Get(collection string) *ReplicaPlacement { - if rp, ok := rps.settings[collection]; ok { - return rp - } - return rps.settings[""] -} - -func (rps *ReplicaPlacements) Set(collection, t string) error { - rp, e := NewReplicaPlacementFromString(t) - if e == nil { - rps.settings[collection] = rp - } - return e -} - -func (rps *ReplicaPlacements) Marshal() string { - buf, _ := json.Marshal(rps.settings) - return string(buf) -} diff --git a/go/topology/topology.go b/go/topology/topology.go index b0324d73a..6cdd1e1fa 100644 --- a/go/topology/topology.go +++ b/go/topology/topology.go @@ -28,14 +28,14 @@ type Topology struct { chanRecoveredDataNodes chan *DataNode chanFullVolumes chan storage.VolumeInfo - 
ReplicaPlacements *storage.ReplicaPlacements + CollectionSettings *storage.CollectionSettings configuration *Configuration RaftServer raft.Server } -func NewTopology(id string, confFile string, rp *storage.ReplicaPlacements, seq sequence.Sequencer, volumeSizeLimit uint64, pulse int) (*Topology, error) { +func NewTopology(id string, confFile string, cs *storage.CollectionSettings, seq sequence.Sequencer, volumeSizeLimit uint64, pulse int) (*Topology, error) { t := &Topology{} t.id = NodeId(id) t.nodeType = "Topology" @@ -44,7 +44,7 @@ func NewTopology(id string, confFile string, rp *storage.ReplicaPlacements, seq t.collectionMap = util.NewConcurrentReadMap() t.pulse = int64(pulse) t.volumeSizeLimit = volumeSizeLimit - t.ReplicaPlacements = rp + t.CollectionSettings = cs t.Sequence = seq @@ -129,7 +129,7 @@ func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string, func (t *Topology) GetVolumeLayout(collectionName string, ttl *storage.TTL) *VolumeLayout { return t.collectionMap.Get(collectionName, func() interface{} { - return NewCollection(collectionName, t.ReplicaPlacements.Get(collectionName), t.volumeSizeLimit) + return NewCollection(collectionName, t.CollectionSettings.GetReplicaPlacement(collectionName), t.volumeSizeLimit) }).(*Collection).GetOrCreateVolumeLayout(ttl) } diff --git a/go/topology/topology_replicate.go b/go/topology/topology_replicate.go index 8c9281390..dbb7d490a 100644 --- a/go/topology/topology_replicate.go +++ b/go/topology/topology_replicate.go @@ -2,8 +2,8 @@ package topology import "github.com/chrislusf/seaweedfs/go/glog" -func (t *Topology) Replicate(garbageThreshold string) int { - glog.V(0).Infoln("Start replicate on demand") +func (t *Topology) Replicate() int { + glog.V(0).Infoln("Start replicate checker on demand") for _, col := range t.collectionMap.Items { c := col.(*Collection) glog.V(0).Infoln("replicate on collection:", c.Name) diff --git a/go/topology/volume_growth_test.go 
b/go/topology/volume_growth_test.go index df464e47e..08377b4fd 100644 --- a/go/topology/volume_growth_test.go +++ b/go/topology/volume_growth_test.go @@ -80,7 +80,7 @@ func setup(topologyLayout string) *Topology { //need to connect all nodes first before server adding volumes topo, err := NewTopology("weedfs", "/etc/weedfs/weedfs.conf", - storage.NewReplicaPlacements("000"), + storage.NewCollectionSettings("000", "0.3"), sequence.NewMemorySequencer(), 32*1024, 5) if err != nil { panic("error: " + err.Error()) diff --git a/go/weed/weed_server/master_server.go b/go/weed/weed_server/master_server.go index 37fb44c74..1adb8820e 100644 --- a/go/weed/weed_server/master_server.go +++ b/go/weed/weed_server/master_server.go @@ -51,8 +51,9 @@ func NewMasterServer(r *mux.Router, port int, metaFolder string, } ms.bounedLeaderChan = make(chan int, 16) seq := sequence.NewMemorySequencer() + cs := storage.NewCollectionSettings(defaultReplicaPlacement, garbageThreshold) var e error - if ms.Topo, e = topology.NewTopology("topo", confFile, storage.NewReplicaPlacements(defaultReplicaPlacement), + if ms.Topo, e = topology.NewTopology("topo", confFile, cs, seq, uint64(volumeSizeLimitMB)*1024*1024, pulseSeconds); e != nil { glog.Fatalf("cannot create topology:%s", e) } From 688b99eb73a09595d6401f26c4fd02d7af5fa767 Mon Sep 17 00:00:00 2001 From: Mohanson Date: Fri, 25 Dec 2015 14:47:33 +0800 Subject: [PATCH 26/46] add Introductions of scale of image --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 7ef2a72b8..f79b3c44d 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,12 @@ If you want a nicer URL, you can use one of these alternative URL formats: http://localhost:8080/3,01637037d6 ``` +If you want get an scale of image, you can add some params: + +``` +http://localhost:8080/3/01637037d6.jpg?height=200&width=200 +``` + ### Rack-Aware and Data Center-Aware Replication ### SeaweedFS apply the replication strategy on a volume level. 
So when you are getting a file id, you can specify the replication strategy. For example: From 3c9ef6245ec212711a4588fd094828f1e6aa81fd Mon Sep 17 00:00:00 2001 From: Mohanson Date: Fri, 25 Dec 2015 16:32:50 +0800 Subject: [PATCH 27/46] fix bug : master cann't redirect urlquery string There is a bug that master can not redirect urlquery to slave volumn. so i fix it, and it run good now. --- go/weed/weed_server/master_server_handlers_admin.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/go/weed/weed_server/master_server_handlers_admin.go b/go/weed/weed_server/master_server_handlers_admin.go index fb2b18983..7893a5765 100644 --- a/go/weed/weed_server/master_server_handlers_admin.go +++ b/go/weed/weed_server/master_server_handlers_admin.go @@ -123,7 +123,13 @@ func (ms *MasterServer) redirectHandler(w http.ResponseWriter, r *http.Request) } machines := ms.Topo.Lookup("", volumeId) if machines != nil && len(machines) > 0 { - http.Redirect(w, r, util.NormalizeUrl(machines[rand.Intn(len(machines))].PublicUrl)+r.URL.Path, http.StatusMovedPermanently) + var url string + if r.URL.RawQuery != "" { + url = util.NormalizeUrl(machines[rand.Intn(len(machines))].PublicUrl) + r.URL.Path + "?" 
+ r.URL.RawQuery + } else { + url = util.NormalizeUrl(machines[rand.Intn(len(machines))].PublicUrl) + r.URL.Path + } + http.Redirect(w, r, url, http.StatusMovedPermanently) } else { writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("volume id %d not found", volumeId)) } From 69831c40fe97d2754008ac476b0b5830f3a982cd Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 25 Dec 2015 17:55:06 +0800 Subject: [PATCH 28/46] util: http Post auto make url util: add RemoteApiCall *: add store task client store task: auto clean task when finish --- go/filer/client_operations.go | 2 +- go/operation/assign_file_id.go | 2 +- go/operation/delete_content.go | 2 +- go/operation/list_masters.go | 2 +- go/operation/lookup.go | 6 +- go/operation/sync_volume.go | 2 +- go/storage/store.go | 2 +- go/storage/store_task.go | 43 ++++++++---- go/storage/store_task_cli.go | 69 +++++++++++++++++++ go/storage/store_task_replication.go | 2 +- go/topology/allocate_volume.go | 2 +- go/topology/topology_replicate.go | 2 +- go/topology/topology_vacuum.go | 6 +- go/util/http_util.go | 47 +++++++++++-- go/util/url_util.go | 13 ---- go/weed/benchmark.go | 5 +- .../master_server_handlers_admin.go | 4 +- 17 files changed, 159 insertions(+), 52 deletions(-) create mode 100644 go/storage/store_task_cli.go delete mode 100644 go/util/url_util.go diff --git a/go/filer/client_operations.go b/go/filer/client_operations.go index 80ac51693..b3ccc633a 100644 --- a/go/filer/client_operations.go +++ b/go/filer/client_operations.go @@ -58,7 +58,7 @@ func call(server string, request ApiRequest, ret interface{}) error { } values := make(url.Values) values.Add("request", string(b)) - jsonBlob, err := util.Post("http://"+server+"/__api__", values) + jsonBlob, err := util.Post(server, "/__api__", values) if err != nil { return err } diff --git a/go/operation/assign_file_id.go b/go/operation/assign_file_id.go index fa436b651..45ac5c362 100644 --- a/go/operation/assign_file_id.go +++ b/go/operation/assign_file_id.go @@ 
-31,7 +31,7 @@ func Assign(server string, count uint64, replication string, collection string, if ttl != "" { values.Add("ttl", ttl) } - jsonBlob, err := util.Post("http://"+server+"/dir/assign", values) + jsonBlob, err := util.Post(server, "/dir/assign", values) glog.V(2).Info("assign result :", string(jsonBlob)) if err != nil { return nil, err diff --git a/go/operation/delete_content.go b/go/operation/delete_content.go index 9bd6654d7..a8cd46f71 100644 --- a/go/operation/delete_content.go +++ b/go/operation/delete_content.go @@ -97,7 +97,7 @@ func DeleteFiles(master string, fileIds []string) (*DeleteFilesResult, error) { for _, fid := range fidList { values.Add("fid", fid) } - jsonBlob, err := util.Post("http://"+server+"/delete", values) + jsonBlob, err := util.Post(server, "/delete", values) if err != nil { ret.Errors = append(ret.Errors, err.Error()+" "+string(jsonBlob)) return diff --git a/go/operation/list_masters.go b/go/operation/list_masters.go index bda6f3c65..8aa1eae58 100644 --- a/go/operation/list_masters.go +++ b/go/operation/list_masters.go @@ -14,7 +14,7 @@ type ClusterStatusResult struct { } func ListMasters(server string) ([]string, error) { - jsonBlob, err := util.Get("http://" + server + "/cluster/status") + jsonBlob, err := util.Get(server, "/cluster/status", nil) glog.V(2).Info("list masters result :", string(jsonBlob)) if err != nil { return nil, err diff --git a/go/operation/lookup.go b/go/operation/lookup.go index 83ef55752..86a2ff760 100644 --- a/go/operation/lookup.go +++ b/go/operation/lookup.go @@ -63,7 +63,7 @@ func LookupNoCache(server string, vid string) (ret *LookupResult, err error) { func do_lookup(server string, vid string) (*LookupResult, error) { values := make(url.Values) values.Add("volumeId", vid) - jsonBlob, err := util.Post("http://"+server+"/dir/lookup", values) + jsonBlob, err := util.Post(server, "/dir/lookup", values) if err != nil { return nil, err } @@ -96,7 +96,7 @@ func LookupFileId(server string, fileId string, 
readonly bool) (fullUrl string, } else { u = lookup.Locations.Head().Url } - return "http://" + u + "/" + fileId, nil + return util.MkUrl(u, "/"+fileId, nil), nil } // LookupVolumeIds find volume locations by cache and actual lookup @@ -123,7 +123,7 @@ func LookupVolumeIds(server string, vids []string) (map[string]LookupResult, err for _, vid := range unknown_vids { values.Add("volumeId", vid) } - jsonBlob, err := util.Post("http://"+server+"/vol/lookup", values) + jsonBlob, err := util.Post(server, "/vol/lookup", values) if err != nil { return nil, err } diff --git a/go/operation/sync_volume.go b/go/operation/sync_volume.go index 713cf33c1..f63d6f96a 100644 --- a/go/operation/sync_volume.go +++ b/go/operation/sync_volume.go @@ -20,7 +20,7 @@ type SyncVolumeResponse struct { func GetVolumeSyncStatus(server string, vid string) (*SyncVolumeResponse, error) { values := make(url.Values) values.Add("volume", vid) - jsonBlob, err := util.Post("http://"+server+"/admin/sync/status", values) + jsonBlob, err := util.Post(server, "/admin/sync/status", values) glog.V(2).Info("sync volume result :", string(jsonBlob)) if err != nil { return nil, err diff --git a/go/storage/store.go b/go/storage/store.go index 434c81b0a..702db99fa 100644 --- a/go/storage/store.go +++ b/go/storage/store.go @@ -316,7 +316,7 @@ func (s *Store) SendHeartbeatToMaster() (masterNode string, secretKey security.S return "", "", err } - joinUrl := "http://" + masterNode + "/dir/join" + joinUrl := util.MkUrl(masterNode, "/dir/join", nil) glog.V(4).Infof("Connecting to %s ...", joinUrl) jsonBlob, err := util.PostBytes(joinUrl, data) diff --git a/go/storage/store_task.go b/go/storage/store_task.go index 2f8c0515f..123b12f5f 100644 --- a/go/storage/store_task.go +++ b/go/storage/store_task.go @@ -4,12 +4,16 @@ import ( "errors" "net/url" "time" + + "github.com/chrislusf/seaweedfs/go/glog" ) +type TaskType string + const ( - TaskVacuum = "VACUUM" - TaskReplica = "REPLICA" - TaskBalance = "BALANCE" + TaskVacuum 
TaskType = "VACUUM" + TaskReplica TaskType = "REPLICA" + TaskBalance TaskType = "BALANCE" ) var ( @@ -27,18 +31,21 @@ type TaskWorker interface { } type Task struct { - startTime time.Time - worker TaskWorker - ch chan bool - result error + Id string + startTime time.Time + worker TaskWorker + ch chan bool + result error + cleanWhenFinish bool } type TaskManager struct { TaskList map[string]*Task } -func NewTask(worker TaskWorker) *Task { +func NewTask(worker TaskWorker, id string) *Task { t := &Task{ + Id: id, worker: worker, startTime: time.Now(), result: ErrTaskNotFinish, @@ -46,7 +53,12 @@ func NewTask(worker TaskWorker) *Task { } go func(t *Task) { t.result = t.worker.Run() + if t.cleanWhenFinish { + glog.V(0).Infof("clean task (%s) when finish.", t.Id) + t.worker.Clean() + } t.ch <- true + }(t) return t } @@ -75,7 +87,7 @@ func (tm *TaskManager) NewTask(s *Store, args url.Values) (tid string, e error) return tid, ErrTaskExists } var tw TaskWorker - switch tt { + switch TaskType(tt) { case TaskVacuum: tw, e = NewVacuumTask(s, args) case TaskReplica: @@ -88,7 +100,7 @@ func (tm *TaskManager) NewTask(s *Store, args url.Values) (tid string, e error) if tw == nil { return "", ErrTaskInvalid } - tm.TaskList[tid] = NewTask(tw) + tm.TaskList[tid] = NewTask(tw, tid) return tid, nil } @@ -112,16 +124,19 @@ func (tm *TaskManager) Commit(tid string) (e error) { return t.worker.Commit() } -func (tm *TaskManager) Clean(tid string) (e error) { +func (tm *TaskManager) Clean(tid string) error { t, ok := tm.TaskList[tid] if !ok { return ErrTaskNotFound } + delete(tm.TaskList, tid) if t.QueryResult(time.Second*30) == ErrTaskNotFinish { - return ErrTaskNotFinish + t.cleanWhenFinish = true + glog.V(0).Infof("task (%s) is not finish, clean it later.", tid) + } else { + t.worker.Clean() } - delete(tm.TaskList, tid) - return t.worker.Clean() + return nil } func (tm *TaskManager) ElapsedDuration(tid string) (time.Duration, error) { diff --git a/go/storage/store_task_cli.go 
b/go/storage/store_task_cli.go new file mode 100644 index 000000000..fb69fed39 --- /dev/null +++ b/go/storage/store_task_cli.go @@ -0,0 +1,69 @@ +package storage + +import ( + "errors" + "fmt" + "net/url" + "time" + + "github.com/chrislusf/seaweedfs/go/util" +) + +type TaskParams map[string]string + +var ( + ErrTaskTimeout = errors.New("TaskTimeout") +) + +type TaskCli struct { + TID string + DataNode string +} + +func NewTaskCli(dataNode string, task TaskType, params TaskParams) (*TaskCli, error) { + args := url.Values{} + args.Set("task", string(task)) + for k, v := range params { + args.Set(k, v) + } + m, e := util.RemoteApiCall(dataNode, "/admin/task/new", args) + if e != nil { + return nil, e + } + tid := m["tid"].(string) + if tid == "" { + return nil, fmt.Errorf("Empty %s task", task) + } + return &TaskCli{ + TID: tid, + DataNode: dataNode, + }, nil +} + +func (c *TaskCli) WaitAndQueryResult(timeout time.Duration) error { + startTime := time.Now() + args := url.Values{} + args.Set("task", c.TID) + for time.Since(startTime) < timeout { + _, e := util.RemoteApiCall(c.DataNode, "/admin/task/query", args) + if e.Error() == ErrTaskNotFinish.Error() { + continue + } + return e + } + return ErrTaskTimeout +} + +func (c *TaskCli) Commit() error { + args := url.Values{} + args.Set("task", c.TID) + _, e := util.RemoteApiCall(c.DataNode, "/admin/task/commit", args) + return e +} + +func (c *TaskCli) Clean() error { + args := url.Values{} + args.Set("task", c.TID) + _, e := util.RemoteApiCall(c.DataNode, "/admin/task/clean", args) + return e +} diff --git a/go/storage/store_task_replication.go b/go/storage/store_task_replication.go index b6b8e230a..0931c831e 100644 --- a/go/storage/store_task_replication.go +++ b/go/storage/store_task_replication.go @@ -61,7 +61,7 @@ func (t *ReplicaTask) Run() error { } ch <- e }() - errs := make([]error, 0, 2) + errs := make([]error, 0) for i := 0; i < 2; i++ { if e := <-ch; e != nil { errs = append(errs, e) diff --git 
a/go/topology/allocate_volume.go b/go/topology/allocate_volume.go index 6de3130b1..e48f01495 100644 --- a/go/topology/allocate_volume.go +++ b/go/topology/allocate_volume.go @@ -19,7 +19,7 @@ func AllocateVolume(dn *DataNode, vid storage.VolumeId, option *VolumeGrowOption values.Add("volume", vid.String()) values.Add("collection", option.Collection) values.Add("ttl", option.Ttl.String()) - jsonBlob, err := util.Post("http://"+dn.Url()+"/admin/assign_volume", values) + jsonBlob, err := util.Post(dn.Url(), "/admin/assign_volume", values) if err != nil { return err } diff --git a/go/topology/topology_replicate.go b/go/topology/topology_replicate.go index dbb7d490a..9346ca743 100644 --- a/go/topology/topology_replicate.go +++ b/go/topology/topology_replicate.go @@ -6,7 +6,7 @@ func (t *Topology) Replicate() int { glog.V(0).Infoln("Start replicate checker on demand") for _, col := range t.collectionMap.Items { c := col.(*Collection) - glog.V(0).Infoln("replicate on collection:", c.Name) + glog.V(0).Infoln("checking replicate on collection:", c.Name) for _, vl := range c.storageType2VolumeLayout.Items { if vl != nil { volumeLayout := vl.(*VolumeLayout) diff --git a/go/topology/topology_vacuum.go b/go/topology/topology_vacuum.go index cba3e8a16..cd85f3b15 100644 --- a/go/topology/topology_vacuum.go +++ b/go/topology/topology_vacuum.go @@ -110,7 +110,7 @@ func vacuumVolume_Check(urlLocation string, vid storage.VolumeId, garbageThresho values := make(url.Values) values.Add("volume", vid.String()) values.Add("garbageThreshold", garbageThreshold) - jsonBlob, err := util.Post("http://"+urlLocation+"/admin/vacuum/check", values) + jsonBlob, err := util.Post(urlLocation, "/admin/vacuum/check", values) if err != nil { glog.V(0).Infoln("parameters:", values) return err, false @@ -127,7 +127,7 @@ func vacuumVolume_Check(urlLocation string, vid storage.VolumeId, garbageThresho func vacuumVolume_Compact(urlLocation string, vid storage.VolumeId) error { values := make(url.Values) 
values.Add("volume", vid.String()) - jsonBlob, err := util.Post("http://"+urlLocation+"/admin/vacuum/compact", values) + jsonBlob, err := util.Post(urlLocation, "/admin/vacuum/compact", values) if err != nil { return err } @@ -143,7 +143,7 @@ func vacuumVolume_Compact(urlLocation string, vid storage.VolumeId) error { func vacuumVolume_Commit(urlLocation string, vid storage.VolumeId) error { values := make(url.Values) values.Add("volume", vid.String()) - jsonBlob, err := util.Post("http://"+urlLocation+"/admin/vacuum/commit", values) + jsonBlob, err := util.Post(urlLocation, "/admin/vacuum/commit", values) if err != nil { return err } diff --git a/go/util/http_util.go b/go/util/http_util.go index 5060c77ad..65a4d21df 100644 --- a/go/util/http_util.go +++ b/go/util/http_util.go @@ -28,6 +28,18 @@ func init() { client = &http.Client{Transport: Transport} } +func MkUrl(host, path string, args url.Values) string { + u := url.URL{ + Scheme: "http", + Host: host, + Path: path, + } + if args != nil { + u.RawQuery = args.Encode() + } + return u.String() +} + func PostBytes(url string, body []byte) ([]byte, error) { r, err := client.Post(url, "application/octet-stream", bytes.NewReader(body)) if err != nil { @@ -41,20 +53,45 @@ func PostBytes(url string, body []byte) ([]byte, error) { return b, nil } -func Post(url string, values url.Values) ([]byte, error) { +func PostEx(host, path string, values url.Values) (content []byte, statusCode int, e error) { + url := MkUrl(host, path, nil) r, err := client.PostForm(url, values) if err != nil { - return nil, err + return nil, 0, err } defer r.Body.Close() b, err := ioutil.ReadAll(r.Body) if err != nil { - return nil, err + return nil, r.StatusCode, err } - return b, nil + return b, r.StatusCode, nil +} + +func Post(host, path string, values url.Values) (content []byte, e error) { + content, _, e = PostEx(host, path, values) + return +} + +func RemoteApiCall(host, path string, values url.Values) (result map[string]interface{}, e 
error) { + jsonBlob, code, e := PostEx(host, path, values) + if e != nil { + return nil, e + } + result = make(map[string]interface{}) + if e := json.Unmarshal(jsonBlob, result); e != nil { + return nil, e + } + if err, ok := result["error"]; ok && err.(string) != "" { + return nil, errors.New(err.(string)) + } + if code != http.StatusOK { + return nil, fmt.Errorf("RemoteApiCall %s/%s return %d", host, path, code) + } + return result, nil } -func Get(url string) ([]byte, error) { +func Get(host, path string, values url.Values) ([]byte, error) { + url := MkUrl(host, path, values) r, err := client.Get(url) if err != nil { return nil, err diff --git a/go/util/url_util.go b/go/util/url_util.go deleted file mode 100644 index 7204d08b0..000000000 --- a/go/util/url_util.go +++ /dev/null @@ -1,13 +0,0 @@ -package util - -import "net/url" - -func MkUrl(host, path string, args url.Values) string { - u := url.URL{ - Scheme: "http", - Host: host, - Path: path, - } - u.RawQuery = args.Encode() - return u.String() -} diff --git a/go/weed/benchmark.go b/go/weed/benchmark.go index 51652b1ae..daa970788 100644 --- a/go/weed/benchmark.go +++ b/go/weed/benchmark.go @@ -255,14 +255,13 @@ func readFiles(fileIdLineChan chan string, s *stat) { continue } server := ret.Locations.PickForRead().Url - url := "http://" + server + "/" + fid - if bytesRead, err := util.Get(url); err == nil { + if bytesRead, err := util.Get(server, "/"+fid, nil); err == nil { s.completed++ s.transferred += int64(len(bytesRead)) readStats.addSample(time.Now().Sub(start)) } else { s.failed++ - fmt.Printf("Failed to read %s error:%v\n", url, err) + fmt.Printf("Failed to read %s/%s error:%v\n", server, fid, err) } } } diff --git a/go/weed/weed_server/master_server_handlers_admin.go b/go/weed/weed_server/master_server_handlers_admin.go index 89c373ec7..4b7f809ec 100644 --- a/go/weed/weed_server/master_server_handlers_admin.go +++ b/go/weed/weed_server/master_server_handlers_admin.go @@ -28,7 +28,7 @@ func (ms 
*MasterServer) collectionDeleteHandler(w http.ResponseWriter, r *http.R return } for _, server := range collection.ListVolumeServers() { - _, err := util.Get("http://" + server.Ip + ":" + strconv.Itoa(server.Port) + "/admin/delete_collection?collection=" + r.FormValue("collection")) + _, err := util.Get(server.Ip+":"+strconv.Itoa(server.Port), "/admin/delete_collection", url.Values{"collection": r.Form["collection"]}) if err != nil { writeJsonError(w, r, http.StatusInternalServerError, err) return @@ -204,7 +204,7 @@ func (ms *MasterServer) batchSetVolumeOption(settingKey, settingValue string, vo wg.Add(1) go func(server string, values url.Values) { defer wg.Done() - jsonBlob, e := util.Post("http://"+server+"/admin/setting", values) + jsonBlob, e := util.Post(server, "/admin/setting", values) if e != nil { result[server] = map[string]interface{}{ "error": e.Error() + " " + string(jsonBlob), From 67518171dbfb1e73e27ee18def356c22de4394d2 Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 25 Dec 2015 17:57:08 +0800 Subject: [PATCH 29/46] *: update --- go/storage/store_task.go | 10 ++++------ go/storage/store_task_cli.go | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/go/storage/store_task.go b/go/storage/store_task.go index 123b12f5f..34ac8b07a 100644 --- a/go/storage/store_task.go +++ b/go/storage/store_task.go @@ -8,12 +8,10 @@ import ( "github.com/chrislusf/seaweedfs/go/glog" ) -type TaskType string - const ( - TaskVacuum TaskType = "VACUUM" - TaskReplica TaskType = "REPLICA" - TaskBalance TaskType = "BALANCE" + TaskVacuum = "VACUUM" + TaskReplica = "REPLICA" + TaskBalance = "BALANCE" ) var ( @@ -87,7 +85,7 @@ func (tm *TaskManager) NewTask(s *Store, args url.Values) (tid string, e error) return tid, ErrTaskExists } var tw TaskWorker - switch TaskType(tt) { + switch tt { case TaskVacuum: tw, e = NewVacuumTask(s, args) case TaskReplica: diff --git a/go/storage/store_task_cli.go b/go/storage/store_task_cli.go index fb69fed39..6457a8806 100644 
--- a/go/storage/store_task_cli.go +++ b/go/storage/store_task_cli.go @@ -20,9 +20,9 @@ type TaskCli struct { DataNode string } -func NewTaskCli(dataNode string, task TaskType, params TaskParams) (*TaskCli, error) { +func NewTaskCli(dataNode string, task string, params TaskParams) (*TaskCli, error) { args := url.Values{} - args.Set("task", string(task)) + args.Set("task", task) for k, v := range params { args.Set(k, v) } From f5e8efb11e85407fa2c0fc9b7ece7acc49efb5ad Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 25 Dec 2015 17:57:59 +0800 Subject: [PATCH 30/46] *: update --- go/storage/store_task_cli.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/go/storage/store_task_cli.go b/go/storage/store_task_cli.go index 6457a8806..ab09a4d47 100644 --- a/go/storage/store_task_cli.go +++ b/go/storage/store_task_cli.go @@ -20,9 +20,9 @@ type TaskCli struct { DataNode string } -func NewTaskCli(dataNode string, task string, params TaskParams) (*TaskCli, error) { +func NewTaskCli(dataNode string, taskType string, params TaskParams) (*TaskCli, error) { args := url.Values{} - args.Set("task", task) + args.Set("task", taskType) for k, v := range params { args.Set(k, v) } @@ -32,7 +32,7 @@ func NewTaskCli(dataNode string, task string, params TaskParams) (*TaskCli, erro } tid := m["tid"].(string) if tid == "" { - return nil, fmt.Errorf("Empty %s task", task) + return nil, fmt.Errorf("Empty %s task", taskType) } return &TaskCli{ TID: tid, From fcddc4e9849b6fae3516d23ec92be8ad28a0b188 Mon Sep 17 00:00:00 2001 From: tnextday Date: Fri, 25 Dec 2015 18:26:12 +0800 Subject: [PATCH 31/46] store task cli: update --- go/storage/store_task_cli.go | 24 +++++++++++++++++++++--- go/util/http_util.go | 18 +++++++++++++++++- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/go/storage/store_task_cli.go b/go/storage/store_task_cli.go index ab09a4d47..953c1f791 100644 --- a/go/storage/store_task_cli.go +++ b/go/storage/store_task_cli.go @@ -6,6 +6,7 @@ 
import ( "net/url" "time" + "github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/util" ) @@ -44,12 +45,29 @@ func (c *TaskCli) WaitAndQueryResult(timeout time.Duration) error { startTime := time.Now() args := url.Values{} args.Set("task", c.TID) + args.Set("timeout", time.Minute.String()) + tryTimes := 0 for time.Since(startTime) < timeout { _, e := util.RemoteApiCall(c.DataNode, "/admin/task/query", args) - if e.Error() == ErrTaskNotFinish.Error() { - continue + if e == nil { + //task finished and have no error + return nil } - return e + if util.IsRemoteApiError(e) { + if e.Error() == ErrTaskNotFinish.Error() { + tryTimes = 0 + continue + } + return e + } else { + tryTimes++ + if tryTimes >= 10 { + return e + } + glog.V(0).Infof("query task (%s) error %v, wait 1 minute then retry %d times", c.TID, e, tryTimes) + time.Sleep(time.Minute) + } + } return ErrTaskTimeout } diff --git a/go/util/http_util.go b/go/util/http_util.go index 65a4d21df..0f3d92ea1 100644 --- a/go/util/http_util.go +++ b/go/util/http_util.go @@ -72,6 +72,22 @@ func Post(host, path string, values url.Values) (content []byte, e error) { return } +type RApiError struct { + E string +} + +func (e *RApiError) Error() string { + return e.E +} + +func IsRemoteApiError(e error) bool { + switch e.(type) { + case *RApiError: + return true + } + return false +} + func RemoteApiCall(host, path string, values url.Values) (result map[string]interface{}, e error) { jsonBlob, code, e := PostEx(host, path, values) if e != nil { @@ -82,7 +98,7 @@ func RemoteApiCall(host, path string, values url.Values) (result map[string]inte return nil, e } if err, ok := result["error"]; ok && err.(string) != "" { - return nil, errors.New(err.(string)) + return nil, &RApiError{E: err.(string)} } if code != http.StatusOK { return nil, fmt.Errorf("RemoteApiCall %s/%s return %d", host, path, code) From 6da93d6eade39c397976c8aacd5a4850a1827ef5 Mon Sep 17 00:00:00 2001 From: tnextday Date: Sun, 27 Dec 2015 
02:22:05 +0800 Subject: [PATCH 32/46] NodeImpl: add pick low usage node func --- go/topology/node.go | 63 +++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/go/topology/node.go b/go/topology/node.go index 3b6d55ce9..a2c78f63d 100644 --- a/go/topology/node.go +++ b/go/topology/node.go @@ -7,6 +7,7 @@ import ( "github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/storage" + "sort" ) type NodeId string @@ -51,8 +52,11 @@ type NodeImpl struct { value interface{} } +type FilterNodeFn func(dn Node) error +type PickNodesFn func(nodes []Node, count int) []Node + // the first node must satisfy filterFirstNodeFn(), the rest nodes must have one free slot -func (n *NodeImpl) RandomlyPickNodes(numberOfNodes int, filterFirstNodeFn func(dn Node) error) (firstNode Node, restNodes []Node, err error) { +func (n *NodeImpl) PickNodes(numberOfNodes int, filterFirstNodeFn FilterNodeFn, pickFn PickNodesFn) (firstNode Node, restNodes []Node, err error) { candidates := make([]Node, 0, len(n.children)) var errs []string for _, node := range n.children { @@ -62,13 +66,14 @@ func (n *NodeImpl) RandomlyPickNodes(numberOfNodes int, filterFirstNodeFn func(d errs = append(errs, string(node.Id())+":"+err.Error()) } } - if len(candidates) == 0 { + ns := pickFn(candidates, 1) + if ns == nil { return nil, nil, errors.New("No matching data node found! 
\n" + strings.Join(errs, "\n")) } - firstNode = candidates[rand.Intn(len(candidates))] + firstNode = ns[0] + glog.V(2).Infoln(n.Id(), "picked main node:", firstNode.Id()) - restNodes = make([]Node, numberOfNodes-1) candidates = candidates[:0] for _, node := range n.children { if node.Id() == firstNode.Id() { @@ -81,27 +86,47 @@ func (n *NodeImpl) RandomlyPickNodes(numberOfNodes int, filterFirstNodeFn func(d candidates = append(candidates, node) } glog.V(2).Infoln(n.Id(), "picking", numberOfNodes-1, "from rest", len(candidates), "node candidates") - ret := len(restNodes) == 0 - for k, node := range candidates { - if k < len(restNodes) { - restNodes[k] = node - if k == len(restNodes)-1 { - ret = true - } - } else { - r := rand.Intn(k + 1) - if r < len(restNodes) { - restNodes[r] = node - } - } - } - if !ret { + restNodes = pickFn(candidates, numberOfNodes-1) + if restNodes == nil { glog.V(2).Infoln(n.Id(), "failed to pick", numberOfNodes-1, "from rest", len(candidates), "node candidates") err = errors.New("Not enough data node found!") } return } +func RandomlyPickNodeFn(nodes []Node, count int) []Node { + if len(nodes) < count { + return nil + } + for i := range nodes { + j := rand.Intn(i + 1) + nodes[i], nodes[j] = nodes[j], nodes[i] + } + return nodes[:count] +} + +func (n *NodeImpl) RandomlyPickNodes(numberOfNodes int, filterFirstNodeFn FilterNodeFn) (firstNode Node, restNodes []Node, err error) { + return n.PickNodes(numberOfNodes, filterFirstNodeFn, RandomlyPickNodeFn) +} + +type nodeList []Node + +func (s nodeList) Len() int { return len(s) } +func (s nodeList) Less(i, j int) bool { return s[i].FreeSpace() < s[j].FreeSpace() } +func (s nodeList) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +func LowUsagePickNodeFn(nodes []Node, count int) []Node { + if len(nodes) < count { + return nil + } + sort.Sort(nodeList(nodes)) + return nodes[:count] +} + +func (n *NodeImpl) PickLowUsageNodes(numberOfNodes int, filterFirstNodeFn FilterNodeFn) (firstNode Node, restNodes 
[]Node, err error) { + return n.PickNodes(numberOfNodes, filterFirstNodeFn, LowUsagePickNodeFn) +} + func (n *NodeImpl) IsDataNode() bool { return n.nodeType == "DataNode" } From 47083630d79e1dceb5e320567a1f0cdce502cb98 Mon Sep 17 00:00:00 2001 From: tnextday Date: Sun, 27 Dec 2015 02:44:39 +0800 Subject: [PATCH 33/46] volume growth: default use low usage data node volume growth: update test *: add batch remote api call --- go/storage/store_task_cli.go | 2 +- go/topology/batch_operation.go | 43 +++++++++++++++++++++++++++++++ go/topology/node.go | 6 ++--- go/topology/topology_vacuum.go | 8 +++--- go/topology/volume_growth.go | 13 +++++----- go/topology/volume_growth_test.go | 2 +- go/util/http_util.go | 2 +- 7 files changed, 60 insertions(+), 16 deletions(-) create mode 100644 go/topology/batch_operation.go diff --git a/go/storage/store_task_cli.go b/go/storage/store_task_cli.go index 953c1f791..06a18235c 100644 --- a/go/storage/store_task_cli.go +++ b/go/storage/store_task_cli.go @@ -50,7 +50,7 @@ func (c *TaskCli) WaitAndQueryResult(timeout time.Duration) error { for time.Since(startTime) < timeout { _, e := util.RemoteApiCall(c.DataNode, "/admin/task/query", args) if e == nil { - //task finished and have no error + //task have finished and have no error return nil } if util.IsRemoteApiError(e) { diff --git a/go/topology/batch_operation.go b/go/topology/batch_operation.go new file mode 100644 index 000000000..3cf791d1e --- /dev/null +++ b/go/topology/batch_operation.go @@ -0,0 +1,43 @@ +package topology + +import ( + "net/url" + "strconv" + "time" + + "github.com/chrislusf/seaweedfs/go/glog" + "github.com/chrislusf/seaweedfs/go/util" +) + +func BatchOperation(locationList *VolumeLocationList, path string, values url.Values) (isSuccess bool) { + ch := make(chan bool, locationList.Length()) + for _, dn := range locationList.list { + go func(url string, path string, values url.Values) { + _, e := util.RemoteApiCall(url, path, values) + if e != nil { + 
glog.V(0).Infoln("RemoteApiCall:", util.MkUrl(url, path, values), "error =", e) + } + ch <- e == nil + + }(dn.Url(), path, values) + } + isSuccess = true + for range locationList.list { + select { + case canVacuum := <-ch: + isSuccess = isSuccess && canVacuum + case <-time.After(30 * time.Minute): + isSuccess = false + break + } + } + return isSuccess +} + +func SetVolumeReadonly(locationList *VolumeLocationList, volume string, isReadonly bool) (isSuccess bool) { + forms := url.Values{} + forms.Set("key", "readonly") + forms.Set("value", strconv.FormatBool(isReadonly)) + forms.Set("volume", volume) + return BatchOperation(locationList, "/admin/setting", forms) +} diff --git a/go/topology/node.go b/go/topology/node.go index a2c78f63d..242f60b6f 100644 --- a/go/topology/node.go +++ b/go/topology/node.go @@ -115,16 +115,16 @@ func (s nodeList) Len() int { return len(s) } func (s nodeList) Less(i, j int) bool { return s[i].FreeSpace() < s[j].FreeSpace() } func (s nodeList) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func LowUsagePickNodeFn(nodes []Node, count int) []Node { +func PickLowUsageNodeFn(nodes []Node, count int) []Node { if len(nodes) < count { return nil } - sort.Sort(nodeList(nodes)) + sort.Sort(sort.Reverse(nodeList(nodes))) return nodes[:count] } func (n *NodeImpl) PickLowUsageNodes(numberOfNodes int, filterFirstNodeFn FilterNodeFn) (firstNode Node, restNodes []Node, err error) { - return n.PickNodes(numberOfNodes, filterFirstNodeFn, LowUsagePickNodeFn) + return n.PickNodes(numberOfNodes, filterFirstNodeFn, PickLowUsageNodeFn) } func (n *NodeImpl) IsDataNode() bool { diff --git a/go/topology/topology_vacuum.go b/go/topology/topology_vacuum.go index cd85f3b15..446eb0c1c 100644 --- a/go/topology/topology_vacuum.go +++ b/go/topology/topology_vacuum.go @@ -87,11 +87,11 @@ func (t *Topology) Vacuum(garbageThreshold string) int { for _, vl := range c.storageType2VolumeLayout.Items { if vl != nil { volumeLayout := vl.(*VolumeLayout) - for vid, locationlist := 
range volumeLayout.vid2location { + for vid, locationList := range volumeLayout.vid2location { glog.V(0).Infoln("vacuum on collection:", c.Name, "volume", vid) - if batchVacuumVolumeCheck(volumeLayout, vid, locationlist, garbageThreshold) { - if batchVacuumVolumeCompact(volumeLayout, vid, locationlist) { - batchVacuumVolumeCommit(volumeLayout, vid, locationlist) + if batchVacuumVolumeCheck(volumeLayout, vid, locationList, garbageThreshold) { + if batchVacuumVolumeCompact(volumeLayout, vid, locationList) { + batchVacuumVolumeCommit(volumeLayout, vid, locationList) } } } diff --git a/go/topology/volume_growth.go b/go/topology/volume_growth.go index 31307ffe0..ed3f8fee9 100644 --- a/go/topology/volume_growth.go +++ b/go/topology/volume_growth.go @@ -92,8 +92,9 @@ func (vg *VolumeGrowth) findAndGrow(topo *Topology, option *VolumeGrowOption) (i // 2. find rest data nodes func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption) (servers []*DataNode, err error) { //find main datacenter and other data centers + pickNodesFn := PickLowUsageNodeFn rp := option.ReplicaPlacement - mainDataCenter, otherDataCenters, dc_err := topo.RandomlyPickNodes(rp.DiffDataCenterCount+1, func(node Node) error { + mainDataCenter, otherDataCenters, dc_err := topo.PickNodes(rp.DiffDataCenterCount+1, func(node Node) error { if option.DataCenter != "" && node.IsDataCenter() && node.Id() != NodeId(option.DataCenter) { return fmt.Errorf("Not matching preferred data center:%s", option.DataCenter) } @@ -119,13 +120,13 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum return fmt.Errorf("Only has %d racks with more than %d free data nodes, not enough for %d.", possibleRacksCount, rp.SameRackCount+1, rp.DiffRackCount+1) } return nil - }) + }, pickNodesFn) if dc_err != nil { return nil, dc_err } //find main rack and other racks - mainRack, otherRacks, rack_err := mainDataCenter.(*DataCenter).RandomlyPickNodes(rp.DiffRackCount+1, 
func(node Node) error { + mainRack, otherRacks, rack_err := mainDataCenter.(*DataCenter).PickNodes(rp.DiffRackCount+1, func(node Node) error { if option.Rack != "" && node.IsRack() && node.Id() != NodeId(option.Rack) { return fmt.Errorf("Not matching preferred rack:%s", option.Rack) } @@ -146,13 +147,13 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum return fmt.Errorf("Only has %d data nodes with a slot, not enough for %d.", possibleDataNodesCount, rp.SameRackCount+1) } return nil - }) + }, pickNodesFn) if rack_err != nil { return nil, rack_err } //find main rack and other racks - mainServer, otherServers, server_err := mainRack.(*Rack).RandomlyPickNodes(rp.SameRackCount+1, func(node Node) error { + mainServer, otherServers, server_err := mainRack.(*Rack).PickNodes(rp.SameRackCount+1, func(node Node) error { if option.DataNode != "" && node.IsDataNode() && node.Id() != NodeId(option.DataNode) { return fmt.Errorf("Not matching preferred data node:%s", option.DataNode) } @@ -160,7 +161,7 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), 1) } return nil - }) + }, pickNodesFn) if server_err != nil { return nil, server_err } diff --git a/go/topology/volume_growth_test.go b/go/topology/volume_growth_test.go index 08377b4fd..8f50a6f90 100644 --- a/go/topology/volume_growth_test.go +++ b/go/topology/volume_growth_test.go @@ -131,6 +131,6 @@ func TestFindEmptySlotsForOneVolume(t *testing.T) { t.Fail() } for _, server := range servers { - fmt.Println("assigned node :", server.Id()) + fmt.Printf("assigned node: %s, free space: %d\n", server.Id(), server.FreeSpace()) } } diff --git a/go/util/http_util.go b/go/util/http_util.go index 0f3d92ea1..ceae6faa7 100644 --- a/go/util/http_util.go +++ b/go/util/http_util.go @@ -100,7 +100,7 @@ func RemoteApiCall(host, path string, values url.Values) (result map[string]inte if err, ok := result["error"]; ok && 
err.(string) != "" { return nil, &RApiError{E: err.(string)} } - if code != http.StatusOK { + if code != http.StatusOK || code != http.StatusAccepted { return nil, fmt.Errorf("RemoteApiCall %s/%s return %d", host, path, code) } return result, nil From 4e48f64512d3f6fdbab17e777fff3fe191592a76 Mon Sep 17 00:00:00 2001 From: hxiaodon Date: Mon, 28 Dec 2015 14:23:26 +0800 Subject: [PATCH 34/46] Update store_replicate.go replication operations should be equal to volume's replication setting --- go/topology/store_replicate.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/go/topology/store_replicate.go b/go/topology/store_replicate.go index dc26dade0..4adb4431e 100644 --- a/go/topology/store_replicate.go +++ b/go/topology/store_replicate.go @@ -95,6 +95,13 @@ func distributedOperation(masterNode string, store *storage.Store, volumeId stor for i := 0; i < length; i++ { ret = ret && <-results } + if volume := store.GetVolume(volumeId); volume != nil { + copyCount := volume.ReplicaPlacement.GetCopyCount() - 1 + if length < copyCount { + glog.V(0).Infoln("replicating opetations [%d] is less than volume's replication copy count [%d]", length, copyCount) + ret = false + } + } return ret } else { glog.V(0).Infoln("Failed to lookup for", volumeId, lookupErr.Error()) From 9f3ba7c15371bb2039cd2494460370668461ff9c Mon Sep 17 00:00:00 2001 From: tnextday Date: Thu, 31 Dec 2015 11:50:37 +0800 Subject: [PATCH 35/46] *:temp commit --- go/topology/node.go | 29 +++-- go/topology/topology_replicate.go | 17 ++- go/topology/volume_growth.go | 183 +++++++++++++++++----------- go/topology/volume_growth_test.go | 30 ++++- go/topology/volume_location_list.go | 77 ++++++++++++ 5 files changed, 251 insertions(+), 85 deletions(-) diff --git a/go/topology/node.go b/go/topology/node.go index 242f60b6f..292e88fad 100644 --- a/go/topology/node.go +++ b/go/topology/node.go @@ -2,12 +2,14 @@ package topology import ( "errors" + "fmt" "math/rand" "strings" + "sort" + 
"github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/storage" - "sort" ) type NodeId string @@ -52,25 +54,34 @@ type NodeImpl struct { value interface{} } +type NodePicker interface { + PickNodes(numberOfNodes int, filterNodeFn FilterNodeFn, pickFn PickNodesFn) (nodes []Node, err error) +} + + +var ErrFilterContinue = errors.New("continue") + type FilterNodeFn func(dn Node) error type PickNodesFn func(nodes []Node, count int) []Node // the first node must satisfy filterFirstNodeFn(), the rest nodes must have one free slot -func (n *NodeImpl) PickNodes(numberOfNodes int, filterFirstNodeFn FilterNodeFn, pickFn PickNodesFn) (firstNode Node, restNodes []Node, err error) { +func (n *NodeImpl) PickNodes(numberOfNodes int, filterNodeFn FilterNodeFn, pickFn PickNodesFn) (nodes []Node, err error) { candidates := make([]Node, 0, len(n.children)) var errs []string for _, node := range n.children { - if err := filterFirstNodeFn(node); err == nil { + if err := filterNodeFn(node); err == nil { candidates = append(candidates, node) + }else if err == ErrFilterContinue{ + continue } else { errs = append(errs, string(node.Id())+":"+err.Error()) } } - ns := pickFn(candidates, 1) - if ns == nil { - return nil, nil, errors.New("No matching data node found! \n" + strings.Join(errs, "\n")) + if len(candidates) < numberOfNodes{ + return nil, errors.New("No matching data node found! 
\n" + strings.Join(errs, "\n")) } - firstNode = ns[0] + return pickFn(candidates, numberOfNodes), nil + glog.V(2).Infoln(n.Id(), "picked main node:", firstNode.Id()) @@ -163,7 +174,7 @@ func (n *NodeImpl) GetValue() interface{} { func (n *NodeImpl) ReserveOneVolume(r int) (assignedNode *DataNode, err error) { for _, node := range n.children { freeSpace := node.FreeSpace() - // fmt.Println("r =", r, ", node =", node, ", freeSpace =", freeSpace) + fmt.Println("r =", r, ", node =", node, ", freeSpace =", freeSpace) if freeSpace <= 0 { continue } @@ -171,7 +182,7 @@ func (n *NodeImpl) ReserveOneVolume(r int) (assignedNode *DataNode, err error) { r -= freeSpace } else { if node.IsDataNode() && node.FreeSpace() > 0 { - // fmt.Println("vid =", vid, " assigned to node =", node, ", freeSpace =", node.FreeSpace()) + fmt.Println("assigned to node =", node, ", freeSpace =", node.FreeSpace()) return node.(*DataNode), nil } assignedNode, err = node.ReserveOneVolume(r) diff --git a/go/topology/topology_replicate.go b/go/topology/topology_replicate.go index 9346ca743..000d6ef4f 100644 --- a/go/topology/topology_replicate.go +++ b/go/topology/topology_replicate.go @@ -1,8 +1,11 @@ package topology -import "github.com/chrislusf/seaweedfs/go/glog" +import ( + "github.com/chrislusf/seaweedfs/go/glog" + "github.com/chrislusf/seaweedfs/go/storage" +) -func (t *Topology) Replicate() int { +func (t *Topology) CheckReplicate() int { glog.V(0).Infoln("Start replicate checker on demand") for _, col := range t.collectionMap.Items { c := col.(*Collection) @@ -15,6 +18,7 @@ func (t *Topology) Replicate() int { if locationList.Length() < copyCount { //set volume readonly glog.V(0).Infoln("replicate volume :", vid) + SetVolumeReadonly(locationList, vid.String(), true) } } @@ -23,3 +27,12 @@ func (t *Topology) Replicate() int { } return 0 } + +func (t *Topology) doReplicate(vl *VolumeLayout, vid storage.VolumeId) { + locationList := vl.vid2location[vid] + if !SetVolumeReadonly(locationList, 
vid.String(), true) { + return + } + defer SetVolumeReadonly(locationList, vid.String(), false) + +} diff --git a/go/topology/volume_growth.go b/go/topology/volume_growth.go index ed3f8fee9..b8e3ca450 100644 --- a/go/topology/volume_growth.go +++ b/go/topology/volume_growth.go @@ -76,7 +76,7 @@ func (vg *VolumeGrowth) GrowByCountAndType(targetCount int, option *VolumeGrowOp } func (vg *VolumeGrowth) findAndGrow(topo *Topology, option *VolumeGrowOption) (int, error) { - servers, e := vg.findEmptySlotsForOneVolume(topo, option) + servers, e := vg.findEmptySlotsForOneVolume(topo, option, nil) if e != nil { return 0, e } @@ -85,105 +85,150 @@ func (vg *VolumeGrowth) findAndGrow(topo *Topology, option *VolumeGrowOption) (i return len(servers), err } +func filterMainDataCenter(option *VolumeGrowOption, node Node) error { + if option.DataCenter != "" && node.IsDataCenter() && node.Id() != NodeId(option.DataCenter) { + return fmt.Errorf("Not matching preferred data center:%s", option.DataCenter) + } + rp := option.ReplicaPlacement + if len(node.Children()) < rp.DiffRackCount+1 { + return fmt.Errorf("Only has %d racks, not enough for %d.", len(node.Children()), rp.DiffRackCount+1) + } + if node.FreeSpace() < rp.DiffRackCount+rp.SameRackCount+1 { + return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.DiffRackCount+rp.SameRackCount+1) + } + possibleRacksCount := 0 + for _, rack := range node.Children() { + possibleDataNodesCount := 0 + for _, n := range rack.Children() { + if n.FreeSpace() >= 1 { + possibleDataNodesCount++ + } + } + if possibleDataNodesCount >= rp.SameRackCount+1 { + possibleRacksCount++ + } + } + if possibleRacksCount < rp.DiffRackCount+1 { + return fmt.Errorf("Only has %d racks with more than %d free data nodes, not enough for %d.", possibleRacksCount, rp.SameRackCount+1, rp.DiffRackCount+1) + } + return nil +} + +func filterMainRack(option *VolumeGrowOption, node Node) error { + if option.Rack != "" && node.IsRack() && node.Id() != 
NodeId(option.Rack) { + return fmt.Errorf("Not matching preferred rack:%s", option.Rack) + } + rp := option.ReplicaPlacement + if node.FreeSpace() < rp.SameRackCount+1 { + return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.SameRackCount+1) + } + if len(node.Children()) < rp.SameRackCount+1 { + // a bit faster way to test free racks + return fmt.Errorf("Only has %d data nodes, not enough for %d.", len(node.Children()), rp.SameRackCount+1) + } + possibleDataNodesCount := 0 + for _, n := range node.Children() { + if n.FreeSpace() >= 1 { + possibleDataNodesCount++ + } + } + if possibleDataNodesCount < rp.SameRackCount+1 { + return fmt.Errorf("Only has %d data nodes with a slot, not enough for %d.", possibleDataNodesCount, rp.SameRackCount+1) + } + return nil +} + +func makeExceptNodeFilter(nodes []Node) FilterNodeFn { + m := make(map[string]bool) + for _, n := range nodes { + m[n.Id()] = true + } + return func(dn Node) { + if dn.FreeSpace() <= 0 { + return ErrFilterContinue + } + if _, ok := m[dn.Id()]; ok { + return ErrFilterContinue + } + return nil + } +} + // 1. find the main data node // 1.1 collect all data nodes that have 1 slots // 2.2 collect all racks that have rp.SameRackCount+1 // 2.2 collect all data centers that have DiffRackCount+rp.SameRackCount+1 // 2. 
find rest data nodes -func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption) (servers []*DataNode, err error) { +func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption, existsServer *VolumeLocationList) (additionServers []*DataNode, err error) { //find main datacenter and other data centers pickNodesFn := PickLowUsageNodeFn rp := option.ReplicaPlacement - mainDataCenter, otherDataCenters, dc_err := topo.PickNodes(rp.DiffDataCenterCount+1, func(node Node) error { - if option.DataCenter != "" && node.IsDataCenter() && node.Id() != NodeId(option.DataCenter) { - return fmt.Errorf("Not matching preferred data center:%s", option.DataCenter) - } - if len(node.Children()) < rp.DiffRackCount+1 { - return fmt.Errorf("Only has %d racks, not enough for %d.", len(node.Children()), rp.DiffRackCount+1) - } - if node.FreeSpace() < rp.DiffRackCount+rp.SameRackCount+1 { - return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.DiffRackCount+rp.SameRackCount+1) - } - possibleRacksCount := 0 - for _, rack := range node.Children() { - possibleDataNodesCount := 0 - for _, n := range rack.Children() { - if n.FreeSpace() >= 1 { - possibleDataNodesCount++ - } - } - if possibleDataNodesCount >= rp.SameRackCount+1 { - possibleRacksCount++ - } + + pickMainAndRestNodes := func(np NodePicker, restNodeCount int, filterNodeFn FilterNodeFn) (mainNode Node, restNodes []Node, e error) { + mainNodes, err := np.PickNodes(1, filterNodeFn, pickNodesFn) + if err != nil { + return nil, err } - if possibleRacksCount < rp.DiffRackCount+1 { - return fmt.Errorf("Only has %d racks with more than %d free data nodes, not enough for %d.", possibleRacksCount, rp.SameRackCount+1, rp.DiffRackCount+1) + restNodes, err := np.PickNodes(restNodeCount, + makeExceptNodeFilter(mainNodes), pickNodesFn) + if err != nil { + return nil, err } - return nil - }, pickNodesFn) + return mainNodes[0], restNodes + } + + mainDataCenter, 
otherDataCenters, dc_err := pickMainAndRestNodes(topo, rp.DiffDataCenterCount, + func(node Node) error { + return filterMainDataCenter(option, node) + }) if dc_err != nil { return nil, dc_err } - //find main rack and other racks - mainRack, otherRacks, rack_err := mainDataCenter.(*DataCenter).PickNodes(rp.DiffRackCount+1, func(node Node) error { - if option.Rack != "" && node.IsRack() && node.Id() != NodeId(option.Rack) { - return fmt.Errorf("Not matching preferred rack:%s", option.Rack) - } - if node.FreeSpace() < rp.SameRackCount+1 { - return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.SameRackCount+1) - } - if len(node.Children()) < rp.SameRackCount+1 { - // a bit faster way to test free racks - return fmt.Errorf("Only has %d data nodes, not enough for %d.", len(node.Children()), rp.SameRackCount+1) - } - possibleDataNodesCount := 0 - for _, n := range node.Children() { - if n.FreeSpace() >= 1 { - possibleDataNodesCount++ - } - } - if possibleDataNodesCount < rp.SameRackCount+1 { - return fmt.Errorf("Only has %d data nodes with a slot, not enough for %d.", possibleDataNodesCount, rp.SameRackCount+1) - } - return nil - }, pickNodesFn) + mainRack, otherRacks, rack_err := pickMainAndRestNodes(mainDataCenter.(*DataCenter), rp.DiffRackCount, + func(node Node) error { + return filterMainRack(option, node) + }, + ) if rack_err != nil { return nil, rack_err } - //find main rack and other racks - mainServer, otherServers, server_err := mainRack.(*Rack).PickNodes(rp.SameRackCount+1, func(node Node) error { - if option.DataNode != "" && node.IsDataNode() && node.Id() != NodeId(option.DataNode) { - return fmt.Errorf("Not matching preferred data node:%s", option.DataNode) - } - if node.FreeSpace() < 1 { - return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), 1) - } - return nil - }, pickNodesFn) + //find main server and other servers + mainServer, otherServers, server_err := pickMainAndRestNodes(mainRack.(*Rack), rp.SameRackCount, + func(node Node) error 
{ + if option.DataNode != "" && node.IsDataNode() && node.Id() != NodeId(option.DataNode) { + return fmt.Errorf("Not matching preferred data node:%s", option.DataNode) + } + if node.FreeSpace() < 1 { + return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), 1) + } + return nil + }, + ) + if server_err != nil { return nil, server_err } - servers = append(servers, mainServer.(*DataNode)) + additionServers = append(additionServers, mainServer.(*DataNode)) for _, server := range otherServers { - servers = append(servers, server.(*DataNode)) + additionServers = append(additionServers, server.(*DataNode)) } for _, rack := range otherRacks { r := rand.Intn(rack.FreeSpace()) if server, e := rack.ReserveOneVolume(r); e == nil { - servers = append(servers, server) + additionServers = append(additionServers, server) } else { - return servers, e + return additionServers, e } } - for _, datacenter := range otherDataCenters { - r := rand.Intn(datacenter.FreeSpace()) - if server, e := datacenter.ReserveOneVolume(r); e == nil { - servers = append(servers, server) + for _, dc := range otherDataCenters { + r := rand.Intn(dc.FreeSpace()) + if server, e := dc.ReserveOneVolume(r); e == nil { + additionServers = append(additionServers, server) } else { - return servers, e + return additionServers, e } } return diff --git a/go/topology/volume_growth_test.go b/go/topology/volume_growth_test.go index 8f50a6f90..a89ad6986 100644 --- a/go/topology/volume_growth_test.go +++ b/go/topology/volume_growth_test.go @@ -19,7 +19,7 @@ var topologyLayout = ` {"id":2, "size":12312}, {"id":3, "size":12312} ], - "limit":3 + "limit":15 }, "server112":{ "volumes":[ @@ -28,6 +28,18 @@ var topologyLayout = ` {"id":6, "size":12312} ], "limit":10 + }, + "server113":{ + "volumes":[ + {"id":7, "size":12312}, + {"id":8, "size":12312}, + {"id":9, "size":12312} + ], + "limit":8 + }, + "server114":{ + "volumes":[], + "limit":8 } }, "rack2":{ @@ -37,11 +49,15 @@ var topologyLayout = ` {"id":5, "size":12312}, 
{"id":6, "size":12312} ], - "limit":4 + "limit":8 }, "server122":{ "volumes":[], - "limit":4 + "limit":8 + }, + "server124":{ + "volumes":[], + "limit":8 }, "server123":{ "volumes":[ @@ -63,7 +79,11 @@ var topologyLayout = ` {"id":3, "size":12312}, {"id":5, "size":12312} ], - "limit":4 + "limit":8 + }, + "server322":{ + "volumes":[], + "limit":7 } } } @@ -117,7 +137,7 @@ func setup(topologyLayout string) *Topology { func TestFindEmptySlotsForOneVolume(t *testing.T) { topo := setup(topologyLayout) vg := NewDefaultVolumeGrowth() - rp, _ := storage.NewReplicaPlacementFromString("002") + rp, _ := storage.NewReplicaPlacementFromString("011") volumeGrowOption := &VolumeGrowOption{ Collection: "", ReplicaPlacement: rp, diff --git a/go/topology/volume_location_list.go b/go/topology/volume_location_list.go index 7166a4add..fed2c1574 100644 --- a/go/topology/volume_location_list.go +++ b/go/topology/volume_location_list.go @@ -70,3 +70,80 @@ func (dnll *VolumeLocationList) Refresh(freshThreshHold int64) { dnll.list = l } } + +// return all data centers, first is main data center +func (dnll *VolumeLocationList) DataCenters() []*DataCenter { + m := make(map[*DataCenter]int) + maxCount := 0 + var mainDC *DataCenter + for _, dn := range dnll.list { + var dc *DataCenter + if dc = dn.GetDataCenter(); dc == nil { + continue + } + m[dc] = m[dc] + 1 + if m[dc] > maxCount { + mainDC = dc + maxCount = m[dc] + } + } + dataCenters := make([]*DataCenter, 0, len(m)) + if mainDC != nil { + dataCenters = append(dataCenters, mainDC) + } + for dc := range m { + if dc != mainDC { + dataCenters = append(dataCenters, dc) + } + } + return dataCenters +} + +// return all racks if data center set nil +func (dnll *VolumeLocationList) Racks(dc *DataCenter) []*Rack { + m := make(map[*Rack]int) + maxCount := 0 + var mainRack *Rack + for _, dn := range dnll.list { + if dc != nil && dn.GetDataCenter() != dc { + continue + } + var rack *Rack + if rack = dn.GetRack(); rack == nil { + continue + } + m[rack] 
= m[rack] + 1 + if m[rack] > maxCount { + mainRack = rack + maxCount = m[rack] + } + } + racks := make([]*Rack, 0, len(m)) + if mainRack != nil { + racks = append(racks, mainRack) + } + for rack := range m { + racks = append(racks, rack) + } + return racks +} + + +func (dnll *VolumeLocationList) Servers(rack *Rack) []*DataNode { + servers := make([]*DataNode) + for _, dn := range dnll.list { + if rack != nil && dn.GetRack() != rack { + continue + } + var rack *Rack + if rack = dn.GetRack(); rack == nil { + continue + } + servers = append(servers, dn) + } + return servers +} + +//func (dnll *VolumeLocationList)ContainDataNode(nodeType, id string)bool { +// +//} From 6c88abf62f9e2a03f782753fc236b790681714e6 Mon Sep 17 00:00:00 2001 From: chrislusf Date: Wed, 30 Dec 2015 22:48:19 -0800 Subject: [PATCH 36/46] adjust print out when error --- go/topology/store_replicate.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/go/topology/store_replicate.go b/go/topology/store_replicate.go index 4adb4431e..a4be4726f 100644 --- a/go/topology/store_replicate.go +++ b/go/topology/store_replicate.go @@ -96,12 +96,11 @@ func distributedOperation(masterNode string, store *storage.Store, volumeId stor ret = ret && <-results } if volume := store.GetVolume(volumeId); volume != nil { - copyCount := volume.ReplicaPlacement.GetCopyCount() - 1 - if length < copyCount { - glog.V(0).Infoln("replicating opetations [%d] is less than volume's replication copy count [%d]", length, copyCount) + if length+1 < volume.ReplicaPlacement.GetCopyCount() { + glog.V(0).Infof("replicating opetations [%d] is less than volume's replication copy count [%d]", length+1, volume.ReplicaPlacement.GetCopyCount()) ret = false } - } + } return ret } else { glog.V(0).Infoln("Failed to lookup for", volumeId, lookupErr.Error()) From 166cc0d48a389be2f6120e3fafda9563309dcf34 Mon Sep 17 00:00:00 2001 From: Xiaodong Huo Date: Mon, 4 Jan 2016 09:52:32 +0800 Subject: [PATCH 37/46] collection name could 
be specified in url uploading file with filer service could specify the collection name in get/post field, if no collection specified, the default collection setting against the filer service will be used --- go/weed/weed_server/filer_server_handlers.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/go/weed/weed_server/filer_server_handlers.go b/go/weed/weed_server/filer_server_handlers.go index 1695296d4..e6f1628da 100644 --- a/go/weed/weed_server/filer_server_handlers.go +++ b/go/weed/weed_server/filer_server_handlers.go @@ -130,7 +130,11 @@ func (fs *FilerServer) PostHandler(w http.ResponseWriter, r *http.Request) { if replication == "" { replication = fs.defaultReplication } - assignResult, ae := operation.Assign(fs.master, 1, replication, fs.collection, query.Get("ttl")) + collection := query.Get("collection") + if collection == "" { + collection = fs.collection + } + assignResult, ae := operation.Assign(fs.master, 1, replication, collection, query.Get("ttl")) if ae != nil { glog.V(0).Infoln("failing to assign a file id", ae.Error()) writeJsonError(w, r, http.StatusInternalServerError, ae) From cc3b8c66ed3ff8a1744e68a1f8a2ebe7b0cc973c Mon Sep 17 00:00:00 2001 From: tnextday Date: Mon, 4 Jan 2016 15:11:45 +0800 Subject: [PATCH 38/46] volume growth: growth with exists data node --- go/storage/replica_placement.go | 14 +++++-- go/topology/node.go | 35 +++------------- go/topology/volume_growth.go | 64 +++++++++++++++++++++-------- go/topology/volume_growth_test.go | 14 ++++++- go/topology/volume_location_list.go | 33 ++++++++------- 5 files changed, 96 insertions(+), 64 deletions(-) diff --git a/go/storage/replica_placement.go b/go/storage/replica_placement.go index 31f8f464a..adc93cdcc 100644 --- a/go/storage/replica_placement.go +++ b/go/storage/replica_placement.go @@ -52,8 +52,16 @@ func (rp *ReplicaPlacement) GetCopyCount() int { return rp.DiffDataCenterCount + rp.DiffRackCount + rp.SameRackCount + 1 } -func (rp *ReplicaPlacement) 
Equal(rp1 *ReplicaPlacement) bool { - return rp.SameRackCount == rp1.SameRackCount && +func (rp *ReplicaPlacement) Compare(rp1 *ReplicaPlacement) int { + if rp.SameRackCount == rp1.SameRackCount && rp.DiffRackCount == rp1.DiffRackCount && - rp.DiffDataCenterCount == rp1.DiffDataCenterCount + rp.DiffDataCenterCount == rp1.DiffDataCenterCount { + return 0 + } else if rp.SameRackCount < rp1.SameRackCount || + rp.DiffRackCount < rp1.DiffRackCount || + rp.DiffDataCenterCount < rp1.DiffDataCenterCount { + return -1 + } else { + return 1 + } } diff --git a/go/topology/node.go b/go/topology/node.go index 292e88fad..febfeb64e 100644 --- a/go/topology/node.go +++ b/go/topology/node.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "math/rand" - "strings" "sort" @@ -58,7 +57,6 @@ type NodePicker interface { PickNodes(numberOfNodes int, filterNodeFn FilterNodeFn, pickFn PickNodesFn) (nodes []Node, err error) } - var ErrFilterContinue = errors.New("continue") type FilterNodeFn func(dn Node) error @@ -71,38 +69,17 @@ func (n *NodeImpl) PickNodes(numberOfNodes int, filterNodeFn FilterNodeFn, pickF for _, node := range n.children { if err := filterNodeFn(node); err == nil { candidates = append(candidates, node) - }else if err == ErrFilterContinue{ + } else if err == ErrFilterContinue { continue } else { errs = append(errs, string(node.Id())+":"+err.Error()) } } - if len(candidates) < numberOfNodes{ - return nil, errors.New("No matching data node found! \n" + strings.Join(errs, "\n")) + if len(candidates) < numberOfNodes { + return nil, errors.New("Not enough data node found!") + // return nil, errors.New("No matching data node found! 
\n" + strings.Join(errs, "\n")) } return pickFn(candidates, numberOfNodes), nil - - - glog.V(2).Infoln(n.Id(), "picked main node:", firstNode.Id()) - - candidates = candidates[:0] - for _, node := range n.children { - if node.Id() == firstNode.Id() { - continue - } - if node.FreeSpace() <= 0 { - continue - } - glog.V(2).Infoln("select rest node candidate:", node.Id()) - candidates = append(candidates, node) - } - glog.V(2).Infoln(n.Id(), "picking", numberOfNodes-1, "from rest", len(candidates), "node candidates") - restNodes = pickFn(candidates, numberOfNodes-1) - if restNodes == nil { - glog.V(2).Infoln(n.Id(), "failed to pick", numberOfNodes-1, "from rest", len(candidates), "node candidates") - err = errors.New("Not enough data node found!") - } - return } func RandomlyPickNodeFn(nodes []Node, count int) []Node { @@ -116,7 +93,7 @@ func RandomlyPickNodeFn(nodes []Node, count int) []Node { return nodes[:count] } -func (n *NodeImpl) RandomlyPickNodes(numberOfNodes int, filterFirstNodeFn FilterNodeFn) (firstNode Node, restNodes []Node, err error) { +func (n *NodeImpl) RandomlyPickNodes(numberOfNodes int, filterFirstNodeFn FilterNodeFn) (nodes []Node, err error) { return n.PickNodes(numberOfNodes, filterFirstNodeFn, RandomlyPickNodeFn) } @@ -134,7 +111,7 @@ func PickLowUsageNodeFn(nodes []Node, count int) []Node { return nodes[:count] } -func (n *NodeImpl) PickLowUsageNodes(numberOfNodes int, filterFirstNodeFn FilterNodeFn) (firstNode Node, restNodes []Node, err error) { +func (n *NodeImpl) PickLowUsageNodes(numberOfNodes int, filterFirstNodeFn FilterNodeFn) (nodes []Node, err error) { return n.PickNodes(numberOfNodes, filterFirstNodeFn, PickLowUsageNodeFn) } diff --git a/go/topology/volume_growth.go b/go/topology/volume_growth.go index b8e3ca450..167660514 100644 --- a/go/topology/volume_growth.go +++ b/go/topology/volume_growth.go @@ -139,11 +139,11 @@ func filterMainRack(option *VolumeGrowOption, node Node) error { } func makeExceptNodeFilter(nodes []Node) 
FilterNodeFn { - m := make(map[string]bool) + m := make(map[NodeId]bool) for _, n := range nodes { m[n.Id()] = true } - return func(dn Node) { + return func(dn Node) error { if dn.FreeSpace() <= 0 { return ErrFilterContinue } @@ -164,38 +164,69 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum pickNodesFn := PickLowUsageNodeFn rp := option.ReplicaPlacement - pickMainAndRestNodes := func(np NodePicker, restNodeCount int, filterNodeFn FilterNodeFn) (mainNode Node, restNodes []Node, e error) { - mainNodes, err := np.PickNodes(1, filterNodeFn, pickNodesFn) - if err != nil { - return nil, err + pickMainAndRestNodes := func(np NodePicker, totalNodeCount int, filterNodeFn FilterNodeFn, existsNodes []Node) (mainNode Node, restNodes []Node, e error) { + for _, n := range existsNodes { + if filterNodeFn(n) == nil { + mainNode = n + break + } } - restNodes, err := np.PickNodes(restNodeCount, - makeExceptNodeFilter(mainNodes), pickNodesFn) - if err != nil { - return nil, err + if mainNode == nil { + mainNodes, err := np.PickNodes(1, filterNodeFn, pickNodesFn) + if err != nil { + return nil, nil, err + } + mainNode = mainNodes[0] + existsNodes = append(existsNodes, mainNode) } - return mainNodes[0], restNodes - } + glog.V(2).Infoln(mainNode.Id(), "picked main node:", mainNode.Id()) - mainDataCenter, otherDataCenters, dc_err := pickMainAndRestNodes(topo, rp.DiffDataCenterCount, + restCount := totalNodeCount - len(existsNodes) + + if restCount > 0 { + restNodes, err = np.PickNodes(restCount, + makeExceptNodeFilter(existsNodes), pickNodesFn) + if err != nil { + return nil, nil, err + } + } + + return mainNode, restNodes, nil + } + var existsNode []Node + if existsServer != nil { + existsNode = existsServer.DiffDataCenters() + } + mainDataCenter, otherDataCenters, dc_err := pickMainAndRestNodes(topo, rp.DiffDataCenterCount+1, func(node Node) error { return filterMainDataCenter(option, node) - }) + }, existsNode) if dc_err != nil { return nil, 
dc_err } //find main rack and other racks - mainRack, otherRacks, rack_err := pickMainAndRestNodes(mainDataCenter.(*DataCenter), rp.DiffRackCount, + if existsServer != nil { + existsNode = existsServer.DiffRacks(mainDataCenter.(*DataCenter)) + } else { + existsNode = nil + } + mainRack, otherRacks, rack_err := pickMainAndRestNodes(mainDataCenter.(*DataCenter), rp.DiffRackCount+1, func(node Node) error { return filterMainRack(option, node) }, + existsNode, ) if rack_err != nil { return nil, rack_err } //find main server and other servers - mainServer, otherServers, server_err := pickMainAndRestNodes(mainRack.(*Rack), rp.SameRackCount, + if existsServer != nil { + existsNode = existsServer.SameServers(mainRack.(*Rack)) + } else { + existsNode = nil + } + mainServer, otherServers, server_err := pickMainAndRestNodes(mainRack.(*Rack), rp.SameRackCount+1, func(node Node) error { if option.DataNode != "" && node.IsDataNode() && node.Id() != NodeId(option.DataNode) { return fmt.Errorf("Not matching preferred data node:%s", option.DataNode) @@ -205,6 +236,7 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum } return nil }, + existsNode, ) if server_err != nil { diff --git a/go/topology/volume_growth_test.go b/go/topology/volume_growth_test.go index a89ad6986..de08007bb 100644 --- a/go/topology/volume_growth_test.go +++ b/go/topology/volume_growth_test.go @@ -70,6 +70,16 @@ var topologyLayout = ` } }, "dc2":{ + "rack2":{ + "server221":{ + "volumes":[], + "limit":8 + }, + "server222":{ + "volumes":[], + "limit":8 + } + } }, "dc3":{ "rack2":{ @@ -137,7 +147,7 @@ func setup(topologyLayout string) *Topology { func TestFindEmptySlotsForOneVolume(t *testing.T) { topo := setup(topologyLayout) vg := NewDefaultVolumeGrowth() - rp, _ := storage.NewReplicaPlacementFromString("011") + rp, _ := storage.NewReplicaPlacementFromString("111") volumeGrowOption := &VolumeGrowOption{ Collection: "", ReplicaPlacement: rp, @@ -145,7 +155,7 @@ func 
TestFindEmptySlotsForOneVolume(t *testing.T) { Rack: "", DataNode: "", } - servers, err := vg.findEmptySlotsForOneVolume(topo, volumeGrowOption) + servers, err := vg.findEmptySlotsForOneVolume(topo, volumeGrowOption, nil) if err != nil { fmt.Println("finding empty slots error :", err) t.Fail() diff --git a/go/topology/volume_location_list.go b/go/topology/volume_location_list.go index fed2c1574..68ead10e8 100644 --- a/go/topology/volume_location_list.go +++ b/go/topology/volume_location_list.go @@ -27,11 +27,6 @@ func (dnll *VolumeLocationList) Length() int { return len(dnll.list) } -func (dnll *VolumeLocationList) CalcReplicaPlacement() (rp *storage.ReplicaPlacement) { - //TODO CalcReplicaPlacement - return nil -} - func (dnll *VolumeLocationList) Set(loc *DataNode) { for i := 0; i < len(dnll.list); i++ { if loc.Ip == dnll.list[i].Ip && loc.Port == dnll.list[i].Port { @@ -72,7 +67,7 @@ func (dnll *VolumeLocationList) Refresh(freshThreshHold int64) { } // return all data centers, first is main data center -func (dnll *VolumeLocationList) DataCenters() []*DataCenter { +func (dnll *VolumeLocationList) DiffDataCenters() []Node { m := make(map[*DataCenter]int) maxCount := 0 var mainDC *DataCenter @@ -87,7 +82,7 @@ func (dnll *VolumeLocationList) DataCenters() []*DataCenter { maxCount = m[dc] } } - dataCenters := make([]*DataCenter, 0, len(m)) + dataCenters := make([]Node, 0, len(m)) if mainDC != nil { dataCenters = append(dataCenters, mainDC) } @@ -100,12 +95,12 @@ func (dnll *VolumeLocationList) DataCenters() []*DataCenter { } // return all racks if data center set nil -func (dnll *VolumeLocationList) Racks(dc *DataCenter) []*Rack { +func (dnll *VolumeLocationList) DiffRacks(mainDC *DataCenter) []Node { m := make(map[*Rack]int) maxCount := 0 var mainRack *Rack for _, dn := range dnll.list { - if dc != nil && dn.GetDataCenter() != dc { + if mainDC != nil && dn.GetDataCenter() != mainDC { continue } var rack *Rack @@ -118,7 +113,7 @@ func (dnll *VolumeLocationList) 
Racks(dc *DataCenter) []*Rack { maxCount = m[rack] } } - racks := make([]*Rack, 0, len(m)) + racks := make([]Node, 0, len(m)) if mainRack != nil { racks = append(racks, mainRack) } @@ -128,11 +123,9 @@ func (dnll *VolumeLocationList) Racks(dc *DataCenter) []*Rack { return racks } - -func (dnll *VolumeLocationList) Servers(rack *Rack) []*DataNode { - servers := make([]*DataNode) +func (dnll *VolumeLocationList) SameServers(mainRack *Rack) (servers []Node) { for _, dn := range dnll.list { - if rack != nil && dn.GetRack() != rack { + if mainRack != nil && dn.GetRack() != mainRack { continue } var rack *Rack @@ -144,6 +137,18 @@ func (dnll *VolumeLocationList) Servers(rack *Rack) []*DataNode { return servers } +func (dnll *VolumeLocationList) CalcReplicaPlacement() (rp *storage.ReplicaPlacement) { + dcs := dnll.DiffDataCenters() + rs := dnll.DiffRacks(dcs[0].(*DataCenter)) + ss := dnll.SameServers(rs[0].(*Rack)) + rp = &storage.ReplicaPlacement{ + len(dcs) - 1, + len(rs) - 1, + len(ss) - 1, + } + return +} + //func (dnll *VolumeLocationList)ContainDataNode(nodeType, id string)bool { // //} From 836554808a749a53e53373f34a1c2d747e453c36 Mon Sep 17 00:00:00 2001 From: tnextday Date: Mon, 4 Jan 2016 21:52:03 +0800 Subject: [PATCH 39/46] volume growth: update volume growth test, and fix bugs --- go/topology/node.go | 5 +- go/topology/volume_growth.go | 19 ++++---- go/topology/volume_growth_test.go | 72 +++++++++++++++++++++++++++++ go/topology/volume_location_list.go | 33 +++++++++---- 4 files changed, 108 insertions(+), 21 deletions(-) diff --git a/go/topology/node.go b/go/topology/node.go index febfeb64e..a8c5c0ded 100644 --- a/go/topology/node.go +++ b/go/topology/node.go @@ -2,7 +2,6 @@ package topology import ( "errors" - "fmt" "math/rand" "sort" @@ -151,7 +150,7 @@ func (n *NodeImpl) GetValue() interface{} { func (n *NodeImpl) ReserveOneVolume(r int) (assignedNode *DataNode, err error) { for _, node := range n.children { freeSpace := node.FreeSpace() - fmt.Println("r 
=", r, ", node =", node, ", freeSpace =", freeSpace) + // fmt.Println("r =", r, ", node =", node, ", freeSpace =", freeSpace) if freeSpace <= 0 { continue } @@ -159,7 +158,7 @@ func (n *NodeImpl) ReserveOneVolume(r int) (assignedNode *DataNode, err error) { r -= freeSpace } else { if node.IsDataNode() && node.FreeSpace() > 0 { - fmt.Println("assigned to node =", node, ", freeSpace =", node.FreeSpace()) + // fmt.Println("assigned to node =", node, ", freeSpace =", node.FreeSpace()) return node.(*DataNode), nil } assignedNode, err = node.ReserveOneVolume(r) diff --git a/go/topology/volume_growth.go b/go/topology/volume_growth.go index 167660514..820c89026 100644 --- a/go/topology/volume_growth.go +++ b/go/topology/volume_growth.go @@ -159,7 +159,7 @@ func makeExceptNodeFilter(nodes []Node) FilterNodeFn { // 2.2 collect all racks that have rp.SameRackCount+1 // 2.2 collect all data centers that have DiffRackCount+rp.SameRackCount+1 // 2. find rest data nodes -func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption, existsServer *VolumeLocationList) (additionServers []*DataNode, err error) { +func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption, existsServers *VolumeLocationList) (additionServers []*DataNode, err error) { //find main datacenter and other data centers pickNodesFn := PickLowUsageNodeFn rp := option.ReplicaPlacement @@ -194,8 +194,8 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum return mainNode, restNodes, nil } var existsNode []Node - if existsServer != nil { - existsNode = existsServer.DiffDataCenters() + if existsServers != nil { + existsNode = existsServers.DiffDataCenters() } mainDataCenter, otherDataCenters, dc_err := pickMainAndRestNodes(topo, rp.DiffDataCenterCount+1, func(node Node) error { @@ -205,8 +205,8 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum return nil, dc_err } //find main rack and other 
racks - if existsServer != nil { - existsNode = existsServer.DiffRacks(mainDataCenter.(*DataCenter)) + if existsServers != nil { + existsNode = existsServers.DiffRacks(mainDataCenter.(*DataCenter)) } else { existsNode = nil } @@ -221,8 +221,8 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum } //find main server and other servers - if existsServer != nil { - existsNode = existsServer.SameServers(mainRack.(*Rack)) + if existsServers != nil { + existsNode = existsServers.SameServers(mainRack.(*Rack)) } else { existsNode = nil } @@ -242,8 +242,11 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum if server_err != nil { return nil, server_err } + if existsServers != nil && existsServers.ContainsDataNode(mainServer.(*DataNode)) { + } else { + additionServers = append(additionServers, mainServer.(*DataNode)) + } - additionServers = append(additionServers, mainServer.(*DataNode)) for _, server := range otherServers { additionServers = append(additionServers, server.(*DataNode)) } diff --git a/go/topology/volume_growth_test.go b/go/topology/volume_growth_test.go index de08007bb..4861b1a88 100644 --- a/go/topology/volume_growth_test.go +++ b/go/topology/volume_growth_test.go @@ -5,8 +5,11 @@ import ( "fmt" "testing" + "strings" + "github.com/chrislusf/seaweedfs/go/sequence" "github.com/chrislusf/seaweedfs/go/storage" + "github.com/syndtr/goleveldb/leveldb/errors" ) var topologyLayout = ` @@ -100,6 +103,14 @@ var topologyLayout = ` } ` +var testLocList = [][]string{ + {"server111", "server121"}, + {"server111", "server112"}, + {"server111", "server112", "server113"}, + {"server111", "server221", "server321"}, + {"server112"}, +} + func setup(topologyLayout string) *Topology { var data interface{} err := json.Unmarshal([]byte(topologyLayout), &data) @@ -163,4 +174,65 @@ func TestFindEmptySlotsForOneVolume(t *testing.T) { for _, server := range servers { fmt.Printf("assigned node: %s, free space: %d\n", 
server.Id(), server.FreeSpace()) } + +} + +func getDataNodeFromId(topo *Topology, id string) (foundDn *DataNode) { + nid := NodeId(id) + topo.WalkDataNode(func(dn *DataNode) (e error) { + if dn.Id() == nid { + foundDn = dn + e = errors.New("Found.") + } + return + }) + return +} + +func setupTestLocationList(topo *Topology) (ret []*VolumeLocationList) { + + for _, ll := range testLocList { + vl := &VolumeLocationList{} + for _, nid := range ll { + if n := getDataNodeFromId(topo, nid); n != nil { + vl.list = append(vl.list, n) + } + } + ret = append(ret, vl) + } + return +} + +func joinNodeId(dns []*DataNode) string { + ss := []string{} + for _, dn := range dns { + ss = append(ss, string(dn.Id())) + } + return strings.Join(ss, ", ") +} + +func TestFindEmptySlotsWithExistsNodes(t *testing.T) { + topo := setup(topologyLayout) + vg := NewDefaultVolumeGrowth() + rp, _ := storage.NewReplicaPlacementFromString("112") + volumeGrowOption := &VolumeGrowOption{ + Collection: "", + ReplicaPlacement: rp, + DataCenter: "dc1", + Rack: "", + DataNode: "", + } + testLocationList := setupTestLocationList(topo) + for _, locationList := range testLocationList { + lrp := locationList.CalcReplicaPlacement() + t.Logf("location list: [%s], replica placement = %s\n", joinNodeId(locationList.list), lrp.String()) + if lrp.Compare(rp) < 0 { + servers, err := vg.findEmptySlotsForOneVolume(topo, volumeGrowOption, locationList) + if err != nil { + t.Log("finding empty slots error :", err) + t.Fail() + } + t.Logf("assigned node: %s\n\n", joinNodeId(servers)) + } + } } diff --git a/go/topology/volume_location_list.go b/go/topology/volume_location_list.go index 68ead10e8..9c7f60881 100644 --- a/go/topology/volume_location_list.go +++ b/go/topology/volume_location_list.go @@ -118,7 +118,9 @@ func (dnll *VolumeLocationList) DiffRacks(mainDC *DataCenter) []Node { racks = append(racks, mainRack) } for rack := range m { - racks = append(racks, rack) + if rack != mainRack { + racks = append(racks, rack) 
+ } } return racks } @@ -138,17 +140,28 @@ func (dnll *VolumeLocationList) SameServers(mainRack *Rack) (servers []Node) { } func (dnll *VolumeLocationList) CalcReplicaPlacement() (rp *storage.ReplicaPlacement) { - dcs := dnll.DiffDataCenters() - rs := dnll.DiffRacks(dcs[0].(*DataCenter)) - ss := dnll.SameServers(rs[0].(*Rack)) + var dcs, rs, ss []Node + dcs = dnll.DiffDataCenters() + if len(dcs) > 0 { + rs = dnll.DiffRacks(dcs[0].(*DataCenter)) + if len(rs) > 0 { + ss = dnll.SameServers(rs[0].(*Rack)) + } + } + rp = &storage.ReplicaPlacement{ - len(dcs) - 1, - len(rs) - 1, - len(ss) - 1, + SameRackCount: len(ss) - 1, + DiffRackCount: len(rs) - 1, + DiffDataCenterCount: len(dcs) - 1, } return } -//func (dnll *VolumeLocationList)ContainDataNode(nodeType, id string)bool { -// -//} +func (dnll *VolumeLocationList) ContainsDataNode(n *DataNode) bool { + for _, dn := range dnll.list { + if dn == n { + return true + } + } + return false +} From 9fb1954f4b0186684f09771838d6e1bf7ee629a3 Mon Sep 17 00:00:00 2001 From: tnextday Date: Mon, 4 Jan 2016 22:00:39 +0800 Subject: [PATCH 40/46] volume growth: update --- go/topology/volume_growth.go | 46 +++++++++++++++---------------- go/topology/volume_growth_test.go | 6 ++-- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/go/topology/volume_growth.go b/go/topology/volume_growth.go index 820c89026..f7f4e8187 100644 --- a/go/topology/volume_growth.go +++ b/go/topology/volume_growth.go @@ -76,7 +76,7 @@ func (vg *VolumeGrowth) GrowByCountAndType(targetCount int, option *VolumeGrowOp } func (vg *VolumeGrowth) findAndGrow(topo *Topology, option *VolumeGrowOption) (int, error) { - servers, e := vg.findEmptySlotsForOneVolume(topo, option, nil) + servers, e := FindEmptySlotsForOneVolume(topo, option, nil) if e != nil { return 0, e } @@ -85,6 +85,27 @@ func (vg *VolumeGrowth) findAndGrow(topo *Topology, option *VolumeGrowOption) (i return len(servers), err } +func (vg *VolumeGrowth) grow(topo *Topology, vid storage.VolumeId, 
option *VolumeGrowOption, servers ...*DataNode) error { + for _, server := range servers { + if err := AllocateVolume(server, vid, option); err == nil { + vi := storage.VolumeInfo{ + Id: vid, + Size: 0, + Collection: option.Collection, + Ttl: option.Ttl, + Version: storage.CurrentVersion, + } + server.AddOrUpdateVolume(vi) + topo.RegisterVolumeLayout(vi, server) + glog.V(0).Infoln("Created Volume", vid, "on", server.NodeImpl.String()) + } else { + glog.V(0).Infoln("Failed to assign volume", vid, "to", servers, "error", err) + return fmt.Errorf("Failed to assign %d: %v", vid, err) + } + } + return nil +} + func filterMainDataCenter(option *VolumeGrowOption, node Node) error { if option.DataCenter != "" && node.IsDataCenter() && node.Id() != NodeId(option.DataCenter) { return fmt.Errorf("Not matching preferred data center:%s", option.DataCenter) @@ -159,7 +180,7 @@ func makeExceptNodeFilter(nodes []Node) FilterNodeFn { // 2.2 collect all racks that have rp.SameRackCount+1 // 2.2 collect all data centers that have DiffRackCount+rp.SameRackCount+1 // 2. 
find rest data nodes -func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption, existsServers *VolumeLocationList) (additionServers []*DataNode, err error) { +func FindEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption, existsServers *VolumeLocationList) (additionServers []*DataNode, err error) { //find main datacenter and other data centers pickNodesFn := PickLowUsageNodeFn rp := option.ReplicaPlacement @@ -268,24 +289,3 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum } return } - -func (vg *VolumeGrowth) grow(topo *Topology, vid storage.VolumeId, option *VolumeGrowOption, servers ...*DataNode) error { - for _, server := range servers { - if err := AllocateVolume(server, vid, option); err == nil { - vi := storage.VolumeInfo{ - Id: vid, - Size: 0, - Collection: option.Collection, - Ttl: option.Ttl, - Version: storage.CurrentVersion, - } - server.AddOrUpdateVolume(vi) - topo.RegisterVolumeLayout(vi, server) - glog.V(0).Infoln("Created Volume", vid, "on", server.NodeImpl.String()) - } else { - glog.V(0).Infoln("Failed to assign volume", vid, "to", servers, "error", err) - return fmt.Errorf("Failed to assign %d: %v", vid, err) - } - } - return nil -} diff --git a/go/topology/volume_growth_test.go b/go/topology/volume_growth_test.go index 4861b1a88..04125e7f2 100644 --- a/go/topology/volume_growth_test.go +++ b/go/topology/volume_growth_test.go @@ -157,7 +157,6 @@ func setup(topologyLayout string) *Topology { func TestFindEmptySlotsForOneVolume(t *testing.T) { topo := setup(topologyLayout) - vg := NewDefaultVolumeGrowth() rp, _ := storage.NewReplicaPlacementFromString("111") volumeGrowOption := &VolumeGrowOption{ Collection: "", @@ -166,7 +165,7 @@ func TestFindEmptySlotsForOneVolume(t *testing.T) { Rack: "", DataNode: "", } - servers, err := vg.findEmptySlotsForOneVolume(topo, volumeGrowOption, nil) + servers, err := FindEmptySlotsForOneVolume(topo, volumeGrowOption, nil) if err != nil 
{ fmt.Println("finding empty slots error :", err) t.Fail() @@ -213,7 +212,6 @@ func joinNodeId(dns []*DataNode) string { func TestFindEmptySlotsWithExistsNodes(t *testing.T) { topo := setup(topologyLayout) - vg := NewDefaultVolumeGrowth() rp, _ := storage.NewReplicaPlacementFromString("112") volumeGrowOption := &VolumeGrowOption{ Collection: "", @@ -227,7 +225,7 @@ func TestFindEmptySlotsWithExistsNodes(t *testing.T) { lrp := locationList.CalcReplicaPlacement() t.Logf("location list: [%s], replica placement = %s\n", joinNodeId(locationList.list), lrp.String()) if lrp.Compare(rp) < 0 { - servers, err := vg.findEmptySlotsForOneVolume(topo, volumeGrowOption, locationList) + servers, err := FindEmptySlotsForOneVolume(topo, volumeGrowOption, locationList) if err != nil { t.Log("finding empty slots error :", err) t.Fail() From 955d4b25d83f568962fc8854d491f769b067beaf Mon Sep 17 00:00:00 2001 From: tnextday Date: Mon, 4 Jan 2016 23:51:40 +0800 Subject: [PATCH 41/46] Node: add planned vloume count --- go/topology/node.go | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/go/topology/node.go b/go/topology/node.go index a8c5c0ded..655e496b1 100644 --- a/go/topology/node.go +++ b/go/topology/node.go @@ -19,11 +19,13 @@ type Node interface { UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta int) UpAdjustVolumeCountDelta(volumeCountDelta int) UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int) + UpAdjustPlannedVolumeCountDelta(delta int) UpAdjustMaxVolumeId(vid storage.VolumeId) GetVolumeCount() int GetActiveVolumeCount() int GetMaxVolumeCount() int + GetPlannedVolumeCount() int GetMaxVolumeId() storage.VolumeId SetParent(Node) LinkChildNode(node Node) @@ -39,13 +41,14 @@ type Node interface { GetValue() interface{} //get reference to the topology,dc,rack,datanode } type NodeImpl struct { - id NodeId - volumeCount int - activeVolumeCount int - maxVolumeCount int - parent Node - children map[NodeId]Node - maxVolumeId 
storage.VolumeId + id NodeId + volumeCount int + activeVolumeCount int + maxVolumeCount int + plannedVolumeCount int + parent Node + children map[NodeId]Node + maxVolumeId storage.VolumeId //for rack, data center, topology nodeType string @@ -133,7 +136,7 @@ func (n *NodeImpl) Id() NodeId { return n.id } func (n *NodeImpl) FreeSpace() int { - return n.maxVolumeCount - n.volumeCount + return n.maxVolumeCount - n.volumeCount - n.plannedVolumeCount } func (n *NodeImpl) SetParent(node Node) { n.parent = node @@ -188,6 +191,14 @@ func (n *NodeImpl) UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int) { n.parent.UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta) } } + +func (n *NodeImpl) UpAdjustPlannedVolumeCountDelta(delta int) { //can be negative + n.plannedVolumeCount += delta + if n.parent != nil { + n.parent.UpAdjustPlannedVolumeCountDelta(delta) + } +} + func (n *NodeImpl) UpAdjustMaxVolumeId(vid storage.VolumeId) { //can be negative if n.maxVolumeId < vid { n.maxVolumeId = vid @@ -209,6 +220,10 @@ func (n *NodeImpl) GetMaxVolumeCount() int { return n.maxVolumeCount } +func (n *NodeImpl) GetPlannedVolumeCount() int { + return n.plannedVolumeCount +} + func (n *NodeImpl) LinkChildNode(node Node) { if n.children[node.Id()] == nil { n.children[node.Id()] = node From 8b32d2330448cc76ed6d82599ec1618c6aa66656 Mon Sep 17 00:00:00 2001 From: tnextday Date: Wed, 6 Jan 2016 12:35:59 +0800 Subject: [PATCH 42/46] master: check volume ReplicatePlacement and auto replicate the lost --- go/topology/collection.go | 2 +- go/topology/topology.go | 2 +- go/topology/topology_event_handling.go | 1 + go/topology/topology_replicate.go | 128 ++++++++++++++++-- go/topology/volume_layout.go | 4 +- go/topology/volume_location_list.go | 9 ++ go/weed/weed_server/master_server_handlers.go | 6 +- .../master_server_handlers_admin.go | 7 +- 8 files changed, 134 insertions(+), 25 deletions(-) diff --git a/go/topology/collection.go b/go/topology/collection.go index 3d7bb7e0e..f8217a7ff 
100644 --- a/go/topology/collection.go +++ b/go/topology/collection.go @@ -35,7 +35,7 @@ func (c *Collection) GetOrCreateVolumeLayout(ttl *storage.TTL) *VolumeLayout { return vl.(*VolumeLayout) } -func (c *Collection) Lookup(vid storage.VolumeId) []*DataNode { +func (c *Collection) Lookup(vid storage.VolumeId) *VolumeLocationList { for _, vl := range c.storageType2VolumeLayout.Items { if vl != nil { if list := vl.(*VolumeLayout).Lookup(vid); list != nil { diff --git a/go/topology/topology.go b/go/topology/topology.go index 6cdd1e1fa..410a1c70e 100644 --- a/go/topology/topology.go +++ b/go/topology/topology.go @@ -90,7 +90,7 @@ func (t *Topology) loadConfiguration(configurationFile string) error { return nil } -func (t *Topology) Lookup(collection string, vid storage.VolumeId) []*DataNode { +func (t *Topology) Lookup(collection string, vid storage.VolumeId) *VolumeLocationList { //maybe an issue if lots of collections? if collection == "" { for _, c := range t.collectionMap.Items { diff --git a/go/topology/topology_event_handling.go b/go/topology/topology_event_handling.go index 2bb2a9d66..8ad8fc67e 100644 --- a/go/topology/topology_event_handling.go +++ b/go/topology/topology_event_handling.go @@ -23,6 +23,7 @@ func (t *Topology) StartRefreshWritableVolumes(garbageThreshold string) { if t.IsLeader() { for range c { t.Vacuum(garbageThreshold) + t.CheckReplicate() } } }(garbageThreshold) diff --git a/go/topology/topology_replicate.go b/go/topology/topology_replicate.go index 000d6ef4f..5a0186be9 100644 --- a/go/topology/topology_replicate.go +++ b/go/topology/topology_replicate.go @@ -1,38 +1,138 @@ package topology import ( + "container/list" + "fmt" + "time" + "github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/storage" ) -func (t *Topology) CheckReplicate() int { - glog.V(0).Infoln("Start replicate checker on demand") +const ReplicateTaskTimeout = time.Hour + +type ReplicateTask struct { + Vid storage.VolumeId + Collection string + SrcDN 
*DataNode + DstDN *DataNode +} + +func planReplicateTasks(t *Topology) (tasks []*ReplicateTask) { for _, col := range t.collectionMap.Items { c := col.(*Collection) glog.V(0).Infoln("checking replicate on collection:", c.Name) + growOption := &VolumeGrowOption{ReplicaPlacement: c.rp} for _, vl := range c.storageType2VolumeLayout.Items { if vl != nil { volumeLayout := vl.(*VolumeLayout) - copyCount := volumeLayout.rp.GetCopyCount() for vid, locationList := range volumeLayout.vid2location { - if locationList.Length() < copyCount { - //set volume readonly - glog.V(0).Infoln("replicate volume :", vid) - SetVolumeReadonly(locationList, vid.String(), true) - + rp1 := locationList.CalcReplicaPlacement() + if rp1.Compare(volumeLayout.rp) >= 0 { + continue + } + if additionServers, e := FindEmptySlotsForOneVolume(t, growOption, locationList); e == nil { + for _, s := range additionServers { + s.UpAdjustPlannedVolumeCountDelta(1) + rt := &ReplicateTask{ + Vid: vid, + Collection: c.Name, + SrcDN: locationList.PickForRead(), + DstDN: s, + } + tasks = append(tasks, rt) + glog.V(0).Infof("add replicate task, vid: %v, src: %s, dst: %s", vid, rt.SrcDN.Url(), rt.DstDN.Url()) + } + } else { + glog.V(0).Infof("find empty slots error, vid: %v, rp: %s => %s, %v", vid, rp1.String(), volumeLayout.rp.String(), e) } } } } } - return 0 + return } -func (t *Topology) doReplicate(vl *VolumeLayout, vid storage.VolumeId) { - locationList := vl.vid2location[vid] - if !SetVolumeReadonly(locationList, vid.String(), true) { - return +func (topo *Topology) CheckReplicate() { + glog.V(1).Infoln("Start replicate checker on demand") + busyDataNodes := make(map[*DataNode]int) + taskCount := 0 + taskQueue := list.New() + for _, t := range planReplicateTasks(topo) { + taskQueue.PushBack(t) + taskCount++ } - defer SetVolumeReadonly(locationList, vid.String(), false) + taskChan := make(chan *ReplicateTask) + for taskCount > 0 { + TaskQueueLoop: + for e := taskQueue.Front(); e != nil; e = e.Next() { + task 
:= e.Value.(*ReplicateTask) + //only one task will run on the data node + dns := task.WorkingDataNodes() + for _, dn := range dns { + if busyDataNodes[dn] > 0 { + continue TaskQueueLoop + } + } + for _, dn := range dns { + busyDataNodes[dn]++ + } + go func(t *ReplicateTask) { + if e := t.Run(topo); e != nil { + glog.V(0).Infof("ReplicateTask run error, vid: %v, dst: %s. %v", t.Vid, t.DstDN.Url(), e) + } else { + glog.V(2).Infof("ReplicateTask finished, vid: %v, dst: %s", t.Vid, t.DstDN.Url()) + + } + taskChan <- t + }(task) + taskQueue.Remove(e) + + } + finishedTask := <-taskChan + for _, dn := range finishedTask.WorkingDataNodes() { + if busyDataNodes[dn] > 0 { + busyDataNodes[dn]-- + } + } + taskCount-- + finishedTask.DstDN.UpAdjustPlannedVolumeCountDelta(-1) + } + glog.V(1).Infoln("finish replicate check.") +} + +func (t *ReplicateTask) Run(topo *Topology) error { + //is lookup thread safe? + locationList := topo.Lookup(t.Collection, t.Vid) + rp := topo.CollectionSettings.GetReplicaPlacement(t.Collection) + if locationList.CalcReplicaPlacement().Compare(rp) >= 0 { + glog.V(0).Infof("volume [%v] has right replica placement, rp: %s", t.Vid, rp.String()) + return nil + } + if !SetVolumeReadonly(locationList, t.Vid.String(), true) { + return fmt.Errorf("set volume readonly failed, vid=%v", t.Vid) + } + defer SetVolumeReadonly(locationList, t.Vid.String(), false) + tc, e := storage.NewTaskCli(t.DstDN.Url(), storage.TaskReplica, storage.TaskParams{ + "volume": t.Vid.String(), + "source": t.SrcDN.Url(), + "collection": t.Collection, + }) + if e != nil { + return e + } + if e = tc.WaitAndQueryResult(ReplicateTaskTimeout); e != nil { + tc.Clean() + return e + } + e = tc.Commit() + return e +} + +func (t *ReplicateTask) WorkingDataNodes() []*DataNode { + return []*DataNode{ + t.SrcDN, + t.DstDN, + } } diff --git a/go/topology/volume_layout.go b/go/topology/volume_layout.go index 8a922f945..2a38fda99 100644 --- a/go/topology/volume_layout.go +++ 
b/go/topology/volume_layout.go @@ -73,9 +73,9 @@ func (vl *VolumeLayout) isWritable(v *storage.VolumeInfo) bool { !v.ReadOnly } -func (vl *VolumeLayout) Lookup(vid storage.VolumeId) []*DataNode { +func (vl *VolumeLayout) Lookup(vid storage.VolumeId) *VolumeLocationList { if location := vl.vid2location[vid]; location != nil { - return location.list + return location } return nil } diff --git a/go/topology/volume_location_list.go b/go/topology/volume_location_list.go index 9c7f60881..929d0c8a3 100644 --- a/go/topology/volume_location_list.go +++ b/go/topology/volume_location_list.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/chrislusf/seaweedfs/go/storage" + "math/rand" ) type VolumeLocationList struct { @@ -23,6 +24,14 @@ func (dnll *VolumeLocationList) Head() *DataNode { return dnll.list[0] } +func (dnll *VolumeLocationList) PickForRead() *DataNode { + return dnll.list[rand.Intn(len(dnll.list))] +} + +func (dnll *VolumeLocationList) AllDataNode() []*DataNode { + return dnll.list +} + func (dnll *VolumeLocationList) Length() int { return len(dnll.list) } diff --git a/go/weed/weed_server/master_server_handlers.go b/go/weed/weed_server/master_server_handlers.go index 6a5b06c3c..a61dd765d 100644 --- a/go/weed/weed_server/master_server_handlers.go +++ b/go/weed/weed_server/master_server_handlers.go @@ -23,10 +23,10 @@ func (ms *MasterServer) lookupVolumeId(vids []string, collection string) (volume } volumeId, err := storage.NewVolumeId(vid) if err == nil { - machines := ms.Topo.Lookup(collection, volumeId) - if machines != nil { + locationList := ms.Topo.Lookup(collection, volumeId) + if locationList != nil { var ret operation.Locations - for _, dn := range machines { + for _, dn := range locationList.AllDataNode() { ret = append(ret, operation.Location{Url: dn.Url(), PublicUrl: dn.PublicUrl}) } volumeLocations[vid] = operation.LookupResult{VolumeId: vid, Locations: ret} diff --git a/go/weed/weed_server/master_server_handlers_admin.go 
b/go/weed/weed_server/master_server_handlers_admin.go index 4b7f809ec..4c883ae2a 100644 --- a/go/weed/weed_server/master_server_handlers_admin.go +++ b/go/weed/weed_server/master_server_handlers_admin.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "io/ioutil" - "math/rand" "net/http" "strconv" "strings" @@ -124,9 +123,9 @@ func (ms *MasterServer) redirectHandler(w http.ResponseWriter, r *http.Request) debug("parsing error:", err, r.URL.Path) return } - machines := ms.Topo.Lookup("", volumeId) - if machines != nil && len(machines) > 0 { - http.Redirect(w, r, util.NormalizeUrl(machines[rand.Intn(len(machines))].PublicUrl)+r.URL.Path, http.StatusMovedPermanently) + locations := ms.Topo.Lookup("", volumeId) + if locations != nil && locations.Length() > 0 { + http.Redirect(w, r, util.NormalizeUrl(locations.PickForRead().PublicUrl)+r.URL.Path, http.StatusMovedPermanently) } else { writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("volume id %d not found", volumeId)) } From b39fedd3cc3605f1efc88a45aa1ea52133a70556 Mon Sep 17 00:00:00 2001 From: tnextday Date: Wed, 6 Jan 2016 15:11:01 +0800 Subject: [PATCH 43/46] master: disable auto check replicate and add a manual api `/vol/check_replicate` --- go/topology/topology_event_handling.go | 2 +- go/topology/topology_replicate.go | 78 +++++++++++-------- go/weed/weed_server/master_server.go | 1 + .../master_server_handlers_admin.go | 5 ++ 4 files changed, 54 insertions(+), 32 deletions(-) diff --git a/go/topology/topology_event_handling.go b/go/topology/topology_event_handling.go index 8ad8fc67e..65792a08a 100644 --- a/go/topology/topology_event_handling.go +++ b/go/topology/topology_event_handling.go @@ -23,7 +23,7 @@ func (t *Topology) StartRefreshWritableVolumes(garbageThreshold string) { if t.IsLeader() { for range c { t.Vacuum(garbageThreshold) - t.CheckReplicate() + // t.CheckReplicate() } } }(garbageThreshold) diff --git a/go/topology/topology_replicate.go b/go/topology/topology_replicate.go index 5a0186be9..13400e0d7 
100644 --- a/go/topology/topology_replicate.go +++ b/go/topology/topology_replicate.go @@ -9,6 +9,10 @@ import ( "github.com/chrislusf/seaweedfs/go/storage" ) +var ( + isReplicateCheckerRunning = false +) + const ReplicateTaskTimeout = time.Hour type ReplicateTask struct { @@ -18,6 +22,41 @@ type ReplicateTask struct { DstDN *DataNode } +func (t *ReplicateTask) Run(topo *Topology) error { + //is lookup thread safe? + locationList := topo.Lookup(t.Collection, t.Vid) + rp := topo.CollectionSettings.GetReplicaPlacement(t.Collection) + if locationList.CalcReplicaPlacement().Compare(rp) >= 0 { + glog.V(0).Infof("volume [%v] has right replica placement, rp: %s", t.Vid, rp.String()) + return nil + } + if !SetVolumeReadonly(locationList, t.Vid.String(), true) { + return fmt.Errorf("set volume readonly failed, vid=%v", t.Vid) + } + defer SetVolumeReadonly(locationList, t.Vid.String(), false) + tc, e := storage.NewTaskCli(t.DstDN.Url(), storage.TaskReplica, storage.TaskParams{ + "volume": t.Vid.String(), + "source": t.SrcDN.Url(), + "collection": t.Collection, + }) + if e != nil { + return e + } + if e = tc.WaitAndQueryResult(ReplicateTaskTimeout); e != nil { + tc.Clean() + return e + } + e = tc.Commit() + return e +} + +func (t *ReplicateTask) WorkingDataNodes() []*DataNode { + return []*DataNode{ + t.SrcDN, + t.DstDN, + } +} + func planReplicateTasks(t *Topology) (tasks []*ReplicateTask) { for _, col := range t.collectionMap.Items { c := col.(*Collection) @@ -54,6 +93,10 @@ func planReplicateTasks(t *Topology) (tasks []*ReplicateTask) { } func (topo *Topology) CheckReplicate() { + isReplicateCheckerRunning = true + defer func() { + isReplicateCheckerRunning = false + }() glog.V(1).Infoln("Start replicate checker on demand") busyDataNodes := make(map[*DataNode]int) taskCount := 0 @@ -102,37 +145,10 @@ func (topo *Topology) CheckReplicate() { glog.V(1).Infoln("finish replicate check.") } -func (t *ReplicateTask) Run(topo *Topology) error { - //is lookup thread safe? 
- locationList := topo.Lookup(t.Collection, t.Vid) - rp := topo.CollectionSettings.GetReplicaPlacement(t.Collection) - if locationList.CalcReplicaPlacement().Compare(rp) >= 0 { - glog.V(0).Infof("volume [%v] has right replica placement, rp: %s", t.Vid, rp.String()) - return nil - } - if !SetVolumeReadonly(locationList, t.Vid.String(), true) { - return fmt.Errorf("set volume readonly failed, vid=%v", t.Vid) - } - defer SetVolumeReadonly(locationList, t.Vid.String(), false) - tc, e := storage.NewTaskCli(t.DstDN.Url(), storage.TaskReplica, storage.TaskParams{ - "volume": t.Vid.String(), - "source": t.SrcDN.Url(), - "collection": t.Collection, - }) - if e != nil { - return e - } - if e = tc.WaitAndQueryResult(ReplicateTaskTimeout); e != nil { - tc.Clean() - return e +func (topo *Topology) StartCheckReplicate() { + if isReplicateCheckerRunning { + return } - e = tc.Commit() - return e -} + go topo.CheckReplicate() -func (t *ReplicateTask) WorkingDataNodes() []*DataNode { - return []*DataNode{ - t.SrcDN, - t.DstDN, - } } diff --git a/go/weed/weed_server/master_server.go b/go/weed/weed_server/master_server.go index 1adb8820e..62ec5c9aa 100644 --- a/go/weed/weed_server/master_server.go +++ b/go/weed/weed_server/master_server.go @@ -73,6 +73,7 @@ func NewMasterServer(r *mux.Router, port int, metaFolder string, r.HandleFunc("/vol/grow", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeGrowHandler))) r.HandleFunc("/vol/status", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeStatusHandler))) r.HandleFunc("/vol/vacuum", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeVacuumHandler))) + r.HandleFunc("/vol/check_replicate", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeCheckReplicateHandler))) r.HandleFunc("/submit", ms.guard.WhiteList(ms.submitFromMasterServerHandler)) r.HandleFunc("/delete", ms.guard.WhiteList(ms.deleteFromMasterServerHandler)) r.HandleFunc("/{fileId}", ms.proxyToLeader(ms.redirectHandler)) diff --git a/go/weed/weed_server/master_server_handlers_admin.go 
b/go/weed/weed_server/master_server_handlers_admin.go index 4c883ae2a..129734692 100644 --- a/go/weed/weed_server/master_server_handlers_admin.go +++ b/go/weed/weed_server/master_server_handlers_admin.go @@ -84,6 +84,11 @@ func (ms *MasterServer) volumeVacuumHandler(w http.ResponseWriter, r *http.Reque ms.dirStatusHandler(w, r) } +func (ms *MasterServer) volumeCheckReplicateHandler(w http.ResponseWriter, r *http.Request) { + ms.Topo.StartCheckReplicate() + ms.dirStatusHandler(w, r) +} + func (ms *MasterServer) volumeGrowHandler(w http.ResponseWriter, r *http.Request) { count := 0 option, err := ms.getVolumeGrowOption(r) From a4cb8c659cb0317dfed767f176ad0ea2fbc40e5a Mon Sep 17 00:00:00 2001 From: tnextday Date: Wed, 6 Jan 2016 21:50:28 +0800 Subject: [PATCH 44/46] *: test and fix bug --- go/operation/system_message.pb.go | 77 ++++++++++++++++++- go/proto/system_message.proto | 2 +- go/storage/collection_settings.go | 41 +++++----- go/storage/store_task.go | 11 +-- go/storage/store_task_cli.go | 6 +- go/topology/collection.go | 2 +- go/topology/topology_replicate.go | 2 +- go/topology/volume_growth.go | 2 +- go/util/http_util.go | 36 +++++++-- .../volume_server_handlers_sync.go | 6 +- .../volume_server_handlers_task.go | 11 ++- 11 files changed, 152 insertions(+), 44 deletions(-) diff --git a/go/operation/system_message.pb.go b/go/operation/system_message.pb.go index 742a1ca4e..2574b2af6 100644 --- a/go/operation/system_message.pb.go +++ b/go/operation/system_message.pb.go @@ -11,6 +11,9 @@ It is generated from these files: It has these top-level messages: VolumeInformationMessage JoinMessage + CollectionSetting + GlobalSetting + JoinResponse */ package operation @@ -29,7 +32,7 @@ type VolumeInformationMessage struct { DeleteCount *uint64 `protobuf:"varint,5,req,name=delete_count" json:"delete_count,omitempty"` DeletedByteCount *uint64 `protobuf:"varint,6,req,name=deleted_byte_count" json:"deleted_byte_count,omitempty"` ReadOnly *bool 
`protobuf:"varint,7,opt,name=read_only" json:"read_only,omitempty"` - ReplicaPlacement *uint32 `protobuf:"varint,8,req,name=replica_placement" json:"replica_placement,omitempty"` + ReplicaPlacement *uint32 `protobuf:"varint,8,opt,name=replica_placement" json:"replica_placement,omitempty"` Version *uint32 `protobuf:"varint,9,opt,name=version,def=2" json:"version,omitempty"` Ttl *uint32 `protobuf:"varint,10,opt,name=ttl" json:"ttl,omitempty"` XXX_unrecognized []byte `json:"-"` @@ -199,5 +202,77 @@ func (m *JoinMessage) GetAdminPort() uint32 { return 0 } +type CollectionSetting struct { + Collection *string `protobuf:"bytes,1,opt,name=collection" json:"collection,omitempty"` + ReplicaPlacement *string `protobuf:"bytes,2,opt,name=replica_placement" json:"replica_placement,omitempty"` + VacuumGarbageThreshold *float32 `protobuf:"fixed32,3,opt,name=vacuum_garbage_threshold" json:"vacuum_garbage_threshold,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CollectionSetting) Reset() { *m = CollectionSetting{} } +func (m *CollectionSetting) String() string { return proto.CompactTextString(m) } +func (*CollectionSetting) ProtoMessage() {} + +func (m *CollectionSetting) GetCollection() string { + if m != nil && m.Collection != nil { + return *m.Collection + } + return "" +} + +func (m *CollectionSetting) GetReplicaPlacement() string { + if m != nil && m.ReplicaPlacement != nil { + return *m.ReplicaPlacement + } + return "" +} + +func (m *CollectionSetting) GetVacuumGarbageThreshold() float32 { + if m != nil && m.VacuumGarbageThreshold != nil { + return *m.VacuumGarbageThreshold + } + return 0 +} + +type GlobalSetting struct { + Settings []*CollectionSetting `protobuf:"bytes,1,rep,name=settings" json:"settings,omitempty"` + MasterPeers []string `protobuf:"bytes,2,rep,name=master_peers" json:"master_peers,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *GlobalSetting) Reset() { *m = GlobalSetting{} } +func (m *GlobalSetting) String() string { return 
proto.CompactTextString(m) } +func (*GlobalSetting) ProtoMessage() {} + +func (m *GlobalSetting) GetSettings() []*CollectionSetting { + if m != nil { + return m.Settings + } + return nil +} + +func (m *GlobalSetting) GetMasterPeers() []string { + if m != nil { + return m.MasterPeers + } + return nil +} + +type JoinResponse struct { + Settings *GlobalSetting `protobuf:"bytes,1,opt,name=settings" json:"settings,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *JoinResponse) Reset() { *m = JoinResponse{} } +func (m *JoinResponse) String() string { return proto.CompactTextString(m) } +func (*JoinResponse) ProtoMessage() {} + +func (m *JoinResponse) GetSettings() *GlobalSetting { + if m != nil { + return m.Settings + } + return nil +} + func init() { } diff --git a/go/proto/system_message.proto b/go/proto/system_message.proto index b7a2456f6..30dd2a22c 100644 --- a/go/proto/system_message.proto +++ b/go/proto/system_message.proto @@ -8,7 +8,7 @@ message VolumeInformationMessage { required uint64 delete_count = 5; required uint64 deleted_byte_count = 6; optional bool read_only = 7; - required uint32 replica_placement = 8; + optional uint32 replica_placement = 8; optional uint32 version = 9 [default=2]; optional uint32 ttl = 10; } diff --git a/go/storage/collection_settings.go b/go/storage/collection_settings.go index 89fc89d28..ec98b5d9b 100644 --- a/go/storage/collection_settings.go +++ b/go/storage/collection_settings.go @@ -3,8 +3,8 @@ package storage type SettingKey int const ( - KeyReplicatePlacement SettingKey = iota - KeyGarbageThreshold + keyReplicatePlacement SettingKey = iota + keyGarbageThreshold ) type CollectionSettings struct { @@ -19,50 +19,55 @@ func NewCollectionSettings(defaultReplicatePlacement, defaultGarbageThreshold st c := &CollectionSettings{ settings: make(map[string]map[SettingKey]interface{}), } - c.Set("", KeyReplicatePlacement, rp) - c.Set("", KeyGarbageThreshold, defaultGarbageThreshold) + c.set("", keyReplicatePlacement, rp) + 
c.set("", keyGarbageThreshold, defaultGarbageThreshold) return c } -func (c *CollectionSettings) Get(collection string, key SettingKey) interface{} { +func (c *CollectionSettings) get(collection string, key SettingKey) interface{} { if m, ok := c.settings[collection]; ok { if v, ok := m[key]; ok { return v } } if m, ok := c.settings[""]; ok { - if v, ok := m[key]; ok { - return v - } + return m[key] } return nil } -func (c *CollectionSettings) Set(collection string, key SettingKey, value interface{}) { - if _, ok := c.settings[collection]; !ok { - c.settings[collection] = make(map[SettingKey]interface{}) +func (c *CollectionSettings) set(collection string, key SettingKey, value interface{}) { + m := c.settings[collection] + if m == nil { + m = make(map[SettingKey]interface{}) + c.settings[collection] = m } if value == nil { - delete(c.settings[collection], key) + //mustn't delete default setting + if collection != "" { + delete(m, key) + } + } else { + m[key] = value } } -func (c *CollectionSettings) GetGarbageThreshold(collection string) float32 { - return c.Get(collection, KeyGarbageThreshold).(float32) +func (c *CollectionSettings) GetGarbageThreshold(collection string) string { + return c.get(collection, keyGarbageThreshold).(string) } -func (c *CollectionSettings) SetGarbageThreshold(collection string, gt float32) { - c.Set(collection, KeyGarbageThreshold, gt) +func (c *CollectionSettings) SetGarbageThreshold(collection string, gt string) { + c.set(collection, keyGarbageThreshold, gt) } func (c *CollectionSettings) GetReplicaPlacement(collection string) *ReplicaPlacement { - return c.Get(collection, KeyReplicatePlacement).(*ReplicaPlacement) + return c.get(collection, keyReplicatePlacement).(*ReplicaPlacement) } func (c *CollectionSettings) SetReplicaPlacement(collection, t string) error { rp, e := NewReplicaPlacementFromString(t) if e == nil { - c.Set(collection, KeyReplicatePlacement, rp) + c.set(collection, keyReplicatePlacement, rp) } return e } diff --git 
a/go/storage/store_task.go b/go/storage/store_task.go index 34ac8b07a..25e6f1fb4 100644 --- a/go/storage/store_task.go +++ b/go/storage/store_task.go @@ -9,9 +9,9 @@ import ( ) const ( - TaskVacuum = "VACUUM" - TaskReplica = "REPLICA" - TaskBalance = "BALANCE" + TaskVacuum = "vacuum" + TaskReplicate = "replicate" + TaskBalance = "balance" ) var ( @@ -79,16 +79,17 @@ func NewTaskManager() *TaskManager { func (tm *TaskManager) NewTask(s *Store, args url.Values) (tid string, e error) { tt := args.Get("task") - vid := args.Get("volumme") + vid := args.Get("volume") tid = tt + "-" + vid if _, ok := tm.TaskList[tid]; ok { return tid, ErrTaskExists } + var tw TaskWorker switch tt { case TaskVacuum: tw, e = NewVacuumTask(s, args) - case TaskReplica: + case TaskReplicate: tw, e = NewReplicaTask(s, args) case TaskBalance: } diff --git a/go/storage/store_task_cli.go b/go/storage/store_task_cli.go index 06a18235c..90d22ce83 100644 --- a/go/storage/store_task_cli.go +++ b/go/storage/store_task_cli.go @@ -44,7 +44,7 @@ func NewTaskCli(dataNode string, taskType string, params TaskParams) (*TaskCli, func (c *TaskCli) WaitAndQueryResult(timeout time.Duration) error { startTime := time.Now() args := url.Values{} - args.Set("task", c.TID) + args.Set("tid", c.TID) args.Set("timeout", time.Minute.String()) tryTimes := 0 for time.Since(startTime) < timeout { @@ -74,14 +74,14 @@ func (c *TaskCli) WaitAndQueryResult(timeout time.Duration) error { func (c *TaskCli) Commit() error { args := url.Values{} - args.Set("task", c.TID) + args.Set("tid", c.TID) _, e := util.RemoteApiCall(c.DataNode, "/admin/task/commit", args) return e } func (c *TaskCli) Clean() error { args := url.Values{} - args.Set("task", c.TID) + args.Set("tid", c.TID) _, e := util.RemoteApiCall(c.DataNode, "/admin/task/clean", args) return e } diff --git a/go/topology/collection.go b/go/topology/collection.go index f8217a7ff..e5c7b0f0f 100644 --- a/go/topology/collection.go +++ b/go/topology/collection.go @@ -15,7 +15,7 @@ 
type Collection struct { } func NewCollection(name string, rp *storage.ReplicaPlacement, volumeSizeLimit uint64) *Collection { - c := &Collection{Name: name, volumeSizeLimit: volumeSizeLimit} + c := &Collection{Name: name, volumeSizeLimit: volumeSizeLimit, rp: rp} c.storageType2VolumeLayout = util.NewConcurrentReadMap() return c } diff --git a/go/topology/topology_replicate.go b/go/topology/topology_replicate.go index 13400e0d7..92d5f48c6 100644 --- a/go/topology/topology_replicate.go +++ b/go/topology/topology_replicate.go @@ -34,7 +34,7 @@ func (t *ReplicateTask) Run(topo *Topology) error { return fmt.Errorf("set volume readonly failed, vid=%v", t.Vid) } defer SetVolumeReadonly(locationList, t.Vid.String(), false) - tc, e := storage.NewTaskCli(t.DstDN.Url(), storage.TaskReplica, storage.TaskParams{ + tc, e := storage.NewTaskCli(t.DstDN.Url(), storage.TaskReplicate, storage.TaskParams{ "volume": t.Vid.String(), "source": t.SrcDN.Url(), "collection": t.Collection, diff --git a/go/topology/volume_growth.go b/go/topology/volume_growth.go index f7f4e8187..311f20a3d 100644 --- a/go/topology/volume_growth.go +++ b/go/topology/volume_growth.go @@ -200,7 +200,7 @@ func FindEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption, exists mainNode = mainNodes[0] existsNodes = append(existsNodes, mainNode) } - glog.V(2).Infoln(mainNode.Id(), "picked main node:", mainNode.Id()) + glog.V(3).Infoln(mainNode.Id(), "picked main node:", mainNode.Id()) restCount := totalNodeCount - len(existsNodes) diff --git a/go/util/http_util.go b/go/util/http_util.go index ceae6faa7..d53c26d18 100644 --- a/go/util/http_util.go +++ b/go/util/http_util.go @@ -13,7 +13,10 @@ import ( "os" + "github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/security" + "github.com/pierrec/lz4" + "strconv" ) var ( @@ -55,6 +58,7 @@ func PostBytes(url string, body []byte) ([]byte, error) { func PostEx(host, path string, values url.Values) (content []byte, statusCode int, e error) { 
url := MkUrl(host, path, nil) + glog.V(4).Infoln("Post", url+"?"+values.Encode()) r, err := client.PostForm(url, values) if err != nil { return nil, 0, err @@ -94,13 +98,13 @@ func RemoteApiCall(host, path string, values url.Values) (result map[string]inte return nil, e } result = make(map[string]interface{}) - if e := json.Unmarshal(jsonBlob, result); e != nil { + if e := json.Unmarshal(jsonBlob, &result); e != nil { return nil, e } if err, ok := result["error"]; ok && err.(string) != "" { return nil, &RApiError{E: err.(string)} } - if code != http.StatusOK || code != http.StatusAccepted { + if code != http.StatusOK && code != http.StatusAccepted { return nil, fmt.Errorf("RemoteApiCall %s/%s return %d", host, path, code) } return result, nil @@ -145,7 +149,7 @@ func Delete(url string, jwt security.EncodedJwt) error { return nil } m := make(map[string]interface{}) - if e := json.Unmarshal(body, m); e == nil { + if e := json.Unmarshal(body, &m); e == nil { if s, ok := m["error"].(string); ok { return errors.New(s) } @@ -211,16 +215,36 @@ func DownloadUrl(fileUrl string) (filename string, rc io.ReadCloser, e error) { } func DownloadToFile(fileUrl, savePath string) (e error) { - _, rc, err := DownloadUrl(fileUrl) + response, err := client.Get(fileUrl) if err != nil { return err } - defer rc.Close() + defer response.Body.Close() + if response.StatusCode != http.StatusOK { + return fmt.Errorf("%s: %s", fileUrl, response.Status) + } + var r io.Reader + content_encoding := strings.ToLower(response.Header.Get("Content-Encoding")) + size := response.ContentLength + if n, e := strconv.ParseInt(response.Header.Get("X-Content-Length"), 10, 64); e == nil { + size = n + } + switch content_encoding { + case "lz4": + r = lz4.NewReader(response.Body) + default: + r = response.Body + } var f *os.File if f, e = os.OpenFile(savePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm); e != nil { return } - _, e = io.Copy(f, rc) + if size >= 0 { + _, e = io.CopyN(f, r, size) + } else { + 
_, e = io.Copy(f, r) + } + f.Close() return } diff --git a/go/weed/weed_server/volume_server_handlers_sync.go b/go/weed/weed_server/volume_server_handlers_sync.go index fef434d28..6f0d5aa8b 100644 --- a/go/weed/weed_server/volume_server_handlers_sync.go +++ b/go/weed/weed_server/volume_server_handlers_sync.go @@ -91,7 +91,7 @@ func (vs *VolumeServer) getVolume(volumeParameterName string, r *http.Request) ( func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http.Request) { v, e := vs.getVolume("volume", r) if v == nil { - http.Error(w, fmt.Sprintf("Not Found volume: %v", e), http.StatusBadRequest) + http.Error(w, e.Error(), http.StatusBadRequest) return } cr, e := v.GetVolumeCleanReader() @@ -109,7 +109,7 @@ func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http rangeReq := r.Header.Get("Range") if rangeReq == "" { - w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10)) + w.Header().Set("X-Content-Length", strconv.FormatInt(totalSize, 10)) w.Header().Set("Content-Encoding", "lz4") lz4w := lz4.NewWriter(w) if _, e = io.Copy(lz4w, cr); e != nil { @@ -132,7 +132,7 @@ func (vs *VolumeServer) getVolumeCleanDataHandler(w http.ResponseWriter, r *http http.Error(w, e.Error(), http.StatusInternalServerError) return } - w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10)) + w.Header().Set("X-Content-Length", strconv.FormatInt(ra.length, 10)) w.Header().Set("Content-Range", ra.contentRange(totalSize)) w.Header().Set("Content-Encoding", "lz4") w.WriteHeader(http.StatusPartialContent) diff --git a/go/weed/weed_server/volume_server_handlers_task.go b/go/weed/weed_server/volume_server_handlers_task.go index cd0319660..9677e7d95 100644 --- a/go/weed/weed_server/volume_server_handlers_task.go +++ b/go/weed/weed_server/volume_server_handlers_task.go @@ -12,6 +12,7 @@ import ( ) func (vs *VolumeServer) newTaskHandler(w http.ResponseWriter, r *http.Request) { + r.ParseForm() tid, e := 
vs.store.TaskManager.NewTask(vs.store, r.Form) if e == nil { writeJsonQuiet(w, r, http.StatusOK, map[string]string{"tid": tid}) @@ -22,8 +23,8 @@ func (vs *VolumeServer) newTaskHandler(w http.ResponseWriter, r *http.Request) { } func (vs *VolumeServer) queryTaskHandler(w http.ResponseWriter, r *http.Request) { - tid := r.Form.Get("tid") - timeoutStr := strings.TrimSpace(r.Form.Get("timeout")) + tid := r.FormValue("tid") + timeoutStr := strings.TrimSpace(r.FormValue("timeout")) d := time.Minute if td, e := time.ParseDuration(timeoutStr); e == nil { d = td @@ -33,11 +34,13 @@ func (vs *VolumeServer) queryTaskHandler(w http.ResponseWriter, r *http.Request) writeJsonError(w, r, http.StatusRequestTimeout, err) } else if err == nil { writeJsonError(w, r, http.StatusOK, err) + } else { + writeJsonError(w, r, http.StatusInternalServerError, err) } glog.V(2).Infoln("query task =", tid, ", error =", err) } func (vs *VolumeServer) commitTaskHandler(w http.ResponseWriter, r *http.Request) { - tid := r.Form.Get("tid") + tid := r.FormValue("tid") err := vs.store.TaskManager.Commit(tid) if err == storage.ErrTaskNotFinish { writeJsonError(w, r, http.StatusRequestTimeout, err) @@ -47,7 +50,7 @@ func (vs *VolumeServer) commitTaskHandler(w http.ResponseWriter, r *http.Request glog.V(2).Infoln("query task =", tid, ", error =", err) } func (vs *VolumeServer) cleanTaskHandler(w http.ResponseWriter, r *http.Request) { - tid := r.Form.Get("tid") + tid := r.FormValue("tid") err := vs.store.TaskManager.Clean(tid) if err == storage.ErrTaskNotFinish { writeJsonError(w, r, http.StatusRequestTimeout, err) From 6a4aa3ec368939e68d074b05af2c616d181f716c Mon Sep 17 00:00:00 2001 From: tnextday Date: Thu, 7 Jan 2016 22:01:38 +0800 Subject: [PATCH 45/46] volume growth: remove main node filter with exists node --- go/topology/topology_replicate.go | 3 +-- go/topology/volume_growth.go | 12 +++++------- go/util/http_util.go | 2 +- go/weed/weed_server/master_server_handlers_admin.go | 2 +- 4 files 
changed, 8 insertions(+), 11 deletions(-) diff --git a/go/topology/topology_replicate.go b/go/topology/topology_replicate.go index 92d5f48c6..9ed262dd2 100644 --- a/go/topology/topology_replicate.go +++ b/go/topology/topology_replicate.go @@ -142,7 +142,7 @@ func (topo *Topology) CheckReplicate() { taskCount-- finishedTask.DstDN.UpAdjustPlannedVolumeCountDelta(-1) } - glog.V(1).Infoln("finish replicate check.") + glog.V(0).Infoln("finish replicate check.") } func (topo *Topology) StartCheckReplicate() { @@ -150,5 +150,4 @@ func (topo *Topology) StartCheckReplicate() { return } go topo.CheckReplicate() - } diff --git a/go/topology/volume_growth.go b/go/topology/volume_growth.go index 311f20a3d..f010894b9 100644 --- a/go/topology/volume_growth.go +++ b/go/topology/volume_growth.go @@ -185,15 +185,13 @@ func FindEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption, exists pickNodesFn := PickLowUsageNodeFn rp := option.ReplicaPlacement - pickMainAndRestNodes := func(np NodePicker, totalNodeCount int, filterNodeFn FilterNodeFn, existsNodes []Node) (mainNode Node, restNodes []Node, e error) { - for _, n := range existsNodes { - if filterNodeFn(n) == nil { - mainNode = n - break - } + pickMainAndRestNodes := func(np NodePicker, totalNodeCount int, filterFirstNodeFn FilterNodeFn, existsNodes []Node) (mainNode Node, restNodes []Node, e error) { + if len(existsNodes) > 0 { + mainNode = existsNodes[0] } + if mainNode == nil { - mainNodes, err := np.PickNodes(1, filterNodeFn, pickNodesFn) + mainNodes, err := np.PickNodes(1, filterFirstNodeFn, pickNodesFn) if err != nil { return nil, nil, err } diff --git a/go/util/http_util.go b/go/util/http_util.go index d53c26d18..7a395aca6 100644 --- a/go/util/http_util.go +++ b/go/util/http_util.go @@ -236,7 +236,7 @@ func DownloadToFile(fileUrl, savePath string) (e error) { r = response.Body } var f *os.File - if f, e = os.OpenFile(savePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm); e != nil { + if f, e = 
os.OpenFile(savePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); e != nil { return } if size >= 0 { diff --git a/go/weed/weed_server/master_server_handlers_admin.go b/go/weed/weed_server/master_server_handlers_admin.go index 129734692..ae3ad5488 100644 --- a/go/weed/weed_server/master_server_handlers_admin.go +++ b/go/weed/weed_server/master_server_handlers_admin.go @@ -86,7 +86,7 @@ func (ms *MasterServer) volumeVacuumHandler(w http.ResponseWriter, r *http.Reque func (ms *MasterServer) volumeCheckReplicateHandler(w http.ResponseWriter, r *http.Request) { ms.Topo.StartCheckReplicate() - ms.dirStatusHandler(w, r) + writeJsonQuiet(w, r, http.StatusOK, map[string]interface{}{"status": "running"}) } func (ms *MasterServer) volumeGrowHandler(w http.ResponseWriter, r *http.Request) { From 0e44e8db125141a124954821611483422f2c292a Mon Sep 17 00:00:00 2001 From: tnextday Date: Sun, 10 Jan 2016 23:08:01 +0800 Subject: [PATCH 46/46] update --- go/topology/store_replicate.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/go/topology/store_replicate.go b/go/topology/store_replicate.go index b105aac3d..772bf8516 100644 --- a/go/topology/store_replicate.go +++ b/go/topology/store_replicate.go @@ -83,12 +83,14 @@ func distributedOperation(masterNode string, store *storage.Store, volumeId stor for i := 0; i < length; i++ { ret = ret && <-results } - // if volume := store.GetVolume(volumeId); volume != nil { - // if length+1 < volume.ReplicaPlacement.GetCopyCount() { - // glog.V(0).Infof("replicating opetations [%d] is less than volume's replication copy count [%d]", length+1, volume.ReplicaPlacement.GetCopyCount()) - // ret = false - // } - // } + // we shouldn't check ReplicaPlacement because the needle have been written in head volume + + // if volume := store.GetVolume(volumeId); volume != nil { + // if length+1 < volume.ReplicaPlacement.GetCopyCount() { + // glog.V(0).Infof("replicating opetations [%d] is less than volume's replication copy 
count [%d]", length+1, volume.ReplicaPlacement.GetCopyCount()) + // ret = false + // } + // } return ret } else { glog.V(0).Infoln("Failed to lookup for", volumeId, lookupErr.Error())