From 86c8f248bde7efd37740ede31d4041f8d13897ba Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Wed, 7 Nov 2012 01:51:43 -0800 Subject: [PATCH] support compacting a volume --- weed-fs/.project | 29 +- weed-fs/note/memory_usage.txt | 279 +++++++++++++++++++ weed-fs/src/cmd/weed/fix.go | 10 +- weed-fs/src/cmd/weed/master.go | 21 ++ weed-fs/src/cmd/weed/version.go | 2 +- weed-fs/src/cmd/weed/volume.go | 28 +- weed-fs/src/pkg/storage/needle.go | 3 +- weed-fs/src/pkg/storage/needle_map.go | 2 + weed-fs/src/pkg/storage/replication_type.go | 123 ++++++++ weed-fs/src/pkg/storage/store.go | 17 +- weed-fs/src/pkg/storage/volume.go | 91 +++++- weed-fs/src/pkg/storage/volume_info.go | 118 -------- weed-fs/src/pkg/topology/topology.go | 46 --- weed-fs/src/pkg/topology/topology_compact.go | 87 ++++++ weed-fs/src/pkg/topology/topology_map.go | 51 ++++ weed-fs/src/pkg/topology/volume_location.go | 1 + 16 files changed, 709 insertions(+), 199 deletions(-) create mode 100644 weed-fs/note/memory_usage.txt create mode 100644 weed-fs/src/pkg/storage/replication_type.go create mode 100644 weed-fs/src/pkg/topology/topology_compact.go create mode 100644 weed-fs/src/pkg/topology/topology_map.go diff --git a/weed-fs/.project b/weed-fs/.project index 12b0f107c..f5551728c 100644 --- a/weed-fs/.project +++ b/weed-fs/.project @@ -1,17 +1,18 @@ - weed-fs - - - - - - com.googlecode.goclipse.goBuilder - - - - - - goclipse.goNature - + weed-fs + + + + + + com.googlecode.goclipse.goBuilder + + + + + + goclipse.goNature + org.eclipse.wst.common.project.facet.core.nature + diff --git a/weed-fs/note/memory_usage.txt b/weed-fs/note/memory_usage.txt new file mode 100644 index 000000000..821140ea2 --- /dev/null +++ b/weed-fs/note/memory_usage.txt @@ -0,0 +1,279 @@ +64 32G volumes consumes 10G memory +Each volume has 25M index, so each cost 160MB memory + + + +Things happened when I use lots of threads ( almost 120 ) keeping read file from Weed-FS. +But I'm not so familiar with linux so I can't tell you exactly what happened. +Next I'll show you things I know , if you need more info , contact me + +My weed-fs version is about 0.12 + +1. top + +top - 12:07:37 up 1 day, 3:17, 2 users, load average: 0.00, 0.00, 0.00 +Tasks: 152 total, 1 running, 151 sleeping, 0 stopped, 0 zombie +Cpu(s): 0.0%us, 0.0%sy, 0.0%ni, 99.8%id, 0.1%wa, 0.0%hi, 0.0%si, 0.0%st +Mem: 16269880k total, 16192364k used, 77516k free, 58172k buffers +Swap: 2064376k total, 12324k used, 2052052k free, 2827520k cached + + PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND + 1499 root 20 0 11.6g 10g 1424 S 0.0 65.6 7:32.53 weedvolume + 1498 root 20 0 3204m 2.1g 1428 S 0.0 13.5 4:36.59 weedvolume + 1737 root 20 0 98868 4932 2920 S 0.0 0.0 0:00.56 sshd + 1497 root 20 0 151m 4404 1152 S 0.0 0.0 1:21.40 weedmaster + 1335 root 20 0 97816 3044 2896 S 0.0 0.0 0:00.76 sshd + +After system became steady , weedvolume used 65.6% memory . + +2. free -m + + total used free shared buffers cached +Mem: 15888 15809 79 0 56 2758 +-/+ buffers/cache: 12994 2894 +Swap: 2015 12 2003 + +3. startup cmd + +screen -d -m /opt/weed/weedmaster -mdir /data/weeddata/ > /data/logs/weed/master.log & +screen -d -m /opt/weed/weedvolume -volumes=0-64 -dir /data/weeddata/ -pulseSeconds=20 -publicUrl="x.y.z:9334" -port 9334 > /data/logs/weed/s01.log & +screen -d -m /opt/weed/weedvolume -volumes=65-107 -dir /data/weeddata/ -pulseSeconds=20 -publicUrl="x.y.z:9335" -port 9335 > /data/logs/weed/s02.log & + +4. du -sh . + +32G 0.dat +26M 0.idx +8.2G 100.dat +6.8M 100.idx +8.2G 101.dat +6.9M 101.idx +8.2G 102.dat +6.8M 102.idx +8.2G 103.dat +6.8M 103.idx +8.2G 104.dat +6.8M 104.idx +8.2G 105.dat +6.9M 105.idx +8.2G 106.dat +6.9M 106.idx +8.2G 107.dat +6.9M 107.idx +32G 10.dat +25M 10.idx +32G 11.dat +25M 11.idx +32G 12.dat +25M 12.idx +32G 13.dat +25M 13.idx +32G 14.dat +25M 14.idx +32G 15.dat +25M 15.idx +32G 16.dat +25M 16.idx +32G 17.dat +25M 17.idx +32G 18.dat +25M 18.idx +32G 19.dat +25M 19.idx +32G 1.dat +26M 1.idx +32G 20.dat +25M 20.idx +32G 21.dat +25M 21.idx +32G 22.dat +25M 22.idx +32G 23.dat +25M 23.idx +32G 24.dat +25M 24.idx +32G 25.dat +25M 25.idx +32G 26.dat +25M 26.idx +32G 27.dat +25M 27.idx +32G 28.dat +25M 28.idx +32G 29.dat +25M 29.idx +32G 2.dat +26M 2.idx +32G 30.dat +25M 30.idx +32G 31.dat +25M 31.idx +32G 32.dat +25M 32.idx +32G 33.dat +25M 33.idx +32G 34.dat +25M 34.idx +32G 35.dat +25M 35.idx +32G 36.dat +25M 36.idx +32G 37.dat +25M 37.idx +32G 38.dat +25M 38.idx +32G 39.dat +25M 39.idx +32G 3.dat +26M 3.idx +32G 40.dat +25M 40.idx +32G 41.dat +25M 41.idx +32G 42.dat +25M 42.idx +32G 43.dat +25M 43.idx +32G 44.dat +25M 44.idx +32G 45.dat +25M 45.idx +32G 46.dat +25M 46.idx +32G 47.dat +25M 47.idx +32G 48.dat +25M 48.idx +32G 49.dat +25M 49.idx +32G 4.dat +26M 4.idx +32G 50.dat +25M 50.idx +32G 51.dat +25M 51.idx +32G 52.dat +25M 52.idx +32G 53.dat +25M 53.idx +32G 54.dat +25M 54.idx +32G 55.dat +25M 55.idx +32G 56.dat +25M 56.idx +32G 57.dat +25M 57.idx +32G 58.dat +25M 58.idx +32G 59.dat +25M 59.idx +32G 5.dat +26M 5.idx +32G 60.dat +25M 60.idx +32G 61.dat +25M 61.idx +32G 62.dat +25M 62.idx +32G 63.dat +25M 63.idx +32G 64.dat +25M 64.idx +8.2G 65.dat +6.9M 65.idx +8.2G 66.dat +6.9M 66.idx +8.2G 67.dat +6.9M 67.idx +8.2G 68.dat +6.8M 68.idx +8.2G 69.dat +6.9M 69.idx +32G 6.dat +25M 6.idx +8.2G 70.dat +6.8M 70.idx +8.2G 71.dat +6.9M 71.idx +8.2G 72.dat +6.9M 72.idx +8.2G 73.dat +6.9M 73.idx +8.2G 74.dat +6.9M 74.idx +8.2G 75.dat +6.9M 75.idx +8.1G 76.dat +6.8M 76.idx +8.2G 77.dat +6.8M 77.idx +8.2G 78.dat +6.8M 78.idx +8.1G 79.dat +6.8M 79.idx +32G 7.dat +25M 7.idx +8.2G 80.dat +6.8M 80.idx +8.2G 81.dat +6.9M 81.idx +8.2G 82.dat +6.9M 82.idx +8.2G 83.dat +6.9M 83.idx +8.2G 84.dat +6.9M 84.idx +8.2G 85.dat +6.8M 85.idx +8.2G 86.dat +6.9M 86.idx +8.2G 87.dat +6.9M 87.idx +8.2G 88.dat +6.9M 88.idx +8.2G 89.dat +6.8M 89.idx +32G 8.dat +25M 8.idx +8.2G 90.dat +6.9M 90.idx +8.1G 91.dat +6.8M 91.idx +8.1G 92.dat +6.8M 92.idx +8.1G 93.dat +6.8M 93.idx +8.2G 94.dat +6.9M 94.idx +8.2G 95.dat +6.9M 95.idx +8.2G 96.dat +6.9M 96.idx +8.2G 97.dat +6.9M 97.idx +8.2G 98.dat +6.9M 98.idx +8.2G 99.dat +6.9M 99.idx +32G 9.dat +25M 9.idx +4.0K directory.seq + +You can see the volume 1-64 is now full. + +5. more log + +see logs.zip + +In messages you can see these lines: (Line 51095) + +Sep 26 06:14:31 wedb-01 kernel: auditd: page allocation failure. order:0, mode:0x20 +Sep 26 06:14:31 wedb-01 kernel: Pid: 1009, comm: auditd Not tainted 2.6.32-220.el6.x86_64 #1 +Sep 26 06:14:31 wedb-01 kernel: Call Trace: + +After those lines , the system deny any new network connect request + +6. /dir/status + +{"Machines":[{"Server":{"Url":"127.0.0.1:9335","PublicUrl":"x.y.z:9335"},"Volumes":[{"Id":106,"Size":8728909632},{"Id":66,"Size":8729852744},{"Id":90,"Size":8747834896},{"Id":103,"Size":8718106024},{"Id":87,"Size":8732133512},{"Id":96,"Size":8737251904},{"Id":80,"Size":8704130712},{"Id":77,"Size":8717989496},{"Id":70,"Size":8731474744},{"Id":94,"Size":8758656144},{"Id":107,"Size":8729599232},{"Id":67,"Size":8736848088},{"Id":91,"Size":8665847760},{"Id":100,"Size":8703272552},{"Id":84,"Size":8745121528},{"Id":97,"Size":8713031744},{"Id":81,"Size":8726088872},{"Id":74,"Size":8738588152},{"Id":71,"Size":8729349920},{"Id":95,"Size":8741526896},{"Id":104,"Size":8699374736},{"Id":88,"Size":8740362880},{"Id":101,"Size":8711832992},{"Id":85,"Size":8723479552},{"Id":78,"Size":8700345400},{"Id":75,"Size":8727796912},{"Id":68,"Size":8698607440},{"Id":92,"Size":8682683056},{"Id":105,"Size":8741226152},{"Id":65,"Size":8725365752},{"Id":89,"Size":8703062600},{"Id":98,"Size":8742331560},{"Id":82,"Size":8762554952},{"Id":79,"Size":8696300376},{"Id":72,"Size":8708217304},{"Id":69,"Size":8740268144},{"Id":93,"Size":8685060320},{"Id":102,"Size":8708695352},{"Id":86,"Size":8783247776},{"Id":99,"Size":8753463608},{"Id":83,"Size":8725963952},{"Id":76,"Size":8694693536},{"Id":73,"Size":8733560832}]},{"Server":{"Url":"127.0.0.1:9334","PublicUrl":"x.y.z:9334"},"Volumes":[{"Id":34,"Size":33415706800},{"Id":58,"Size":33569224784},{"Id":18,"Size":33474649968},{"Id":55,"Size":33542422680},{"Id":15,"Size":33517247576},{"Id":48,"Size":33574860328},{"Id":8,"Size":33511257144},{"Id":45,"Size":33463948408},{"Id":5,"Size":34317702920},{"Id":29,"Size":33465695776},{"Id":38,"Size":33553119624},{"Id":62,"Size":33448316736},{"Id":22,"Size":33566586296},{"Id":35,"Size":33493733728},{"Id":59,"Size":33498554904},{"Id":19,"Size":33493313784},{"Id":52,"Size":33552978448},{"Id":12,"Size":33505183752},{"Id":49,"Size":33603029896},{"Id":9,"Size":33515778064},{"Id":42,"Size":33500402248},{"Id":2,"Size":34223232992},{"Id":26,"Size":33526519600},{"Id":39,"Size":33580414336},{"Id":63,"Size":33476332456},{"Id":23,"Size":33543872592},{"Id":32,"Size":33515290168},{"Id":56,"Size":33499171184},{"Id":16,"Size":33556591168},{"Id":64,"Size":33495148616},{"Id":53,"Size":33467738560},{"Id":13,"Size":33596873960},{"Id":46,"Size":33508120448},{"Id":6,"Size":33417470256},{"Id":30,"Size":33532933992},{"Id":43,"Size":33591802008},{"Id":3,"Size":34270682080},{"Id":27,"Size":33525736944},{"Id":36,"Size":33443597824},{"Id":60,"Size":33427931336},{"Id":20,"Size":33499083096},{"Id":33,"Size":33531396280},{"Id":57,"Size":33578015104},{"Id":17,"Size":33510525480},{"Id":50,"Size":33503123704},{"Id":10,"Size":33502391608},{"Id":47,"Size":33521868568},{"Id":7,"Size":33497101664},{"Id":31,"Size":33426905232},{"Id":40,"Size":33472978696},{"Id":0,"Size":34337344304},{"Id":24,"Size":33550157192},{"Id":37,"Size":33477162720},{"Id":61,"Size":33537175080},{"Id":21,"Size":33517192456},{"Id":54,"Size":33480720288},{"Id":14,"Size":33513192896},{"Id":51,"Size":33531336080},{"Id":11,"Size":33562385088},{"Id":44,"Size":33554479104},{"Id":4,"Size":34333127520},{"Id":28,"Size":33510503000},{"Id":41,"Size":33574922928},{"Id":1,"Size":34307181368},{"Id":25,"Size":33542834568}]}],"Writers":[106,66,90,103,87,96,80,77,70,94,107,67,91,100,84,97,81,74,71,95,104,88,101,85,78,75,68,92,105,65,89,98,82,79,72,69,93,102,86,99,83,76,73,34,58,18,55,15,48,8,45,5,29,38,62,22,35,59,19,52,12,49,9,42,2,26,39,63,23,32,56,16,64,53,13,46,6,30,43,3,27,36,60,20,33,57,17,50,10,47,7,31,40,0,24,37,61,21,54,14,51,11,44,4,28,41,1,25],"FileIdSequence":110250000} + \ No newline at end of file diff --git a/weed-fs/src/cmd/weed/fix.go b/weed-fs/src/cmd/weed/fix.go index acf2d6643..e75bfa2f6 100644 --- a/weed-fs/src/cmd/weed/fix.go +++ b/weed-fs/src/cmd/weed/fix.go @@ -47,17 +47,19 @@ func runFix(cmd *Command, args []string) bool { //skip the volume super block dataFile.Seek(storage.SuperBlockSize, 0) - n, length := storage.ReadNeedle(dataFile) + n, rest := storage.ReadNeedle(dataFile) + dataFile.Seek(int64(rest), 1) nm := storage.NewNeedleMap(indexFile) offset := uint32(storage.SuperBlockSize) for n != nil { - debug("key", n.Id, "volume offset", offset, "data_size", n.Size, "length", length) + debug("key", n.Id, "volume offset", offset, "data_size", n.Size, "rest", rest) if n.Size > 0 { count, pe := nm.Put(n.Id, offset/8, n.Size) debug("saved", count, "with error", pe) } - offset += length - n, length = storage.ReadNeedle(dataFile) + offset += rest+16 + n, rest = storage.ReadNeedle(dataFile) + dataFile.Seek(int64(rest), 1) } return true } diff --git a/weed-fs/src/cmd/weed/master.go b/weed-fs/src/cmd/weed/master.go index 759e5fdc9..b2b114a35 100644 --- a/weed-fs/src/cmd/weed/master.go +++ b/weed-fs/src/cmd/weed/master.go @@ -122,6 +122,27 @@ func dirStatusHandler(w http.ResponseWriter, r *http.Request) { writeJson(w, r, m) } +func volumeVacuumHandler(w http.ResponseWriter, r *http.Request) { + count := 0 + rt, err := storage.NewReplicationTypeFromString(r.FormValue("replication")) + if err == nil { + if count, err = strconv.Atoi(r.FormValue("count")); err == nil { + if topo.FreeSpace() < count*rt.GetCopyCount() { + err = errors.New("Only " + strconv.Itoa(topo.FreeSpace()) + " volumes left! Not enough for " + strconv.Itoa(count*rt.GetCopyCount())) + } else { + count, err = vg.GrowByCountAndType(count, rt, topo) + } + } + } + if err != nil { + w.WriteHeader(http.StatusNotAcceptable) + writeJson(w, r, map[string]string{"error": err.Error()}) + } else { + w.WriteHeader(http.StatusNotAcceptable) + writeJson(w, r, map[string]interface{}{"count": count}) + } +} + func volumeGrowHandler(w http.ResponseWriter, r *http.Request) { count := 0 rt, err := storage.NewReplicationTypeFromString(r.FormValue("replication")) diff --git a/weed-fs/src/cmd/weed/version.go b/weed-fs/src/cmd/weed/version.go index aa2396b11..f63b5bd70 100644 --- a/weed-fs/src/cmd/weed/version.go +++ b/weed-fs/src/cmd/weed/version.go @@ -6,7 +6,7 @@ import ( ) const ( - VERSION = "0.23" + VERSION = "0.24" ) var cmdVersion = &Command{ diff --git a/weed-fs/src/cmd/weed/volume.go b/weed-fs/src/cmd/weed/volume.go index 259dbfded..9e6f3e9ac 100644 --- a/weed-fs/src/cmd/weed/volume.go +++ b/weed-fs/src/cmd/weed/volume.go @@ -55,7 +55,25 @@ func assignVolumeHandler(w http.ResponseWriter, r *http.Request) { } else { writeJson(w, r, map[string]string{"error": err.Error()}) } - debug("volume =", r.FormValue("volume"), ", replicationType =", r.FormValue("replicationType"), ", error =", err) + debug("assign volume =", r.FormValue("volume"), ", replicationType =", r.FormValue("replicationType"), ", error =", err) +} +func vacuumVolumeCompactHandler(w http.ResponseWriter, r *http.Request) { + err := store.CompactVolume(r.FormValue("volume")) + if err == nil { + writeJson(w, r, map[string]string{"error": ""}) + } else { + writeJson(w, r, map[string]string{"error": err.Error()}) + } + debug("compacted volume =", r.FormValue("volume"), ", error =", err) +} +func vacuumVolumeCommitHandler(w http.ResponseWriter, r *http.Request) { + count, err := store.CommitCompactVolume(r.FormValue("volume")) + if err == nil { + writeJson(w, r, map[string]interface{}{"error": "", "size":count}) + } else { + writeJson(w, r, map[string]string{"error": err.Error()}) + } + debug("commit compact volume =", r.FormValue("volume"), ", error =", err) } func storeHandler(w http.ResponseWriter, r *http.Request) { switch r.Method { @@ -250,9 +268,9 @@ func distributedOperation(volumeId storage.VolumeId, op func(location operation. } func runVolume(cmd *Command, args []string) bool { - if *vMaxCpu < 1 { - *vMaxCpu = runtime.NumCPU() - } + if *vMaxCpu < 1 { + *vMaxCpu = runtime.NumCPU() + } runtime.GOMAXPROCS(*vMaxCpu) fileInfo, err := os.Stat(*volumeFolder) if err != nil { @@ -273,6 +291,8 @@ func runVolume(cmd *Command, args []string) bool { http.HandleFunc("/", storeHandler) http.HandleFunc("/status", statusHandler) http.HandleFunc("/admin/assign_volume", assignVolumeHandler) + http.HandleFunc("/admin/vacuum_volume_compact", vacuumVolumeCompactHandler) + http.HandleFunc("/admin/vacuum_volume_commit", vacuumVolumeCommitHandler) go func() { for { diff --git a/weed-fs/src/pkg/storage/needle.go b/weed-fs/src/pkg/storage/needle.go index e352eb55d..99765589d 100644 --- a/weed-fs/src/pkg/storage/needle.go +++ b/weed-fs/src/pkg/storage/needle.go @@ -116,8 +116,7 @@ func ReadNeedle(r *os.File) (*Needle, uint32) { n.Id = util.BytesToUint64(bytes[4:12]) n.Size = util.BytesToUint32(bytes[12:16]) rest := 8 - ((n.Size + 16 + 4) % 8) - r.Seek(int64(n.Size+4+rest), 1) - return n, 16 + n.Size + 4 + rest + return n, n.Size + 4 + rest } func ParseKeyHash(key_hash_string string) (uint64, uint32) { key_hash_bytes, khe := hex.DecodeString(key_hash_string) diff --git a/weed-fs/src/pkg/storage/needle_map.go b/weed-fs/src/pkg/storage/needle_map.go index 44b74e8c1..09a25a83e 100644 --- a/weed-fs/src/pkg/storage/needle_map.go +++ b/weed-fs/src/pkg/storage/needle_map.go @@ -44,9 +44,11 @@ func LoadNeedleMap(file *os.File) *NeedleMap { size := util.BytesToUint32(bytes[i+12 : i+16]) if offset > 0 { nm.m.Set(Key(key), offset, size) + log.Println("reading key", key, "offset", offset, "size", size) nm.fileCounter++ } else { nm.m.Delete(Key(key)) + log.Println("removing key", key) nm.deletionCounter++ } } diff --git a/weed-fs/src/pkg/storage/replication_type.go b/weed-fs/src/pkg/storage/replication_type.go new file mode 100644 index 000000000..86a9d219d --- /dev/null +++ b/weed-fs/src/pkg/storage/replication_type.go @@ -0,0 +1,123 @@ +package storage + +import ( + "errors" +) + +type ReplicationType string + +const ( + Copy000 = ReplicationType("000") // single copy + Copy001 = ReplicationType("001") // 2 copies, both on the same racks, and same data center + Copy010 = ReplicationType("010") // 2 copies, both on different racks, but same data center + Copy100 = ReplicationType("100") // 2 copies, each on different data center + Copy110 = ReplicationType("110") // 3 copies, 2 on different racks and local data center, 1 on different data center + Copy200 = ReplicationType("200") // 3 copies, each on dffereint data center + LengthRelicationType = 6 + CopyNil = ReplicationType(255) // nil value +) + +func NewReplicationTypeFromString(t string) (ReplicationType, error) { + switch t { + case "000": + return Copy000, nil + case "001": + return Copy001, nil + case "010": + return Copy010, nil + case "100": + return Copy100, nil + case "110": + return Copy110, nil + case "200": + return Copy200, nil + } + return Copy000, errors.New("Unknown Replication Type:"+t) +} +func NewReplicationTypeFromByte(b byte) (ReplicationType, error) { + switch b { + case byte(000): + return Copy000, nil + case byte(001): + return Copy001, nil + case byte(010): + return Copy010, nil + case byte(100): + return Copy100, nil + case byte(110): + return Copy110, nil + case byte(200): + return Copy200, nil + } + return Copy000, errors.New("Unknown Replication Type:"+string(b)) +} + +func (r *ReplicationType) String() string { + switch *r { + case Copy000: + return "000" + case Copy001: + return "001" + case Copy010: + return "010" + case Copy100: + return "100" + case Copy110: + return "110" + case Copy200: + return "200" + } + return "000" +} +func (r *ReplicationType) Byte() byte { + switch *r { + case Copy000: + return byte(000) + case Copy001: + return byte(001) + case Copy010: + return byte(010) + case Copy100: + return byte(100) + case Copy110: + return byte(110) + case Copy200: + return byte(200) + } + return byte(000) +} + +func (repType ReplicationType)GetReplicationLevelIndex() int { + switch repType { + case Copy000: + return 0 + case Copy001: + return 1 + case Copy010: + return 2 + case Copy100: + return 3 + case Copy110: + return 4 + case Copy200: + return 5 + } + return -1 +} +func (repType ReplicationType)GetCopyCount() int { + switch repType { + case Copy000: + return 1 + case Copy001: + return 2 + case Copy010: + return 2 + case Copy100: + return 2 + case Copy110: + return 3 + case Copy200: + return 3 + } + return 0 +} diff --git a/weed-fs/src/pkg/storage/store.go b/weed-fs/src/pkg/storage/store.go index 225fc9d92..6beb224f5 100644 --- a/weed-fs/src/pkg/storage/store.go +++ b/weed-fs/src/pkg/storage/store.go @@ -36,7 +36,7 @@ func (s *Store) AddVolume(volumeListString string, replicationType string) error for _, range_string := range strings.Split(volumeListString, ",") { if strings.Index(range_string, "-") < 0 { id_string := range_string - id, err := strconv.ParseUint(id_string, 10, 64) + id, err := NewVolumeId(id_string) if err != nil { return errors.New("Volume Id " + id_string + " is not a valid unsigned integer!") } @@ -68,6 +68,21 @@ func (s *Store) addVolume(vid VolumeId, replicationType ReplicationType) error { s.volumes[vid] = NewVolume(s.dir, vid, replicationType) return nil } + +func (s *Store) CompactVolume(volumeIdString string) error { + vid, err := NewVolumeId(volumeIdString) + if err != nil { + return errors.New("Volume Id " + volumeIdString + " is not a valid unsigned integer!") + } + return s.volumes[vid].compact() +} +func (s *Store) CommitCompactVolume(volumeIdString string) (int,error) { + vid, err := NewVolumeId(volumeIdString) + if err != nil { + return 0, errors.New("Volume Id " + volumeIdString + " is not a valid unsigned integer!") + } + return s.volumes[vid].commitCompact() +} func (s *Store) loadExistingVolumes() { if dirs, err := ioutil.ReadDir(s.dir); err == nil { for _, dir := range dirs { diff --git a/weed-fs/src/pkg/storage/volume.go b/weed-fs/src/pkg/storage/volume.go index a9713c36b..ee1d98b6a 100644 --- a/weed-fs/src/pkg/storage/volume.go +++ b/weed-fs/src/pkg/storage/volume.go @@ -1,11 +1,11 @@ package storage import ( + "errors" "log" "os" "path" "sync" - "errors" ) const ( @@ -21,29 +21,30 @@ type Volume struct { replicaType ReplicationType accessLock sync.Mutex - } func NewVolume(dirname string, id VolumeId, replicationType ReplicationType) (v *Volume) { - var e error v = &Volume{dir: dirname, Id: id, replicaType: replicationType} - fileName := id.String() - v.dataFile, e = os.OpenFile(path.Join(v.dir, fileName+".dat"), os.O_RDWR|os.O_CREATE, 0644) + v.load() + return +} +func (v *Volume) load() { + var e error + fileName := path.Join(v.dir, v.Id.String()) + v.dataFile, e = os.OpenFile(fileName+".dat", os.O_RDWR|os.O_CREATE, 0644) if e != nil { log.Fatalf("New Volume [ERROR] %s\n", e) } - if replicationType == CopyNil { + if v.replicaType == CopyNil { v.readSuperBlock() } else { v.maybeWriteSuperBlock() } - indexFile, ie := os.OpenFile(path.Join(v.dir, fileName+".idx"), os.O_RDWR|os.O_CREATE, 0644) + indexFile, ie := os.OpenFile(fileName+".idx", os.O_RDWR|os.O_CREATE, 0644) if ie != nil { log.Fatalf("Write Volume Index [ERROR] %s\n", ie) } v.nm = LoadNeedleMap(indexFile) - - return } func (v *Volume) Size() int64 { stat, e := v.dataFile.Stat() @@ -107,3 +108,75 @@ func (v *Volume) read(n *Needle) (int, error) { } return -1, errors.New("Not Found") } + +func (v *Volume) compact() error { + v.accessLock.Lock() + defer v.accessLock.Unlock() + + filePath := path.Join(v.dir, v.Id.String()) + return v.copyDataAndGenerateIndexFile(filePath+".dat", filePath+".cpd", filePath+".cpx") +} +func (v *Volume) commitCompact() (int, error) { + v.accessLock.Lock() + defer v.accessLock.Unlock() + v.dataFile.Close() + os.Rename(path.Join(v.dir, v.Id.String()+".cpd"), path.Join(v.dir, v.Id.String()+".dat")) + os.Rename(path.Join(v.dir, v.Id.String()+".cpx"), path.Join(v.dir, v.Id.String()+".idx")) + v.load() + return 0, nil +} + +func (v *Volume) copyDataAndGenerateIndexFile(srcName, dstName, idxName string) (err error) { + src, err := os.OpenFile(srcName, os.O_RDONLY, 0644) + if err != nil { + return err + } + defer src.Close() + + dst, err := os.OpenFile(dstName, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return err + } + defer dst.Close() + + idx, err := os.OpenFile(idxName, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return err + } + defer idx.Close() + + src.Seek(0, 0) + header := make([]byte, SuperBlockSize) + if _, error := src.Read(header); error == nil { + dst.Write(header) + } + + n, rest := ReadNeedle(src) + nm := NewNeedleMap(idx) + old_offset := uint32(SuperBlockSize) + new_offset := uint32(SuperBlockSize) + for n != nil { + nv, ok := v.nm.Get(n.Id) + //log.Println("file size is", n.Size, "rest", rest) + if !ok || nv.Offset*8 != old_offset { + log.Println("expected offset should be", nv.Offset*8, "skipping", (rest - 16), "key", n.Id, "volume offset", old_offset, "data_size", n.Size, "rest", rest) + src.Seek(int64(rest), 1) + } else { + if nv.Size > 0 { + nm.Put(n.Id, new_offset/8, n.Size) + bytes := make([]byte, n.Size+4) + src.Read(bytes) + n.Data = bytes[:n.Size] + n.Checksum = NewCRC(n.Data) + n.Append(dst) + new_offset += rest+16 + log.Println("saving key", n.Id, "volume offset", old_offset, "=>", new_offset, "data_size", n.Size, "rest", rest) + } + src.Seek(int64(rest-n.Size-4), 1) + } + old_offset += rest+16 + n, rest = ReadNeedle(src) + } + + return nil +} diff --git a/weed-fs/src/pkg/storage/volume_info.go b/weed-fs/src/pkg/storage/volume_info.go index b8eb62f0a..05b9e6205 100644 --- a/weed-fs/src/pkg/storage/volume_info.go +++ b/weed-fs/src/pkg/storage/volume_info.go @@ -1,7 +1,6 @@ package storage import ( - "errors" ) type VolumeInfo struct { @@ -11,120 +10,3 @@ type VolumeInfo struct { FileCount int DeleteCount int } -type ReplicationType string - -const ( - Copy000 = ReplicationType("000") // single copy - Copy001 = ReplicationType("001") // 2 copies, both on the same racks, and same data center - Copy010 = ReplicationType("010") // 2 copies, both on different racks, but same data center - Copy100 = ReplicationType("100") // 2 copies, each on different data center - Copy110 = ReplicationType("110") // 3 copies, 2 on different racks and local data center, 1 on different data center - Copy200 = ReplicationType("200") // 3 copies, each on dffereint data center - LengthRelicationType = 6 - CopyNil = ReplicationType(255) // nil value -) - -func NewReplicationTypeFromString(t string) (ReplicationType, error) { - switch t { - case "000": - return Copy000, nil - case "001": - return Copy001, nil - case "010": - return Copy010, nil - case "100": - return Copy100, nil - case "110": - return Copy110, nil - case "200": - return Copy200, nil - } - return Copy000, errors.New("Unknown Replication Type:"+t) -} -func NewReplicationTypeFromByte(b byte) (ReplicationType, error) { - switch b { - case byte(000): - return Copy000, nil - case byte(001): - return Copy001, nil - case byte(010): - return Copy010, nil - case byte(100): - return Copy100, nil - case byte(110): - return Copy110, nil - case byte(200): - return Copy200, nil - } - return Copy000, errors.New("Unknown Replication Type:"+string(b)) -} - -func (r *ReplicationType) String() string { - switch *r { - case Copy000: - return "000" - case Copy001: - return "001" - case Copy010: - return "010" - case Copy100: - return "100" - case Copy110: - return "110" - case Copy200: - return "200" - } - return "000" -} -func (r *ReplicationType) Byte() byte { - switch *r { - case Copy000: - return byte(000) - case Copy001: - return byte(001) - case Copy010: - return byte(010) - case Copy100: - return byte(100) - case Copy110: - return byte(110) - case Copy200: - return byte(200) - } - return byte(000) -} - -func (repType ReplicationType)GetReplicationLevelIndex() int { - switch repType { - case Copy000: - return 0 - case Copy001: - return 1 - case Copy010: - return 2 - case Copy100: - return 3 - case Copy110: - return 4 - case Copy200: - return 5 - } - return -1 -} -func (repType ReplicationType)GetCopyCount() int { - switch repType { - case Copy000: - return 1 - case Copy001: - return 2 - case Copy010: - return 2 - case Copy100: - return 2 - case Copy110: - return 3 - case Copy200: - return 3 - } - return 0 -} diff --git a/weed-fs/src/pkg/topology/topology.go b/weed-fs/src/pkg/topology/topology.go index 836a5cc69..52d13135f 100644 --- a/weed-fs/src/pkg/topology/topology.go +++ b/weed-fs/src/pkg/topology/topology.go @@ -143,49 +143,3 @@ func (t *Topology) GetOrCreateDataCenter(dcName string) *DataCenter { return dc } -func (t *Topology) ToMap() interface{} { - m := make(map[string]interface{}) - m["Max"] = t.GetMaxVolumeCount() - m["Free"] = t.FreeSpace() - var dcs []interface{} - for _, c := range t.Children() { - dc := c.(*DataCenter) - dcs = append(dcs, dc.ToMap()) - } - m["DataCenters"] = dcs - var layouts []interface{} - for _, layout := range t.replicaType2VolumeLayout { - if layout != nil { - layouts = append(layouts, layout.ToMap()) - } - } - m["layouts"] = layouts - return m -} - -func (t *Topology) ToVolumeMap() interface{} { - m := make(map[string]interface{}) - m["Max"] = t.GetMaxVolumeCount() - m["Free"] = t.FreeSpace() - dcs := make(map[NodeId]interface{}) - for _, c := range t.Children() { - dc := c.(*DataCenter) - racks := make(map[NodeId]interface{}) - for _, r := range dc.Children() { - rack := r.(*Rack) - dataNodes := make(map[NodeId]interface{}) - for _, d := range rack.Children() { - dn := d.(*DataNode) - var volumes []interface{} - for _, v := range dn.volumes { - volumes = append(volumes, v) - } - dataNodes[d.Id()] = volumes - } - racks[r.Id()] = dataNodes - } - dcs[dc.Id()] = racks - } - m["DataCenters"] = dcs - return m -} diff --git a/weed-fs/src/pkg/topology/topology_compact.go b/weed-fs/src/pkg/topology/topology_compact.go new file mode 100644 index 000000000..e6232cec6 --- /dev/null +++ b/weed-fs/src/pkg/topology/topology_compact.go @@ -0,0 +1,87 @@ +package topology + +import ( + "encoding/json" + "errors" + "fmt" + "net/url" + "pkg/storage" + "pkg/util" + "time" +) + +func (t *Topology) Vacuum() int { + total_counter := 0 + for _, vl := range t.replicaType2VolumeLayout { + if vl != nil { + for vid, locationlist := range vl.vid2location { + each_volume_counter := 0 + vl.removeFromWritable(vid) + ch := make(chan int, locationlist.Length()) + for _, dn := range locationlist.list { + go func(url string, vid storage.VolumeId) { + vacuumVolume_Compact(url, vid) + }(dn.Url(), vid) + } + for _ = range locationlist.list { + select { + case count := <-ch: + each_volume_counter += count + case <-time.After(30 * time.Minute): + each_volume_counter = 0 + break + } + } + if each_volume_counter > 0 { + for _, dn := range locationlist.list { + if e := vacuumVolume_Commit(dn.Url(), vid); e != nil { + fmt.Println("Error when committing on", dn.Url(), e) + panic(e) + } + } + vl.setVolumeWritable(vid) + total_counter += each_volume_counter + } + } + } + } + return 0 +} + +type VacuumVolumeResult struct { + Bytes int + Error string +} + +func vacuumVolume_Compact(urlLocation string, vid storage.VolumeId) (error, int) { + values := make(url.Values) + values.Add("volume", vid.String()) + jsonBlob, err := util.Post("http://"+urlLocation+"/admin/vacuum_volume_compact", values) + if err != nil { + return err, 0 + } + var ret VacuumVolumeResult + if err := json.Unmarshal(jsonBlob, &ret); err != nil { + return err, 0 + } + if ret.Error != "" { + return errors.New(ret.Error), 0 + } + return nil, ret.Bytes +} +func vacuumVolume_Commit(urlLocation string, vid storage.VolumeId) error { + values := make(url.Values) + values.Add("volume", vid.String()) + jsonBlob, err := util.Post("http://"+urlLocation+"/admin/vacuum_volume_commit", values) + if err != nil { + return err + } + var ret VacuumVolumeResult + if err := json.Unmarshal(jsonBlob, &ret); err != nil { + return err + } + if ret.Error != "" { + return errors.New(ret.Error) + } + return nil +} diff --git a/weed-fs/src/pkg/topology/topology_map.go b/weed-fs/src/pkg/topology/topology_map.go new file mode 100644 index 000000000..9ccf08ae3 --- /dev/null +++ b/weed-fs/src/pkg/topology/topology_map.go @@ -0,0 +1,51 @@ +package topology + +import ( +) + +func (t *Topology) ToMap() interface{} { + m := make(map[string]interface{}) + m["Max"] = t.GetMaxVolumeCount() + m["Free"] = t.FreeSpace() + var dcs []interface{} + for _, c := range t.Children() { + dc := c.(*DataCenter) + dcs = append(dcs, dc.ToMap()) + } + m["DataCenters"] = dcs + var layouts []interface{} + for _, layout := range t.replicaType2VolumeLayout { + if layout != nil { + layouts = append(layouts, layout.ToMap()) + } + } + m["layouts"] = layouts + return m +} + +func (t *Topology) ToVolumeMap() interface{} { + m := make(map[string]interface{}) + m["Max"] = t.GetMaxVolumeCount() + m["Free"] = t.FreeSpace() + dcs := make(map[NodeId]interface{}) + for _, c := range t.Children() { + dc := c.(*DataCenter) + racks := make(map[NodeId]interface{}) + for _, r := range dc.Children() { + rack := r.(*Rack) + dataNodes := make(map[NodeId]interface{}) + for _, d := range rack.Children() { + dn := d.(*DataNode) + var volumes []interface{} + for _, v := range dn.volumes { + volumes = append(volumes, v) + } + dataNodes[d.Id()] = volumes + } + racks[r.Id()] = dataNodes + } + dcs[dc.Id()] = racks + } + m["DataCenters"] = dcs + return m +} diff --git a/weed-fs/src/pkg/topology/volume_location.go b/weed-fs/src/pkg/topology/volume_location.go index 16afb2dfb..64d8cdf43 100644 --- a/weed-fs/src/pkg/topology/volume_location.go +++ b/weed-fs/src/pkg/topology/volume_location.go @@ -27,6 +27,7 @@ func (dnll *VolumeLocationList) Add(loc *DataNode) bool { dnll.list = append(dnll.list, loc) return true } + func (dnll *VolumeLocationList) Remove(loc *DataNode) bool { for i, dnl := range dnll.list { if loc.Ip == dnl.Ip && loc.Port == dnl.Port {