From 11cc489ca53db915b32312a61aa3bcd5dacb72ce Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Fri, 21 Dec 2012 00:36:55 -0800 Subject: [PATCH] refactoring --- weed-fs/src/cmd/weed/fix.go | 4 +- weed-fs/src/pkg/storage/needle.go | 42 +++++++++-------- weed-fs/src/pkg/storage/needle_read_write.go | 48 +++++++++++++------- weed-fs/src/pkg/storage/volume.go | 30 ++++++------ 4 files changed, 72 insertions(+), 52 deletions(-) diff --git a/weed-fs/src/cmd/weed/fix.go b/weed-fs/src/cmd/weed/fix.go index 7a186f44f..7bed70edd 100644 --- a/weed-fs/src/cmd/weed/fix.go +++ b/weed-fs/src/cmd/weed/fix.go @@ -52,7 +52,7 @@ func runFix(cmd *Command, args []string) bool { ver, _, _ := storage.ParseSuperBlock(header) - n, rest := storage.ReadNeedle(dataFile, ver) + n, rest := storage.ReadNeedleHeader(dataFile, ver) dataFile.Seek(int64(rest), 1) nm := storage.NewNeedleMap(indexFile) offset := uint32(storage.SuperBlockSize) @@ -63,7 +63,7 @@ func runFix(cmd *Command, args []string) bool { debug("saved", count, "with error", pe) } offset += rest + 16 - n, rest = storage.ReadNeedle(dataFile, ver) + n, rest = storage.ReadNeedleHeader(dataFile, ver) dataFile.Seek(int64(rest), 1) } return true diff --git a/weed-fs/src/pkg/storage/needle.go b/weed-fs/src/pkg/storage/needle.go index ffa9beb06..0363fcb13 100644 --- a/weed-fs/src/pkg/storage/needle.go +++ b/weed-fs/src/pkg/storage/needle.go @@ -11,16 +11,22 @@ import ( "strings" ) +const ( + NeedleHeaderSize = 16 //should never change this + NeedlePaddingSize = 8 +) + type Needle struct { - Cookie uint32 "random number to mitigate brute force lookups" - Id uint64 "needle id" - Size uint32 "sum of DataSize,Data,NameSize,Name,MimeSize,Mime" - // DataSize uint32 "Data size" - Data []byte "The actual file data" - // NameSize uint16 - // Name []byte "maximum 256 characters" - // MimeSize uint16 - // Mime []byte "maximum 256 characters" + Cookie uint32 "random number to mitigate brute force lookups" + Id uint64 "needle id" + Size uint32 "sum of DataSize,Data,NameSize,Name,MimeSize,Mime" + Flags byte "boolean flags" //version2 + DataSize uint32 "Data size" //version2 + Data []byte "The actual file data" + NameSize uint8 //version2 + Name []byte "maximum 256 characters" //version2 + MimeSize uint8 //version2 + Mime []byte "maximum 256 characters" //version2 Checksum CRC "CRC32 to check integrity" Padding []byte "Aligned to 8 bytes" } @@ -87,13 +93,13 @@ func (n *Needle) ParsePath(fid string) { } } func ParseKeyHash(key_hash_string string) (uint64, uint32) { - key_hash_bytes, khe := hex.DecodeString(key_hash_string) - key_hash_len := len(key_hash_bytes) - if khe != nil || key_hash_len <= 4 { - println("Invalid key_hash", key_hash_string, "length:", key_hash_len, "error", khe) - return 0, 0 - } - key := util.BytesToUint64(key_hash_bytes[0 : key_hash_len-4]) - hash := util.BytesToUint32(key_hash_bytes[key_hash_len-4 : key_hash_len]) - return key, hash + key_hash_bytes, khe := hex.DecodeString(key_hash_string) + key_hash_len := len(key_hash_bytes) + if khe != nil || key_hash_len <= 4 { + println("Invalid key_hash", key_hash_string, "length:", key_hash_len, "error", khe) + return 0, 0 + } + key := util.BytesToUint64(key_hash_bytes[0 : key_hash_len-4]) + hash := util.BytesToUint32(key_hash_bytes[key_hash_len-4 : key_hash_len]) + return key, hash } diff --git a/weed-fs/src/pkg/storage/needle_read_write.go b/weed-fs/src/pkg/storage/needle_read_write.go index 482323701..72096edf7 100644 --- a/weed-fs/src/pkg/storage/needle_read_write.go +++ b/weed-fs/src/pkg/storage/needle_read_write.go @@ -7,22 +7,25 @@ import ( "pkg/util" ) -func (n *Needle) Append(w io.Writer) uint32 { - header := make([]byte, 16) - util.Uint32toBytes(header[0:4], n.Cookie) - util.Uint64toBytes(header[4:12], n.Id) - n.Size = uint32(len(n.Data)) - util.Uint32toBytes(header[12:16], n.Size) - w.Write(header) - w.Write(n.Data) - rest := 8 - ((16 + n.Size + 4) % 8) - util.Uint32toBytes(header[0:4], n.Checksum.Value()) - w.Write(header[0 : 4+rest]) +func (n *Needle) Append(w io.Writer, version Version) uint32 { + if version == Version1 { + header := make([]byte, NeedleHeaderSize) + util.Uint32toBytes(header[0:4], n.Cookie) + util.Uint64toBytes(header[4:12], n.Id) + n.Size = uint32(len(n.Data)) + util.Uint32toBytes(header[12:16], n.Size) + w.Write(header) + w.Write(n.Data) + rest := NeedlePaddingSize - ((NeedleHeaderSize + n.Size + 4) % NeedlePaddingSize) + util.Uint32toBytes(header[0:4], n.Checksum.Value()) + w.Write(header[0 : 4+rest]) + } else if version == Version2 { + } return n.Size } func (n *Needle) Read(r io.Reader, size uint32, version Version) (int, error) { if version == Version1 { - bytes := make([]byte, 16+size+4) + bytes := make([]byte, NeedleHeaderSize+size+4) ret, e := r.Read(bytes) n.Cookie = util.BytesToUint32(bytes[0:4]) n.Id = util.BytesToUint64(bytes[4:12]) @@ -37,10 +40,10 @@ func (n *Needle) Read(r io.Reader, size uint32, version Version) (int, error) { } return 0, errors.New("Unsupported Version!") } -func ReadNeedle(r *os.File, version Version) (n *Needle, bytesTillNextFile uint32) { +func ReadNeedleHeader(r *os.File, version Version) (n *Needle, bodyLength uint32) { n = new(Needle) if version == Version1 { - bytes := make([]byte, 16) + bytes := make([]byte, NeedleHeaderSize) count, e := r.Read(bytes) if count <= 0 || e != nil { return nil, 0 @@ -48,8 +51,21 @@ func ReadNeedle(r *os.File, version Version) (n *Needle, bytesTillNextFile uint3 n.Cookie = util.BytesToUint32(bytes[0:4]) n.Id = util.BytesToUint64(bytes[4:12]) n.Size = util.BytesToUint32(bytes[12:16]) - rest := 8 - ((n.Size + 16 + 4) % 8) - bytesTillNextFile = n.Size + 4 + rest + rest := NeedlePaddingSize - ((n.Size + 16 + 4) % NeedlePaddingSize) + bodyLength = n.Size + 4 + rest + } else if version == Version2 { + } + return +} + +//n should be a needle already read the header +//the input stream will read until next file entry +func (n *Needle) ReadNeedleBody(r *os.File, version Version, bodyLength uint32) { + if version == Version1 { + bytes := make([]byte, bodyLength) + r.Read(bytes) + n.Data = bytes[:n.Size] + n.Checksum = NewCRC(n.Data) } else if version == Version2 { } return diff --git a/weed-fs/src/pkg/storage/volume.go b/weed-fs/src/pkg/storage/volume.go index a9ffe4385..d12c9d889 100644 --- a/weed-fs/src/pkg/storage/volume.go +++ b/weed-fs/src/pkg/storage/volume.go @@ -105,7 +105,7 @@ func (v *Volume) write(n *Needle) uint32 { v.accessLock.Lock() defer v.accessLock.Unlock() offset, _ := v.dataFile.Seek(0, 2) - ret := n.Append(v.dataFile) + ret := n.Append(v.dataFile, v.version) nv, ok := v.nm.Get(n.Id) if !ok || int64(nv.Offset)*8 < offset { v.nm.Put(n.Id, uint32(offset/8), n.Size) @@ -119,8 +119,8 @@ func (v *Volume) delete(n *Needle) uint32 { //log.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size) if ok { v.nm.Delete(n.Id) - v.dataFile.Seek(int64(nv.Offset*8), 0) - n.Append(v.dataFile) + v.dataFile.Seek(int64(nv.Offset*NeedlePaddingSize), 0) + n.Append(v.dataFile, v.version) return nv.Size } return 0 @@ -131,7 +131,7 @@ func (v *Volume) read(n *Needle) (int, error) { defer v.accessLock.Unlock() nv, ok := v.nm.Get(n.Id) if ok && nv.Offset > 0 { - v.dataFile.Seek(int64(nv.Offset)*8, 0) + v.dataFile.Seek(int64(nv.Offset)*NeedlePaddingSize, 0) return n.Read(v.dataFile, nv.Size, v.version) } return -1, errors.New("Not Found") @@ -192,30 +192,28 @@ func (v *Volume) copyDataAndGenerateIndexFile(srcName, dstName, idxName string) version, _, _ := ParseSuperBlock(header) - n, rest := ReadNeedle(src, version) + n, rest := ReadNeedleHeader(src, version) nm := NewNeedleMap(idx) old_offset := uint32(SuperBlockSize) new_offset := uint32(SuperBlockSize) for n != nil { nv, ok := v.nm.Get(n.Id) //log.Println("file size is", n.Size, "rest", rest) - if !ok || nv.Offset*8 != old_offset { + if !ok || nv.Offset*NeedlePaddingSize != old_offset { src.Seek(int64(rest), 1) } else { if nv.Size > 0 { - nm.Put(n.Id, new_offset/8, n.Size) - bytes := make([]byte, n.Size+4) - src.Read(bytes) - n.Data = bytes[:n.Size] - n.Checksum = NewCRC(n.Data) - n.Append(dst) - new_offset += rest + 16 + nm.Put(n.Id, new_offset/NeedlePaddingSize, n.Size) + n.ReadNeedleBody(src, version, rest) + n.Append(dst, v.version) + new_offset += rest + NeedleHeaderSize //log.Println("saving key", n.Id, "volume offset", old_offset, "=>", new_offset, "data_size", n.Size, "rest", rest) + } else { + src.Seek(int64(rest), 1) } - src.Seek(int64(rest-n.Size-4), 1) } - old_offset += rest + 16 - n, rest = ReadNeedle(src, version) + old_offset += rest + NeedleHeaderSize + n, rest = ReadNeedleHeader(src, version) } return nil