From 6b0894d80635047479548bb19029300a596b4d55 Mon Sep 17 00:00:00 2001 From: tnextday Date: Tue, 1 Dec 2015 20:23:50 +0800 Subject: [PATCH] update ChunkedFile to seekable reader, so we can use io.* to read data --- go/operation/chunked_file.go | 203 +++++++++++++----- go/storage/needle.go | 11 +- go/storage/needle_read_write.go | 10 +- go/weed/weed_server/common.go | 2 +- .../volume_server_handlers_read.go | 150 +++++++++++++ 5 files changed, 312 insertions(+), 64 deletions(-) diff --git a/go/operation/chunked_file.go b/go/operation/chunked_file.go index 953765149..ca97566a5 100644 --- a/go/operation/chunked_file.go +++ b/go/operation/chunked_file.go @@ -8,10 +8,18 @@ import ( "net/http" "sort" + "sync" + + "github.com/chrislusf/seaweedfs/go/glog" "github.com/chrislusf/seaweedfs/go/util" ) -var ErrOutOfRange = errors.New("Out of Range") +var ( + // when the remote server does not allow range requests (Accept-Ranges was not set) + ErrRangeRequestsNotSupported = errors.New("Range requests are not supported by the remote server") + // ErrInvalidRange is returned by Read when trying to read past the end of the file + ErrInvalidRange = errors.New("Invalid range") +) type ChunkInfo struct { Fid string `json:"fid,omitempty"` @@ -21,41 +29,74 @@ type ChunkInfo struct { type ChunkList []*ChunkInfo -type ChunkedFile struct { - Name string `json:"name,omitempty"` - Mime string `json:"mime,omitempty"` - Size int64 `json:"size,omitempty"` - Chunks ChunkList `json:"chunks,omitempty"` +type ChunkManifest struct { + Name string `json:"name,omitempty"` + Mime string `json:"mime,omitempty"` + Size int64 `json:"size,omitempty"` + Chunks ChunkList `json:"chunks,omitempty"` +} - master string `json:"-"` +// seekable chunked file reader +type ChunkedFileReader struct { + Manifest *ChunkManifest + Master string + pos int64 + pr *io.PipeReader + pw *io.PipeWriter + mutex sync.Mutex } func (s ChunkList) Len() int { return len(s) } func (s ChunkList) Less(i, j int) bool { return s[i].Offset < s[j].Offset } func (s ChunkList) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func NewChunkedNeedle(buffer []byte, master string) (*ChunkedFile, error) { - c := ChunkedFile{} - - if e := json.Unmarshal(buffer, c); e != nil { +func LoadChunkedManifest(buffer []byte) (*ChunkManifest, error) { + cm := ChunkManifest{} + if e := json.Unmarshal(buffer, cm); e != nil { return nil, e } - sort.Sort(c.Chunks) - c.master = master - return &c, nil + sort.Sort(cm.Chunks) + return &cm, nil +} + +func (cm *ChunkManifest) GetData() ([]byte, error) { + return json.Marshal(cm) +} + +func (cm *ChunkManifest) DeleteChunks(master string) error { + fileIds := make([]string, 0, len(cm.Chunks)) + for _, ci := range cm.Chunks { + fileIds = append(fileIds, ci.Fid) + } + results, e := DeleteFiles(master, fileIds) + if e != nil { + return e + } + deleteError := 0 + for _, ret := range results.Results { + if ret.Error != "" { + deleteError++ + glog.V(0).Infoln("delete error:", ret.Error, ret.Fid) + } + } + if deleteError > 0 { + return errors.New("Not all chunks deleted.") + } + return nil } -func (c *ChunkedFile) Marshal() ([]byte, error) { - return json.Marshal(c) +func (cm *ChunkManifest) StoredHelper() error { + //TODO + return nil } -func copyChunk(fileUrl string, w io.Writer, startOffset, size int64) (written int64, e error) { +func httpRangeDownload(fileUrl string, w io.Writer, offset int64) (written int64, e error) { req, err := http.NewRequest("GET", fileUrl, nil) if err != nil { return written, err } - if startOffset > 0 { - req.Header.Set("Range", fmt.Sprintf("bytes=%d-", startOffset)) + if offset > 0 { + req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset)) } resp, err := util.Do(req) @@ -63,65 +104,117 @@ func copyChunk(fileUrl string, w io.Writer, startOffset, size int64) (written in return written, err } defer resp.Body.Close() - if startOffset > 0 && resp.StatusCode != 206 { - return written, fmt.Errorf("Cannot Read Needle Position: %d [%s]", startOffset, fileUrl) - } - if size > 0 { - return io.CopyN(w, resp.Body, size) - } else { - return io.Copy(w, resp.Body) + switch resp.StatusCode { + case http.StatusRequestedRangeNotSatisfiable: + return written, ErrInvalidRange + case http.StatusOK: + if offset > 0 { + return written, ErrRangeRequestsNotSupported + } + case http.StatusPartialContent: + break + default: + return written, fmt.Errorf("Read Needle http error: [%d] %s", resp.StatusCode, fileUrl) + } + return io.Copy(w, resp.Body) } -func (c *ChunkedFile) WriteBuffer(w io.Writer, offset, size int64) (written int64, e error) { - if offset >= c.Size || offset+size > c.Size { - return written, ErrOutOfRange +func (cf *ChunkedFileReader) Seek(offset int64, whence int) (int64, error) { + var err error + switch whence { + case 0: + case 1: + offset += cf.pos + case 2: + offset = cf.Manifest.Size - offset + } + if offset > cf.Manifest.Size { + err = ErrInvalidRange } + if cf.pos != offset { + cf.Close() + } + cf.pos = offset + return cf.pos, err +} + +func (cf *ChunkedFileReader) WriteTo(w io.Writer) (n int64, err error) { + cm := cf.Manifest chunkIndex := -1 chunkStartOffset := int64(0) - for i, ci := range c.Chunks { - if offset >= ci.Offset && offset < ci.Offset+ci.Size { + for i, ci := range cm.Chunks { + if cf.pos >= ci.Offset && cf.pos < ci.Offset+ci.Size { chunkIndex = i - chunkStartOffset = offset - ci.Offset + chunkStartOffset = cf.pos - ci.Offset break } } if chunkIndex < 0 { - return written, ErrOutOfRange + return n, ErrInvalidRange } - //preload next chunk? - for ; chunkIndex < c.Chunks.Len(); chunkIndex++ { - ci := c.Chunks[chunkIndex] - fileUrl, lookupError := LookupFileId(c.master, ci.Fid) + for ; chunkIndex < cm.Chunks.Len(); chunkIndex++ { + ci := cm.Chunks[chunkIndex] + // if we need read date from local volume server first? + fileUrl, lookupError := LookupFileId(cf.Master, ci.Fid) if lookupError != nil { - return written, lookupError + return n, lookupError } - rsize := int64(0) - if size > 0 { - rsize = size - written - } - if n, e := copyChunk(fileUrl, w, chunkStartOffset, rsize); e != nil { - return written, e + if wn, e := httpRangeDownload(fileUrl, w, chunkStartOffset); e != nil { + return n, e } else { - written += n + n += wn + cf.pos += wn } - if size > 0 && written >= size { - break - } chunkStartOffset = 0 } + return n, nil +} - return written, nil +func (cf *ChunkedFileReader) ReadAt(p []byte, off int64) (n int, err error) { + cf.Seek(off, 0) + return cf.Read(p) } -func (c *ChunkedFile) DeleteHelper() error { - //TODO Delete all chunks - return nil +func (cf *ChunkedFileReader) Read(p []byte) (int, error) { + return cf.getPipeReader().Read(p) } -func (c *ChunkedFile) StoredHelper() error { - //TODO - return nil +func (cf *ChunkedFileReader) Close() (e error) { + cf.mutex.Lock() + defer cf.mutex.Unlock() + return cf.closePipe() +} + +func (cf *ChunkedFileReader) closePipe() (e error) { + if cf.pr != nil { + if err := cf.pr.Close(); err != nil { + e = err + } + } + cf.pr = nil + if cf.pw != nil { + if err := cf.pw.Close(); err != nil { + e = err + } + } + cf.pw = nil + return e +} + +func (cf *ChunkedFileReader) getPipeReader() io.Reader { + cf.mutex.Lock() + defer cf.mutex.Unlock() + if cf.pr != nil && cf.pw != nil { + return cf.pr + } + cf.closePipe() + cf.pr, cf.pw = io.Pipe() + go func(pw *io.PipeWriter) { + _, e := cf.WriteTo(pw) + pw.CloseWithError(e) + }(cf.pw) + return cf.pr } diff --git a/go/storage/needle.go b/go/storage/needle.go index 04a9dc78d..c09ad061a 100644 --- a/go/storage/needle.go +++ b/go/storage/needle.go @@ -52,7 +52,7 @@ func (n *Needle) String() (str string) { return } -func ParseUpload(r *http.Request) (fileName string, data []byte, mimeType string, isGzipped bool, modifiedTime uint64, ttl *TTL, e error) { +func ParseUpload(r *http.Request) (fileName string, data []byte, mimeType string, isGzipped bool, modifiedTime uint64, ttl *TTL, isChunkedFile bool, e error) { form, fe := r.MultipartReader() if fe != nil { glog.V(0).Infoln("MultipartReader [ERROR]", fe) @@ -132,12 +132,13 @@ func ParseUpload(r *http.Request) (fileName string, data []byte, mimeType string } modifiedTime, _ = strconv.ParseUint(r.FormValue("ts"), 10, 64) ttl, _ = ReadTTL(r.FormValue("ttl")) + isChunkedFile, _ = strconv.ParseBool(r.FormValue("cf")) return } func NewNeedle(r *http.Request, fixJpgOrientation bool) (n *Needle, e error) { - fname, mimeType, isGzipped := "", "", false + fname, mimeType, isGzipped, isChunkedFile := "", "", false, false n = new(Needle) - fname, n.Data, mimeType, isGzipped, n.LastModified, n.Ttl, e = ParseUpload(r) + fname, n.Data, mimeType, isGzipped, n.LastModified, n.Ttl, isChunkedFile, e = ParseUpload(r) if e != nil { return } @@ -160,6 +161,10 @@ func NewNeedle(r *http.Request, fixJpgOrientation bool) (n *Needle, e error) { n.SetHasTtl() } + if isChunkedFile { + n.SetChunkedFile() + } + if fixJpgOrientation { loweredName := strings.ToLower(fname) if mimeType == "image/jpeg" || strings.HasSuffix(loweredName, ".jpg") || strings.HasSuffix(loweredName, ".jpeg") { diff --git a/go/storage/needle_read_write.go b/go/storage/needle_read_write.go index 2a5bd89ea..28b0e8fde 100644 --- a/go/storage/needle_read_write.go +++ b/go/storage/needle_read_write.go @@ -16,7 +16,7 @@ const ( FlagHasMime = 0x04 FlagHasLastModifiedDate = 0x08 FlagHasTtl = 0x10 - FlagChunkList = 0x80 + FlagChunkedFile = 0x80 LastModifiedBytesLength = 5 TtlBytesLength = 2 ) @@ -282,10 +282,10 @@ func (n *Needle) SetHasTtl() { n.Flags = n.Flags | FlagHasTtl } -func (n *Needle) IsChunkList() bool { - return n.Flags&FlagChunkList > 0 +func (n *Needle) IsChunkedFile() bool { + return n.Flags&FlagChunkedFile > 0 } -func (n *Needle) SetChunkList() { - n.Flags = n.Flags | FlagChunkList +func (n *Needle) SetChunkedFile() { + n.Flags = n.Flags | FlagChunkedFile } diff --git a/go/weed/weed_server/common.go b/go/weed/weed_server/common.go index 4ad9824b1..a7fa2de53 100644 --- a/go/weed/weed_server/common.go +++ b/go/weed/weed_server/common.go @@ -86,7 +86,7 @@ func submitForClientHandler(w http.ResponseWriter, r *http.Request, masterUrl st } debug("parsing upload file...") - fname, data, mimeType, isGzipped, lastModified, _, pe := storage.ParseUpload(r) + fname, data, mimeType, isGzipped, lastModified, _, _, pe := storage.ParseUpload(r) if pe != nil { writeJsonError(w, r, http.StatusBadRequest, pe) return diff --git a/go/weed/weed_server/volume_server_handlers_read.go b/go/weed/weed_server/volume_server_handlers_read.go index 9e252d205..febed354b 100644 --- a/go/weed/weed_server/volume_server_handlers_read.go +++ b/go/weed/weed_server/volume_server_handlers_read.go @@ -81,6 +81,11 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) return } w.Header().Set("Etag", etag) + + if vs.tryHandleChunkedFile(n, filename, w, r) { + return + } + if n.NameSize > 0 && filename == "" { filename = string(n.Name) dotIndex := strings.LastIndex(filename, ".") @@ -215,3 +220,148 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) io.CopyN(w, sendContent, sendSize) } + +func (vs *VolumeServer) tryHandleChunkedFile(n *storage.Needle, fileName string, w http.ResponseWriter, r *http.Request) (processed bool) { + if !n.IsChunkedFile() { + return false + } + processed = true + raw, _ := strconv.ParseBool(r.FormValue("raw")) + if raw { + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Length", strconv.Itoa(len(n.Data))) + if _, e := w.Write(n.Data); e != nil { + glog.V(0).Infoln("response write error:", e) + } + return true + } + chunkManifest, e := operation.LoadChunkedManifest(n.Data) + if e != nil { + return false + } + ext := "" + if fileName == "" && chunkManifest.Name != "" { + fileName = chunkManifest.Name + dotIndex := strings.LastIndex(fileName, ".") + if dotIndex > 0 { + ext = fileName[dotIndex:] + } + } + mtype := "" + if ext != "" { + mtype = mime.TypeByExtension(ext) + } + if chunkManifest.Mime != "" { + mt := chunkManifest.Mime + if !strings.HasPrefix(mt, "application/octet-stream") { + mtype = mt + } + } + if mtype != "" { + w.Header().Set("Content-Type", mtype) + } + if fileName != "" { + w.Header().Set("Content-Disposition", `filename="`+fileNameEscaper.Replace(fileName)+`"`) + } + w.Header().Set("X-File-Store", "chunked") + w.Header().Set("Accept-Ranges", "bytes") + if r.Method == "HEAD" { + w.Header().Set("Content-Length", strconv.FormatInt(chunkManifest.Size, 10)) + return true + } + + chunkedFileReader := operation.ChunkedFileReader{ + Manifest: chunkManifest, + Master: vs.GetMasterNode(), + } + defer chunkedFileReader.Close() + rangeReq := r.Header.Get("Range") + if rangeReq == "" { + w.Header().Set("Content-Length", strconv.FormatInt(chunkManifest.Size, 10)) + if _, e = io.Copy(w, chunkedFileReader); e != nil { + glog.V(0).Infoln("response write error:", e) + } + return true + } + + //the rest is dealing with partial content request + //mostly copy from src/pkg/net/http/fs.go + size := chunkManifest.Size + ranges, err := parseRange(rangeReq, size) + if err != nil { + http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable) + return + } + if sumRangesSize(ranges) > size { + // The total number of bytes in all the ranges + // is larger than the size of the file by + // itself, so this is probably an attack, or a + // dumb client. Ignore the range request. + ranges = nil + return + } + if len(ranges) == 0 { + return + } + if len(ranges) == 1 { + // RFC 2616, Section 14.16: + // "When an HTTP message includes the content of a single + // range (for example, a response to a request for a + // single range, or to a request for a set of ranges + // that overlap without any holes), this content is + // transmitted with a Content-Range header, and a + // Content-Length header showing the number of bytes + // actually transferred. + // ... + // A response to a request for a single range MUST NOT + // be sent using the multipart/byteranges media type." + ra := ranges[0] + w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10)) + w.Header().Set("Content-Range", ra.contentRange(size)) + w.WriteHeader(http.StatusPartialContent) + if _, e = chunkedFileReader.Seek(ra.start, 0); e != nil { + glog.V(0).Infoln("response write error:", e) + } + if _, e = io.CopyN(w, chunkedFileReader, ra.length); e != nil { + glog.V(0).Infoln("response write error:", e) + } + return + } + // process multiple ranges + for _, ra := range ranges { + if ra.start > size { + http.Error(w, "Out of Range", http.StatusRequestedRangeNotSatisfiable) + return + } + } + sendSize := rangesMIMESize(ranges, mtype, size) + pr, pw := io.Pipe() + mw := multipart.NewWriter(pw) + w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary()) + sendContent := pr + defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish. + go func() { + for _, ra := range ranges { + part, err := mw.CreatePart(ra.mimeHeader(mtype, size)) + if err != nil { + pw.CloseWithError(err) + return + } + if _, e = chunkedFileReader.Seek(ra.start, 0); e != nil { + glog.V(0).Infoln("response write error:", e) + } + if _, err = io.CopyN(part, chunkedFileReader, ra.length); err != nil { + pw.CloseWithError(err) + return + } + } + mw.Close() + pw.Close() + }() + if w.Header().Get("Content-Encoding") == "" { + w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10)) + } + w.WriteHeader(http.StatusPartialContent) + io.CopyN(w, sendContent, sendSize) + return +}