diff --git a/unmaintained/volume_tailer/volume_tailer.go b/unmaintained/volume_tailer/volume_tailer.go index d9220d2de..9458ea6c7 100644 --- a/unmaintained/volume_tailer/volume_tailer.go +++ b/unmaintained/volume_tailer/volume_tailer.go @@ -48,8 +48,8 @@ func main() { if *showTextFile { data := n.Data - if n.IsGzipped() { - if data, err = util2.UnGzipData(data); err != nil { + if n.IsCompressed() { + if data, err = util2.UnCompressData(data); err != nil { return err } } @@ -57,7 +57,7 @@ func main() { println(string(data)) } - println("-", n.String(), "compressed", n.IsGzipped(), "original size", len(data)) + println("-", n.String(), "compressed", n.IsCompressed(), "original size", len(data)) } return nil }) diff --git a/weed/command/export.go b/weed/command/export.go index 8c32b3f4d..5d304b5a0 100644 --- a/weed/command/export.go +++ b/weed/command/export.go @@ -19,6 +19,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/storage/needle_map" "github.com/chrislusf/seaweedfs/weed/storage/super_block" "github.com/chrislusf/seaweedfs/weed/storage/types" + "github.com/chrislusf/seaweedfs/weed/util" ) const ( @@ -79,7 +80,7 @@ func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version, key, n.Name, size, - n.IsGzipped(), + n.IsCompressed(), n.Mime, n.LastModifiedString(), n.Ttl.String(), @@ -108,8 +109,8 @@ func (scanner *VolumeFileScanner4Export) VisitNeedle(n *needle.Needle, offset in vid := scanner.vid nv, ok := needleMap.Get(n.Id) - glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v", - n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped(), ok, nv) + glog.V(3).Infof("key %d offset %d size %d disk_size %d compressed %v ok %v nv %+v", + n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed(), ok, nv) if ok && nv.Size > 0 && nv.Size != types.TombstoneFileSize && nv.Offset.ToAcutalOffset() == offset { if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) { glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d", @@ -242,8 +243,11 @@ func writeFile(vid needle.VolumeId, n *needle.Needle) (err error) { fileName := fileNameTemplateBuffer.String() - if n.IsGzipped() && path.Ext(fileName) != ".gz" { - fileName = fileName + ".gz" + if n.IsCompressed() { + if util.IsGzippedContent(n.Data) && path.Ext(fileName) != ".gz" { + fileName = fileName + ".gz" + } + // TODO other compression method } tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data)) diff --git a/weed/command/fix.go b/weed/command/fix.go index 90d1c4893..223808f4b 100644 --- a/weed/command/fix.go +++ b/weed/command/fix.go @@ -46,7 +46,7 @@ func (scanner *VolumeFileScanner4Fix) ReadNeedleBody() bool { } func (scanner *VolumeFileScanner4Fix) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error { - glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped()) + glog.V(2).Infof("key %d offset %d size %d disk_size %d compressed %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed()) if n.Size > 0 && n.Size != types.TombstoneFileSize { pe := scanner.nm.Set(n.Id, types.ToOffset(offset), n.Size) glog.V(2).Infof("saved %d with error %v", n.Size, pe) diff --git a/weed/operation/chunked_file.go b/weed/operation/chunked_file.go index baa0038c4..3ee8f5079 100644 --- a/weed/operation/chunked_file.go +++ b/weed/operation/chunked_file.go @@ -53,10 +53,10 @@ func (s ChunkList) Len() int { return len(s) } func (s ChunkList) Less(i, j int) bool { return s[i].Offset < s[j].Offset } func (s ChunkList) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func LoadChunkManifest(buffer []byte, isGzipped bool) (*ChunkManifest, error) { - if isGzipped { +func LoadChunkManifest(buffer []byte, isCompressed bool) (*ChunkManifest, error) { + if isCompressed { var err error - if buffer, err = util.UnGzipData(buffer); err != nil { + if buffer, err = util.UnCompressData(buffer); err != nil { return nil, err } } diff --git a/weed/operation/upload_content.go b/weed/operation/upload_content.go index b26b82800..7216853ac 100644 --- a/weed/operation/upload_content.go +++ b/weed/operation/upload_content.go @@ -121,7 +121,7 @@ func doUploadData(uploadUrl string, filename string, cipher bool, data []byte, i } } else if isInputGzipped { // just to get the clear data length - clearData, err := util.UnGzipData(data) + clearData, err := util.UnCompressData(data) if err == nil { clearDataLen = len(clearData) } diff --git a/weed/server/volume_grpc_file.go b/weed/server/volume_grpc_file.go index 4d71ddeb1..61fcaeb00 100644 --- a/weed/server/volume_grpc_file.go +++ b/weed/server/volume_grpc_file.go @@ -11,6 +11,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/util" ) +// Deprecated func (vs *VolumeServer) FileGet(req *volume_server_pb.FileGetRequest, stream volume_server_pb.VolumeServer_FileGetServer) error { headResponse := &volume_server_pb.FileGetResponse{} @@ -90,10 +91,10 @@ func (vs *VolumeServer) FileGet(req *volume_server_pb.FileGetRequest, stream vol } headResponse.ContentType = mtype - headResponse.IsGzipped = n.IsGzipped() + headResponse.IsGzipped = n.IsCompressed() - if n.IsGzipped() && req.AcceptGzip { - if n.Data, err = util.UnGzipData(n.Data); err != nil { + if n.IsCompressed() && req.AcceptGzip { + if n.Data, err = util.UnCompressData(n.Data); err != nil { glog.V(0).Infof("ungzip %s error: %v", req.FileId, err) } } diff --git a/weed/server/volume_server_handlers_read.go b/weed/server/volume_server_handlers_read.go index 19b459136..d20cd5670 100644 --- a/weed/server/volume_server_handlers_read.go +++ b/weed/server/volume_server_handlers_read.go @@ -143,17 +143,19 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) } if ext != ".gz" { - if n.IsGzipped() { + if n.IsCompressed() { if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") { if _, _, _, shouldResize := shouldResizeImages(ext, r); shouldResize { - if n.Data, err = util.UnGzipData(n.Data); err != nil { + if n.Data, err = util.UnCompressData(n.Data); err != nil { glog.V(0).Infoln("ungzip error:", err, r.URL.Path) } } else { - w.Header().Set("Content-Encoding", "gzip") + if util.IsGzippedContent(n.Data) { + w.Header().Set("Content-Encoding", "gzip") + } } } else { - if n.Data, err = util.UnGzipData(n.Data); err != nil { + if n.Data, err = util.UnCompressData(n.Data); err != nil { glog.V(0).Infoln("ungzip error:", err, r.URL.Path) } } @@ -172,7 +174,7 @@ func (vs *VolumeServer) tryHandleChunkedFile(n *needle.Needle, fileName string, return false } - chunkManifest, e := operation.LoadChunkManifest(n.Data, n.IsGzipped()) + chunkManifest, e := operation.LoadChunkManifest(n.Data, n.IsCompressed()) if e != nil { glog.V(0).Infof("load chunked manifest (%s) error: %v", r.URL.Path, e) return false diff --git a/weed/server/volume_server_handlers_write.go b/weed/server/volume_server_handlers_write.go index 9a00dcc29..74dad28de 100644 --- a/weed/server/volume_server_handlers_write.go +++ b/weed/server/volume_server_handlers_write.go @@ -120,7 +120,7 @@ func (vs *VolumeServer) DeleteHandler(w http.ResponseWriter, r *http.Request) { count := int64(n.Size) if n.IsChunkedManifest() { - chunkManifest, e := operation.LoadChunkManifest(n.Data, n.IsGzipped()) + chunkManifest, e := operation.LoadChunkManifest(n.Data, n.IsCompressed()) if e != nil { writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Load chunks manifest error: %v", e)) return diff --git a/weed/storage/needle/needle.go b/weed/storage/needle/needle.go index d3969e868..7d02758d6 100644 --- a/weed/storage/needle/needle.go +++ b/weed/storage/needle/needle.go @@ -81,7 +81,7 @@ func CreateNeedleFromRequest(r *http.Request, fixJpgOrientation bool, sizeLimit } } if pu.IsGzipped { - n.SetGzipped() + n.SetIsCompressed() } if n.LastModified == 0 { n.LastModified = uint64(time.Now().Unix()) diff --git a/weed/storage/needle/needle_parse_upload.go b/weed/storage/needle/needle_parse_upload.go index 9873c2412..c1fb10713 100644 --- a/weed/storage/needle/needle_parse_upload.go +++ b/weed/storage/needle/needle_parse_upload.go @@ -51,7 +51,7 @@ func ParseUpload(r *http.Request, sizeLimit int64) (pu *ParsedUpload, e error) { pu.OriginalDataSize = len(pu.Data) pu.UncompressedData = pu.Data - // println("received data", len(pu.Data), "isGzipped", pu.IsGzipped, "mime", pu.MimeType, "name", pu.FileName) + // println("received data", len(pu.Data), "isGzipped", pu.IsCompressed, "mime", pu.MimeType, "name", pu.FileName) if pu.MimeType == "" { pu.MimeType = http.DetectContentType(pu.Data) // println("detected mimetype to", pu.MimeType) @@ -60,7 +60,7 @@ func ParseUpload(r *http.Request, sizeLimit int64) (pu *ParsedUpload, e error) { } } if pu.IsGzipped { - if unzipped, e := util.UnGzipData(pu.Data); e == nil { + if unzipped, e := util.UnCompressData(pu.Data); e == nil { pu.OriginalDataSize = len(unzipped) pu.UncompressedData = unzipped // println("ungzipped data size", len(unzipped)) diff --git a/weed/storage/needle/needle_read_write.go b/weed/storage/needle/needle_read_write.go index e89e253cd..9702cf939 100644 --- a/weed/storage/needle/needle_read_write.go +++ b/weed/storage/needle/needle_read_write.go @@ -13,7 +13,7 @@ import ( ) const ( - FlagGzip = 0x01 + FlagIsCompressed = 0x01 FlagHasName = 0x02 FlagHasMime = 0x04 FlagHasLastModifiedDate = 0x08 @@ -343,11 +343,11 @@ func (n *Needle) ReadNeedleBodyBytes(needleBody []byte, version Version) (err er return } -func (n *Needle) IsGzipped() bool { - return n.Flags&FlagGzip > 0 +func (n *Needle) IsCompressed() bool { + return n.Flags&FlagIsCompressed > 0 } -func (n *Needle) SetGzipped() { - n.Flags = n.Flags | FlagGzip +func (n *Needle) SetIsCompressed() { + n.Flags = n.Flags | FlagIsCompressed } func (n *Needle) HasName() bool { return n.Flags&FlagHasName > 0 diff --git a/weed/topology/store_replicate.go b/weed/topology/store_replicate.go index 236f8d773..481e72fe0 100644 --- a/weed/topology/store_replicate.go +++ b/weed/topology/store_replicate.go @@ -80,7 +80,7 @@ func ReplicatedWrite(masterNode string, s *storage.Store, volumeId needle.Volume } // volume server do not know about encryption - _, err := operation.UploadData(u.String(), string(n.Name), false, n.Data, n.IsGzipped(), string(n.Mime), pairMap, jwt) + _, err := operation.UploadData(u.String(), string(n.Name), false, n.Data, n.IsCompressed(), string(n.Mime), pairMap, jwt) return err }); err != nil { err = fmt.Errorf("failed to write to replicas for volume %d: %v", volumeId, err) diff --git a/weed/util/compression.go b/weed/util/compression.go index 1f778b5d5..f6315ebc2 100644 --- a/weed/util/compression.go +++ b/weed/util/compression.go @@ -25,7 +25,25 @@ func GzipData(input []byte) ([]byte, error) { } return buf.Bytes(), nil } -func UnGzipData(input []byte) ([]byte, error) { +func UnCompressData(input []byte) ([]byte, error) { + if IsGzippedContent(input) { + return ungzipData(input) + } + +} + +func ungzipData(input []byte) ([]byte, error) { + buf := bytes.NewBuffer(input) + r, _ := gzip.NewReader(buf) + defer r.Close() + output, err := ioutil.ReadAll(r) + if err != nil { + glog.V(2).Infoln("error uncompressing data:", err) + } + return output, err +} + +func ungzipData(input []byte) ([]byte, error) { buf := bytes.NewBuffer(input) r, _ := gzip.NewReader(buf) defer r.Close() @@ -51,6 +69,13 @@ func IsGzippable(ext, mtype string, data []byte) bool { return isMostlyText } +func IsGzippedContent(data []byte) bool { + if len(data) < 2 { + return false + } + return data[0] == 31 && data[1] == 139 +} + /* * Default more not to gzip since gzip can be done on client side. */func IsGzippableFileType(ext, mtype string) (shouldBeZipped, iAmSure bool) { diff --git a/weed/util/http_util.go b/weed/util/http_util.go index 5df79a7be..51748e92a 100644 --- a/weed/util/http_util.go +++ b/weed/util/http_util.go @@ -189,11 +189,11 @@ func NormalizeUrl(url string) string { return "http://" + url } -func ReadUrl(fileUrl string, cipherKey []byte, isGzipped bool, isFullChunk bool, offset int64, size int, buf []byte) (int64, error) { +func ReadUrl(fileUrl string, cipherKey []byte, isContentCompressed bool, isFullChunk bool, offset int64, size int, buf []byte) (int64, error) { if cipherKey != nil { var n int - err := readEncryptedUrl(fileUrl, cipherKey, isGzipped, isFullChunk, offset, size, func(data []byte) { + err := readEncryptedUrl(fileUrl, cipherKey, isContentCompressed, isFullChunk, offset, size, func(data []byte) { n = copy(buf, data) }) return int64(n), err @@ -300,7 +300,7 @@ func ReadUrlAsStream(fileUrl string, cipherKey []byte, isContentGzipped bool, is } -func readEncryptedUrl(fileUrl string, cipherKey []byte, isContentGzipped bool, isFullChunk bool, offset int64, size int, fn func(data []byte)) error { +func readEncryptedUrl(fileUrl string, cipherKey []byte, isContentCompressed bool, isFullChunk bool, offset int64, size int, fn func(data []byte)) error { encryptedData, err := Get(fileUrl) if err != nil { return fmt.Errorf("fetch %s: %v", fileUrl, err) @@ -309,8 +309,8 @@ func readEncryptedUrl(fileUrl string, cipherKey []byte, isContentGzipped bool, i if err != nil { return fmt.Errorf("decrypt %s: %v", fileUrl, err) } - if isContentGzipped { - decryptedData, err = UnGzipData(decryptedData) + if isContentCompressed { + decryptedData, err = UnCompressData(decryptedData) if err != nil { return fmt.Errorf("unzip decrypt %s: %v", fileUrl, err) }