From e07590539fad517dad68938ea1b40c059dbda990 Mon Sep 17 00:00:00 2001
From: mutantmonkey
Date: Mon, 18 Mar 2019 20:59:54 -0700
Subject: [PATCH] Add PutMetadata function to storage backends

This function is not currently used, but it will be useful for helper
scripts that need to regenerate metadata on the fly, especially scripts
to migrate between storage backends. In the future, we can also use it
to automatically regenerate metadata if it is found to be missing or
corrupted.

* Add PutMetadata function to storage backend interface and
  implementations
* Rework metadata generation to be more efficient and work better with
  the PutMetadata function
* Add a basic test for metadata generation
---
 backends/localfs/localfs.go | 46 ++++++++++++++++++++++++--
 backends/s3/s3.go           | 43 +++++++++++++++++++++++--
 backends/storage.go         |  1 +
 helpers/helpers.go          | 64 ++++++++++++++++++++++++-------------
 helpers/helpers_test.go     | 29 +++++++++++++++++
 5 files changed, 154 insertions(+), 29 deletions(-)
 create mode 100644 helpers/helpers_test.go

diff --git a/backends/localfs/localfs.go b/backends/localfs/localfs.go
index 3f6f5ad..ee40b31 100644
--- a/backends/localfs/localfs.go
+++ b/backends/localfs/localfs.go
@@ -126,11 +126,24 @@ func (b LocalfsBackend) Put(key string, r io.Reader, expiry time.Time, deleteKey
 		return m, err
 	}
 
+	// Rewind dst before generating metadata from it, and again afterwards
+	// so that later readers of dst start from the beginning.
+	if _, err = dst.Seek(0, 0); err != nil {
+		os.Remove(filePath)
+		return
+	}
+	m, err = helpers.GenerateMetadata(dst)
+	if err != nil {
+		os.Remove(filePath)
+		return
+	}
+	if _, err = dst.Seek(0, 0); err != nil {
+		os.Remove(filePath)
+		return
+	}
+
 	m.Expiry = expiry
 	m.DeleteKey = deleteKey
-	m.Size = bytes
-	m.Mimetype, _ = helpers.DetectMime(dst)
-	m.Sha256sum, _ = helpers.Sha256sum(dst)
 	m.ArchiveFiles, _ = helpers.ListArchiveFiles(m.Mimetype, m.Size, dst)
 
 	err = b.writeMetadata(key, m)
@@ -142,6 +155,33 @@ func (b LocalfsBackend) Put(key string, r io.Reader, expiry time.Time, deleteKey
 	return
 }
 
+// PutMetadata regenerates the metadata for the file already stored under key
+// from the contents of r and writes it with writeMetadata.
+func (b LocalfsBackend) PutMetadata(key string, r io.Reader, expiry time.Time, deleteKey string) (m backends.Metadata, err error) {
+	m, err = helpers.GenerateMetadata(r)
+	if err != nil {
+		return
+	}
+	m.Expiry = expiry
+	m.DeleteKey = deleteKey
+
+	filePath := path.Join(b.filesPath, key)
+	dst, err := os.Open(filePath)
+	if err != nil {
+		return
+	}
+	defer dst.Close()
+
+	m.ArchiveFiles, _ = helpers.ListArchiveFiles(m.Mimetype, m.Size, dst)
+
+	err = b.writeMetadata(key, m)
+	if err != nil {
+		return
+	}
+
+	return
+}
+
 func (b LocalfsBackend) Size(key string) (int64, error) {
 	fileInfo, err := os.Stat(path.Join(b.filesPath, key))
 	if err != nil {
diff --git a/backends/s3/s3.go b/backends/s3/s3.go
index 45067c1..1a408f0 100644
--- a/backends/s3/s3.go
+++ b/backends/s3/s3.go
@@ -122,11 +122,20 @@ func (b S3Backend) Put(key string, r io.Reader, expiry time.Time, deleteKey stri
 		return m, err
 	}
 
+	// Generate metadata from tmpDst rather than r: the upload is spooled
+	// into tmpDst before this point, so r has nothing left to yield.
+	if _, err = tmpDst.Seek(0, 0); err != nil {
+		return
+	}
+	m, err = helpers.GenerateMetadata(tmpDst)
+	if err != nil {
+		return
+	}
+	if _, err = tmpDst.Seek(0, 0); err != nil {
+		return
+	}
 	m.Expiry = expiry
 	m.DeleteKey = deleteKey
-	m.Size = bytes
-	m.Mimetype, _ = helpers.DetectMime(tmpDst)
-	m.Sha256sum, _ = helpers.Sha256sum(tmpDst)
 	// XXX: we may not be able to write this to AWS easily
 	//m.ArchiveFiles, _ = helpers.ListArchiveFiles(m.Mimetype, m.Size, tmpDst)
 
@@ -145,6 +154,34 @@ func (b S3Backend) Put(key string, r io.Reader, expiry time.Time, deleteKey stri
 	return
 }
 
+// PutMetadata regenerates the metadata for key from the contents of r and
+// stores it by copying the S3 object onto itself with the new metadata.
+func (b S3Backend) PutMetadata(key string, r io.Reader, expiry time.Time, deleteKey string) (m backends.Metadata, err error) {
+	m, err = helpers.GenerateMetadata(r)
+	if err != nil {
+		return
+	}
+	m.Expiry = expiry
+	m.DeleteKey = deleteKey
+	// XXX: we may not be able to write this to AWS easily
+	//m.ArchiveFiles, _ = helpers.ListArchiveFiles(m.Mimetype, m.Size, tmpDst)
+
+	// MetadataDirective must be REPLACE: with the default (COPY) S3 ignores
+	// Metadata and rejects a copy of an object onto itself.
+	_, err = b.svc.CopyObject(&s3.CopyObjectInput{
+		Bucket:            aws.String(b.bucket),
+		Key:               aws.String(key),
+		CopySource:        aws.String("/" + b.bucket + "/" + key),
+		Metadata:          mapMetadata(m),
+		MetadataDirective: aws.String("REPLACE"),
+	})
+	if err != nil {
+		return
+	}
+
+	return
+}
+
 func (b S3Backend) Size(key string) (int64, error) {
 	input := &s3.HeadObjectInput{
 		Bucket: aws.String(b.bucket),
diff --git a/backends/storage.go b/backends/storage.go
index d40a2b9..b9e181e 100644
--- a/backends/storage.go
+++ b/backends/storage.go
@@ -12,6 +12,7 @@ type StorageBackend interface {
 	Head(key string) (Metadata, error)
 	Get(key string) (Metadata, io.ReadCloser, error)
 	Put(key string, r io.Reader, expiry time.Time, deleteKey string) (Metadata, error)
+	PutMetadata(key string, r io.Reader, expiry time.Time, deleteKey string) (Metadata, error)
 	Size(key string) (int64, error)
 }
 
diff --git a/helpers/helpers.go b/helpers/helpers.go
index aef68ff..f51d998 100644
--- a/helpers/helpers.go
+++ b/helpers/helpers.go
@@ -1,49 +1,67 @@
 package helpers
 
 import (
+	"bytes"
 	"encoding/hex"
 	"io"
 	"unicode"
 
+	"github.com/andreimarcu/linx-server/backends"
 	"github.com/minio/sha256-simd"
 	"gopkg.in/h2non/filetype.v1"
 )
 
-func DetectMime(r io.ReadSeeker) (string, error) {
+// GenerateMetadata reads r to the end and returns metadata holding its size,
+// hex-encoded SHA-256 checksum, and detected MIME type.
+func GenerateMetadata(r io.Reader) (m backends.Metadata, err error) {
+	// Since we don't have the ability to seek within a file, we can use a
+	// Buffer in combination with a TeeReader to keep a copy of the bytes
+	// we read when detecting the file type. These bytes are still needed
+	// to hash the file and determine its size and cannot be discarded.
+	var buf bytes.Buffer
+	teeReader := io.TeeReader(r, &buf)
+
 	// Get first 512 bytes for mimetype detection
 	header := make([]byte, 512)
-
-	r.Seek(0, 0)
-	r.Read(header)
-	r.Seek(0, 0)
-
-	kind, err := filetype.Match(header)
+	_, err = teeReader.Read(header)
 	if err != nil {
-		return "application/octet-stream", err
-	} else if kind.MIME.Value != "" {
-		return kind.MIME.Value, nil
+		return
 	}
 
-	// Check if the file seems anything like text
-	if printable(header) {
-		return "text/plain", nil
+	// Create a Hash and a MultiReader that includes the Buffer we created
+	// above along with the original Reader, which will have the rest of
+	// the file.
+	hasher := sha256.New()
+	multiReader := io.MultiReader(&buf, r)
+
+	// Copy everything into the Hash, then use the number of bytes written
+	// as the file size.
+	var readLen int64
+	readLen, err = io.Copy(hasher, multiReader)
+	if err != nil {
+		return
 	} else {
-		return "application/octet-stream", nil
+		m.Size += readLen
 	}
-}
 
-func Sha256sum(r io.ReadSeeker) (string, error) {
-	hasher := sha256.New()
+	// Get the hex-encoded string version of the Hash checksum
+	m.Sha256sum = hex.EncodeToString(hasher.Sum(nil))
 
-	r.Seek(0, 0)
-	_, err := io.Copy(hasher, r)
+	// Use the bytes we extracted earlier and attempt to determine the file
+	// type
+	kind, err := filetype.Match(header)
 	if err != nil {
-		return "", err
+		m.Mimetype = "application/octet-stream"
+		return m, err
+	} else if kind.MIME.Value != "" {
+		m.Mimetype = kind.MIME.Value
+	} else if printable(header) {
+		m.Mimetype = "text/plain"
+	} else {
+		m.Mimetype = "application/octet-stream"
 	}
 
-	r.Seek(0, 0)
-
-	return hex.EncodeToString(hasher.Sum(nil)), nil
+	return
 }
 
 func printable(data []byte) bool {
diff --git a/helpers/helpers_test.go b/helpers/helpers_test.go
new file mode 100644
index 0000000..800d0d2
--- /dev/null
+++ b/helpers/helpers_test.go
@@ -0,0 +1,29 @@
+package helpers
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestGenerateMetadata(t *testing.T) {
+	r := strings.NewReader("This is my test content")
+	m, err := GenerateMetadata(r)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expectedSha256sum := "966152d20a77e739716a625373ee15af16e8f4aec631a329a27da41c204b0171"
+	if m.Sha256sum != expectedSha256sum {
+		t.Fatalf("Sha256sum was %q instead of expected value of %q", m.Sha256sum, expectedSha256sum)
+	}
+
+	expectedMimetype := "text/plain"
+	if m.Mimetype != expectedMimetype {
+		t.Fatalf("Mimetype was %q instead of expected value of %q", m.Mimetype, expectedMimetype)
+	}
+
+	expectedSize := int64(23)
+	if m.Size != expectedSize {
+		t.Fatalf("Size was %d instead of expected value of %d", m.Size, expectedSize)
+	}
+}