Browse Source

Basic implementation

add-file-preservation
Drew Short 5 years ago
parent
commit
4db6fac8c0
  1. 5
      .gitignore
  2. 0
      Dockerfile
  3. 110
      README.md
  4. 45
      acm-config.json.example
  5. 286
      acm.py
  6. 2
      requirements.txt
  7. 11
      setup.py

5
.gitignore

@ -0,0 +1,5 @@
venv/
l_venv/
acm-config.json
*.json

0
Dockerfile

110
README.md

@ -1,4 +1,4 @@
Asset Compression Helper
Asset Compression Manager (ACM)
======================== ========================
## About ## About
@ -11,20 +11,116 @@ This tool is designed to work with an S3 compatible bucket storage provider.
- An S3 Compatible Storage Service - An S3 Compatible Storage Service
- Python 3.7+ - Python 3.7+
- FFMpeg for jpeg, video, and audio compression
- MozJpeg for jpeg compression
- OptiPNG for png optimization - OptiPNG for png optimization
- FFMpeg for video compression and audio decompression
- Opusenc for audio compression
## Using ## Using
### Configuring Profiles
### Configuring
### Checking For Changed Assets
ACM expects a configuration file specified with `--config` or as `acm-config.json` in the current directory.
### Compressing Changed Assets
The S3 compatible endpoint needs to be configured in the `s3` object of the configuration file.
```json
{
"s3": {
"secure": false,
"host": "127.0.0.1:9000"
}
}
```
- `secure` - specifies whether the __*https*__ protocol is used.
- `host` - is the \<hostname>\[:\<port>] for the S3 compatible endpoint.
### Common Options
- `-c, --config`: The config file to read. Default file is `acm-config.json` in the current directory.
- `-x, --context`: The remote bucket to use. For `store` and `retrieve` operations it is `<value>-data`.
- `-s, --stdin`: Read the file list to process from stdin.
- `-p, --prefix`: The prefix to strip from the input, e.g. `acm.py -x test -p /tmp/data/storage/ check /tmp/data/storage/images/img1.jpg` => `images/img1.jpg`
### Listing Files
List all files in a bucket
```bash
$ ./acm.py -x <bucket> list
```
List all files while adding a prefix and stripping a suffix
```bash
$ ./acm.py -x <bucket> -p <prefix> list --suffix <suffix>
```
List all files with sha256sum compatible output
```bash
$ ./acm.py --context testing --prefix "/tmp/" --stdin list --suffix .json --sha256sum
```
Print out a sha256sum compatible check list
### Checking For Changes
Do a comparison of the remote bucket for missing files or files with a mismatch in their sha256sum values.
Process a list of files
```bash
$ ./acm.py -x <bucket> -p <prefix to strip> check FILES...
```
Process a list from stdin
```bash
$ find /tmp -name '*.jpg' | ./acm.py -x <bucket> -p <prefix to strip> check
```
### Uploading Changed Assets
### Updating Metadata For Changed Files
### Downloading Assets
Update the remote bucket with new metadata for the listed files. Calculates new sha256sum values.
Process a list of files
```bash
$ ./acm.py -x <bucket> -p <prefix to strip> update FILES...
```
Process a list from stdin
```bash
$ find /tmp -name '*.jpg' | ./acm.py -x <bucket> -p <prefix to strip> update
```
### Storing Files
Store the listed files in `<bucket>-data`.
Process a list of files
```bash
$ ./acm.py -x <bucket> -p <prefix to strip> store FILES...
```
Process a list from stdin
```bash
$ find /tmp -name '*.jpg' | ./acm.py -x <bucket> -p <prefix to strip> store
```
### Retrieving Files
Retrieve remote files matching listed files. Optionally place the downloaded files in a different destination.
Process a list of files
```bash
$ ./acm.py -x <bucket> -p <prefix to strip> retrieve [-d <destination>] FILES...
```
Process a list from stdin
```bash
$ find /tmp -name '*.jpg' | ./acm.py -x <bucket> -p <prefix to strip> retrieve [-d <destination>]
```
### Configuring Profiles
### Compressing Changed Assets
## Contributing ## Contributing

45
acm-config.json.example

@ -0,0 +1,45 @@
{
"s3": {
"secure": false,
"host": "127.0.0.1:9000"
},
"profiles": {
"default": {
"jpeg": {
"processors": ["mozjpeg"],
"extensions": [
"jpg",
"jpeg"
],
"outputExtension": "jpg",
"command": "cjpeg -optimize -quality 75 -progressive -out {output_file} {input_file}"
},
"png": {
"processors": ["optipng"],
"extensions": [
"png"
],
"outputExtension": "png",
"command": "optipng -o2 -strip all -out {output_file} {input_file}"
},
"video": {
"processors": ["ffmpeg"],
"extensions": [
"mp4",
"webm"
],
"outputExtension": "mp4",
"command": "ffmpeg -i {input_file} -vcodec libx264 -crf 24 {output_file}"
},
"audio": {
"processors": ["ffmpeg", "opusenc"],
"extensions": [
"wav",
"mp3"
],
"outputExtension": "ogg",
"command": "ffmpeg -hide_banner -loglevel panic -i {input_file} -f wav -| opusenc --quiet --bitrate 64 --vbr --downmix-stereo --discard-comments --discard-pictures - {output_file} >/dev/null 2>&1"
}
}
}
}

286
acm.py

@ -0,0 +1,286 @@
#!/usr/bin/env python
import hashlib
import io
import json
import os
from typing import List
import click
from minio import Minio, ResponseError
from minio.error import NoSuchKey
# Prefix that get_metadata_name() prepends to a metadata name before it is
# looked up in the metadata returned by stat_object().
METADATA_PREFIX = 'X-Amz-Meta-'
# Metadata name for the stored SHA-256 digest.
# NOTE(review): not referenced by the code visible in this file - confirm it is still needed.
METADATA_SHA256SUM = "Sha256sum"
# Number of bytes read per chunk when hashing local files
BUF_SIZE = 4096
def get_metadata_name(key):
    """Return the metadata name under which ``key`` is looked up.

    The name is ``METADATA_PREFIX`` followed by ``key`` with only its first
    character upper-cased (``"SHA256SUM"`` -> ``"X-Amz-Meta-Sha256sum"``).

    The previous implementation ignored ``key`` and always returned the
    SHA256SUM name; the result for ``"SHA256SUM"`` is unchanged.

    :param key: the bare metadata key, in any letter case.
    :return: the prefixed metadata name.
    """
    return METADATA_PREFIX + key.capitalize()
def get_clean_stdin_iterator(stdin_stream):
    """Lazily yield the stripped, non-empty lines of ``stdin_stream``.

    :param stdin_stream: an iterable of text lines.
    :return: a generator of lines with surrounding whitespace removed;
        lines that are empty after stripping are dropped.
    """
    stripped_lines = (raw_line.strip() for raw_line in stdin_stream)
    return (entry for entry in stripped_lines if entry != '')
def get_file_identity(ctx_obj, file):
    """Map a local file path to the identity used for it in the bucket.

    The configured ``PREFIX`` (if any) is removed from the start of ``file``
    and the platform directory separator is normalised to ``/``.

    Fixes over the previous version:
    - the separator check used ``os.pathsep`` (the ``PATH`` list separator,
      ``:`` on POSIX) instead of ``os.sep``, so names containing ``:`` were
      split into path segments;
    - the prefix was removed with ``str.replace``, which also deleted later
      occurrences of the prefix text inside the path.

    :param ctx_obj: the click context dict; ``PREFIX`` may be absent or None.
    :param file: the local path as given on the command line or stdin.
    :return: the ``/``-separated identity of the file.
    """
    prefix = ctx_obj.get('PREFIX')
    path = file
    # Only a leading prefix is stripped; a path without it is left untouched.
    if prefix and path.startswith(prefix):
        path = path[len(prefix):]
    if os.sep != '/':
        path = path.replace(os.sep, '/')
    return path
def list_minio_dir(minio: Minio, bucket: str, prefix: str) -> List[str]:
    """Collect the names of all objects below ``prefix`` in ``bucket``.

    Entries flagged as directories are descended into recursively; every
    other entry contributes its object name, in listing order.

    :param minio: client used to list the bucket.
    :param bucket: name of the bucket to list.
    :param prefix: object-name prefix to list under.
    :return: the object names found under ``prefix``.
    """
    collected: List[str] = []
    for entry in minio.list_objects_v2(bucket, prefix=prefix):
        if not entry.is_dir:
            collected.append(entry.object_name)
            continue
        # Directory-like entry: gather everything beneath it.
        collected += list_minio_dir(minio, bucket, entry.object_name)
    return collected
def get_minio_client(config: dict) -> Minio:
    """Build a ``Minio`` client from the S3 section of the configuration.

    The parameter was previously annotated with ``any``, which is the builtin
    function rather than a type; it is now annotated as the dict it is.

    :param config: mapping with the keys ``host``, ``secure``, ``access`` and
        ``secret``.
    :return: a client for the configured endpoint.
    :raises KeyError: if one of the required keys is missing.
    """
    return Minio(
        config['host'],
        secure=config['secure'],
        access_key=config['access'],
        secret_key=config['secret'],
    )
def load_config(path: str) -> dict:
    """Load the JSON configuration file and add the S3 credentials to it.

    The file is read as UTF-8 (the encoding JSON documents are exchanged in)
    instead of the locale-dependent default. The return annotation was
    previously the builtin function ``any`` and is now ``dict``.

    :param path: location of the JSON configuration file.
    :return: the parsed configuration, with ``config['s3']['access']`` and
        ``config['s3']['secret']`` taken from the ``ACM_S3_ACCESS`` and
        ``ACM_S3_SECRET`` environment variables (None when unset).
    :raises KeyError: if the configuration has no ``s3`` section.
    """
    with open(path, 'r', encoding='utf-8') as config_file:
        config = json.load(config_file)
    # Credentials come from the environment, not from the config file.
    s3_settings = config['s3']
    s3_settings['access'] = os.getenv('ACM_S3_ACCESS')
    s3_settings['secret'] = os.getenv('ACM_S3_SECRET')
    return config
# Root command group: loads the configuration and records the global options
# on the click context object so every sub-command can read them.
@click.group()
@click.option('-d', '--debug/--no-debug', default=False)
@click.option('-c', '--config', default=lambda: os.path.join(os.getcwd(), 'acm-config.json'), show_default=True)
@click.option('-x', '--context', required=True)
@click.option('-s', '--stdin/--no-stdin', default=False)
@click.option('-p', '--prefix', default=None)
@click.pass_context
def cli(ctx, debug, config, context, stdin, prefix):
    # Make sure ctx.obj is a dict before filling it in.
    ctx.ensure_object(dict)
    shared_state = ctx.obj
    shared_state['DEBUG'] = debug
    # The config path is resolved into the parsed configuration here.
    shared_state['CONFIG'] = load_config(config)
    shared_state['CONTEXT'] = context
    shared_state['READ_STDIN'] = stdin
    shared_state['PREFIX'] = prefix
@cli.command(name="list")
@click.option('--sha256sum/--no-sha256sum', default=False)
@click.option('--suffix', default=None)
@click.pass_context
def list_files(ctx, sha256sum, suffix):
    """List the files recorded in the context bucket."""
    # Fix: the connection settings live under the "s3" key (the one
    # load_config fills in); the old "minio" lookup raised KeyError.
    s3_config = ctx.obj['CONFIG']['s3']
    minio_bucket = ctx.obj['CONTEXT']
    minio = get_minio_client(s3_config)
    if not minio.bucket_exists(minio_bucket):
        minio.make_bucket(minio_bucket)

    # Collect every object name, descending into directory-like entries.
    found_objects: List[str] = []
    for obj in minio.list_objects_v2(minio_bucket, recursive=False):
        if obj.is_dir:
            found_objects.extend(list_minio_dir(minio, minio_bucket, obj.object_name))
        else:
            found_objects.append(obj.object_name)

    prefix = ctx.obj.get('PREFIX')
    found_files: List[str] = []
    for obj in found_objects:
        file = obj
        if prefix is not None:
            file = os.path.join(prefix, file)
        # Fix: strip the suffix only from the end of the name; str.replace
        # also removed occurrences in the middle of the path.
        if suffix and file.endswith(suffix):
            file = file[:-len(suffix)]
        file = file.strip()
        if sha256sum:
            # Prefix each name with the digest stored in the object metadata.
            stat = minio.stat_object(minio_bucket, obj)
            sha256sum_value = stat.metadata[get_metadata_name("SHA256SUM")]
            file = f'{sha256sum_value} {file}'
        found_files.append(file)
    print(os.linesep.join(found_files))
@cli.command(name="check")
@click.pass_context
@click.argument('files', nargs=-1)
def check_changed_files_hashes(ctx, files):
    """Print the files that are missing remotely or whose sha256sum differs."""
    # Fix: the connection settings live under the "s3" key (the one
    # load_config fills in); the old "minio" lookup raised KeyError.
    s3_config = ctx.obj['CONFIG']['s3']
    minio_bucket = ctx.obj['CONTEXT']
    minio = get_minio_client(s3_config)
    if not minio.bucket_exists(minio_bucket):
        minio.make_bucket(minio_bucket)

    changed_files: List[str] = []
    if ctx.obj['READ_STDIN']:
        # --stdin replaces the positional file list.
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))

    for file in files:
        # The stored digest is attached to the "<identity>.json" object.
        file_identity = f'{get_file_identity(ctx.obj, file)}.json'
        try:
            file_object = minio.stat_object(minio_bucket, file_identity)
            stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")]
            sha256sum = hashlib.sha256()
            with open(file, 'rb') as f:
                for byte_block in iter(lambda: f.read(BUF_SIZE), b""):
                    sha256sum.update(byte_block)
            calculated_file_hash = sha256sum.hexdigest()
            if calculated_file_hash != stored_file_hash:
                changed_files.append(file)
        except NoSuchKey:
            # No remote record at all counts as changed.
            changed_files.append(file)
        # Fix: "except ValueError or ResponseError" evaluates to
        # "except ValueError"; a tuple is needed to catch both.
        except (ValueError, ResponseError) as e:
            print(f'ERROR: {file} {e}')
    print(os.linesep.join(changed_files))
@cli.command(name="update")
@click.pass_context
@click.argument('files', nargs=-1)
def update_changed_files_hashes(ctx, files):
    """Recalculate sha256sum values and store them as remote metadata."""
    # Fix: the connection settings live under the "s3" key (the one
    # load_config fills in); the old "minio" lookup raised KeyError.
    s3_config = ctx.obj['CONFIG']['s3']
    minio_bucket = ctx.obj['CONTEXT']
    minio = get_minio_client(s3_config)
    if not minio.bucket_exists(minio_bucket):
        minio.make_bucket(minio_bucket)

    updated_files: List[str] = []
    if ctx.obj['READ_STDIN']:
        # --stdin replaces the positional file list.
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))

    for file in files:
        # The digest is attached to a small "<identity>.json" object.
        file_identity = f'{get_file_identity(ctx.obj, file)}.json'
        try:
            sha256sum = hashlib.sha256()
            with open(file, 'rb') as f:
                for byte_block in iter(lambda: f.read(BUF_SIZE), b''):
                    sha256sum.update(byte_block)
            calculated_file_hash = sha256sum.hexdigest()

            object_data = {
                "path": file
            }
            # The payload length is known up front, so no seek/tell is needed.
            payload = json.dumps(object_data, sort_keys=True, indent=None).encode('utf-8')
            with io.BytesIO(payload) as data:
                minio.put_object(
                    minio_bucket,
                    file_identity,
                    data,
                    len(payload),
                    content_type="application/json",
                    metadata={
                        "SHA256SUM": calculated_file_hash
                    }
                )
            updated_files.append(file)
        # Fix: "except ValueError or ResponseError" evaluates to
        # "except ValueError"; a tuple is needed to catch both.
        except (ValueError, ResponseError) as e:
            print(f'ERROR: {file} {e}')
    print(os.linesep.join(updated_files))
@cli.command(name="store")
@click.pass_context
@click.argument('files', nargs=-1)
def store_files(ctx, files):
    """Upload the listed files to the <context>-data bucket."""
    # Fix: the connection settings live under the "s3" key (the one
    # load_config fills in); the old "minio" lookup raised KeyError.
    s3_config = ctx.obj['CONFIG']['s3']
    minio_bucket = f'{ctx.obj["CONTEXT"]}-data'
    minio = get_minio_client(s3_config)
    if not minio.bucket_exists(minio_bucket):
        minio.make_bucket(minio_bucket)

    stored_files: List[str] = []
    if ctx.obj['READ_STDIN']:
        # --stdin replaces the positional file list.
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))

    for file in files:
        file_identity = get_file_identity(ctx.obj, file)
        try:
            minio.fput_object(
                minio_bucket,
                file_identity,
                file,
                content_type="application/octet-stream"
            )
            stored_files.append(file)
        except ResponseError as e:
            # Report the failure and carry on with the remaining files.
            print(f'ERROR: {file} {e}')
    print(os.linesep.join(stored_files))
@cli.command(name="retrieve")
@click.pass_context
@click.option('-d', '--destination', default=None)
@click.argument('files', nargs=-1)
def retrieve_files(ctx, destination, files):
    """Download the listed files from the <context>-data bucket."""
    # Fix: the connection settings live under the "s3" key (the one
    # load_config fills in); the old "minio" lookup raised KeyError.
    s3_config = ctx.obj['CONFIG']['s3']
    minio_bucket = f'{ctx.obj["CONTEXT"]}-data'
    minio = get_minio_client(s3_config)
    if not minio.bucket_exists(minio_bucket):
        minio.make_bucket(minio_bucket)

    retrieved_files: List[str] = []
    if ctx.obj['READ_STDIN']:
        # --stdin replaces the positional file list.
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))

    for file in files:
        file_identity = get_file_identity(ctx.obj, file)
        file_destination = file
        if destination is not None:
            # Fix: os.path.join discards `destination` when the second part is
            # absolute, so leading slashes are dropped from the identity first.
            file_destination = os.path.join(destination, file_identity.lstrip('/'))
        try:
            minio.fget_object(
                minio_bucket,
                file_identity,
                file_destination
            )
            retrieved_files.append(file_destination)
        except ResponseError as e:
            # Report the failure and carry on with the remaining files.
            print(f'ERROR: {file_destination} {e}')
    print(os.linesep.join(retrieved_files))
# Run the command group only when executed as a script, starting from an
# empty context object.
if __name__ == '__main__':
    cli(obj=dict())

2
requirements.txt

@ -0,0 +1,2 @@
click == 7.1.1
minio == 5.0.8

11
setup.py

@ -0,0 +1,11 @@
#!/usr/bin/env python
# Packaging script: declares the distribution metadata for this project.
# NOTE(review): distutils is deprecated (PEP 632) and no longer ships with
# Python 3.12+ - confirm the supported interpreter versions.
from distutils.core import setup
setup(
# Distribution name and version
name='Asset-Compression-Manager',
version='0.1.0',
# One-line summary of the package
description='Helper Utility For Managing Compressed Assets',
# Maintainer contact details
author='Drew Short',
author_email='warrick@sothr.com'
)
Loading…
Cancel
Save