diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e6a0efe --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +venv/ +l_venv/ + +acm-config.json +*.json \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e69de29 diff --git a/README.md b/README.md index b564c09..77b9f14 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Asset Compression Helper +Asset Compression Manager (ACM) ======================== ## About @@ -11,20 +11,116 @@ This tool is designed to work with an S3 compatible bucket storage provider. - An S3 Compatible Storage Service - Python 3.7+ -- FFMpeg for jpeg, video, and audio compression +- MozJpeg for jpeg compression - OptiPNG for png optimization +- FFMpeg for video compression and audio decompression +- Opusenc for audio compression ## Using -### Configuring Profiles +### Configuring -### Checking For Changed Assets +ACM expects a configuration file specified with `--config` or as `acm-config.json` in the current directory. -### Compressing Changed Assets +The S3 compatible endpoint needs to be configured in the `s3` object of the configuration file. + +```json +{ + "s3": { + "secure": false, + "host": "127.0.0.1:9000" + } +} +``` + +- `secure` - specifies whether the __*https*__ protocol is used. +- `host` - is the `<hostname>[:<port>]` of the S3 compatible endpoint. + +### Common Options + +- `-c, --config`: The config file to read. Default file is `acm-config.json` in the current directory. +- `-x, --context`: The remote bucket to use. For `store` and `retrieve` operations it is `<context>-data`. +- `-s, --stdin`: Read the file list to process from stdin. +- `-p, --prefix`: The prefix to strip from the input, e.g. 
`acm.py -x test -p /tmp/data/storage/ check /tmp/data/storage/images/img1.jpg` => `images/img1.jpg` + +### Listing Files + +List all files in a bucket +```bash +$ ./acm.py -x <context> list +``` + +List all files while adding a prefix and stripping a suffix +```bash +$ ./acm.py -x <context> -p <prefix> list --suffix <suffix> +``` + +List all files with sha256sum compatible output +```bash +$ ./acm.py --context testing --prefix "/tmp/" --stdin list --suffix .json --sha256sum +``` + +Print out a sha256sum compatible check list + +### Checking For Changes + +Compare the listed files against the remote bucket and report files that are missing remotely or whose sha256sum values do not match. + +Process a list of files +```bash +$ ./acm.py -x <context> -p <prefix> check FILES... +``` + +Process a list from stdin +```bash +$ find /tmp -name '*.jpg' | ./acm.py --stdin -x <context> -p <prefix> check +``` -### Uploading Changed Assets +### Updating Metadata For Changed Files -### Downloading Assets +Update the remote bucket with new metadata for the listed files. Calculates new sha256sum values. + +Process a list of files +```bash +$ ./acm.py -x <context> -p <prefix> update FILES... +``` + +Process a list from stdin +```bash +$ find /tmp -name '*.jpg' | ./acm.py --stdin -x <context> -p <prefix> update +``` + +### Storing Files + +Store the listed files in `<context>-data`. + +Process a list of files +```bash +$ ./acm.py -x <context> -p <prefix> store FILES... +``` + +Process a list from stdin +```bash +$ find /tmp -name '*.jpg' | ./acm.py --stdin -x <context> -p <prefix> store +``` + +### Retrieving Files + +Retrieve remote files matching listed files. Optionally place the downloaded files in a different destination. + +Process a list of files +```bash +$ ./acm.py -x <context> -p <prefix> retrieve [-d <destination>] FILES... 
# MinIO Metadata Prefix
METADATA_PREFIX = 'X-Amz-Meta-'

# Metadata Constants
METADATA_SHA256SUM = "Sha256sum"

# Size of the buffer to read files with
BUF_SIZE = 4096


def get_metadata_name(key):
    """Return the prefixed metadata name for ``key``.

    The key is normalised with ``str.capitalize`` (first character upper-cased,
    the rest lower-cased) and appended to ``METADATA_PREFIX``; for example
    ``"SHA256SUM"`` becomes ``"X-Amz-Meta-Sha256sum"``.

    Args:
        key: Bare metadata key, in any letter case.

    Returns:
        ``METADATA_PREFIX`` followed by the capitalised key.
    """
    # The previous implementation ignored ``key`` and always used the literal
    # 'SHA256SUM', so every other key resolved to the sha256sum header.
    return METADATA_PREFIX + key.capitalize()
def get_file_identity(ctx_obj, file):
    """Return the remote identity for a local file path.

    The configured ``PREFIX`` (if any) is removed from the start of ``file``
    and the platform's directory separators are converted to ``/``.

    Args:
        ctx_obj: The click context dict; ``ctx_obj['PREFIX']`` may be missing
            or ``None``, in which case nothing is stripped.
        file: Local path of the file.

    Returns:
        The path with the leading prefix removed, using ``/`` as separator.
    """
    prefix = ctx_obj.get('PREFIX')
    path = file

    # Strip the prefix only where it leads the path; ``str.replace`` would
    # also remove later occurrences of the same text.
    if prefix and path.startswith(prefix):
        path = path[len(prefix):]

    # ``os.sep`` is the directory separator. The previous code used
    # ``os.pathsep`` (the PATH-list separator, ':' or ';'), which rewrote
    # every ':' in a POSIX path to '/'.
    if os.sep != '/':
        path = path.replace(os.sep, '/')

    return path
@cli.command(name="check")
@click.pass_context
@click.argument('files', nargs=-1)
def check_changed_files_hashes(ctx, files):
    """Print the files whose local SHA-256 differs from the stored value.

    For every input file the object ``<identity>.json`` is looked up in the
    context bucket and the hash held in its metadata is compared with a
    freshly computed SHA-256 of the local file. Files that have no remote
    object, or whose hashes differ, are printed one per line.

    FILES are taken from the command line, or from stdin when the group-level
    ``--stdin`` flag is set.
    """
    # load_config() populates the 's3' section of the configuration; reading
    # a 'minio' key raised KeyError before any work was done.
    minio_config = ctx.obj['CONFIG']['s3']
    minio_bucket = ctx.obj['CONTEXT']

    minio = get_minio_client(minio_config)

    if not minio.bucket_exists(minio_bucket):
        minio.make_bucket(minio_bucket)

    changed_files: List[str] = []

    if ctx.obj['READ_STDIN']:
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))

    for file in files:
        file_identity = f'{get_file_identity(ctx.obj, file)}.json'
        try:
            file_object = minio.stat_object(minio_bucket, file_identity)
            stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")]
            sha256sum = hashlib.sha256()
            # Hash in BUF_SIZE chunks so large assets are never fully loaded.
            with open(file, 'rb') as f:
                for byte_block in iter(lambda: f.read(BUF_SIZE), b""):
                    sha256sum.update(byte_block)
            calculated_file_hash = sha256sum.hexdigest()
            if calculated_file_hash != stored_file_hash:
                changed_files.append(file)
        except NoSuchKey:
            # No metadata object stored yet: the file counts as changed.
            changed_files.append(file)
        except (ValueError, ResponseError) as e:
            # A tuple is required here; `ValueError or ResponseError`
            # evaluates to just `ValueError`, leaving ResponseError uncaught.
            print(f'ERROR: {file} {e}')

    print(os.linesep.join(changed_files))
@cli.command(name="retrieve")
@click.pass_context
@click.option('-d', '--destination', default=None)
@click.argument('files', nargs=-1)
def retrieve_files(ctx, destination, files):
    """Download the listed files from the ``<context>-data`` bucket.

    Each input path is mapped to its remote identity and fetched. Without
    ``--destination`` the object is written to the input path itself; with it,
    the object is written below the destination directory using the remote
    identity as the relative path. Successfully retrieved paths are printed
    one per line.

    FILES are taken from the command line, or from stdin when the group-level
    ``--stdin`` flag is set.
    """
    # load_config() populates the 's3' section of the configuration; reading
    # a 'minio' key raised KeyError before any work was done.
    minio_config = ctx.obj['CONFIG']['s3']
    minio_bucket = f'{ctx.obj["CONTEXT"]}-data'

    minio = get_minio_client(minio_config)

    if not minio.bucket_exists(minio_bucket):
        minio.make_bucket(minio_bucket)

    retrieved_files: List[str] = []

    if ctx.obj['READ_STDIN']:
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))

    for file in files:
        file_identity = get_file_identity(ctx.obj, file)
        file_destination = file
        if destination is not None:
            # os.path.join discards `destination` when the second component is
            # absolute, so drop any leading '/' left after prefix stripping.
            file_destination = os.path.join(destination, file_identity.lstrip('/'))
        try:
            minio.fget_object(
                minio_bucket,
                file_identity,
                file_destination
            )
            retrieved_files.append(file_destination)
        except ResponseError as e:
            print(f'ERROR: {file_destination} {e}')

    print(os.linesep.join(retrieved_files))