diff --git a/.gitignore b/.gitignore index e6a0efe..bb8b487 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ venv/ l_venv/ -acm-config.json -*.json \ No newline at end of file +acm-config.json \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index ea2c68b..1968e16 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,7 +30,7 @@ RUN apt-get remove -y curl \ WORKDIR /app # Copy application -COPY acm.py requirements.txt /app/ +COPY acm-config-default.json acm.py requirements.txt /app/ # Install application requirements RUN python3 -m pip install -r requirements.txt diff --git a/acm-config-default.json b/acm-config-default.json new file mode 100644 index 0000000..8e3e4e4 --- /dev/null +++ b/acm-config-default.json @@ -0,0 +1,120 @@ +{ + "concurrency": 0, + "profiles": { + "default": { + "jpeg": { + "processors": ["cjpeg"], + "extensions": [ + "jpg", + "jpeg" + ], + "outputExtension": "jpg", + "command": "cjpeg -optimize -quality 90 -progressive -outfile {{output_file}} {{input_file}}" + }, + "png": { + "processors": ["optipng"], + "extensions": [ + "png" + ], + "outputExtension": "png", + "command": "optipng -o2 -strip all -out {{output_file}} {{input_file}}" + }, + "video": { + "processors": ["ffmpeg"], + "extensions": [ + "mp4", + "webm" + ], + "outputExtension": "mp4", + "command": "ffmpeg -hide_banner -loglevel panic -i {{input_file}} -vcodec libx264 -crf 20 {{output_file}}" + }, + "audio": { + "processors": ["ffmpeg", "opusenc"], + "extensions": [ + "wav", + "mp3" + ], + "outputExtension": "ogg", + "command": "ffmpeg -hide_banner -loglevel panic -i {{input_file}} -f wav -| opusenc --bitrate 64 --vbr --downmix-stereo --discard-comments --discard-pictures - {{output_file}}" + } + }, + "placebo": { + "jpeg": { + "processors": ["cp"], + "extensions": [ + "jpg", + "jpeg" + ], + "outputExtension": "jpg", + "preserveInputExtension": true, + "command": "cp {{input_file}} {{output_file}}" + }, + "png": { + "processors": ["cp"], + "extensions": [ + "png" + 
], + "outputExtension": "png", + "preserveInputExtension": true, + "command": "cp {{input_file}} {{output_file}}" + }, + "video": { + "processors": ["cp"], + "extensions": [ + "mp4", + "webm" + ], + "outputExtension": "mp4", + "preserveInputExtension": true, + "command": "cp {{input_file}} {{output_file}}" + }, + "audio": { + "processors": ["cp"], + "extensions": [ + "wav", + "mp3" + ], + "outputExtension": "ogg", + "preserveInputExtension": true, + "command": "cp {{input_file}} {{output_file}}" + } + }, + "aggressive": { + "jpeg": { + "processors": ["ffmpeg", "cjpeg"], + "extensions": [ + "jpg", + "jpeg" + ], + "outputExtension": "jpg", + "command": "export FILE={{output_file}} && export TEMP_FILE=${FILE}_tmp.jpg && ffmpeg -i {{input_file}} -vf scale=-1:720 ${TEMP_FILE} && cjpeg -optimize -quality 75 -progressive -outfile {{output_file}} ${TEMP_FILE} && rm ${TEMP_FILE}" + }, + "png": { + "processors": ["optipng"], + "extensions": [ + "png" + ], + "outputExtension": "png", + "command": "optipng -o2 -strip all -out {{output_file}} {{input_file}}" + }, + "video": { + "processors": ["ffmpeg"], + "extensions": [ + "mp4", + "webm" + ], + "outputExtension": "mp4", + "command": "ffmpeg -hide_banner -loglevel panic -i {{input_file}} -vf scale=-1:720 -vcodec libx264 -crf 24 {{output_file}}" + }, + "audio": { + "processors": ["ffmpeg", "opusenc"], + "extensions": [ + "wav", + "mp3" + ], + "outputExtension": "ogg", + "command": "ffmpeg -hide_banner -loglevel panic -i {{input_file}} -f wav -| opusenc --bitrate 64 --vbr --downmix-stereo --discard-comments --discard-pictures - {{output_file}}" + } + } + } +} diff --git a/acm.py b/acm.py old mode 100644 new mode 100755 index a1a03a6..e55679a --- a/acm.py +++ b/acm.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import asyncio +import collections.abc import hashlib import io import json @@ -98,6 +99,15 @@ def run_asyncio_commands(tasks, max_concurrent_tasks=0): ########### +def update(d, u): + for k, v in u.items(): + if isinstance(v, 
collections.abc.Mapping): + d[k] = update(d.get(k, {}), v) + else: + d[k] = v + return d + + def get_metadata_name(key): return METADATA_PREFIX + 'SHA256SUM'.capitalize() @@ -154,9 +164,9 @@ def prep_s3(ctx): return s3_bucket, s3 -def get_file_sha256sum(stored_data, file): +def get_file_sha256sum(stored_data, profile, file): stored_file_hash = stored_data['sha256sum'] - stored_profile_hash = stored_data['profilesHash'] + stored_profile_hash = stored_data['profileHashes'][profile] sha256sum = hashlib.sha256() with open(file, 'rb') as f: for byte_block in iter(lambda: f.read(BUF_SIZE), b""): @@ -165,7 +175,49 @@ return stored_profile_hash, stored_file_hash, calculated_file_hash +def get_string_sha256sum(string: str, encoding='utf-8') -> str: + sha256sum = hashlib.sha256() + with io.BytesIO(string.encode(encoding)) as c: + for byte_block in iter(lambda: c.read(BUF_SIZE), b''): + sha256sum.update(byte_block) + return sha256sum.hexdigest() + + +def add_nested_key(config: Dict[str, any], path: List[str], value: str) -> bool: + target = path[0].lower() + if len(path) == 1: + config[target] = value + return True + else: + if target not in config: + config[target] = {} + add_nested_key(config[target], path[1:], value) + return False + + +def read_env_config(prefix, separator='__') -> any: + prefix = prefix+separator + env_config = {} + + environment_variables = [env for env in os.environ.keys() if env.startswith(prefix)] + + for env in environment_variables: + path = env[len(prefix):].split(separator) + add_nested_key(env_config, path, os.environ[env]) + + return env_config + def load_config(path: str) -> any: + combined_config = {} + with open( + os.path.join( + os.path.dirname(os.path.realpath(__file__)), + 'acm-config-default.json'), + 'r') as combined_config_file: + combined_config = json.load(combined_config_file) + + + config = {} with open(path, 'r') as config_file: config = json.load(config_file) @@ -179,15 +231,18 @@ 
def load_config(path: str) -> any: else: config['concurrency'] = 0 - # Calculate profiles hash - sha256sum = hashlib.sha256() - with io.BytesIO(json.dumps(config['profiles']).encode('utf-8')) as c: - for byte_block in iter(lambda: c.read(BUF_SIZE), b''): - sha256sum.update(byte_block) - profiles_hash = sha256sum.hexdigest() - config['profilesHash'] = profiles_hash + update(combined_config, config) + update(combined_config, read_env_config('ACM')) + + # Calculate profiles hash + profile_hashes={} + profile_hashes['all'] = get_string_sha256sum(json.dumps(combined_config['profiles'])) + for profile in combined_config['profiles'].keys(): + profile_hashes[profile] = get_string_sha256sum(json.dumps(combined_config['profiles'][profile])) + + combined_config['profileHashes'] = profile_hashes - return config + return combined_config @click.group() @@ -206,6 +261,17 @@ def cli(ctx, debug, config, stdin, remove_prefix, add_prefix): ctx.obj['ADD_PREFIX'] = add_prefix +#################### +# Generic Commands # +#################### + + +@cli.command(name="config") +@click.pass_context +def print_config(ctx): + print(json.dumps(ctx.obj['CONFIG'], indent=2, sort_keys=True)) + + ############################### # S3 Storage Focused Commands # ############################### @@ -258,11 +324,12 @@ def list_files(ctx, context, sha256sum, suffix): @cli.command(name="match") @click.option('-x', '--context', required=True) @click.option('--print-identity/--no-print-identity', default=False) +@click.option('-p', '--profile', default='all') @click.argument('files', nargs=-1) @click.pass_context -def check_matched_files_hashes(ctx, context, print_identity, files): +def check_matched_files_hashes(ctx, context, print_identity, profile, files): """ - List all files that have matching stored sha256sum and profilesHash + List all files that have matching stored sha256sum and profile hash """ ctx.obj['CONTEXT'] = context s3_bucket, s3 = prep_s3(ctx) @@ -276,9 +343,9 @@ def 
check_matched_files_hashes(ctx, context, print_identity, files): try: file_object = s3.get_object(s3_bucket, file_identity) stored_data = json.load(file_object) - stored_profile_hash, stored_file_hash, calculated_file_hash = get_file_sha256sum(stored_data, file) + stored_profile_hash, stored_file_hash, calculated_file_hash = get_file_sha256sum(stored_data, profile, file) if calculated_file_hash == stored_file_hash \ - and ctx.obj['CONFIG']['profilesHash'] == stored_profile_hash: + and ctx.obj['CONFIG']['profileHashes'][profile] == stored_profile_hash: if print_identity: matching_files.append(stored_data['storedAssetIdentity']) else: @@ -293,11 +360,12 @@ def check_matched_files_hashes(ctx, context, files): @cli.command(name="check") @click.option('-x', '--context', required=True) +@click.option('-p', '--profile', default='all') @click.argument('files', nargs=-1) @click.pass_context -def check_changed_files_hashes(ctx, context, files): +def check_changed_files_hashes(ctx, context, profile, files): """ - List all files that do not have a matching sha256sum or profilesHash + List all files that do not have a matching sha256sum or profile hash """ ctx.obj['CONTEXT'] = context s3_bucket, s3 = prep_s3(ctx) @@ -311,9 +379,9 @@ def check_changed_files_hashes(ctx, context, files): try: file_object = s3.get_object(s3_bucket, file_identity) stored_data = json.load(file_object) - stored_profile_hash, stored_file_hash, calculated_file_hash = get_file_sha256sum(stored_data, file) + stored_profile_hash, stored_file_hash, calculated_file_hash = get_file_sha256sum(stored_data, profile, file) if calculated_file_hash != stored_file_hash \ - or ctx.obj['CONFIG']['profilesHash'] != stored_profile_hash: + or ctx.obj['CONFIG']['profileHashes'][profile] != stored_profile_hash: changed_files.append(file) except NoSuchKey as e: changed_files.append(file) @@ -326,9 +394,10 @@ @cli.command(name="update") @click.option('-x', 
'--context', required=True) @click.option('--input-and-identity/--no-input-and-identity', default=False) +@click.option('-p', '--profile', default='all') @click.argument('files', nargs=-1) @click.pass_context -def update_changed_files_hashes(ctx, context, input_and_identity, files): +def update_changed_files_hashes(ctx, context, input_and_identity, profile, files): """ Store new data objects for the provided files """ @@ -356,7 +425,7 @@ def update_changed_files_hashes(ctx, context, input_and_identity, files): "storedAssetIdentity": identity, "identity": file_identity, "sha256sum": calculated_file_hash, - "profilesHash": ctx.obj['CONFIG']['profilesHash'] + "profileHashes": ctx.obj['CONFIG']['profileHashes'] } with io.BytesIO(json.dumps(object_data, sort_keys=True, indent=None).encode('utf-8')) as data: