diff --git a/README.md b/README.md
index abff33c..4bbec04 100644
--- a/README.md
+++ b/README.md
@@ -69,12 +69,12 @@ $ ./acm.py list -x
 
 List all files while adding a prefix and stripping a suffix
 ```bash
-$ ./acm.py -p list -x --suffix
+$ ./acm.py --remove-prefix list -x --suffix
 ```
 
 List all files with sha256sum compatible output
 ```bash
-$ ./acm.py --prefix "/tmp/" --stdin list -x testing --suffix .json --sha256sum
+$ ./acm.py --remove-prefix "/tmp/" --stdin list -x testing --suffix .json --sha256sum
 ```
 
 Print out a sha256sum compatible check list
@@ -85,12 +85,12 @@ Do a comparison of the remote bucket for files with a matching sha256sum value.
 
 Process a list of files
 ```bash
-$ ./acm.py -p match -x FILES...
+$ ./acm.py --remove-prefix match -x FILES...
 ```
 
 Process a list from stdin
 ```bash
-$ find /tmp -name '*.jpg' | ./acm.py -p match -x
+$ find /tmp -name '*.jpg' | ./acm.py --remove-prefix match -x
 ```
 
 ### Checking For Changes
@@ -99,12 +99,12 @@ Do a comparison of the remote bucket for missing files or files with a mismatch
 
 Process a list of files
 ```bash
-$ ./acm.py -p check -x FILES...
+$ ./acm.py --remove-prefix check -x FILES...
 ```
 
 Process a list from stdin
 ```bash
-$ find /tmp -name '*.jpg' | ./acm.py -p check -x
+$ find /tmp -name '*.jpg' | ./acm.py --remove-prefix check -x
 ```
 
 ### Updating Metadata For Changed Files
@@ -113,12 +113,12 @@ Update the remote bucket with new metadata for the listed files. Calculates new
 
 Process a list of files
 ```bash
-$ ./acm.py -p update -x FILES...
+$ ./acm.py --remove-prefix update -x FILES...
 ```
 
 Process a list from stdin
 ```bash
-$ find /tmp -name '*.jpg' | ./acm.py -p update -x
+$ find /tmp -name '*.jpg' | ./acm.py --remove-prefix update -x
 ```
 
 ### Storing Files
@@ -127,12 +127,12 @@ Store the listed files in `-data`.
 
 Process a list of files
 ```bash
-$ ./acm.py -p store -x FILES...
+$ ./acm.py --remove-prefix store -x FILES...
 ```
 
 Process a list from stdin
 ```bash
-$ find /tmp -name '*.jpg' | ./acm.py -p store -x
+$ find /tmp -name '*.jpg' | ./acm.py --remove-prefix store -x
 ```
 
 ### Retrieving Files
@@ -141,18 +141,59 @@ Retrieve remote files matching listed files. Optionally place the downloaded fil
 
 Process a list of files
 ```bash
-$ ./acm.py -p retrieve -x [-d ] FILES...
+$ ./acm.py --remove-prefix retrieve -x [-d ] FILES...
 ```
 
 Process a list from stdin
 ```bash
-$ find /tmp -name '*.jpg' | ./acm.py -p retrieve -x [-d ]
+$ find /tmp -name '*.jpg' | ./acm.py --remove-prefix retrieve -x [-d ]
 ```
 
 ### Configuring Profiles
 
 ### Compressing Changed Assets
 
+Compressing assets based on profiles
+```bash
+
+```
+
+## Usage as a workflow
+
+Compressing changed files and storing them for later use
+1. Identify changed files
+    - `find -type f | ./acm.py --stdin --remove-prefix check -x aggressive`
+1. Pass identified files to the compressor with a specific profile
+    - ` | ./acm.py --stdin --remove-prefix compress -p aggressive -d /tmp/profile-aggressive/`
+1. Store compressed assets in data bucket
+    - ` | ./acm.py --stdin --remove-prefix /tmp/profile-aggressive/ store -x aggressive`
+1. Update metadata about files
+    - ` | ./acm.py --stdin --remove-prefix update -x aggressive`
+
+As a combined workflow to only compress and store changed assets
+```bash
+export TARGET_DIRECTORY=/mnt/e/data/files/
+export CONTEXT=android
+export PROFILE=aggressive
+export PROFILE_TMP_DIR="/tmp/profile-${PROFILE}/"
+export COMPRESSED_FILES="$(mktemp)"
+# Compress changed assets
+find "$TARGET_DIRECTORY" -type f | ./acm.py --stdin --remove-prefix "$TARGET_DIRECTORY" check -x "$CONTEXT" | ./acm.py --stdin --remove-prefix "$TARGET_DIRECTORY" compress --print-input-and-identity -p "$PROFILE" -d "$PROFILE_TMP_DIR" > "$COMPRESSED_FILES"
+# Store compressed assets
+find "$PROFILE_TMP_DIR" -type f | ./acm.py --stdin --remove-prefix "$PROFILE_TMP_DIR" --add-prefix "$TARGET_DIRECTORY" store -x "${CONTEXT}-data" > /dev/null
+# Update Asset manifests
+cat "$COMPRESSED_FILES" | ./acm.py --stdin --remove-prefix "$TARGET_DIRECTORY" update -x "$CONTEXT" --input-and-identity > /dev/null
+```
+
+As a combined workflow to download matching assets
+```bash
+TARGET_DIRECTORY=/mnt/e/data/files/
+CONTEXT=android
+PROFILE=aggressive
+PROFILE_TMP_DIR="/tmp/profile-${PROFILE}"
+find "$TARGET_DIRECTORY" -type f | ./acm.py --stdin --remove-prefix "$TARGET_DIRECTORY" match -x "$CONTEXT" --print-identity | ./acm.py --stdin retrieve -x "${CONTEXT}-data" -d "$PROFILE_TMP_DIR" > /dev/null
+```
+
 ## Contributing
 
 ## License
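The `### Configuring Profiles` section above is still empty. A hypothetical `acm-config.json` is sketched below for orientation: only `concurrency`, `profiles`, `extensions`, `outputExtension` and `preserveInputExtension` are actually referenced by this patch; the nesting of the profile entries is an assumption, and the connection settings used by `prep_s3` are not visible in the diff, so they are omitted here.

```bash
# Hypothetical acm-config.json (the shape of the nested profile objects is a guess;
# only the field names called out above appear in the patch).
cat > acm-config.json <<'EOF'
{
  "concurrency": 2,
  "profiles": {
    "aggressive": {
      "image": [
        {
          "extensions": [".jpg", ".jpeg", ".png"],
          "outputExtension": "webp",
          "preserveInputExtension": false
        }
      ]
    }
  }
}
EOF
```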
diff --git a/acm.py b/acm.py
index d21082c..a1a03a6 100644
--- a/acm.py
+++ b/acm.py
@@ -5,6 +5,7 @@ import io
 import json
 import os
 import platform
+import sys
 import tempfile
 
 from typing import List, Dict, Callable
@@ -12,12 +13,6 @@ import click
 from minio import Minio, ResponseError
 from minio.error import NoSuchKey
 
-# MinIO Metadata Prefix
-METADATA_PREFIX = 'X-Amz-Meta-'
-
-# Metadata Constants
-METADATA_SHA256SUM = "Sha256sum"
-
 # Size of the buffer to read files with
 BUF_SIZE = 4096
 
@@ -118,8 +113,8 @@ def strip_prefix(prefix: str, file: str) -> str:
 
 
 def get_file_identity(ctx_obj, file):
-    if 'PREFIX' in ctx_obj and ctx_obj['PREFIX'] is not None:
-        path = strip_prefix(ctx_obj['PREFIX'], file)
+    if 'REMOVE_PREFIX' in ctx_obj and ctx_obj['REMOVE_PREFIX'] is not None:
+        path = strip_prefix(ctx_obj['REMOVE_PREFIX'], file)
     else:
         path = file
 
@@ -159,15 +154,15 @@ def prep_s3(ctx):
     return s3_bucket, s3
 
 
-def get_file_sha256sum(s3, s3_bucket, file_identity, file):
-    file_object = s3.stat_object(s3_bucket, file_identity)
-    stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")]
+def get_file_sha256sum(stored_data, file):
+    stored_file_hash = stored_data['sha256sum']
+    stored_profile_hash = stored_data['profilesHash']
     sha256sum = hashlib.sha256()
     with open(file, 'rb') as f:
         for byte_block in iter(lambda: f.read(BUF_SIZE), b""):
             sha256sum.update(byte_block)
     calculated_file_hash = sha256sum.hexdigest()
-    return stored_file_hash, calculated_file_hash
+    return stored_profile_hash, stored_file_hash, calculated_file_hash
 
 
 def load_config(path: str) -> any:
@@ -184,6 +179,14 @@ def load_config(path: str) -> any:
     else:
         config['concurrency'] = 0
 
+    # Calculate profiles hash
+    sha256sum = hashlib.sha256()
+    with io.BytesIO(json.dumps(config['profiles']).encode('utf-8')) as c:
+        for byte_block in iter(lambda: c.read(BUF_SIZE), b''):
+            sha256sum.update(byte_block)
+    profiles_hash = sha256sum.hexdigest()
+    config['profilesHash'] = profiles_hash
+
     return config
 
 
@@ -191,14 +194,16 @@
 @click.option('-d', '--debug/--no-debug', default=False)
 @click.option('-c', '--config', default=lambda: os.path.join(os.getcwd(), 'acm-config.json'), show_default=True)
 @click.option('-s', '--stdin/--no-stdin', default=False)
-@click.option('-p', '--prefix', default=None)
+@click.option('--remove-prefix', default=None)
+@click.option('--add-prefix', default=None)
 @click.pass_context
-def cli(ctx, debug, config, stdin, prefix):
+def cli(ctx, debug, config, stdin, remove_prefix, add_prefix):
     ctx.ensure_object(dict)
     ctx.obj['DEBUG'] = debug
     ctx.obj['CONFIG'] = load_config(config)
     ctx.obj['READ_STDIN'] = stdin
-    ctx.obj['PREFIX'] = prefix
+    ctx.obj['REMOVE_PREFIX'] = remove_prefix
+    ctx.obj['ADD_PREFIX'] = add_prefix
 
 
 ###############################
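A quick illustration of how the renamed flags behave (directory and context name are illustrative, not taken from the patch): `--remove-prefix` is stripped from each input path before the object identity is computed, and `--add-prefix` is only consulted by `store` in this patch, when it prints where a stored asset maps back to.

```bash
# The local path /mnt/e/data/files/img/photo.jpg is looked up under the identity
# "img/photo.jpg.json" once the prefix is removed (paths and context are examples):
find /mnt/e/data/files/ -type f \
  | ./acm.py --stdin --remove-prefix /mnt/e/data/files/ check -x android
```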
@@ -232,8 +237,8 @@ def list_files(ctx, context, sha256sum, suffix):
 
     for obj in found_objects:
         file = obj
-        if 'PREFIX' in ctx.obj and ctx.obj['PREFIX'] is not None:
-            file = os.path.join(ctx.obj['PREFIX'], file)
+        if 'REMOVE_PREFIX' in ctx.obj and ctx.obj['REMOVE_PREFIX'] is not None:
+            file = os.path.join(ctx.obj['REMOVE_PREFIX'], file)
 
         if suffix is not None and suffix in file:
             file = file.replace(suffix, '')
@@ -252,9 +257,13 @@
 
 @cli.command(name="match")
 @click.option('-x', '--context', required=True)
+@click.option('--print-identity/--no-print-identity', default=False)
 @click.argument('files', nargs=-1)
 @click.pass_context
-def check_matched_files_hashes(ctx, context, files):
+def check_matched_files_hashes(ctx, context, print_identity, files):
+    """
+    List all files that have matching stored sha256sum and profilesHash
+    """
     ctx.obj['CONTEXT'] = context
     s3_bucket, s3 = prep_s3(ctx)
     matching_files: List[str] = []
@@ -265,9 +274,15 @@ def check_matched_files_hashes(ctx, context, files):
     for file in files:
         file_identity = f'{get_file_identity(ctx.obj, file)}.json'
         try:
-            stored_file_hash, calculated_file_hash = get_file_sha256sum(s3, s3_bucket, file_identity, file)
-            if calculated_file_hash == stored_file_hash:
-                matching_files.append(file)
+            file_object = s3.get_object(s3_bucket, file_identity)
+            stored_data = json.load(file_object)
+            stored_profile_hash, stored_file_hash, calculated_file_hash = get_file_sha256sum(stored_data, file)
+            if calculated_file_hash == stored_file_hash \
+                    and ctx.obj['CONFIG']['profilesHash'] == stored_profile_hash:
+                if print_identity:
+                    matching_files.append(stored_data['storedAssetIdentity'])
+                else:
+                    matching_files.append(file)
         except NoSuchKey as e:
             continue
         except ValueError or ResponseError as e:
@@ -281,6 +296,9 @@
 @click.argument('files', nargs=-1)
 @click.pass_context
 def check_changed_files_hashes(ctx, context, files):
+    """
+    List all files that do not have a matching sha256sum or profilesHash
+    """
     ctx.obj['CONTEXT'] = context
     s3_bucket, s3 = prep_s3(ctx)
     changed_files: List[str] = []
@@ -291,8 +309,11 @@
     for file in files:
         file_identity = f'{get_file_identity(ctx.obj, file)}.json'
         try:
-            stored_file_hash, calculated_file_hash = get_file_sha256sum(s3, s3_bucket, file_identity, file)
-            if calculated_file_hash != stored_file_hash:
+            file_object = s3.get_object(s3_bucket, file_identity)
+            stored_data = json.load(file_object)
+            stored_profile_hash, stored_file_hash, calculated_file_hash = get_file_sha256sum(stored_data, file)
+            if calculated_file_hash != stored_file_hash \
+                    or ctx.obj['CONFIG']['profilesHash'] != stored_profile_hash:
                 changed_files.append(file)
         except NoSuchKey as e:
             changed_files.append(file)
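With the new `--print-identity` flag, `match` can emit the stored asset identities recorded by `update` instead of the local paths, which is the form `retrieve` expects on stdin. A minimal sketch of that pairing (directories and context names are illustrative):

```bash
find /mnt/e/data/files/ -type f \
  | ./acm.py --stdin --remove-prefix /mnt/e/data/files/ match -x android --print-identity \
  | ./acm.py --stdin retrieve -x android-data -d /tmp/profile-aggressive
```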
@@ -304,9 +325,13 @@ def check_changed_files_hashes(ctx, context, files):
 
 @cli.command(name="update")
 @click.option('-x', '--context', required=True)
+@click.option('--input-and-identity/--no-input-and-identity', default=False)
 @click.argument('files', nargs=-1)
 @click.pass_context
-def update_changed_files_hashes(ctx, context, files):
+def update_changed_files_hashes(ctx, context, input_and_identity, files):
+    """
+    Store new data objects for the provided files
+    """
     ctx.obj['CONTEXT'] = context
     s3_bucket, s3 = prep_s3(ctx)
     updated_files: List[str] = []
@@ -315,6 +340,9 @@
         files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
 
     for file in files:
+        identity = None
+        if input_and_identity:
+            file, identity = file.split('\t')
         file_identity = f'{get_file_identity(ctx.obj, file)}.json'
         try:
             sha256sum = hashlib.sha256()
@@ -322,9 +350,15 @@ def update_changed_files_hashes(ctx, context, files):
                 for byte_block in iter(lambda: f.read(BUF_SIZE), b''):
                     sha256sum.update(byte_block)
             calculated_file_hash = sha256sum.hexdigest()
+
             object_data = {
-                "path": file
+                "sourcePath": file,
+                "storedAssetIdentity": identity,
+                "identity": file_identity,
+                "sha256sum": calculated_file_hash,
+                "profilesHash": ctx.obj['CONFIG']['profilesHash']
             }
+
             with io.BytesIO(json.dumps(object_data, sort_keys=True, indent=None).encode('utf-8')) as data:
                 data.seek(0, os.SEEK_END)
                 data_length = data.tell()
@@ -335,9 +369,7 @@ def update_changed_files_hashes(ctx, context, files):
                     data,
                     data_length,
                     content_type="application/json",
-                    metadata={
-                        "SHA256SUM": calculated_file_hash
-                    }
+                    metadata={}
                 )
             updated_files.append(file)
         except ValueError or ResponseError as e:
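The `--input-and-identity` flag added above expects each stdin line as `<source path><TAB><stored asset identity>`, which is what `compress --print-input-and-identity` emits further down in this patch. A hand-rolled sketch of that handoff for a single file (path, identity and context are illustrative):

```bash
printf '/mnt/e/data/files/img/photo.jpg\timg/photo.webp\n' \
  | ./acm.py --stdin --remove-prefix /mnt/e/data/files/ update -x android --input-and-identity
```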
@@ -351,6 +383,9 @@ def update_changed_files_hashes(ctx, context, files):
 @click.argument('files', nargs=-1)
 @click.pass_context
 def store_files(ctx, context, files):
+    """
+    Store specified files in a bucket for retrieval.
+    """
     ctx.obj['CONTEXT'] = context
     s3_bucket, s3 = prep_s3(ctx)
     stored_files: List[str] = []
@@ -367,9 +402,12 @@ def store_files(ctx, context, files):
                 file,
                 content_type="application/octet-stream"
             )
-            stored_files.append(file)
+            if 'ADD_PREFIX' in ctx.obj and ctx.obj['ADD_PREFIX'] is not None:
+                stored_files.append(os.path.join(ctx.obj['ADD_PREFIX'], file_identity))
+            else:
+                stored_files.append(file)
         except ResponseError as e:
-            print(f'ERROR: {file} {e}')
+            print(f'ERROR: {file} {e}', file=sys.stderr)
 
     print(os.linesep.join(stored_files))
 
@@ -380,6 +418,9 @@
 @click.argument('files', nargs=-1)
 @click.pass_context
 def retrieve_files(ctx, context, destination, files):
+    """
+    Retrieve specified files from a bucket
+    """
     ctx.obj['CONTEXT'] = context
     s3_bucket, s3 = prep_s3(ctx)
     retrieved_files: List[str] = []
@@ -399,8 +440,10 @@ def retrieve_files(ctx, context, destination, files):
                 file_destination
             )
             retrieved_files.append(file_destination)
+        except NoSuchKey as e:
+            print(f'ERROR: {file_identity} {file_destination} {e}', file=sys.stderr)
         except ResponseError as e:
-            print(f'ERROR: {file_destination} {e}')
+            print(f'ERROR: {file_destination} {e}', file=sys.stderr)
 
     print(os.linesep.join(retrieved_files))
 
@@ -414,9 +457,10 @@ def retrieve_files(ctx, context, destination, files):
 @click.option('-p', '--profile', default='default')
 @click.option('-c', '--content', default='all')
 @click.option('-d', '--destination', default=None)
+@click.option('--print-input-and-identity/--no-print-input-and-identity', default=False)
 @click.argument('files', nargs=-1)
 @click.pass_context
-def compress_assets(ctx, profile, content, destination, files):
+def compress_assets(ctx, profile, content, destination, print_input_and_identity, files):
     profiles = ctx.obj['CONFIG']['profiles']
 
     if profile not in profiles:
@@ -471,8 +515,8 @@ def compress_assets(ctx, profile, content, destination, files):
         for content_configuration in content_configurations:
             if any([input_file.endswith(extension) for extension in content_configuration['extensions']]):
                 file = input_file
-                if 'PREFIX' in ctx.obj and ctx.obj['PREFIX'] is not None:
-                    file = strip_prefix(ctx.obj['PREFIX'], input_file)
+                if 'REMOVE_PREFIX' in ctx.obj and ctx.obj['REMOVE_PREFIX'] is not None:
+                    file = strip_prefix(ctx.obj['REMOVE_PREFIX'], input_file)
 
                 if 'preserveInputExtension' in content_configuration \
                         and content_configuration['preserveInputExtension']:
@@ -481,6 +525,8 @@ def compress_assets(ctx, profile, content, destination, files):
                     output_file_without_ext = os.path.splitext(os.path.join(destination, file))[0]
                     output_file = f'{output_file_without_ext}.{content_configuration["outputExtension"]}'
 
+                output_file_identity = get_file_identity({'REMOVE_PREFIX': destination}, output_file)
+
                 output_file_dir = os.path.dirname(output_file)
                 os.makedirs(output_file_dir, exist_ok=True)
 
@@ -493,7 +539,10 @@ def compress_assets(ctx, profile, content, destination, files):
                         command,
                         stdout=asyncio.subprocess.DEVNULL,
                         stderr=asyncio.subprocess.DEVNULL,
-                        on_success=store_filename(compressed_files, output_file)
+                        on_success=store_filename(
+                            compressed_files,
+                            f'{input_file}\t{output_file_identity}' if print_input_and_identity else output_file
+                        )
                     )
                 )
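Since `store` and `retrieve` now report per-file failures on stderr while keeping their file listings on stdout, the two streams can be separated in a pipeline; a small sketch, with illustrative paths and context. Note also that any edit to the `profiles` object changes `profilesHash`, so `check` will report every file as changed until `update` has been re-run.

```bash
find /mnt/e/data/files/ -type f \
  | ./acm.py --stdin --remove-prefix /mnt/e/data/files/ store -x android-data \
  > stored-files.txt 2> store-errors.log
```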