# Patch 567f0856a3060df86bf745ef168b3dbd0247d6ce: "Add a clean command"
# From: Drew Short, Mon, 27 Apr 2020 11:47:19 -0500
# Target: acm.py (added directly after retrieve_files()).


def _list_bucket_objects(s3, bucket):
    """Return the names of all objects in ``bucket``, expanding directory entries."""
    object_names: List[str] = []
    for obj in s3.list_objects_v2(bucket, recursive=False):
        if obj.is_dir:
            # Directory entries are expanded against the SAME bucket that is
            # being listed.
            object_names.extend(list_s3_dir(s3, bucket, obj.object_name))
        else:
            object_names.append(obj.object_name)
    return object_names


def _remove_unreferenced(s3, bucket, object_names, keep):
    """Remove every name in ``object_names`` that is not in ``keep``.

    Failures are reported on stderr and do not stop the sweep. Returns the
    ``'<bucket>-<name>'`` labels of the objects that were removed.
    """
    removed: List[str] = []
    for name in object_names:
        if name in keep:
            continue
        try:
            s3.remove_object(bucket, name)
        except ResponseError as e:
            print(f'ERROR: {bucket}-{name} {e}', file=sys.stderr)
        else:
            removed.append(f'{bucket}-{name}')
    return removed


@cli.command(name="clean")
@click.option('-x', '--context', required=True)
@click.option('-d', '--context-data', default=None)
@click.argument('files', nargs=-1)
@click.pass_context
def clean_files(ctx, context, context_data, files):
    """
    Remove objects that do not match any of the specified files.

    Every object in the context bucket whose identity does not correspond to
    one of FILES is removed. When a data bucket is given with
    -d/--context-data, objects in it that are not referenced by the
    'storedAssetIdentity' of a matching file are removed as well.

    The removed objects are printed one per line as '<bucket>-<object>'.
    """
    ctx.obj['CONTEXT'] = context
    s3_bucket, s3 = prep_s3(ctx)
    s3_data_bucket = context_data
    # Sets: these are only used for membership tests in the deletion pass.
    found_files = set()
    found_data_files = set()

    if ctx.obj['READ_STDIN']:
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))

    # Go through and find all matching files
    for file in files:
        file_identity = get_file_identity(ctx.obj, file)
        try:
            file_object = s3.get_object(s3_bucket, file_identity)
        except ResponseError as e:
            # NOTE(review): any lookup failure leaves the identity out of the
            # keep-set, so a transient error on an existing object would let
            # the deletion pass remove it. Confirm this is acceptable.
            print(f'ERROR: {file} {e}', file=sys.stderr)
            continue
        try:
            if s3_data_bucket is not None:
                stored_data = json.load(file_object)
                found_data_files.add(stored_data['storedAssetIdentity'])
        finally:
            # Always hand the HTTP response back; otherwise one connection is
            # leaked per inspected file.
            file_object.close()
            file_object.release_conn()
        found_files.add(file_identity)

    # List both buckets before deleting anything, so a listing failure aborts
    # the command without a partial clean.
    found_objects = _list_bucket_objects(s3, s3_bucket)
    found_data_objects: List[str] = []
    if s3_data_bucket is not None:
        found_data_objects = _list_bucket_objects(s3, s3_data_bucket)

    removed_files = _remove_unreferenced(s3, s3_bucket, found_objects, found_files)
    if s3_data_bucket is not None:
        removed_files.extend(
            _remove_unreferenced(s3, s3_data_bucket, found_data_objects, found_data_files)
        )

    print(os.linesep.join(removed_files))


######################################
# Asset Compression Focused Commands #
######################################