diff --git a/README.md b/README.md
index 4496b7b..a217476 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,20 @@ $ ./acm.py --context testing --prefix "/tmp/" --stdin list --suffix .json --sha2
 
 Print out a sha256sum compatible check list
 
+### Checking For Matches
+
+Do a comparison of the remote bucket for files with a matching sha256sum value.
+
+Process a list of files
+```bash
+$ ./acm.py match FILES...
+```
+
+Process a list from stdin
+```bash
+$ find /tmp -name '*.jpg' | ./acm.py -x match
+```
+
 ### Checking For Changes
 
 Do a comparison of the remote bucket for missing files or files with a mismatch in their sha256sum values.
diff --git a/acm.py b/acm.py
index bbacc47..5c96a78 100644
--- a/acm.py
+++ b/acm.py
@@ -58,6 +58,29 @@ def get_s3_client(config: any) -> Minio:
     return Minio(host, secure=secure, access_key=access_key, secret_key=secret_key)
 
 
+def prep_s3(ctx, bucket_suffix=''):
+    s3_config = ctx.obj['CONFIG']['s3']
+    s3_bucket = f'{ctx.obj["CONTEXT"]}{bucket_suffix}'
+
+    s3 = get_s3_client(s3_config)
+
+    if not s3.bucket_exists(s3_bucket):
+        s3.make_bucket(s3_bucket)
+
+    return s3_bucket, s3
+
+
+def get_file_sha256sum(s3, s3_bucket, file_identity, file):
+    file_object = s3.stat_object(s3_bucket, file_identity)
+    stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")]
+    sha256sum = hashlib.sha256()
+    with open(file, 'rb') as f:
+        for byte_block in iter(lambda: f.read(BUF_SIZE), b""):
+            sha256sum.update(byte_block)
+    calculated_file_hash = sha256sum.hexdigest()
+    return stored_file_hash, calculated_file_hash
+
+
 def load_config(path: str) -> any:
     with open(path, 'r') as config_file:
         config = json.load(config_file)
@@ -131,15 +154,8 @@ def list_files(ctx, sha256sum, suffix):
 @click.pass_context
 @click.argument('files', nargs=-1)
 def check_matched_files_hashes(ctx, files):
-    s3_config = ctx.obj['CONFIG']['s3']
-    s3_bucket = ctx.obj['CONTEXT']
-
-    s3 = get_s3_client(s3_config)
-
-    if not s3.bucket_exists(s3_bucket):
-        s3.make_bucket(s3_bucket)
-
-    changed_files: List[str] = []
+    s3_bucket, s3 = prep_s3(ctx)
+    matching_files: List[str] = []
 
     if ctx.obj['READ_STDIN']:
         files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
@@ -147,34 +163,22 @@ def check_matched_files_hashes(ctx, files):
     for file in files:
         file_identity = f'{get_file_identity(ctx.obj, file)}.json'
         try:
-            file_object = s3.stat_object(s3_bucket, file_identity)
-            stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")]
-            sha256sum = hashlib.sha256()
-            with open(file, 'rb') as f:
-                for byte_block in iter(lambda: f.read(BUF_SIZE), b""):
-                    sha256sum.update(byte_block)
-            calculated_file_hash = sha256sum.hexdigest()
+            stored_file_hash, calculated_file_hash = get_file_sha256sum(s3, s3_bucket, file_identity, file)
             if calculated_file_hash == stored_file_hash:
-                changed_files.append(file)
+                matching_files.append(file)
         except NoSuchKey as e:
             continue
         except ValueError or ResponseError as e:
             print(f'ERROR: {file} {e}')
 
-    print(os.linesep.join(changed_files))
+    print(os.linesep.join(matching_files))
+
 
 @cli.command(name="check")
 @click.pass_context
 @click.argument('files', nargs=-1)
 def check_changed_files_hashes(ctx, files):
-    s3_config = ctx.obj['CONFIG']['s3']
-    s3_bucket = ctx.obj['CONTEXT']
-
-    s3 = get_s3_client(s3_config)
-
-    if not s3.bucket_exists(s3_bucket):
-        s3.make_bucket(s3_bucket)
-
+    s3_bucket, s3 = prep_s3(ctx)
     changed_files: List[str] = []
 
     if ctx.obj['READ_STDIN']:
@@ -183,13 +187,7 @@ def check_changed_files_hashes(ctx, files):
     for file in files:
         file_identity = f'{get_file_identity(ctx.obj, file)}.json'
         try:
-            file_object = s3.stat_object(s3_bucket, file_identity)
-            stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")]
-            sha256sum = hashlib.sha256()
-            with open(file, 'rb') as f:
-                for byte_block in iter(lambda: f.read(BUF_SIZE), b""):
-                    sha256sum.update(byte_block)
-            calculated_file_hash = sha256sum.hexdigest()
+            stored_file_hash, calculated_file_hash = get_file_sha256sum(s3, s3_bucket, file_identity, file)
             if calculated_file_hash != stored_file_hash:
                 changed_files.append(file)
         except NoSuchKey as e:
@@ -204,14 +202,7 @@ def check_changed_files_hashes(ctx, files):
 @click.pass_context
 @click.argument('files', nargs=-1)
 def update_changed_files_hashes(ctx, files):
-    s3_config = ctx.obj['CONFIG']['s3']
-    s3_bucket = ctx.obj['CONTEXT']
-
-    s3 = get_s3_client(s3_config)
-
-    if not s3.bucket_exists(s3_bucket):
-        s3.make_bucket(s3_bucket)
-
+    s3_bucket, s3 = prep_s3(ctx)
     updated_files: List[str] = []
 
     if ctx.obj['READ_STDIN']:
@@ -253,14 +244,7 @@ def update_changed_files_hashes(ctx, files):
 @click.pass_context
 @click.argument('files', nargs=-1)
 def store_files(ctx, files):
-    s3_config = ctx.obj['CONFIG']['s3']
-    s3_bucket = f'{ctx.obj["CONTEXT"]}-data'
-
-    s3 = get_s3_client(s3_config)
-
-    if not s3.bucket_exists(s3_bucket):
-        s3.make_bucket(s3_bucket)
-
+    s3_bucket, s3 = prep_s3(ctx, '-data')
     stored_files: List[str] = []
 
     if ctx.obj['READ_STDIN']:
@@ -287,14 +271,7 @@ def store_files(ctx, files):
 @click.option('-d', '--destination', default=None)
 @click.argument('files', nargs=-1)
 def retrieve_files(ctx, destination, files):
-    s3_config = ctx.obj['CONFIG']['s3']
-    s3_bucket = f'{ctx.obj["CONTEXT"]}-data'
-
-    s3 = get_s3_client(s3_config)
-
-    if not s3.bucket_exists(s3_bucket):
-        s3.make_bucket(s3_bucket)
-
+    s3_bucket, s3 = prep_s3(ctx, '-data')
     retrieved_files: List[str] = []
 
     if ctx.obj['READ_STDIN']:
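
For reviewers who want to exercise the round trip outside the CLI, here is a minimal standalone sketch of the check that the extracted `get_file_sha256sum` helper performs. The endpoint, credentials, bucket, object name, local path, and the `X-Amz-Meta-Sha256sum` metadata key below are illustrative assumptions, not values taken from this patch:

```python
# Sketch only: assumes a reachable MinIO server and an object whose user
# metadata carries the SHA256SUM header that acm.py stores alongside files.
import hashlib

from minio import Minio

BUF_SIZE = 65536  # chunk size; acm.py reads files in BUF_SIZE blocks


def sha256_of(path: str) -> str:
    # Hash in fixed-size chunks so large files are never loaded whole.
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(BUF_SIZE), b""):
            digest.update(block)
    return digest.hexdigest()


# Hypothetical endpoint, credentials, bucket, and object for illustration.
s3 = Minio('localhost:9000', access_key='KEY', secret_key='SECRET', secure=False)
stat = s3.stat_object('testing', 'photo.jpg.json')
stored = stat.metadata['X-Amz-Meta-Sha256sum']  # assumed metadata key name
print('match' if stored == sha256_of('/tmp/photo.jpg') else 'changed')
```

This mirrors the refactored code path: `prep_s3` supplies the client and bucket, and `get_file_sha256sum` pairs the stored hash with the recomputed one so each command only compares the two values.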