Browse Source

Cleanup and updated README

add-file-preservation
Drew Short 5 years ago
parent
commit
e48876ca27
  1. 14
      README.md
  2. 91
      acm.py

14
README.md

@ -67,6 +67,20 @@ $ ./acm.py --context testing --prefix "/tmp/" --stdin list --suffix .json --sha2
Print out a sha256sum compatible check list
### Checking For Matches
Compare local files against the remote bucket and list the files whose sha256sum matches the stored value.
Process a list of files
```bash
$ ./acm.py -x <bucket> -p <prefix to strip> match FILES...
```
Process a list from stdin
```bash
$ find /tmp -name '*.jpg' | ./acm.py -x <bucket> -p <prefix to strip> match
```
### Checking For Changes
Compare local files against the remote bucket and list the files that are missing remotely or whose sha256sum differs from the stored value.

91
acm.py

@ -58,6 +58,29 @@ def get_s3_client(config: any) -> Minio:
return Minio(host, secure=secure, access_key=access_key, secret_key=secret_key)
def prep_s3(ctx):
    """Create the S3 client for this invocation and ensure its bucket exists.

    The S3 settings are read from ``ctx.obj['CONFIG']['s3']`` and the bucket
    name from ``ctx.obj['CONTEXT']``. When the bucket is not present on the
    server it is created before returning.

    Returns:
        A ``(bucket_name, client)`` tuple.
    """
    client_settings = ctx.obj['CONFIG']['s3']
    bucket_name = ctx.obj['CONTEXT']
    client = get_s3_client(client_settings)

    bucket_present = client.bucket_exists(bucket_name)
    if not bucket_present:
        # First use of this context: create the bucket on demand.
        client.make_bucket(bucket_name)

    return bucket_name, client
def get_file_sha256sum(s3, s3_bucket, file_identity, file):
    """Return the stored and the locally computed SHA-256 digests for a file.

    The stored digest is read from the metadata of the object
    ``file_identity`` in ``s3_bucket``; the local digest is computed by
    reading ``file`` from disk in ``BUF_SIZE`` chunks.

    Returns:
        A ``(stored_file_hash, calculated_file_hash)`` tuple, where the
        calculated hash is a hex digest string.
    """
    # Query the remote object first so a missing key raises before any
    # local file I/O is attempted.
    remote_stat = s3.stat_object(s3_bucket, file_identity)
    stored_digest = remote_stat.metadata[get_metadata_name("SHA256SUM")]

    hasher = hashlib.sha256()
    with open(file, 'rb') as handle:
        while True:
            chunk = handle.read(BUF_SIZE)
            if chunk == b"":
                # An empty read marks end of file.
                break
            hasher.update(chunk)

    local_digest = hasher.hexdigest()
    return stored_digest, local_digest
def load_config(path: str) -> any:
with open(path, 'r') as config_file:
config = json.load(config_file)
@ -131,15 +154,8 @@ def list_files(ctx, sha256sum, suffix):
@click.pass_context
@click.argument('files', nargs=-1)
def check_matched_files_hashes(ctx, files):
s3_config = ctx.obj['CONFIG']['s3']
s3_bucket = ctx.obj['CONTEXT']
s3 = get_s3_client(s3_config)
if not s3.bucket_exists(s3_bucket):
s3.make_bucket(s3_bucket)
changed_files: List[str] = []
s3_bucket, s3 = prep_s3(ctx)
matching_files: List[str] = []
if ctx.obj['READ_STDIN']:
files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
@ -147,34 +163,22 @@ def check_matched_files_hashes(ctx, files):
for file in files:
file_identity = f'{get_file_identity(ctx.obj, file)}.json'
try:
file_object = s3.stat_object(s3_bucket, file_identity)
stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")]
sha256sum = hashlib.sha256()
with open(file, 'rb') as f:
for byte_block in iter(lambda: f.read(BUF_SIZE), b""):
sha256sum.update(byte_block)
calculated_file_hash = sha256sum.hexdigest()
stored_file_hash, calculated_file_hash = get_file_sha256sum(s3, s3_bucket, file_identity, file)
if calculated_file_hash == stored_file_hash:
changed_files.append(file)
matching_files.append(file)
except NoSuchKey as e:
continue
except ValueError or ResponseError as e:
print(f'ERROR: {file} {e}')
print(os.linesep.join(changed_files))
print(os.linesep.join(matching_files))
@cli.command(name="check")
@click.pass_context
@click.argument('files', nargs=-1)
def check_changed_files_hashes(ctx, files):
s3_config = ctx.obj['CONFIG']['s3']
s3_bucket = ctx.obj['CONTEXT']
s3 = get_s3_client(s3_config)
if not s3.bucket_exists(s3_bucket):
s3.make_bucket(s3_bucket)
s3_bucket, s3 = prep_s3(ctx)
changed_files: List[str] = []
if ctx.obj['READ_STDIN']:
@ -183,13 +187,7 @@ def check_changed_files_hashes(ctx, files):
for file in files:
file_identity = f'{get_file_identity(ctx.obj, file)}.json'
try:
file_object = s3.stat_object(s3_bucket, file_identity)
stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")]
sha256sum = hashlib.sha256()
with open(file, 'rb') as f:
for byte_block in iter(lambda: f.read(BUF_SIZE), b""):
sha256sum.update(byte_block)
calculated_file_hash = sha256sum.hexdigest()
stored_file_hash, calculated_file_hash = get_file_sha256sum(s3, s3_bucket, file_identity, file)
if calculated_file_hash != stored_file_hash:
changed_files.append(file)
except NoSuchKey as e:
@ -204,14 +202,7 @@ def check_changed_files_hashes(ctx, files):
@click.pass_context
@click.argument('files', nargs=-1)
def update_changed_files_hashes(ctx, files):
s3_config = ctx.obj['CONFIG']['s3']
s3_bucket = ctx.obj['CONTEXT']
s3 = get_s3_client(s3_config)
if not s3.bucket_exists(s3_bucket):
s3.make_bucket(s3_bucket)
s3_bucket, s3 = prep_s3(ctx)
updated_files: List[str] = []
if ctx.obj['READ_STDIN']:
@ -253,14 +244,7 @@ def update_changed_files_hashes(ctx, files):
@click.pass_context
@click.argument('files', nargs=-1)
def store_files(ctx, files):
s3_config = ctx.obj['CONFIG']['s3']
s3_bucket = f'{ctx.obj["CONTEXT"]}-data'
s3 = get_s3_client(s3_config)
if not s3.bucket_exists(s3_bucket):
s3.make_bucket(s3_bucket)
s3_bucket, s3 = prep_s3(ctx)
stored_files: List[str] = []
if ctx.obj['READ_STDIN']:
@ -287,14 +271,7 @@ def store_files(ctx, files):
@click.option('-d', '--destination', default=None)
@click.argument('files', nargs=-1)
def retrieve_files(ctx, destination, files):
s3_config = ctx.obj['CONFIG']['s3']
s3_bucket = f'{ctx.obj["CONTEXT"]}-data'
s3 = get_s3_client(s3_config)
if not s3.bucket_exists(s3_bucket):
s3.make_bucket(s3_bucket)
s3_bucket, s3 = prep_s3(ctx)
retrieved_files: List[str] = []
if ctx.obj['READ_STDIN']:

Loading…
Cancel
Save