Browse Source

Added a match function

add-file-preservation
Drew Short 5 years ago
parent
commit
9afa0bbc3c
  1. 114
      acm.py

114
acm.py

@ -40,17 +40,17 @@ def get_file_identity(ctx_obj, file):
return path return path
def list_minio_dir(minio: Minio, bucket: str, prefix: str) -> List[str]:
def list_s3_dir(s3: Minio, bucket: str, prefix: str) -> List[str]:
found_files = [] found_files = []
for obj in minio.list_objects_v2(bucket, prefix=prefix):
for obj in s3.list_objects_v2(bucket, prefix=prefix):
if obj.is_dir: if obj.is_dir:
found_files.extend(list_minio_dir(minio, bucket, obj.object_name))
found_files.extend(list_s3_dir(s3, bucket, obj.object_name))
else: else:
found_files.append(obj.object_name) found_files.append(obj.object_name)
return found_files return found_files
def get_minio_client(config: any) -> Minio:
def get_s3_client(config: any) -> Minio:
host = config['host'] host = config['host']
secure = config['secure'] secure = config['secure']
access_key = config['access'] access_key = config['access']
@ -90,20 +90,20 @@ def cli(ctx, debug, config, context, stdin, prefix):
@click.option('--suffix', default=None) @click.option('--suffix', default=None)
@click.pass_context @click.pass_context
def list_files(ctx, sha256sum, suffix): def list_files(ctx, sha256sum, suffix):
minio_config = ctx.obj['CONFIG']['minio']
minio_bucket = ctx.obj['CONTEXT']
s3_config = ctx.obj['CONFIG']['s3']
s3_bucket = ctx.obj['CONTEXT']
minio = get_minio_client(minio_config)
s3 = get_s3_client(s3_config)
if not minio.bucket_exists(minio_bucket):
minio.make_bucket(minio_bucket)
if not s3.bucket_exists(s3_bucket):
s3.make_bucket(s3_bucket)
found_files: List[str] = [] found_files: List[str] = []
found_objects: List[str] = [] found_objects: List[str] = []
for obj in minio.list_objects_v2(minio_bucket, recursive=False):
for obj in s3.list_objects_v2(s3_bucket, recursive=False):
if obj.is_dir: if obj.is_dir:
found_objects.extend(list_minio_dir(minio, minio_bucket, obj.object_name))
found_objects.extend(list_s3_dir(s3, s3_bucket, obj.object_name))
else: else:
found_objects.append(obj.object_name) found_objects.append(obj.object_name)
@ -118,7 +118,7 @@ def list_files(ctx, sha256sum, suffix):
file = file.strip() file = file.strip()
if sha256sum: if sha256sum:
stat = minio.stat_object(minio_bucket, obj)
stat = s3.stat_object(s3_bucket, obj)
sha256sum_value = stat.metadata[get_metadata_name("SHA256SUM")] sha256sum_value = stat.metadata[get_metadata_name("SHA256SUM")]
file = f'{sha256sum_value} {file}' file = f'{sha256sum_value} {file}'
@ -127,17 +127,53 @@ def list_files(ctx, sha256sum, suffix):
print(os.linesep.join(found_files)) print(os.linesep.join(found_files))
@cli.command(name="match")
@click.pass_context
@click.argument('files', nargs=-1)
def check_matched_files_hashes(ctx, files):
    """Print the files whose local SHA-256 digest equals the stored digest.

    For each file, the object named ``<file identity>.json`` is looked up in
    the context bucket and the ``SHA256SUM`` entry of its metadata is compared
    with the digest computed from the local file contents.

    Args:
        ctx: Click context; ``ctx.obj`` supplies ``CONFIG``, ``CONTEXT`` and
            ``READ_STDIN``.
        files: File paths given on the command line. Replaced by the paths
            read from stdin when ``ctx.obj['READ_STDIN']`` is truthy.

    Files with no stored object (``NoSuchKey``) are skipped silently. A
    ``ValueError`` or ``ResponseError`` is reported on stdout with an
    ``ERROR:`` prefix and processing continues with the next file.
    """
    s3_config = ctx.obj['CONFIG']['s3']
    s3_bucket = ctx.obj['CONTEXT']

    s3 = get_s3_client(s3_config)

    if not s3.bucket_exists(s3_bucket):
        s3.make_bucket(s3_bucket)

    matched_files: List[str] = []

    if ctx.obj['READ_STDIN']:
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))

    for file in files:
        file_identity = f'{get_file_identity(ctx.obj, file)}.json'
        try:
            file_object = s3.stat_object(s3_bucket, file_identity)
            stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")]

            # Hash the local file in BUF_SIZE chunks so large files are not
            # read into memory at once.
            sha256sum = hashlib.sha256()
            with open(file, 'rb') as f:
                for byte_block in iter(lambda: f.read(BUF_SIZE), b""):
                    sha256sum.update(byte_block)
            calculated_file_hash = sha256sum.hexdigest()

            if calculated_file_hash == stored_file_hash:
                matched_files.append(file)
        except NoSuchKey:
            # Nothing stored for this file, so it cannot match.
            continue
        except (ValueError, ResponseError) as e:
            # A tuple is required here: `ValueError or ResponseError`
            # evaluates to just `ValueError`, leaving ResponseError uncaught.
            print(f'ERROR: {file} {e}')

    print(os.linesep.join(matched_files))
@cli.command(name="check") @cli.command(name="check")
@click.pass_context @click.pass_context
@click.argument('files', nargs=-1) @click.argument('files', nargs=-1)
def check_changed_files_hashes(ctx, files): def check_changed_files_hashes(ctx, files):
minio_config = ctx.obj['CONFIG']['minio']
minio_bucket = ctx.obj['CONTEXT']
s3_config = ctx.obj['CONFIG']['s3']
s3_bucket = ctx.obj['CONTEXT']
minio = get_minio_client(minio_config)
s3 = get_s3_client(s3_config)
if not minio.bucket_exists(minio_bucket):
minio.make_bucket(minio_bucket)
if not s3.bucket_exists(s3_bucket):
s3.make_bucket(s3_bucket)
changed_files: List[str] = [] changed_files: List[str] = []
@ -147,7 +183,7 @@ def check_changed_files_hashes(ctx, files):
for file in files: for file in files:
file_identity = f'{get_file_identity(ctx.obj, file)}.json' file_identity = f'{get_file_identity(ctx.obj, file)}.json'
try: try:
file_object = minio.stat_object(minio_bucket, file_identity)
file_object = s3.stat_object(s3_bucket, file_identity)
stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")] stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")]
sha256sum = hashlib.sha256() sha256sum = hashlib.sha256()
with open(file, 'rb') as f: with open(file, 'rb') as f:
@ -168,13 +204,13 @@ def check_changed_files_hashes(ctx, files):
@click.pass_context @click.pass_context
@click.argument('files', nargs=-1) @click.argument('files', nargs=-1)
def update_changed_files_hashes(ctx, files): def update_changed_files_hashes(ctx, files):
minio_config = ctx.obj['CONFIG']['minio']
minio_bucket = ctx.obj['CONTEXT']
s3_config = ctx.obj['CONFIG']['s3']
s3_bucket = ctx.obj['CONTEXT']
minio = get_minio_client(minio_config)
s3 = get_s3_client(s3_config)
if not minio.bucket_exists(minio_bucket):
minio.make_bucket(minio_bucket)
if not s3.bucket_exists(s3_bucket):
s3.make_bucket(s3_bucket)
updated_files: List[str] = [] updated_files: List[str] = []
@ -196,8 +232,8 @@ def update_changed_files_hashes(ctx, files):
data.seek(0, os.SEEK_END) data.seek(0, os.SEEK_END)
data_length = data.tell() data_length = data.tell()
data.seek(0) data.seek(0)
minio.put_object(
minio_bucket,
s3.put_object(
s3_bucket,
file_identity, file_identity,
data, data,
data_length, data_length,
@ -217,13 +253,13 @@ def update_changed_files_hashes(ctx, files):
@click.pass_context @click.pass_context
@click.argument('files', nargs=-1) @click.argument('files', nargs=-1)
def store_files(ctx, files): def store_files(ctx, files):
minio_config = ctx.obj['CONFIG']['minio']
minio_bucket = f'{ctx.obj["CONTEXT"]}-data'
s3_config = ctx.obj['CONFIG']['s3']
s3_bucket = f'{ctx.obj["CONTEXT"]}-data'
minio = get_minio_client(minio_config)
s3 = get_s3_client(s3_config)
if not minio.bucket_exists(minio_bucket):
minio.make_bucket(minio_bucket)
if not s3.bucket_exists(s3_bucket):
s3.make_bucket(s3_bucket)
stored_files: List[str] = [] stored_files: List[str] = []
@ -233,8 +269,8 @@ def store_files(ctx, files):
for file in files: for file in files:
file_identity = get_file_identity(ctx.obj, file) file_identity = get_file_identity(ctx.obj, file)
try: try:
minio.fput_object(
minio_bucket,
s3.fput_object(
s3_bucket,
file_identity, file_identity,
file, file,
content_type="application/octet-stream" content_type="application/octet-stream"
@ -251,13 +287,13 @@ def store_files(ctx, files):
@click.option('-d', '--destination', default=None) @click.option('-d', '--destination', default=None)
@click.argument('files', nargs=-1) @click.argument('files', nargs=-1)
def retrieve_files(ctx, destination, files): def retrieve_files(ctx, destination, files):
minio_config = ctx.obj['CONFIG']['minio']
minio_bucket = f'{ctx.obj["CONTEXT"]}-data'
s3_config = ctx.obj['CONFIG']['s3']
s3_bucket = f'{ctx.obj["CONTEXT"]}-data'
minio = get_minio_client(minio_config)
s3 = get_s3_client(s3_config)
if not minio.bucket_exists(minio_bucket):
minio.make_bucket(minio_bucket)
if not s3.bucket_exists(s3_bucket):
s3.make_bucket(s3_bucket)
retrieved_files: List[str] = [] retrieved_files: List[str] = []
@ -270,8 +306,8 @@ def retrieve_files(ctx, destination, files):
if destination is not None: if destination is not None:
file_destination = os.path.join(destination, file_identity) file_destination = os.path.join(destination, file_identity)
try: try:
minio.fget_object(
minio_bucket,
s3.fget_object(
s3_bucket,
file_identity, file_identity,
file_destination file_destination
) )

Loading…
Cancel
Save