|
@ -18,7 +18,7 @@ from minio.error import NoSuchKey |
|
|
BUF_SIZE = 4096 |
|
|
BUF_SIZE = 4096 |
|
|
|
|
|
|
|
|
#Application Version |
|
|
#Application Version |
|
|
VERSION = "1.2.1" |
|
|
|
|
|
|
|
|
VERSION = "1.3.0" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
########### |
|
|
########### |
|
@ -531,9 +531,10 @@ def retrieve_files(ctx, context, destination, files): |
|
|
@cli.command(name="clean") |
|
|
@cli.command(name="clean") |
|
|
@click.option('-x', '--context', required=True) |
|
|
@click.option('-x', '--context', required=True) |
|
|
@click.option('-d', '--context-data', default=None) |
|
|
@click.option('-d', '--context-data', default=None) |
|
|
|
|
|
@click.option('-n', '--dry-run/--no-dry-run', default=False) |
|
|
@click.argument('files', nargs=-1) |
|
|
@click.argument('files', nargs=-1) |
|
|
@click.pass_context |
|
|
@click.pass_context |
|
|
def clean_files(ctx, context, context_data, files): |
|
|
|
|
|
|
|
|
def clean_files(ctx, context, context_data, dry_run, files): |
|
|
""" |
|
|
""" |
|
|
Remove non matching specified files in a <context> bucket for retrieval. |
|
|
Remove non matching specified files in a <context> bucket for retrieval. |
|
|
""" |
|
|
""" |
|
@ -549,7 +550,7 @@ def clean_files(ctx, context, context_data, files): |
|
|
|
|
|
|
|
|
# Go through and find all matching files |
|
|
# Go through and find all matching files |
|
|
for file in files: |
|
|
for file in files: |
|
|
file_identity = get_file_identity(ctx.obj, file) |
|
|
|
|
|
|
|
|
file_identity = f'{get_file_identity(ctx.obj, file)}.json' |
|
|
try: |
|
|
try: |
|
|
if s3_data_bucket is not None: |
|
|
if s3_data_bucket is not None: |
|
|
file_object = s3.get_object(s3_bucket, file_identity) |
|
|
file_object = s3.get_object(s3_bucket, file_identity) |
|
@ -561,7 +562,15 @@ def clean_files(ctx, context, context_data, files): |
|
|
file_object = s3.get_object(s3_bucket, file_identity) |
|
|
file_object = s3.get_object(s3_bucket, file_identity) |
|
|
found_files.append(file_identity) |
|
|
found_files.append(file_identity) |
|
|
except ResponseError as e: |
|
|
except ResponseError as e: |
|
|
print(f'ERROR: {file} {e}', file=sys.stderr) |
|
|
|
|
|
|
|
|
print(f'ERROR: ResponseError {file_identity} {e}', file=sys.stderr) |
|
|
|
|
|
except NoSuchKey as e: |
|
|
|
|
|
print(f'ERROR: NoSuchKey {file_identity}', file=sys.stderr) |
|
|
|
|
|
|
|
|
|
|
|
# print(os.linesep.join(found_objects)) |
|
|
|
|
|
# print(os.linesep.join(found_objects)) |
|
|
|
|
|
|
|
|
|
|
|
found_files = set(found_files) |
|
|
|
|
|
found_data_files = set(found_data_files) |
|
|
|
|
|
|
|
|
# Find all objects in s3 bucket |
|
|
# Find all objects in s3 bucket |
|
|
found_objects: List[str] = [] |
|
|
found_objects: List[str] = [] |
|
@ -571,28 +580,38 @@ def clean_files(ctx, context, context_data, files): |
|
|
else: |
|
|
else: |
|
|
found_objects.append(obj.object_name) |
|
|
found_objects.append(obj.object_name) |
|
|
|
|
|
|
|
|
|
|
|
# print(os.linesep.join(found_objects)) |
|
|
|
|
|
|
|
|
found_data_objects: List[str] = [] |
|
|
found_data_objects: List[str] = [] |
|
|
for obj in s3.list_objects_v2(s3_data_bucket, recursive=False): |
|
|
for obj in s3.list_objects_v2(s3_data_bucket, recursive=False): |
|
|
if obj.is_dir: |
|
|
if obj.is_dir: |
|
|
found_data_objects.extend(list_s3_dir(s3, s3_bucket, obj.object_name)) |
|
|
|
|
|
|
|
|
found_data_objects.extend(list_s3_dir(s3, s3_data_bucket, obj.object_name)) |
|
|
else: |
|
|
else: |
|
|
found_data_objects.append(obj.object_name) |
|
|
found_data_objects.append(obj.object_name) |
|
|
|
|
|
|
|
|
|
|
|
# print(os.linesep.join(found_data_objects)) |
|
|
|
|
|
|
|
|
for file_identity in found_objects: |
|
|
for file_identity in found_objects: |
|
|
if file_identity not in found_files: |
|
|
|
|
|
try: |
|
|
|
|
|
s3.remove_object(s3_bucket, file_identity) |
|
|
|
|
|
removed_files.append(f'{s3_bucket}-{file_identity}') |
|
|
|
|
|
except ResponseError as e: |
|
|
|
|
|
print(f'ERROR: {s3_bucket}-{file_identity} {e}', file=sys.stderr) |
|
|
|
|
|
|
|
|
|
|
|
for file_identity in found_data_objects: |
|
|
|
|
|
if file_identity not in found_data_files: |
|
|
|
|
|
try: |
|
|
|
|
|
s3.remove_object(s3_data_bucket, file_identity) |
|
|
|
|
|
removed_files.append(f'{s3_data_bucket}-{file_identity}') |
|
|
|
|
|
except ResponseError as e: |
|
|
|
|
|
print(f'ERROR: {s3_data_bucket}-{file_identity} {e}', file=sys.stderr) |
|
|
|
|
|
|
|
|
if not file_identity in found_files: |
|
|
|
|
|
if dry_run: |
|
|
|
|
|
removed_files.append(f'{s3_bucket}:{file_identity}') |
|
|
|
|
|
else: |
|
|
|
|
|
try: |
|
|
|
|
|
s3.remove_object(s3_bucket, file_identity) |
|
|
|
|
|
removed_files.append(f'{s3_bucket}:{file_identity}') |
|
|
|
|
|
except ResponseError as e: |
|
|
|
|
|
print(f'ERROR: {s3_bucket}:{file_identity} {e}', file=sys.stderr) |
|
|
|
|
|
|
|
|
|
|
|
for file_data_identity in found_data_objects: |
|
|
|
|
|
if not f'/{file_data_identity}' in found_data_files: |
|
|
|
|
|
if dry_run: |
|
|
|
|
|
removed_files.append(f'{s3_data_bucket}:{file_data_identity}') |
|
|
|
|
|
else: |
|
|
|
|
|
try: |
|
|
|
|
|
s3.remove_object(s3_data_bucket, file_data_identity) |
|
|
|
|
|
removed_files.append(f'{s3_data_bucket}:{file_data_identity}') |
|
|
|
|
|
except ResponseError as e: |
|
|
|
|
|
print(f'ERROR: {s3_data_bucket}:{file_data_identity} {e}', file=sys.stderr) |
|
|
|
|
|
|
|
|
print(os.linesep.join(removed_files)) |
|
|
print(os.linesep.join(removed_files)) |
|
|
|
|
|
|
|
|