Tooling for managing asset compression, storage, and retrieval
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

286 lines
8.4 KiB

  1. #!/usr/bin/env python
  2. import hashlib
  3. import io
  4. import json
  5. import os
  6. from typing import List
  7. import click
  8. from minio import Minio, ResponseError
  9. from minio.error import NoSuchKey
  10. # MinIO Metadata Prefix
  11. METADATA_PREFIX = 'X-Amz-Meta-'
  12. # Metadata Constants
  13. METADATA_SHA256SUM = "Sha256sum"
  14. # Size of the buffer to read files with
  15. BUF_SIZE = 4096
  16. def get_metadata_name(key):
  17. return METADATA_PREFIX + 'SHA256SUM'.capitalize()
  18. def get_clean_stdin_iterator(stdin_stream):
  19. return (line.strip() for line in stdin_stream if line.strip() != '')
  20. def get_file_identity(ctx_obj, file):
  21. if 'PREFIX' in ctx_obj and ctx_obj['PREFIX'] is not None:
  22. path = file.replace(ctx_obj['PREFIX'], '')
  23. else:
  24. path = file
  25. if os.pathsep != '/':
  26. path = '/'.join(path.split(os.pathsep))
  27. return path
  28. def list_minio_dir(minio: Minio, bucket: str, prefix: str) -> List[str]:
  29. found_files = []
  30. for obj in minio.list_objects_v2(bucket, prefix=prefix):
  31. if obj.is_dir:
  32. found_files.extend(list_minio_dir(minio, bucket, obj.object_name))
  33. else:
  34. found_files.append(obj.object_name)
  35. return found_files
  36. def get_minio_client(config: any) -> Minio:
  37. host = config['host']
  38. secure = config['secure']
  39. access_key = config['access']
  40. secret_key = config['secret']
  41. return Minio(host, secure=secure, access_key=access_key, secret_key=secret_key)
  42. def load_config(path: str) -> any:
  43. with open(path, 'r') as config_file:
  44. config = json.load(config_file)
  45. # Setup S3 Settings
  46. config['s3']['access'] = os.getenv('ACM_S3_ACCESS')
  47. config['s3']['secret'] = os.getenv('ACM_S3_SECRET')
  48. return config
  49. @click.group()
  50. @click.option('-d', '--debug/--no-debug', default=False)
  51. @click.option('-c', '--config', default=lambda: os.path.join(os.getcwd(), 'acm-config.json'), show_default=True)
  52. @click.option('-x', '--context', required=True)
  53. @click.option('-s', '--stdin/--no-stdin', default=False)
  54. @click.option('-p', '--prefix', default=None)
  55. @click.pass_context
  56. def cli(ctx, debug, config, context, stdin, prefix):
  57. ctx.ensure_object(dict)
  58. ctx.obj['DEBUG'] = debug
  59. ctx.obj['CONFIG'] = load_config(config)
  60. ctx.obj['CONTEXT'] = context
  61. ctx.obj['READ_STDIN'] = stdin
  62. ctx.obj['PREFIX'] = prefix
  63. @cli.command(name="list")
  64. @click.option('--sha256sum/--no-sha256sum', default=False)
  65. @click.option('--suffix', default=None)
  66. @click.pass_context
  67. def list_files(ctx, sha256sum, suffix):
  68. minio_config = ctx.obj['CONFIG']['minio']
  69. minio_bucket = ctx.obj['CONTEXT']
  70. minio = get_minio_client(minio_config)
  71. if not minio.bucket_exists(minio_bucket):
  72. minio.make_bucket(minio_bucket)
  73. found_files: List[str] = []
  74. found_objects: List[str] = []
  75. for obj in minio.list_objects_v2(minio_bucket, recursive=False):
  76. if obj.is_dir:
  77. found_objects.extend(list_minio_dir(minio, minio_bucket, obj.object_name))
  78. else:
  79. found_objects.append(obj.object_name)
  80. for obj in found_objects:
  81. file = obj
  82. if 'PREFIX' in ctx.obj and ctx.obj['PREFIX'] is not None:
  83. file = os.path.join(ctx.obj['PREFIX'], file)
  84. if suffix is not None and suffix in file:
  85. file = file.replace(suffix, '')
  86. file = file.strip()
  87. if sha256sum:
  88. stat = minio.stat_object(minio_bucket, obj)
  89. sha256sum_value = stat.metadata[get_metadata_name("SHA256SUM")]
  90. file = f'{sha256sum_value} {file}'
  91. found_files.append(file)
  92. print(os.linesep.join(found_files))
  93. @cli.command(name="check")
  94. @click.pass_context
  95. @click.argument('files', nargs=-1)
  96. def check_changed_files_hashes(ctx, files):
  97. minio_config = ctx.obj['CONFIG']['minio']
  98. minio_bucket = ctx.obj['CONTEXT']
  99. minio = get_minio_client(minio_config)
  100. if not minio.bucket_exists(minio_bucket):
  101. minio.make_bucket(minio_bucket)
  102. changed_files: List[str] = []
  103. if ctx.obj['READ_STDIN']:
  104. files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
  105. for file in files:
  106. file_identity = f'{get_file_identity(ctx.obj, file)}.json'
  107. try:
  108. file_object = minio.stat_object(minio_bucket, file_identity)
  109. stored_file_hash = file_object.metadata[get_metadata_name("SHA256SUM")]
  110. sha256sum = hashlib.sha256()
  111. with open(file, 'rb') as f:
  112. for byte_block in iter(lambda: f.read(BUF_SIZE), b""):
  113. sha256sum.update(byte_block)
  114. calculated_file_hash = sha256sum.hexdigest()
  115. if calculated_file_hash != stored_file_hash:
  116. changed_files.append(file)
  117. except NoSuchKey as e:
  118. changed_files.append(file)
  119. except ValueError or ResponseError as e:
  120. print(f'ERROR: {file} {e}')
  121. print(os.linesep.join(changed_files))
  122. @cli.command(name="update")
  123. @click.pass_context
  124. @click.argument('files', nargs=-1)
  125. def update_changed_files_hashes(ctx, files):
  126. minio_config = ctx.obj['CONFIG']['minio']
  127. minio_bucket = ctx.obj['CONTEXT']
  128. minio = get_minio_client(minio_config)
  129. if not minio.bucket_exists(minio_bucket):
  130. minio.make_bucket(minio_bucket)
  131. updated_files: List[str] = []
  132. if ctx.obj['READ_STDIN']:
  133. files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
  134. for file in files:
  135. file_identity = f'{get_file_identity(ctx.obj, file)}.json'
  136. try:
  137. sha256sum = hashlib.sha256()
  138. with open(file, 'rb') as f:
  139. for byte_block in iter(lambda: f.read(BUF_SIZE), b''):
  140. sha256sum.update(byte_block)
  141. calculated_file_hash = sha256sum.hexdigest()
  142. object_data = {
  143. "path": file
  144. }
  145. with io.BytesIO(json.dumps(object_data, sort_keys=True, indent=None).encode('utf-8')) as data:
  146. data.seek(0, os.SEEK_END)
  147. data_length = data.tell()
  148. data.seek(0)
  149. minio.put_object(
  150. minio_bucket,
  151. file_identity,
  152. data,
  153. data_length,
  154. content_type="application/json",
  155. metadata={
  156. "SHA256SUM": calculated_file_hash
  157. }
  158. )
  159. updated_files.append(file)
  160. except ValueError or ResponseError as e:
  161. print(f'ERROR: {file} {e}')
  162. print(os.linesep.join(updated_files))
  163. @cli.command(name="store")
  164. @click.pass_context
  165. @click.argument('files', nargs=-1)
  166. def store_files(ctx, files):
  167. minio_config = ctx.obj['CONFIG']['minio']
  168. minio_bucket = f'{ctx.obj["CONTEXT"]}-data'
  169. minio = get_minio_client(minio_config)
  170. if not minio.bucket_exists(minio_bucket):
  171. minio.make_bucket(minio_bucket)
  172. stored_files: List[str] = []
  173. if ctx.obj['READ_STDIN']:
  174. files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
  175. for file in files:
  176. file_identity = get_file_identity(ctx.obj, file)
  177. try:
  178. minio.fput_object(
  179. minio_bucket,
  180. file_identity,
  181. file,
  182. content_type="application/octet-stream"
  183. )
  184. stored_files.append(file)
  185. except ResponseError as e:
  186. print(f'ERROR: {file} {e}')
  187. print(os.linesep.join(stored_files))
  188. @cli.command(name="retrieve")
  189. @click.pass_context
  190. @click.option('-d', '--destination', default=None)
  191. @click.argument('files', nargs=-1)
  192. def retrieve_files(ctx, destination, files):
  193. minio_config = ctx.obj['CONFIG']['minio']
  194. minio_bucket = f'{ctx.obj["CONTEXT"]}-data'
  195. minio = get_minio_client(minio_config)
  196. if not minio.bucket_exists(minio_bucket):
  197. minio.make_bucket(minio_bucket)
  198. retrieved_files: List[str] = []
  199. if ctx.obj['READ_STDIN']:
  200. files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
  201. for file in files:
  202. file_identity = get_file_identity(ctx.obj, file)
  203. file_destination = file
  204. if destination is not None:
  205. file_destination = os.path.join(destination, file_identity)
  206. try:
  207. minio.fget_object(
  208. minio_bucket,
  209. file_identity,
  210. file_destination
  211. )
  212. retrieved_files.append(file_destination)
  213. except ResponseError as e:
  214. print(f'ERROR: {file_destination} {e}')
  215. print(os.linesep.join(retrieved_files))
  216. if __name__ == '__main__':
  217. cli(obj={})