Tooling for managing asset compression, storage, and retrieval

#!/usr/bin/env python3
import asyncio
import collections.abc
import hashlib
import io
import json
import logging
import os
import pathlib
import platform
import sys
import tempfile
from typing import Any, Callable, Dict, List

import click
from minio import Minio, InvalidResponseError
from minio.error import S3Error

from acm.asyncio import make_chunks, run_asyncio_commands, run_command_shell
from acm.config import get_default_config
from acm.logging import setup_basic_logging, update_logging_level
from acm.utility import get_file_sha256sum, get_string_sha256sum
from acm.version import VERSION

LOG = setup_basic_logging("acm")

###########
# Helpers #
###########

def update(d, u):
    """Recursively merge the mapping u into the mapping d."""
    for k, v in u.items():
        if isinstance(v, collections.abc.Mapping):
            d[k] = update(d.get(k, {}), v)
        else:
            d[k] = v
    return d
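
# Illustrative example (values are hypothetical, not from the original source):
# the deep merge preserves sibling keys instead of replacing whole sub-dicts:
#   update({'s3': {'host': 'a'}}, {'s3': {'secure': True}})
#   => {'s3': {'host': 'a', 'secure': True}}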

# The original file never defines METADATA_PREFIX; 'X-Amz-Meta-' is assumed
# here, since that is the standard prefix for S3 user metadata.
METADATA_PREFIX = 'X-Amz-Meta-'

def get_metadata_name(key):
    # Note: the key argument is currently unused; every caller gets the
    # same 'Sha256sum' metadata name.
    return METADATA_PREFIX + 'SHA256SUM'.capitalize()

def get_clean_stdin_iterator(stdin_stream):
    return (line for line in [line.strip() for line in stdin_stream if line.strip() != ''])

def strip_prefix(prefix: str, file: str) -> str:
    if file.startswith(prefix):
        # Slice rather than str.replace() so only the leading prefix is
        # removed, not every occurrence of the substring.
        return file[len(prefix):]
    return file

def get_file_identity(ctx_obj, file):
    if 'REMOVE_PREFIX' in ctx_obj and ctx_obj['REMOVE_PREFIX'] is not None:
        path = strip_prefix(ctx_obj['REMOVE_PREFIX'], file)
    else:
        path = file
    if os.sep != '/':
        # os.sep (the path component separator), not os.pathsep (the PATH
        # list separator), is what must be normalized to '/' for S3 keys.
        path = '/'.join(path.split(os.sep))
    return path

def list_s3_dir(s3: Minio, bucket: str, prefix: str) -> List[str]:
    found_files = []
    for obj in s3.list_objects_v2(bucket, prefix=prefix):
        if obj.is_dir:
            found_files.extend(list_s3_dir(s3, bucket, obj.object_name))
        else:
            found_files.append(obj.object_name)
    return found_files

def get_s3_client(config: Dict[str, Any]) -> Minio:
    host = config['host']
    secure = config['secure']
    access_key = config['access']
    secret_key = config['secret']
    return Minio(host, secure=secure, access_key=access_key, secret_key=secret_key)

def prep_s3(ctx):
    s3_config = ctx.obj['CONFIG']['s3']
    s3_bucket = ctx.obj['CONTEXT']
    s3 = get_s3_client(s3_config)
    if not s3.bucket_exists(s3_bucket):
        s3.make_bucket(s3_bucket)
    return s3_bucket, s3

def get_stored_and_computed_sha256sums(stored_data, profile, file):
    # profile is accepted for call-site symmetry but is not used here.
    stored_file_hash = stored_data['sha256sum']
    stored_profile_hash = stored_data['profileHash']
    calculated_file_hash = get_file_sha256sum(file)
    return stored_profile_hash, stored_file_hash, calculated_file_hash

def add_nested_key(config: Dict[str, Any], path: List[str], value: str) -> bool:
    target = path[0].lower()
    if len(path) == 1:
        config[target] = value
        return True
    else:
        if target not in config:
            config[target] = {}
        add_nested_key(config[target], path[1:], value)
        return False

def read_env_config(prefix, separator='__') -> Dict[str, Any]:
    prefix = prefix + separator
    env_config = {}
    environment_variables = [
        env for env in os.environ.keys() if env.startswith(prefix)]
    for env in environment_variables:
        # Split on the configured separator rather than a hard-coded '__'.
        path = env[len(prefix):].split(separator)
        add_nested_key(env_config, path, os.environ[env])
    return env_config
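
# Illustrative example (hypothetical variable names): with
#   ACM__S3__HOST=play.min.io ACM__S3__SECURE=true
# read_env_config('ACM') yields {'s3': {'host': 'play.min.io', 'secure': 'true'}}.
# Note that values arrive as strings; consumers must coerce types themselves.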

def load_config(path: str) -> Dict[str, Any]:
    combined_config = {}
    with open(
            os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                'acm-config-default.json'),
            'r') as combined_config_file:
        combined_config = json.load(combined_config_file)
    config = {}
    with open(path, 'r') as config_file:
        config = json.load(config_file)
    # Set up concurrency
    if 'concurrency' in config:
        config['concurrency'] = abs(int(config['concurrency']))
    else:
        config['concurrency'] = 0
    update(combined_config, config)
    update(combined_config, read_env_config('ACM'))
    # Calculate profile hashes
    profile_hashes = {}
    profile_hashes['all'] = get_string_sha256sum(
        json.dumps(combined_config['profiles']))
    for profile in combined_config['profiles'].keys():
        profile_hashes[profile] = get_string_sha256sum(
            json.dumps(combined_config['profiles'][profile]))
    combined_config['profileHashes'] = profile_hashes
    return combined_config
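
# A minimal acm-config.json sketch (illustrative field values; the real
# schema lives in acm-config-default.json, which ships next to this script):
#   {
#     "concurrency": 4,
#     "s3": {"host": "localhost:9000", "secure": false,
#            "access": "minioadmin", "secret": "minioadmin"},
#     "profiles": {"default": {...}}
#   }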

@click.group()
@click.option('-d', '--debug/--no-debug', default=False)
@click.option('-c', '--config', default=lambda: os.path.join(os.getcwd(), 'acm-config.json'), show_default=True)
@click.option('-s', '--stdin/--no-stdin', default=False)
@click.option('--remove-prefix', default=None)
@click.option('--add-prefix', default=None)
@click.pass_context
def cli(ctx, debug, config, stdin, remove_prefix, add_prefix):
    ctx.ensure_object(dict)
    # Propagate the global configs
    ctx.obj['DEBUG'] = debug
    # The commands below read ctx.obj['CONFIG'], so the config must be
    # loaded here (this line was commented out in the original).
    ctx.obj['CONFIG'] = load_config(config)
    ctx.obj['READ_STDIN'] = stdin
    ctx.obj['REMOVE_PREFIX'] = remove_prefix
    ctx.obj['ADD_PREFIX'] = add_prefix
    if debug:
        update_logging_level(3, LOG)
    # Reduce the logging noise from library loggers
    update_logging_level(0, "asyncio")
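
# Illustrative invocations (bucket/context names are hypothetical):
#   acm list -x assets
#   find ./src -type f | acm --stdin --remove-prefix ./src/ check -x assets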

####################
# Generic Commands #
####################

@cli.command(name="config")
@click.pass_context
def print_config(ctx):
    """
    Print the loaded configuration
    """
    print(json.dumps(ctx.obj['CONFIG'], indent=2, sort_keys=True))

@cli.command(name="default-config")
@click.argument('profile', default="all")
@click.pass_context
def print_default_config(ctx, profile):
    """
    Print the default configuration, optionally for a single profile
    """
    if profile == "all":
        print(get_default_config().json(exclude_none=True, indent=2, sort_keys=True))
    else:
        config = get_default_config()
        profile_names = config.get_profile_names()
        if profile in profile_names:
            print(config.get_profile(profile).json(exclude_none=True, indent=2, sort_keys=True))
        else:
            print(f"Profile \"{profile}\" is not in {profile_names}")

###############################
# S3 Storage Focused Commands #
###############################

@cli.command(name="list")
@click.option('--sha256sum/--no-sha256sum', default=False)
@click.option('--suffix', default=None)
@click.option('-x', '--context', required=True)
@click.option('--print-identity/--no-print-identity', default=False)
@click.pass_context
def list_files(ctx, context, sha256sum, suffix, print_identity):
    """
    List all file objects in a bucket
    """
    ctx.obj['CONTEXT'] = context
    s3_config = ctx.obj['CONFIG']['s3']
    s3_bucket = ctx.obj['CONTEXT']
    LOG.debug(f"connecting to s3 {s3_config['host']}")
    s3 = get_s3_client(s3_config)
    if not s3.bucket_exists(s3_bucket):
        s3.make_bucket(s3_bucket)
    found_files: List[str] = []
    found_objects: List[str] = []
    for obj in s3.list_objects_v2(s3_bucket, recursive=False):
        if obj.is_dir:
            found_objects.extend(list_s3_dir(s3, s3_bucket, obj.object_name))
        else:
            found_objects.append(obj.object_name)
    for obj in found_objects:
        file = obj
        if 'REMOVE_PREFIX' in ctx.obj and ctx.obj['REMOVE_PREFIX'] is not None:
            file = os.path.join(ctx.obj['REMOVE_PREFIX'], file)
        if suffix is not None and file.endswith(suffix):
            # Trim only a trailing suffix instead of replacing every occurrence.
            file = file[:-len(suffix)]
        file = file.strip()
        if sha256sum:
            file_object = s3.get_object(s3_bucket, obj)
            stored_data = json.load(file_object)
            sha256sum_value = stored_data['sha256sum']
            # The original built this line but never collected it.
            found_files.append(f'{sha256sum_value} {file}')
        elif print_identity:
            file_object = s3.get_object(s3_bucket, obj)
            stored_data = json.load(file_object)
            found_files.append(stored_data['storedAssetIdentity'])
        else:
            found_files.append(file)
    print(os.linesep.join(found_files))
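
# Example (hypothetical bucket): print "<sha256sum> <path>" pairs for every
# tracked object, with the metadata suffix trimmed from the path:
#   acm list -x assets --sha256sum --suffix .json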

@cli.command(name="match")
@click.option('-x', '--context', required=True)
@click.option('--print-identity/--no-print-identity', default=False)
@click.option('-p', '--profile', default='all')
@click.argument('files', nargs=-1)
@click.pass_context
def check_matched_files_hashes(ctx, context, print_identity, profile, files):
    """
    List all files whose stored sha256sum and profile hash both match
    """
    ctx.obj['CONTEXT'] = context
    s3_bucket, s3 = prep_s3(ctx)
    matching_files: List[str] = []
    if ctx.obj['READ_STDIN']:
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
    for file in files:
        file_identity = f'{get_file_identity(ctx.obj, file)}.json'
        try:
            file_object = s3.get_object(s3_bucket, file_identity)
            stored_data = json.load(file_object)
            stored_profile_hash, stored_file_hash, calculated_file_hash = get_stored_and_computed_sha256sums(
                stored_data, profile, file)
            if calculated_file_hash == stored_file_hash \
                    and ctx.obj['CONFIG']['profileHashes'][profile] == stored_profile_hash:
                if print_identity:
                    matching_files.append(stored_data['storedAssetIdentity'])
                else:
                    matching_files.append(file)
        except S3Error as e:
            if e.code == "NoSuchKey":
                continue
            else:
                LOG.error(e)
        except (ValueError, InvalidResponseError) as e:
            # 'except A or B' only catches A; a tuple catches both.
            LOG.error(f'ERROR: {file} {e}')
    print(os.linesep.join(matching_files))
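
# Example pipeline (hypothetical paths): feed a file list on stdin and emit
# the stored identities of assets that have not changed:
#   find ./assets -type f | acm --stdin match -x assets --print-identity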

@cli.command(name="check")
@click.option('-x', '--context', required=True)
@click.option('-p', '--profile', default='all')
@click.argument('files', nargs=-1)
@click.pass_context
def check_changed_files_hashes(ctx, context, profile, files):
    """
    List all files that do not have a matching sha256sum or profile hash
    """
    ctx.obj['CONTEXT'] = context
    s3_bucket, s3 = prep_s3(ctx)
    changed_files: List[str] = []
    if ctx.obj['READ_STDIN']:
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
    for file in files:
        file_identity = f'{get_file_identity(ctx.obj, file)}.json'
        try:
            file_object = s3.get_object(s3_bucket, file_identity)
            stored_data = json.load(file_object)
            stored_profile_hash, stored_file_hash, calculated_file_hash = get_stored_and_computed_sha256sums(
                stored_data, profile, file)
            if calculated_file_hash != stored_file_hash \
                    or ctx.obj['CONFIG']['profileHashes'][profile] != stored_profile_hash:
                changed_files.append(file)
        except S3Error as e:
            if e.code == "NoSuchKey":
                # No stored record means the file counts as changed.
                changed_files.append(file)
            else:
                LOG.error(e)
        except (ValueError, InvalidResponseError) as e:
            LOG.error(f'ERROR: {file} {e}')
    print(os.linesep.join(changed_files))

@cli.command(name="update")
@click.option('-x', '--context', required=True)
@click.option('--input-and-identity/--no-input-and-identity', default=False)
@click.option('-p', '--profile', default='all')
@click.argument('files', nargs=-1)
@click.pass_context
def update_changed_files_hashes(ctx, context, input_and_identity, profile, files):
    """
    Store new data objects for the provided files
    """
    ctx.obj['CONTEXT'] = context
    s3_bucket, s3 = prep_s3(ctx)
    updated_files: List[str] = []
    if ctx.obj['READ_STDIN']:
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
    for file in files:
        identity = None
        if input_and_identity:
            file, identity = file.split('\t')
        file_identity = f'{get_file_identity(ctx.obj, file)}.json'
        try:
            calculated_file_hash = get_file_sha256sum(file)
            object_data = {
                "sourcePath": file,
                "storedAssetIdentity": identity,
                "identity": file_identity,
                "sha256sum": calculated_file_hash,
                "profileHash": ctx.obj['CONFIG']['profileHashes'][profile]
            }
            with io.BytesIO(json.dumps(object_data, sort_keys=True, indent=None).encode('utf-8')) as data:
                data.seek(0, os.SEEK_END)
                data_length = data.tell()
                data.seek(0)
                s3.put_object(
                    s3_bucket,
                    file_identity,
                    data,
                    data_length,
                    content_type="application/json",
                    metadata={}
                )
            updated_files.append(file)
        except (ValueError, InvalidResponseError) as e:
            LOG.error(f'ERROR: {file} {e}')
    print(os.linesep.join(updated_files))
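
# Each tracked file is described by a small JSON object stored under
# '<identity>.json', e.g. (illustrative values):
#   {"identity": "img/logo.png.json", "sourcePath": "img/logo.png",
#    "storedAssetIdentity": null, "sha256sum": "9f86d08...",
#    "profileHash": "e3b0c44..."}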

@cli.command(name="store")
@click.option('-x', '--context', required=True)
@click.argument('files', nargs=-1)
@click.pass_context
def store_files(ctx, context, files):
    """
    Store the specified files in a <context> bucket for later retrieval.
    """
    ctx.obj['CONTEXT'] = context
    s3_bucket, s3 = prep_s3(ctx)
    stored_files: List[str] = []
    if ctx.obj['READ_STDIN']:
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
    for file in files:
        file_identity = get_file_identity(ctx.obj, file)
        try:
            s3.fput_object(
                s3_bucket,
                file_identity,
                file,
                content_type="application/octet-stream"
            )
            if 'ADD_PREFIX' in ctx.obj and ctx.obj['ADD_PREFIX'] is not None:
                stored_files.append(os.path.join(
                    ctx.obj['ADD_PREFIX'], file_identity))
            else:
                stored_files.append(file)
        except InvalidResponseError as e:
            # Logging calls do not accept a file= keyword; LOG already
            # writes to stderr.
            LOG.error(f'ERROR: {file} {e}')
    print(os.linesep.join(stored_files))
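
# Example round trip (hypothetical paths and contexts):
#   find ./build -type f | acm --stdin --remove-prefix ./build/ store -x release
#   acm list -x release | acm --stdin retrieve -x release -d ./restore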

@cli.command(name="retrieve")
@click.option('-x', '--context', required=True)
@click.option('-d', '--destination', default=None)
@click.argument('files', nargs=-1)
@click.pass_context
def retrieve_files(ctx, context, destination, files):
    """
    Retrieve the specified files from a <context> bucket
    """
    ctx.obj['CONTEXT'] = context
    s3_bucket, s3 = prep_s3(ctx)
    retrieved_files: List[str] = []
    if ctx.obj['READ_STDIN']:
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
    for file in files:
        file_identity = get_file_identity(ctx.obj, file)
        file_destination = file
        if destination is not None:
            file_destination = os.path.join(destination, file_identity)
        try:
            s3.fget_object(
                s3_bucket,
                file_identity,
                file_destination
            )
            retrieved_files.append(file_destination)
        except S3Error as e:
            if e.code == "NoSuchKey":
                LOG.error(f'ERROR: {file_identity} {file_destination} {e}')
            else:
                LOG.error(e)
        except InvalidResponseError as e:
            LOG.error(f'ERROR: {file_destination} {e}')
    print(os.linesep.join(retrieved_files))

@cli.command(name="clean")
@click.option('-x', '--context', required=True)
@click.option('-d', '--context-data', default=None)
@click.option('-n', '--dry-run/--no-dry-run', default=False)
@click.argument('files', nargs=-1)
@click.pass_context
def clean_files(ctx, context, context_data, dry_run, files):
    """
    Remove objects in a <context> bucket that do not match the specified files.
    """
    ctx.obj['CONTEXT'] = context
    s3_bucket, s3 = prep_s3(ctx)
    s3_data_bucket = context_data
    found_files: List[str] = []
    found_data_files: List[str] = []
    removed_files: List[str] = []
    if ctx.obj['READ_STDIN']:
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
    # Go through and find all matching files
    for file in files:
        file_identity = f'{get_file_identity(ctx.obj, file)}.json'
        try:
            if s3_data_bucket is not None:
                file_object = s3.get_object(s3_bucket, file_identity)
                stored_data = json.load(file_object)
                stored_data_file_identity = stored_data['storedAssetIdentity']
                found_files.append(file_identity)
                found_data_files.append(stored_data_file_identity)
            else:
                file_object = s3.get_object(s3_bucket, file_identity)
                found_files.append(file_identity)
        except InvalidResponseError as e:
            LOG.error(f'ERROR: InvalidResponseError {file_identity} {e}')
        except S3Error as e:
            if e.code == "NoSuchKey":
                LOG.error(f'ERROR: NoSuchKey {file_identity}')
            else:
                LOG.error(e)
    found_files = set(found_files)
    found_data_files = set(found_data_files)
    # Find all objects in the metadata bucket
    found_objects: List[str] = []
    for obj in s3.list_objects_v2(s3_bucket, recursive=False):
        if obj.is_dir:
            found_objects.extend(list_s3_dir(s3, s3_bucket, obj.object_name))
        else:
            found_objects.append(obj.object_name)
    # Find all objects in the data bucket, if one was given (the original
    # listed it unconditionally, which fails when context_data is None)
    found_data_objects: List[str] = []
    if s3_data_bucket is not None:
        for obj in s3.list_objects_v2(s3_data_bucket, recursive=False):
            if obj.is_dir:
                found_data_objects.extend(list_s3_dir(
                    s3, s3_data_bucket, obj.object_name))
            else:
                found_data_objects.append(obj.object_name)
    for file_identity in found_objects:
        if file_identity not in found_files:
            if dry_run:
                removed_files.append(f'{s3_bucket}:{file_identity}')
            else:
                try:
                    s3.remove_object(s3_bucket, file_identity)
                    removed_files.append(f'{s3_bucket}:{file_identity}')
                except InvalidResponseError as e:
                    LOG.error(
                        f'ERROR: {s3_bucket}:{file_identity} {e}')
    for file_data_identity in found_data_objects:
        if file_data_identity not in found_data_files:
            if dry_run:
                removed_files.append(f'{s3_data_bucket}:{file_data_identity}')
            else:
                try:
                    s3.remove_object(s3_data_bucket, file_data_identity)
                    removed_files.append(
                        f'{s3_data_bucket}:{file_data_identity}')
                except InvalidResponseError as e:
                    LOG.error(
                        f'ERROR: {s3_data_bucket}:{file_data_identity} {e}')
    print(os.linesep.join(removed_files))
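
# Example (hypothetical buckets): preview which stale metadata and data
# objects would be deleted, without touching anything:
#   find ./assets -type f | acm --stdin clean -x assets -d assets-data --dry-run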

######################################
# Asset Compression Focused Commands #
######################################

@cli.command(name="compress")
@click.option('-p', '--profile', default='default')
@click.option('-c', '--content', default='all')
@click.option('-d', '--destination', default=None)
@click.option('--print-input-and-identity/--no-print-input-and-identity', default=False)
@click.argument('files', nargs=-1)
@click.pass_context
def compress_assets(ctx, profile, content, destination, print_input_and_identity, files):
    """
    Compress the requested files and store them in a storage bucket.
    """
    profiles = ctx.obj['CONFIG']['profiles']
    if profile not in profiles:
        raise ValueError(f'Unrecognized profile: {profile}')
    default_profile: Dict[str, Any] = profiles['default']
    profile: Dict[str, Any] = profiles[profile]
    if content != 'all':
        if content not in profile and content not in default_profile:
            raise ValueError(f'Unrecognized content: {content}')
    content_configurations = []
    if content == 'all':
        content_names: set = set()
        for content_name in profile.keys():
            content_names.add(content_name)
            content_configurations.append(profile[content_name])
        for content_name in default_profile.keys():
            if content_name not in content_names:
                content_names.add(content_name)
                content_configurations.append(default_profile[content_name])
    else:
        if content in profile:
            content_configurations.append(profile[content])
        else:
            content_configurations.append(default_profile[content])
    if ctx.obj['READ_STDIN']:
        files = get_clean_stdin_iterator(click.get_text_stream('stdin'))
    if destination is None:
        destination = tempfile.mkdtemp()
    task_output = []
    tasks = []
    follow_up_tasks = []

    def store_filename(storage_list: List[str], filename: str):
        """
        A small closure factory: returns a callable that appends the
        processed filename to the given list when a task succeeds.
        """
        return lambda: storage_list.append(filename)

    def queue_follow_up_task_if_keep_smaller_input(follow_up_tasks, input_file: str, output_file: str, keep_smaller_input: bool = True):
        """
        Returns a callable that, when invoked after compression, queues a
        follow-up task restoring the input if it is smaller than the output.
        """
        if keep_smaller_input:
            # Quote the paths, matching the quoting used for the main command.
            command = f"cp '{input_file}' '{output_file}'"

            def task():
                input_size = os.path.getsize(input_file)
                output_size = os.path.getsize(output_file)
                if output_size > input_size:
                    follow_up_tasks.append(
                        run_command_shell(
                            command,
                            stdout=asyncio.subprocess.DEVNULL,
                            stderr=asyncio.subprocess.DEVNULL,
                            on_success=[store_filename(
                                task_output,
                                f'Preserved smaller "{input_file}" {output_size} > {input_size}'
                            )]
                        )
                    )
            return task
        return lambda: True
    for input_file in files:
        for content_configuration in content_configurations:
            if any([input_file.endswith(extension) for extension in content_configuration['extensions']]):
                file = input_file
                file_extension = pathlib.Path(input_file).suffix
                if 'REMOVE_PREFIX' in ctx.obj and ctx.obj['REMOVE_PREFIX'] is not None:
                    file = strip_prefix(ctx.obj['REMOVE_PREFIX'], input_file)
                if 'preserveInputExtension' in content_configuration \
                        and content_configuration['preserveInputExtension']:
                    output_file = os.path.join(destination, file)
                else:
                    output_file_without_ext = os.path.splitext(
                        os.path.join(destination, file))[0]
                    output_file = f'{output_file_without_ext}.{content_configuration["outputExtension"]}'
                output_file_identity = get_file_identity(
                    {'REMOVE_PREFIX': destination}, output_file)
                output_file_dir = os.path.dirname(output_file)
                os.makedirs(output_file_dir, exist_ok=True)
                if 'preserveSmallerInput' in content_configuration:
                    preserve_smaller_input = bool(
                        content_configuration['preserveSmallerInput'])
                else:
                    preserve_smaller_input = True
                if 'forcePreserveSmallerInput' in content_configuration:
                    force_preserve_smaller_input = bool(
                        content_configuration['forcePreserveSmallerInput'])
                else:
                    force_preserve_smaller_input = False
                # Only preserve the input if requested AND the extensions of
                # the input and the output match. pathlib's suffix includes
                # the leading dot, so strip it before comparing.
                preserve_smaller_input = preserve_smaller_input and (
                    force_preserve_smaller_input
                    or file_extension.lstrip('.') == content_configuration["outputExtension"])
                command: str = content_configuration['command'] \
                    .replace('{input_file}', f'\'{input_file}\'') \
                    .replace('{output_file}', f'\'{output_file}\'')
                tasks.append(
                    run_command_shell(
                        command,
                        stdout=asyncio.subprocess.DEVNULL,
                        stderr=asyncio.subprocess.DEVNULL,
                        on_success=[store_filename(
                            task_output,
                            f'{input_file}\t{output_file_identity}' if print_input_and_identity else output_file
                        ), queue_follow_up_task_if_keep_smaller_input(
                            follow_up_tasks,
                            input_file,
                            output_file,
                            preserve_smaller_input
                        )]
                    )
                )
    results = run_asyncio_commands(
        tasks, max_concurrent_tasks=ctx.obj['CONFIG']['concurrency']
    )
    # The follow-up pass restores inputs that compressed poorly.
    follow_up_results = run_asyncio_commands(
        follow_up_tasks, max_concurrent_tasks=ctx.obj['CONFIG']['concurrency']
    )
    print(os.linesep.join(task_output))
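
# A content configuration sketch (illustrative field names and tool choice;
# the real defaults live in acm-config-default.json):
#   "png": {"extensions": [".png"], "outputExtension": "png",
#           "preserveSmallerInput": true,
#           "command": "pngcrush -brute {input_file} {output_file}"}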

if __name__ == '__main__':
    cli(obj={})