Browse Source

Added default file processing and env processing

add-file-preservation 1.1.0
Drew Short 5 years ago
parent
commit
9f847aca1a
  1. 1
      .gitignore
  2. 2
      Dockerfile
  3. 120
      acm-config-default.json
  4. 107
      acm.py

1
.gitignore

@ -2,4 +2,3 @@ venv/
l_venv/
acm-config.json
*.json

2
Dockerfile

@ -30,7 +30,7 @@ RUN apt-get remove -y curl \
WORKDIR /app
# Copy application
COPY acm.py requirements.txt /app/
COPY acm-config-default.json acm.py requirements.txt /app/
# Install application requirements
RUN python3 -m pip install -r requirements.txt

120
acm-config-default.json

@ -0,0 +1,120 @@
{
"concurrency": 0,
"profiles": {
"default": {
"jpeg": {
"processors": ["cjpeg"],
"extensions": [
"jpg",
"jpeg"
],
"outputExtension": "jpg",
"command": "cjpeg -optimize -quality 90 -progressive -outfile {{output_file}} {{input_file}}"
},
"png": {
"processors": ["optipng"],
"extensions": [
"png"
],
"outputExtension": "png",
"command": "optipng -o2 -strip all -out {{output_file}} {{input_file}}"
},
"video": {
"processors": ["ffmpeg"],
"extensions": [
"mp4",
"webm"
],
"outputExtension": "mp4",
"command": "ffmpeg -hide_banner -loglevel panic -i {{input_file}} -vcodec libx264 -crf 20 {{output_file}}"
},
"audio": {
"processors": ["ffmpeg", "opusenc"],
"extensions": [
"wav",
"mp3"
],
"outputExtension": "ogg",
"command": "ffmpeg -hide_banner -loglevel panic -i {{input_file}} -f wav -| opusenc --bitrate 64 --vbr --downmix-stereo --discard-comments --discard-pictures - {{output_file}}"
}
},
"placebo": {
"jpeg": {
"processors": ["cp"],
"extensions": [
"jpg",
"jpeg"
],
"outputExtension": "jpg",
"preserveInputExtension": true,
"command": "cp {{input_file}} {{output_file}}"
},
"png": {
"processors": ["cp"],
"extensions": [
"png"
],
"outputExtension": "png",
"preserveInputExtension": true,
"command": "cp {{input_file}} {{output_file}}"
},
"video": {
"processors": ["cp"],
"extensions": [
"mp4",
"webm"
],
"outputExtension": "mp4",
"preserveInputExtension": true,
"command": "cp {{input_file}} {{output_file}}"
},
"audio": {
"processors": ["cp"],
"extensions": [
"wav",
"mp3"
],
"outputExtension": "ogg",
"preserveInputExtension": true,
"command": "cp {{input_file}} {{output_file}}"
}
},
"aggressive": {
"jpeg": {
"processors": ["ffmpeg", "cjpeg"],
"extensions": [
"jpg",
"jpeg"
],
"outputExtension": "jpg",
"command": "export FILE={{output_file}} && export TEMP_FILE=${FILE}_tmp.jpg && ffmpeg -i {{input_file}} -vf scale=-1:720 ${TEMP_FILE} && cjpeg -optimize -quality 75 -progressive -outfile {{output_file}} ${TEMP_FILE} && rm ${TEMP_FILE}"
},
"png": {
"processors": ["optipng"],
"extensions": [
"png"
],
"outputExtension": "png",
"command": "optipng -o2 -strip all -out {{output_file}} {{input_file}}"
},
"video": {
"processors": ["ffmpeg"],
"extensions": [
"mp4",
"webm"
],
"outputExtension": "mp4",
"command": "ffmpeg -hide_banner -loglevel panic -i {{input_file}} -vf scale=-1:720 -vcodec libx264 -crf 24 {{output_file}}"
},
"audio": {
"processors": ["ffmpeg", "opusenc"],
"extensions": [
"wav",
"mp3"
],
"outputExtension": "ogg",
"command": "ffmpeg -hide_banner -loglevel panic -i {{input_file}} -f wav -| opusenc --bitrate 64 --vbr --downmix-stereo --discard-comments --discard-pictures - {{output_file}}"
}
}
}
}

107
acm.py

@ -1,5 +1,6 @@
#!/usr/bin/env python
import asyncio
import collections.abc
import hashlib
import io
import json
@ -98,6 +99,15 @@ def run_asyncio_commands(tasks, max_concurrent_tasks=0):
###########
def update(d, u):
    """Recursively merge mapping ``u`` into ``d`` and return ``d``.

    Values in ``u`` win over values in ``d``. When a value in ``u`` is itself
    a mapping it is merged key-by-key into the corresponding entry of ``d``
    instead of replacing it wholesale.

    :param d: destination dict; modified in place.
    :param u: mapping holding the overriding values; not modified.
    :return: ``d`` (the same object that was passed in).
    """
    for k, v in u.items():
        if isinstance(v, collections.abc.Mapping):
            existing = d.get(k)
            # A scalar (or missing) entry cannot be merged into; start from a
            # fresh dict so the override replaces it instead of raising
            # TypeError on item assignment.
            if not isinstance(existing, collections.abc.MutableMapping):
                existing = {}
            d[k] = update(existing, v)
        else:
            d[k] = v
    return d
def get_metadata_name(key):
    """Return ``METADATA_PREFIX`` followed by the capitalized checksum name."""
    # NOTE(review): `key` is never read; the suffix is always derived from the
    # literal 'SHA256SUM'. Confirm whether `key.capitalize()` was intended.
    suffix = 'SHA256SUM'.capitalize()
    return METADATA_PREFIX + suffix
@ -154,9 +164,9 @@ def prep_s3(ctx):
return s3_bucket, s3
def get_file_sha256sum(stored_data, file):
def get_file_sha256sum(stored_data, profile, file):
stored_file_hash = stored_data['sha256sum']
stored_profile_hash = stored_data['profilesHash']
stored_profile_hash = stored_data['profileHashes'][profile]
sha256sum = hashlib.sha256()
with open(file, 'rb') as f:
for byte_block in iter(lambda: f.read(BUF_SIZE), b""):
@ -165,7 +175,49 @@ def get_file_sha256sum(stored_data, file):
return stored_profile_hash, stored_file_hash, calculated_file_hash
def get_string_sha256sum(string: str, encoding='utf-8') -> str:
    """Return the hex SHA-256 digest of the JSON serialization of ``string``.

    The value is first passed through ``json.dumps`` (so a plain string is
    hashed including its surrounding quotes and JSON escapes), then encoded
    with ``encoding`` and hashed.

    :param string: value to serialize and hash.
    :param encoding: text encoding applied to the serialized JSON.
    :return: 64-character lowercase hexadecimal digest.
    """
    # The payload is already fully in memory, so hash it directly instead of
    # streaming it through a BytesIO in BUF_SIZE chunks; the digest is the same.
    payload = json.dumps(string).encode(encoding)
    return hashlib.sha256(payload).hexdigest()
def add_nested_key(config: Dict[str, any], path: List[str], value: str) -> bool:
    """Store ``value`` in ``config`` under the nested, lower-cased ``path``.

    Each element of ``path`` is lower-cased and used as a key; intermediate
    dictionaries are created on demand.

    :param config: dictionary to modify in place.
    :param path: key segments from outermost to innermost.
    :param value: value to store at the innermost key.
    :return: ``True`` when the value was stored, ``False`` when ``path`` is
        empty and nothing could be stored.
    """
    if not path:
        # Nothing to address; avoid an IndexError on path[0].
        return False

    target = path[0].lower()
    if len(path) == 1:
        config[target] = value
        return True

    # Descend, creating the intermediate level if needed, and report the
    # outcome of the nested assignment rather than discarding it.
    return add_nested_key(config.setdefault(target, {}), path[1:], value)
def read_env_config(prefix, separator='__') -> any:
    """Build a nested configuration dict from prefixed environment variables.

    Every environment variable whose name starts with ``prefix + separator``
    contributes one value. The remainder of the name is split on
    ``separator`` and passed to ``add_nested_key`` as the key path, so with
    the defaults ``ACM__A__B=1`` is handed over as path ``['A', 'B']``.

    :param prefix: variable-name prefix, without the trailing separator.
    :param separator: string separating the prefix and the path segments.
    :return: dict of the collected values; empty when nothing matches.
    """
    prefix = prefix + separator
    env_config = {}
    for env, value in os.environ.items():
        if not env.startswith(prefix):
            continue
        remainder = env[len(prefix):]
        # Split on the caller's separator (previously hard-coded to '__');
        # str.split rejects an empty separator, so treat that as one segment.
        path = remainder.split(separator) if separator else [remainder]
        add_nested_key(env_config, path, value)
    return env_config
def load_config(path: str) -> any:
combined_config = {}
with open(
os.path.join(
os.path.dirname(os.path.realpath(__file__)),
'acm-config-default.json'),
'r') as combined_config_file:
combined_config = json.load(combined_config_file)
config = {}
with open(path, 'r') as config_file:
config = json.load(config_file)
@ -179,15 +231,18 @@ def load_config(path: str) -> any:
else:
config['concurrency'] = 0
update(combined_config, config)
update(combined_config, read_env_config('ACM'))
# Calculate profiles hash
sha256sum = hashlib.sha256()
with io.BytesIO(json.dumps(config['profiles']).encode('utf-8')) as c:
for byte_block in iter(lambda: c.read(BUF_SIZE), b''):
sha256sum.update(byte_block)
profiles_hash = sha256sum.hexdigest()
config['profilesHash'] = profiles_hash
profile_hashes={}
profile_hashes['all'] = get_string_sha256sum(json.dumps(combined_config['profiles']))
for profile in combined_config['profiles'].keys():
profile_hashes[profile] = get_string_sha256sum(json.dumps(combined_config['profiles'][profile]))
return config
combined_config['profileHashes'] = profile_hashes
return combined_config
@click.group()
@ -206,6 +261,17 @@ def cli(ctx, debug, config, stdin, remove_prefix, add_prefix):
ctx.obj['ADD_PREFIX'] = add_prefix
####################
# Generic Commands #
####################
@cli.command(name="config")
@click.pass_context
def print_config(ctx):
    # Render the configuration stored on the click context as indented JSON
    # with sorted keys and write it to stdout. (Kept as a comment rather than
    # a docstring so the command's help text is unchanged.)
    rendered = json.dumps(ctx.obj['CONFIG'], sort_keys=True, indent=2)
    print(rendered)
###############################
# S3 Storage Focused Commands #
###############################
@ -258,11 +324,12 @@ def list_files(ctx, context, sha256sum, suffix):
@cli.command(name="match")
@click.option('-x', '--context', required=True)
@click.option('--print-identity/--no-print-identity', default=False)
@click.option('-p', '--profile', default='all')
@click.argument('files', nargs=-1)
@click.pass_context
def check_matched_files_hashes(ctx, context, print_identity, files):
def check_matched_files_hashes(ctx, context, print_identity, profile, files):
"""
List all files that have matching stored sha256sum and profilesHash
List all files that have matching stored sha256sum and profile hash
"""
ctx.obj['CONTEXT'] = context
s3_bucket, s3 = prep_s3(ctx)
@ -276,9 +343,9 @@ def check_matched_files_hashes(ctx, context, print_identity, files):
try:
file_object = s3.get_object(s3_bucket, file_identity)
stored_data = json.load(file_object)
stored_profile_hash, stored_file_hash, calculated_file_hash = get_file_sha256sum(stored_data, file)
stored_profile_hash, stored_file_hash, calculated_file_hash = get_file_sha256sum(stored_data, profile, file)
if calculated_file_hash == stored_file_hash \
and ctx.obj['CONFIG']['profilesHash'] == stored_profile_hash:
and ctx.obj['CONFIG']['profileHashes'][profile] == stored_profile_hash:
if print_identity:
matching_files.append(stored_data['storedAssetIdentity'])
else:
@ -293,11 +360,12 @@ def check_matched_files_hashes(ctx, context, print_identity, files):
@cli.command(name="check")
@click.option('-x', '--context', required=True)
@click.option('-p', '--profile', default='all')
@click.argument('files', nargs=-1)
@click.pass_context
def check_changed_files_hashes(ctx, context, files):
def check_changed_files_hashes(ctx, context, profile, files):
"""
List all files that do not have a matching sha256sum or profilesHash
List all files that do not have a matching sha256sum or profile hash
"""
ctx.obj['CONTEXT'] = context
s3_bucket, s3 = prep_s3(ctx)
@ -311,9 +379,9 @@ def check_changed_files_hashes(ctx, context, files):
try:
file_object = s3.get_object(s3_bucket, file_identity)
stored_data = json.load(file_object)
stored_profile_hash, stored_file_hash, calculated_file_hash = get_file_sha256sum(stored_data, file)
stored_profile_hash, stored_file_hash, calculated_file_hash = get_file_sha256sum(stored_data, profile, file)
if calculated_file_hash != stored_file_hash \
or ctx.obj['CONFIG']['profilesHash'] != stored_profile_hash:
or ctx.obj['CONFIG']['profileHashes'][profile] != stored_profile_hash:
changed_files.append(file)
except NoSuchKey as e:
changed_files.append(file)
@ -326,9 +394,10 @@ def check_changed_files_hashes(ctx, context, files):
@cli.command(name="update")
@click.option('-x', '--context', required=True)
@click.option('--input-and-identity/--no-input-and-identity', default=False)
@click.option('-p', '--profile', default='all')
@click.argument('files', nargs=-1)
@click.pass_context
def update_changed_files_hashes(ctx, context, input_and_identity, files):
def update_changed_files_hashes(ctx, context, input_and_identity, profile, files):
"""
Store new data objects for the provided files
"""
@ -356,7 +425,7 @@ def update_changed_files_hashes(ctx, context, input_and_identity, files):
"storedAssetIdentity": identity,
"identity": file_identity,
"sha256sum": calculated_file_hash,
"profilesHash": ctx.obj['CONFIG']['profilesHash']
"profileHash": ctx.obj['CONFIG']['profileHashes'][profile]
}
with io.BytesIO(json.dumps(object_data, sort_keys=True, indent=None).encode('utf-8')) as data:

Loading…
Cancel
Save