mirror of https://gitlab.com/tildes/tildes.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
170 lines
5.9 KiB
170 lines
5.9 KiB
# Copyright (c) 2018 Tildes contributors <code@tildes.net>
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
"""Command-line tools for managing a breached-passwords bloom filter.
|
|
|
|
This tool will help with creating and updating a bloom filter in Redis (using ReBloom:
|
|
https://github.com/RedisLabsModules/rebloom) to hold hashes for passwords that have been
|
|
revealed through data breaches (to prevent users from using these passwords here). The
|
|
dumps are likely primarily sourced from Troy Hunt's "Pwned Passwords" files:
|
|
https://haveibeenpwned.com/Passwords
|
|
|
|
Specifically, the commands in this tool allow building the bloom filter somewhere else,
|
|
then the RDB file can be transferred to the production server. Note that it is expected
|
|
that a separate redis server instance is running solely for holding this bloom filter.
|
|
Replacing the RDB file will result in all other keys being lost.
|
|
|
|
Expected usage of this tool should look something like:
|
|
|
|
On the machine building the bloom filter:
|
|
python breached_passwords.py init --estimate 350000000
|
|
python breached_passwords.py addhashes pwned-passwords-1.0.txt
|
|
python breached_passwords.py addhashes pwned-passwords-update-1.txt
|
|
|
|
Then the RDB file can simply be transferred to the production server, overwriting any
|
|
previous RDB file.
|
|
|
|
"""
|
|
|
|
import subprocess
|
|
from typing import Any
|
|
|
|
import click
|
|
from redis import Redis, ResponseError
|
|
|
|
from tildes.lib.password import (
|
|
BREACHED_PASSWORDS_BF_KEY,
|
|
BREACHED_PASSWORDS_REDIS_SOCKET,
|
|
)
|
|
|
|
|
|
REDIS = Redis(unix_socket_path=BREACHED_PASSWORDS_REDIS_SOCKET)
|
|
|
|
|
|
def generate_redis_protocol(*elements: Any) -> str:
|
|
"""Generate a command in the Redis protocol from the specified elements.
|
|
|
|
Based on the example Ruby code from
|
|
https://redis.io/topics/mass-insert#generating-redis-protocol
|
|
"""
|
|
command = f"*{len(elements)}\r\n"
|
|
|
|
for element in elements:
|
|
element = str(element)
|
|
command += f"${len(element)}\r\n{element}\r\n"
|
|
|
|
return command
|
|
|
|
|
|
@click.group()
|
|
def cli() -> None:
|
|
"""Create a functionality-less command group to attach subcommands to."""
|
|
pass
|
|
|
|
|
|
def validate_init_error_rate(ctx: Any, param: Any, value: Any) -> float:
|
|
"""Validate the --error-rate arg for the init command."""
|
|
# pylint: disable=unused-argument
|
|
if not 0 < value < 1:
|
|
raise click.BadParameter("error rate must be a float between 0 and 1")
|
|
|
|
return value
|
|
|
|
|
|
@cli.command(help="Initialize a new empty bloom filter")
|
|
@click.option(
|
|
"--estimate",
|
|
required=True,
|
|
type=int,
|
|
help="Expected number of passwords that will be added",
|
|
)
|
|
@click.option(
|
|
"--error-rate",
|
|
default=0.01,
|
|
show_default=True,
|
|
help="Bloom filter desired false positive ratio",
|
|
callback=validate_init_error_rate,
|
|
)
|
|
@click.confirmation_option(
|
|
prompt="Are you sure you want to clear any existing bloom filter?"
|
|
)
|
|
def init(estimate: int, error_rate: float) -> None:
|
|
"""Initialize a new bloom filter (destroying any existing one).
|
|
|
|
It generally shouldn't be necessary to re-init a new bloom filter very often with
|
|
this command, only if the previous one was created with too low of an estimate for
|
|
number of passwords, or to change to a different false positive rate. For choosing
|
|
an estimate value, according to the ReBloom documentation: "Performance will begin
|
|
to degrade after adding more items than this number. The actual degradation will
|
|
depend on how far the limit has been exceeded. Performance will degrade linearly as
|
|
the number of entries grow exponentially."
|
|
"""
|
|
REDIS.delete(BREACHED_PASSWORDS_BF_KEY)
|
|
|
|
# BF.RESERVE {key} {error_rate} {size}
|
|
REDIS.execute_command("BF.RESERVE", BREACHED_PASSWORDS_BF_KEY, error_rate, estimate)
|
|
|
|
click.echo(
|
|
"Initialized bloom filter with expected size of {:,} and false "
|
|
"positive rate of {}%".format(estimate, error_rate * 100)
|
|
)
|
|
|
|
|
|
@cli.command(help="Add hashes from a file to the bloom filter")
|
|
@click.argument("filename", type=click.Path(exists=True, dir_okay=False))
|
|
def addhashes(filename: str) -> None:
|
|
"""Add all hashes from a file to the bloom filter.
|
|
|
|
This uses the method of generating commands in Redis protocol and feeding them into
|
|
an instance of `redis-cli --pipe`, as recommended in
|
|
https://redis.io/topics/mass-insert
|
|
"""
|
|
# make sure the key exists and is a bloom filter
|
|
try:
|
|
REDIS.execute_command("BF.DEBUG", BREACHED_PASSWORDS_BF_KEY)
|
|
except ResponseError:
|
|
click.echo("Bloom filter is not set up properly - run init first.")
|
|
raise click.Abort
|
|
|
|
# call wc to count the number of lines in the file for the progress bar
|
|
click.echo("Determining hash count...")
|
|
result = subprocess.run(["wc", "-l", filename], stdout=subprocess.PIPE)
|
|
line_count = int(result.stdout.split(b" ")[0])
|
|
|
|
progress_bar: Any = click.progressbar(length=line_count)
|
|
update_interval = 100_000
|
|
|
|
click.echo("Adding {:,} hashes to bloom filter...".format(line_count))
|
|
|
|
redis_pipe = subprocess.Popen(
|
|
["redis-cli", "-s", BREACHED_PASSWORDS_REDIS_SOCKET, "--pipe"],
|
|
stdin=subprocess.PIPE,
|
|
stdout=subprocess.DEVNULL,
|
|
encoding="utf-8",
|
|
)
|
|
|
|
for count, line in enumerate(open(filename), start=1):
|
|
hashval = line.strip().lower()
|
|
|
|
# the Pwned Passwords hash lists now have a frequency count for each hash, which
|
|
# is separated from the hash with a colon, so we need to handle that if it's
|
|
# present
|
|
hashval = hashval.split(":")[0]
|
|
|
|
command = generate_redis_protocol("BF.ADD", BREACHED_PASSWORDS_BF_KEY, hashval)
|
|
redis_pipe.stdin.write(command)
|
|
|
|
if count % update_interval == 0:
|
|
progress_bar.update(update_interval)
|
|
|
|
# call SAVE to update the RDB file
|
|
REDIS.save()
|
|
|
|
# manually finish the progress bar so it shows 100% and renders properly
|
|
progress_bar.finish()
|
|
progress_bar.render_progress()
|
|
progress_bar.render_finish()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
cli()
|