mirror of https://gitlab.com/tildes/tildes.git
Browse Source
Replace RabbitMQ uses with Redis streams
RabbitMQ was used to support asynchronous/background processing tasks, such as determining word count for text topics and scraping the destinations or relevant APIs for link topics. This commit replaces RabbitMQ's role (as the message broker) with Redis streams. This included building a new "PostgreSQL to Redis bridge" that takes over the previous role of pg-amqp-bridge: listening for NOTIFY messages on a particular PostgreSQL channel and translating them to messages in appropriate Redis streams. One particular change of note is that the names of message "sources" were adjusted a little and standardized. For example, the routing key for a message caused by a new comment was previously "comment.created", but is now "comments.insert". Similarly, "comment.edited" became "comments.update.markdown". The new naming scheme uses the table name, proper name for the SQL operation, and column name instead of the previous unpredictable terms. (branch: merge-requests/88/merge)
Deimos
5 years ago
22 changed files with 634 additions and 91 deletions
-
6salt/salt/consumers/comment_user_mentions_generator.service.jinja2
-
6salt/salt/consumers/site_icon_downloader.service.jinja2
-
6salt/salt/consumers/topic_embedly_extractor.service.jinja2
-
6salt/salt/consumers/topic_interesting_activity_updater.service.jinja2
-
6salt/salt/consumers/topic_metadata_generator.service.jinja2
-
6salt/salt/consumers/topic_youtube_scraper.service.jinja2
-
12salt/salt/redis/init.sls
-
16salt/salt/redis/postgresql_redis_bridge.service.jinja2
-
181tildes/alembic/versions/4fb2c786c7a0_add_new_notify_triggers.py
-
22tildes/consumers/comment_user_mentions_generator.py
-
19tildes/consumers/site_icon_downloader.py
-
25tildes/consumers/topic_embedly_extractor.py
-
32tildes/consumers/topic_interesting_activity_updater.py
-
27tildes/consumers/topic_metadata_generator.py
-
25tildes/consumers/topic_youtube_scraper.py
-
70tildes/scripts/postgresql_redis_bridge.py
-
9tildes/sql/init/functions/event_stream.sql
-
25tildes/sql/init/triggers/comment_labels/event_stream.sql
-
38tildes/sql/init/triggers/comments/event_stream.sql
-
23tildes/sql/init/triggers/scraper_results/event_stream.sql
-
33tildes/sql/init/triggers/topics/event_stream.sql
-
132tildes/tildes/lib/event_stream.py
@ -0,0 +1,16 @@ |
|||
{% from 'common.jinja2' import app_dir, bin_dir -%}
[Unit]
Description=postgresql_redis_bridge - convert NOTIFY to Redis streams
# the bridge's only output goes to Redis streams, so tie its lifecycle to Redis
Requires=redis.service
After=redis.service
PartOf=redis.service

[Service]
WorkingDirectory={{ app_dir }}/scripts
# the script reads its settings (Redis socket, database URL) from this INI file
Environment="INI_FILE={{ app_dir }}/{{ pillar['ini_file'] }}"
ExecStart={{ bin_dir }}/python postgresql_redis_bridge.py
# restart automatically if the bridge exits, waiting 5 seconds between attempts
Restart=always
RestartSec=5

[Install]
WantedBy=multi-user.target
@ -0,0 +1,181 @@ |
|||
"""Add new NOTIFY triggers |
|||
|
|||
Revision ID: 4fb2c786c7a0 |
|||
Revises: f4e1ef359307 |
|||
Create Date: 2020-01-19 19:45:32.460821 |
|||
|
|||
""" |
|||
from alembic import op |
|||
import sqlalchemy as sa |
|||
|
|||
|
|||
# revision identifiers, used by Alembic. |
|||
revision = "4fb2c786c7a0" |
|||
down_revision = "f4e1ef359307" |
|||
branch_labels = None |
|||
depends_on = None |
|||
|
|||
|
|||
def upgrade():
    """Create the event-stream helper function and per-table NOTIFY triggers.

    add_to_event_stream sends a NOTIFY on the "postgresql_events" channel with a
    payload of "<stream name>:<json fields>", which the postgresql_redis_bridge
    script converts into Redis stream entries. Triggers are attached to the
    comments, topics, comment_labels, and scraper_results tables so that inserts,
    deletes, and updates of specific columns produce events. The stream name is
    built from the table name, the lowercased SQL operation, and the trigger
    arguments (used to name the updated column), e.g. "comments.update.markdown".
    """
    # generic helper called by all of the trigger functions below
    op.execute(
        """
        create or replace function add_to_event_stream(stream_name_pieces text[], fields text[]) returns void as $$
            select pg_notify(
                'postgresql_events',
                array_to_string(stream_name_pieces, '.') || ':' || json_object(fields)
            );
        $$ language sql;
        """
    )

    # comments
    op.execute(
        """
        create or replace function comments_events_trigger() returns trigger as $$
        declare
            -- OLD is the only row available on delete, NEW otherwise
            affected_row record := coalesce(NEW, OLD);
            stream_name_pieces text[] := array[TG_TABLE_NAME, lower(TG_OP)]::text[] || TG_ARGV;

            -- in general, only the below declaration of payload_fields should be edited
            payload_fields text[] := array[
                'comment_id', affected_row.comment_id
            ]::text[];
        begin
            perform add_to_event_stream(stream_name_pieces, payload_fields);

            return null;
        end;
        $$ language plpgsql;

        create trigger comments_events_insert_delete
            after insert or delete on comments
            for each row
            execute function comments_events_trigger();

        create trigger comments_events_update_markdown
            after update of markdown on comments
            for each row
            execute function comments_events_trigger('markdown');

        create trigger comments_events_update_is_deleted
            after update of is_deleted on comments
            for each row
            execute function comments_events_trigger('is_deleted');

        create trigger comments_events_update_is_removed
            after update of is_removed on comments
            for each row
            execute function comments_events_trigger('is_removed');
        """
    )

    # topics
    op.execute(
        """
        create or replace function topics_events_trigger() returns trigger as $$
        declare
            affected_row record := coalesce(NEW, OLD);
            stream_name_pieces text[] := array[TG_TABLE_NAME, lower(TG_OP)]::text[] || TG_ARGV;

            -- in general, only the below declaration of payload_fields should be edited
            payload_fields text[] := array[
                'topic_id', affected_row.topic_id
            ]::text[];
        begin
            perform add_to_event_stream(stream_name_pieces, payload_fields);

            return null;
        end;
        $$ language plpgsql;

        create trigger topics_events_insert_delete
            after insert or delete on topics
            for each row
            execute function topics_events_trigger();

        create trigger topics_events_update_markdown
            after update of markdown on topics
            for each row
            execute function topics_events_trigger('markdown');

        create trigger topics_events_update_link
            after update of link on topics
            for each row
            execute function topics_events_trigger('link');
        """
    )

    # comment_labels
    op.execute(
        """
        create or replace function comment_labels_events_trigger() returns trigger as $$
        declare
            affected_row record := coalesce(NEW, OLD);
            stream_name_pieces text[] := array[TG_TABLE_NAME, lower(TG_OP)]::text[] || TG_ARGV;

            -- in general, only the below declaration of payload_fields should be edited
            payload_fields text[] := array[
                'comment_id', affected_row.comment_id,
                'user_id', affected_row.user_id,
                'label', affected_row.label
            ]::text[];
        begin
            perform add_to_event_stream(stream_name_pieces, payload_fields);

            return null;
        end;
        $$ language plpgsql;

        create trigger comment_labels_events_insert_delete
            after insert or delete on comment_labels
            for each row
            execute function comment_labels_events_trigger();
        """
    )

    # scraper_results
    op.execute(
        """
        create or replace function scraper_results_events_trigger() returns trigger as $$
        declare
            affected_row record := coalesce(NEW, OLD);
            stream_name_pieces text[] := array[TG_TABLE_NAME, lower(TG_OP)]::text[] || TG_ARGV;

            -- in general, only the below declaration of payload_fields should be edited
            payload_fields text[] := array[
                'result_id', affected_row.result_id
            ]::text[];
        begin
            perform add_to_event_stream(stream_name_pieces, payload_fields);

            return null;
        end;
        $$ language plpgsql;

        create trigger scraper_results_events_insert_delete
            after insert or delete on scraper_results
            for each row
            execute function scraper_results_events_trigger();
        """
    )
|||
|
|||
|
|||
def downgrade():
    """Remove the event-stream triggers and functions added by this revision.

    Each trigger is dropped before the trigger function it executes, and the
    add_to_event_stream helper is dropped last since every trigger function
    calls it.
    """
    drop_statements = (
        "drop trigger scraper_results_events_insert_delete on scraper_results",
        "drop function scraper_results_events_trigger",
        "drop trigger comment_labels_events_insert_delete on comment_labels",
        "drop function comment_labels_events_trigger",
        "drop trigger topics_events_update_link on topics",
        "drop trigger topics_events_update_markdown on topics",
        "drop trigger topics_events_insert_delete on topics",
        "drop function topics_events_trigger",
        "drop trigger comments_events_update_is_removed on comments",
        "drop trigger comments_events_update_is_deleted on comments",
        "drop trigger comments_events_update_markdown on comments",
        "drop trigger comments_events_insert_delete on comments",
        "drop function comments_events_trigger",
        "drop function add_to_event_stream",
    )

    for statement in drop_statements:
        op.execute(statement)
@ -0,0 +1,70 @@ |
|||
# Copyright (c) 2020 Tildes contributors <code@tildes.net> |
|||
# SPDX-License-Identifier: AGPL-3.0-or-later |
|||
|
|||
"""Script that converts NOTIFY events on a PostgreSQL channel to Redis stream entries. |
|||
|
|||
Should be kept running at all times as a service. |
|||
""" |
|||
|
|||
import json |
|||
import os |
|||
from configparser import ConfigParser |
|||
from select import select |
|||
|
|||
from redis import Redis |
|||
from sqlalchemy.engine.url import make_url |
|||
import psycopg2 |
|||
|
|||
from tildes.lib.event_stream import REDIS_KEY_PREFIX |
|||
|
|||
|
|||
# the PostgreSQL NOTIFY channel that add_to_event_stream() sends payloads on
NOTIFY_CHANNEL = "postgresql_events"


def postgresql_redis_bridge(config_path: str) -> None:
    """Listen for NOTIFY events and add them to Redis streams.

    Runs forever; intended to be kept running as a service.

    config_path: path to an INI file whose "app:main" section contains the
        "redis.unix_socket_path" and "sqlalchemy.url" settings.
    """
    config = ConfigParser()
    config.read(config_path)

    redis = Redis(unix_socket_path=config.get("app:main", "redis.unix_socket_path"))

    # connect using only the username/database from the app's URL -- no password
    # is passed, so this assumes peer/trust auth locally (TODO confirm)
    postgresql_url = make_url(config.get("app:main", "sqlalchemy.url"))
    postgresql = psycopg2.connect(
        user=postgresql_url.username, dbname=postgresql_url.database
    )
    # autocommit so the LISTEN takes effect without an explicit commit
    postgresql.autocommit = True

    with postgresql.cursor() as cursor:
        cursor.execute(f"listen {NOTIFY_CHANNEL}")

    while True:
        # block until a NOTIFY comes through on the channel
        select([postgresql], [], [])

        # fetch any notifications without needing to execute a query
        postgresql.poll()

        # add each NOTIFY to the specified stream(s), using a Redis pipeline to avoid
        # round trips when there are multiple sent by the same PostgreSQL transaction
        with redis.pipeline(transaction=False) as pipe:
            while postgresql.notifies:
                notify = postgresql.notifies.pop(0)

                # the payload format should be "<destination stream name>:<json dict>"
                try:
                    stream_name, fields_json = notify.payload.split(":", maxsplit=1)
                except ValueError:
                    # no separator present -- malformed payload, skip it
                    continue

                try:
                    fields = json.loads(fields_json)
                except json.decoder.JSONDecodeError:
                    # payload isn't valid JSON -- skip it
                    continue

                pipe.xadd(f"{REDIS_KEY_PREFIX}{stream_name}", fields)

            pipe.execute()


if __name__ == "__main__":
    postgresql_redis_bridge(os.environ["INI_FILE"])
@ -0,0 +1,9 @@ |
|||
-- Copyright (c) 2020 Tildes contributors <code@tildes.net>
-- SPDX-License-Identifier: AGPL-3.0-or-later

-- Send an event on the "postgresql_events" NOTIFY channel, formatted so the
-- postgresql_redis_bridge script can route it to a Redis stream. The payload is
-- "<stream name pieces joined with '.'>:<JSON object>", where fields is a flat
-- array of alternating key/value pairs passed to json_object().
create or replace function add_to_event_stream(stream_name_pieces text[], fields text[]) returns void as $$
    select pg_notify(
        'postgresql_events',
        array_to_string(stream_name_pieces, '.') || ':' || json_object(fields)
    );
$$ language sql;
@ -0,0 +1,25 @@ |
|||
-- Copyright (c) 2020 Tildes contributors <code@tildes.net>
-- SPDX-License-Identifier: AGPL-3.0-or-later

-- Send comment_labels changes to the event streams. The stream name is built
-- from the table name, the lowercased operation, and the trigger arguments,
-- e.g. "comment_labels.insert".
create or replace function comment_labels_events_trigger() returns trigger as $$
declare
    -- OLD is the only row available on delete, NEW otherwise
    affected_row record := coalesce(NEW, OLD);
    stream_name_pieces text[] := array[TG_TABLE_NAME, lower(TG_OP)]::text[] || TG_ARGV;

    -- in general, only the below declaration of payload_fields should be edited
    payload_fields text[] := array[
        'comment_id', affected_row.comment_id,
        'user_id', affected_row.user_id,
        'label', affected_row.label
    ]::text[];
begin
    perform add_to_event_stream(stream_name_pieces, payload_fields);

    -- return value is ignored for AFTER triggers
    return null;
end;
$$ language plpgsql;

create trigger comment_labels_events_insert_delete
    after insert or delete on comment_labels
    for each row
    execute function comment_labels_events_trigger();
@ -0,0 +1,38 @@ |
|||
-- Copyright (c) 2020 Tildes contributors <code@tildes.net>
-- SPDX-License-Identifier: AGPL-3.0-or-later

-- Send comments changes to the event streams. The stream name is built from
-- the table name, the lowercased operation, and the trigger arguments (used to
-- name the updated column), e.g. "comments.update.markdown".
create or replace function comments_events_trigger() returns trigger as $$
declare
    -- OLD is the only row available on delete, NEW otherwise
    affected_row record := coalesce(NEW, OLD);
    stream_name_pieces text[] := array[TG_TABLE_NAME, lower(TG_OP)]::text[] || TG_ARGV;

    -- in general, only the below declaration of payload_fields should be edited
    payload_fields text[] := array[
        'comment_id', affected_row.comment_id
    ]::text[];
begin
    perform add_to_event_stream(stream_name_pieces, payload_fields);

    -- return value is ignored for AFTER triggers
    return null;
end;
$$ language plpgsql;

create trigger comments_events_insert_delete
    after insert or delete on comments
    for each row
    execute function comments_events_trigger();

create trigger comments_events_update_markdown
    after update of markdown on comments
    for each row
    execute function comments_events_trigger('markdown');

create trigger comments_events_update_is_deleted
    after update of is_deleted on comments
    for each row
    execute function comments_events_trigger('is_deleted');

create trigger comments_events_update_is_removed
    after update of is_removed on comments
    for each row
    execute function comments_events_trigger('is_removed');
@ -0,0 +1,23 @@ |
|||
-- Copyright (c) 2020 Tildes contributors <code@tildes.net>
-- SPDX-License-Identifier: AGPL-3.0-or-later

-- Send scraper_results changes to the event streams. The stream name is built
-- from the table name, the lowercased operation, and the trigger arguments,
-- e.g. "scraper_results.insert".
create or replace function scraper_results_events_trigger() returns trigger as $$
declare
    -- OLD is the only row available on delete, NEW otherwise
    affected_row record := coalesce(NEW, OLD);
    stream_name_pieces text[] := array[TG_TABLE_NAME, lower(TG_OP)]::text[] || TG_ARGV;

    -- in general, only the below declaration of payload_fields should be edited
    payload_fields text[] := array[
        'result_id', affected_row.result_id
    ]::text[];
begin
    perform add_to_event_stream(stream_name_pieces, payload_fields);

    -- return value is ignored for AFTER triggers
    return null;
end;
$$ language plpgsql;

create trigger scraper_results_events_insert_delete
    after insert or delete on scraper_results
    for each row
    execute function scraper_results_events_trigger();
@ -0,0 +1,33 @@ |
|||
-- Copyright (c) 2020 Tildes contributors <code@tildes.net>
-- SPDX-License-Identifier: AGPL-3.0-or-later

-- Send topics changes to the event streams. The stream name is built from the
-- table name, the lowercased operation, and the trigger arguments (used to
-- name the updated column), e.g. "topics.update.link".
create or replace function topics_events_trigger() returns trigger as $$
declare
    -- OLD is the only row available on delete, NEW otherwise
    affected_row record := coalesce(NEW, OLD);
    stream_name_pieces text[] := array[TG_TABLE_NAME, lower(TG_OP)]::text[] || TG_ARGV;

    -- in general, only the below declaration of payload_fields should be edited
    payload_fields text[] := array[
        'topic_id', affected_row.topic_id
    ]::text[];
begin
    perform add_to_event_stream(stream_name_pieces, payload_fields);

    -- return value is ignored for AFTER triggers
    return null;
end;
$$ language plpgsql;

create trigger topics_events_insert_delete
    after insert or delete on topics
    for each row
    execute function topics_events_trigger();

create trigger topics_events_update_markdown
    after update of markdown on topics
    for each row
    execute function topics_events_trigger('markdown');

create trigger topics_events_update_link
    after update of link on topics
    for each row
    execute function topics_events_trigger('link');
@ -0,0 +1,132 @@ |
|||
# Copyright (c) 2020 Tildes contributors <code@tildes.net> |
|||
# SPDX-License-Identifier: AGPL-3.0-or-later |
|||
|
|||
"""Contains classes related to handling the Redis-based event streams.""" |
|||
|
|||
import os |
|||
from abc import abstractmethod |
|||
from configparser import ConfigParser |
|||
from typing import Any, Dict, List, Sequence |
|||
|
|||
from redis import Redis, ResponseError |
|||
|
|||
from tildes.lib.database import get_session_from_config |
|||
|
|||
REDIS_KEY_PREFIX = "event_stream:" |
|||
|
|||
|
|||
class Message:
    """A single entry read out of one of the Redis event streams."""

    def __init__(
        self, redis: Redis, stream: str, message_id: str, fields: Dict[str, str]
    ):
        """Store the message's origin stream, entry ID, and field data.

        The Redis connection is kept on the message so it can acknowledge
        itself later via ack().
        """
        self.redis = redis
        self.stream = stream
        self.message_id = message_id
        self.fields = fields

    def ack(self, consumer_group: str) -> None:
        """Mark this message as processed for the given consumer group.

        Issues XACK, which removes the entry from that group's pending
        entries list.
        """
        stream_key = REDIS_KEY_PREFIX + self.stream
        self.redis.xack(stream_key, consumer_group, self.message_id)
|||
|
|||
|
|||
class EventStreamConsumer:
    """Base class for consumers of events retrieved from a stream in Redis.

    This class is intended to be used in a completely "stand-alone" manner, such as
    inside a script being run separately as a background job. As such, it also includes
    connecting to Redis, creating the consumer group and the relevant streams, and
    (optionally) connecting to the database to be able to fetch and modify data as
    necessary. It relies on the environment variable INI_FILE being set.

    NOTE(review): process_message is marked @abstractmethod, but this class does not
    inherit from abc.ABC (or use ABCMeta), so instantiation is not actually blocked
    for subclasses that fail to override it -- confirm whether that enforcement is
    intended.
    """

    def __init__(
        self, consumer_group: str, source_streams: Sequence[str], uses_db: bool = True,
    ):
        """Initialize a new consumer, creating consumer groups and streams if needed.

        consumer_group: name of the Redis consumer group to read as.
        source_streams: plain stream names (without the Redis key prefix).
        uses_db: whether to also open a database session for processing.
        """
        ini_file_path = os.environ["INI_FILE"]
        config = ConfigParser()
        config.read(ini_file_path)

        self.redis = Redis(
            unix_socket_path=config.get("app:main", "redis.unix_socket_path")
        )
        self.consumer_group = consumer_group
        # prepend the shared key prefix so these match the keys that the
        # postgresql_redis_bridge script writes to
        self.source_streams = [
            f"{REDIS_KEY_PREFIX}{stream}" for stream in source_streams
        ]

        # hardcoded for now, will need to change for multiple consumers in same group
        self.name = f"{consumer_group}-1"

        # create all the consumer groups and streams (if necessary)
        for stream in self.source_streams:
            try:
                self.redis.xgroup_create(stream, consumer_group, mkstream=True)
            except ResponseError as error:
                # if the consumer group already exists, a BUSYGROUP error will be
                # returned, so we want to ignore that one but raise anything else
                if not str(error).startswith("BUSYGROUP"):
                    raise

        if uses_db:
            self.db_session = get_session_from_config(ini_file_path)
        else:
            self.db_session = None

    def consume_streams(self) -> None:
        """Process messages from the streams indefinitely.

        Messages are acked only after process_message returns (and the db
        transaction commits), so a message that raises stays in the pending
        entries list and can be re-delivered.
        """
        while True:
            # Get any messages from the source streams that haven't already been
            # delivered to a consumer in this group - will fetch a maximum of one
            # message from each stream, and block indefinitely if none are available
            response = self.redis.xreadgroup(
                self.consumer_group,
                self.name,
                {stream: ">" for stream in self.source_streams},
                count=1,
                block=0,
            )

            messages = self._xreadgroup_response_to_messages(response)

            for message in messages:
                self.process_message(message)

                # after processing finishes, commit the transaction and ack the message
                if self.db_session:
                    self.db_session.commit()

                message.ack(self.consumer_group)

    def _xreadgroup_response_to_messages(self, response: Any) -> List[Message]:
        """Convert a response from XREADGROUP to a list of Messages."""
        messages = []

        # responses come back in an ugly format, a list of (one for each stream):
        # [b'<stream name>', [(b'<entry id>', {<entry fields, all bytestrings>})]]
        for stream_response in response:
            stream_name = stream_response[0].decode("utf-8")

            for entry in stream_response[1]:
                message = Message(
                    self.redis,
                    # strip the key prefix back off to get the plain stream name
                    stream_name[len(REDIS_KEY_PREFIX) :],
                    message_id=entry[0].decode("utf-8"),
                    fields={
                        key.decode("utf-8"): value.decode("utf-8")
                        for key, value in entry[1].items()
                    },
                )
                messages.append(message)

        return messages

    @abstractmethod
    def process_message(self, message: Message) -> None:
        """Process a message from the stream (subclasses must implement)."""
        pass
Write
Preview
Loading…
Cancel
Save
Reference in new issue