From 7d1c3297fb64c05c8f3e31cd293a545c64859e3a Mon Sep 17 00:00:00 2001 From: Deimos Date: Mon, 9 Mar 2020 17:50:44 -0600 Subject: [PATCH] Add group_stats table, track daily topics/comments This adds a group_stats table and cronjob that will insert the previous day's stats into it each day just after 00:00 UTC. --- salt/salt/cronjobs.sls | 7 ++ .../9148909b78e9_add_group_stats_table.py | 50 +++++++++++ .../generate_group_stats_for_yesterday.py | 86 +++++++++++++++++++ tildes/tildes/database_models.py | 2 +- tildes/tildes/enums.py | 7 ++ tildes/tildes/models/group/__init__.py | 1 + tildes/tildes/models/group/group_stat.py | 54 ++++++++++++ 7 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 tildes/alembic/versions/9148909b78e9_add_group_stats_table.py create mode 100644 tildes/scripts/generate_group_stats_for_yesterday.py create mode 100644 tildes/tildes/models/group/group_stat.py diff --git a/salt/salt/cronjobs.sls b/salt/salt/cronjobs.sls index ea0e4f3..fc57a52 100644 --- a/salt/salt/cronjobs.sls +++ b/salt/salt/cronjobs.sls @@ -13,6 +13,13 @@ data-cleanup-cronjob: - hour: 4 - minute: 10 +generate-group-stats-for-yesterday-cronjob: + cron.present: + - name: {{ bin_dir }}/python -c "from scripts.generate_group_stats_for_yesterday import generate_stats; generate_stats('{{ app_dir }}/{{ pillar['ini_file'] }}')" + - user: {{ app_username }} + - hour: 0 + - minute: 10 + generate-site-icons-css-cronjob: cron.present: - name: {{ bin_dir }}/python -c "from scripts.generate_site_icons_css import generate_css; generate_css()" diff --git a/tildes/alembic/versions/9148909b78e9_add_group_stats_table.py b/tildes/alembic/versions/9148909b78e9_add_group_stats_table.py new file mode 100644 index 0000000..500bfb3 --- /dev/null +++ b/tildes/alembic/versions/9148909b78e9_add_group_stats_table.py @@ -0,0 +1,50 @@ +"""Add group_stats table + +Revision ID: 9148909b78e9 +Revises: fe91222503ef +Create Date: 2020-03-06 02:27:31.720325 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "9148909b78e9" +down_revision = "fe91222503ef" +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_table( + "group_stats", + sa.Column("group_id", sa.Integer(), nullable=False), + sa.Column( + "stat", + postgresql.ENUM("TOPICS_POSTED", "COMMENTS_POSTED", name="groupstattype"), + nullable=False, + ), + sa.Column("period", postgresql.TSTZRANGE(), nullable=False), + sa.Column("value", sa.Float(), nullable=False), + sa.ForeignKeyConstraint( + ["group_id"], + ["groups.group_id"], + name=op.f("fk_group_stats_group_id_groups"), + ), + sa.PrimaryKeyConstraint( + "group_id", "stat", "period", name=op.f("pk_group_stats") + ), + ) + op.create_index( + "ix_group_stats_period_gist", + "group_stats", + ["period"], + unique=False, + postgresql_using="gist", + ) + + +def downgrade(): + op.drop_index("ix_group_stats_period_gist", table_name="group_stats") + op.drop_table("group_stats") diff --git a/tildes/scripts/generate_group_stats_for_yesterday.py b/tildes/scripts/generate_group_stats_for_yesterday.py new file mode 100644 index 0000000..e53895c --- /dev/null +++ b/tildes/scripts/generate_group_stats_for_yesterday.py @@ -0,0 +1,86 @@ +# Copyright (c) 2020 Tildes contributors +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Script for generating group statistics for yesterday (UTC). + +This script is not very flexible - no matter what time it is run, it will always +generate stats for the previous UTC day for all groups and store them in the group_stats +table. +""" + +from datetime import datetime, timedelta + +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session + +from tildes.enums import GroupStatType +from tildes.lib.database import get_session_from_config +from tildes.lib.datetime import utc_now +from tildes.models.comment import Comment +from tildes.models.group import Group, GroupStat +from tildes.models.topic import Topic + + +def generate_stats(config_path: str) -> None: + """Generate all stats for all groups for yesterday (UTC).""" + db_session = get_session_from_config(config_path) + + # the end time is the start of the current day, start time 1 day before that + end_time = utc_now().replace(hour=0, minute=0, second=0, microsecond=0) + start_time = end_time - timedelta(days=1) + + groups = db_session.query(Group).all() + + for group in groups: + with db_session.no_autoflush: + db_session.add(topics_posted(db_session, group, start_time, end_time)) + db_session.add(comments_posted(db_session, group, start_time, end_time)) + + try: + db_session.commit() + except IntegrityError: + # stats have already run for this group/period combination, just skip + continue + + +def topics_posted( + db_session: Session, group: Group, start_time: datetime, end_time: datetime +) -> GroupStat: + """Generate a GroupStat for topics posted in the group between start/end times.""" + num_topics = ( + db_session.query(Topic) + .filter( + Topic.group == group, + Topic.created_time >= start_time, + Topic.created_time < end_time, + Topic.is_deleted == False, # noqa + Topic.is_removed == False, # noqa + ) + .count() + ) + + return GroupStat( + group, GroupStatType.TOPICS_POSTED, start_time, end_time, num_topics + ) + + +def comments_posted( + db_session: Session, group: Group, start_time: datetime, end_time: datetime +) -> GroupStat: + """Generate a GroupStat for comments posted in the group between start/end times.""" + num_comments = ( + db_session.query(Comment) + .join(Topic) + .filter( + Topic.group == group, + Comment.created_time >= start_time, + Comment.created_time < end_time, + Comment.is_deleted == False, # noqa + Comment.is_removed == False, # noqa + ) + .count() + ) + + return GroupStat( + group, GroupStatType.COMMENTS_POSTED, start_time, end_time, num_comments + ) diff --git a/tildes/tildes/database_models.py b/tildes/tildes/database_models.py index b2dfb8f..6b3e1a3 100644 --- a/tildes/tildes/database_models.py +++ b/tildes/tildes/database_models.py @@ -13,7 +13,7 @@ from tildes.models.comment import ( CommentVote, ) from tildes.models.financials import Financials -from tildes.models.group import Group, GroupSubscription +from tildes.models.group import Group, GroupStat, GroupSubscription from tildes.models.log import Log from tildes.models.message import MessageConversation, MessageReply from tildes.models.scraper import ScraperResult diff --git a/tildes/tildes/enums.py b/tildes/tildes/enums.py index a12bd4f..0e184e9 100644 --- a/tildes/tildes/enums.py +++ b/tildes/tildes/enums.py @@ -165,6 +165,13 @@ class FinancialEntryType(enum.Enum): INCOME = enum.auto() +class GroupStatType(enum.Enum): + """Enum for types of group statistics.""" + + TOPICS_POSTED = enum.auto() + COMMENTS_POSTED = enum.auto() + + class LogEventType(enum.Enum): """Enum for the types of events stored in logs.""" diff --git a/tildes/tildes/models/group/__init__.py b/tildes/tildes/models/group/__init__.py index 4e34429..1189c8a 100644 --- a/tildes/tildes/models/group/__init__.py +++ b/tildes/tildes/models/group/__init__.py @@ -2,5 +2,6 @@ from .group import Group from .group_query import GroupQuery +from .group_stat import GroupStat from .group_subscription import GroupSubscription from .group_wiki_page import GroupWikiPage diff --git a/tildes/tildes/models/group/group_stat.py b/tildes/tildes/models/group/group_stat.py new file mode 100644 index 0000000..c0f61fa --- /dev/null +++ b/tildes/tildes/models/group/group_stat.py @@ -0,0 +1,54 @@ +# Copyright (c) 2020 Tildes contributors +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Contains the GroupStat class.""" + +from datetime import datetime +from typing import Union + +from psycopg2.extras import DateTimeTZRange +from sqlalchemy import Column, Float, ForeignKey, Index, Integer +from sqlalchemy.dialects.postgresql import ENUM, TSTZRANGE +from sqlalchemy.orm import relationship + +from tildes.enums import GroupStatType +from tildes.models import DatabaseModel + +from .group import Group + + +class GroupStat(DatabaseModel): + """Model for a statistic of a group inside a certain time period.""" + + __tablename__ = "group_stats" + + group_id: int = Column( + Integer, ForeignKey("groups.group_id"), nullable=False, primary_key=True, + ) + stat: GroupStatType = Column(ENUM(GroupStatType), nullable=False, primary_key=True) + period: DateTimeTZRange = Column(TSTZRANGE, nullable=False, primary_key=True) + value: float = Column(Float, nullable=False) + + group: Group = relationship("Group", innerjoin=True, lazy=False) + + # Add a GiST index on the period column for range operators + __table_args__ = ( + Index("ix_group_stats_period_gist", period, postgresql_using="gist"), + ) + + def __init__( + self, + group: Group, + stat: GroupStatType, + start_time: datetime, + end_time: datetime, + value: Union[int, float], + ): + """Create a new statistic for the group and time period. + + The time period will be inclusive of start_time but exclusive of end_time. + """ + self.group = group + self.stat = stat + self.period = DateTimeTZRange(start_time, end_time, bounds="[)") + self.value = float(value)