mirror of https://gitlab.com/tildes/tildes.git
Browse Source
Add group_stats table, track daily topics/comments
Add group_stats table, track daily topics/comments
This adds a group_stats table and a cronjob that will insert the previous day's stats into it each day just after 00:00 UTC.
merge-requests/102/head
Deimos
5 years ago
7 changed files with 206 additions and 1 deletions
-
7salt/salt/cronjobs.sls
-
50tildes/alembic/versions/9148909b78e9_add_group_stats_table.py
-
86tildes/scripts/generate_group_stats_for_yesterday.py
-
2tildes/tildes/database_models.py
-
7tildes/tildes/enums.py
-
1tildes/tildes/models/group/__init__.py
-
54tildes/tildes/models/group/group_stat.py
@ -0,0 +1,50 @@ |
|||
"""Add group_stats table |
|||
|
|||
Revision ID: 9148909b78e9 |
|||
Revises: fe91222503ef |
|||
Create Date: 2020-03-06 02:27:31.720325 |
|||
|
|||
""" |
|||
from alembic import op |
|||
import sqlalchemy as sa |
|||
from sqlalchemy.dialects import postgresql |
|||
|
|||
# revision identifiers, used by Alembic. |
|||
revision = "9148909b78e9" |
|||
down_revision = "fe91222503ef" |
|||
branch_labels = None |
|||
depends_on = None |
|||
|
|||
|
|||
def upgrade():
    """Create the group_stats table and a GiST index on its period column."""
    # Enum of the statistic kinds that can be recorded for a group.
    stat_enum = postgresql.ENUM(
        "TOPICS_POSTED", "COMMENTS_POSTED", name="groupstattype"
    )

    table_columns = [
        sa.Column("group_id", sa.Integer(), nullable=False),
        sa.Column("stat", stat_enum, nullable=False),
        sa.Column("period", postgresql.TSTZRANGE(), nullable=False),
        sa.Column("value", sa.Float(), nullable=False),
    ]

    table_constraints = [
        sa.ForeignKeyConstraint(
            ["group_id"],
            ["groups.group_id"],
            name=op.f("fk_group_stats_group_id_groups"),
        ),
        # One row per (group, stat type, time period) combination.
        sa.PrimaryKeyConstraint(
            "group_id", "stat", "period", name=op.f("pk_group_stats")
        ),
    ]

    op.create_table("group_stats", *table_columns, *table_constraints)

    # Non-unique GiST index on the range column.
    op.create_index(
        "ix_group_stats_period_gist",
        "group_stats",
        ["period"],
        unique=False,
        postgresql_using="gist",
    )
|||
|
|||
|
|||
def downgrade():
    """Remove everything created by upgrade().

    Drops the GiST index, the group_stats table, and the groupstattype enum type.
    """
    op.drop_index("ix_group_stats_period_gist", table_name="group_stats")
    op.drop_table("group_stats")

    # The enum type was created implicitly alongside the table in upgrade(), but
    # dropping the table by name does not remove it. Without this, running
    # upgrade() again after a downgrade fails because the type already exists.
    op.execute("DROP TYPE IF EXISTS groupstattype")
@ -0,0 +1,86 @@ |
|||
# Copyright (c) 2020 Tildes contributors <code@tildes.net> |
|||
# SPDX-License-Identifier: AGPL-3.0-or-later |
|||
|
|||
"""Script for generating group statistics for yesterday (UTC). |
|||
|
|||
This script is not very flexible - no matter what time it is run, it will always |
|||
generate stats for the previous UTC day for all groups and store them in the group_stats |
|||
table. |
|||
""" |
|||
|
|||
from datetime import datetime, timedelta |
|||
|
|||
from sqlalchemy.exc import IntegrityError |
|||
from sqlalchemy.orm import Session |
|||
|
|||
from tildes.enums import GroupStatType |
|||
from tildes.lib.database import get_session_from_config |
|||
from tildes.lib.datetime import utc_now |
|||
from tildes.models.comment import Comment |
|||
from tildes.models.group import Group, GroupStat |
|||
from tildes.models.topic import Topic |
|||
|
|||
|
|||
def generate_stats(config_path: str) -> None:
    """Generate all stats for all groups for yesterday (UTC).

    For every group, a TOPICS_POSTED and a COMMENTS_POSTED stat covering the
    previous day are added and committed together. If the commit fails with an
    IntegrityError (the stats already exist for that group/period), the group is
    skipped and processing continues with the next one.

    config_path is passed to get_session_from_config to obtain the database session.
    """
    db_session = get_session_from_config(config_path)

    # the end time is the start of the current day, start time 1 day before that
    end_time = utc_now().replace(hour=0, minute=0, second=0, microsecond=0)
    start_time = end_time - timedelta(days=1)

    groups = db_session.query(Group).all()

    for group in groups:
        # avoid flushing the first pending stat while querying for the second one
        with db_session.no_autoflush:
            db_session.add(topics_posted(db_session, group, start_time, end_time))
            db_session.add(comments_posted(db_session, group, start_time, end_time))

        try:
            db_session.commit()
        except IntegrityError:
            # stats have already run for this group/period combination, just skip.
            # The failed flush leaves the session's transaction unusable until it is
            # rolled back, so reset it here or the next group's queries would raise.
            db_session.rollback()
|||
|
|||
|
|||
def topics_posted(
    db_session: Session, group: Group, start_time: datetime, end_time: datetime
) -> GroupStat:
    """Build a TOPICS_POSTED GroupStat for the group over [start_time, end_time).

    Counts the group's topics created inside the window that are neither deleted
    nor removed. The returned GroupStat is not added to the session here.
    """
    criteria = [
        Topic.group == group,
        Topic.created_time >= start_time,
        Topic.created_time < end_time,
        Topic.is_deleted == False,  # noqa
        Topic.is_removed == False,  # noqa
    ]
    topic_query = db_session.query(Topic).filter(*criteria)
    topic_count = topic_query.count()

    return GroupStat(
        group, GroupStatType.TOPICS_POSTED, start_time, end_time, topic_count
    )
|||
|
|||
|
|||
def comments_posted(
    db_session: Session, group: Group, start_time: datetime, end_time: datetime
) -> GroupStat:
    """Build a COMMENTS_POSTED GroupStat for the group over [start_time, end_time).

    Counts comments created inside the window, on topics belonging to the group,
    that are neither deleted nor removed. The returned GroupStat is not added to
    the session here.
    """
    criteria = [
        Topic.group == group,
        Comment.created_time >= start_time,
        Comment.created_time < end_time,
        Comment.is_deleted == False,  # noqa
        Comment.is_removed == False,  # noqa
    ]
    # join to Topic so the comments can be restricted by the topic's group
    comment_query = db_session.query(Comment).join(Topic).filter(*criteria)
    comment_count = comment_query.count()

    return GroupStat(
        group, GroupStatType.COMMENTS_POSTED, start_time, end_time, comment_count
    )
@ -0,0 +1,54 @@ |
|||
# Copyright (c) 2020 Tildes contributors <code@tildes.net> |
|||
# SPDX-License-Identifier: AGPL-3.0-or-later |
|||
|
|||
"""Contains the GroupStat class.""" |
|||
|
|||
from datetime import datetime |
|||
from typing import Union |
|||
|
|||
from psycopg2.extras import DateTimeTZRange |
|||
from sqlalchemy import Column, Float, ForeignKey, Index, Integer |
|||
from sqlalchemy.dialects.postgresql import ENUM, TSTZRANGE |
|||
from sqlalchemy.orm import relationship |
|||
|
|||
from tildes.enums import GroupStatType |
|||
from tildes.models import DatabaseModel |
|||
|
|||
from .group import Group |
|||
|
|||
|
|||
class GroupStat(DatabaseModel):
    """A single statistic value for one group over one time period.

    Rows are keyed by the combination of group, statistic type, and period.
    """

    __tablename__ = "group_stats"

    # The three columns below together form the composite primary key.
    group_id: int = Column(
        Integer,
        ForeignKey("groups.group_id"),
        nullable=False,
        primary_key=True,
    )
    stat: GroupStatType = Column(
        ENUM(GroupStatType),
        nullable=False,
        primary_key=True,
    )
    period: DateTimeTZRange = Column(
        TSTZRANGE,
        nullable=False,
        primary_key=True,
    )

    # The measured value, always stored as a float.
    value: float = Column(Float, nullable=False)

    # The group is always loaded eagerly with an inner join.
    group: Group = relationship("Group", innerjoin=True, lazy=False)

    # GiST index on the period column, for use with range operators
    __table_args__ = (
        Index("ix_group_stats_period_gist", period, postgresql_using="gist"),
    )

    def __init__(
        self,
        group: Group,
        stat: GroupStatType,
        start_time: datetime,
        end_time: datetime,
        value: Union[int, float],
    ):
        """Create a new statistic for the group and time period.

        The stored period includes start_time and excludes end_time. The value is
        converted to a float before being stored.
        """
        # "[)" bounds: closed at the start, open at the end
        time_range = DateTimeTZRange(start_time, end_time, bounds="[)")

        self.group = group
        self.stat = stat
        self.period = time_range
        self.value = float(value)
Write
Preview
Loading…
Cancel
Save
Reference in new issue