mirror of https://gitlab.com/tildes/tildes.git
Browse Source
Add group_stats table, track daily topics/comments
Add group_stats table, track daily topics/comments
This adds a group_stats table and a cronjob that will insert the previous day's stats into it each day just after 00:00 UTC.
merge-requests/102/head
Deimos
5 years ago
7 changed files with 206 additions and 1 deletions
-
7salt/salt/cronjobs.sls
-
50tildes/alembic/versions/9148909b78e9_add_group_stats_table.py
-
86tildes/scripts/generate_group_stats_for_yesterday.py
-
2tildes/tildes/database_models.py
-
7tildes/tildes/enums.py
-
1tildes/tildes/models/group/__init__.py
-
54tildes/tildes/models/group/group_stat.py
@ -0,0 +1,50 @@ |
|||||
|
"""Add group_stats table |
||||
|
|
||||
|
Revision ID: 9148909b78e9 |
||||
|
Revises: fe91222503ef |
||||
|
Create Date: 2020-03-06 02:27:31.720325 |
||||
|
|
||||
|
""" |
||||
|
from alembic import op |
||||
|
import sqlalchemy as sa |
||||
|
from sqlalchemy.dialects import postgresql |
||||
|
|
||||
|
# Alembic revision identifiers: this migration's own ID, the ID of the migration
# it follows, and the (unused) branch label / dependency settings.
revision = "9148909b78e9"
down_revision = "fe91222503ef"
branch_labels = None
depends_on = None
||||
|
|
||||
|
|
||||
|
def upgrade():
    """Create the group_stats table and a GiST index on its period column."""
    stat_type = postgresql.ENUM(
        "TOPICS_POSTED", "COMMENTS_POSTED", name="groupstattype"
    )

    columns = [
        sa.Column("group_id", sa.Integer(), nullable=False),
        sa.Column("stat", stat_type, nullable=False),
        sa.Column("period", postgresql.TSTZRANGE(), nullable=False),
        sa.Column("value", sa.Float(), nullable=False),
    ]

    constraints = [
        sa.ForeignKeyConstraint(
            ["group_id"],
            ["groups.group_id"],
            name=op.f("fk_group_stats_group_id_groups"),
        ),
        # a row is identified by the combination of group, stat type, and period
        sa.PrimaryKeyConstraint(
            "group_id", "stat", "period", name=op.f("pk_group_stats")
        ),
    ]

    op.create_table("group_stats", *columns, *constraints)

    op.create_index(
        "ix_group_stats_period_gist",
        "group_stats",
        ["period"],
        unique=False,
        postgresql_using="gist",
    )
||||
|
|
||||
|
|
||||
|
def downgrade():
    """Remove the group_stats table, its GiST index, and the groupstattype enum."""
    op.drop_index("ix_group_stats_period_gist", table_name="group_stats")
    op.drop_table("group_stats")

    # Creating the table in upgrade() also created the PostgreSQL enum type used by
    # the "stat" column, but dropping the table does not remove that type. Drop it
    # explicitly, otherwise re-running upgrade() fails because the type still exists.
    op.execute("DROP TYPE IF EXISTS groupstattype")
@ -0,0 +1,86 @@ |
|||||
|
# Copyright (c) 2020 Tildes contributors <code@tildes.net> |
||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later |
||||
|
|
||||
|
"""Script for generating group statistics for yesterday (UTC). |
||||
|
|
||||
|
This script is not very flexible - no matter what time it is run, it will always |
||||
|
generate stats for the previous UTC day for all groups and store them in the group_stats |
||||
|
table. |
||||
|
""" |
||||
|
|
||||
|
from datetime import datetime, timedelta |
||||
|
|
||||
|
from sqlalchemy.exc import IntegrityError |
||||
|
from sqlalchemy.orm import Session |
||||
|
|
||||
|
from tildes.enums import GroupStatType |
||||
|
from tildes.lib.database import get_session_from_config |
||||
|
from tildes.lib.datetime import utc_now |
||||
|
from tildes.models.comment import Comment |
||||
|
from tildes.models.group import Group, GroupStat |
||||
|
from tildes.models.topic import Topic |
||||
|
|
||||
|
|
||||
|
def generate_stats(config_path: str) -> None:
    """Generate all stats for all groups for yesterday (UTC).

    The stats for each group are committed separately. If the commit for a group
    fails with an IntegrityError, the stats are assumed to already exist for that
    group/period combination and the group is skipped.
    """
    db_session = get_session_from_config(config_path)

    # the end time is the start of the current day, start time 1 day before that
    end_time = utc_now().replace(hour=0, minute=0, second=0, microsecond=0)
    start_time = end_time - timedelta(days=1)

    groups = db_session.query(Group).all()

    for group in groups:
        # disable autoflush so that the query inside comments_posted() doesn't flush
        # the already-added topics stat early; any IntegrityError then surfaces at
        # the commit() below, where it is handled
        with db_session.no_autoflush:
            db_session.add(topics_posted(db_session, group, start_time, end_time))
            db_session.add(comments_posted(db_session, group, start_time, end_time))

        try:
            db_session.commit()
        except IntegrityError:
            # stats have already run for this group/period combination, just skip.
            # The failed flush leaves the session's transaction unusable until it
            # is rolled back (which also discards the pending stats), so this must
            # happen before the next group's queries can run.
            db_session.rollback()
||||
|
|
||||
|
|
||||
|
def topics_posted(
    db_session: Session, group: Group, start_time: datetime, end_time: datetime
) -> GroupStat:
    """Build a TOPICS_POSTED GroupStat for the group over the given time span.

    Counts topics in the group created at or after start_time and before end_time,
    leaving out any that are marked as deleted or removed.
    """
    conditions = (
        Topic.group == group,
        Topic.created_time >= start_time,
        Topic.created_time < end_time,
        Topic.is_deleted == False,  # noqa
        Topic.is_removed == False,  # noqa
    )
    topic_count = db_session.query(Topic).filter(*conditions).count()

    return GroupStat(
        group, GroupStatType.TOPICS_POSTED, start_time, end_time, topic_count
    )
||||
|
|
||||
|
|
||||
|
def comments_posted(
    db_session: Session, group: Group, start_time: datetime, end_time: datetime
) -> GroupStat:
    """Build a COMMENTS_POSTED GroupStat for the group over the given time span.

    Counts comments created at or after start_time and before end_time whose topic
    belongs to the group, leaving out comments marked as deleted or removed.
    """
    # comments are tied to a group only through their topic, so join to Topic
    query = db_session.query(Comment).join(Topic)
    query = query.filter(
        Topic.group == group,
        Comment.created_time >= start_time,
        Comment.created_time < end_time,
        Comment.is_deleted == False,  # noqa
        Comment.is_removed == False,  # noqa
    )
    comment_count = query.count()

    return GroupStat(
        group, GroupStatType.COMMENTS_POSTED, start_time, end_time, comment_count
    )
@ -0,0 +1,54 @@ |
|||||
|
# Copyright (c) 2020 Tildes contributors <code@tildes.net> |
||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later |
||||
|
|
||||
|
"""Contains the GroupStat class.""" |
||||
|
|
||||
|
from datetime import datetime |
||||
|
from typing import Union |
||||
|
|
||||
|
from psycopg2.extras import DateTimeTZRange |
||||
|
from sqlalchemy import Column, Float, ForeignKey, Index, Integer |
||||
|
from sqlalchemy.dialects.postgresql import ENUM, TSTZRANGE |
||||
|
from sqlalchemy.orm import relationship |
||||
|
|
||||
|
from tildes.enums import GroupStatType |
||||
|
from tildes.models import DatabaseModel |
||||
|
|
||||
|
from .group import Group |
||||
|
|
||||
|
|
||||
|
class GroupStat(DatabaseModel):
    """A single statistic value for one group over one time period.

    Rows are identified by the combination of group, statistic type, and period.
    """

    __tablename__ = "group_stats"

    # composite primary key: (group_id, stat, period)
    group_id: int = Column(
        Integer, ForeignKey("groups.group_id"), nullable=False, primary_key=True
    )
    stat: GroupStatType = Column(ENUM(GroupStatType), nullable=False, primary_key=True)
    period: DateTimeTZRange = Column(TSTZRANGE, nullable=False, primary_key=True)

    # stored as a float even when the statistic is a plain count
    value: float = Column(Float, nullable=False)

    group: Group = relationship("Group", innerjoin=True, lazy=False)

    # GiST index on the period column, for use by range operators
    __table_args__ = (
        Index("ix_group_stats_period_gist", period, postgresql_using="gist"),
    )

    def __init__(
        self,
        group: Group,
        stat: GroupStatType,
        start_time: datetime,
        end_time: datetime,
        value: Union[int, float],
    ):
        """Create a statistic for the group covering [start_time, end_time).

        The period includes start_time itself but stops just short of end_time.
        The value is converted to a float before being stored.
        """
        # "[)" bounds: inclusive lower bound, exclusive upper bound
        covered_period = DateTimeTZRange(start_time, end_time, bounds="[)")

        self.group = group
        self.stat = stat
        self.period = covered_period
        self.value = float(value)
Write
Preview
Loading…
Cancel
Save
Reference in new issue