Browse Source

Add group_stats table, track daily topics/comments

This adds a group_stats table and cronjob that will insert the previous
day's stats into it each day just after 00:00 UTC.
merge-requests/110/head
Deimos 5 years ago
parent
commit
7d1c3297fb
  1. 7
      salt/salt/cronjobs.sls
  2. 50
      tildes/alembic/versions/9148909b78e9_add_group_stats_table.py
  3. 86
      tildes/scripts/generate_group_stats_for_yesterday.py
  4. 2
      tildes/tildes/database_models.py
  5. 7
      tildes/tildes/enums.py
  6. 1
      tildes/tildes/models/group/__init__.py
  7. 54
      tildes/tildes/models/group/group_stat.py

7
salt/salt/cronjobs.sls

@ -13,6 +13,13 @@ data-cleanup-cronjob:
- hour: 4
- minute: 10
# Cronjob that calls generate_stats() from scripts.generate_group_stats_for_yesterday
# with the app's ini file, every day at 00:10 (hour 0, minute 10).
generate-group-stats-for-yesterday-cronjob:
cron.present:
- name: {{ bin_dir }}/python -c "from scripts.generate_group_stats_for_yesterday import generate_stats; generate_stats('{{ app_dir }}/{{ pillar['ini_file'] }}')"
- user: {{ app_username }}
- hour: 0
- minute: 10
generate-site-icons-css-cronjob:
cron.present:
- name: {{ bin_dir }}/python -c "from scripts.generate_site_icons_css import generate_css; generate_css()"

50
tildes/alembic/versions/9148909b78e9_add_group_stats_table.py

@ -0,0 +1,50 @@
"""Add group_stats table
Revision ID: 9148909b78e9
Revises: fe91222503ef
Create Date: 2020-03-06 02:27:31.720325
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the ID of the
# migration it revises (the "Revises" entry in the module docstring).
revision = "9148909b78e9"
down_revision = "fe91222503ef"
# Neither of these is set for this migration.
branch_labels = None
depends_on = None
def upgrade():
    """Create the group_stats table and a GiST index on its period column."""
    stat_type = postgresql.ENUM(
        "TOPICS_POSTED", "COMMENTS_POSTED", name="groupstattype"
    )

    columns = [
        sa.Column("group_id", sa.Integer(), nullable=False),
        sa.Column("stat", stat_type, nullable=False),
        sa.Column("period", postgresql.TSTZRANGE(), nullable=False),
        sa.Column("value", sa.Float(), nullable=False),
    ]
    constraints = [
        sa.ForeignKeyConstraint(
            ["group_id"],
            ["groups.group_id"],
            name=op.f("fk_group_stats_group_id_groups"),
        ),
        # a row is identified by its group, statistic type, and time period
        sa.PrimaryKeyConstraint(
            "group_id", "stat", "period", name=op.f("pk_group_stats")
        ),
    ]
    op.create_table("group_stats", *columns, *constraints)

    op.create_index(
        "ix_group_stats_period_gist",
        "group_stats",
        ["period"],
        unique=False,
        postgresql_using="gist",
    )
def downgrade():
    """Remove everything upgrade() created: the index, the table, and the enum type."""
    op.drop_index("ix_group_stats_period_gist", table_name="group_stats")
    op.drop_table("group_stats")

    # Dropping the table by name does not remove the "groupstattype" enum type that
    # was created for the "stat" column. If it is left behind, running upgrade()
    # again fails when it tries to create the type a second time, so drop it here.
    op.execute("DROP TYPE IF EXISTS groupstattype")

86
tildes/scripts/generate_group_stats_for_yesterday.py

@ -0,0 +1,86 @@
# Copyright (c) 2020 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Script for generating group statistics for yesterday (UTC).
This script is not very flexible - no matter what time it is run, it will always
generate stats for the previous UTC day for all groups and store them in the group_stats
table.
"""
from datetime import datetime, timedelta
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
from tildes.enums import GroupStatType
from tildes.lib.database import get_session_from_config
from tildes.lib.datetime import utc_now
from tildes.models.comment import Comment
from tildes.models.group import Group, GroupStat
from tildes.models.topic import Topic
def generate_stats(config_path: str) -> None:
    """Generate all stats for all groups for yesterday (UTC).

    The period covered is the full UTC day before the current one: it starts at
    00:00 yesterday and ends at 00:00 today. For each group, a topics-posted and a
    comments-posted GroupStat are added and committed together.

    If the commit for a group raises IntegrityError (its stats for this period were
    already stored), that group is skipped and processing continues with the next.

    config_path is the path of the ini file used to create the database session.
    """
    db_session = get_session_from_config(config_path)

    # the end time is the start of the current day, start time 1 day before that
    end_time = utc_now().replace(hour=0, minute=0, second=0, microsecond=0)
    start_time = end_time - timedelta(days=1)

    groups = db_session.query(Group).all()

    for group in groups:
        # the counting queries must not flush the first pending stat early
        with db_session.no_autoflush:
            db_session.add(topics_posted(db_session, group, start_time, end_time))
            db_session.add(comments_posted(db_session, group, start_time, end_time))

        try:
            db_session.commit()
        except IntegrityError:
            # stats have already run for this group/period combination, just skip.
            # The session has to be rolled back explicitly after a failed flush,
            # otherwise the queries for the next group would raise instead of running.
            db_session.rollback()
def topics_posted(
    db_session: Session, group: Group, start_time: datetime, end_time: datetime
) -> GroupStat:
    """Build the TOPICS_POSTED GroupStat for a group over a time period.

    Counts the group's topics whose created_time falls in the period (inclusive of
    start_time, exclusive of end_time), leaving out any that are deleted or removed.
    """
    created_in_period = (
        Topic.created_time >= start_time,
        Topic.created_time < end_time,
    )
    not_deleted_or_removed = (
        Topic.is_deleted == False,  # noqa
        Topic.is_removed == False,  # noqa
    )

    matching_topics = db_session.query(Topic).filter(
        Topic.group == group, *created_in_period, *not_deleted_or_removed
    )
    topic_count = matching_topics.count()

    return GroupStat(
        group, GroupStatType.TOPICS_POSTED, start_time, end_time, topic_count
    )
def comments_posted(
    db_session: Session, group: Group, start_time: datetime, end_time: datetime
) -> GroupStat:
    """Build the COMMENTS_POSTED GroupStat for a group over a time period.

    Counts comments (joined to their topics) where the topic is in the group and the
    comment's created_time falls in the period (inclusive of start_time, exclusive
    of end_time), leaving out any comments that are deleted or removed.
    """
    created_in_period = (
        Comment.created_time >= start_time,
        Comment.created_time < end_time,
    )
    not_deleted_or_removed = (
        Comment.is_deleted == False,  # noqa
        Comment.is_removed == False,  # noqa
    )

    comments_with_topics = db_session.query(Comment).join(Topic)
    matching_comments = comments_with_topics.filter(
        Topic.group == group, *created_in_period, *not_deleted_or_removed
    )
    comment_count = matching_comments.count()

    return GroupStat(
        group, GroupStatType.COMMENTS_POSTED, start_time, end_time, comment_count
    )

2
tildes/tildes/database_models.py

@ -13,7 +13,7 @@ from tildes.models.comment import (
CommentVote,
)
from tildes.models.financials import Financials
from tildes.models.group import Group, GroupSubscription
from tildes.models.group import Group, GroupStat, GroupSubscription
from tildes.models.log import Log
from tildes.models.message import MessageConversation, MessageReply
from tildes.models.scraper import ScraperResult

7
tildes/tildes/enums.py

@ -165,6 +165,13 @@ class FinancialEntryType(enum.Enum):
INCOME = enum.auto()
class GroupStatType(enum.Enum):
    """Enum for types of group statistics.

    The member names match the labels of the "groupstattype" database enum used for
    the group_stats table's "stat" column.
    """

    # topics posted in the group during the period
    TOPICS_POSTED = enum.auto()
    # comments posted in the group during the period
    COMMENTS_POSTED = enum.auto()
class LogEventType(enum.Enum):
"""Enum for the types of events stored in logs."""

1
tildes/tildes/models/group/__init__.py

@ -2,5 +2,6 @@
from .group import Group
from .group_query import GroupQuery
from .group_stat import GroupStat
from .group_subscription import GroupSubscription
from .group_wiki_page import GroupWikiPage

54
tildes/tildes/models/group/group_stat.py

@ -0,0 +1,54 @@
# Copyright (c) 2020 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Contains the GroupStat class."""
from datetime import datetime
from typing import Union
from psycopg2.extras import DateTimeTZRange
from sqlalchemy import Column, Float, ForeignKey, Index, Integer
from sqlalchemy.dialects.postgresql import ENUM, TSTZRANGE
from sqlalchemy.orm import relationship
from tildes.enums import GroupStatType
from tildes.models import DatabaseModel
from .group import Group
class GroupStat(DatabaseModel):
    """A single statistic value for one group over one time period.

    Rows are identified by the combination of group, statistic type, and period.
    """

    __tablename__ = "group_stats"

    # composite primary key: (group_id, stat, period)
    group_id: int = Column(
        Integer, ForeignKey("groups.group_id"), primary_key=True, nullable=False
    )
    stat: GroupStatType = Column(ENUM(GroupStatType), primary_key=True, nullable=False)
    period: DateTimeTZRange = Column(TSTZRANGE, primary_key=True, nullable=False)

    value: float = Column(Float, nullable=False)

    group: Group = relationship("Group", lazy=False, innerjoin=True)

    # GiST index on the period column, for use with range operators
    __table_args__ = (
        Index("ix_group_stats_period_gist", period, postgresql_using="gist"),
    )

    def __init__(
        self,
        group: Group,
        stat: GroupStatType,
        start_time: datetime,
        end_time: datetime,
        value: Union[int, float],
    ):
        """Create a new statistic for the group and time period.

        The stored period includes start_time itself but stops just short of
        end_time. The value is always stored as a float, even when given as an int.
        """
        # "[)" bounds: closed at the lower end, open at the upper end
        time_range = DateTimeTZRange(start_time, end_time, bounds="[)")

        self.group = group
        self.stat = stat
        self.period = time_range
        self.value = float(value)
Loading…
Cancel
Save