diff --git a/salt/salt/cronjobs.sls b/salt/salt/cronjobs.sls index 05e4300..8b075a9 100644 --- a/salt/salt/cronjobs.sls +++ b/salt/salt/cronjobs.sls @@ -11,3 +11,10 @@ generate-site-icons-css-cronjob: - name: {{ bin_dir }}/python -c "from scripts.generate_site_icons_css import generate_css; generate_css()" - user: {{ app_username }} - minute: '*/5' + +update-common-topic-tags-cronjob: + cron.present: + - name: {{ bin_dir }}/python -c "from scripts.update_groups_common_topic_tags import update_common_topic_tags; update_common_topic_tags('{{ app_dir }}/{{ pillar['ini_file'] }}')" + - user: {{ app_username }} + - hour: '*' + - minute: 0 diff --git a/tildes/alembic/versions/53f81a72f076_group_add_common_topic_tags.py b/tildes/alembic/versions/53f81a72f076_group_add_common_topic_tags.py new file mode 100644 index 0000000..89b60e3 --- /dev/null +++ b/tildes/alembic/versions/53f81a72f076_group_add_common_topic_tags.py @@ -0,0 +1,31 @@ +"""Group: add common_topic_tags + +Revision ID: 53f81a72f076 +Revises: fef2c9c9a186 +Create Date: 2019-04-24 17:50:24.360780 + +""" +from alembic import op +import sqlalchemy as sa + +from tildes.lib.database import ArrayOfLtree + + +# revision identifiers, used by Alembic. 
# ---- tildes/alembic/versions/53f81a72f076_group_add_common_topic_tags.py (cont.) ----

revision = "53f81a72f076"
down_revision = "fef2c9c9a186"
branch_labels = None
depends_on = None


def upgrade():
    """Add the ``common_topic_tags`` column to the ``groups`` table.

    The column is NOT NULL with a server default of ``{}``, so rows that already
    exist receive the default instead of violating the constraint.
    """
    op.add_column(
        "groups",
        sa.Column(
            "common_topic_tags", ArrayOfLtree(), server_default="{}", nullable=False
        ),
    )


def downgrade():
    """Drop the ``common_topic_tags`` column from the ``groups`` table."""
    op.drop_column("groups", "common_topic_tags")


# ---- tildes/scripts/update_groups_common_topic_tags.py ----

# Copyright (c) 2019 Tildes contributors
# SPDX-License-Identifier: AGPL-3.0-or-later

"""Script for updating the list of common topic tags for all groups."""

from sqlalchemy import desc, func
from sqlalchemy_utils import Ltree

from tildes.lib.database import get_session_from_config
from tildes.models.group import Group
from tildes.models.topic import Topic


# the maximum number of common tags to store for a particular group
MAX_NUM_COMMON_TAGS = 100


def update_common_topic_tags(config_path: str) -> None:
    """Update the list of common topic tags for all groups.

    For each group, counts how many times every tag occurs on the group's topics
    and stores up to MAX_NUM_COMMON_TAGS of them on the group, most frequent
    first. The changes for all groups are committed together at the end.

    Args:
        config_path: Path of the ini file used to create the database session.
    """
    db_session = get_session_from_config(config_path)

    try:
        all_groups = db_session.query(Group).all()

        for group in all_groups:
            # create a subquery for all tags from topics in that group - UNNEST()
            # converts the arrays of tags into rows so that we can easily group and
            # count
            group_tags = (
                db_session.query(func.unnest(Topic._tags).label("tag"))  # noqa
                .filter(Topic.group == group)
                .subquery()
            )
            tag_column = group_tags.columns["tag"]

            # get the list of the most common tags, based on frequency - tags with
            # the same frequency are ordered by the tag itself, so the stored list
            # (and which tags make the cut-off) doesn't change between runs when
            # the underlying data hasn't
            common_tags = (
                db_session.query(tag_column, func.count().label("frequency"))
                .group_by(tag_column)
                .order_by(desc("frequency"), tag_column)
                .limit(MAX_NUM_COMMON_TAGS)
                .all()
            )

            group._common_topic_tags = [  # noqa
                Ltree(tag) for tag, _frequency in common_tags
            ]

            db_session.add(group)

        db_session.commit()
    finally:
        # release the session even if one of the queries or the commit failed
        db_session.close()
# ---- tildes/tildes/models/group/group.py ----

"""Contains the Group class."""

from datetime import datetime
from typing import Any, List, Optional, Sequence, Tuple

from pyramid.security import Allow, Authenticated, Deny, DENY_ALL, Everyone
from sqlalchemy import Boolean, CheckConstraint, Column, Index, Integer, Text, TIMESTAMP
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.sql.expression import text
from sqlalchemy_utils import Ltree, LtreeType

from tildes.lib.database import ArrayOfLtree
from tildes.models import DatabaseModel
from tildes.schemas.group import GroupSchema, SHORT_DESCRIPTION_MAX_LENGTH


class Group(DatabaseModel):
    # NOTE: excerpt - only the part of the class covered by this change is shown.
    # Columns defined earlier in the class (including ``path``, used by the index
    # below) and the methods that follow the property are not reproduced here.

    is_user_treated_as_topic_source: bool = Column(
        Boolean, nullable=False, server_default="false"
    )
    # backing column for the common_topic_tags property below
    _common_topic_tags: List[Ltree] = Column(
        "common_topic_tags", ArrayOfLtree, nullable=False, server_default="{}"
    )

    # Create a GiST index on path as well as the btree one that will be created by the
    # index=True/unique=True keyword args to Column above. The GiST index supports
    # additional operators for ltree queries: @>, <@, @, ~, ?
    __table_args__ = (Index("ix_groups_path_gist", path, postgresql_using="gist"),)

    @hybrid_property
    def common_topic_tags(self) -> List[str]:
        """Return the group's list of common topic tags.

        The stored values use underscores where the displayed tag has spaces, so
        each one is converted to a string with the underscores replaced by spaces.
        Returns an empty list if the column has no value yet.
        """
        if self._common_topic_tags is None:
            # the column only has a server-side default, so an instance that
            # hasn't been flushed to / loaded from the database has no value yet
            return []

        return [str(tag).replace("_", " ") for tag in self._common_topic_tags]

    @common_topic_tags.setter  # type: ignore
    def common_topic_tags(self, new_tags: List[str]) -> None:
        """Set the group's list of common topic tags.

        Accepts tags in the same form the getter returns (spaces allowed) and
        stores them in the underscore-separated form, so that assigning a value
        previously read from this property stores the same tags.

        Raises:
            ValueError: presumably raised by Ltree if a tag isn't a valid ltree
                path even after the conversion - confirm against sqlalchemy_utils.
        """
        self._common_topic_tags = [
            Ltree(str(tag).replace(" ", "_")) for tag in new_tags
        ]