Browse Source

Add cronjob to maintain per-group common tag lists

This sets up a cronjob that will run every hour to select the most
common tags used in a group (up to 100), and store them in a new column
in the groups table. This will be used to populate the list of tags to
use for autocompletion.
merge-requests/68/head
Deimos 5 years ago
parent
commit
53f543ab4d
  1. 7
      salt/salt/cronjobs.sls
  2. 31
      tildes/alembic/versions/53f81a72f076_group_add_common_topic_tags.py
  3. 47
      tildes/scripts/update_groups_common_topic_tags.py
  4. 16
      tildes/tildes/models/group/group.py

7
salt/salt/cronjobs.sls

@ -11,3 +11,10 @@ generate-site-icons-css-cronjob:
- name: {{ bin_dir }}/python -c "from scripts.generate_site_icons_css import generate_css; generate_css()" - name: {{ bin_dir }}/python -c "from scripts.generate_site_icons_css import generate_css; generate_css()"
- user: {{ app_username }} - user: {{ app_username }}
- minute: '*/5' - minute: '*/5'
# Cron entry that runs scripts.update_groups_common_topic_tags.update_common_topic_tags
# with the path to the app's ini file, at minute 0 of every hour.
update-common-topic-tags-cronjob:
cron.present:
- name: {{ bin_dir }}/python -c "from scripts.update_groups_common_topic_tags import update_common_topic_tags; update_common_topic_tags('{{ app_dir }}/{{ pillar['ini_file'] }}')"
- user: {{ app_username }}
- hour: '*'
- minute: 0

31
tildes/alembic/versions/53f81a72f076_group_add_common_topic_tags.py

@ -0,0 +1,31 @@
"""Group: add common_topic_tags
Revision ID: 53f81a72f076
Revises: fef2c9c9a186
Create Date: 2019-04-24 17:50:24.360780
"""
from alembic import op
import sqlalchemy as sa
from tildes.lib.database import ArrayOfLtree
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the ID of the
# migration that it revises (see the module docstring).
revision = "53f81a72f076"
down_revision = "fef2c9c9a186"
branch_labels = None
depends_on = None
def upgrade():
    """Add the non-nullable ``common_topic_tags`` column to the ``groups`` table.

    The column is created with a server default of ``{}`` and ``nullable=False``.
    """
    common_topic_tags_column = sa.Column(
        "common_topic_tags", ArrayOfLtree(), server_default="{}", nullable=False
    )
    op.add_column("groups", common_topic_tags_column)
def downgrade():
    """Remove the ``common_topic_tags`` column from the ``groups`` table."""
    table_name, column_name = "groups", "common_topic_tags"
    op.drop_column(table_name, column_name)

47
tildes/scripts/update_groups_common_topic_tags.py

@ -0,0 +1,47 @@
# Copyright (c) 2019 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Script for updating the list of common topic tags for all groups."""
from sqlalchemy import desc, func
from sqlalchemy_utils import Ltree
from tildes.lib.database import get_session_from_config
from tildes.models.group import Group
from tildes.models.topic import Topic
# the maximum number of common tags to store for a particular group (used as the
# LIMIT of the per-group tag frequency query in update_common_topic_tags)
MAX_NUM_COMMON_TAGS = 100
def update_common_topic_tags(config_path: str) -> None:
    """Update the list of common topic tags for all groups.

    For each group, counts how many times every tag occurs on the group's topics
    and stores the most frequent ones (at most MAX_NUM_COMMON_TAGS), ordered from
    most to least frequent, in the group's ``_common_topic_tags`` column.

    Args:
        config_path: Path of the config file handed to get_session_from_config()
            to obtain the database session.
    """
    db_session = get_session_from_config(config_path)

    try:
        all_groups = db_session.query(Group).all()

        for group in all_groups:
            # create a subquery for all tags from topics in that group - UNNEST()
            # converts the arrays of tags into rows so that we can easily group and
            # count
            group_tags = (
                db_session.query(func.unnest(Topic._tags).label("tag"))  # noqa
                .filter(Topic.group == group)
                .subquery()
            )

            # get the list of the most common tags, based on frequency
            common_tags = (
                db_session.query(
                    group_tags.columns["tag"], func.count().label("frequency")
                )
                .group_by("tag")
                # break frequency ties by the tag itself - without a secondary sort
                # the tags that make the cut at the limit (and their order) could
                # change arbitrarily from one run to the next
                .order_by(desc("frequency"), "tag")
                .limit(MAX_NUM_COMMON_TAGS)
                .all()
            )

            # each result row is (tag, frequency) - only the tag is stored
            group._common_topic_tags = [  # noqa
                Ltree(common_tag[0]) for common_tag in common_tags
            ]

            # persist this group's list before moving on to the next group
            db_session.add(group)
            db_session.commit()
    finally:
        # always release the session, even if one of the queries or commits fails
        db_session.close()

16
tildes/tildes/models/group/group.py

@ -4,13 +4,15 @@
"""Contains the Group class.""" """Contains the Group class."""
from datetime import datetime from datetime import datetime
from typing import Any, Optional, Sequence, Tuple
from typing import Any, List, Optional, Sequence, Tuple
from pyramid.security import Allow, Authenticated, Deny, DENY_ALL, Everyone from pyramid.security import Allow, Authenticated, Deny, DENY_ALL, Everyone
from sqlalchemy import Boolean, CheckConstraint, Column, Index, Integer, Text, TIMESTAMP from sqlalchemy import Boolean, CheckConstraint, Column, Index, Integer, Text, TIMESTAMP
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.sql.expression import text from sqlalchemy.sql.expression import text
from sqlalchemy_utils import Ltree, LtreeType from sqlalchemy_utils import Ltree, LtreeType
from tildes.lib.database import ArrayOfLtree
from tildes.models import DatabaseModel from tildes.models import DatabaseModel
from tildes.schemas.group import GroupSchema, SHORT_DESCRIPTION_MAX_LENGTH from tildes.schemas.group import GroupSchema, SHORT_DESCRIPTION_MAX_LENGTH
@ -50,12 +52,24 @@ class Group(DatabaseModel):
is_user_treated_as_topic_source: bool = Column( is_user_treated_as_topic_source: bool = Column(
Boolean, nullable=False, server_default="false" Boolean, nullable=False, server_default="false"
) )
# The group's common topic tags, kept in the "common_topic_tags" database column as
# an array of ltree values. Not nullable; the server-side default is "{}".
_common_topic_tags: List[Ltree] = Column(
"common_topic_tags", ArrayOfLtree, nullable=False, server_default="{}"
)
# Create a GiST index on path as well as the btree one that will be created by the # Create a GiST index on path as well as the btree one that will be created by the
# index=True/unique=True keyword args to Column above. The GiST index supports # index=True/unique=True keyword args to Column above. The GiST index supports
# additional operators for ltree queries: @>, <@, @, ~, ? # additional operators for ltree queries: @>, <@, @, ~, ?
__table_args__ = (Index("ix_groups_path_gist", path, postgresql_using="gist"),) __table_args__ = (Index("ix_groups_path_gist", path, postgresql_using="gist"),)
@hybrid_property
def common_topic_tags(self) -> List[str]:
    """Return the group's list of common topic tags.

    The stored Ltree values are converted to strings, with underscores replaced
    by spaces.
    """
    return [str(tag).replace("_", " ") for tag in self._common_topic_tags]

@common_topic_tags.setter  # type: ignore
def common_topic_tags(self, new_tags: List[str]) -> None:
    """Set the group's list of common topic tags.

    Applies the inverse of the getter's conversion: spaces are replaced by
    underscores and every tag is wrapped in an Ltree, so that a value read from
    this property can be assigned back to it unchanged.
    """
    # str() keeps the setter usable with Ltree values as well as plain strings
    self._common_topic_tags = [
        Ltree(str(tag).replace(" ", "_")) for tag in new_tags
    ]
def __repr__(self) -> str: def __repr__(self) -> str:
"""Display the group's path and ID as its repr format.""" """Display the group's path and ID as its repr format."""
return f"<Group {self.path} ({self.group_id})>" return f"<Group {self.path} ({self.group_id})>"

Loading…
Cancel
Save