Browse Source

Add cronjob to maintain per-group common tag lists

This sets up a cronjob that will run every hour to select the most
common tags used in a group (up to 100), and store them in a new column
in the groups table. This will be used to populate the list of tags to
use for autocompletion.
merge-requests/68/head
Deimos 5 years ago
parent
commit
53f543ab4d
  1. 7
      salt/salt/cronjobs.sls
  2. 31
      tildes/alembic/versions/53f81a72f076_group_add_common_topic_tags.py
  3. 47
      tildes/scripts/update_groups_common_topic_tags.py
  4. 16
      tildes/tildes/models/group/group.py

7
salt/salt/cronjobs.sls

@ -11,3 +11,10 @@ generate-site-icons-css-cronjob:
- name: {{ bin_dir }}/python -c "from scripts.generate_site_icons_css import generate_css; generate_css()"
- user: {{ app_username }}
- minute: '*/5'
# Cron entry that runs update_common_topic_tags() from
# scripts/update_groups_common_topic_tags.py at minute 0 of every hour, passing it
# the path to the application's ini file.
update-common-topic-tags-cronjob:
  cron.present:
    - name: {{ bin_dir }}/python -c "from scripts.update_groups_common_topic_tags import update_common_topic_tags; update_common_topic_tags('{{ app_dir }}/{{ pillar['ini_file'] }}')"
    - user: {{ app_username }}
    - hour: '*'
    - minute: 0

31
tildes/alembic/versions/53f81a72f076_group_add_common_topic_tags.py

@ -0,0 +1,31 @@
"""Group: add common_topic_tags
Revision ID: 53f81a72f076
Revises: fef2c9c9a186
Create Date: 2019-04-24 17:50:24.360780
"""
from alembic import op
import sqlalchemy as sa
from tildes.lib.database import ArrayOfLtree
# revision identifiers, used by Alembic.
# "revision" is this migration's own ID and "down_revision" is the migration it
# revises (both match the values listed in the module docstring).
revision = "53f81a72f076"
down_revision = "fef2c9c9a186"
branch_labels = None
depends_on = None
def upgrade():
    """Add the ``common_topic_tags`` column to the ``groups`` table.

    The column is an array of ltree values, is not nullable, and uses an empty
    array ("{}") as its server-side default.
    """
    common_topic_tags_column = sa.Column(
        "common_topic_tags",
        ArrayOfLtree(),
        server_default="{}",
        nullable=False,
    )
    op.add_column("groups", common_topic_tags_column)
def downgrade():
    """Drop the ``common_topic_tags`` column from ``groups``, undoing upgrade()."""
    table_name, column_name = "groups", "common_topic_tags"
    op.drop_column(table_name, column_name)

47
tildes/scripts/update_groups_common_topic_tags.py

@ -0,0 +1,47 @@
# Copyright (c) 2019 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Script for updating the list of common topic tags for all groups."""
from sqlalchemy import desc, func
from sqlalchemy_utils import Ltree
from tildes.lib.database import get_session_from_config
from tildes.models.group import Group
from tildes.models.topic import Topic
# the maximum number of common tags to store for a particular group (applied as the
# LIMIT on each group's tag-frequency query)
MAX_NUM_COMMON_TAGS = 100
def update_common_topic_tags(config_path: str) -> None:
    """Update the list of common topic tags for all groups.

    For each group, count how often every tag appears across the group's topics and
    store the most frequent ones (at most MAX_NUM_COMMON_TAGS) on the group.

    Args:
        config_path: Path to the ini file, passed to get_session_from_config() to
            obtain the database session.
    """
    db_session = get_session_from_config(config_path)

    all_groups = db_session.query(Group).all()

    for group in all_groups:
        # create a subquery for all tags from topics in that group - UNNEST() converts
        # the arrays of tags into rows so that we can easily group and count
        group_tags = (
            db_session.query(func.unnest(Topic._tags).label("tag"))  # noqa
            .filter(Topic.group == group)
            .subquery()
        )
        tag_column = group_tags.columns["tag"]

        # get the list of the most common tags, based on frequency - ties are broken
        # by the tag itself so that the ordering (and which tags make it under the
        # limit) is deterministic instead of potentially changing on every run
        common_tags = (
            db_session.query(tag_column, func.count().label("frequency"))
            .group_by(tag_column)
            .order_by(desc("frequency"), tag_column)
            .limit(MAX_NUM_COMMON_TAGS)
            .all()
        )

        group._common_topic_tags = [  # noqa
            Ltree(common_tag[0]) for common_tag in common_tags
        ]

        # persist this group's list before moving on to the next group
        db_session.add(group)
        db_session.commit()

16
tildes/tildes/models/group/group.py

@ -4,13 +4,15 @@
"""Contains the Group class."""
from datetime import datetime
from typing import Any, Optional, Sequence, Tuple
from typing import Any, List, Optional, Sequence, Tuple
from pyramid.security import Allow, Authenticated, Deny, DENY_ALL, Everyone
from sqlalchemy import Boolean, CheckConstraint, Column, Index, Integer, Text, TIMESTAMP
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.sql.expression import text
from sqlalchemy_utils import Ltree, LtreeType
from tildes.lib.database import ArrayOfLtree
from tildes.models import DatabaseModel
from tildes.schemas.group import GroupSchema, SHORT_DESCRIPTION_MAX_LENGTH
@ -50,12 +52,24 @@ class Group(DatabaseModel):
is_user_treated_as_topic_source: bool = Column(
Boolean, nullable=False, server_default="false"
)
# Backing storage for the common_topic_tags hybrid property: kept in the
# "common_topic_tags" column, which is non-nullable and defaults to an empty array.
_common_topic_tags: List[Ltree] = Column(
    "common_topic_tags", ArrayOfLtree, nullable=False, server_default="{}"
)
# Create a GiST index on path as well as the btree one that will be created by the
# index=True/unique=True keyword args to Column above. The GiST index supports
# additional operators for ltree queries: @>, <@, @, ~, ?
__table_args__ = (Index("ix_groups_path_gist", path, postgresql_using="gist"),)
@hybrid_property
def common_topic_tags(self) -> List[str]:
    """Return the group's common topic tags in their human-readable form.

    Each stored tag is converted to a string and has its underscores replaced by
    spaces.
    """
    readable_tags: List[str] = []
    for stored_tag in self._common_topic_tags:
        readable_tags.append(str(stored_tag).replace("_", " "))
    return readable_tags
@common_topic_tags.setter  # type: ignore
def common_topic_tags(self, new_tags: List[str]) -> None:
    """Replace the group's stored list of common topic tags.

    The given list is assigned to the underlying column attribute unchanged.
    """
    # NOTE(review): the getter converts underscores to spaces when reading, but no
    # reverse conversion happens here - confirm callers pass values in stored form.
    self._common_topic_tags = new_tags
def __repr__(self) -> str:
    """Display the group's path and ID as its repr format.

    The result has the form ``<Group PATH (GROUP_ID)>``.
    """
    return f"<Group {self.path} ({self.group_id})>"

Loading…
Cancel
Save