mirror of https://gitlab.com/tildes/tildes.git
Browse Source
Add cronjob to maintain per-group common tag lists
Add cronjob to maintain per-group common tag lists
This sets up a cronjob that will run every hour to select the most common tags used in a group (up to 100), and store them in a new column in the groups table. This will be used to populate the list of tags to use for autocompletion.
merge-requests/68/head
4 changed files with 100 additions and 1 deletions
-
7salt/salt/cronjobs.sls
-
31tildes/alembic/versions/53f81a72f076_group_add_common_topic_tags.py
-
47tildes/scripts/update_groups_common_topic_tags.py
-
16tildes/tildes/models/group/group.py
@ -0,0 +1,31 @@ |
|||||
|
"""Group: add common_topic_tags |
||||
|
|
||||
|
Revision ID: 53f81a72f076 |
||||
|
Revises: fef2c9c9a186 |
||||
|
Create Date: 2019-04-24 17:50:24.360780 |
||||
|
|
||||
|
""" |
||||
|
from alembic import op |
||||
|
import sqlalchemy as sa |
||||
|
|
||||
|
from tildes.lib.database import ArrayOfLtree |
||||
|
|
||||
|
|
||||
|
# revision identifiers, used by Alembic. |
||||
|
revision = "53f81a72f076" |
||||
|
down_revision = "fef2c9c9a186" |
||||
|
branch_labels = None |
||||
|
depends_on = None |
||||
|
|
||||
|
|
||||
|
def upgrade():
    """Add the ``common_topic_tags`` column to the ``groups`` table.

    The column is declared NOT NULL, so a server default of ``"{}"`` is supplied
    to give rows that already exist a value.
    """
    common_topic_tags = sa.Column(
        "common_topic_tags",
        ArrayOfLtree(),
        nullable=False,
        server_default="{}",
    )
    op.add_column("groups", common_topic_tags)
||||
|
|
||||
|
|
||||
|
def downgrade():
    """Undo the upgrade by dropping ``common_topic_tags`` from ``groups``."""
    table_name, column_name = "groups", "common_topic_tags"
    op.drop_column(table_name, column_name)
||||
@ -0,0 +1,47 @@ |
|||||
|
# Copyright (c) 2019 Tildes contributors <code@tildes.net> |
||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later |
||||
|
|
||||
|
"""Script for updating the list of common topic tags for all groups.""" |
||||
|
|
||||
|
from sqlalchemy import desc, func |
||||
|
from sqlalchemy_utils import Ltree |
||||
|
|
||||
|
from tildes.lib.database import get_session_from_config |
||||
|
from tildes.models.group import Group |
||||
|
from tildes.models.topic import Topic |
||||
|
|
||||
|
|
||||
|
# the maximum number of common tags to store for a particular group |
||||
|
MAX_NUM_COMMON_TAGS = 100 |
||||
|
|
||||
|
|
||||
|
def update_common_topic_tags(config_path: str) -> None:
    """Update the list of common topic tags for all groups.

    For each group, counts how often every tag occurs across the group's topics
    and stores the most frequent ones (at most MAX_NUM_COMMON_TAGS) on the group,
    ordered from most to least frequent. Tags with the same frequency are ordered
    by the tag itself, so repeated runs over unchanged data produce the same list
    and the same cutoff.

    The config_path argument is handed to get_session_from_config() to obtain the
    database session.
    """
    db_session = get_session_from_config(config_path)

    all_groups = db_session.query(Group).all()

    for group in all_groups:
        # create a subquery for all tags from topics in that group - UNNEST() converts
        # the arrays of tags into rows so that we can easily group and count
        group_tags = (
            db_session.query(func.unnest(Topic._tags).label("tag"))  # noqa
            .filter(Topic.group == group)
            .subquery()
        )
        tag_column = group_tags.columns["tag"]

        # get the list of the most common tags, based on frequency - the tag itself
        # is a secondary sort key so that ties are broken deterministically instead
        # of in whatever order the database happens to return them
        common_tags = (
            db_session.query(tag_column, func.count().label("frequency"))
            .group_by("tag")
            .order_by(desc("frequency"), tag_column)
            .limit(MAX_NUM_COMMON_TAGS)
            .all()
        )

        # each result row is (tag, frequency) - only the tag is stored
        group._common_topic_tags = [  # noqa
            Ltree(common_tag[0]) for common_tag in common_tags
        ]

        # commit after every group so each group's update is saved on its own
        db_session.add(group)
        db_session.commit()
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue