mirror of https://gitlab.com/tildes/tildes.git
Browse Source
Add cronjob to maintain per-group common tag lists
Add cronjob to maintain per-group common tag lists
This sets up a cronjob that will run every hour to select the most common tags used in a group (up to 100), and store them in a new column in the groups table. This will be used to populate the list of tags to use for autocompletion.
merge-requests/68/head
Deimos
6 years ago
4 changed files with 100 additions and 1 deletions
-
7salt/salt/cronjobs.sls
-
31tildes/alembic/versions/53f81a72f076_group_add_common_topic_tags.py
-
47tildes/scripts/update_groups_common_topic_tags.py
-
16tildes/tildes/models/group/group.py
@ -0,0 +1,31 @@ |
|||
"""Group: add common_topic_tags |
|||
|
|||
Revision ID: 53f81a72f076 |
|||
Revises: fef2c9c9a186 |
|||
Create Date: 2019-04-24 17:50:24.360780 |
|||
|
|||
""" |
|||
from alembic import op |
|||
import sqlalchemy as sa |
|||
|
|||
from tildes.lib.database import ArrayOfLtree |
|||
|
|||
|
|||
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the ID of the
# migration it revises (both match the header in the module docstring).
revision = "53f81a72f076"
down_revision = "fef2c9c9a186"
branch_labels = None
depends_on = None
|||
|
|||
|
|||
def upgrade():
    """Add the common_topic_tags column to the groups table.

    The column is an ArrayOfLtree that is declared NOT NULL with a server-side
    default of "{}".
    """
    common_topic_tags_column = sa.Column(
        "common_topic_tags",
        ArrayOfLtree(),
        nullable=False,
        server_default="{}",
    )
    op.add_column("groups", common_topic_tags_column)
|||
|
|||
|
|||
def downgrade():
    """Drop the common_topic_tags column from the groups table."""
    table_name, column_name = "groups", "common_topic_tags"
    op.drop_column(table_name, column_name)
@ -0,0 +1,47 @@ |
|||
# Copyright (c) 2019 Tildes contributors <code@tildes.net> |
|||
# SPDX-License-Identifier: AGPL-3.0-or-later |
|||
|
|||
"""Script for updating the list of common topic tags for all groups.""" |
|||
|
|||
from sqlalchemy import desc, func |
|||
from sqlalchemy_utils import Ltree |
|||
|
|||
from tildes.lib.database import get_session_from_config |
|||
from tildes.models.group import Group |
|||
from tildes.models.topic import Topic |
|||
|
|||
|
|||
# the maximum number of common tags to store for a particular group (applied as the
# LIMIT on the per-group tag frequency query)
MAX_NUM_COMMON_TAGS = 100
|||
|
|||
|
|||
def update_common_topic_tags(config_path: str) -> None:
    """Update the list of common topic tags for all groups.

    For each group, counts how often every tag occurs across that group's topics
    and stores the most frequent ones (at most MAX_NUM_COMMON_TAGS) on the group,
    ordered from most to least frequent. Each group's new list is committed as soon
    as it has been computed.

    Args:
        config_path: path of the config file handed to get_session_from_config to
            create the database session.
    """
    db_session = get_session_from_config(config_path)

    all_groups = db_session.query(Group).all()

    for group in all_groups:
        # create a subquery for all tags from topics in that group - UNNEST() converts
        # the arrays of tags into rows so that we can easily group and count
        group_tags = (
            db_session.query(func.unnest(Topic._tags).label("tag"))  # noqa
            .filter(Topic.group == group)
            .subquery()
        )
        tag_column = group_tags.columns["tag"]

        # get the list of the most common tags, based on frequency - ties are broken
        # by the tag itself so that which tags survive the LIMIT (and the order they
        # are stored in) is deterministic instead of varying from run to run
        common_tags = (
            db_session.query(tag_column, func.count().label("frequency"))
            .group_by("tag")
            .order_by(desc("frequency"), tag_column)
            .limit(MAX_NUM_COMMON_TAGS)
            .all()
        )

        # each result row is (tag, frequency); only the tag is stored
        group._common_topic_tags = [  # noqa
            Ltree(common_tag[0]) for common_tag in common_tags
        ]

        db_session.add(group)
        db_session.commit()
Write
Preview
Loading…
Cancel
Save
Reference in new issue