From 9792214da8638b93aea086614b773477f7302296 Mon Sep 17 00:00:00 2001 From: Deimos Date: Sat, 27 Apr 2019 16:10:29 -0600 Subject: [PATCH] Common topic tags: break ties using most recent At the "bottom" of the common topic tags list, there can be a large number of tags that all have the same usage count, with the cutoff being somewhere in the middle of the list. For example, the list for a group might be cutting off in the middle of "tags that have been used 5 times". Previously, it was more or less random which tags in that set would be included or excluded - this changes it so that the ones used most recently will be given preference. --- .../scripts/update_groups_common_topic_tags.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tildes/scripts/update_groups_common_topic_tags.py b/tildes/scripts/update_groups_common_topic_tags.py index 1b15d80..115a275 100644 --- a/tildes/scripts/update_groups_common_topic_tags.py +++ b/tildes/scripts/update_groups_common_topic_tags.py @@ -23,18 +23,26 @@ def update_common_topic_tags(config_path: str) -> None: for group in all_groups: # create a subquery for all tags from topics in that group - UNNEST() converts - # the arrays of tags into rows so that we can easily group and count + # the arrays of tags into rows so that we can easily group and count, and + # created_time will be used to determine when a particular tag was last used group_tags = ( - db_session.query(func.unnest(Topic._tags).label("tag")) # noqa + db_session.query( + func.unnest(Topic._tags).label("tag"), Topic.created_time # noqa + ) .filter(Topic.group == group) .subquery() ) - # get the list of the most common tags, based on frequency + # get the list of the most common tags, based on frequency and breaking ties + # with which was used most recently common_tags = ( - db_session.query(group_tags.columns["tag"], func.count().label("frequency")) + db_session.query( + group_tags.columns["tag"], 
func.count().label("frequency"), + func.max(group_tags.columns["created_time"]).label("last_used"), + ) .group_by("tag") - .order_by(desc("frequency")) + .order_by(desc("frequency"), desc("last_used")) .limit(MAX_NUM_COMMON_TAGS) .all() )