From 43be910d5cbf46cd9de73d6e0b48682a97d7e71f Mon Sep 17 00:00:00 2001 From: Deimos Date: Tue, 8 Oct 2019 18:16:24 -0600 Subject: [PATCH] Refactor tag list columns to use TypeDecorator I've always been pretty unhappy with the ugly way tags were implemented, using @hybrid_property and needing to do strange things all over the place to deal with converting underscores to spaces and vice versa, as well as other idiosyncracies. There are still a few oddities here and there, but overall this is much better. --- tildes/scripts/clean_private_data.py | 4 +- .../update_groups_common_topic_tags.py | 9 +---- tildes/tests/test_topic_tags.py | 6 --- tildes/tildes/lib/database.py | 18 ++++++++- tildes/tildes/models/group/group.py | 15 +------- tildes/tildes/models/topic/topic.py | 38 +++++++------------ tildes/tildes/models/topic/topic_query.py | 5 +-- tildes/tildes/models/topic/topic_schedule.py | 9 ++--- tildes/tildes/models/user/user.py | 16 ++------ tildes/tildes/schemas/topic.py | 21 +++++----- .../templates/includes/topic_tags.jinja2 | 2 +- tildes/tildes/views/topic.py | 11 +++--- 12 files changed, 62 insertions(+), 92 deletions(-) diff --git a/tildes/scripts/clean_private_data.py b/tildes/scripts/clean_private_data.py index a84d734..dac5114 100644 --- a/tildes/scripts/clean_private_data.py +++ b/tildes/scripts/clean_private_data.py @@ -146,7 +146,7 @@ class DataCleaner: "rendered_html": DEFAULT, "link": DEFAULT, "content_metadata": DEFAULT, - "_tags": DEFAULT, + "tags": DEFAULT, }, synchronize_session=False, ) @@ -183,7 +183,7 @@ class DataCleaner: "permissions": DEFAULT, "home_default_order": DEFAULT, "home_default_period": DEFAULT, - "_filtered_topic_tags": DEFAULT, + "filtered_topic_tags": DEFAULT, "comment_label_weight": DEFAULT, "last_exemplary_label_time": DEFAULT, "_bio_markdown": DEFAULT, diff --git a/tildes/scripts/update_groups_common_topic_tags.py b/tildes/scripts/update_groups_common_topic_tags.py index 115a275..4c3b585 100644 --- a/tildes/scripts/update_groups_common_topic_tags.py +++ b/tildes/scripts/update_groups_common_topic_tags.py @@ -4,7 +4,6 @@ """Script for updating the list of common topic tags for all groups.""" from sqlalchemy import desc, func -from sqlalchemy_utils import Ltree from tildes.lib.database import get_session_from_config from tildes.models.group import Group @@ -26,9 +25,7 @@ def update_common_topic_tags(config_path: str) -> None: # the arrays of tags into rows so that we can easily group and count, and # created_time will be used to determine when a particular tag was last used group_tags = ( - db_session.query( - func.unnest(Topic._tags).label("tag"), Topic.created_time # noqa - ) + db_session.query(func.unnest(Topic.tags).label("tag"), Topic.created_time) .filter(Topic.group == group) .subquery() ) @@ -47,9 +44,7 @@ def update_common_topic_tags(config_path: str) -> None: .all() ) - group._common_topic_tags = [ # noqa - Ltree(common_tag[0]) for common_tag in common_tags - ] + group.common_topic_tags = [common_tag[0] for common_tag in common_tags] db_session.add(group) db_session.commit() diff --git a/tildes/tests/test_topic_tags.py b/tildes/tests/test_topic_tags.py index a988be4..906eb2d 100644 --- a/tildes/tests/test_topic_tags.py +++ b/tildes/tests/test_topic_tags.py @@ -8,12 +8,6 @@ def test_tags_whitespace_stripped(text_topic): assert text_topic.tags == ["one", "two", "three"] -def test_tag_space_replacement(text_topic): - """Ensure spaces in tags are converted to underscores internally.""" - text_topic.tags = ["one two", "three four five"] - assert text_topic._tags == ["one_two", "three_four_five"] - - def test_tag_consecutive_spaces(text_topic): """Ensure consecutive spaces/underscores in tags are removed.""" text_topic.tags = ["one two", "three four", "five __ six"] diff --git a/tildes/tildes/lib/database.py b/tildes/tildes/lib/database.py index 92dbad3..ca32573 100644 --- a/tildes/tildes/lib/database.py +++ b/tildes/tildes/lib/database.py @@ -13,7 +13,7 @@ from sqlalchemy.dialects.postgresql import ARRAY from sqlalchemy.engine.interfaces import Dialect from sqlalchemy.orm.session import Session from sqlalchemy.types import Text, TypeDecorator, UserDefinedType -from sqlalchemy_utils import LtreeType +from sqlalchemy_utils import Ltree, LtreeType from sqlalchemy_utils.types.ltree import LQUERY from tildes.lib.datetime import rrule_to_str @@ -170,3 +170,19 @@ class RecurrenceRule(TypeDecorator): return value return rrulestr(value) + + +class TagList(TypeDecorator): + """Stores a list of tags in the database as an array of ltree.""" + + # pylint: disable=abstract-method + + impl = ArrayOfLtree + + def process_bind_param(self, value: str, dialect: Dialect) -> List[Ltree]: + """Convert the value to ltree[] for storing.""" + return [Ltree(tag.replace(" ", "_")) for tag in value] + + def process_result_value(self, value: List[Ltree], dialect: Dialect) -> List[str]: + """Convert the stored value to a list of strings.""" + return [str(tag).replace("_", " ") for tag in value] diff --git a/tildes/tildes/models/group/group.py b/tildes/tildes/models/group/group.py index ea60100..a27b72f 100644 --- a/tildes/tildes/models/group/group.py +++ b/tildes/tildes/models/group/group.py @@ -13,7 +13,7 @@ from sqlalchemy.orm import deferred from sqlalchemy.sql.expression import text from sqlalchemy_utils import Ltree, LtreeType -from tildes.lib.database import ArrayOfLtree +from tildes.lib.database import TagList from tildes.lib.markdown import convert_markdown_to_safe_html from tildes.models import DatabaseModel from tildes.schemas.group import GroupSchema, SHORT_DESCRIPTION_MAX_LENGTH @@ -56,24 +56,13 @@ class Group(DatabaseModel): is_user_treated_as_topic_source: bool = Column( Boolean, nullable=False, server_default="false" ) - _common_topic_tags: List[Ltree] = Column( - "common_topic_tags", ArrayOfLtree, nullable=False, server_default="{}" - ) + common_topic_tags: List[str] = Column(TagList, nullable=False, server_default="{}") # Create a GiST index on path as well as the btree one that will be created by the # index=True/unique=True keyword args to Column above. The GiST index supports # additional operators for ltree queries: @>, <@, @, ~, ? __table_args__ = (Index("ix_groups_path_gist", path, postgresql_using="gist"),) - @hybrid_property - def common_topic_tags(self) -> List[str]: - """Return the group's list of common topic tags.""" - return [str(tag).replace("_", " ") for tag in self._common_topic_tags] - - @common_topic_tags.setter # type: ignore - def common_topic_tags(self, new_tags: List[str]) -> None: - self._common_topic_tags = new_tags - @hybrid_property def sidebar_markdown(self) -> str: """Return the sidebar's markdown.""" diff --git a/tildes/tildes/models/topic/topic.py b/tildes/tildes/models/topic/topic.py index 0931093..afd3b70 100644 --- a/tildes/tildes/models/topic/topic.py +++ b/tildes/tildes/models/topic/topic.py @@ -4,7 +4,8 @@ """Contains the Topic class.""" from datetime import datetime, timedelta -from typing import Any, Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING +from itertools import chain +from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TYPE_CHECKING from pyramid.security import Allow, Authenticated, Deny, DENY_ALL, Everyone from sqlalchemy import ( @@ -21,11 +22,10 @@ from sqlalchemy.dialects.postgresql import ENUM, JSONB, TSVECTOR from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import deferred, relationship from sqlalchemy.sql.expression import text -from sqlalchemy_utils import Ltree from titlecase import titlecase from tildes.enums import TopicType -from tildes.lib.database import ArrayOfLtree +from tildes.lib.database import TagList from tildes.lib.datetime import utc_from_timestamp, utc_now from tildes.lib.id import id_to_id36 from tildes.lib.markdown import convert_markdown_to_safe_html @@ -121,9 +121,7 @@ class Topic(DatabaseModel): ) num_comments: int = Column(Integer, nullable=False, server_default="0", index=True) num_votes: int = Column(Integer, nullable=False, server_default="0", index=True) - _tags: List[Ltree] = Column( - "tags", ArrayOfLtree, nullable=False, server_default="{}" - ) + tags: List[str] = Column(TagList, nullable=False, server_default="{}") is_official: bool = Column(Boolean, nullable=False, server_default="false") is_locked: bool = Column(Boolean, nullable=False, server_default="false") search_tsv: Any = deferred(Column(TSVECTOR)) @@ -133,7 +131,7 @@ class Topic(DatabaseModel): # Create specialized indexes __table_args__ = ( - Index("ix_topics_tags_gist", _tags, postgresql_using="gist"), + Index("ix_topics_tags_gist", tags, postgresql_using="gist"), Index("ix_topics_search_tsv_gin", "search_tsv", postgresql_using="gin"), ) @@ -160,23 +158,6 @@ class Topic(DatabaseModel): if self.age > EDIT_GRACE_PERIOD: self.last_edited_time = utc_now() - @hybrid_property - def tags(self) -> List[str]: - """Return the topic's tags.""" - sorted_tags = [str(tag).replace("_", " ") for tag in self._tags] - - # move special tags in front - # reverse so that tags at the start of the list appear first - for tag in reversed(SPECIAL_TAGS): - if tag in sorted_tags: - sorted_tags.insert(0, sorted_tags.pop(sorted_tags.index(tag))) - - return sorted_tags - - @tags.setter - def tags(self, new_tags: List[str]) -> None: - self._tags = new_tags - @property def important_tags(self) -> List[str]: """Return only the topic's "important" tags.""" @@ -188,6 +169,15 @@ class Topic(DatabaseModel): important_tags = set(self.important_tags) return [tag for tag in self.tags if tag not in important_tags] + @property + def tags_ordered(self) -> Iterable[str]: + """Return an iterator over the topic's tags, in a suitable order for display. + + Currently, this puts the "important" tags first, but they're otherwise + ordered arbitrarily (whatever order they were entered). + """ + return chain(self.important_tags, self.unimportant_tags) + def __repr__(self) -> str: """Display the topic's title and ID as its repr format.""" return f'' diff --git a/tildes/tildes/models/topic/topic_query.py b/tildes/tildes/models/topic/topic_query.py index b3dc293..0609170 100644 --- a/tildes/tildes/models/topic/topic_query.py +++ b/tildes/tildes/models/topic/topic_query.py @@ -8,7 +8,6 @@ from typing import Any, Sequence from pyramid.request import Request from sqlalchemy import func from sqlalchemy.sql.expression import and_, null -from sqlalchemy_utils import Ltree from tildes.enums import TopicSortOption from tildes.lib.datetime import SimpleHoursPeriod, utc_now @@ -163,7 +162,7 @@ class TopicQuery(PaginatedQuery): return self.filter(Topic.created_time > start_time) - def has_tag(self, tag: Ltree) -> "TopicQuery": + def has_tag(self, tag: str) -> "TopicQuery": """Restrict the topics to ones with a specific tag (generative). Note that this method searches for topics that have any tag that either starts @@ -172,7 +171,7 @@ class TopicQuery(PaginatedQuery): queries = [f"{tag}.*", f"*.{tag}"] # pylint: disable=protected-access - return self.filter(Topic._tags.lquery(queries)) # type: ignore + return self.filter(Topic.tags.lquery(queries)) # type: ignore def search(self, query: str) -> "TopicQuery": """Restrict the topics to ones that match a search query (generative).""" diff --git a/tildes/tildes/models/topic/topic_schedule.py b/tildes/tildes/models/topic/topic_schedule.py index 474d1b9..979f3b1 100644 --- a/tildes/tildes/models/topic/topic_schedule.py +++ b/tildes/tildes/models/topic/topic_schedule.py @@ -11,9 +11,8 @@ from sqlalchemy import CheckConstraint, Column, ForeignKey, Integer, Text, TIMES from sqlalchemy.orm import relationship from sqlalchemy.orm.session import Session from sqlalchemy.sql.expression import text -from sqlalchemy_utils import Ltree -from tildes.lib.database import ArrayOfLtree, RecurrenceRule +from tildes.lib.database import RecurrenceRule, TagList from tildes.models import DatabaseModel from tildes.models.group import Group from tildes.models.topic import Topic @@ -40,7 +39,7 @@ class TopicSchedule(DatabaseModel): nullable=False, ) markdown: str = Column(Text, nullable=False) - tags: List[Ltree] = Column(ArrayOfLtree, nullable=False, server_default="{}") + tags: List[str] = Column(TagList, nullable=False, server_default="{}") next_post_time: Optional[datetime] = Column( TIMESTAMP(timezone=True), nullable=True, index=True ) @@ -63,7 +62,7 @@ class TopicSchedule(DatabaseModel): self.group = group self.title = title self.markdown = markdown - self.tags = [Ltree(tag) for tag in tags] + self.tags = tags self.next_post_time = next_post_time self.recurrence_rule = recurrence_rule self.user = user @@ -82,7 +81,7 @@ class TopicSchedule(DatabaseModel): ) topic = Topic.create_text_topic(self.group, user, self.title, self.markdown) - topic.tags = [str(tag) for tag in self.tags] + topic.tags = self.tags return topic diff --git a/tildes/tildes/models/user/user.py b/tildes/tildes/models/user/user.py index a49a965..4b64f0e 100644 --- a/tildes/tildes/models/user/user.py +++ b/tildes/tildes/models/user/user.py @@ -30,10 +30,9 @@ from sqlalchemy.dialects.postgresql import ARRAY, ENUM, JSONB from sqlalchemy.ext.hybrid import hybrid_property from sqlalchemy.orm import deferred from sqlalchemy.sql.expression import text -from sqlalchemy_utils import Ltree from tildes.enums import CommentLabelOption, HTMLSanitizationContext, TopicSortOption -from tildes.lib.database import ArrayOfLtree, CIText +from tildes.lib.database import CIText, TagList from tildes.lib.datetime import utc_now from tildes.lib.hash import hash_string, is_match_for_hash from tildes.lib.markdown import convert_markdown_to_safe_html @@ -122,8 +121,8 @@ class User(DatabaseModel): permissions: Any = Column(JSONB(none_as_null=True)) home_default_order: Optional[TopicSortOption] = Column(ENUM(TopicSortOption)) home_default_period: Optional[str] = Column(Text) - _filtered_topic_tags: List[Ltree] = Column( - "filtered_topic_tags", ArrayOfLtree, nullable=False, server_default="{}" + filtered_topic_tags: List[str] = Column( + TagList, nullable=False, server_default="{}" ) comment_label_weight: Optional[float] = Column(REAL) last_exemplary_label_time: Optional[datetime] = Column(TIMESTAMP(timezone=True)) @@ -138,15 +137,6 @@ class User(DatabaseModel): ) bio_rendered_html: str = deferred(Column(Text)) - @hybrid_property - def filtered_topic_tags(self) -> List[str]: - """Return the user's list of filtered topic tags.""" - return [str(tag).replace("_", " ") for tag in self._filtered_topic_tags] - - @filtered_topic_tags.setter # type: ignore - def filtered_topic_tags(self, new_tags: List[str]) -> None: - self._filtered_topic_tags = new_tags - @hybrid_property def bio_markdown(self) -> str: """Return the user bio's markdown.""" diff --git a/tildes/tildes/schemas/topic.py b/tildes/tildes/schemas/topic.py index 1282cfb..f287562 100644 --- a/tildes/tildes/schemas/topic.py +++ b/tildes/tildes/schemas/topic.py @@ -7,12 +7,11 @@ import re import typing from urllib.parse import urlparse -import sqlalchemy_utils from marshmallow import pre_load, Schema, validates, validates_schema, ValidationError from marshmallow.fields import DateTime, List, Nested, String, URL from tildes.lib.url_transform import apply_url_transformations -from tildes.schemas.fields import Enum, ID36, Ltree, Markdown, SimpleString +from tildes.schemas.fields import Enum, ID36, Markdown, SimpleString from tildes.schemas.group import GroupSchema from tildes.schemas.user import UserSchema @@ -31,7 +30,7 @@ class TopicSchema(Schema): rendered_html = String(dump_only=True) link = URL(schemes={"http", "https"}, allow_none=True) created_time = DateTime(dump_only=True) - tags = List(Ltree()) + tags = List(String()) user = Nested(UserSchema, dump_only=True) group = Nested(GroupSchema, dump_only=True) @@ -47,14 +46,14 @@ class TopicSchema(Schema): for tag in data["tags"]: tag = tag.lower() - # replace spaces with underscores - tag = tag.replace(" ", "_") + # replace underscores with spaces + tag = tag.replace("_", " ") - # remove any consecutive underscores - tag = re.sub("_{2,}", "_", tag) + # remove any consecutive spaces + tag = re.sub(" {2,}", " ", tag) - # remove any leading/trailing underscores - tag = tag.strip("_") + # remove any leading/trailing spaces + tag = tag.strip(" ") # drop any empty tags if not tag or tag.isspace(): @@ -76,7 +75,7 @@ class TopicSchema(Schema): return data @validates("tags") - def validate_tags(self, value: typing.List[sqlalchemy_utils.Ltree]) -> None: + def validate_tags(self, value: typing.List[str]) -> None: """Validate the tags field, raising an error if an issue exists. Note that tags are validated by ensuring that each tag would be a valid group @@ -88,7 +87,7 @@ class TopicSchema(Schema): group_schema = GroupSchema(partial=True) for tag in value: try: - group_schema.validate({"path": str(tag)}) + group_schema.validate({"path": tag}) except ValidationError: raise ValidationError("Tag %s is invalid" % tag) diff --git a/tildes/tildes/templates/includes/topic_tags.jinja2 b/tildes/tildes/templates/includes/topic_tags.jinja2 index acd3d3a..5ecff90 100644 --- a/tildes/tildes/templates/includes/topic_tags.jinja2 +++ b/tildes/tildes/templates/includes/topic_tags.jinja2 @@ -2,7 +2,7 @@ {# SPDX-License-Identifier: AGPL-3.0-or-later #}
{% if topic.tags %}Tags:{% endif %} -{% for tag in topic.tags %} +{% for tag in topic.tags_ordered %} {{ tag }} {%- if not loop.last %},{% endif %} {% endfor %} diff --git a/tildes/tildes/views/topic.py b/tildes/tildes/views/topic.py index c21b021..9140344 100644 --- a/tildes/tildes/views/topic.py +++ b/tildes/tildes/views/topic.py @@ -27,7 +27,7 @@ from tildes.enums import ( LogEventType, TopicSortOption, ) -from tildes.lib.database import ArrayOfLtree +from tildes.lib.database import TagList from tildes.lib.datetime import SimpleHoursPeriod from tildes.models.comment import Comment, CommentNotification, CommentTree from tildes.models.group import Group, GroupWikiPage @@ -196,7 +196,7 @@ def get_group_topics( # restrict to a specific tag, if we're viewing a single one if tag: - query = query.has_tag(tag) + query = query.has_tag(str(tag)) # apply before/after pagination restrictions if relevant if before: @@ -206,11 +206,10 @@ def get_group_topics( query = query.after_id36(after) # apply topic tag filters unless they're disabled or viewing a single tag - if request.user and not (tag or unfiltered): - # pylint: disable=protected-access + if request.user and request.user.filtered_topic_tags and not (tag or unfiltered): query = query.filter( - ~Topic._tags.descendant_of( # type: ignore - any_(cast(request.user._filtered_topic_tags, ArrayOfLtree)) + ~Topic.tags.descendant_of( # type: ignore + any_(cast(request.user.filtered_topic_tags, TagList)) ) )