From 43be910d5cbf46cd9de73d6e0b48682a97d7e71f Mon Sep 17 00:00:00 2001
From: Deimos <deimos@tildes.net>
Date: Tue, 8 Oct 2019 18:16:24 -0600
Subject: [PATCH] Refactor tag list columns to use TypeDecorator

I've always been pretty unhappy with the ugly way tags were implemented,
using @hybrid_property and needing to do strange things all over the
place to deal with converting underscores to spaces and vice versa, as
well as other idiosyncracies.

There are still a few oddities here and there, but overall this is much
better.
---
 tildes/scripts/clean_private_data.py          |  4 +-
 .../update_groups_common_topic_tags.py        |  9 +----
 tildes/tests/test_topic_tags.py               |  6 ---
 tildes/tildes/lib/database.py                 | 18 ++++++++-
 tildes/tildes/models/group/group.py           | 15 +-------
 tildes/tildes/models/topic/topic.py           | 38 +++++++------------
 tildes/tildes/models/topic/topic_query.py     |  5 +--
 tildes/tildes/models/topic/topic_schedule.py  |  9 ++---
 tildes/tildes/models/user/user.py             | 16 ++------
 tildes/tildes/schemas/topic.py                | 21 +++++-----
 .../templates/includes/topic_tags.jinja2      |  2 +-
 tildes/tildes/views/topic.py                  | 11 +++---
 12 files changed, 62 insertions(+), 92 deletions(-)

diff --git a/tildes/scripts/clean_private_data.py b/tildes/scripts/clean_private_data.py
index a84d734..dac5114 100644
--- a/tildes/scripts/clean_private_data.py
+++ b/tildes/scripts/clean_private_data.py
@@ -146,7 +146,7 @@ class DataCleaner:
                     "rendered_html": DEFAULT,
                     "link": DEFAULT,
                     "content_metadata": DEFAULT,
-                    "_tags": DEFAULT,
+                    "tags": DEFAULT,
                 },
                 synchronize_session=False,
             )
@@ -183,7 +183,7 @@ class DataCleaner:
                     "permissions": DEFAULT,
                     "home_default_order": DEFAULT,
                     "home_default_period": DEFAULT,
-                    "_filtered_topic_tags": DEFAULT,
+                    "filtered_topic_tags": DEFAULT,
                     "comment_label_weight": DEFAULT,
                     "last_exemplary_label_time": DEFAULT,
                     "_bio_markdown": DEFAULT,
diff --git a/tildes/scripts/update_groups_common_topic_tags.py b/tildes/scripts/update_groups_common_topic_tags.py
index 115a275..4c3b585 100644
--- a/tildes/scripts/update_groups_common_topic_tags.py
+++ b/tildes/scripts/update_groups_common_topic_tags.py
@@ -4,7 +4,6 @@
 """Script for updating the list of common topic tags for all groups."""
 
 from sqlalchemy import desc, func
-from sqlalchemy_utils import Ltree
 
 from tildes.lib.database import get_session_from_config
 from tildes.models.group import Group
@@ -26,9 +25,7 @@ def update_common_topic_tags(config_path: str) -> None:
         # the arrays of tags into rows so that we can easily group and count, and
         # created_time will be used to determine when a particular tag was last used
         group_tags = (
-            db_session.query(
-                func.unnest(Topic._tags).label("tag"), Topic.created_time  # noqa
-            )
+            db_session.query(func.unnest(Topic.tags).label("tag"), Topic.created_time)
             .filter(Topic.group == group)
             .subquery()
         )
@@ -47,9 +44,7 @@ def update_common_topic_tags(config_path: str) -> None:
             .all()
         )
 
-        group._common_topic_tags = [  # noqa
-            Ltree(common_tag[0]) for common_tag in common_tags
-        ]
+        group.common_topic_tags = [common_tag[0] for common_tag in common_tags]
 
         db_session.add(group)
         db_session.commit()
diff --git a/tildes/tests/test_topic_tags.py b/tildes/tests/test_topic_tags.py
index a988be4..906eb2d 100644
--- a/tildes/tests/test_topic_tags.py
+++ b/tildes/tests/test_topic_tags.py
@@ -8,12 +8,6 @@ def test_tags_whitespace_stripped(text_topic):
     assert text_topic.tags == ["one", "two", "three"]
 
 
-def test_tag_space_replacement(text_topic):
-    """Ensure spaces in tags are converted to underscores internally."""
-    text_topic.tags = ["one two", "three four five"]
-    assert text_topic._tags == ["one_two", "three_four_five"]
-
-
 def test_tag_consecutive_spaces(text_topic):
     """Ensure consecutive spaces/underscores in tags are removed."""
     text_topic.tags = ["one  two", "three   four", "five __ six"]
diff --git a/tildes/tildes/lib/database.py b/tildes/tildes/lib/database.py
index 92dbad3..ca32573 100644
--- a/tildes/tildes/lib/database.py
+++ b/tildes/tildes/lib/database.py
@@ -13,7 +13,7 @@ from sqlalchemy.dialects.postgresql import ARRAY
 from sqlalchemy.engine.interfaces import Dialect
 from sqlalchemy.orm.session import Session
 from sqlalchemy.types import Text, TypeDecorator, UserDefinedType
-from sqlalchemy_utils import LtreeType
+from sqlalchemy_utils import Ltree, LtreeType
 from sqlalchemy_utils.types.ltree import LQUERY
 
 from tildes.lib.datetime import rrule_to_str
@@ -170,3 +170,19 @@ class RecurrenceRule(TypeDecorator):
             return value
 
         return rrulestr(value)
+
+
+class TagList(TypeDecorator):
+    """Stores a list of tags in the database as an array of ltree."""
+
+    # pylint: disable=abstract-method
+
+    impl = ArrayOfLtree
+
+    def process_bind_param(self, value: str, dialect: Dialect) -> List[Ltree]:
+        """Convert the value to ltree[] for storing."""
+        return [Ltree(tag.replace(" ", "_")) for tag in value]
+
+    def process_result_value(self, value: List[Ltree], dialect: Dialect) -> List[str]:
+        """Convert the stored value to a list of strings."""
+        return [str(tag).replace("_", " ") for tag in value]
diff --git a/tildes/tildes/models/group/group.py b/tildes/tildes/models/group/group.py
index ea60100..a27b72f 100644
--- a/tildes/tildes/models/group/group.py
+++ b/tildes/tildes/models/group/group.py
@@ -13,7 +13,7 @@ from sqlalchemy.orm import deferred
 from sqlalchemy.sql.expression import text
 from sqlalchemy_utils import Ltree, LtreeType
 
-from tildes.lib.database import ArrayOfLtree
+from tildes.lib.database import TagList
 from tildes.lib.markdown import convert_markdown_to_safe_html
 from tildes.models import DatabaseModel
 from tildes.schemas.group import GroupSchema, SHORT_DESCRIPTION_MAX_LENGTH
@@ -56,24 +56,13 @@ class Group(DatabaseModel):
     is_user_treated_as_topic_source: bool = Column(
         Boolean, nullable=False, server_default="false"
     )
-    _common_topic_tags: List[Ltree] = Column(
-        "common_topic_tags", ArrayOfLtree, nullable=False, server_default="{}"
-    )
+    common_topic_tags: List[str] = Column(TagList, nullable=False, server_default="{}")
 
     # Create a GiST index on path as well as the btree one that will be created by the
     # index=True/unique=True keyword args to Column above. The GiST index supports
     # additional operators for ltree queries: @>, <@, @, ~, ?
     __table_args__ = (Index("ix_groups_path_gist", path, postgresql_using="gist"),)
 
-    @hybrid_property
-    def common_topic_tags(self) -> List[str]:
-        """Return the group's list of common topic tags."""
-        return [str(tag).replace("_", " ") for tag in self._common_topic_tags]
-
-    @common_topic_tags.setter  # type: ignore
-    def common_topic_tags(self, new_tags: List[str]) -> None:
-        self._common_topic_tags = new_tags
-
     @hybrid_property
     def sidebar_markdown(self) -> str:
         """Return the sidebar's markdown."""
diff --git a/tildes/tildes/models/topic/topic.py b/tildes/tildes/models/topic/topic.py
index 0931093..afd3b70 100644
--- a/tildes/tildes/models/topic/topic.py
+++ b/tildes/tildes/models/topic/topic.py
@@ -4,7 +4,8 @@
 """Contains the Topic class."""
 
 from datetime import datetime, timedelta
-from typing import Any, Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING
+from itertools import chain
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TYPE_CHECKING
 
 from pyramid.security import Allow, Authenticated, Deny, DENY_ALL, Everyone
 from sqlalchemy import (
@@ -21,11 +22,10 @@ from sqlalchemy.dialects.postgresql import ENUM, JSONB, TSVECTOR
 from sqlalchemy.ext.mutable import MutableDict
 from sqlalchemy.orm import deferred, relationship
 from sqlalchemy.sql.expression import text
-from sqlalchemy_utils import Ltree
 from titlecase import titlecase
 
 from tildes.enums import TopicType
-from tildes.lib.database import ArrayOfLtree
+from tildes.lib.database import TagList
 from tildes.lib.datetime import utc_from_timestamp, utc_now
 from tildes.lib.id import id_to_id36
 from tildes.lib.markdown import convert_markdown_to_safe_html
@@ -121,9 +121,7 @@ class Topic(DatabaseModel):
     )
     num_comments: int = Column(Integer, nullable=False, server_default="0", index=True)
     num_votes: int = Column(Integer, nullable=False, server_default="0", index=True)
-    _tags: List[Ltree] = Column(
-        "tags", ArrayOfLtree, nullable=False, server_default="{}"
-    )
+    tags: List[str] = Column(TagList, nullable=False, server_default="{}")
     is_official: bool = Column(Boolean, nullable=False, server_default="false")
     is_locked: bool = Column(Boolean, nullable=False, server_default="false")
     search_tsv: Any = deferred(Column(TSVECTOR))
@@ -133,7 +131,7 @@ class Topic(DatabaseModel):
 
     # Create specialized indexes
     __table_args__ = (
-        Index("ix_topics_tags_gist", _tags, postgresql_using="gist"),
+        Index("ix_topics_tags_gist", tags, postgresql_using="gist"),
         Index("ix_topics_search_tsv_gin", "search_tsv", postgresql_using="gin"),
     )
 
@@ -160,23 +158,6 @@ class Topic(DatabaseModel):
         if self.age > EDIT_GRACE_PERIOD:
             self.last_edited_time = utc_now()
 
-    @hybrid_property
-    def tags(self) -> List[str]:
-        """Return the topic's tags."""
-        sorted_tags = [str(tag).replace("_", " ") for tag in self._tags]
-
-        # move special tags in front
-        # reverse so that tags at the start of the list appear first
-        for tag in reversed(SPECIAL_TAGS):
-            if tag in sorted_tags:
-                sorted_tags.insert(0, sorted_tags.pop(sorted_tags.index(tag)))
-
-        return sorted_tags
-
-    @tags.setter
-    def tags(self, new_tags: List[str]) -> None:
-        self._tags = new_tags
-
     @property
     def important_tags(self) -> List[str]:
         """Return only the topic's "important" tags."""
@@ -188,6 +169,15 @@ class Topic(DatabaseModel):
         important_tags = set(self.important_tags)
         return [tag for tag in self.tags if tag not in important_tags]
 
+    @property
+    def tags_ordered(self) -> Iterable[str]:
+        """Return an iterator over the topic's tags, in a suitable order for display.
+
+        Currently, this puts the "important" tags first, but they're otherwise
+        ordered arbitrarily (whatever order they were entered).
+        """
+        return chain(self.important_tags, self.unimportant_tags)
+
     def __repr__(self) -> str:
         """Display the topic's title and ID as its repr format."""
         return f'<Topic "{self.title}" ({self.topic_id})>'
diff --git a/tildes/tildes/models/topic/topic_query.py b/tildes/tildes/models/topic/topic_query.py
index b3dc293..0609170 100644
--- a/tildes/tildes/models/topic/topic_query.py
+++ b/tildes/tildes/models/topic/topic_query.py
@@ -8,7 +8,6 @@ from typing import Any, Sequence
 from pyramid.request import Request
 from sqlalchemy import func
 from sqlalchemy.sql.expression import and_, null
-from sqlalchemy_utils import Ltree
 
 from tildes.enums import TopicSortOption
 from tildes.lib.datetime import SimpleHoursPeriod, utc_now
@@ -163,7 +162,7 @@ class TopicQuery(PaginatedQuery):
 
         return self.filter(Topic.created_time > start_time)
 
-    def has_tag(self, tag: Ltree) -> "TopicQuery":
+    def has_tag(self, tag: str) -> "TopicQuery":
         """Restrict the topics to ones with a specific tag (generative).
 
         Note that this method searches for topics that have any tag that either starts
@@ -172,7 +171,7 @@ class TopicQuery(PaginatedQuery):
         queries = [f"{tag}.*", f"*.{tag}"]
 
         # pylint: disable=protected-access
-        return self.filter(Topic._tags.lquery(queries))  # type: ignore
+        return self.filter(Topic.tags.lquery(queries))  # type: ignore
 
     def search(self, query: str) -> "TopicQuery":
         """Restrict the topics to ones that match a search query (generative)."""
diff --git a/tildes/tildes/models/topic/topic_schedule.py b/tildes/tildes/models/topic/topic_schedule.py
index 474d1b9..979f3b1 100644
--- a/tildes/tildes/models/topic/topic_schedule.py
+++ b/tildes/tildes/models/topic/topic_schedule.py
@@ -11,9 +11,8 @@ from sqlalchemy import CheckConstraint, Column, ForeignKey, Integer, Text, TIMES
 from sqlalchemy.orm import relationship
 from sqlalchemy.orm.session import Session
 from sqlalchemy.sql.expression import text
-from sqlalchemy_utils import Ltree
 
-from tildes.lib.database import ArrayOfLtree, RecurrenceRule
+from tildes.lib.database import RecurrenceRule, TagList
 from tildes.models import DatabaseModel
 from tildes.models.group import Group
 from tildes.models.topic import Topic
@@ -40,7 +39,7 @@ class TopicSchedule(DatabaseModel):
         nullable=False,
     )
     markdown: str = Column(Text, nullable=False)
-    tags: List[Ltree] = Column(ArrayOfLtree, nullable=False, server_default="{}")
+    tags: List[str] = Column(TagList, nullable=False, server_default="{}")
     next_post_time: Optional[datetime] = Column(
         TIMESTAMP(timezone=True), nullable=True, index=True
     )
@@ -63,7 +62,7 @@ class TopicSchedule(DatabaseModel):
         self.group = group
         self.title = title
         self.markdown = markdown
-        self.tags = [Ltree(tag) for tag in tags]
+        self.tags = tags
         self.next_post_time = next_post_time
         self.recurrence_rule = recurrence_rule
         self.user = user
@@ -82,7 +81,7 @@ class TopicSchedule(DatabaseModel):
             )
 
         topic = Topic.create_text_topic(self.group, user, self.title, self.markdown)
-        topic.tags = [str(tag) for tag in self.tags]
+        topic.tags = self.tags
 
         return topic
 
diff --git a/tildes/tildes/models/user/user.py b/tildes/tildes/models/user/user.py
index a49a965..4b64f0e 100644
--- a/tildes/tildes/models/user/user.py
+++ b/tildes/tildes/models/user/user.py
@@ -30,10 +30,9 @@ from sqlalchemy.dialects.postgresql import ARRAY, ENUM, JSONB
 from sqlalchemy.ext.hybrid import hybrid_property
 from sqlalchemy.orm import deferred
 from sqlalchemy.sql.expression import text
-from sqlalchemy_utils import Ltree
 
 from tildes.enums import CommentLabelOption, HTMLSanitizationContext, TopicSortOption
-from tildes.lib.database import ArrayOfLtree, CIText
+from tildes.lib.database import CIText, TagList
 from tildes.lib.datetime import utc_now
 from tildes.lib.hash import hash_string, is_match_for_hash
 from tildes.lib.markdown import convert_markdown_to_safe_html
@@ -122,8 +121,8 @@ class User(DatabaseModel):
     permissions: Any = Column(JSONB(none_as_null=True))
     home_default_order: Optional[TopicSortOption] = Column(ENUM(TopicSortOption))
     home_default_period: Optional[str] = Column(Text)
-    _filtered_topic_tags: List[Ltree] = Column(
-        "filtered_topic_tags", ArrayOfLtree, nullable=False, server_default="{}"
+    filtered_topic_tags: List[str] = Column(
+        TagList, nullable=False, server_default="{}"
     )
     comment_label_weight: Optional[float] = Column(REAL)
     last_exemplary_label_time: Optional[datetime] = Column(TIMESTAMP(timezone=True))
@@ -138,15 +137,6 @@ class User(DatabaseModel):
     )
     bio_rendered_html: str = deferred(Column(Text))
 
-    @hybrid_property
-    def filtered_topic_tags(self) -> List[str]:
-        """Return the user's list of filtered topic tags."""
-        return [str(tag).replace("_", " ") for tag in self._filtered_topic_tags]
-
-    @filtered_topic_tags.setter  # type: ignore
-    def filtered_topic_tags(self, new_tags: List[str]) -> None:
-        self._filtered_topic_tags = new_tags
-
     @hybrid_property
     def bio_markdown(self) -> str:
         """Return the user bio's markdown."""
diff --git a/tildes/tildes/schemas/topic.py b/tildes/tildes/schemas/topic.py
index 1282cfb..f287562 100644
--- a/tildes/tildes/schemas/topic.py
+++ b/tildes/tildes/schemas/topic.py
@@ -7,12 +7,11 @@ import re
 import typing
 from urllib.parse import urlparse
 
-import sqlalchemy_utils
 from marshmallow import pre_load, Schema, validates, validates_schema, ValidationError
 from marshmallow.fields import DateTime, List, Nested, String, URL
 
 from tildes.lib.url_transform import apply_url_transformations
-from tildes.schemas.fields import Enum, ID36, Ltree, Markdown, SimpleString
+from tildes.schemas.fields import Enum, ID36, Markdown, SimpleString
 from tildes.schemas.group import GroupSchema
 from tildes.schemas.user import UserSchema
 
@@ -31,7 +30,7 @@ class TopicSchema(Schema):
     rendered_html = String(dump_only=True)
     link = URL(schemes={"http", "https"}, allow_none=True)
     created_time = DateTime(dump_only=True)
-    tags = List(Ltree())
+    tags = List(String())
 
     user = Nested(UserSchema, dump_only=True)
     group = Nested(GroupSchema, dump_only=True)
@@ -47,14 +46,14 @@ class TopicSchema(Schema):
         for tag in data["tags"]:
             tag = tag.lower()
 
-            # replace spaces with underscores
-            tag = tag.replace(" ", "_")
+            # replace underscores with spaces
+            tag = tag.replace("_", " ")
 
-            # remove any consecutive underscores
-            tag = re.sub("_{2,}", "_", tag)
+            # remove any consecutive spaces
+            tag = re.sub(" {2,}", " ", tag)
 
-            # remove any leading/trailing underscores
-            tag = tag.strip("_")
+            # remove any leading/trailing spaces
+            tag = tag.strip(" ")
 
             # drop any empty tags
             if not tag or tag.isspace():
@@ -76,7 +75,7 @@ class TopicSchema(Schema):
         return data
 
     @validates("tags")
-    def validate_tags(self, value: typing.List[sqlalchemy_utils.Ltree]) -> None:
+    def validate_tags(self, value: typing.List[str]) -> None:
         """Validate the tags field, raising an error if an issue exists.
 
         Note that tags are validated by ensuring that each tag would be a valid group
@@ -88,7 +87,7 @@ class TopicSchema(Schema):
         group_schema = GroupSchema(partial=True)
         for tag in value:
             try:
-                group_schema.validate({"path": str(tag)})
+                group_schema.validate({"path": tag})
             except ValidationError:
                 raise ValidationError("Tag %s is invalid" % tag)
 
diff --git a/tildes/tildes/templates/includes/topic_tags.jinja2 b/tildes/tildes/templates/includes/topic_tags.jinja2
index acd3d3a..5ecff90 100644
--- a/tildes/tildes/templates/includes/topic_tags.jinja2
+++ b/tildes/tildes/templates/includes/topic_tags.jinja2
@@ -2,7 +2,7 @@
 {# SPDX-License-Identifier: AGPL-3.0-or-later #}
 
 <div class="topic-full-tags">{% if topic.tags %}Tags:{% endif %}
-{% for tag in topic.tags %}
+{% for tag in topic.tags_ordered %}
   <a href="/~{{ topic.group.path }}?tag={{ tag.replace(" ", "_") }}">{{ tag }}</a>
   {%- if not loop.last %},{% endif %}
 {% endfor %}
diff --git a/tildes/tildes/views/topic.py b/tildes/tildes/views/topic.py
index c21b021..9140344 100644
--- a/tildes/tildes/views/topic.py
+++ b/tildes/tildes/views/topic.py
@@ -27,7 +27,7 @@ from tildes.enums import (
     LogEventType,
     TopicSortOption,
 )
-from tildes.lib.database import ArrayOfLtree
+from tildes.lib.database import TagList
 from tildes.lib.datetime import SimpleHoursPeriod
 from tildes.models.comment import Comment, CommentNotification, CommentTree
 from tildes.models.group import Group, GroupWikiPage
@@ -196,7 +196,7 @@ def get_group_topics(
 
     # restrict to a specific tag, if we're viewing a single one
     if tag:
-        query = query.has_tag(tag)
+        query = query.has_tag(str(tag))
 
     # apply before/after pagination restrictions if relevant
     if before:
@@ -206,11 +206,10 @@ def get_group_topics(
         query = query.after_id36(after)
 
     # apply topic tag filters unless they're disabled or viewing a single tag
-    if request.user and not (tag or unfiltered):
-        # pylint: disable=protected-access
+    if request.user and request.user.filtered_topic_tags and not (tag or unfiltered):
         query = query.filter(
-            ~Topic._tags.descendant_of(  # type: ignore
-                any_(cast(request.user._filtered_topic_tags, ArrayOfLtree))
+            ~Topic.tags.descendant_of(  # type: ignore
+                any_(cast(request.user.filtered_topic_tags, TagList))
             )
         )