Browse Source

Refactor tag list columns to use TypeDecorator

I've always been pretty unhappy with the ugly way tags were implemented,
using @hybrid_property and needing to do strange things all over the
place to deal with converting underscores to spaces and vice versa, as
well as other idiosyncracies.

There are still a few oddities here and there, but overall this is much
better.
merge-requests/85/head
Deimos 5 years ago
parent
commit
43be910d5c
  1. 4
      tildes/scripts/clean_private_data.py
  2. 9
      tildes/scripts/update_groups_common_topic_tags.py
  3. 6
      tildes/tests/test_topic_tags.py
  4. 18
      tildes/tildes/lib/database.py
  5. 15
      tildes/tildes/models/group/group.py
  6. 38
      tildes/tildes/models/topic/topic.py
  7. 5
      tildes/tildes/models/topic/topic_query.py
  8. 9
      tildes/tildes/models/topic/topic_schedule.py
  9. 16
      tildes/tildes/models/user/user.py
  10. 21
      tildes/tildes/schemas/topic.py
  11. 2
      tildes/tildes/templates/includes/topic_tags.jinja2
  12. 11
      tildes/tildes/views/topic.py

4
tildes/scripts/clean_private_data.py

@ -146,7 +146,7 @@ class DataCleaner:
"rendered_html": DEFAULT,
"link": DEFAULT,
"content_metadata": DEFAULT,
"_tags": DEFAULT,
"tags": DEFAULT,
},
synchronize_session=False,
)
@ -183,7 +183,7 @@ class DataCleaner:
"permissions": DEFAULT,
"home_default_order": DEFAULT,
"home_default_period": DEFAULT,
"_filtered_topic_tags": DEFAULT,
"filtered_topic_tags": DEFAULT,
"comment_label_weight": DEFAULT,
"last_exemplary_label_time": DEFAULT,
"_bio_markdown": DEFAULT,

9
tildes/scripts/update_groups_common_topic_tags.py

@ -4,7 +4,6 @@
"""Script for updating the list of common topic tags for all groups."""
from sqlalchemy import desc, func
from sqlalchemy_utils import Ltree
from tildes.lib.database import get_session_from_config
from tildes.models.group import Group
@ -26,9 +25,7 @@ def update_common_topic_tags(config_path: str) -> None:
# the arrays of tags into rows so that we can easily group and count, and
# created_time will be used to determine when a particular tag was last used
group_tags = (
db_session.query(
func.unnest(Topic._tags).label("tag"), Topic.created_time # noqa
)
db_session.query(func.unnest(Topic.tags).label("tag"), Topic.created_time)
.filter(Topic.group == group)
.subquery()
)
@ -47,9 +44,7 @@ def update_common_topic_tags(config_path: str) -> None:
.all()
)
group._common_topic_tags = [ # noqa
Ltree(common_tag[0]) for common_tag in common_tags
]
group.common_topic_tags = [common_tag[0] for common_tag in common_tags]
db_session.add(group)
db_session.commit()

6
tildes/tests/test_topic_tags.py

@ -8,12 +8,6 @@ def test_tags_whitespace_stripped(text_topic):
assert text_topic.tags == ["one", "two", "three"]
def test_tag_space_replacement(text_topic):
"""Ensure spaces in tags are converted to underscores internally."""
text_topic.tags = ["one two", "three four five"]
assert text_topic._tags == ["one_two", "three_four_five"]
def test_tag_consecutive_spaces(text_topic):
"""Ensure consecutive spaces/underscores in tags are removed."""
text_topic.tags = ["one two", "three four", "five __ six"]

18
tildes/tildes/lib/database.py

@ -13,7 +13,7 @@ from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.engine.interfaces import Dialect
from sqlalchemy.orm.session import Session
from sqlalchemy.types import Text, TypeDecorator, UserDefinedType
from sqlalchemy_utils import LtreeType
from sqlalchemy_utils import Ltree, LtreeType
from sqlalchemy_utils.types.ltree import LQUERY
from tildes.lib.datetime import rrule_to_str
@ -170,3 +170,19 @@ class RecurrenceRule(TypeDecorator):
return value
return rrulestr(value)
class TagList(TypeDecorator):
"""Stores a list of tags in the database as an array of ltree."""
# pylint: disable=abstract-method
impl = ArrayOfLtree
def process_bind_param(self, value: str, dialect: Dialect) -> List[Ltree]:
"""Convert the value to ltree[] for storing."""
return [Ltree(tag.replace(" ", "_")) for tag in value]
def process_result_value(self, value: List[Ltree], dialect: Dialect) -> List[str]:
"""Convert the stored value to a list of strings."""
return [str(tag).replace("_", " ") for tag in value]

15
tildes/tildes/models/group/group.py

@ -13,7 +13,7 @@ from sqlalchemy.orm import deferred
from sqlalchemy.sql.expression import text
from sqlalchemy_utils import Ltree, LtreeType
from tildes.lib.database import ArrayOfLtree
from tildes.lib.database import TagList
from tildes.lib.markdown import convert_markdown_to_safe_html
from tildes.models import DatabaseModel
from tildes.schemas.group import GroupSchema, SHORT_DESCRIPTION_MAX_LENGTH
@ -56,24 +56,13 @@ class Group(DatabaseModel):
is_user_treated_as_topic_source: bool = Column(
Boolean, nullable=False, server_default="false"
)
_common_topic_tags: List[Ltree] = Column(
"common_topic_tags", ArrayOfLtree, nullable=False, server_default="{}"
)
common_topic_tags: List[str] = Column(TagList, nullable=False, server_default="{}")
# Create a GiST index on path as well as the btree one that will be created by the
# index=True/unique=True keyword args to Column above. The GiST index supports
# additional operators for ltree queries: @>, <@, @, ~, ?
__table_args__ = (Index("ix_groups_path_gist", path, postgresql_using="gist"),)
@hybrid_property
def common_topic_tags(self) -> List[str]:
"""Return the group's list of common topic tags."""
return [str(tag).replace("_", " ") for tag in self._common_topic_tags]
@common_topic_tags.setter # type: ignore
def common_topic_tags(self, new_tags: List[str]) -> None:
self._common_topic_tags = new_tags
@hybrid_property
def sidebar_markdown(self) -> str:
"""Return the sidebar's markdown."""

38
tildes/tildes/models/topic/topic.py

@ -4,7 +4,8 @@
"""Contains the Topic class."""
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING
from itertools import chain
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TYPE_CHECKING
from pyramid.security import Allow, Authenticated, Deny, DENY_ALL, Everyone
from sqlalchemy import (
@ -21,11 +22,10 @@ from sqlalchemy.dialects.postgresql import ENUM, JSONB, TSVECTOR
from sqlalchemy.ext.mutable import MutableDict
from sqlalchemy.orm import deferred, relationship
from sqlalchemy.sql.expression import text
from sqlalchemy_utils import Ltree
from titlecase import titlecase
from tildes.enums import TopicType
from tildes.lib.database import ArrayOfLtree
from tildes.lib.database import TagList
from tildes.lib.datetime import utc_from_timestamp, utc_now
from tildes.lib.id import id_to_id36
from tildes.lib.markdown import convert_markdown_to_safe_html
@ -121,9 +121,7 @@ class Topic(DatabaseModel):
)
num_comments: int = Column(Integer, nullable=False, server_default="0", index=True)
num_votes: int = Column(Integer, nullable=False, server_default="0", index=True)
_tags: List[Ltree] = Column(
"tags", ArrayOfLtree, nullable=False, server_default="{}"
)
tags: List[str] = Column(TagList, nullable=False, server_default="{}")
is_official: bool = Column(Boolean, nullable=False, server_default="false")
is_locked: bool = Column(Boolean, nullable=False, server_default="false")
search_tsv: Any = deferred(Column(TSVECTOR))
@ -133,7 +131,7 @@ class Topic(DatabaseModel):
# Create specialized indexes
__table_args__ = (
Index("ix_topics_tags_gist", _tags, postgresql_using="gist"),
Index("ix_topics_tags_gist", tags, postgresql_using="gist"),
Index("ix_topics_search_tsv_gin", "search_tsv", postgresql_using="gin"),
)
@ -160,23 +158,6 @@ class Topic(DatabaseModel):
if self.age > EDIT_GRACE_PERIOD:
self.last_edited_time = utc_now()
@hybrid_property
def tags(self) -> List[str]:
"""Return the topic's tags."""
sorted_tags = [str(tag).replace("_", " ") for tag in self._tags]
# move special tags in front
# reverse so that tags at the start of the list appear first
for tag in reversed(SPECIAL_TAGS):
if tag in sorted_tags:
sorted_tags.insert(0, sorted_tags.pop(sorted_tags.index(tag)))
return sorted_tags
@tags.setter
def tags(self, new_tags: List[str]) -> None:
self._tags = new_tags
@property
def important_tags(self) -> List[str]:
"""Return only the topic's "important" tags."""
@ -188,6 +169,15 @@ class Topic(DatabaseModel):
important_tags = set(self.important_tags)
return [tag for tag in self.tags if tag not in important_tags]
@property
def tags_ordered(self) -> Iterable[str]:
"""Return an iterator over the topic's tags, in a suitable order for display.
Currently, this puts the "important" tags first, but they're otherwise
ordered arbitrarily (whatever order they were entered).
"""
return chain(self.important_tags, self.unimportant_tags)
def __repr__(self) -> str:
"""Display the topic's title and ID as its repr format."""
return f'<Topic "{self.title}" ({self.topic_id})>'

5
tildes/tildes/models/topic/topic_query.py

@ -8,7 +8,6 @@ from typing import Any, Sequence
from pyramid.request import Request
from sqlalchemy import func
from sqlalchemy.sql.expression import and_, null
from sqlalchemy_utils import Ltree
from tildes.enums import TopicSortOption
from tildes.lib.datetime import SimpleHoursPeriod, utc_now
@ -163,7 +162,7 @@ class TopicQuery(PaginatedQuery):
return self.filter(Topic.created_time > start_time)
def has_tag(self, tag: Ltree) -> "TopicQuery":
def has_tag(self, tag: str) -> "TopicQuery":
"""Restrict the topics to ones with a specific tag (generative).
Note that this method searches for topics that have any tag that either starts
@ -172,7 +171,7 @@ class TopicQuery(PaginatedQuery):
queries = [f"{tag}.*", f"*.{tag}"]
# pylint: disable=protected-access
return self.filter(Topic._tags.lquery(queries)) # type: ignore
return self.filter(Topic.tags.lquery(queries)) # type: ignore
def search(self, query: str) -> "TopicQuery":
"""Restrict the topics to ones that match a search query (generative)."""

9
tildes/tildes/models/topic/topic_schedule.py

@ -11,9 +11,8 @@ from sqlalchemy import CheckConstraint, Column, ForeignKey, Integer, Text, TIMES
from sqlalchemy.orm import relationship
from sqlalchemy.orm.session import Session
from sqlalchemy.sql.expression import text
from sqlalchemy_utils import Ltree
from tildes.lib.database import ArrayOfLtree, RecurrenceRule
from tildes.lib.database import RecurrenceRule, TagList
from tildes.models import DatabaseModel
from tildes.models.group import Group
from tildes.models.topic import Topic
@ -40,7 +39,7 @@ class TopicSchedule(DatabaseModel):
nullable=False,
)
markdown: str = Column(Text, nullable=False)
tags: List[Ltree] = Column(ArrayOfLtree, nullable=False, server_default="{}")
tags: List[str] = Column(TagList, nullable=False, server_default="{}")
next_post_time: Optional[datetime] = Column(
TIMESTAMP(timezone=True), nullable=True, index=True
)
@ -63,7 +62,7 @@ class TopicSchedule(DatabaseModel):
self.group = group
self.title = title
self.markdown = markdown
self.tags = [Ltree(tag) for tag in tags]
self.tags = tags
self.next_post_time = next_post_time
self.recurrence_rule = recurrence_rule
self.user = user
@ -82,7 +81,7 @@ class TopicSchedule(DatabaseModel):
)
topic = Topic.create_text_topic(self.group, user, self.title, self.markdown)
topic.tags = [str(tag) for tag in self.tags]
topic.tags = self.tags
return topic

16
tildes/tildes/models/user/user.py

@ -30,10 +30,9 @@ from sqlalchemy.dialects.postgresql import ARRAY, ENUM, JSONB
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm import deferred
from sqlalchemy.sql.expression import text
from sqlalchemy_utils import Ltree
from tildes.enums import CommentLabelOption, HTMLSanitizationContext, TopicSortOption
from tildes.lib.database import ArrayOfLtree, CIText
from tildes.lib.database import CIText, TagList
from tildes.lib.datetime import utc_now
from tildes.lib.hash import hash_string, is_match_for_hash
from tildes.lib.markdown import convert_markdown_to_safe_html
@ -122,8 +121,8 @@ class User(DatabaseModel):
permissions: Any = Column(JSONB(none_as_null=True))
home_default_order: Optional[TopicSortOption] = Column(ENUM(TopicSortOption))
home_default_period: Optional[str] = Column(Text)
_filtered_topic_tags: List[Ltree] = Column(
"filtered_topic_tags", ArrayOfLtree, nullable=False, server_default="{}"
filtered_topic_tags: List[str] = Column(
TagList, nullable=False, server_default="{}"
)
comment_label_weight: Optional[float] = Column(REAL)
last_exemplary_label_time: Optional[datetime] = Column(TIMESTAMP(timezone=True))
@ -138,15 +137,6 @@ class User(DatabaseModel):
)
bio_rendered_html: str = deferred(Column(Text))
@hybrid_property
def filtered_topic_tags(self) -> List[str]:
"""Return the user's list of filtered topic tags."""
return [str(tag).replace("_", " ") for tag in self._filtered_topic_tags]
@filtered_topic_tags.setter # type: ignore
def filtered_topic_tags(self, new_tags: List[str]) -> None:
self._filtered_topic_tags = new_tags
@hybrid_property
def bio_markdown(self) -> str:
"""Return the user bio's markdown."""

21
tildes/tildes/schemas/topic.py

@ -7,12 +7,11 @@ import re
import typing
from urllib.parse import urlparse
import sqlalchemy_utils
from marshmallow import pre_load, Schema, validates, validates_schema, ValidationError
from marshmallow.fields import DateTime, List, Nested, String, URL
from tildes.lib.url_transform import apply_url_transformations
from tildes.schemas.fields import Enum, ID36, Ltree, Markdown, SimpleString
from tildes.schemas.fields import Enum, ID36, Markdown, SimpleString
from tildes.schemas.group import GroupSchema
from tildes.schemas.user import UserSchema
@ -31,7 +30,7 @@ class TopicSchema(Schema):
rendered_html = String(dump_only=True)
link = URL(schemes={"http", "https"}, allow_none=True)
created_time = DateTime(dump_only=True)
tags = List(Ltree())
tags = List(String())
user = Nested(UserSchema, dump_only=True)
group = Nested(GroupSchema, dump_only=True)
@ -47,14 +46,14 @@ class TopicSchema(Schema):
for tag in data["tags"]:
tag = tag.lower()
# replace spaces with underscores
tag = tag.replace(" ", "_")
# replace underscores with spaces
tag = tag.replace("_", " ")
# remove any consecutive underscores
tag = re.sub("_{2,}", "_", tag)
# remove any consecutive spaces
tag = re.sub(" {2,}", " ", tag)
# remove any leading/trailing underscores
tag = tag.strip("_")
# remove any leading/trailing spaces
tag = tag.strip(" ")
# drop any empty tags
if not tag or tag.isspace():
@ -76,7 +75,7 @@ class TopicSchema(Schema):
return data
@validates("tags")
def validate_tags(self, value: typing.List[sqlalchemy_utils.Ltree]) -> None:
def validate_tags(self, value: typing.List[str]) -> None:
"""Validate the tags field, raising an error if an issue exists.
Note that tags are validated by ensuring that each tag would be a valid group
@ -88,7 +87,7 @@ class TopicSchema(Schema):
group_schema = GroupSchema(partial=True)
for tag in value:
try:
group_schema.validate({"path": str(tag)})
group_schema.validate({"path": tag})
except ValidationError:
raise ValidationError("Tag %s is invalid" % tag)

2
tildes/tildes/templates/includes/topic_tags.jinja2

@ -2,7 +2,7 @@
{# SPDX-License-Identifier: AGPL-3.0-or-later #}
<div class="topic-full-tags">{% if topic.tags %}Tags:{% endif %}
{% for tag in topic.tags %}
{% for tag in topic.tags_ordered %}
<a href="/~{{ topic.group.path }}?tag={{ tag.replace(" ", "_") }}">{{ tag }}</a>
{%- if not loop.last %},{% endif %}
{% endfor %}

11
tildes/tildes/views/topic.py

@ -27,7 +27,7 @@ from tildes.enums import (
LogEventType,
TopicSortOption,
)
from tildes.lib.database import ArrayOfLtree
from tildes.lib.database import TagList
from tildes.lib.datetime import SimpleHoursPeriod
from tildes.models.comment import Comment, CommentNotification, CommentTree
from tildes.models.group import Group, GroupWikiPage
@ -196,7 +196,7 @@ def get_group_topics(
# restrict to a specific tag, if we're viewing a single one
if tag:
query = query.has_tag(tag)
query = query.has_tag(str(tag))
# apply before/after pagination restrictions if relevant
if before:
@ -206,11 +206,10 @@ def get_group_topics(
query = query.after_id36(after)
# apply topic tag filters unless they're disabled or viewing a single tag
if request.user and not (tag or unfiltered):
# pylint: disable=protected-access
if request.user and request.user.filtered_topic_tags and not (tag or unfiltered):
query = query.filter(
~Topic._tags.descendant_of( # type: ignore
any_(cast(request.user._filtered_topic_tags, ArrayOfLtree))
~Topic.tags.descendant_of( # type: ignore
any_(cast(request.user.filtered_topic_tags, TagList))
)
)

Loading…
Cancel
Save