Browse Source

Clean extra data associated with deleted users

This updates the clean_private_data script to delete more data
associated with users that deleted their accounts at least 30 days ago,
including all of their votes, subscriptions, bookmarks, and
notifications.
merge-requests/64/head
Deimos 6 years ago
parent
commit
9e276e7310
  1. 49
      tildes/scripts/clean_private_data.py

49
tildes/scripts/clean_private_data.py

@ -16,10 +16,17 @@ from sqlalchemy.orm.session import Session
from sqlalchemy.sql.expression import text
from tildes.lib.database import get_session_from_config
from tildes.models.comment import Comment
from tildes.models.comment import (
Comment,
CommentBookmark,
CommentLabel,
CommentNotification,
CommentVote,
)
from tildes.models.group import GroupSubscription
from tildes.models.log import Log
from tildes.models.topic import Topic, TopicVisit
from tildes.models.user import User
from tildes.models.topic import Topic, TopicBookmark, TopicVisit, TopicVote
from tildes.models.user import User, UserGroupSettings
# sensitive data older than this should be removed
@ -49,6 +56,9 @@ class DataCleaner:
self.db_session = db_session
self.retention_cutoff = datetime.now() - retention_period
# set high timeout for this script, since cleanup can activate a lot of triggers
self.db_session.execute("SET statement_timeout TO '10min'")
def clean_all(self) -> None:
"""Call all the cleanup functions."""
logging.info(f"Cleaning up all data (retention cutoff {self.retention_cutoff})")
@ -60,6 +70,8 @@ class DataCleaner:
self.clean_old_deleted_topics()
self.clean_old_deleted_users()
self.clean_old_deleted_user_data()
def delete_old_log_entries(self) -> None:
"""Delete all log entries older than the retention cutoff.
@ -183,3 +195,34 @@ class DataCleaner:
)
self.db_session.commit()
logging.info(f"Cleaned {updated} old deleted users.")
def clean_old_deleted_user_data(self) -> None:
"""Clean additional data from deleted users (subscriptions, votes, etc.)."""
models_to_delete_from = [
CommentBookmark,
CommentLabel,
CommentNotification,
CommentVote,
GroupSubscription,
TopicBookmark,
TopicVote,
UserGroupSettings,
]
user_id_subquery = (
self.db_session.query(User.user_id)
.filter(
User.is_deleted == True, # noqa
User.deleted_time <= self.retention_cutoff, # type: ignore
)
.subquery()
)
for model_cls in models_to_delete_from:
deleted = (
self.db_session.query(model_cls)
.filter(model_cls.user_id.in_(user_id_subquery)) # type: ignore
.delete(synchronize_session=False)
)
self.db_session.commit()
logging.info(f"Deleted {deleted} rows from {model_cls.__name__}.")
Loading…
Cancel
Save