You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

128 lines
4.3 KiB

  1. # Copyright (c) 2018 Tildes contributors <code@tildes.net>
  2. # SPDX-License-Identifier: AGPL-3.0-or-later
  3. """Script for cleaning up private/deleted data.
  4. Other things that should probably be added here eventually:
  5. - Delete individual votes on comments/topics after voting has been closed
  6. - Delete which users labeled comments after labeling has been closed
  7. - Delete old used invite codes (30 days after used?)
  8. """
from datetime import datetime, timedelta, timezone
import logging

from sqlalchemy.orm.session import Session

from tildes.lib.database import get_session_from_config
from tildes.models.comment import Comment
from tildes.models.log import Log
from tildes.models.topic import Topic, TopicVisit
# Sensitive data older than this should be removed. clean_all_data() passes this
# value to DataCleaner as its retention period.
RETENTION_PERIOD = timedelta(days=30)
  18. def clean_all_data(config_path: str) -> None:
  19. """Clean all private/deleted data.
  20. This should generally be the only function called in most cases, and will initiate
  21. the full cleanup process.
  22. """
  23. db_session = get_session_from_config(config_path)
  24. cleaner = DataCleaner(db_session, RETENTION_PERIOD)
  25. cleaner.clean_all()
  26. class DataCleaner:
  27. """Container class for all methods related to cleaning up old data."""
  28. def __init__(self, db_session: Session, retention_period: timedelta) -> None:
  29. """Create a new DataCleaner."""
  30. self.db_session = db_session
  31. self.retention_cutoff = datetime.now() - retention_period
  32. def clean_all(self) -> None:
  33. """Call all the cleanup functions."""
  34. logging.info(f"Cleaning up all data (retention cutoff {self.retention_cutoff})")
  35. self.delete_old_log_entries()
  36. self.delete_old_topic_visits()
  37. self.clean_old_deleted_comments()
  38. self.clean_old_deleted_topics()
  39. def delete_old_log_entries(self) -> None:
  40. """Delete all log entries older than the retention cutoff.
  41. Note that this will also delete all entries from the child tables that inherit
  42. from Log (LogTopics, etc.).
  43. """
  44. deleted = (
  45. self.db_session.query(Log)
  46. .filter(Log.event_time <= self.retention_cutoff)
  47. .delete(synchronize_session=False)
  48. )
  49. self.db_session.commit()
  50. logging.info(f"Deleted {deleted} old log entries.")
  51. def delete_old_topic_visits(self) -> None:
  52. """Delete all topic visits older than the retention cutoff."""
  53. deleted = (
  54. self.db_session.query(TopicVisit)
  55. .filter(TopicVisit.visit_time <= self.retention_cutoff)
  56. .delete(synchronize_session=False)
  57. )
  58. self.db_session.commit()
  59. logging.info(f"Deleted {deleted} old topic visits.")
  60. def clean_old_deleted_comments(self) -> None:
  61. """Clean the data of old deleted comments.
  62. Change the comment's author to the "unknown user" (id 0), and delete its
  63. contents.
  64. """
  65. updated = (
  66. self.db_session.query(Comment)
  67. .filter(
  68. Comment.deleted_time <= self.retention_cutoff, # type: ignore
  69. Comment.user_id != 0,
  70. )
  71. .update(
  72. {"user_id": 0, "markdown": "", "rendered_html": ""},
  73. synchronize_session=False,
  74. )
  75. )
  76. self.db_session.commit()
  77. logging.info(f"Cleaned {updated} old deleted comments.")
  78. def clean_old_deleted_topics(self) -> None:
  79. """Clean the data of old deleted topics.
  80. Change the topic's author to the "unknown user" (id 0), and delete its title,
  81. contents, tags, and metadata.
  82. """
  83. updated = (
  84. self.db_session.query(Topic)
  85. .filter(
  86. Topic.deleted_time <= self.retention_cutoff, # type: ignore
  87. Topic.user_id != 0,
  88. )
  89. .update(
  90. {
  91. "user_id": 0,
  92. "title": "",
  93. "topic_type": "TEXT",
  94. "markdown": None,
  95. "rendered_html": None,
  96. "link": None,
  97. "content_metadata": None,
  98. "_tags": [],
  99. },
  100. synchronize_session=False,
  101. )
  102. )
  103. self.db_session.commit()
  104. logging.info(f"Cleaned {updated} old deleted topics.")