diff --git a/salt/salt/consumers/init.sls b/salt/salt/consumers/init.sls index a05a680..5c4df93 100644 --- a/salt/salt/consumers/init.sls +++ b/salt/salt/consumers/init.sls @@ -22,6 +22,14 @@ - group: root - mode: 644 +/etc/systemd/system/consumer-post_processing_script_runner.service: + file.managed: + - source: salt://consumers/post_processing_script_runner.service.jinja2 + - template: jinja + - user: root + - group: root + - mode: 644 + consumer-topic_interesting_activity_updater.service: service.running: - enable: True @@ -34,6 +42,10 @@ consumer-comment_user_mentions_generator.service: service.running: - enable: True +consumer-post_processing_script_runner.service: + service.running: + - enable: True + {% if grains['id'] == 'prod' %} /etc/systemd/system/consumer-topic_embedly_extractor.service: file.managed: diff --git a/salt/salt/consumers/post_processing_script_runner.service.jinja2 b/salt/salt/consumers/post_processing_script_runner.service.jinja2 new file mode 100644 index 0000000..b7c0f34 --- /dev/null +++ b/salt/salt/consumers/post_processing_script_runner.service.jinja2 @@ -0,0 +1,18 @@ +{% from 'common.jinja2' import app_dir, app_username, bin_dir -%} +[Unit] +Description=Post Processing Script Runner (Queue Consumer) +Requires=redis.service +After=redis.service +PartOf=redis.service + +[Service] +User={{ app_username }} +Group={{ app_username }} +WorkingDirectory={{ app_dir }}/consumers +Environment="INI_FILE={{ app_dir }}/{{ pillar['ini_file'] }}" +ExecStart={{ bin_dir }}/python post_processing_script_runner.py +Restart=always +RestartSec=5 + +[Install] +WantedBy=multi-user.target diff --git a/tildes/alembic/versions/55f4c1f951d5_add_group_scripts_table.py b/tildes/alembic/versions/55f4c1f951d5_add_group_scripts_table.py new file mode 100644 index 0000000..b5b287f --- /dev/null +++ b/tildes/alembic/versions/55f4c1f951d5_add_group_scripts_table.py @@ -0,0 +1,35 @@ +"""Add group_scripts table + +Revision ID: 55f4c1f951d5 +Revises: 28d7ce2c4825 +Create 
Date: 2020-11-30 19:54:30.731335
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "55f4c1f951d5"
+down_revision = "28d7ce2c4825"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.create_table(
+        "group_scripts",
+        sa.Column("script_id", sa.Integer(), nullable=False),
+        sa.Column("group_id", sa.Integer(), nullable=True),
+        sa.Column("code", sa.Text(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["group_id"],
+            ["groups.group_id"],
+            name=op.f("fk_group_scripts_group_id_groups"),
+        ),
+        sa.PrimaryKeyConstraint("script_id", name=op.f("pk_group_scripts")),
+    )
+
+
+def downgrade():
+    op.drop_table("group_scripts")
diff --git a/tildes/consumers/post_processing_script_runner.py b/tildes/consumers/post_processing_script_runner.py
new file mode 100644
index 0000000..52fd266
--- /dev/null
+++ b/tildes/consumers/post_processing_script_runner.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2020 Tildes contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Consumer that runs processing scripts on posts."""
+
+from sqlalchemy import desc
+from sqlalchemy.sql.expression import or_
+
+from tildes.lib.event_stream import EventStreamConsumer, Message
+from tildes.lib.lua import SandboxedLua
+from tildes.models.comment import Comment
+from tildes.models.group import GroupScript
+from tildes.models.scripting import CommentScriptingWrapper, TopicScriptingWrapper
+from tildes.models.topic import Topic
+
+
+class PostProcessingScriptRunner(EventStreamConsumer):
+    """Consumer that runs the global and group Lua scripts on new topics/comments."""
+
+    METRICS_PORT = 25016
+
+    def process_message(self, message: Message) -> None:
+        """Run the applicable scripts' hook functions on a newly-inserted post.
+
+        Messages that reference neither a topic nor a comment are ignored, and so are
+        deleted posts. Scripts that don't define the hook for the post's type are
+        skipped.
+        """
+        if "topic_id" in message.fields:
+            post = (
+                self.db_session.query(Topic)
+                .filter_by(topic_id=message.fields["topic_id"])
+                .one()
+            )
+            wrapper_class = TopicScriptingWrapper
+            hook_name = "on_topic_post"
+            group = post.group
+        elif "comment_id" in message.fields:
+            post = (
+                self.db_session.query(Comment)
+                .filter_by(comment_id=message.fields["comment_id"])
+                .one()
+            )
+            wrapper_class = CommentScriptingWrapper
+            hook_name = "on_comment_post"
+            group = post.topic.group
+        else:
+            # not a post event, so there's nothing to run scripts on (falling
+            # through would fail on the unassigned "post" below)
+            return
+
+        if post.is_deleted:
+            return
+
+        scripts_to_run = (
+            self.db_session.query(GroupScript)
+            .filter(or_(GroupScript.group_id.is_(None), GroupScript.group == group))
+            # global scripts have no group, and need to be sorted first
+            .order_by(desc(GroupScript.group_id).nullsfirst())
+            .all()
+        )
+
+        for script in scripts_to_run:
+            lua_sandbox = SandboxedLua()
+            lua_sandbox.run_code(script.code)
+
+            # look the hook up before calling it instead of catching ValueError
+            # around the call, so errors raised while the hook runs aren't hidden
+            hook = lua_sandbox.get_lua_function(hook_name)
+            if not hook:
+                continue
+
+            hook(wrapper_class(post, lua_sandbox))
+
+
+if __name__ == "__main__":
+    PostProcessingScriptRunner(
+        "post_processing_script_runner",
+        source_streams=[
+            "comments.insert",
+            "topics.insert",
+        ],
+    ).consume_streams()
diff --git a/tildes/lua/sandbox.lua b/tildes/lua/sandbox.lua
new file mode 100644
index 0000000..dc68fc7
--- /dev/null
+++ b/tildes/lua/sandbox.lua
@@ -0,0 +1,277 @@
+-- Lua Sandbox
+-- From the Splash project: https://github.com/scrapinghub/splash
+-- Original version was as of Splash commit 75a5394af310bf07d704c3c05c0e9902d88592f2
+--
+-- Copyright (c) Scrapinghub
+-- All rights reserved.
+--
+-- Redistribution and use in source and binary forms, with or without modification,
+-- are permitted provided that the following conditions are met:
+--
+-- 1. Redistributions of source code must retain the above copyright notice,
+-- this list of conditions and the following disclaimer.
+--
+-- 2. Redistributions in binary form must reproduce the above copyright
+-- notice, this list of conditions and the following disclaimer in the
+-- documentation and/or other materials provided with the distribution.
+--
+-- 3.
Neither the name of Splash nor the names of its contributors may be used
+-- to endorse or promote products derived from this software without
+-- specific prior written permission.
+--
+-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+-- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+local sandbox = {}
+
+sandbox.allowed_require_names = {}
+
+-- 6.4 String Manipulation
+-- http://www.lua.org/manual/5.2/manual.html#6.4
+local _string = {
+  byte = string.byte,
+  char = string.char,
+  find = string.find,
+  format = string.format,
+--  gmatch = string.gmatch, -- can be CPU intensive
+--  gsub = string.gsub, -- can be CPU intensive; can result in arbitrary native code execution (in 5.1)?
+  len = string.len,
+  lower = string.lower,
+--  match = string.match, -- can be CPU intensive
+--  rep = string.rep, -- can eat memory
+  reverse = string.reverse,
+  sub = string.sub,
+  upper = string.upper,
+}
+
+
+sandbox.env = {
+  --
+  -- 6.1 Basic Functions
+  -- http://www.lua.org/manual/5.2/manual.html#6.1
+  assert = assert,
+  error = error,
+  ipairs = ipairs,
+  next = next,
+  pairs = pairs,
+  pcall = pcall,
+  print = print, -- should we disable it?
+  select = select,
+  tonumber = tonumber,
+  tostring = tostring, -- Mike Pall says it is unsafe; why? See http://lua-users.org/lists/lua-l/2011-02/msg01595.html
+  type = type,
+  xpcall = xpcall,
+
+  --
+  -- 6.2 Coroutine Manipulation
+  -- http://www.lua.org/manual/5.2/manual.html#6.2
+  --
+  -- Disabled because:
+  -- 1. coroutines are used internally - users shouldn't yield to Splash themselves;
+  -- 2. debug hooks are per-coroutine in 'standard' Lua (not LuaJIT) - this requires a workaround.
+
+  --
+  -- 6.3 Modules
+  -- http://www.lua.org/manual/5.2/manual.html#6.3
+  --
+  require = function(name)
+    if sandbox.allowed_require_names[name] then
+      local ok, res = pcall(function() return require(name) end)
+      if ok then
+        return res
+      end
+    end
+    error("module '" .. name .. "' not found", 2)
+  end,
+
+  --
+  -- 6.4 String Manipulation
+  -- http://www.lua.org/manual/5.2/manual.html#6.4
+  string = _string,
+
+  --
+  -- 6.5 Table Manipulation
+  -- http://www.lua.org/manual/5.2/manual.html#6.5
+  table = {
+    concat = table.concat,
+    insert = table.insert,
+    pack = table.pack,
+    remove = table.remove,
+--    sort = table.sort, -- can result in arbitrary native code execution (in 5.1)?
+    unpack = table.unpack,
+  },
+
+  --
+  -- 6.6 Mathematical Functions
+  -- http://www.lua.org/manual/5.2/manual.html#6.6
+  math = {
+    abs = math.abs,
+    acos = math.acos,
+    asin = math.asin,
+    atan = math.atan,
+    atan2 = math.atan2,
+    ceil = math.ceil,
+    cos = math.cos,
+    cosh = math.cosh,
+    deg = math.deg,
+    exp = math.exp,
+    floor = math.floor,
+    fmod = math.fmod,
+    frexp = math.frexp,
+    huge = math.huge,
+    ldexp = math.ldexp,
+    log = math.log,
+    max = math.max,
+    min = math.min,
+    modf = math.modf,
+    pi = math.pi,
+    pow = math.pow,
+    rad = math.rad,
+    random = math.random,
+    randomseed = math.randomseed,
+    sin = math.sin,
+    sinh = math.sinh,
+    sqrt = math.sqrt,
+    tan = math.tan,
+    tanh = math.tanh,
+  },
+
+  --
+  -- 6.7 Bitwise Operations
+  -- http://www.lua.org/manual/5.2/manual.html#6.7
+  --
+  -- Disabled: if anyone cares we may add them.
+
+  --
+  -- 6.8 Input and Output Facilities
+  -- http://www.lua.org/manual/5.2/manual.html#6.8
+  --
+  -- Disabled.
+
+  --
+  -- 6.9 Operating System Facilities
+  -- http://www.lua.org/manual/5.2/manual.html#6.9
+  os = {
+    clock = os.clock,
+--    date = os.date, -- from wiki: "This can crash on some platforms (undocumented). For example, os.date'%v'. It is reported that this will be fixed in 5.2 or 5.1.3."
+    difftime = os.difftime,
+    time = os.time,
+  },
+
+  --
+  -- 6.10 The Debug Library
+  -- http://www.lua.org/manual/5.2/manual.html#6.10
+  --
+  -- Disabled.
+}
+
+-------------------------------------------------------------
+--
+-- Fix metatables. Some of the functions are available
+-- via metatables of primitive types; disable them all.
+--
+sandbox.fix_metatables = function()
+  -- Fix string metatable: provide common functions
+  -- from string module.
+  local mt = {__index={}}
+  for k, v in pairs(_string) do
+    mt['__index'][k] = v
+  end
+  debug.setmetatable('', mt)
+
+  -- 2. Make sure there are no other metatables:
+  debug.setmetatable(1, nil)
+  debug.setmetatable(function() end, nil)
+  debug.setmetatable(true, nil)
+end
+
+
+-------------------------------------------------------------
+--
+-- Basic memory and CPU limits.
+-- Based on code by Roberto Ierusalimschy.
+-- http://lua-users.org/lists/lua-l/2013-12/msg00406.html
+--
+
+-- maximum memory (in KB) that can be used by Lua script
+sandbox.mem_limit = 100000
+sandbox.mem_limit_reached = false
+
+function sandbox.enable_memory_limit()
+  if sandbox._memory_tracking_enabled then
+    return
+  end
+  local mt = {__gc = function (u)
+    if sandbox.mem_limit_reached then
+      error("script uses too much memory")
+    end
+    if collectgarbage("count") > sandbox.mem_limit then
+      sandbox.mem_limit_reached = true
+      error("script uses too much memory")
+    else
+      -- create a new object for the next GC cycle
+      setmetatable({}, getmetatable(u))
+    end
+  end }
+  -- create an empty object which will be collected at next GC cycle
+  setmetatable({}, mt)
+  sandbox._memory_tracking_enabled = true
+end
+
+
+-- Maximum number of instructions that can be executed.
+-- XXX: the slowdown only becomes perceivable at ~5m instructions.
+sandbox.instruction_limit = 1e7
+sandbox.instruction_count = 0
+
+function sandbox.enable_per_instruction_limits()
+  local function _debug_step(event, line)
+    sandbox.instruction_count = sandbox.instruction_count + 1
+    if sandbox.instruction_count > sandbox.instruction_limit then
+      error("script uses too much CPU", 2)
+    end
+    if sandbox.mem_limit_reached then
+      error("script uses too much memory")
+    end
+  end
+  debug.sethook(_debug_step, '', 1)
+end
+
+
+-- In Lua (but not in LuaJIT) debug hooks are per-coroutine.
+-- Use this function as a replacement for `coroutine.create` to ensure
+-- instruction limit is enforced in coroutines.
+function sandbox.create_coroutine(f, ...)
+  return coroutine.create(function(...)
+    sandbox.enable_per_instruction_limits()
+    return f(...)
+  end, ...)
+end
+
+
+-------------------------------------------------------------
+--
+-- Lua 5.2 sandbox.
+--
+-- Note that it changes the global state: after the first `sandbox.run`
+-- call the runtime becomes restricted in CPU and memory, and
+-- "string":methods() like "foo":upper() stop working.
+--
+function sandbox.run(untrusted_code)
+  sandbox.fix_metatables()
+  sandbox.enable_memory_limit()
+  sandbox.enable_per_instruction_limits()
+  local untrusted_function, message = load(untrusted_code, nil, 't', sandbox.env)
+  if not untrusted_function then return nil, message end
+  return pcall(untrusted_function)
+end
+
+return sandbox
diff --git a/tildes/requirements-dev.txt b/tildes/requirements-dev.txt
index e3c201c..38379d2 100644
--- a/tildes/requirements-dev.txt
+++ b/tildes/requirements-dev.txt
@@ -32,6 +32,7 @@ isort==4.3.21
 jedi==0.17.2
 jinja2==2.11.2
 lazy-object-proxy==1.4.3
+lupa==1.9
 mako==1.1.3
 markupsafe==1.1.1
 marshmallow==3.9.0
diff --git a/tildes/requirements.in b/tildes/requirements.in
index 2fe3844..1cc2cee 100644
--- a/tildes/requirements.in
+++ b/tildes/requirements.in
@@ -9,6 +9,7 @@ gunicorn
 html5lib
 invoke
 ipython
+lupa
 marshmallow
 Pillow
 pip-tools
diff --git a/tildes/requirements.txt b/tildes/requirements.txt
index 02b2669..23a3aeb 100644
--- a/tildes/requirements.txt
+++ b/tildes/requirements.txt
@@ -20,6 +20,7 @@ ipython-genutils==0.2.0
 ipython==7.19.0
 jedi==0.17.2
 jinja2==2.11.2
+lupa==1.9
 mako==1.1.3
 markupsafe==1.1.1
 marshmallow==3.9.0
diff --git a/tildes/tildes/database_models.py b/tildes/tildes/database_models.py
index 6b3e1a3..dd9e1b6 100644
--- a/tildes/tildes/database_models.py
+++ b/tildes/tildes/database_models.py
@@ -13,7 +13,7 @@ from tildes.models.comment import (
     CommentVote,
 )
 from tildes.models.financials import Financials
-from tildes.models.group import Group, GroupStat, GroupSubscription
+from tildes.models.group import Group, GroupScript, GroupStat, GroupSubscription
 from tildes.models.log import Log
 from tildes.models.message import MessageConversation, MessageReply
 from tildes.models.scraper import ScraperResult
diff --git a/tildes/tildes/lib/lua.py b/tildes/tildes/lib/lua.py
new file mode 100644
index 0000000..27d420e
--- /dev/null
+++ b/tildes/tildes/lib/lua.py
@@ -0,0 +1,98 @@
+# Copyright (c) 2020 Tildes contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Functions and classes related to Lua scripting."""
+
+from pathlib import Path
+from typing import Any, Callable, Optional
+
+from lupa import LuaError, LuaRuntime, lua_type
+
+
+LUA_PACKAGES_PATH = Path("/opt/tildes/lua", "?.lua")
+
+
+def getter_handler(obj: Any, attr_name: str) -> Any:
+    """Return the value of an object's attr, if scripts are allowed access.
+
+    Depends on a "gettable_attrs" attribute on the object, which should be a list of
+    attr names that scripts are allowed to access.
+    """
+    gettable_attrs = getattr(obj, "gettable_attrs", [])
+
+    if attr_name not in gettable_attrs:
+        raise AttributeError(f"{attr_name}")
+
+    return getattr(obj, attr_name)
+
+
+def setter_handler(obj: Any, attr_name: str, value: Any) -> None:
+    """Set an object's attr to a new value, if scripts are allowed to do so.
+
+    Depends on a "settable_attrs" attribute on the object, which should be a list of
+    attr names that scripts are allowed to overwrite the value of.
+    """
+    settable_attrs = getattr(obj, "settable_attrs", [])
+
+    if attr_name not in settable_attrs:
+        raise AttributeError(f"{attr_name}")
+
+    setattr(obj, attr_name, value)
+
+
+class SandboxedLua:
+    """A Lua runtime environment that's restricted to a sandbox.
+
+    The sandbox is mostly implemented in Lua itself, and restricts the capabilities
+    and data that code will be able to use. There are also some attempts to restrict
+    resource usage, but I don't know how effective it is (and should probably be done
+    on the OS level as well).
+    """
+
+    def __init__(self) -> None:
+        """Create a Lua runtime and set up the sandbox environment inside it."""
+        self.lua = LuaRuntime(
+            register_eval=False,
+            register_builtins=False,
+            unpack_returned_tuples=True,
+            attribute_handlers=(getter_handler, setter_handler),
+        )
+
+        self.lua.execute(f"package.path = '{LUA_PACKAGES_PATH}'")
+        self.sandbox = self.lua.eval('require("sandbox")')
+
+    def run_code(self, code: str) -> None:
+        """Run Lua code inside the sandboxed environment.
+
+        Raises LuaError if the code fails to compile or errors while running.
+        """
+        result = self.sandbox.run(code)
+
+        # sandbox.run returns pcall's results: a lone True when the code succeeds
+        # without returning anything, otherwise a tuple whose first item is the
+        # success flag (followed by the error message or the code's return values)
+        succeeded, *details = result if isinstance(result, tuple) else (result,)
+
+        if succeeded is not True:
+            raise LuaError(details[0] if details else "Lua code failed to run")
+
+    def get_lua_function(self, name: str) -> Optional[Callable]:
+        """Return the named Lua function so it can be called on Python data.
+
+        Returns None if the sandbox has no function with that name, including when the
+        name is defined but isn't a function.
+        """
+        value = self.sandbox.env[name]
+
+        return value if lua_type(value) == "function" else None
+
+    def run_lua_function(self, name: str, *args: Any) -> None:
+        """Run the named Lua function, passing in the remaining args.
+
+        Raises ValueError if no function with that name exists.
+        """
+        function = self.get_lua_function(name)
+        if not function:
+            raise ValueError(f"No Lua function named {name} exists")
+
+        function(*args)
diff --git a/tildes/tildes/models/group/__init__.py b/tildes/tildes/models/group/__init__.py
index 1189c8a..1831ce2 100644
--- a/tildes/tildes/models/group/__init__.py
+++ b/tildes/tildes/models/group/__init__.py
@@ -2,6 +2,7 @@
 
 from .group import Group
 from .group_query import GroupQuery
+from .group_script import GroupScript
 from .group_stat import GroupStat
 from .group_subscription import GroupSubscription
 from .group_wiki_page import GroupWikiPage
diff --git a/tildes/tildes/models/group/group_script.py b/tildes/tildes/models/group/group_script.py
new file mode 100644
index 0000000..276208b
--- /dev/null
+++ b/tildes/tildes/models/group/group_script.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2020 Tildes contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Contains the GroupScript class."""
+
+from typing import Optional
+
+from pyramid.security import DENY_ALL
+from sqlalchemy import Column, ForeignKey, Integer, Text
+from sqlalchemy.orm import
relationship
+
+from tildes.models import DatabaseModel
+from tildes.typing import AclType
+
+from .group import Group
+
+
+class GroupScript(DatabaseModel):
+    """Model for a script in a group, which can be used to process topics/comments."""
+
+    __tablename__ = "group_scripts"
+
+    script_id: int = Column(Integer, primary_key=True)
+    group_id: Optional[int] = Column(Integer, ForeignKey("groups.group_id"))
+    code: str = Column(Text, nullable=False)
+
+    group: Optional[Group] = relationship("Group")
+
+    def __init__(self, group: Optional[Group], code: str):
+        """Create a new script for a group, or a global script if no group is given."""
+        self.group = group
+        self.code = code
+
+    def __acl__(self) -> AclType:
+        """Pyramid security ACL."""
+        acl = []
+
+        # for now, deny all permissions through the app
+        acl.append(DENY_ALL)
+
+        return acl
diff --git a/tildes/tildes/models/scripting.py b/tildes/tildes/models/scripting.py
new file mode 100644
index 0000000..f38dd57
--- /dev/null
+++ b/tildes/tildes/models/scripting.py
@@ -0,0 +1,107 @@
+# Copyright (c) 2020 Tildes contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Model wrappers that control which data and methods are accessible for scripting.
+
+Each wrapper class needs to have "gettable_attrs" and/or "settable_attrs" properties
+that define which attributes (including methods) are accessible from inside scripts.
+
+The wrappers are wrapt ObjectProxy subclasses, so any attribute assigned on a wrapper
+is passed through to the wrapped model unless its name starts with "_self_". State
+that belongs to the wrapper itself must therefore use that prefix.
+"""
+
+from wrapt import ObjectProxy
+
+from tildes.lib.lua import SandboxedLua
+
+from .comment import Comment
+from .topic import Topic
+from .user import User
+
+
+class UserScriptingWrapper(ObjectProxy):
+    # pylint: disable=abstract-method
+    """Wrapper for the User model."""
+
+    gettable_attrs = {"username"}
+
+    def __init__(self, user: User, lua_sandbox: SandboxedLua):
+        """Wrap a User."""
+        super().__init__(user)
+
+        self._self_lua = lua_sandbox.lua
+
+
+class TopicScriptingWrapper(ObjectProxy):
+    # pylint: disable=abstract-method
+    """Wrapper for the Topic model."""
+
+    gettable_attrs = {
+        "is_link_type",
+        "is_text_type",
+        "link",
+        "link_domain",
+        "markdown",
+        "remove",
+        "tags",
+        "title",
+        "user",
+    }
+    settable_attrs = {"link", "tags", "title"}
+
+    def __init__(self, topic: Topic, lua_sandbox: SandboxedLua):
+        """Wrap a Topic."""
+        super().__init__(topic)
+
+        self._self_lua = lua_sandbox.lua
+        self._self_user = UserScriptingWrapper(topic.user, lua_sandbox)
+
+    @property
+    def user(self) -> UserScriptingWrapper:
+        """Return the topic's author, wrapped for scripting."""
+        return self._self_user
+
+    @property
+    def tags(self):  # type: ignore
+        """Return the topic's tags as a Lua table."""
+        return self._self_lua.table_from(self.__wrapped__.tags)
+
+    @tags.setter
+    def tags(self, new_tags):  # type: ignore
+        """Set the topic's tags, the new value should be a Lua table."""
+        # materialize the values, since the Lua table only provides an iterator
+        self.__wrapped__.tags = list(new_tags.values())
+
+    def remove(self) -> None:
+        """Remove the topic."""
+        self.__wrapped__.is_removed = True
+
+
+class CommentScriptingWrapper(ObjectProxy):
+    # pylint: disable=abstract-method
+    """Wrapper for the Comment model."""
+
+    gettable_attrs = {"markdown", "remove", "topic", "user"}
+
+    def __init__(self, comment: Comment, lua_sandbox: SandboxedLua):
+        """Wrap a Comment."""
+        super().__init__(comment)
+
+        self._self_lua = lua_sandbox.lua
+        self._self_topic = TopicScriptingWrapper(comment.topic, lua_sandbox)
+        self._self_user = UserScriptingWrapper(comment.user, lua_sandbox)
+
+    @property
+    def topic(self) -> TopicScriptingWrapper:
+        """Return the comment's topic, wrapped for scripting."""
+        return self._self_topic
+
+    @property
+    def user(self) -> UserScriptingWrapper:
+        """Return the comment's author, wrapped for scripting."""
+        return self._self_user
+
+    def remove(self) -> None:
+        """Remove the comment."""
+        self.__wrapped__.is_removed = True