Browse Source

Add ability to process posts with Lua scripts

This adds the backend pieces (no interface yet) to configure Lua scripts
that will be applied to topics and comments in response to different events.
Initially, it only supports running a script when a new topic or comment
is posted. For example, here is a Lua script that would prepend a new
topic's title with "[Text] " or "[Link] " depending on its type, as well
as replace its tags with either "text" or "link":

function on_topic_post (topic)
    if (topic.is_text_type) then
        topic.title = "[Text] " .. topic.title
        topic.tags = {"text"}
    elseif (topic.is_link_type) then
        topic.title = "[Link] " .. topic.title
        topic.tags = {"link"}
    end
end

There can be a global script as well as group-specific scripts, and the
scripts are sandboxed, with limited access to data as well as being
restricted to a subset of Lua's built-in functions. The Lua sandboxing
code comes from Splash (https://github.com/scrapinghub/splash). It will
need to be modified, but this commit keeps it unmodified so that future
changes can be more easily tracked by comparing to the original state of
the file.

The sandboxing also includes some restrictions on number of instructions
and memory usage, but this might be more effectively managed on the OS
level. More research will still need to be done on security and resource
restrictions before this feature can be safely opened to users.
merge-requests/126/merge
Deimos 4 years ago
parent
commit
5fbc72c44c
  1. 12
      salt/salt/consumers/init.sls
  2. 18
      salt/salt/consumers/post_processing_script_runner.service.jinja2
  3. 35
      tildes/alembic/versions/55f4c1f951d5_add_group_scripts_table.py
  4. 73
      tildes/consumers/post_processing_script_runner.py
  5. 277
      tildes/lua/sandbox.lua
  6. 1
      tildes/requirements-dev.txt
  7. 1
      tildes/requirements.in
  8. 1
      tildes/requirements.txt
  9. 2
      tildes/tildes/database_models.py
  10. 81
      tildes/tildes/lib/lua.py
  11. 1
      tildes/tildes/models/group/__init__.py
  12. 41
      tildes/tildes/models/group/group_script.py
  13. 89
      tildes/tildes/models/scripting.py

12
salt/salt/consumers/init.sls

@ -22,6 +22,14 @@
- group: root
- mode: 644
/etc/systemd/system/consumer-post_processing_script_runner.service:
file.managed:
- source: salt://consumers/post_processing_script_runner.service.jinja2
- template: jinja
- user: root
- group: root
- mode: 644
consumer-topic_interesting_activity_updater.service:
service.running:
- enable: True
@ -34,6 +42,10 @@ consumer-comment_user_mentions_generator.service:
service.running:
- enable: True
consumer-post_processing_script_runner.service:
service.running:
- enable: True
{% if grains['id'] == 'prod' %}
/etc/systemd/system/consumer-topic_embedly_extractor.service:
file.managed:

18
salt/salt/consumers/post_processing_script_runner.service.jinja2

@ -0,0 +1,18 @@
{% from 'common.jinja2' import app_dir, app_username, bin_dir -%}
[Unit]
Description=Post Processing Script Runner (Queue Consumer)
Requires=redis.service
After=redis.service
PartOf=redis.service
[Service]
User={{ app_username }}
Group={{ app_username }}
WorkingDirectory={{ app_dir }}/consumers
Environment="INI_FILE={{ app_dir }}/{{ pillar['ini_file'] }}"
ExecStart={{ bin_dir }}/python post_processing_script_runner.py
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target

35
tildes/alembic/versions/55f4c1f951d5_add_group_scripts_table.py

@ -0,0 +1,35 @@
"""Add group_scripts table
Revision ID: 55f4c1f951d5
Revises: 28d7ce2c4825
Create Date: 2020-11-30 19:54:30.731335
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "55f4c1f951d5"
down_revision = "28d7ce2c4825"
branch_labels = None
depends_on = None
def upgrade():
    """Create the group_scripts table.

    Each row holds one script's code; group_id is nullable and references
    groups.group_id.
    """
    columns = [
        sa.Column("script_id", sa.Integer(), nullable=False),
        sa.Column("group_id", sa.Integer(), nullable=True),
        sa.Column("code", sa.Text(), nullable=False),
    ]
    group_foreign_key = sa.ForeignKeyConstraint(
        ["group_id"],
        ["groups.group_id"],
        name=op.f("fk_group_scripts_group_id_groups"),
    )
    primary_key = sa.PrimaryKeyConstraint("script_id", name=op.f("pk_group_scripts"))

    op.create_table("group_scripts", *columns, group_foreign_key, primary_key)
def downgrade():
    """Drop the group_scripts table again, undoing upgrade()."""
    table_name = "group_scripts"
    op.drop_table(table_name)

73
tildes/consumers/post_processing_script_runner.py

@ -0,0 +1,73 @@
# Copyright (c) 2020 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Consumer that runs processing scripts on posts."""
from sqlalchemy import desc
from sqlalchemy.sql.expression import or_
from tildes.lib.event_stream import EventStreamConsumer, Message
from tildes.lib.lua import SandboxedLua
from tildes.models.comment import Comment
from tildes.models.group import GroupScript
from tildes.models.scripting import CommentScriptingWrapper, TopicScriptingWrapper
from tildes.models.topic import Topic
class PostProcessingScriptRunner(EventStreamConsumer):
    """Consumer that runs group and global Lua scripts on new topics and comments."""

    METRICS_PORT = 25016

    def process_message(self, message: Message) -> None:
        """Run every applicable script's post hook on the message's topic or comment.

        The message is expected to carry either a "topic_id" or a "comment_id"
        field. The post is loaded, and then each GroupScript that is either global
        (no group) or attached to the post's group is executed in its own fresh
        sandbox, calling its "on_topic_post" / "on_comment_post" function with a
        scripting wrapper around the post.

        Messages with neither id, and deleted posts, are skipped.
        """
        if "topic_id" in message.fields:
            post = (
                self.db_session.query(Topic)
                .filter_by(topic_id=message.fields["topic_id"])
                .one()
            )
            wrapper_class = TopicScriptingWrapper
            group = post.group
        elif "comment_id" in message.fields:
            post = (
                self.db_session.query(Comment)
                .filter_by(comment_id=message.fields["comment_id"])
                .one()
            )
            wrapper_class = CommentScriptingWrapper
            group = post.topic.group
        else:
            # Nothing identifies a post; previously this fell through and failed
            # with an UnboundLocalError on `post` below.
            return

        if post.is_deleted:
            return

        # NOTE(review): "global first" relies on the database ordering NULL
        # group_ids first under DESC (PostgreSQL's default) - confirm.
        scripts_to_run = (
            self.db_session.query(GroupScript)
            .filter(or_(GroupScript.group == None, GroupScript.group == group))  # noqa
            .order_by(desc(GroupScript.group_id))  # sort the global script first
            .all()
        )

        for script in scripts_to_run:
            # each script gets its own sandbox so scripts can't see each other's state
            lua_sandbox = SandboxedLua()
            lua_sandbox.run_code(script.code)
            wrapped_post = wrapper_class(post, lua_sandbox)

            try:
                if isinstance(post, Topic):
                    lua_sandbox.run_lua_function("on_topic_post", wrapped_post)
                elif isinstance(post, Comment):
                    lua_sandbox.run_lua_function("on_comment_post", wrapped_post)
            except ValueError:
                # run_lua_function raises ValueError when the script doesn't define
                # the hook; a script is free to only handle some events
                pass
if __name__ == "__main__":
    # consume insert events for both kinds of posts
    consumer = PostProcessingScriptRunner(
        "post_processing_script_runner",
        source_streams=["comments.insert", "topics.insert"],
    )
    consumer.consume_streams()

277
tildes/lua/sandbox.lua

@ -0,0 +1,277 @@
-- Lua Sandbox
-- From the Splash project: https://github.com/scrapinghub/splash
-- Original version was as of Splash commit 75a5394af310bf07d704c3c05c0e9902d88592f2
--
-- Copyright (c) Scrapinghub
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without modification,
-- are permitted provided that the following conditions are met:
--
-- 1. Redistributions of source code must retain the above copyright notice,
-- this list of conditions and the following disclaimer.
--
-- 2. Redistributions in binary form must reproduce the above copyright
-- notice, this list of conditions and the following disclaimer in the
-- documentation and/or other materials provided with the distribution.
--
-- 3. Neither the name of Splash nor the names of its contributors may be used
-- to endorse or promote products derived from this software without
-- specific prior written permission.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-- Module table; it is returned to the caller at the end of this file.
local sandbox = {}
-- Module names the sandboxed `require` (see sandbox.env.require) may load.
-- Empty by default, so every `require` from untrusted code fails.
sandbox.allowed_require_names = {}
-- 6.4 String Manipulation
-- http://www.lua.org/manual/5.2/manual.html#6.4
--
-- Whitelisted subset of the string library. This one table is exposed as
-- `string` in sandbox.env and is also copied into the string metatable by
-- sandbox.fix_metatables().
local _string = {
byte = string.byte,
char = string.char,
find = string.find,
format = string.format,
-- gmatch = string.gmatch, -- can be CPU intensive
-- gsub = string.gsub, -- can be CPU intensive; can result in arbitrary native code execution (in 5.1)?
len = string.len,
lower = string.lower,
-- match = string.match, -- can be CPU intensive
-- rep = string.rep, -- can eat memory
reverse = string.reverse,
sub = string.sub,
upper = string.upper,
}
-- Environment table passed to `load` in sandbox.run: these entries are the only
-- globals visible to untrusted code. Global functions defined by a script
-- also end up in this table.
sandbox.env = {
--
-- 6.1 Basic Functions
-- http://www.lua.org/manual/5.2/manual.html#6.1
assert = assert,
error = error,
ipairs = ipairs,
next = next,
pairs = pairs,
pcall = pcall,
print = print, -- should we disable it?
select = select,
tonumber = tonumber,
tostring = tostring, -- Mike Pall says it is unsafe; why? See http://lua-users.org/lists/lua-l/2011-02/msg01595.html
type = type,
xpcall = xpcall,
--
-- 6.2 Coroutine Manipulation
-- http://www.lua.org/manual/5.2/manual.html#6.2
--
-- Disabled because:
-- 1. coroutines are used internally - users shouldn't yield to Splash themselves;
-- 2. debug hooks are per-coroutine in 'standard' Lua (not LuaJIT) - this requires a workaround.
--
-- 6.3 Modules
-- http://www.lua.org/manual/5.2/manual.html#6.3
--
-- Restricted `require`: only names flagged in sandbox.allowed_require_names are
-- passed to the real `require`; anything else (or a load failure) is reported
-- as "module not found" at the caller's level.
require = function(name)
if sandbox.allowed_require_names[name] then
local ok, res = pcall(function() return require(name) end)
if ok then
return res
end
end
error("module '" .. name .. "' not found", 2)
end,
--
-- 6.4 String Manipulation
-- http://www.lua.org/manual/5.2/manual.html#6.4
string = _string,
--
-- 6.5 Table Manipulation
-- http://www.lua.org/manual/5.2/manual.html#6.5
table = {
concat = table.concat,
insert = table.insert,
pack = table.pack,
remove = table.remove,
-- sort = table.sort, -- can result in arbitrary native code execution (in 5.1)?
unpack = table.unpack,
},
--
-- 6.6 Mathematical Functions
-- http://www.lua.org/manual/5.2/manual.html#6.6
math = {
abs = math.abs,
acos = math.acos,
asin = math.asin,
atan = math.atan,
atan2 = math.atan2,
ceil = math.ceil,
cos = math.cos,
cosh = math.cosh,
deg = math.deg,
exp = math.exp,
floor = math.floor,
fmod = math.fmod,
frexp = math.frexp,
huge = math.huge,
ldexp = math.ldexp,
log = math.log,
max = math.max,
min = math.min,
modf = math.modf,
pi = math.pi,
pow = math.pow,
rad = math.rad,
random = math.random,
randomseed = math.randomseed,
sin = math.sin,
sinh = math.sinh,
sqrt = math.sqrt,
tan = math.tan,
tanh = math.tanh,
},
--
-- 6.7 Bitwise Operations
-- http://www.lua.org/manual/5.2/manual.html#6.7
--
-- Disabled: if anyone cares we may add them.
--
-- 6.8 Input and Output Facilities
-- http://www.lua.org/manual/5.2/manual.html#6.8
--
-- Disabled.
--
-- 6.9 Operating System Facilities
-- http://www.lua.org/manual/5.2/manual.html#6.9
os = {
clock = os.clock,
-- date = os.date, -- from wiki: "This can crash on some platforms (undocumented). For example, os.date'%v'. It is reported that this will be fixed in 5.2 or 5.1.3."
difftime = os.difftime,
time = os.time,
},
--
-- 6.10 The Debug Library
-- http://www.lua.org/manual/5.2/manual.html#6.10
--
-- Disabled.
}
-------------------------------------------------------------
--
-- Fix metatables. Some of the functions are available
-- via metatables of primitive types; disable them all.
--
sandbox.fix_metatables = function()
-- 1. Fix string metatable: provide common functions
-- from string module.
-- Only the whitelisted functions from _string are copied, so method-call
-- syntax on strings cannot reach anything that is not in the whitelist.
local mt = {__index={}}
for k, v in pairs(_string) do
mt['__index'][k] = v
end
-- debug.setmetatable is used because the metatable is set on a string value
-- ('') rather than on a table.
debug.setmetatable('', mt)
-- 2. Make sure there are no other metatables:
-- (numbers, functions and booleans, in that order)
debug.setmetatable(1, nil)
debug.setmetatable(function() end, nil)
debug.setmetatable(true, nil)
end
-------------------------------------------------------------
--
-- Basic memory and CPU limits.
-- Based on code by Roberto Ierusalimschy.
-- http://lua-users.org/lists/lua-l/2013-12/msg00406.html
--
-- maximum memory (in KB) that can be used by Lua script
sandbox.mem_limit = 100000
-- Set once the limit has been exceeded; also checked by the instruction hook
-- installed in sandbox.enable_per_instruction_limits().
sandbox.mem_limit_reached = false
-- Start checking memory usage against sandbox.mem_limit.
-- Calling this more than once has no further effect.
function sandbox.enable_memory_limit()
if sandbox._memory_tracking_enabled then
return
end
-- The check lives in a __gc metamethod: it runs when the garbage collector
-- finalizes a throwaway sentinel table, and re-arms itself by creating a new
-- sentinel with the same metatable.
local mt = {__gc = function (u)
if sandbox.mem_limit_reached then
error("script uses too much memory")
end
-- collectgarbage("count") is compared directly against mem_limit (KB)
if collectgarbage("count") > sandbox.mem_limit then
sandbox.mem_limit_reached = true
error("script uses too much memory")
else
-- create a new object for the next GC cycle
setmetatable({}, getmetatable(u))
end
end }
-- create an empty object which will be collected at next GC cycle
setmetatable({}, mt)
sandbox._memory_tracking_enabled = true
end
-- Maximum number of instructions that can be executed.
-- XXX: the slowdown only becomes perceivable at ~5m instructions.
sandbox.instruction_limit = 1e7
-- Running total of hook invocations; nothing in this file resets it, so the
-- limit is cumulative for the lifetime of this module instance.
sandbox.instruction_count = 0
-- Install a debug hook that enforces sandbox.instruction_limit and also
-- surfaces a memory-limit violation recorded by the __gc check.
function sandbox.enable_per_instruction_limits()
local function _debug_step(event, line)
sandbox.instruction_count = sandbox.instruction_count + 1
if sandbox.instruction_count > sandbox.instruction_limit then
error("script uses too much CPU", 2)
end
if sandbox.mem_limit_reached then
error("script uses too much memory")
end
end
-- empty event mask with a count of 1: the hook is a count hook that fires
-- after every instruction (see debug.sethook in the Lua manual)
debug.sethook(_debug_step, '', 1)
end
-- In Lua (but not in LuaJIT) debug hooks are per-coroutine.
-- Use this function as a replacement for `coroutine.create` to ensure
-- instruction limit is enforced in coroutines.
--
-- The returned coroutine installs the instruction hook as its first action
-- when resumed, then calls `f` with the arguments given to the resume.
function sandbox.create_coroutine(f, ...)
return coroutine.create(function(...)
sandbox.enable_per_instruction_limits()
return f(...)
end, ...)
end
-------------------------------------------------------------
--
-- Lua 5.2 sandbox.
--
-- Note that it changes the global state: after the first `sandbox.run`
-- call the runtime becomes restricted in CPU and memory, and
-- "string":methods() like "foo":upper() stop working.
--
-- Compile and execute `untrusted_code` with sandbox.env as its environment.
--
-- Returns:
--   nil, message      if the code could not be loaded (e.g. a syntax error)
--   false, err        if the code raised an error while running
--   true, ...         on success, followed by whatever the chunk returned
function sandbox.run(untrusted_code)
sandbox.fix_metatables()
sandbox.enable_memory_limit()
sandbox.enable_per_instruction_limits()
-- mode 't' makes `load` accept text chunks only (no precompiled bytecode)
local untrusted_function, message = load(untrusted_code, nil, 't', sandbox.env)
if not untrusted_function then return nil, message end
return pcall(untrusted_function)
end
-- module export: callers obtain this table through require("sandbox")
return sandbox

1
tildes/requirements-dev.txt

@ -32,6 +32,7 @@ isort==4.3.21
jedi==0.17.2
jinja2==2.11.2
lazy-object-proxy==1.4.3
lupa==1.9
mako==1.1.3
markupsafe==1.1.1
marshmallow==3.9.0

1
tildes/requirements.in

@ -9,6 +9,7 @@ gunicorn
html5lib
invoke
ipython
lupa
marshmallow
Pillow
pip-tools

1
tildes/requirements.txt

@ -20,6 +20,7 @@ ipython-genutils==0.2.0
ipython==7.19.0
jedi==0.17.2
jinja2==2.11.2
lupa==1.9
mako==1.1.3
markupsafe==1.1.1
marshmallow==3.9.0

2
tildes/tildes/database_models.py

@ -13,7 +13,7 @@ from tildes.models.comment import (
CommentVote,
)
from tildes.models.financials import Financials
from tildes.models.group import Group, GroupStat, GroupSubscription
from tildes.models.group import Group, GroupScript, GroupStat, GroupSubscription
from tildes.models.log import Log
from tildes.models.message import MessageConversation, MessageReply
from tildes.models.scraper import ScraperResult

81
tildes/tildes/lib/lua.py

@ -0,0 +1,81 @@
# Copyright (c) 2020 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Functions and classes related to Lua scripting."""
from pathlib import Path
from typing import Any, Callable, Optional
from lupa import LuaError, LuaRuntime
LUA_PACKAGES_PATH = Path("/opt/tildes/lua", "?.lua")
def getter_handler(obj: Any, attr_name: str) -> Any:
    """Look up an attr on behalf of a script, enforcing the object's whitelist.

    The object opts in by exposing a "gettable_attrs" collection of attr names that
    scripts may read. Objects without one expose nothing.

    Raises AttributeError (carrying the attr name) when access isn't allowed.
    """
    allowed_names = getattr(obj, "gettable_attrs", [])

    if attr_name in allowed_names:
        return getattr(obj, attr_name)

    raise AttributeError(f"{attr_name}")
def setter_handler(obj: Any, attr_name: str, value: Any) -> None:
    """Set an object's attr to a new value, if scripts are allowed to do so.

    Depends on a "settable_attrs" attribute on the object, which should be a list of
    attr names that scripts are allowed to overwrite the value of. Objects without
    one can't be modified by scripts at all.

    Raises AttributeError (carrying the attr name, the same way getter_handler does)
    when the attr isn't settable.
    """
    settable_attrs = getattr(obj, "settable_attrs", [])

    if attr_name not in settable_attrs:
        # include the name so a failing script can be traced to the attr it touched
        raise AttributeError(f"{attr_name}")

    setattr(obj, attr_name, value)
class SandboxedLua:
    """A Lua runtime environment that's restricted to a sandbox.

    The sandbox is mostly implemented in Lua itself, and restricts the capabilities
    and data that code will be able to use. There are also some attempts to restrict
    resource usage, but I don't know how effective it is (and should probably be done
    on the OS level as well).
    """

    def __init__(self) -> None:
        """Create a Lua runtime and set up the sandbox environment inside it."""
        self.lua = LuaRuntime(
            register_eval=False,
            register_builtins=False,
            unpack_returned_tuples=True,
            # all attribute access from Lua on Python objects goes through these
            attribute_handlers=(getter_handler, setter_handler),
        )

        # make require("sandbox") resolve to the sandbox.lua file
        self.lua.execute(f"package.path = '{LUA_PACKAGES_PATH}'")
        self.sandbox = self.lua.eval('require("sandbox")')

    def run_code(self, code: str) -> None:
        """Run Lua code inside the sandboxed environment.

        Raises LuaError if the code fails to load or raises an error while running.
        """
        result = self.sandbox.run(code)

        # sandbox.run hands back pcall-style results: a lone status when there is
        # nothing else to return, otherwise (status, value-or-message, ...). A chunk
        # that succeeds *and* returns a value yields (True, value), which must not be
        # treated as a failure.
        if isinstance(result, tuple):
            succeeded, *details = result
        else:
            succeeded, details = result, []

        if succeeded is not True:
            raise LuaError(details[0] if details else "Lua code failed to run")

    def get_lua_function(self, name: str) -> Optional[Callable]:
        """Return the named Lua function so it can be called on Python data.

        The lookup is done in the sandbox's environment table, which is where
        global functions defined by sandboxed code are stored.
        """
        return self.sandbox.env[name]

    def run_lua_function(self, name: str, *args: Any) -> None:
        """Run the named Lua function, passing in the remaining args.

        Raises ValueError if no function with that name is defined.
        """
        function = self.get_lua_function(name)
        if not function:
            raise ValueError(f"No Lua function named {name} exists")

        function(*args)

1
tildes/tildes/models/group/__init__.py

@ -2,6 +2,7 @@
from .group import Group
from .group_query import GroupQuery
from .group_script import GroupScript
from .group_stat import GroupStat
from .group_subscription import GroupSubscription
from .group_wiki_page import GroupWikiPage

41
tildes/tildes/models/group/group_script.py

@ -0,0 +1,41 @@
# Copyright (c) 2020 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Contains the GroupScript class."""
from typing import Optional
from pyramid.security import DENY_ALL
from sqlalchemy import Column, ForeignKey, Integer, Text
from sqlalchemy.orm import relationship
from tildes.models import DatabaseModel
from tildes.typing import AclType
from .group import Group
class GroupScript(DatabaseModel):
    """A Lua script used to process topics/comments, optionally tied to one group.

    The group is optional: group_id is nullable, and a script without a group is
    stored with a NULL group_id.
    """

    __tablename__ = "group_scripts"

    script_id: int = Column(Integer, primary_key=True)
    group_id: Optional[int] = Column(Integer, ForeignKey("groups.group_id"))
    code: str = Column(Text, nullable=False)

    group: Optional[Group] = relationship("Group")

    def __init__(self, group: Optional[Group], code: str):
        """Create a script with the given code, for the group (if any)."""
        self.code = code
        self.group = group

    def __acl__(self) -> AclType:
        """Pyramid security ACL: nothing is permitted through the app for now."""
        return [DENY_ALL]

89
tildes/tildes/models/scripting.py

@ -0,0 +1,89 @@
# Copyright (c) 2020 Tildes contributors <code@tildes.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Model wrappers that control which data and methods are accessible for scripting.
Each wrapper class needs to have "gettable_attrs" and/or "settable_attrs" properties
that define which attributes (including methods) are accessible from inside scripts.
"""
from wrapt import ObjectProxy
from tildes.lib.lua import SandboxedLua
from .comment import Comment
from .topic import Topic
from .user import User
class UserScriptingWrapper(ObjectProxy):
    # pylint: disable=abstract-method
    """Scripting proxy around a User; scripts can only read its username."""

    gettable_attrs = {"username"}

    def __init__(self, user: User, lua_sandbox: SandboxedLua):
        """Wrap a User, keeping a reference to the sandbox's Lua runtime."""
        super().__init__(user)

        # NOTE(review): wrapt's ObjectProxy forwards attribute assignment to the
        # wrapped object unless the name has a `_self_` prefix or exists on the
        # proxy class, so this presumably sets `_lua` on the User itself - confirm
        # that is intended.
        lua_runtime = lua_sandbox.lua
        self._lua = lua_runtime
class TopicScriptingWrapper(ObjectProxy):
    # pylint: disable=abstract-method
    """Scripting proxy around a Topic, limiting what scripts can read and change."""

    # readable from scripts (includes the remove() method)
    gettable_attrs = {
        "title",
        "markdown",
        "link",
        "link_domain",
        "tags",
        "user",
        "is_link_type",
        "is_text_type",
        "remove",
    }
    # writable from scripts
    settable_attrs = {"title", "link", "tags"}

    def __init__(self, topic: Topic, lua_sandbox: SandboxedLua):
        """Wrap a Topic, along with its user."""
        super().__init__(topic)

        # NOTE(review): wrapt's ObjectProxy forwards attribute assignment to the
        # wrapped object unless the name has a `_self_` prefix or exists on the
        # proxy class, so `_lua` and `user` presumably get set on the Topic itself
        # - confirm that is intended.
        self._lua = lua_sandbox.lua
        wrapped_user = UserScriptingWrapper(topic.user, lua_sandbox)
        self.user = wrapped_user

    @property
    def tags(self):  # type: ignore
        """Return the underlying topic's tags, converted into a Lua table."""
        topic = self.__wrapped__
        return self._lua.table_from(topic.tags)

    @tags.setter
    def tags(self, new_tags):  # type: ignore
        """Replace the topic's tags with the values of the given Lua table."""
        topic = self.__wrapped__
        topic.tags = new_tags.values()

    def remove(self) -> None:
        """Mark the underlying topic as removed."""
        topic = self.__wrapped__
        topic.is_removed = True
class CommentScriptingWrapper(ObjectProxy):
    # pylint: disable=abstract-method
    """Scripting proxy around a Comment; scripts get read-only access plus remove()."""

    gettable_attrs = {"user", "topic", "markdown", "remove"}

    def __init__(self, comment: Comment, lua_sandbox: SandboxedLua):
        """Wrap a Comment, along with its topic and user."""
        super().__init__(comment)

        # NOTE(review): wrapt's ObjectProxy forwards attribute assignment to the
        # wrapped object unless the name has a `_self_` prefix or exists on the
        # proxy class, so these presumably get set on the Comment itself - confirm
        # that is intended.
        self._lua = lua_sandbox.lua
        wrapped_topic = TopicScriptingWrapper(comment.topic, lua_sandbox)
        self.topic = wrapped_topic
        wrapped_user = UserScriptingWrapper(comment.user, lua_sandbox)
        self.user = wrapped_user

    def remove(self) -> None:
        """Mark the underlying comment as removed."""
        comment = self.__wrapped__
        comment.is_removed = True
Loading…
Cancel
Save