From cc0ab174b33e068b987fff5c7ed24520dfe302c2 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Fri, 25 Oct 2019 13:32:05 -0600 Subject: [PATCH 1/7] Base structure for an antispam module --- README.md | 37 ++++++++++++++++++++++++++++ synapse_antispam/mjolnir/__init__.py | 1 + synapse_antispam/mjolnir/antispam.py | 24 ++++++++++++++++++ synapse_antispam/setup.py | 11 +++++++++ 4 files changed, 73 insertions(+) create mode 100644 synapse_antispam/mjolnir/__init__.py create mode 100644 synapse_antispam/mjolnir/antispam.py create mode 100644 synapse_antispam/setup.py diff --git a/README.md b/README.md index 4e16b665..5e0335b0 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,43 @@ nano config/development.yaml node lib/index.js ``` +## Synapse Antispam Module + +First, install the module to your Synapse python environment: +``` +pip install -e git+https://github.com/matrix-org/mjolnir.git#egg=mjolnir&subdirectory=synapse_antispam +``` + +*Note*: Where your python environment is depends on your installation method. Visit +[#synapse:matrix.org](https://matrix.to/#/#synapse:matrix.org) if you're not sure. + +Then add the following to your `homeserver.yaml`: +```yaml +spam_checker: + module: mjolnir.AntiSpam + config: + # Prevent servers/users in the ban lists from inviting users on this + # server to rooms. Default true. + block_invites: true + # Flag messages sent by servers/users in the ban lists as spam. Currently + # this means that spammy messages will appear as empty to users. Default + # false. + block_messages: false + # The room IDs of the ban lists to honour. Unlike other parts of Mjolnir, + # this list cannot be room aliases or permalinks. This server is expected + # to already be joined to the room - Mjolnir will not automatically join + # these rooms. + ban_lists: + - "!roomid:example.org" +``` + +Be sure to change the configuration to match your setup. 
Your server is expected to +already be participating in the ban lists - if it is not, you will need to have a user +on your homeserver join. The antispam module will not join the rooms for you. + +If you change the configuration, you will need to restart Synapse. You'll also need +to restart Synapse to install the plugin. + ## Development TODO. It's a TypeScript project with a linter. diff --git a/synapse_antispam/mjolnir/__init__.py b/synapse_antispam/mjolnir/__init__.py new file mode 100644 index 00000000..76c48524 --- /dev/null +++ b/synapse_antispam/mjolnir/__init__.py @@ -0,0 +1 @@ +from .antispam import AntiSpam diff --git a/synapse_antispam/mjolnir/antispam.py b/synapse_antispam/mjolnir/antispam.py new file mode 100644 index 00000000..16921023 --- /dev/null +++ b/synapse_antispam/mjolnir/antispam.py @@ -0,0 +1,24 @@ +class AntiSpam(object): + def __init__(self, config): + self._block_invites = config.get("block_invites", True) + self._block_messages = config.get("block_messages", False) + self._list_room_ids = config.get("ban_lists", []) + + def check_event_for_spam(self, event): + return False # not spam + + def user_may_invite(self, inviter_user_id, invitee_user_id, room_id): + return True # allowed + + def user_may_create_room(self, user_id): + return True # allowed + + def user_may_create_room_alias(self, user_id, room_alias): + return True # allowed + + def user_may_publish_room(self, user_id, room_id): + return True # allowed + + @staticmethod + def parse_config(config): + return config # no parsing needed diff --git a/synapse_antispam/setup.py b/synapse_antispam/setup.py new file mode 100644 index 00000000..53bcf4d3 --- /dev/null +++ b/synapse_antispam/setup.py @@ -0,0 +1,11 @@ +from setuptools import setup, find_packages + +setup( + name="mjolnir", + version="0.0.1", + packages=find_packages(), + description="Mjolnir Antispam", + include_package_data=True, + zip_safe=True, + install_requires=[], +) From 187a76a3e899a45c5e6fd176783958059d39ce54 Mon 
Sep 17 00:00:00 2001 From: Travis Ralston Date: Fri, 25 Oct 2019 20:59:19 -0600 Subject: [PATCH 2/7] Add logic for Mjolnir antispam module --- synapse_antispam/mjolnir/antispam.py | 106 +++++++++++++++++++++++--- synapse_antispam/mjolnir/ban_list.py | 80 +++++++++++++++++++ synapse_antispam/mjolnir/list_rule.py | 52 +++++++++++++ 3 files changed, 228 insertions(+), 10 deletions(-) create mode 100644 synapse_antispam/mjolnir/ban_list.py create mode 100644 synapse_antispam/mjolnir/list_rule.py diff --git a/synapse_antispam/mjolnir/antispam.py b/synapse_antispam/mjolnir/antispam.py index 16921023..9fc46c4e 100644 --- a/synapse_antispam/mjolnir/antispam.py +++ b/synapse_antispam/mjolnir/antispam.py @@ -1,24 +1,110 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from .list_rule import ALL_RULE_TYPES, RECOMMENDATION_BAN +from .ban_list import BanList +from synapse.types import UserID + +logger = logging.getLogger("synapse.contrib." 
+ __name__) + class AntiSpam(object): - def __init__(self, config): - self._block_invites = config.get("block_invites", True) - self._block_messages = config.get("block_messages", False) - self._list_room_ids = config.get("ban_lists", []) + def __init__(self, config, hs): + self.block_invites = config.get("block_invites", True) + self.block_messages = config.get("block_messages", False) + self.list_room_ids = config.get("ban_lists", []) + self.rooms_to_lists = {} # type: Dict[str, BanList] + self.hs = hs + + # Now we build the ban lists so we can match them + self.build_lists() + + def build_lists(self): + for room_id in self.list_room_ids: + self.build_list(room_id) + + def build_list(self, room_id): + logger.info("Rebuilding ban list for %s" % (room_id)) + self.get_list_for_room(room_id).build() + + def get_list_for_room(self, room_id): + if room_id not in self.rooms_to_lists: + self.rooms_to_lists[room_id] = BanList(hs=self.hs, room_id=room_id) + return self.rooms_to_lists[room_id] + + def is_user_banned(self, user_id): + for room_id in self.rooms_to_lists: + ban_list = self.rooms_to_lists[room_id] + for rule in ban_list.user_rules: + if rule.matches(user_id): + return rule.action == RECOMMENDATION_BAN + return False + + def is_server_banned(self, server_name): + for room_id in self.rooms_to_lists: + ban_list = self.rooms_to_lists[room_id] + for rule in ban_list.server_rules: + if rule.matches(server_name): + return rule.action == RECOMMENDATION_BAN + return False + + # --- spam checker interface below here --- def check_event_for_spam(self, event): - return False # not spam + room_id = event.get("room_id", "") + event_type = event.get("type", "") + state_key = event.get("state_key", None) + + # Rebuild the rules if there's an event for our ban lists + if state_key is not None and event_type in ALL_RULE_TYPES and room_id in self.list_room_ids: + logger.info("Received ban list event - updating list") + self.get_list_for_room(room_id).build(with_event=event) + 
return False # Ban list updates aren't spam + + if not self.block_messages: + return False # not spam (we aren't blocking messages) + + sender = UserID.from_string(event.get("sender", "")) + if self.is_user_banned(sender.to_string()): + return True + if self.is_server_banned(sender.domain): + return True + + return False # not spam (as far as we're concerned) def user_may_invite(self, inviter_user_id, invitee_user_id, room_id): - return True # allowed + if not self.block_invites: + return True # allowed (we aren't blocking invites) + + sender = UserID.from_string(inviter_user_id) + if self.is_user_banned(sender.to_string()): + return False + if self.is_server_banned(sender.domain): + return False + + return True # allowed (as far as we're concerned) def user_may_create_room(self, user_id): - return True # allowed + return True # allowed def user_may_create_room_alias(self, user_id, room_alias): - return True # allowed + return True # allowed def user_may_publish_room(self, user_id, room_id): - return True # allowed + return True # allowed @staticmethod def parse_config(config): - return config # no parsing needed + return config # no parsing needed diff --git a/synapse_antispam/mjolnir/ban_list.py b/synapse_antispam/mjolnir/ban_list.py new file mode 100644 index 00000000..540676a3 --- /dev/null +++ b/synapse_antispam/mjolnir/ban_list.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from .list_rule import ListRule, ALL_RULE_TYPES, USER_RULE_TYPES, SERVER_RULE_TYPES, ROOM_RULE_TYPES +from twisted.internet import defer +from synapse.storage.state import StateFilter + +logger = logging.getLogger("synapse.contrib." + __name__) + +class BanList(object): + def __init__(self, hs, room_id): + self.hs = hs + self.room_id = room_id + self.server_rules = [] + self.user_rules = [] + self.room_rules = [] + self.build() + + @defer.inlineCallbacks + def build(self, with_event=None): + events = yield self.get_relevant_state_events() + if with_event is not None: + events = [*events, with_event] + self.server_rules = [] + self.user_rules = [] + self.room_rules = [] + for event in events: + event_type = event.get("type", "") + state_key = event.get("state_key", "") + content = event.get("content", {}) + if state_key is None: + continue # Some message event got in here? + + # Skip over events which are replaced by with_event. We do this + # to ensure that when we rebuild the list we're using updated rules. 
+ if with_event is not None: + w_event_type = with_event.get("type", "") + w_state_key = with_event.get("state_key", "") + w_event_id = with_event.event_id + event_id = event.event_id + if w_event_type == event_type and w_state_key == state_key and w_event_id != event_id: + continue + + entity = content.get("entity", None) + recommendation = content.get("recommendation", None) + reason = content.get("reason", None) + if entity is None or recommendation is None or reason is None: + continue # invalid event + + logger.info("Adding rule %s/%s with action %s" % (event_type, entity, recommendation)) + rule = ListRule(entity=entity, action=recommendation, reason=reason, kind=event_type) + if event_type in USER_RULE_TYPES: + self.user_rules.append(rule) + elif event_type in ROOM_RULE_TYPES: + self.room_rules.append(rule) + elif event_type in SERVER_RULE_TYPES: + self.server_rules.append(rule) + + @defer.inlineCallbacks + def get_relevant_state_events(self): + store = self.hs.get_datastore() + ev_filter = StateFilter.from_types([(t, None) for t in ALL_RULE_TYPES]) + state_ids = yield store.get_filtered_current_state_ids( + room_id=self.room_id, state_filter=ev_filter + ) + state = yield store.get_events(state_ids.values()) + return state.values() diff --git a/synapse_antispam/mjolnir/list_rule.py b/synapse_antispam/mjolnir/list_rule.py new file mode 100644 index 00000000..763e59c1 --- /dev/null +++ b/synapse_antispam/mjolnir/list_rule.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.util import glob_to_regex + +RECOMMENDATION_BAN = "m.ban" +RECOMMENDATION_BAN_TYPES = [RECOMMENDATION_BAN, "org.matrix.mjolnir.ban"] + +RULE_USER = "m.room.rule.user" +RULE_ROOM = "m.room.rule.room" +RULE_SERVER = "m.room.rule.server" +USER_RULE_TYPES = [RULE_USER, "org.matrix.mjolnir.rule.user"] +ROOM_RULE_TYPES = [RULE_ROOM, "org.matrix.mjolnir.rule.room"] +SERVER_RULE_TYPES = [RULE_SERVER, "org.matrix.mjolnir.rule.server"] +ALL_RULE_TYPES = [*USER_RULE_TYPES, *ROOM_RULE_TYPES, *SERVER_RULE_TYPES] + +def recommendation_to_stable(recommendation): + if recommendation in RECOMMENDATION_BAN_TYPES: + return RECOMMENDATION_BAN + return None + +def rule_type_to_stable(rule): + if rule in USER_RULE_TYPES: + return RULE_USER + if rule in ROOM_RULE_TYPES: + return RULE_ROOM + if rule in SERVER_RULE_TYPES: + return RULE_SERVER + return None + +class ListRule(object): + def __init__(self, entity, action, reason, kind): + self.entity = entity + self.regex = glob_to_regex(entity) + self.action = recommendation_to_stable(action) + self.reason = reason + self.kind = rule_type_to_stable(kind) + + def matches(self, victim): + return self.regex.match(victim) From d313fad561c7d5d80a84d94474d8e3d7a4abce55 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Fri, 25 Oct 2019 21:06:31 -0600 Subject: [PATCH 3/7] Update feature state --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e0335b0..ff3517b2 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Phase 2: * [x] Support multiple lists Phase 3: -* [ ] Synapse antispam module +* [x] Synapse antispam module * [ ] Room upgrade handling (both protected+list rooms) * [ ] Support community-defined scopes? (ie: no hardcoded config) * [ ] Riot hooks (independent of mjolnir?) 
From 40ad7f0df362aeeed067083a98a35830bc17b6cd Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 28 Oct 2019 14:55:32 -0600 Subject: [PATCH 4/7] Update for new Spam Checker API --- synapse_antispam/mjolnir/antispam.py | 6 +++--- synapse_antispam/mjolnir/ban_list.py | 14 +++----------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/synapse_antispam/mjolnir/antispam.py b/synapse_antispam/mjolnir/antispam.py index 9fc46c4e..ec0234cc 100644 --- a/synapse_antispam/mjolnir/antispam.py +++ b/synapse_antispam/mjolnir/antispam.py @@ -21,12 +21,12 @@ from synapse.types import UserID logger = logging.getLogger("synapse.contrib." + __name__) class AntiSpam(object): - def __init__(self, config, hs): + def __init__(self, config, api): self.block_invites = config.get("block_invites", True) self.block_messages = config.get("block_messages", False) self.list_room_ids = config.get("ban_lists", []) self.rooms_to_lists = {} # type: Dict[str, BanList] - self.hs = hs + self.api = api # Now we build the ban lists so we can match them self.build_lists() @@ -41,7 +41,7 @@ class AntiSpam(object): def get_list_for_room(self, room_id): if room_id not in self.rooms_to_lists: - self.rooms_to_lists[room_id] = BanList(hs=self.hs, room_id=room_id) + self.rooms_to_lists[room_id] = BanList(api=self.api, room_id=room_id) return self.rooms_to_lists[room_id] def is_user_banned(self, user_id): diff --git a/synapse_antispam/mjolnir/ban_list.py b/synapse_antispam/mjolnir/ban_list.py index 540676a3..58d317e2 100644 --- a/synapse_antispam/mjolnir/ban_list.py +++ b/synapse_antispam/mjolnir/ban_list.py @@ -16,13 +16,12 @@ import logging from .list_rule import ListRule, ALL_RULE_TYPES, USER_RULE_TYPES, SERVER_RULE_TYPES, ROOM_RULE_TYPES from twisted.internet import defer -from synapse.storage.state import StateFilter logger = logging.getLogger("synapse.contrib." 
+ __name__) class BanList(object): - def __init__(self, hs, room_id): - self.hs = hs + def __init__(self, api, room_id): + self.api = api self.room_id = room_id self.server_rules = [] self.user_rules = [] @@ -69,12 +68,5 @@ class BanList(object): elif event_type in SERVER_RULE_TYPES: self.server_rules.append(rule) - @defer.inlineCallbacks def get_relevant_state_events(self): - store = self.hs.get_datastore() - ev_filter = StateFilter.from_types([(t, None) for t in ALL_RULE_TYPES]) - state_ids = yield store.get_filtered_current_state_ids( - room_id=self.room_id, state_filter=ev_filter - ) - state = yield store.get_events(state_ids.values()) - return state.values() + return self.api.get_state_events_in_room(self.room_id, [(t, None) for t in ALL_RULE_TYPES]) From f72b68dc7cd45a5495857110233fec5c67e9eab9 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 31 Oct 2019 08:34:58 -0600 Subject: [PATCH 5/7] Add a description for what Mjolnir's antispam module does --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index ff3517b2..a3b7eb8a 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,11 @@ node lib/index.js ## Synapse Antispam Module +Using the bot to manage your rooms is great; however, if you want to use your ban lists +(or someone else's) on your server to affect all of your users, then an antispam module +is needed. Primarily meant to block invites from undesired homeservers/users, Mjolnir's +antispam module is a way to interpret ban lists and apply them to your entire homeserver. 
+ First, install the module to your Synapse python environment: ``` pip install -e git+https://github.com/matrix-org/mjolnir.git#egg=mjolnir&subdirectory=synapse_antispam From 83fffd322e04ebad142ba5c31d89ff46507b53f7 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 13 Nov 2019 14:31:31 -0700 Subject: [PATCH 6/7] Wrap ban list build in a logging context --- synapse_antispam/mjolnir/ban_list.py | 77 +++++++++++++++------------- 1 file changed, 41 insertions(+), 36 deletions(-) diff --git a/synapse_antispam/mjolnir/ban_list.py b/synapse_antispam/mjolnir/ban_list.py index 58d317e2..8b7e8976 100644 --- a/synapse_antispam/mjolnir/ban_list.py +++ b/synapse_antispam/mjolnir/ban_list.py @@ -16,6 +16,7 @@ import logging from .list_rule import ListRule, ALL_RULE_TYPES, USER_RULE_TYPES, SERVER_RULE_TYPES, ROOM_RULE_TYPES from twisted.internet import defer +from synapse.logging.context import LoggingContext logger = logging.getLogger("synapse.contrib." + __name__) @@ -28,45 +29,49 @@ class BanList(object): self.room_rules = [] self.build() - @defer.inlineCallbacks - def build(self, with_event=None): - events = yield self.get_relevant_state_events() - if with_event is not None: - events = [*events, with_event] - self.server_rules = [] - self.user_rules = [] - self.room_rules = [] - for event in events: - event_type = event.get("type", "") - state_key = event.get("state_key", "") - content = event.get("content", {}) - if state_key is None: - continue # Some message event got in here? - - # Skip over events which are replaced by with_event. We do this - # to ensure that when we rebuild the list we're using updated rules. 
+ def build(self, with_event=None): + @defer.inlineCallbacks + def run(self, with_event=None): + events = yield self.get_relevant_state_events() if with_event is not None: - w_event_type = with_event.get("type", "") - w_state_key = with_event.get("state_key", "") - w_event_id = with_event.event_id - event_id = event.event_id - if w_event_type == event_type and w_state_key == state_key and w_event_id != event_id: - continue + events = [*events, with_event] + self.server_rules = [] + self.user_rules = [] + self.room_rules = [] + for event in events: + event_type = event.get("type", "") + state_key = event.get("state_key", "") + content = event.get("content", {}) + if state_key is None: + continue # Some message event got in here? - entity = content.get("entity", None) - recommendation = content.get("recommendation", None) - reason = content.get("reason", None) - if entity is None or recommendation is None or reason is None: - continue # invalid event + # Skip over events which are replaced by with_event. We do this + # to ensure that when we rebuild the list we're using updated rules. 
+ if with_event is not None: + w_event_type = with_event.get("type", "") + w_state_key = with_event.get("state_key", "") + w_event_id = with_event.event_id + event_id = event.event_id + if w_event_type == event_type and w_state_key == state_key and w_event_id != event_id: + continue - logger.info("Adding rule %s/%s with action %s" % (event_type, entity, recommendation)) - rule = ListRule(entity=entity, action=recommendation, reason=reason, kind=event_type) - if event_type in USER_RULE_TYPES: - self.user_rules.append(rule) - elif event_type in ROOM_RULE_TYPES: - self.room_rules.append(rule) - elif event_type in SERVER_RULE_TYPES: - self.server_rules.append(rule) + entity = content.get("entity", None) + recommendation = content.get("recommendation", None) + reason = content.get("reason", None) + if entity is None or recommendation is None or reason is None: + continue # invalid event + + logger.info("Adding rule %s/%s with action %s" % (event_type, entity, recommendation)) + rule = ListRule(entity=entity, action=recommendation, reason=reason, kind=event_type) + if event_type in USER_RULE_TYPES: + self.user_rules.append(rule) + elif event_type in ROOM_RULE_TYPES: + self.room_rules.append(rule) + elif event_type in SERVER_RULE_TYPES: + self.server_rules.append(rule) + + with LoggingContext("mjolnir_ban_list_build"): + run(self, with_event=with_event) def get_relevant_state_events(self): return self.api.get_state_events_in_room(self.room_id, [(t, None) for t in ALL_RULE_TYPES]) From 9462c25bbf1673388f7ea8b4a736d85584a6a640 Mon 
Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 13 Nov 2019 15:05:16 -0700 Subject: [PATCH 7/7] Use background processes, not log contexts --- synapse_antispam/mjolnir/ban_list.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/synapse_antispam/mjolnir/ban_list.py b/synapse_antispam/mjolnir/ban_list.py index 8b7e8976..e85b391a 100644 --- a/synapse_antispam/mjolnir/ban_list.py +++ b/synapse_antispam/mjolnir/ban_list.py @@ -16,7 +16,7 @@ import logging from .list_rule import ListRule, ALL_RULE_TYPES, USER_RULE_TYPES, SERVER_RULE_TYPES, ROOM_RULE_TYPES from twisted.internet import defer -from synapse.logging.context import LoggingContext +from synapse.metrics.background_process_metrics import run_as_background_process logger = logging.getLogger("synapse.contrib." + __name__) @@ -31,7 +31,7 @@ class BanList(object): def build(self, with_event=None): @defer.inlineCallbacks - def run(self, with_event=None): + def run(with_event): events = yield self.get_relevant_state_events() if with_event is not None: events = [*events, with_event] @@ -70,8 +70,7 @@ class BanList(object): elif event_type in SERVER_RULE_TYPES: self.server_rules.append(rule) - with LoggingContext("mjolnir_ban_list_build"): - run(self, with_event=with_event) + run_as_background_process("mjolnir_build_ban_list", run, with_event) def get_relevant_state_events(self): return self.api.get_state_events_in_room(self.room_id, [(t, None) for t in ALL_RULE_TYPES])