From 93a1185ba43619483ea14149d49f31ed49abb0dc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 May 2026 10:59:31 +0100 Subject: [PATCH] Remove Python redaction in favour of Rust Rather than keeping two implementations about --- rust/src/events/mod.rs | 41 +++++- synapse/crypto/event_signing.py | 5 +- synapse/crypto/keyring.py | 4 +- synapse/events/utils.py | 126 +----------------- .../storage/databases/main/censor_events.py | 10 +- .../databases/main/events_bg_updates.py | 4 +- synapse/synapse_rust/events.pyi | 16 +++ 7 files changed, 68 insertions(+), 138 deletions(-) diff --git a/rust/src/events/mod.rs b/rust/src/events/mod.rs index 3703e3d9f2..47d40116a3 100644 --- a/rust/src/events/mod.rs +++ b/rust/src/events/mod.rs @@ -52,7 +52,7 @@ use std::{borrow::Cow, sync::Arc}; use pyo3::{ exceptions::{PyAttributeError, PyKeyError, PyValueError}, - pyclass, pymethods, + pyclass, pyfunction, pymethods, types::{PyAnyMethods, PyDict, PyDictMethods, PyList, PyMapping, PyModule, PyModuleMethods}, wrap_pyfunction, Bound, IntoPyObject, PyAny, PyResult, Python, }; @@ -65,6 +65,7 @@ use crate::events::{ }, signatures::Signatures, unsigned::Unsigned, + utils::redact, }; use crate::{ duration::SynapseDuration, @@ -105,6 +106,8 @@ pub fn register_module(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> child_module.add_class::()?; child_module.add_class::()?; child_module.add_function(wrap_pyfunction!(filter::event_visible_to_server_py, m)?)?; + child_module.add_function(wrap_pyfunction!(redact_event_to_dict_py, m)?)?; + child_module.add_function(wrap_pyfunction!(redact_event_dict_to_dict_py, m)?)?; m.add_submodule(&child_module)?; @@ -566,6 +569,42 @@ fn depythonize_event_dict( Ok(formatted_event) } +/// Returns a pruned version of the given event, which removes all keys we don't +/// know about or think could potentially be dodgy. +/// +/// Returns the redacted event as a dict. 
+#[pyfunction(name = "redact_event_to_dict")] +fn redact_event_to_dict_py<'py>(py: Python<'py>, event: &'py Event) -> PyResult<Bound<'py, PyAny>> { + let event_value = serde_json::to_value(&event.parsed_event).map_err(|err| { + PyValueError::new_err(format!("Failed to serialize event for redaction: {}", err)) + })?; + + let redacted = redact(&event_value, event.room_version)?; + + let redacted_py = pythonize(py, &redacted)?; + + Ok(redacted_py) +} + +/// Returns a pruned version of the given event dict, which removes all keys we +/// don't know about or think could potentially be dodgy. +/// +/// Returns the redacted event as a dict. +#[pyfunction(name = "redact_event_dict_to_dict")] +fn redact_event_dict_to_dict_py<'py>( + py: Python<'py>, + room_version: &RoomVersion, + event_dict: &'py Bound<'py, PyAny>, +) -> PyResult<Bound<'py, PyAny>> { + let event_value = depythonize(event_dict)?; + + let redacted = redact(&event_value, room_version)?; + + let redacted_py = pythonize(py, &redacted)?; + + Ok(redacted_py) +} + #[cfg(test)] mod tests { diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py index 823b6288e8..8eef57bdc9 100644 --- a/synapse/crypto/event_signing.py +++ b/synapse/crypto/event_signing.py @@ -33,8 +33,9 @@ from unpaddedbase64 import decode_base64, encode_base64 from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersion from synapse.events import EventBase -from synapse.events.utils import prune_event, prune_event_dict +from synapse.events.utils import prune_event from synapse.logging.opentracing import trace +from synapse.synapse_rust.events import redact_event_dict_to_dict from synapse.types import JsonDict, UserID logger = logging.getLogger(__name__) @@ -157,7 +158,7 @@ def compute_event_signature( Returns: a dictionary in the same format of an event's signatures field. 
""" - redact_json = prune_event_dict(room_version, event_dict) + redact_json = redact_event_dict_to_dict(room_version, event_dict) redact_json.pop("age_ts", None) redact_json.pop("unsigned", None) if logger.isEnabledFor(logging.DEBUG): diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 36736b4559..01c929cfbc 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -46,9 +46,9 @@ from synapse.api.errors import ( ) from synapse.config.key import TrustedKeyServer from synapse.events import EventBase -from synapse.events.utils import prune_event_dict from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.storage.keys import FetchKeyResult +from synapse.synapse_rust.events import redact_event_to_dict from synapse.types import JsonDict from synapse.util import unwrapFirstError from synapse.util.async_helpers import yieldable_gather_results @@ -136,7 +136,7 @@ class VerifyJsonRequest: server_name, # We defer creating the redacted json object, as it uses a lot more # memory than the Event object itself. - lambda: prune_event_dict(event.room_version, event.get_pdu_json()), + lambda: redact_event_to_dict(event), minimum_valid_until_ms, key_ids=key_ids, ) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 1ae19e5e23..e7e3b505c1 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -39,15 +39,13 @@ from synapse.api.constants import ( CANONICALJSON_MAX_INT, CANONICALJSON_MIN_INT, MAX_PDU_SIZE, - EventContentFields, EventTypes, EventUnsignedContentFields, RelationTypes, ) from synapse.api.errors import Codes, SynapseError -from synapse.api.room_versions import RoomVersion from synapse.logging.opentracing import SynapseTags, set_tag, trace -from synapse.synapse_rust.events import Unsigned +from synapse.synapse_rust.events import Unsigned, redact_event_to_dict from synapse.types import JsonDict, Requester from . 
import EventBase, StrippedStateEvent, make_event_from_dict @@ -78,7 +76,7 @@ def prune_event(event: EventBase) -> EventBase: the user has specified, but we do want to keep necessary information like type, state_key etc. """ - pruned_event_dict = prune_event_dict(event.room_version, event.get_dict()) + pruned_event_dict = redact_event_to_dict(event) pruned_event = make_event_from_dict( pruned_event_dict, event.room_version, event.internal_metadata.get_dict() @@ -109,126 +107,6 @@ def clone_event(event: EventBase) -> EventBase: return event.deep_copy() -def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDict: - """Redacts the event_dict in the same way as `prune_event`, except it - operates on dicts rather than event objects - - Returns: - A copy of the pruned event dict - """ - - allowed_keys = [ - "event_id", - "sender", - "room_id", - "hashes", - "signatures", - "content", - "type", - "state_key", - "depth", - "prev_events", - "auth_events", - "origin_server_ts", - ] - - # Earlier room versions from had additional allowed keys. - if not room_version.updated_redaction_rules: - allowed_keys.extend(["prev_state", "membership", "origin"]) - # Custom room versions add new allowed keys and remove others - if room_version.msc4242_state_dags: - allowed_keys.extend(["prev_state_events"]) - allowed_keys.remove("auth_events") - - event_type = event_dict["type"] - - new_content = {} - - def add_fields(*fields: str) -> None: - for field in fields: - if field in event_dict["content"]: - new_content[field] = event_dict["content"][field] - - if event_type == EventTypes.Member: - add_fields("membership") - if room_version.restricted_join_rule_fix: - add_fields(EventContentFields.AUTHORISING_USER) - if room_version.updated_redaction_rules: - # Preserve the signed field under third_party_invite. 
- third_party_invite = event_dict["content"].get("third_party_invite") - if isinstance(third_party_invite, collections.abc.Mapping): - new_content["third_party_invite"] = {} - if "signed" in third_party_invite: - new_content["third_party_invite"]["signed"] = third_party_invite[ - "signed" - ] - - elif event_type == EventTypes.Create: - if room_version.updated_redaction_rules: - # MSC2176 rules state that create events cannot have their `content` redacted. - new_content = event_dict["content"] - if not room_version.implicit_room_creator: - # Some room versions give meaning to `creator` - add_fields("creator") - if room_version.msc4291_room_ids_as_hashes: - # room_id is not allowed on the create event as it's derived from the event ID - allowed_keys.remove("room_id") - - elif event_type == EventTypes.JoinRules: - add_fields("join_rule") - if room_version.restricted_join_rule: - add_fields("allow") - elif event_type == EventTypes.PowerLevels: - add_fields( - "users", - "users_default", - "events", - "events_default", - "state_default", - "ban", - "kick", - "redact", - ) - - if room_version.updated_redaction_rules: - add_fields("invite") - - elif event_type == EventTypes.Aliases and room_version.special_case_aliases_auth: - add_fields("aliases") - elif event_type == EventTypes.RoomHistoryVisibility: - add_fields("history_visibility") - elif event_type == EventTypes.Redaction and room_version.updated_redaction_rules: - add_fields("redacts") - - # Protect the rel_type and event_id fields under the m.relates_to field. - if room_version.msc3389_relation_redactions: - relates_to = event_dict["content"].get("m.relates_to") - if isinstance(relates_to, collections.abc.Mapping): - new_relates_to = {} - for field in ("rel_type", "event_id"): - if field in relates_to: - new_relates_to[field] = relates_to[field] - # Only include a non-empty relates_to field. 
- if new_relates_to: - new_content["m.relates_to"] = new_relates_to - - allowed_fields = {k: v for k, v in event_dict.items() if k in allowed_keys} - - allowed_fields["content"] = new_content - - unsigned: JsonDict = {} - allowed_fields["unsigned"] = unsigned - - event_unsigned = event_dict.get("unsigned", {}) - - if "age_ts" in event_unsigned: - unsigned["age_ts"] = event_unsigned["age_ts"] - if "replaces_state" in event_unsigned: - unsigned["replaces_state"] = event_unsigned["replaces_state"] - - return allowed_fields - - def _copy_field(src: JsonDict, dst: JsonDict, field: list[str]) -> None: """Copy the field in 'src' to 'dst'. diff --git a/synapse/storage/databases/main/censor_events.py b/synapse/storage/databases/main/censor_events.py index a5ae4bf506..c1ad0c42d5 100644 --- a/synapse/storage/databases/main/censor_events.py +++ b/synapse/storage/databases/main/censor_events.py @@ -22,7 +22,6 @@ import logging from typing import TYPE_CHECKING -from synapse.events.utils import prune_event_dict from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( @@ -32,6 +31,7 @@ from synapse.storage.database import ( ) from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.databases.main.events_worker import EventsWorkerStore +from synapse.synapse_rust.events import redact_event_to_dict from synapse.util.duration import Duration from synapse.util.json import json_encoder @@ -123,9 +123,7 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase ): # Redaction was allowed pruned_json: str | None = json_encoder.encode( - prune_event_dict( - original_event.room_version, original_event.get_dict() - ) + redact_event_to_dict(original_event) ) else: # Redaction wasn't allowed @@ -190,9 +188,7 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase return # Prune the event's 
dict then convert it to JSON. - pruned_json = json_encoder.encode( - prune_event_dict(event.room_version, event.get_dict()) - ) + pruned_json = json_encoder.encode(redact_event_to_dict(event)) # Update the event_json table to replace the event's JSON with the pruned # JSON. diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index c0d218398d..024bc0a490 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -38,7 +38,6 @@ from synapse.crypto.event_signing import ( resign_event, ) from synapse.events import EventBase, make_event_from_dict -from synapse.events.utils import prune_event_dict from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -63,6 +62,7 @@ from synapse.storage.databases.main.state_deltas import StateDeltasStore from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.storage.engines import PostgresEngine from synapse.storage.types import Cursor +from synapse.synapse_rust.events import redact_event_to_dict from synapse.types import JsonDict, RoomStreamToken, StateMap, StrCollection from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES from synapse.types.state import StateFilter @@ -2831,7 +2831,7 @@ class EventsBackgroundUpdatesStore( # Verify the signature is genuinely from this key. We prune # first since signatures are computed over the redacted form. 
- pruned = prune_event_dict(event.room_version, event.get_pdu_json()) + pruned = redact_event_to_dict(event) try: verify_signed_json(pruned, self.hs.hostname, old_verify_key) except SignatureVerifyException: diff --git a/synapse/synapse_rust/events.pyi b/synapse/synapse_rust/events.pyi index 28404314ca..fb6d1113ef 100644 --- a/synapse/synapse_rust/events.pyi +++ b/synapse/synapse_rust/events.pyi @@ -308,3 +308,19 @@ class Event: """If this event has the ``msc4354_sticky`` top-level field, returns a ``SynapseDuration`` representing the sticky duration. Otherwise returns ``None``.""" + +def redact_event_to_dict(event: Event) -> JsonDict: + """Returns a pruned version of the given event, which removes all keys we + don't know about or think could potentially be dodgy. + + Returns the redacted event as a dict. + """ + +def redact_event_dict_to_dict( + room_version: RoomVersion, event_dict: JsonMapping +) -> JsonDict: + """Returns a pruned version of the given event dict, which removes all keys + we don't know about or think could potentially be dodgy. + + Returns the redacted event as a dict. + """