Remove Python redaction in favour of Rust

Rather than keeping two implementations around, use the Rust redaction code everywhere.
This commit is contained in:
Erik Johnston
2026-05-28 10:59:31 +01:00
parent cb6906e3a5
commit 93a1185ba4
7 changed files with 68 additions and 138 deletions
+40 -1
View File
@@ -52,7 +52,7 @@ use std::{borrow::Cow, sync::Arc};
use pyo3::{
exceptions::{PyAttributeError, PyKeyError, PyValueError},
pyclass, pymethods,
pyclass, pyfunction, pymethods,
types::{PyAnyMethods, PyDict, PyDictMethods, PyList, PyMapping, PyModule, PyModuleMethods},
wrap_pyfunction, Bound, IntoPyObject, PyAny, PyResult, Python,
};
@@ -65,6 +65,7 @@ use crate::events::{
},
signatures::Signatures,
unsigned::Unsigned,
utils::redact,
};
use crate::{
duration::SynapseDuration,
@@ -105,6 +106,8 @@ pub fn register_module(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()>
child_module.add_class::<json_object::JsonObjectItemsView>()?;
child_module.add_class::<Event>()?;
child_module.add_function(wrap_pyfunction!(filter::event_visible_to_server_py, m)?)?;
child_module.add_function(wrap_pyfunction!(redact_event_to_dict_py, m)?)?;
child_module.add_function(wrap_pyfunction!(redact_event_dict_to_dict_py, m)?)?;
m.add_submodule(&child_module)?;
@@ -566,6 +569,42 @@ fn depythonize_event_dict(
Ok(formatted_event)
}
/// Redacts the given event, dropping every key we do not recognise or that
/// could potentially be dodgy.
///
/// The event is serialised to a JSON value, redacted using the event's own
/// room version, and converted back into Python objects.
///
/// Raises `ValueError` if the event cannot be serialised; any failure from the
/// redaction itself or from the conversion back to Python is propagated.
///
/// Returns the redacted event as a dict.
#[pyfunction(name = "redact_event_to_dict")]
fn redact_event_to_dict_py<'py>(py: Python<'py>, event: &'py Event) -> PyResult<Bound<'py, PyAny>> {
    // Redaction works on a generic JSON value rather than on the typed event.
    let serialized = match serde_json::to_value(&event.parsed_event) {
        Ok(value) => value,
        Err(err) => {
            return Err(PyValueError::new_err(format!(
                "Failed to serialize event for redaction: {}",
                err
            )));
        }
    };

    let pruned = redact(&serialized, event.room_version)?;

    Ok(pythonize(py, &pruned)?)
}
/// Redacts an event supplied as a dict, dropping every key we do not recognise
/// or that could potentially be dodgy.
///
/// The dict is converted from Python, redacted using the supplied room
/// version, and converted back into Python objects. A failure at any of those
/// three steps is propagated to the caller.
///
/// Returns the redacted event as a dict.
#[pyfunction(name = "redact_event_dict_to_dict")]
fn redact_event_dict_to_dict_py<'py>(
    py: Python<'py>,
    room_version: &RoomVersion,
    event_dict: &'py Bound<'py, PyAny>,
) -> PyResult<Bound<'py, PyAny>> {
    let parsed = depythonize(event_dict)?;
    let pruned = redact(&parsed, room_version)?;

    Ok(pythonize(py, &pruned)?)
}
#[cfg(test)]
mod tests {
+3 -2
View File
@@ -33,8 +33,9 @@ from unpaddedbase64 import decode_base64, encode_base64
from synapse.api.errors import Codes, SynapseError
from synapse.api.room_versions import RoomVersion
from synapse.events import EventBase
from synapse.events.utils import prune_event, prune_event_dict
from synapse.events.utils import prune_event
from synapse.logging.opentracing import trace
from synapse.synapse_rust.events import redact_event_dict_to_dict
from synapse.types import JsonDict, UserID
logger = logging.getLogger(__name__)
@@ -157,7 +158,7 @@ def compute_event_signature(
Returns:
a dictionary in the same format of an event's signatures field.
"""
redact_json = prune_event_dict(room_version, event_dict)
redact_json = redact_event_dict_to_dict(room_version, event_dict)
redact_json.pop("age_ts", None)
redact_json.pop("unsigned", None)
if logger.isEnabledFor(logging.DEBUG):
+2 -2
View File
@@ -46,9 +46,9 @@ from synapse.api.errors import (
)
from synapse.config.key import TrustedKeyServer
from synapse.events import EventBase
from synapse.events.utils import prune_event_dict
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.storage.keys import FetchKeyResult
from synapse.synapse_rust.events import redact_event_to_dict
from synapse.types import JsonDict
from synapse.util import unwrapFirstError
from synapse.util.async_helpers import yieldable_gather_results
@@ -136,7 +136,7 @@ class VerifyJsonRequest:
server_name,
# We defer creating the redacted json object, as it uses a lot more
# memory than the Event object itself.
lambda: prune_event_dict(event.room_version, event.get_pdu_json()),
lambda: redact_event_to_dict(event),
minimum_valid_until_ms,
key_ids=key_ids,
)
+2 -124
View File
@@ -39,15 +39,13 @@ from synapse.api.constants import (
CANONICALJSON_MAX_INT,
CANONICALJSON_MIN_INT,
MAX_PDU_SIZE,
EventContentFields,
EventTypes,
EventUnsignedContentFields,
RelationTypes,
)
from synapse.api.errors import Codes, SynapseError
from synapse.api.room_versions import RoomVersion
from synapse.logging.opentracing import SynapseTags, set_tag, trace
from synapse.synapse_rust.events import Unsigned
from synapse.synapse_rust.events import Unsigned, redact_event_to_dict
from synapse.types import JsonDict, Requester
from . import EventBase, StrippedStateEvent, make_event_from_dict
@@ -78,7 +76,7 @@ def prune_event(event: EventBase) -> EventBase:
the user has specified, but we do want to keep necessary information like
type, state_key etc.
"""
pruned_event_dict = prune_event_dict(event.room_version, event.get_dict())
pruned_event_dict = redact_event_to_dict(event)
pruned_event = make_event_from_dict(
pruned_event_dict, event.room_version, event.internal_metadata.get_dict()
@@ -109,126 +107,6 @@ def clone_event(event: EventBase) -> EventBase:
return event.deep_copy()
def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDict:
"""Redacts the event_dict in the same way as `prune_event`, except it
operates on dicts rather than event objects.

Only an allow-list of top-level keys is kept, `content` is rebuilt from the
fields permitted for the event's type under the given room version, and
`unsigned` is reduced to `age_ts` and `replaces_state` (when present).

Args:
room_version: The room version whose redaction rules are applied.
event_dict: The event to redact. Must contain `type`; `content` is read
for the event types that preserve content fields.

Returns:
A new dict holding the pruned event. The input dict is not modified, but
preserved values are shared with it rather than deep-copied (for create
events under the updated redaction rules, `content` is the same object).
"""
allowed_keys = [
"event_id",
"sender",
"room_id",
"hashes",
"signatures",
"content",
"type",
"state_key",
"depth",
"prev_events",
"auth_events",
"origin_server_ts",
]
# Earlier room versions had additional allowed keys.
if not room_version.updated_redaction_rules:
allowed_keys.extend(["prev_state", "membership", "origin"])
# Custom room versions add new allowed keys and remove others
if room_version.msc4242_state_dags:
allowed_keys.extend(["prev_state_events"])
allowed_keys.remove("auth_events")
event_type = event_dict["type"]
new_content = {}
# Copies each named field that exists in the original content into
# `new_content`; fields that are absent are silently skipped.
def add_fields(*fields: str) -> None:
for field in fields:
if field in event_dict["content"]:
new_content[field] = event_dict["content"][field]
if event_type == EventTypes.Member:
add_fields("membership")
if room_version.restricted_join_rule_fix:
add_fields(EventContentFields.AUTHORISING_USER)
if room_version.updated_redaction_rules:
# Preserve the signed field under third_party_invite.
third_party_invite = event_dict["content"].get("third_party_invite")
if isinstance(third_party_invite, collections.abc.Mapping):
new_content["third_party_invite"] = {}
if "signed" in third_party_invite:
new_content["third_party_invite"]["signed"] = third_party_invite[
"signed"
]
elif event_type == EventTypes.Create:
if room_version.updated_redaction_rules:
# MSC2176 rules state that create events cannot have their `content` redacted.
new_content = event_dict["content"]
if not room_version.implicit_room_creator:
# Some room versions give meaning to `creator`
add_fields("creator")
if room_version.msc4291_room_ids_as_hashes:
# room_id is not allowed on the create event as it's derived from the event ID
allowed_keys.remove("room_id")
elif event_type == EventTypes.JoinRules:
add_fields("join_rule")
if room_version.restricted_join_rule:
add_fields("allow")
elif event_type == EventTypes.PowerLevels:
add_fields(
"users",
"users_default",
"events",
"events_default",
"state_default",
"ban",
"kick",
"redact",
)
if room_version.updated_redaction_rules:
add_fields("invite")
elif event_type == EventTypes.Aliases and room_version.special_case_aliases_auth:
add_fields("aliases")
elif event_type == EventTypes.RoomHistoryVisibility:
add_fields("history_visibility")
elif event_type == EventTypes.Redaction and room_version.updated_redaction_rules:
add_fields("redacts")
# Protect the rel_type and event_id fields under the m.relates_to field.
if room_version.msc3389_relation_redactions:
relates_to = event_dict["content"].get("m.relates_to")
if isinstance(relates_to, collections.abc.Mapping):
new_relates_to = {}
for field in ("rel_type", "event_id"):
if field in relates_to:
new_relates_to[field] = relates_to[field]
# Only include a non-empty relates_to field.
if new_relates_to:
new_content["m.relates_to"] = new_relates_to
# Keep only the allow-listed top-level keys, then swap in the pruned content.
allowed_fields = {k: v for k, v in event_dict.items() if k in allowed_keys}
allowed_fields["content"] = new_content
# `unsigned` is always present in the result, even if it ends up empty; only
# `age_ts` and `replaces_state` are carried over from the original.
unsigned: JsonDict = {}
allowed_fields["unsigned"] = unsigned
event_unsigned = event_dict.get("unsigned", {})
if "age_ts" in event_unsigned:
unsigned["age_ts"] = event_unsigned["age_ts"]
if "replaces_state" in event_unsigned:
unsigned["replaces_state"] = event_unsigned["replaces_state"]
return allowed_fields
def _copy_field(src: JsonDict, dst: JsonDict, field: list[str]) -> None:
"""Copy the field in 'src' to 'dst'.
@@ -22,7 +22,6 @@
import logging
from typing import TYPE_CHECKING
from synapse.events.utils import prune_event_dict
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import (
@@ -32,6 +31,7 @@ from synapse.storage.database import (
)
from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
from synapse.storage.databases.main.events_worker import EventsWorkerStore
from synapse.synapse_rust.events import redact_event_to_dict
from synapse.util.duration import Duration
from synapse.util.json import json_encoder
@@ -123,9 +123,7 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase
):
# Redaction was allowed
pruned_json: str | None = json_encoder.encode(
prune_event_dict(
original_event.room_version, original_event.get_dict()
)
redact_event_to_dict(original_event)
)
else:
# Redaction wasn't allowed
@@ -190,9 +188,7 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase
return
# Prune the event's dict then convert it to JSON.
pruned_json = json_encoder.encode(
prune_event_dict(event.room_version, event.get_dict())
)
pruned_json = json_encoder.encode(redact_event_to_dict(event))
# Update the event_json table to replace the event's JSON with the pruned
# JSON.
@@ -38,7 +38,6 @@ from synapse.crypto.event_signing import (
resign_event,
)
from synapse.events import EventBase, make_event_from_dict
from synapse.events.utils import prune_event_dict
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import (
DatabasePool,
@@ -63,6 +62,7 @@ from synapse.storage.databases.main.state_deltas import StateDeltasStore
from synapse.storage.databases.main.stream import StreamWorkerStore
from synapse.storage.engines import PostgresEngine
from synapse.storage.types import Cursor
from synapse.synapse_rust.events import redact_event_to_dict
from synapse.types import JsonDict, RoomStreamToken, StateMap, StrCollection
from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES
from synapse.types.state import StateFilter
@@ -2831,7 +2831,7 @@ class EventsBackgroundUpdatesStore(
# Verify the signature is genuinely from this key. We prune
# first since signatures are computed over the redacted form.
pruned = prune_event_dict(event.room_version, event.get_pdu_json())
pruned = redact_event_to_dict(event)
try:
verify_signed_json(pruned, self.hs.hostname, old_verify_key)
except SignatureVerifyException:
+16
View File
@@ -308,3 +308,19 @@ class Event:
"""If this event has the ``msc4354_sticky`` top-level field, returns a
``SynapseDuration`` representing the sticky duration. Otherwise returns
``None``."""
def redact_event_to_dict(event: Event) -> JsonDict:
"""Returns a pruned version of the given event, which removes all keys we
don't know about or think could potentially be dodgy.

The redaction rules of the event's own room version are applied.

Args:
event: The event to redact.

Returns:
The redacted event as a dict.
"""
def redact_event_dict_to_dict(
room_version: RoomVersion, event_dict: JsonMapping
) -> JsonDict:
"""Returns a pruned version of the given event dict, which removes all keys
we don't know about or think could potentially be dodgy.

Args:
room_version: The room version whose redaction rules are applied.
event_dict: The event to redact, as a JSON-style mapping.

Returns:
The redacted event as a dict.
"""