From e8e5a421803c6ffb14ad01df95121cf0c2da1648 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 Jun 2026 14:52:05 +0100 Subject: [PATCH] Fix parsing events that have large integers (#19819) Follow on from https://github.com/element-hq/synapse/pull/19701. Unfortunately serde has a bug when using `#[serde(flatten)]` with `arbitrary-precision` feature when handling integers that fit in a i128 when doing `serde_json::from_value`. See https://github.com/serde-rs/serde/issues/2230. The `depythonize` hits the same issue. To fix this we make it so we only parse events from strings and not values. --- changelog.d/19819.misc | 1 + rust/src/events/formats/mod.rs | 7 ++ rust/src/events/mod.rs | 80 ++++++++-------------- rust/src/events/unsigned.rs | 13 ++-- synapse/events/__init__.py | 25 ++++++- synapse/synapse_rust/events.pyi | 4 +- tests/events/test_event_parsing.py | 102 ++++++++++++++++++++++++++++ tests/handlers/test_room_member.py | 6 +- tests/storage/test_stream.py | 8 +-- tests/synapse_rust/test_unsigned.py | 13 +++- 10 files changed, 189 insertions(+), 70 deletions(-) create mode 100644 changelog.d/19819.misc create mode 100644 tests/events/test_event_parsing.py diff --git a/changelog.d/19819.misc b/changelog.d/19819.misc new file mode 100644 index 0000000000..4663e8b961 --- /dev/null +++ b/changelog.d/19819.misc @@ -0,0 +1 @@ +Port the python Event classes to Rust. diff --git a/rust/src/events/formats/mod.rs b/rust/src/events/formats/mod.rs index 10b16a5436..86023add17 100644 --- a/rust/src/events/formats/mod.rs +++ b/rust/src/events/formats/mod.rs @@ -98,6 +98,13 @@ pub use vmsc4242::EventFormatVMSC4242; /// fields as they are mutable (and must be deep-copied if the event is cloned). /// `common_fields` and `specific_fields` are both `#[serde(flatten)]`ed so that /// the serialised JSON is a single flat object matching the Matrix spec. +/// +/// Note, deserialization of this struct must not be done from +/// [`serde_json::Value`] nor [`pythonize::depythonize`], due to a bug with +/// `#[serde(flatten)]` combined with the `arbitrary_precision` feature. +/// Instead, deserialize directly from a JSON string with +/// `serde_json::from_str`. See https://github.com/serde-rs/serde/issues/2230 +/// for details. #[derive(Serialize, Deserialize)] pub struct FormattedEvent> { /// The event's signatures. diff --git a/rust/src/events/mod.rs b/rust/src/events/mod.rs index ad3f61e2fd..a4fe15c522 100644 --- a/rust/src/events/mod.rs +++ b/rust/src/events/mod.rs @@ -160,10 +160,15 @@ pub struct Event { #[pymethods] impl Event { + /// Construct an Event from a JSON string, room version, and internal + /// metadata dict. + /// + /// We do no accept a Python dict directly because of the issues with + /// depythonize and large integers (see [`FormattedEvent`] for details). #[new] fn new_from_py<'a, 'py>( py: Python<'py>, - event_dict: &'a Bound<'py, PyAny>, + event_json: &str, room_version: &'a Bound<'py, PyAny>, internal_metadata_dict: &'a Bound<'py, PyDict>, rejected_reason: Option, @@ -178,14 +183,14 @@ impl Event { let rejected_reason = rejected_reason.map(String::into_boxed_str); - // Parse the event dict into a FormattedEvent, converting any failures to + // Parse the event JSON into a FormattedEvent, converting any failures to // a `ValueError`. - let parsed_event = depythonize_event_dict(room_version, event_dict).map_err(|err| { + let parsed_event = event_dict_from_json_str(room_version, event_json).map_err(|err| { let new_err = PyValueError::new_err(format!( - "Failed to parse event for room version {}", - room_version + "Failed to parse event for room version {}: {}", + room_version, err )); - new_err.set_cause(py, Some(err)); + new_err.set_cause(py, Some(PyValueError::new_err(err.to_string()))); new_err })?; @@ -555,63 +560,27 @@ impl Event { } } -fn depythonize_event_dict( +/// Parses a JSON string into a [`FormattedEvent`] for the given room version. +fn event_dict_from_json_str( room_version: &RoomVersion, - event_dict: &Bound<'_, PyAny>, -) -> PyResult { - let formatted_event: FormattedEvent = match room_version.event_format { - EventFormatVersions::ROOM_V1_V2 => { - let event_format: FormattedEvent = depythonize(event_dict)?; - - event_format.into() - } - EventFormatVersions::ROOM_V3 | EventFormatVersions::ROOM_V4_PLUS => { - let event_format: FormattedEvent = depythonize(event_dict)?; - event_format.into() - } - EventFormatVersions::ROOM_V11_HYDRA_PLUS => { - let event_format: FormattedEvent = depythonize(event_dict)?; - event_format.into() - } - EventFormatVersions::ROOM_VMSC4242 => { - let event_format: FormattedEvent = depythonize(event_dict)?; - event_format.into() - } - _ => { - return Err(PyValueError::new_err(format!( - "Unsupported room version: {}", - room_version - ))) - } - }; - - formatted_event.validate()?; - - Ok(formatted_event) -} - -/// Converts an event dict as [`serde_json::Value`] into a [`FormattedEvent`]. -fn event_dict_from_json_value( - room_version: &RoomVersion, - event_dict: serde_json::Value, + event_json: &str, ) -> Result { let formatted_event: FormattedEvent = match room_version.event_format { EventFormatVersions::ROOM_V1_V2 => { - let event_format: FormattedEvent = serde_json::from_value(event_dict)?; - + let event_format: FormattedEvent = serde_json::from_str(event_json)?; event_format.into() } EventFormatVersions::ROOM_V3 | EventFormatVersions::ROOM_V4_PLUS => { - let event_format: FormattedEvent = serde_json::from_value(event_dict)?; + let event_format: FormattedEvent = serde_json::from_str(event_json)?; event_format.into() } EventFormatVersions::ROOM_V11_HYDRA_PLUS => { - let event_format: FormattedEvent = serde_json::from_value(event_dict)?; + let event_format: FormattedEvent = serde_json::from_str(event_json)?; event_format.into() } EventFormatVersions::ROOM_VMSC4242 => { let event_format: FormattedEvent = - serde_json::from_value(event_dict)?; + serde_json::from_str(event_json)?; event_format.into() } _ => { @@ -638,10 +607,17 @@ fn redact_event_py(event: &Event) -> PyResult { })?; let redacted_value = redact(&event_value, event.room_version)?; - let redacted_formatted_event = event_dict_from_json_value(event.room_version, redacted_value) - .map_err(|err| { - PyValueError::new_err(format!("Failed to deserialize redacted event: {}", err)) + + // We can't convert from a value into [`Event`] directly, so we round-trip + // through JSON. See [`FormattedEvent`] for details on why we can't go + // directly through Python dicts. + let redacted_event_json = serde_json::to_string(&redacted_value).map_err(|err| { + PyValueError::new_err(format!("Failed to serialize redacted event: {}", err)) })?; + let redacted_formatted_event = + event_dict_from_json_str(event.room_version, &redacted_event_json).map_err(|err| { + PyValueError::new_err(format!("Failed to deserialize redacted event: {}", err)) + })?; let redacted_event = Event { parsed_event: redacted_formatted_event, diff --git a/rust/src/events/unsigned.rs b/rust/src/events/unsigned.rs index 0bb644ee90..931c412325 100644 --- a/rust/src/events/unsigned.rs +++ b/rust/src/events/unsigned.rs @@ -16,9 +16,9 @@ use std::sync::{Arc, RwLock, RwLockReadGuard}; use pyo3::{ - exceptions::{PyKeyError, PyRuntimeError, PyTypeError}, + exceptions::{PyKeyError, PyRuntimeError, PyTypeError, PyValueError}, pyclass, pymethods, - types::{PyAnyMethods, PyList, PyListMethods, PyMapping}, + types::{PyAnyMethods, PyList, PyListMethods}, Bound, IntoPyObjectExt, PyAny, PyResult, Python, }; use pythonize::{depythonize, pythonize}; @@ -114,9 +114,14 @@ impl Unsigned { #[pymethods] impl Unsigned { + /// Create a new `Unsigned` from a JSON string. + /// + /// We do no accept a Python dict directly because of the issues with + /// depythonize and large integers (see [`FormattedEvent`] for details). #[new] - fn py_new(unsigned: Bound<'_, PyMapping>) -> PyResult { - let inner = depythonize(&unsigned)?; + fn py_new(unsigned_json: &str) -> PyResult { + let inner = serde_json::from_str(unsigned_json) + .map_err(|err| PyValueError::new_err(format!("Failed to parse unsigned: {}", err)))?; Ok(Self { inner: Arc::new(RwLock::new(inner)), diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 7abd91f072..b01e786550 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -29,6 +29,7 @@ from typing import ( ) import attr +from canonicaljson import encode_canonical_json from synapse.api.constants import ( EventContentFields, @@ -72,15 +73,35 @@ def make_event_from_dict( ) -> Event: """Construct an EventBase from the given event dict""" + # Event constructor only takes JSON string, see Event constructor for + # details. + event_json = encode_canonical_json(event_dict).decode("utf-8") + + return make_event_from_json( + event_json=event_json, + room_version=room_version, + internal_metadata_dict=internal_metadata_dict, + rejected_reason=rejected_reason, + ) + + +def make_event_from_json( + event_json: str, + room_version: RoomVersion = RoomVersions.V1, + internal_metadata_dict: JsonDict | None = None, + rejected_reason: str | None = None, +) -> Event: + """Construct an EventBase from the given event JSON string""" + try: return Event( - event_dict=event_dict, + event_json=event_json, room_version=room_version, internal_metadata_dict=internal_metadata_dict or {}, rejected_reason=rejected_reason, ) except ValueError: - raise SynapseError(400, "Invalid event dict", Codes.BAD_JSON) + raise SynapseError(400, "Invalid event JSON", Codes.BAD_JSON) @attr.s(slots=True, frozen=True, auto_attribs=True) diff --git a/synapse/synapse_rust/events.pyi b/synapse/synapse_rust/events.pyi index 9a69438c1d..f84eeb55d6 100644 --- a/synapse/synapse_rust/events.pyi +++ b/synapse/synapse_rust/events.pyi @@ -189,7 +189,7 @@ class Signatures: class Unsigned: """A class representing the unsigned data of an event.""" - def __init__(self, unsigned_dict: JsonMapping): ... + def __init__(self, unsigned_json: str): ... def __getitem__(self, key: str) -> Any: """Get the value for the given key. @@ -230,7 +230,7 @@ class Event: def __init__( self, - event_dict: JsonDict, + event_json: str, room_version: RoomVersion, internal_metadata_dict: JsonDict, rejected_reason: str | None, diff --git a/tests/events/test_event_parsing.py b/tests/events/test_event_parsing.py new file mode 100644 index 0000000000..9d68cfabd6 --- /dev/null +++ b/tests/events/test_event_parsing.py @@ -0,0 +1,102 @@ +# +# This file is licensed under the Affero General Public License (AGPL) version 3. +# +# Copyright (C) 2026 Element Creations Ltd. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# See the GNU Affero General Public License for more details: +# . + + +from typing import Any + +from parameterized import parameterized_class + +from synapse.api.room_versions import RoomVersions +from synapse.events import make_event_from_dict +from synapse.synapse_rust.events import redact_event +from synapse.types import JsonDict + +from tests.test_utils.event_injection import EventTypes +from tests.unittest import TestCase + + +def create_minimal_event_dict(**fields: Any) -> JsonDict: + """Create a minimal event dict that will parse correctly.""" + return { + "type": EventTypes.Message, + "content": {}, + "room_id": "!room:id", + "sender": "@user:id", + "event_id": "$event:id", + "origin_server_ts": 0, + "auth_events": [], + "prev_events": [], + "hashes": {}, + "signatures": {}, + "depth": 0, + **fields, + } + + +@parameterized_class( + [ + {"test_int": 2**7 - 1}, + {"test_int": 2**15 - 1}, + {"test_int": 2**31 - 1}, + {"test_int": 2**63 - 1}, + {"test_int": 2**127 - 1}, + {"test_int": 2**200}, + ] +) +class LargeIntTestCase(TestCase): + """Test that we can handle various sized integers in events. + + This is a regression test where we had issues handling integers that fit in + a Rust `i128`. + """ + + test_int: int + """The integer to test with. This will be set by the parameterized_class decorator.""" + + def test_very_large_int_in_event_content(self) -> None: + """Test that we can handle integers in the event content, which is a + JsonObject and thus can contain arbitrary JSON.""" + + event_dict = create_minimal_event_dict(content={"some_field": self.test_int}) + + event = make_event_from_dict(event_dict, RoomVersions.V1) + + self.assertEqual(event.content["some_field"], self.test_int) + + def test_large_int_in_unsigned(self) -> None: + """Test that we can handle integers in the unsigned data, which is an + Unsigned and thus can contain arbitrary JSON.""" + + event_dict = create_minimal_event_dict( + unsigned={"prev_content": {"some_field": self.test_int}} + ) + + event = make_event_from_dict(event_dict, RoomVersions.V1) + + self.assertEqual(event.unsigned["prev_content"]["some_field"], self.test_int) + + def test_large_int_redacted(self) -> None: + """Test that redact events that have an unsigned field with a large + integer in a protected field""" + + event_dict = create_minimal_event_dict( + type=EventTypes.PowerLevels, + state_key="", + content={"users": {"@user:id": self.test_int}}, + ) + + event = make_event_from_dict(event_dict, RoomVersions.V1) + + redacted_event = redact_event(event) + + self.assertEqual(redacted_event.content["users"]["@user:id"], self.test_int) diff --git a/tests/handlers/test_room_member.py b/tests/handlers/test_room_member.py index 9cc7ebfa80..d5b95e4ef6 100644 --- a/tests/handlers/test_room_member.py +++ b/tests/handlers/test_room_member.py @@ -8,7 +8,7 @@ import synapse.rest.client.room from synapse.api.constants import AccountDataTypes, EventTypes, Membership from synapse.api.errors import Codes, LimitExceededError, SynapseError from synapse.crypto.event_signing import add_hashes_and_signatures -from synapse.events import Event +from synapse.events import make_event_from_dict from synapse.federation.federation_client import SendJoinResult from synapse.server import HomeServer from synapse.types import UserID, create_requester @@ -124,7 +124,7 @@ class TestJoinsLimitedByPerRoomRateLimiter(FederatingHomeserverTestCase): create_event_source, self.hs.config.server.default_room_version, ) - create_event = Event( + create_event = make_event_from_dict( create_event_source, self.hs.config.server.default_room_version, {}, @@ -148,7 +148,7 @@ class TestJoinsLimitedByPerRoomRateLimiter(FederatingHomeserverTestCase): self.hs.hostname, self.hs.signing_key, ) - join_event = Event( + join_event = make_event_from_dict( join_event_source, self.hs.config.server.default_room_version, {}, diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index ba3b3802cf..de127e3971 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -35,7 +35,7 @@ from synapse.api.constants import ( ) from synapse.api.filtering import Filter from synapse.crypto.event_signing import add_hashes_and_signatures -from synapse.events import Event +from synapse.events import make_event_from_dict from synapse.federation.federation_client import SendJoinResult from synapse.rest import admin from synapse.rest.client import login, room @@ -1385,7 +1385,7 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( create_event_source, self.hs.config.server.default_room_version, ) - create_event = Event( + create_event = make_event_from_dict( create_event_source, self.hs.config.server.default_room_version, {}, @@ -1408,7 +1408,7 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( creator_join_event_source, self.hs.config.server.default_room_version, ) - creator_join_event = Event( + creator_join_event = make_event_from_dict( creator_join_event_source, self.hs.config.server.default_room_version, {}, @@ -1433,7 +1433,7 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( self.hs.hostname, self.hs.signing_key, ) - join_event = Event( + join_event = make_event_from_dict( join_event_source, self.hs.config.server.default_room_version, {}, diff --git a/tests/synapse_rust/test_unsigned.py b/tests/synapse_rust/test_unsigned.py index 5193188f38..6c1bf2238b 100644 --- a/tests/synapse_rust/test_unsigned.py +++ b/tests/synapse_rust/test_unsigned.py @@ -11,15 +11,22 @@ # See the GNU Affero General Public License for more details: # . +from canonicaljson import encode_canonical_json + from synapse.synapse_rust.events import Unsigned +from synapse.types import JsonDict from tests import unittest +def _make_unsigned(d: JsonDict) -> Unsigned: + return Unsigned(encode_canonical_json(d).decode("utf-8")) + + class UnsignedTestCase(unittest.TestCase): def test_prev_content(self) -> None: """Test that the prev_content field is correctly exposed as a JsonObject.""" - unsigned = Unsigned({"prev_content": {"key1": "value1", "key2": 42}}) + unsigned = _make_unsigned({"prev_content": {"key1": "value1", "key2": 42}}) self.assert_dict(unsigned["prev_content"], {"key1": "value1", "key2": 42}) @@ -32,7 +39,7 @@ class UnsignedTestCase(unittest.TestCase): than the maximum rust native integer size.""" large_int = 2**200 - unsigned = Unsigned({"age_ts": large_int}) + unsigned = _make_unsigned({"age_ts": large_int}) self.assertEqual(unsigned["age_ts"], large_int) @@ -44,7 +51,7 @@ class UnsignedTestCase(unittest.TestCase): JSON.""" large_int = 2**200 - unsigned = Unsigned({"prev_content": {"some_field": large_int}}) + unsigned = _make_unsigned({"prev_content": {"some_field": large_int}}) self.assertEqual(unsigned["prev_content"]["some_field"], large_int) self.assert_dict(