Fix parsing events that have large integers (#19819)

Follow on from https://github.com/element-hq/synapse/pull/19701.

Unfortunately serde has a bug when using `#[serde(flatten)]` with the
`arbitrary_precision` feature when handling integers that fit in an i128
when doing `serde_json::from_value`. See
https://github.com/serde-rs/serde/issues/2230.

`depythonize` hits the same issue. To fix this, we now only parse events
from JSON strings and not from values.
This commit is contained in:
Erik Johnston
2026-06-03 14:52:05 +01:00
committed by GitHub
parent 5bf825c016
commit e8e5a42180
10 changed files with 189 additions and 70 deletions
+1
View File
@@ -0,0 +1 @@
Port the python Event classes to Rust.
+7
View File
@@ -98,6 +98,13 @@ pub use vmsc4242::EventFormatVMSC4242;
/// fields as they are mutable (and must be deep-copied if the event is cloned).
/// `common_fields` and `specific_fields` are both `#[serde(flatten)]`ed so that
/// the serialised JSON is a single flat object matching the Matrix spec.
///
/// Note, deserialization of this struct must not be done from
/// [`serde_json::Value`] nor [`pythonize::depythonize`], due to a bug with
/// `#[serde(flatten)]` combined with the `arbitrary_precision` feature.
/// Instead, deserialize directly from a JSON string with
/// `serde_json::from_str`. See https://github.com/serde-rs/serde/issues/2230
/// for details.
#[derive(Serialize, Deserialize)]
pub struct FormattedEvent<E = Arc<EventFormatEnum>> {
/// The event's signatures.
+28 -52
View File
@@ -160,10 +160,15 @@ pub struct Event {
#[pymethods]
impl Event {
/// Construct an Event from a JSON string, room version, and internal
/// metadata dict.
///
/// We do not accept a Python dict directly because of the issues with
/// depythonize and large integers (see [`FormattedEvent`] for details).
#[new]
fn new_from_py<'a, 'py>(
py: Python<'py>,
event_dict: &'a Bound<'py, PyAny>,
event_json: &str,
room_version: &'a Bound<'py, PyAny>,
internal_metadata_dict: &'a Bound<'py, PyDict>,
rejected_reason: Option<String>,
@@ -178,14 +183,14 @@ impl Event {
let rejected_reason = rejected_reason.map(String::into_boxed_str);
// Parse the event dict into a FormattedEvent, converting any failures to
// Parse the event JSON into a FormattedEvent, converting any failures to
// a `ValueError`.
let parsed_event = depythonize_event_dict(room_version, event_dict).map_err(|err| {
let parsed_event = event_dict_from_json_str(room_version, event_json).map_err(|err| {
let new_err = PyValueError::new_err(format!(
"Failed to parse event for room version {}",
room_version
"Failed to parse event for room version {}: {}",
room_version, err
));
new_err.set_cause(py, Some(err));
new_err.set_cause(py, Some(PyValueError::new_err(err.to_string())));
new_err
})?;
@@ -555,63 +560,27 @@ impl Event {
}
}
fn depythonize_event_dict(
/// Parses a JSON string into a [`FormattedEvent`] for the given room version.
fn event_dict_from_json_str(
room_version: &RoomVersion,
event_dict: &Bound<'_, PyAny>,
) -> PyResult<FormattedEvent> {
let formatted_event: FormattedEvent = match room_version.event_format {
EventFormatVersions::ROOM_V1_V2 => {
let event_format: FormattedEvent<EventFormatV1> = depythonize(event_dict)?;
event_format.into()
}
EventFormatVersions::ROOM_V3 | EventFormatVersions::ROOM_V4_PLUS => {
let event_format: FormattedEvent<EventFormatV2V3> = depythonize(event_dict)?;
event_format.into()
}
EventFormatVersions::ROOM_V11_HYDRA_PLUS => {
let event_format: FormattedEvent<EventFormatV4> = depythonize(event_dict)?;
event_format.into()
}
EventFormatVersions::ROOM_VMSC4242 => {
let event_format: FormattedEvent<EventFormatVMSC4242> = depythonize(event_dict)?;
event_format.into()
}
_ => {
return Err(PyValueError::new_err(format!(
"Unsupported room version: {}",
room_version
)))
}
};
formatted_event.validate()?;
Ok(formatted_event)
}
/// Converts an event dict as [`serde_json::Value`] into a [`FormattedEvent`].
fn event_dict_from_json_value(
room_version: &RoomVersion,
event_dict: serde_json::Value,
event_json: &str,
) -> Result<FormattedEvent, Error> {
let formatted_event: FormattedEvent = match room_version.event_format {
EventFormatVersions::ROOM_V1_V2 => {
let event_format: FormattedEvent<EventFormatV1> = serde_json::from_value(event_dict)?;
let event_format: FormattedEvent<EventFormatV1> = serde_json::from_str(event_json)?;
event_format.into()
}
EventFormatVersions::ROOM_V3 | EventFormatVersions::ROOM_V4_PLUS => {
let event_format: FormattedEvent<EventFormatV2V3> = serde_json::from_value(event_dict)?;
let event_format: FormattedEvent<EventFormatV2V3> = serde_json::from_str(event_json)?;
event_format.into()
}
EventFormatVersions::ROOM_V11_HYDRA_PLUS => {
let event_format: FormattedEvent<EventFormatV4> = serde_json::from_value(event_dict)?;
let event_format: FormattedEvent<EventFormatV4> = serde_json::from_str(event_json)?;
event_format.into()
}
EventFormatVersions::ROOM_VMSC4242 => {
let event_format: FormattedEvent<EventFormatVMSC4242> =
serde_json::from_value(event_dict)?;
serde_json::from_str(event_json)?;
event_format.into()
}
_ => {
@@ -638,10 +607,17 @@ fn redact_event_py(event: &Event) -> PyResult<Event> {
})?;
let redacted_value = redact(&event_value, event.room_version)?;
let redacted_formatted_event = event_dict_from_json_value(event.room_version, redacted_value)
.map_err(|err| {
PyValueError::new_err(format!("Failed to deserialize redacted event: {}", err))
// We can't convert from a value into [`Event`] directly, so we round-trip
// through a JSON string. See [`FormattedEvent`] for details on why we can't
// deserialize directly from a `serde_json::Value`.
let redacted_event_json = serde_json::to_string(&redacted_value).map_err(|err| {
PyValueError::new_err(format!("Failed to serialize redacted event: {}", err))
})?;
let redacted_formatted_event =
event_dict_from_json_str(event.room_version, &redacted_event_json).map_err(|err| {
PyValueError::new_err(format!("Failed to deserialize redacted event: {}", err))
})?;
let redacted_event = Event {
parsed_event: redacted_formatted_event,
+9 -4
View File
@@ -16,9 +16,9 @@
use std::sync::{Arc, RwLock, RwLockReadGuard};
use pyo3::{
exceptions::{PyKeyError, PyRuntimeError, PyTypeError},
exceptions::{PyKeyError, PyRuntimeError, PyTypeError, PyValueError},
pyclass, pymethods,
types::{PyAnyMethods, PyList, PyListMethods, PyMapping},
types::{PyAnyMethods, PyList, PyListMethods},
Bound, IntoPyObjectExt, PyAny, PyResult, Python,
};
use pythonize::{depythonize, pythonize};
@@ -114,9 +114,14 @@ impl Unsigned {
#[pymethods]
impl Unsigned {
/// Create a new `Unsigned` from a JSON string.
///
/// We do not accept a Python dict directly because of the issues with
/// depythonize and large integers (see [`FormattedEvent`] for details).
#[new]
fn py_new(unsigned: Bound<'_, PyMapping>) -> PyResult<Self> {
let inner = depythonize(&unsigned)?;
fn py_new(unsigned_json: &str) -> PyResult<Self> {
let inner = serde_json::from_str(unsigned_json)
.map_err(|err| PyValueError::new_err(format!("Failed to parse unsigned: {}", err)))?;
Ok(Self {
inner: Arc::new(RwLock::new(inner)),
+23 -2
View File
@@ -29,6 +29,7 @@ from typing import (
)
import attr
from canonicaljson import encode_canonical_json
from synapse.api.constants import (
EventContentFields,
@@ -72,15 +73,35 @@ def make_event_from_dict(
) -> Event:
"""Construct an EventBase from the given event dict"""
# The Event constructor only takes a JSON string; see the Event constructor
# for details.
event_json = encode_canonical_json(event_dict).decode("utf-8")
return make_event_from_json(
event_json=event_json,
room_version=room_version,
internal_metadata_dict=internal_metadata_dict,
rejected_reason=rejected_reason,
)
def make_event_from_json(
event_json: str,
room_version: RoomVersion = RoomVersions.V1,
internal_metadata_dict: JsonDict | None = None,
rejected_reason: str | None = None,
) -> Event:
"""Construct an EventBase from the given event JSON string"""
try:
return Event(
event_dict=event_dict,
event_json=event_json,
room_version=room_version,
internal_metadata_dict=internal_metadata_dict or {},
rejected_reason=rejected_reason,
)
except ValueError:
raise SynapseError(400, "Invalid event dict", Codes.BAD_JSON)
raise SynapseError(400, "Invalid event JSON", Codes.BAD_JSON)
@attr.s(slots=True, frozen=True, auto_attribs=True)
+2 -2
View File
@@ -189,7 +189,7 @@ class Signatures:
class Unsigned:
"""A class representing the unsigned data of an event."""
def __init__(self, unsigned_dict: JsonMapping): ...
def __init__(self, unsigned_json: str): ...
def __getitem__(self, key: str) -> Any:
"""Get the value for the given key.
@@ -230,7 +230,7 @@ class Event:
def __init__(
self,
event_dict: JsonDict,
event_json: str,
room_version: RoomVersion,
internal_metadata_dict: JsonDict,
rejected_reason: str | None,
+102
View File
@@ -0,0 +1,102 @@
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright (C) 2026 Element Creations Ltd.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
from typing import Any
from parameterized import parameterized_class
from synapse.api.room_versions import RoomVersions
from synapse.events import make_event_from_dict
from synapse.synapse_rust.events import redact_event
from synapse.types import JsonDict
from tests.test_utils.event_injection import EventTypes
from tests.unittest import TestCase
def create_minimal_event_dict(**fields: Any) -> JsonDict:
"""Create a minimal event dict that will parse correctly."""
return {
"type": EventTypes.Message,
"content": {},
"room_id": "!room:id",
"sender": "@user:id",
"event_id": "$event:id",
"origin_server_ts": 0,
"auth_events": [],
"prev_events": [],
"hashes": {},
"signatures": {},
"depth": 0,
**fields,
}
@parameterized_class(
[
{"test_int": 2**7 - 1},
{"test_int": 2**15 - 1},
{"test_int": 2**31 - 1},
{"test_int": 2**63 - 1},
{"test_int": 2**127 - 1},
{"test_int": 2**200},
]
)
class LargeIntTestCase(TestCase):
"""Test that we can handle various sized integers in events.
This is a regression test where we had issues handling integers that fit in
a Rust `i128`.
"""
test_int: int
"""The integer to test with. This will be set by the parameterized_class decorator."""
def test_very_large_int_in_event_content(self) -> None:
"""Test that we can handle integers in the event content, which is a
JsonObject and thus can contain arbitrary JSON."""
event_dict = create_minimal_event_dict(content={"some_field": self.test_int})
event = make_event_from_dict(event_dict, RoomVersions.V1)
self.assertEqual(event.content["some_field"], self.test_int)
def test_large_int_in_unsigned(self) -> None:
"""Test that we can handle integers in the unsigned data, which is an
Unsigned and thus can contain arbitrary JSON."""
event_dict = create_minimal_event_dict(
unsigned={"prev_content": {"some_field": self.test_int}}
)
event = make_event_from_dict(event_dict, RoomVersions.V1)
self.assertEqual(event.unsigned["prev_content"]["some_field"], self.test_int)
def test_large_int_redacted(self) -> None:
"""Test that we can redact events that have a large integer in a
protected content field (here, power levels `users`)."""
event_dict = create_minimal_event_dict(
type=EventTypes.PowerLevels,
state_key="",
content={"users": {"@user:id": self.test_int}},
)
event = make_event_from_dict(event_dict, RoomVersions.V1)
redacted_event = redact_event(event)
self.assertEqual(redacted_event.content["users"]["@user:id"], self.test_int)
+3 -3
View File
@@ -8,7 +8,7 @@ import synapse.rest.client.room
from synapse.api.constants import AccountDataTypes, EventTypes, Membership
from synapse.api.errors import Codes, LimitExceededError, SynapseError
from synapse.crypto.event_signing import add_hashes_and_signatures
from synapse.events import Event
from synapse.events import make_event_from_dict
from synapse.federation.federation_client import SendJoinResult
from synapse.server import HomeServer
from synapse.types import UserID, create_requester
@@ -124,7 +124,7 @@ class TestJoinsLimitedByPerRoomRateLimiter(FederatingHomeserverTestCase):
create_event_source,
self.hs.config.server.default_room_version,
)
create_event = Event(
create_event = make_event_from_dict(
create_event_source,
self.hs.config.server.default_room_version,
{},
@@ -148,7 +148,7 @@ class TestJoinsLimitedByPerRoomRateLimiter(FederatingHomeserverTestCase):
self.hs.hostname,
self.hs.signing_key,
)
join_event = Event(
join_event = make_event_from_dict(
join_event_source,
self.hs.config.server.default_room_version,
{},
+4 -4
View File
@@ -35,7 +35,7 @@ from synapse.api.constants import (
)
from synapse.api.filtering import Filter
from synapse.crypto.event_signing import add_hashes_and_signatures
from synapse.events import Event
from synapse.events import make_event_from_dict
from synapse.federation.federation_client import SendJoinResult
from synapse.rest import admin
from synapse.rest.client import login, room
@@ -1385,7 +1385,7 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(
create_event_source,
self.hs.config.server.default_room_version,
)
create_event = Event(
create_event = make_event_from_dict(
create_event_source,
self.hs.config.server.default_room_version,
{},
@@ -1408,7 +1408,7 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(
creator_join_event_source,
self.hs.config.server.default_room_version,
)
creator_join_event = Event(
creator_join_event = make_event_from_dict(
creator_join_event_source,
self.hs.config.server.default_room_version,
{},
@@ -1433,7 +1433,7 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(
self.hs.hostname,
self.hs.signing_key,
)
join_event = Event(
join_event = make_event_from_dict(
join_event_source,
self.hs.config.server.default_room_version,
{},
+10 -3
View File
@@ -11,15 +11,22 @@
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
from canonicaljson import encode_canonical_json
from synapse.synapse_rust.events import Unsigned
from synapse.types import JsonDict
from tests import unittest
def _make_unsigned(d: JsonDict) -> Unsigned:
return Unsigned(encode_canonical_json(d).decode("utf-8"))
class UnsignedTestCase(unittest.TestCase):
def test_prev_content(self) -> None:
"""Test that the prev_content field is correctly exposed as a JsonObject."""
unsigned = Unsigned({"prev_content": {"key1": "value1", "key2": 42}})
unsigned = _make_unsigned({"prev_content": {"key1": "value1", "key2": 42}})
self.assert_dict(unsigned["prev_content"], {"key1": "value1", "key2": 42})
@@ -32,7 +39,7 @@ class UnsignedTestCase(unittest.TestCase):
than the maximum rust native integer size."""
large_int = 2**200
unsigned = Unsigned({"age_ts": large_int})
unsigned = _make_unsigned({"age_ts": large_int})
self.assertEqual(unsigned["age_ts"], large_int)
@@ -44,7 +51,7 @@ class UnsignedTestCase(unittest.TestCase):
JSON."""
large_int = 2**200
unsigned = Unsigned({"prev_content": {"some_field": large_int}})
unsigned = _make_unsigned({"prev_content": {"some_field": large_int}})
self.assertEqual(unsigned["prev_content"]["some_field"], large_int)
self.assert_dict(