mirror of
https://github.com/element-hq/synapse.git
synced 2026-06-07 04:31:53 +00:00
Fix parsing events that have large integers (#19819)
Follow on from https://github.com/element-hq/synapse/pull/19701. Unfortunately serde has a bug when using `#[serde(flatten)]` with `arbitrary-precision` feature when handling integers that fit in a i128 when doing `serde_json::from_value`. See https://github.com/serde-rs/serde/issues/2230. The `depythonize` hits the same issue. To fix this we make it so we only parse events from strings and not values.
This commit is contained in:
@@ -0,0 +1 @@
|
||||
Port the python Event classes to Rust.
|
||||
@@ -98,6 +98,13 @@ pub use vmsc4242::EventFormatVMSC4242;
|
||||
/// fields as they are mutable (and must be deep-copied if the event is cloned).
|
||||
/// `common_fields` and `specific_fields` are both `#[serde(flatten)]`ed so that
|
||||
/// the serialised JSON is a single flat object matching the Matrix spec.
|
||||
///
|
||||
/// Note, deserialization of this struct must not be done from
|
||||
/// [`serde_json::Value`] nor [`pythonize::depythonize`], due to a bug with
|
||||
/// `#[serde(flatten)]` combined with the `arbitrary_precision` feature.
|
||||
/// Instead, deserialize directly from a JSON string with
|
||||
/// `serde_json::from_str`. See https://github.com/serde-rs/serde/issues/2230
|
||||
/// for details.
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct FormattedEvent<E = Arc<EventFormatEnum>> {
|
||||
/// The event's signatures.
|
||||
|
||||
+28
-52
@@ -160,10 +160,15 @@ pub struct Event {
|
||||
|
||||
#[pymethods]
|
||||
impl Event {
|
||||
/// Construct an Event from a JSON string, room version, and internal
|
||||
/// metadata dict.
|
||||
///
|
||||
/// We do no accept a Python dict directly because of the issues with
|
||||
/// depythonize and large integers (see [`FormattedEvent`] for details).
|
||||
#[new]
|
||||
fn new_from_py<'a, 'py>(
|
||||
py: Python<'py>,
|
||||
event_dict: &'a Bound<'py, PyAny>,
|
||||
event_json: &str,
|
||||
room_version: &'a Bound<'py, PyAny>,
|
||||
internal_metadata_dict: &'a Bound<'py, PyDict>,
|
||||
rejected_reason: Option<String>,
|
||||
@@ -178,14 +183,14 @@ impl Event {
|
||||
|
||||
let rejected_reason = rejected_reason.map(String::into_boxed_str);
|
||||
|
||||
// Parse the event dict into a FormattedEvent, converting any failures to
|
||||
// Parse the event JSON into a FormattedEvent, converting any failures to
|
||||
// a `ValueError`.
|
||||
let parsed_event = depythonize_event_dict(room_version, event_dict).map_err(|err| {
|
||||
let parsed_event = event_dict_from_json_str(room_version, event_json).map_err(|err| {
|
||||
let new_err = PyValueError::new_err(format!(
|
||||
"Failed to parse event for room version {}",
|
||||
room_version
|
||||
"Failed to parse event for room version {}: {}",
|
||||
room_version, err
|
||||
));
|
||||
new_err.set_cause(py, Some(err));
|
||||
new_err.set_cause(py, Some(PyValueError::new_err(err.to_string())));
|
||||
new_err
|
||||
})?;
|
||||
|
||||
@@ -555,63 +560,27 @@ impl Event {
|
||||
}
|
||||
}
|
||||
|
||||
fn depythonize_event_dict(
|
||||
/// Parses a JSON string into a [`FormattedEvent`] for the given room version.
|
||||
fn event_dict_from_json_str(
|
||||
room_version: &RoomVersion,
|
||||
event_dict: &Bound<'_, PyAny>,
|
||||
) -> PyResult<FormattedEvent> {
|
||||
let formatted_event: FormattedEvent = match room_version.event_format {
|
||||
EventFormatVersions::ROOM_V1_V2 => {
|
||||
let event_format: FormattedEvent<EventFormatV1> = depythonize(event_dict)?;
|
||||
|
||||
event_format.into()
|
||||
}
|
||||
EventFormatVersions::ROOM_V3 | EventFormatVersions::ROOM_V4_PLUS => {
|
||||
let event_format: FormattedEvent<EventFormatV2V3> = depythonize(event_dict)?;
|
||||
event_format.into()
|
||||
}
|
||||
EventFormatVersions::ROOM_V11_HYDRA_PLUS => {
|
||||
let event_format: FormattedEvent<EventFormatV4> = depythonize(event_dict)?;
|
||||
event_format.into()
|
||||
}
|
||||
EventFormatVersions::ROOM_VMSC4242 => {
|
||||
let event_format: FormattedEvent<EventFormatVMSC4242> = depythonize(event_dict)?;
|
||||
event_format.into()
|
||||
}
|
||||
_ => {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"Unsupported room version: {}",
|
||||
room_version
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
formatted_event.validate()?;
|
||||
|
||||
Ok(formatted_event)
|
||||
}
|
||||
|
||||
/// Converts an event dict as [`serde_json::Value`] into a [`FormattedEvent`].
|
||||
fn event_dict_from_json_value(
|
||||
room_version: &RoomVersion,
|
||||
event_dict: serde_json::Value,
|
||||
event_json: &str,
|
||||
) -> Result<FormattedEvent, Error> {
|
||||
let formatted_event: FormattedEvent = match room_version.event_format {
|
||||
EventFormatVersions::ROOM_V1_V2 => {
|
||||
let event_format: FormattedEvent<EventFormatV1> = serde_json::from_value(event_dict)?;
|
||||
|
||||
let event_format: FormattedEvent<EventFormatV1> = serde_json::from_str(event_json)?;
|
||||
event_format.into()
|
||||
}
|
||||
EventFormatVersions::ROOM_V3 | EventFormatVersions::ROOM_V4_PLUS => {
|
||||
let event_format: FormattedEvent<EventFormatV2V3> = serde_json::from_value(event_dict)?;
|
||||
let event_format: FormattedEvent<EventFormatV2V3> = serde_json::from_str(event_json)?;
|
||||
event_format.into()
|
||||
}
|
||||
EventFormatVersions::ROOM_V11_HYDRA_PLUS => {
|
||||
let event_format: FormattedEvent<EventFormatV4> = serde_json::from_value(event_dict)?;
|
||||
let event_format: FormattedEvent<EventFormatV4> = serde_json::from_str(event_json)?;
|
||||
event_format.into()
|
||||
}
|
||||
EventFormatVersions::ROOM_VMSC4242 => {
|
||||
let event_format: FormattedEvent<EventFormatVMSC4242> =
|
||||
serde_json::from_value(event_dict)?;
|
||||
serde_json::from_str(event_json)?;
|
||||
event_format.into()
|
||||
}
|
||||
_ => {
|
||||
@@ -638,10 +607,17 @@ fn redact_event_py(event: &Event) -> PyResult<Event> {
|
||||
})?;
|
||||
|
||||
let redacted_value = redact(&event_value, event.room_version)?;
|
||||
let redacted_formatted_event = event_dict_from_json_value(event.room_version, redacted_value)
|
||||
.map_err(|err| {
|
||||
PyValueError::new_err(format!("Failed to deserialize redacted event: {}", err))
|
||||
|
||||
// We can't convert from a value into [`Event`] directly, so we round-trip
|
||||
// through JSON. See [`FormattedEvent`] for details on why we can't go
|
||||
// directly through Python dicts.
|
||||
let redacted_event_json = serde_json::to_string(&redacted_value).map_err(|err| {
|
||||
PyValueError::new_err(format!("Failed to serialize redacted event: {}", err))
|
||||
})?;
|
||||
let redacted_formatted_event =
|
||||
event_dict_from_json_str(event.room_version, &redacted_event_json).map_err(|err| {
|
||||
PyValueError::new_err(format!("Failed to deserialize redacted event: {}", err))
|
||||
})?;
|
||||
|
||||
let redacted_event = Event {
|
||||
parsed_event: redacted_formatted_event,
|
||||
|
||||
@@ -16,9 +16,9 @@
|
||||
use std::sync::{Arc, RwLock, RwLockReadGuard};
|
||||
|
||||
use pyo3::{
|
||||
exceptions::{PyKeyError, PyRuntimeError, PyTypeError},
|
||||
exceptions::{PyKeyError, PyRuntimeError, PyTypeError, PyValueError},
|
||||
pyclass, pymethods,
|
||||
types::{PyAnyMethods, PyList, PyListMethods, PyMapping},
|
||||
types::{PyAnyMethods, PyList, PyListMethods},
|
||||
Bound, IntoPyObjectExt, PyAny, PyResult, Python,
|
||||
};
|
||||
use pythonize::{depythonize, pythonize};
|
||||
@@ -114,9 +114,14 @@ impl Unsigned {
|
||||
|
||||
#[pymethods]
|
||||
impl Unsigned {
|
||||
/// Create a new `Unsigned` from a JSON string.
|
||||
///
|
||||
/// We do no accept a Python dict directly because of the issues with
|
||||
/// depythonize and large integers (see [`FormattedEvent`] for details).
|
||||
#[new]
|
||||
fn py_new(unsigned: Bound<'_, PyMapping>) -> PyResult<Self> {
|
||||
let inner = depythonize(&unsigned)?;
|
||||
fn py_new(unsigned_json: &str) -> PyResult<Self> {
|
||||
let inner = serde_json::from_str(unsigned_json)
|
||||
.map_err(|err| PyValueError::new_err(format!("Failed to parse unsigned: {}", err)))?;
|
||||
|
||||
Ok(Self {
|
||||
inner: Arc::new(RwLock::new(inner)),
|
||||
|
||||
@@ -29,6 +29,7 @@ from typing import (
|
||||
)
|
||||
|
||||
import attr
|
||||
from canonicaljson import encode_canonical_json
|
||||
|
||||
from synapse.api.constants import (
|
||||
EventContentFields,
|
||||
@@ -72,15 +73,35 @@ def make_event_from_dict(
|
||||
) -> Event:
|
||||
"""Construct an EventBase from the given event dict"""
|
||||
|
||||
# Event constructor only takes JSON string, see Event constructor for
|
||||
# details.
|
||||
event_json = encode_canonical_json(event_dict).decode("utf-8")
|
||||
|
||||
return make_event_from_json(
|
||||
event_json=event_json,
|
||||
room_version=room_version,
|
||||
internal_metadata_dict=internal_metadata_dict,
|
||||
rejected_reason=rejected_reason,
|
||||
)
|
||||
|
||||
|
||||
def make_event_from_json(
|
||||
event_json: str,
|
||||
room_version: RoomVersion = RoomVersions.V1,
|
||||
internal_metadata_dict: JsonDict | None = None,
|
||||
rejected_reason: str | None = None,
|
||||
) -> Event:
|
||||
"""Construct an EventBase from the given event JSON string"""
|
||||
|
||||
try:
|
||||
return Event(
|
||||
event_dict=event_dict,
|
||||
event_json=event_json,
|
||||
room_version=room_version,
|
||||
internal_metadata_dict=internal_metadata_dict or {},
|
||||
rejected_reason=rejected_reason,
|
||||
)
|
||||
except ValueError:
|
||||
raise SynapseError(400, "Invalid event dict", Codes.BAD_JSON)
|
||||
raise SynapseError(400, "Invalid event JSON", Codes.BAD_JSON)
|
||||
|
||||
|
||||
@attr.s(slots=True, frozen=True, auto_attribs=True)
|
||||
|
||||
@@ -189,7 +189,7 @@ class Signatures:
|
||||
class Unsigned:
|
||||
"""A class representing the unsigned data of an event."""
|
||||
|
||||
def __init__(self, unsigned_dict: JsonMapping): ...
|
||||
def __init__(self, unsigned_json: str): ...
|
||||
def __getitem__(self, key: str) -> Any:
|
||||
"""Get the value for the given key.
|
||||
|
||||
@@ -230,7 +230,7 @@ class Event:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
event_dict: JsonDict,
|
||||
event_json: str,
|
||||
room_version: RoomVersion,
|
||||
internal_metadata_dict: JsonDict,
|
||||
rejected_reason: str | None,
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
#
|
||||
# This file is licensed under the Affero General Public License (AGPL) version 3.
|
||||
#
|
||||
# Copyright (C) 2026 Element Creations Ltd.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# See the GNU Affero General Public License for more details:
|
||||
# <https://www.gnu.org/licenses/agpl-3.0.html>.
|
||||
|
||||
|
||||
from typing import Any
|
||||
|
||||
from parameterized import parameterized_class
|
||||
|
||||
from synapse.api.room_versions import RoomVersions
|
||||
from synapse.events import make_event_from_dict
|
||||
from synapse.synapse_rust.events import redact_event
|
||||
from synapse.types import JsonDict
|
||||
|
||||
from tests.test_utils.event_injection import EventTypes
|
||||
from tests.unittest import TestCase
|
||||
|
||||
|
||||
def create_minimal_event_dict(**fields: Any) -> JsonDict:
|
||||
"""Create a minimal event dict that will parse correctly."""
|
||||
return {
|
||||
"type": EventTypes.Message,
|
||||
"content": {},
|
||||
"room_id": "!room:id",
|
||||
"sender": "@user:id",
|
||||
"event_id": "$event:id",
|
||||
"origin_server_ts": 0,
|
||||
"auth_events": [],
|
||||
"prev_events": [],
|
||||
"hashes": {},
|
||||
"signatures": {},
|
||||
"depth": 0,
|
||||
**fields,
|
||||
}
|
||||
|
||||
|
||||
@parameterized_class(
|
||||
[
|
||||
{"test_int": 2**7 - 1},
|
||||
{"test_int": 2**15 - 1},
|
||||
{"test_int": 2**31 - 1},
|
||||
{"test_int": 2**63 - 1},
|
||||
{"test_int": 2**127 - 1},
|
||||
{"test_int": 2**200},
|
||||
]
|
||||
)
|
||||
class LargeIntTestCase(TestCase):
|
||||
"""Test that we can handle various sized integers in events.
|
||||
|
||||
This is a regression test where we had issues handling integers that fit in
|
||||
a Rust `i128`.
|
||||
"""
|
||||
|
||||
test_int: int
|
||||
"""The integer to test with. This will be set by the parameterized_class decorator."""
|
||||
|
||||
def test_very_large_int_in_event_content(self) -> None:
|
||||
"""Test that we can handle integers in the event content, which is a
|
||||
JsonObject and thus can contain arbitrary JSON."""
|
||||
|
||||
event_dict = create_minimal_event_dict(content={"some_field": self.test_int})
|
||||
|
||||
event = make_event_from_dict(event_dict, RoomVersions.V1)
|
||||
|
||||
self.assertEqual(event.content["some_field"], self.test_int)
|
||||
|
||||
def test_large_int_in_unsigned(self) -> None:
|
||||
"""Test that we can handle integers in the unsigned data, which is an
|
||||
Unsigned and thus can contain arbitrary JSON."""
|
||||
|
||||
event_dict = create_minimal_event_dict(
|
||||
unsigned={"prev_content": {"some_field": self.test_int}}
|
||||
)
|
||||
|
||||
event = make_event_from_dict(event_dict, RoomVersions.V1)
|
||||
|
||||
self.assertEqual(event.unsigned["prev_content"]["some_field"], self.test_int)
|
||||
|
||||
def test_large_int_redacted(self) -> None:
|
||||
"""Test that redact events that have an unsigned field with a large
|
||||
integer in a protected field"""
|
||||
|
||||
event_dict = create_minimal_event_dict(
|
||||
type=EventTypes.PowerLevels,
|
||||
state_key="",
|
||||
content={"users": {"@user:id": self.test_int}},
|
||||
)
|
||||
|
||||
event = make_event_from_dict(event_dict, RoomVersions.V1)
|
||||
|
||||
redacted_event = redact_event(event)
|
||||
|
||||
self.assertEqual(redacted_event.content["users"]["@user:id"], self.test_int)
|
||||
@@ -8,7 +8,7 @@ import synapse.rest.client.room
|
||||
from synapse.api.constants import AccountDataTypes, EventTypes, Membership
|
||||
from synapse.api.errors import Codes, LimitExceededError, SynapseError
|
||||
from synapse.crypto.event_signing import add_hashes_and_signatures
|
||||
from synapse.events import Event
|
||||
from synapse.events import make_event_from_dict
|
||||
from synapse.federation.federation_client import SendJoinResult
|
||||
from synapse.server import HomeServer
|
||||
from synapse.types import UserID, create_requester
|
||||
@@ -124,7 +124,7 @@ class TestJoinsLimitedByPerRoomRateLimiter(FederatingHomeserverTestCase):
|
||||
create_event_source,
|
||||
self.hs.config.server.default_room_version,
|
||||
)
|
||||
create_event = Event(
|
||||
create_event = make_event_from_dict(
|
||||
create_event_source,
|
||||
self.hs.config.server.default_room_version,
|
||||
{},
|
||||
@@ -148,7 +148,7 @@ class TestJoinsLimitedByPerRoomRateLimiter(FederatingHomeserverTestCase):
|
||||
self.hs.hostname,
|
||||
self.hs.signing_key,
|
||||
)
|
||||
join_event = Event(
|
||||
join_event = make_event_from_dict(
|
||||
join_event_source,
|
||||
self.hs.config.server.default_room_version,
|
||||
{},
|
||||
|
||||
@@ -35,7 +35,7 @@ from synapse.api.constants import (
|
||||
)
|
||||
from synapse.api.filtering import Filter
|
||||
from synapse.crypto.event_signing import add_hashes_and_signatures
|
||||
from synapse.events import Event
|
||||
from synapse.events import make_event_from_dict
|
||||
from synapse.federation.federation_client import SendJoinResult
|
||||
from synapse.rest import admin
|
||||
from synapse.rest.client import login, room
|
||||
@@ -1385,7 +1385,7 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(
|
||||
create_event_source,
|
||||
self.hs.config.server.default_room_version,
|
||||
)
|
||||
create_event = Event(
|
||||
create_event = make_event_from_dict(
|
||||
create_event_source,
|
||||
self.hs.config.server.default_room_version,
|
||||
{},
|
||||
@@ -1408,7 +1408,7 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(
|
||||
creator_join_event_source,
|
||||
self.hs.config.server.default_room_version,
|
||||
)
|
||||
creator_join_event = Event(
|
||||
creator_join_event = make_event_from_dict(
|
||||
creator_join_event_source,
|
||||
self.hs.config.server.default_room_version,
|
||||
{},
|
||||
@@ -1433,7 +1433,7 @@ class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(
|
||||
self.hs.hostname,
|
||||
self.hs.signing_key,
|
||||
)
|
||||
join_event = Event(
|
||||
join_event = make_event_from_dict(
|
||||
join_event_source,
|
||||
self.hs.config.server.default_room_version,
|
||||
{},
|
||||
|
||||
@@ -11,15 +11,22 @@
|
||||
# See the GNU Affero General Public License for more details:
|
||||
# <https://www.gnu.org/licenses/agpl-3.0.html>.
|
||||
|
||||
from canonicaljson import encode_canonical_json
|
||||
|
||||
from synapse.synapse_rust.events import Unsigned
|
||||
from synapse.types import JsonDict
|
||||
|
||||
from tests import unittest
|
||||
|
||||
|
||||
def _make_unsigned(d: JsonDict) -> Unsigned:
|
||||
return Unsigned(encode_canonical_json(d).decode("utf-8"))
|
||||
|
||||
|
||||
class UnsignedTestCase(unittest.TestCase):
|
||||
def test_prev_content(self) -> None:
|
||||
"""Test that the prev_content field is correctly exposed as a JsonObject."""
|
||||
unsigned = Unsigned({"prev_content": {"key1": "value1", "key2": 42}})
|
||||
unsigned = _make_unsigned({"prev_content": {"key1": "value1", "key2": 42}})
|
||||
|
||||
self.assert_dict(unsigned["prev_content"], {"key1": "value1", "key2": 42})
|
||||
|
||||
@@ -32,7 +39,7 @@ class UnsignedTestCase(unittest.TestCase):
|
||||
than the maximum rust native integer size."""
|
||||
|
||||
large_int = 2**200
|
||||
unsigned = Unsigned({"age_ts": large_int})
|
||||
unsigned = _make_unsigned({"age_ts": large_int})
|
||||
|
||||
self.assertEqual(unsigned["age_ts"], large_int)
|
||||
|
||||
@@ -44,7 +51,7 @@ class UnsignedTestCase(unittest.TestCase):
|
||||
JSON."""
|
||||
|
||||
large_int = 2**200
|
||||
unsigned = Unsigned({"prev_content": {"some_field": large_int}})
|
||||
unsigned = _make_unsigned({"prev_content": {"some_field": large_int}})
|
||||
|
||||
self.assertEqual(unsigned["prev_content"]["some_field"], large_int)
|
||||
self.assert_dict(
|
||||
|
||||
Reference in New Issue
Block a user