Handle arbitrary sized integers in unsigned. (#19769)

Handle arbitrarily sized integers in `unsigned` (and in other Rust objects
that use `serde_json::Value`).
This commit is contained in:
Erik Johnston
2026-05-13 11:28:06 +01:00
committed by GitHub
parent eb2ae9d3da
commit 5efeac44b2
10 changed files with 225 additions and 84 deletions
Generated
+15 -44
View File
@@ -29,12 +29,6 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "base64"
version = "0.22.1"
@@ -646,12 +640,6 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "indoc"
version = "2.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
[[package]]
name = "ipnet"
version = "2.11.0"
@@ -735,15 +723,6 @@ version = "2.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "memoffset"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
dependencies = [
"autocfg",
]
[[package]]
name = "mime"
version = "0.3.17"
@@ -821,37 +800,34 @@ dependencies = [
[[package]]
name = "pyo3"
version = "0.27.2"
version = "0.28.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab53c047fcd1a1d2a8820fe84f05d6be69e9526be40cb03b73f86b6b03e6d87d"
checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12"
dependencies = [
"anyhow",
"bytes",
"indoc",
"libc",
"memoffset",
"once_cell",
"portable-atomic",
"pyo3-build-config",
"pyo3-ffi",
"pyo3-macros",
"unindent",
]
[[package]]
name = "pyo3-build-config"
version = "0.27.2"
version = "0.28.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b455933107de8642b4487ed26d912c2d899dec6114884214a0b3bb3be9261ea6"
checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e"
dependencies = [
"target-lexicon",
]
[[package]]
name = "pyo3-ffi"
version = "0.27.2"
version = "0.28.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c85c9cbfaddf651b1221594209aed57e9e5cff63c4d11d1feead529b872a089"
checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e"
dependencies = [
"libc",
"pyo3-build-config",
@@ -870,9 +846,9 @@ dependencies = [
[[package]]
name = "pyo3-macros"
version = "0.27.2"
version = "0.28.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a5b10c9bf9888125d917fb4d2ca2d25c8df94c7ab5a52e13313a07e050a3b02"
checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
@@ -882,9 +858,9 @@ dependencies = [
[[package]]
name = "pyo3-macros-backend"
version = "0.27.2"
version = "0.28.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03b51720d314836e53327f5871d4c0cfb4fb37cc2c4a11cc71907a86342c40f9"
checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb"
dependencies = [
"heck",
"proc-macro2",
@@ -895,12 +871,13 @@ dependencies = [
[[package]]
name = "pythonize"
version = "0.27.0"
version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3a8f29db331e28c332c63496cfcbb822aca3d7320bc08b655d7fd0c29c50ede"
checksum = "0b79f670c9626c8b651c0581011b57b6ba6970bb69faf01a7c4c0cfc81c43f95"
dependencies = [
"pyo3",
"serde",
"serde_json",
]
[[package]]
@@ -1391,9 +1368,9 @@ dependencies = [
[[package]]
name = "target-lexicon"
version = "0.13.2"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a"
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
[[package]]
name = "thiserror"
@@ -1569,12 +1546,6 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "unindent"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
[[package]]
name = "untrusted"
version = "0.9.0"
+1
View File
@@ -0,0 +1 @@
Correctly handle arbitrary precision integers in the `unsigned` field of events.
+10 -4
View File
@@ -30,7 +30,7 @@ http = "1.1.0"
lazy_static = "1.4.0"
log = "0.4.17"
mime = "0.3.17"
pyo3 = { version = "0.27.2", features = [
pyo3 = { version = "0.28.3", features = [
"macros",
"anyhow",
"abi3",
@@ -39,12 +39,18 @@ pyo3 = { version = "0.27.2", features = [
# https://docs.rs/pyo3/latest/pyo3/bytes/index.html
"bytes",
] }
pyo3-log = "0.13.1"
pythonize = "0.27.0"
pyo3-log = "0.13.3"
pythonize = { version = "0.28.0", features = ["arbitrary_precision"] }
regex = "1.6.0"
sha2 = "0.10.8"
serde = { version = "1.0.144", features = ["derive", "rc"] }
serde_json = { version = "1.0.85", features = ["raw_value"] }
serde_json = { version = "1.0.85", features = [
"raw_value",
# We need to be able to parse arbitrary precision numbers, as some numbers
# in the database may be out of range of i64 (as Python uses arbitrary
# precision integers).
"arbitrary_precision",
] }
ulid = "1.1.2"
icu_segmenter = "2.0.0"
reqwest = { version = "0.12.15", default-features = false, features = [
+1 -1
View File
@@ -47,7 +47,7 @@ pub fn register_module(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()>
}
#[derive(Debug, Clone)]
#[pyclass(frozen)]
#[pyclass(frozen, skip_from_py_object)]
pub struct ServerAclEvaluator {
allow_ip_literals: bool,
allow: Vec<Regex>,
+118 -14
View File
@@ -29,7 +29,7 @@ use std::{
io::{self, Write},
};
use serde::ser::SerializeMap;
use serde::{ser::SerializeMap, Serializer as _};
use serde::{
ser::{Error as _, SerializeStruct},
Serialize,
@@ -37,7 +37,7 @@ use serde::{
use serde_json::{
ser::{Formatter, Serializer},
value::RawValue,
Value,
Number, Value,
};
/// The minimum integer that can be used in canonical JSON.
@@ -46,6 +46,12 @@ pub const MIN_VALID_INTEGER: i64 = -(2i64.pow(53)) + 1;
/// The maximum integer that can be used in canonical JSON.
pub const MAX_VALID_INTEGER: i64 = (2i64.pow(53)) - 1;
/// A token used by `serde_json` to identify its internal `Number` type when the
/// `arbitrary_precision` feature is enabled. This is a copy of serde_json's
/// internal `TOKEN` for `Number`, which unfortunately isn't exported by the
/// crate, so it must be kept in sync with the `serde_json` version in use.
const SERDE_JSON_NUMBER_TOKEN: &str = "$serde_json::private::Number";
/// Options to control how strict JSON canonicalization is.
#[derive(Clone, Debug)]
pub struct CanonicalizationOptions {
@@ -236,7 +242,7 @@ where
type SerializeMap = CanonicalSerializeMap<'a, W>;
type SerializeStruct = CanonicalSerializeMap<'a, W>;
type SerializeStruct = CanonicalSerializeStruct<'a, W>;
type SerializeStructVariant =
<&'a mut Serializer<W, CanonicalFormatter> as serde::Serializer>::SerializeStructVariant;
@@ -426,7 +432,7 @@ where
fn serialize_struct(
self,
name: &'static str,
_len: usize,
len: usize,
) -> Result<Self::SerializeStruct, Self::Error> {
// We want to disallow `RawValue` as we don't know if its contents are
// canonical JSON.
@@ -436,10 +442,7 @@ where
if name == "$serde_json::private::RawValue" {
return Err(Self::Error::custom("`RawValue` is not supported"));
}
Ok(CanonicalSerializeMap::new(
&mut self.inner,
self.options.clone(),
))
CanonicalSerializeStruct::new(name, len, &mut self.inner, self.options.clone())
}
fn serialize_struct_variant(
@@ -554,7 +557,42 @@ where
}
}
impl<'a, W> SerializeStruct for CanonicalSerializeMap<'a, W>
/// A helper type for [`CanonicalSerializer`] that serializes structs with
/// their fields in lexicographic order.
#[doc(hidden)]
pub struct CanonicalSerializeStruct<'a, W: Write> {
// The struct name that was passed to `serialize_struct`. It is compared
// against `serde_json`'s special `Number` token to detect that case.
name: &'static str,
// We buffer up the key and serialized value for each field we see.
// Iterating the BTreeMap then yields the fields in lexicographic key order.
map: BTreeMap<&'static str, Box<RawValue>>,
// The canonicalization options used when serializing each field's value.
options: CanonicalizationOptions,
// The serializer to use to write the sorted fields to.
struct_serializer:
<&'a mut Serializer<W, CanonicalFormatter> as serde::Serializer>::SerializeStruct,
}
impl<'a, W: Write> CanonicalSerializeStruct<'a, W> {
    /// Begins serializing a struct called `name` with `len` fields on `ser`.
    ///
    /// The returned helper starts with an empty field buffer. An error is
    /// returned if the underlying serializer fails to start the struct.
    fn new(
        name: &'static str,
        len: usize,
        ser: &'a mut Serializer<W, CanonicalFormatter>,
        options: CanonicalizationOptions,
    ) -> Result<Self, serde_json::Error> {
        // Open the struct on the wrapped serializer first; only build the
        // helper if that succeeded.
        ser.serialize_struct(name, len)
            .map(|struct_serializer| Self {
                name,
                map: BTreeMap::new(),
                options,
                struct_serializer,
            })
    }
}
impl<'a, W> SerializeStruct for CanonicalSerializeStruct<'a, W>
where
W: Write,
{
@@ -566,20 +604,69 @@ where
where
T: Serialize + ?Sized,
{
let key_string = key.to_string();
// Check if this is the special case of `SERDE_JSON_NUMBER_TOKEN`,
// which is used when serializing numbers with the `arbitrary_precision`
// feature. If so, we can just serialize it directly without
// canonicalizing it first, as `serde_json` will have already serialized
// it in a canonical way.
if key == SERDE_JSON_NUMBER_TOKEN && self.name == SERDE_JSON_NUMBER_TOKEN {
if self.options.enforce_int_range {
// We need to check that the number is in the valid range, as
// `serde_json` won't have done this for us as we're using the
// `arbitrary_precision` feature.
// The value here will be something that serializes to a JSON
// string containing the number, so we first serialize it to a
// Value and pull the string out, then parse it as a `Number`.
let serde_val = serde_json::to_value(value)?;
let serde_json::Value::String(number_str) = serde_val else {
return Err(serde_json::Error::custom("invalid number"));
};
let number: Number = number_str
.parse()
.map_err(|_| serde_json::Error::custom("invalid number"))?;
// Now check that the number is an integer in the valid range.
if let Some(int) = number.as_i64() {
assert_integer_in_range(int)?;
} else {
// Can't be cast to an i64, so it must be out of range.
return Err(serde_json::Error::custom("integer out of range"));
}
}
self.struct_serializer.serialize_field(key, value)?;
return Ok(());
}
// We serialize the value canonically, then store it as a `RawValue` in
// the buffer map.
let value_string = to_string_canonical(value, self.options.clone())?;
self.map
.insert(key_string, RawValue::from_string(value_string)?);
self.map.insert(key, RawValue::from_string(value_string)?);
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
self.map.serialize(self.ser)?;
fn end(mut self) -> Result<Self::Ok, Self::Error> {
if self.name == SERDE_JSON_NUMBER_TOKEN {
// Map must be empty in this case, as `SERDE_JSON_NUMBER_TOKEN`
// only has one field and we've handled it in `serialize_field`.
if !self.map.is_empty() {
return Err(Self::Error::custom(format!(
"unexpected fields in `{}`",
SERDE_JSON_NUMBER_TOKEN
)));
}
}
for (key, value) in self.map {
self.struct_serializer.serialize_field(key, &value)?;
}
SerializeStruct::end(self.struct_serializer)?;
Ok(())
}
@@ -737,6 +824,23 @@ mod tests {
assert!(to_string_canonical(&-(2i128.pow(60)), CanonicalizationOptions::strict()).is_err());
}
#[test]
fn bigints() {
    // A number far outside the i64 range, which `serde_json::Number` can
    // still parse from its decimal string form.
    let digits = "10000000000000000000000000000000000000";
    let number = digits.parse::<serde_json::Number>().unwrap();

    // Relaxed canonicalization passes the digits through untouched.
    let relaxed = to_string_canonical(&number, CanonicalizationOptions::relaxed());
    assert_eq!(relaxed.unwrap(), digits);

    // Strict canonicalization rejects it, as it's out of range.
    let strict = to_string_canonical(&number, CanonicalizationOptions::strict());
    assert!(strict.is_err());
}
#[test]
fn backwards_compatibility() {
assert_eq!(
+1 -1
View File
@@ -476,7 +476,7 @@ impl EventInternalMetadataInner {
}
}
#[pyclass(frozen)]
#[pyclass(frozen, skip_from_py_object)]
#[derive(Clone)]
pub struct EventInternalMetadata {
inner: Arc<RwLock<EventInternalMetadataInner>>,
+16 -12
View File
@@ -23,6 +23,7 @@ use pyo3::{
};
use pythonize::{depythonize, pythonize};
use serde::{Deserialize, Serialize};
use serde_json::Number;
#[pyclass(frozen, skip_from_py_object)]
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
@@ -36,7 +37,7 @@ pub struct Unsigned {
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
struct PersistedUnsignedFields {
#[serde(skip_serializing_if = "Option::is_none")]
age_ts: Option<i64>,
age_ts: Option<Number>,
#[serde(skip_serializing_if = "Option::is_none")]
replaces_state: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
@@ -129,11 +130,14 @@ impl Unsigned {
let unsigned = self.py_read()?;
match field {
UnsignedField::AgeTs => Ok(unsigned
.persisted_fields
.age_ts
.ok_or_else(|| PyKeyError::new_err("age_ts"))?
.into_bound_py_any(py)?),
UnsignedField::AgeTs => {
let age_ts = &unsigned
.persisted_fields
.age_ts
.as_ref()
.ok_or_else(|| PyKeyError::new_err("age_ts"))?;
Ok(pythonize(py, age_ts)?)
}
UnsignedField::ReplacesState => Ok((unsigned.persisted_fields.replaces_state)
.as_ref()
.ok_or_else(|| PyKeyError::new_err("replaces_state"))?
@@ -203,7 +207,7 @@ impl Unsigned {
let mut unsigned = self.py_write()?;
match field {
UnsignedField::AgeTs => unsigned.persisted_fields.age_ts = Some(value.extract()?),
UnsignedField::AgeTs => unsigned.persisted_fields.age_ts = Some(depythonize(&value)?),
UnsignedField::ReplacesState => {
unsigned.persisted_fields.replaces_state = Some(value.extract()?)
}
@@ -339,7 +343,7 @@ mod tests {
#[test]
fn test_persisted_fields_serialize_populated() {
let fields = PersistedUnsignedFields {
age_ts: Some(1234),
age_ts: Some(1234.into()),
replaces_state: Some("$prev:example.com".to_string()),
invite_room_state: Some(vec![json!({"type": "m.room.name"})]),
knock_room_state: Some(vec![json!({"type": "m.room.topic"})]),
@@ -360,7 +364,7 @@ mod tests {
fn test_unsigned_inner_flattens_persisted_fields() {
let inner = UnsignedInner {
persisted_fields: PersistedUnsignedFields {
age_ts: Some(99),
age_ts: Some(99.into()),
..Default::default()
},
prev_content: Some(Box::new(json!({"body": "hi"}))),
@@ -382,7 +386,7 @@ mod tests {
fn test_unsigned_inner_roundtrip() {
let original = UnsignedInner {
persisted_fields: PersistedUnsignedFields {
age_ts: Some(10),
age_ts: Some(10.into()),
replaces_state: Some("$state:example.com".to_string()),
invite_room_state: None,
knock_room_state: None,
@@ -394,7 +398,7 @@ mod tests {
let json = serde_json::to_string(&original).unwrap();
let roundtripped: UnsignedInner = serde_json::from_str(&json).unwrap();
assert_eq!(roundtripped.persisted_fields.age_ts, Some(10));
assert_eq!(roundtripped.persisted_fields.age_ts, Some(10.into()));
assert_eq!(
roundtripped.persisted_fields.replaces_state.as_deref(),
Some("$state:example.com")
@@ -423,7 +427,7 @@ mod tests {
});
let unsigned: Unsigned = serde_json::from_value(json).unwrap();
let inner = unsigned.inner.read().unwrap();
assert_eq!(inner.persisted_fields.age_ts, Some(5));
assert_eq!(inner.persisted_fields.age_ts, Some(5.into()));
assert_eq!(inner.prev_sender.as_deref(), Some("@bob:example.com"));
}
}
+3 -3
View File
@@ -104,7 +104,7 @@ fn get_base_rule_ids() -> HashSet<&'static str> {
/// A single push rule for a user.
#[derive(Debug, Clone)]
#[pyclass(frozen)]
#[pyclass(frozen, from_py_object)]
pub struct PushRule {
/// A unique ID for this rule
pub rule_id: Cow<'static, str>,
@@ -462,7 +462,7 @@ pub struct RelatedEventMatchTypeCondition {
/// The collection of push rules for a user.
#[derive(Debug, Clone, Default)]
#[pyclass(frozen)]
#[pyclass(frozen, from_py_object)]
pub struct PushRules {
/// Custom push rules that override a base rule.
overridden_base_rules: HashMap<Cow<'static, str>, PushRule>,
@@ -549,7 +549,7 @@ impl PushRules {
/// A wrapper around `PushRules` that checks the enabled state of rules and
/// filters out disabled experimental rules.
#[derive(Debug, Clone, Default)]
#[pyclass(frozen)]
#[pyclass(frozen, skip_from_py_object)]
pub struct FilteredPushRules {
push_rules: PushRules,
enabled_map: BTreeMap<String, bool>,
+8 -5
View File
@@ -93,7 +93,7 @@ impl PushRuleRoomFlag {
}
/// An object which describes the unique attributes of a room version.
#[pyclass(frozen, eq, hash, get_all)]
#[pyclass(frozen, eq, hash, get_all, skip_from_py_object)]
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct RoomVersion {
/// The identifier for this version.
@@ -622,7 +622,7 @@ const ROOM_VERSION_MSC4242V12: RoomVersion = RoomVersion {
/// Note: room versions can be added to this mapping at startup (allowing
/// support for experimental room versions to be behind experimental feature
/// flags).
#[pyclass(frozen, mapping)]
#[pyclass(frozen, mapping, skip_from_py_object)]
#[derive(Clone)]
pub struct KnownRoomVersionsMapping {
// Note we use a Vec here to ensure that the order of keys is
@@ -637,19 +637,22 @@ pub struct KnownRoomVersionsMapping {
impl KnownRoomVersionsMapping {
/// Add a new room version to the mapping, indicating that this instance
/// supports it.
fn add_room_version(&self, version: RoomVersion) -> PyResult<()> {
fn add_room_version(&self, version: Bound<'_, RoomVersion>) -> PyResult<()> {
let mut versions = self
.versions
.write()
.map_err(|_| PyRuntimeError::new_err("KnownRoomVersionsMapping lock poisoned"))?;
if versions.iter().any(|v| v.identifier == version.identifier) {
if versions
.iter()
.any(|v| v.identifier == version.get().identifier)
{
// We already have this room version, so we don't add it again (as
// otherwise we'd end up with duplicates).
return Ok(());
}
versions.push(version);
versions.push(*version.get());
Ok(())
}
+52
View File
@@ -0,0 +1,52 @@
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright (C) 2026 Element Creations Ltd.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
from synapse.synapse_rust.events import Unsigned
from tests import unittest
class UnsignedTestCase(unittest.TestCase):
    """Tests for the `Unsigned` object exported by `synapse.synapse_rust.events`."""

    def test_prev_content(self) -> None:
        """Check that the prev_content field is exposed as a JsonObject."""
        unsigned = Unsigned({"prev_content": {"key1": "value1", "key2": 42}})

        self.assert_dict(unsigned["prev_content"], {"key1": "value1", "key2": 42})

        for_event = unsigned.for_event()
        self.assert_dict(for_event, {"prev_content": {"key1": "value1", "key2": 42}})

    def test_large_age_ts(self) -> None:
        """Check that `age_ts` accepts integers larger than 2^128, i.e. beyond
        the largest native Rust integer type."""
        huge = 1 << 200
        unsigned = Unsigned({"age_ts": huge})

        self.assertEqual(unsigned["age_ts"], huge)
        self.assert_dict(unsigned.for_event(), {"age_ts": huge})

    def test_large_integer_in_prev_content(self) -> None:
        """Check that integers larger than 2^128 survive inside prev_content,
        which is a JsonObject and so may hold arbitrary JSON."""
        huge = 1 << 200
        unsigned = Unsigned({"prev_content": {"some_field": huge}})

        self.assertEqual(unsigned["prev_content"]["some_field"], huge)

        for_event = unsigned.for_event()
        self.assert_dict(for_event, {"prev_content": {"some_field": huge}})