Files
synapse/rust/src/events/json_object.rs
T
Erik Johnston 9e2a076144 Port Event class to Rust (#19701)
Ports the event class to Rust.

The main differences here are:
1. There is now a single event class
2. We now validate a lot more at event construction time than we
previously did (we basically checked nothing before). This required some
changes to the tests, including
https://github.com/matrix-org/sytest/pull/1423

Reviewable commit-by-commit.

### Overview of Event Rust structure

The format of the event struct in Rust is quite different from that in
Python.

The top-level looks like:

```rust
pub struct Event {
    /// The parsed event JSON.
    fields: FormattedEvent,

    /// The event ID. For format v1 this is read directly from the JSON;
    /// for v2+ it is computed from the canonical-JSON hash at
    /// construction time and cached here.
    event_id: Arc<str>,

    /// Synapse-internal per-event state that lives outside the federated
    /// JSON (e.g. outlier flag, soft-failure, stream positions).
    #[pyo3(get)]
    internal_metadata: EventInternalMetadata,

    /// The room version this event was parsed for.
    #[pyo3(get)]
    room_version: &'static RoomVersion,

    /// `None` for accepted events; otherwise a short reason set by auth
    /// when the event was rejected.
    rejected_reason: Option<Box<str>>,
}
```

which includes the actual parsed event in `FormattedEvent`, plus the
rest of the event metadata.

```rust
pub struct FormattedEvent<E = Arc<EventFormatEnum>> {
    #[serde(default)]
    pub signatures: Signatures,

    #[serde(default)]
    pub unsigned: Unsigned,

    #[serde(flatten)]
    pub specific_fields: E,

    #[serde(flatten)]
    pub common_fields: Arc<EventCommonFields>,
}
```

The struct is further split into the common fields, format specific
fields, plus the signatures and unsigned. We split out the signature and
unsigned fields as they are mutable, so when we clone the event we can
still share the common and specific fields and only copy signature and
unsigned.

The `specific_fields` are the fields that depend on the format version.
They can either be a specific format (e.g. `E = EventFormatV1`) or a
type-erased enum `EventFormatEnum` that covers the formats of all room versions:

```rust
pub enum EventFormatEnum {
    V1(EventFormatV1),
    V2V3(EventFormatV2V3),
    V4(EventFormatV4),
    VMSC4242(EventFormatVMSC4242),
}
```

For example:

```rust
/// Shared flat-list encoding of `auth_events` and `prev_events`, reused
/// by every format from v2/v3 onwards.
#[derive(Serialize, Deserialize)]
pub struct SimpleAuthPrevEvents {
    pub auth_events: Vec<String>,
    pub prev_events: Vec<String>,
}

/// Version-specific fields for room versions 3-10.
#[derive(Serialize, Deserialize)]
pub struct EventFormatV2V3 {
    pub room_id: Box<str>,
    #[serde(flatten)]
    pub auth_prev_events: SimpleAuthPrevEvents,
}
```


### Dev notes

As discussed in
[`#element-backend-internal:matrix.org`](https://matrix.to/#/!SGNQGPGUwtcPBUotTL:matrix.org/$3gTjDO440GbAz57cXcCawwiyFLiD0crrarvS1uhzKOY?via=jki.re&via=element.io&via=matrix.org)

---------

Co-authored-by: Eric Eastwood <erice@element.io>
2026-06-02 11:05:38 +01:00

495 lines
16 KiB
Rust

/*
* This file is licensed under the Affero General Public License (AGPL) version 3.
*
* Copyright (C) 2026 Element Creations Ltd
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* See the GNU Affero General Public License for more details:
* <https://www.gnu.org/licenses/agpl-3.0.html>.
*
*/
use std::{collections::BTreeMap, sync::Arc};
use pyo3::{
exceptions::{PyKeyError, PyTypeError},
pyclass, pymethods,
types::{
PyAnyMethods, PyIterator, PyList, PyListMethods, PyMapping, PySet, PySetMethods, PyTuple,
},
Bound, IntoPyObject, IntoPyObjectExt, Py, PyAny, PyResult, Python,
};
use pythonize::{depythonize, pythonize};
use serde::{Deserialize, Serialize};
/// A generic class for representing immutable JSON objects.
///
/// This is used for representing the `content` field of an event.
///
/// The basic architecture here is to optimize for two things:
/// 1. Fast access of top-level keys (e.g. `event.content["key"]`)
/// 2. Pure Rust implementation.
// `#[serde(transparent)]` makes this type (de)serialize exactly like the inner
// map, i.e. as a bare JSON object with no wrapper around it.
#[derive(Serialize, Deserialize, Clone, Default)]
#[pyclass(mapping, frozen, skip_from_py_object)]
#[serde(transparent)]
pub struct JsonObject {
// Top-level keys mapped to their JSON values. The map sits behind an `Arc`,
// so the derived `Clone` only bumps a reference count instead of copying it.
object: Arc<BTreeMap<Box<str>, serde_json::Value>>,
}
#[pymethods]
impl JsonObject {
    /// Builds a `JsonObject` from an optional Python value.
    ///
    /// * `None` (or omitted): an empty object.
    /// * another `JsonObject`: shares the underlying map without copying.
    /// * any other mapping: converted with `depythonize`.
    ///
    /// Raises `TypeError` if `content` is not a mapping. Conversion errors
    /// from `depythonize` are propagated to the caller.
    #[new]
    #[pyo3(signature = (content = None))]
    fn new<'a, 'py>(content: Option<&'a Bound<'py, PyAny>>) -> PyResult<Self> {
        let Some(content) = content else {
            // If no content is provided, default to an empty object.
            return Ok(Self::default());
        };

        if let Ok(existing) = content.cast::<JsonObject>() {
            // Already a JsonObject: share the map. This is safe because the
            // object is immutable.
            return Ok(JsonObject {
                object: Arc::clone(&existing.get().object),
            });
        }

        let Ok(mapping) = content.cast::<PyMapping>() else {
            return Err(PyTypeError::new_err("'content' must be a mapping"));
        };

        // Use pythonize to try and convert from a mapping.
        let parsed = depythonize(mapping)?;
        Ok(Self {
            object: Arc::new(parsed),
        })
    }

    /// Number of top-level keys.
    fn __len__(&self) -> usize {
        self.object.len()
    }

    /// `key in obj`. A non-string key is simply "not in" the mapping (dict
    /// semantics) rather than a `TypeError`.
    fn __contains__(&self, key: &Bound<'_, PyAny>) -> bool {
        key.extract::<&str>()
            .is_ok_and(|name| self.object.contains_key(name))
    }

    /// `obj[key]`. Returns the stored value converted to a Python object.
    ///
    /// Raises `KeyError(key)` both for missing keys and for non-string keys:
    /// we only ever store string keys, so a non-string lookup is just a miss.
    fn __getitem__<'py>(
        &self,
        py: Python<'py>,
        key: Bound<'_, PyAny>,
    ) -> PyResult<Bound<'py, PyAny>> {
        let found = key
            .extract::<&str>()
            .ok()
            .and_then(|name| self.object.get(name));
        match found {
            Some(value) => Ok(pythonize(py, value)?),
            None => Err(PyKeyError::new_err(key.unbind())),
        }
    }

    /// Iterates over the keys, in the map's (sorted) order.
    fn __iter__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyIterator>> {
        // The easiest way to get an iterator over the keys is to create a
        // temporary list and call `iter()` on it. This is not the most
        // efficient approach, but is much less boilerplate than implementing a
        // custom iterator type. Since the keys are typically small in number
        // this should be fine in practice.
        let names = PyList::new(py, self.object.keys().map(Box::as_ref))?;
        PyIterator::from_object(&names)
    }

    // The view classes below each hold a `JsonObject` clone. This is cheap
    // because the underlying map is behind an `Arc`, and lets the view outlive
    // the originating object (matching dict_keys/values/items semantics in
    // Python, which also keep the dict alive).

    /// Returns a set-like view over the keys.
    fn keys(&self) -> JsonObjectKeysView {
        JsonObjectKeysView {
            object: self.clone(),
        }
    }

    /// Returns a view over the values.
    fn values(&self) -> JsonObjectValuesView {
        JsonObjectValuesView {
            object: self.clone(),
        }
    }

    /// Returns a view over the `(key, value)` pairs.
    fn items(&self) -> JsonObjectItemsView {
        JsonObjectItemsView {
            object: self.clone(),
        }
    }

    /// `obj.get(key, default=None)`. Returns the stored value, or `default`
    /// when the key is missing or is not a string. Never raises for a miss.
    #[pyo3(signature = (key, default=None))]
    fn get<'py>(
        &self,
        py: Python<'py>,
        key: Bound<'_, PyAny>,
        default: Option<Bound<'py, PyAny>>,
    ) -> PyResult<Bound<'py, PyAny>> {
        // Non-string keys can never match, so they fall through to the
        // caller-supplied default exactly like a missing key does.
        let found = key
            .extract::<&str>()
            .ok()
            .and_then(|name| self.object.get(name));
        match found {
            Some(value) => Ok(pythonize(py, value)?),
            None => Ok(default.into_pyobject(py)?),
        }
    }

    /// Equality against another `JsonObject` or any Python mapping (e.g. a
    /// plain dict), so callers can swap a `JsonObject` in without rewriting
    /// comparisons. Anything that is not a mapping, or that cannot be
    /// converted to JSON, compares unequal.
    fn __eq__(&self, other: Bound<'_, PyAny>) -> bool {
        // Fast path, mirroring the one in `new`: if the other side is already
        // a JsonObject we can compare the Rust maps directly instead of
        // converting it to Python objects and back again.
        if let Ok(other_obj) = other.cast::<JsonObject>() {
            let theirs = &other_obj.get().object;
            return Arc::ptr_eq(&self.object, theirs) || *self.object == **theirs;
        }

        let Ok(mapping) = other.cast::<PyMapping>() else {
            return false;
        };
        let Ok(other_len) = mapping.len() else {
            return false;
        };
        // Cheap length check first, to avoid converting mappings that cannot
        // possibly be equal.
        if other_len != self.object.len() {
            return false;
        }

        // We know the "other" is a mapping with the same number of fields as
        // us. So we can convert it into the same map type and compare.
        let Ok(other_dict) = depythonize(&other) else {
            return false;
        };
        *self.object == other_dict
    }

    // Since we implement comparisons with other types, we need to disable
    // hashing to avoid violating the invariant that equal objects must have the
    // same hash.
    //
    // Alternatively, we could only allow comparisons with other JsonObjects and
    // allow hashing, but a) it's nicer to be able to compare with arbitrary
    // mappings and b) we don't really need hashing for these objects.
    #[classattr]
    const __hash__: Option<Py<PyAny>> = None;

    /// The object serialized as a JSON string.
    fn __str__(&self) -> String {
        serde_json::to_string(&self.object).expect("Value should be serializable")
    }

    /// `JsonObject(<json>)`, where `<json>` is the output of `__str__`.
    fn __repr__(&self) -> String {
        format!("JsonObject({})", self.__str__())
    }
}
impl JsonObject {
    /// Rust-side lookup of a top-level field by name.
    ///
    /// Returns a borrowed JSON value, or `None` when the key is absent.
    pub fn get_field(&self, key: &str) -> Option<&serde_json::Value> {
        let fields: &BTreeMap<Box<str>, serde_json::Value> = &self.object;
        fields.get(key)
    }
}
/// Helper class returned by `JsonObject.keys()` to act as a view into the keys
/// of the object.
///
/// This needs to both be iterable *and* operate like a set.
#[pyclass(frozen, skip_from_py_object)]
#[derive(Clone)]
pub struct JsonObjectKeysView {
// The object whose keys this view exposes. `JsonObject::keys` stores a clone
// of the originating object here.
object: JsonObject,
}
#[pymethods]
impl JsonObjectKeysView {
    /// Iterates over the keys; identical to iterating the object itself.
    fn __iter__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyIterator>> {
        self.object.__iter__(py)
    }

    /// Number of keys.
    fn __len__(&self) -> usize {
        self.object.__len__()
    }

    /// `key in view`; non-string keys are never contained.
    fn __contains__(&self, key: &Bound<'_, PyAny>) -> bool {
        self.object.__contains__(key)
    }

    /// Equal when `other` has the same length and contains every one of our
    /// keys. Anything without a length, or for which the `in` probe fails,
    /// compares unequal.
    fn __eq__(&self, other: Bound<'_, PyAny>) -> bool {
        let Ok(other_len) = other.len() else {
            return false;
        };
        other_len == self.object.__len__()
            && self
                .object
                .object
                .keys()
                .all(|key| matches!(other.contains(key.as_ref()), Ok(true)))
    }

    // The set operators below are modelled on `dict.keys()` in Python. `&` and
    // `-` only probe the other operand with `in`, so it must support
    // `__contains__`; `|`, `^` and the reflected `-` enumerate it, so it must
    // be iterable. Each returns a fresh `PySet` so the caller gets a normal
    // mutable Python set back.
    //
    // NOTE: the `in`-based operations (`&`, `-`, `==`, `isdisjoint`) treat a
    // failing probe as "not contained", and are not reliable when `other` is a
    // one-shot iterator, since every probe consumes part of it.
    //
    // The `__r*__` variants are reflected operators, called by Python when
    // the left-hand operand doesn't know how to combine with us. Since these
    // operations are commutative for sets (or symmetric in the case of `^`),
    // they just delegate. The asymmetric ops (`-`) need a separate impl.

    /// `view & other`: our keys that are also in `other`.
    fn __and__<'py>(
        &self,
        py: Python<'py>,
        other: Bound<'_, PyAny>,
    ) -> PyResult<Bound<'py, PySet>> {
        // Iterate our (typically small) key set and probe `other`, which may
        // be any container supporting `__contains__`.
        let shared: Vec<&str> = self
            .object
            .object
            .keys()
            .map(Box::as_ref)
            .filter(|key| matches!(other.contains(*key), Ok(true)))
            .collect();
        PySet::new(py, &shared)
    }

    /// Reflected `&`; intersection is commutative, so this delegates.
    fn __rand__<'py>(
        &self,
        py: Python<'py>,
        other: Bound<'_, PyAny>,
    ) -> PyResult<Bound<'py, PySet>> {
        self.__and__(py, other)
    }

    /// `view | other`: our keys plus every element of `other`.
    ///
    /// Raises `TypeError` if `other` is not iterable.
    fn __or__<'py>(&self, py: Python<'py>, other: Bound<'_, PyAny>) -> PyResult<Bound<'py, PySet>> {
        // Union needs to enumerate both sides, so the right operand must be
        // iterable (a bare `__contains__` is not enough).
        let Ok(other_iter) = other.try_iter() else {
            return Err(PyTypeError::new_err("Right operand must be iterable"));
        };
        let union = PySet::new(py, self.object.object.keys().map(Box::as_ref))?;
        // PySet handles dedup, so we can blindly add every element from the
        // other iterable.
        for item in other_iter {
            union.add(item?)?;
        }
        Ok(union)
    }

    /// Reflected `|`; union is commutative, so this delegates.
    fn __ror__<'py>(
        &self,
        py: Python<'py>,
        other: Bound<'_, PyAny>,
    ) -> PyResult<Bound<'py, PySet>> {
        self.__or__(py, other)
    }

    /// `view - other`: our keys that are not in `other`.
    fn __sub__<'py>(
        &self,
        py: Python<'py>,
        other: Bound<'_, PyAny>,
    ) -> PyResult<Bound<'py, PySet>> {
        // Only `other` needs to support `__contains__` here.
        let remaining: Vec<&str> = self
            .object
            .object
            .keys()
            .map(Box::as_ref)
            .filter(|key| !matches!(other.contains(*key), Ok(true)))
            .collect();
        PySet::new(py, &remaining)
    }

    /// `other - view`: the elements of `other` that are not keys of ours.
    ///
    /// Raises `TypeError` if `other` is not iterable.
    fn __rsub__<'py>(
        &self,
        py: Python<'py>,
        other: Bound<'_, PyAny>,
    ) -> PyResult<Bound<'py, PySet>> {
        // We need to enumerate `other`, so it must be iterable. Not symmetric
        // with `__sub__`, hence a separate impl.
        let Ok(other_iter) = other.try_iter() else {
            return Err(PyTypeError::new_err("Left operand must be iterable"));
        };
        let difference = PySet::empty(py)?;
        for item in other_iter {
            let item = item?;
            if !self.object.__contains__(&item) {
                difference.add(item)?;
            }
        }
        Ok(difference)
    }

    /// `view ^ other`: elements that are in exactly one of the two sides.
    ///
    /// Raises `TypeError` if `other` is not iterable (or contains unhashable
    /// elements, which cannot be placed in a set).
    fn __xor__<'py>(
        &self,
        py: Python<'py>,
        other: Bound<'_, PyAny>,
    ) -> PyResult<Bound<'py, PySet>> {
        let Ok(other_iter) = other.try_iter() else {
            return Err(PyTypeError::new_err("Right operand must be iterable"));
        };

        // Materialise `other` exactly once before doing anything else. We
        // need both membership tests against it and a full enumeration of it;
        // probing `other` with `in` first would consume elements if it is a
        // one-shot iterator (e.g. a generator) and silently corrupt the
        // enumeration pass.
        let other_items = PySet::empty(py)?;
        for item in other_iter {
            other_items.add(item?)?;
        }

        let result = PySet::empty(py)?;
        // Pass 1: our keys that `other` lacks.
        for key in self.object.object.keys() {
            if !other_items.contains(key.as_ref())? {
                result.add(key.as_ref())?;
            }
        }
        // Pass 2: elements of `other` that are not keys of ours.
        for item in other_items.iter() {
            if !self.object.__contains__(&item) {
                result.add(item)?;
            }
        }
        Ok(result)
    }

    /// Reflected `^`; symmetric difference is symmetric, so this delegates.
    fn __rxor__<'py>(
        &self,
        py: Python<'py>,
        other: Bound<'_, PyAny>,
    ) -> PyResult<Bound<'py, PySet>> {
        self.__xor__(py, other)
    }

    /// True when none of our keys is contained in `other`.
    fn isdisjoint(&self, other: Bound<'_, PyAny>) -> bool {
        !self
            .object
            .object
            .keys()
            .any(|key| matches!(other.contains(key.as_ref()), Ok(true)))
    }
}
/// Helper class returned by `JsonObject.values()` to act as a view into the
/// values of the object.
#[pyclass(frozen, skip_from_py_object)]
#[derive(Clone)]
pub struct JsonObjectValuesView {
// The object whose values this view exposes. `JsonObject::values` stores a
// clone of the originating object here.
object: JsonObject,
}
#[pymethods]
impl JsonObjectValuesView {
    /// Iterates over the values, each converted to a Python object.
    fn __iter__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyIterator>> {
        // Convert every value up front, then hand back an iterator over a
        // temporary Python list of the converted values.
        let converted = self
            .object
            .object
            .values()
            .map(|value| pythonize(py, value))
            .collect::<Result<Vec<_>, _>>()?;
        let list = PyList::new(py, converted)?;
        PyIterator::from_object(&list)
    }

    /// Number of values (same as the number of keys).
    fn __len__(&self) -> usize {
        self.object.__len__()
    }

    /// `candidate in view`, using JSON equality rather than Python identity.
    fn __contains__(&self, other: Bound<'_, PyAny>) -> bool {
        // Convert the candidate into a `serde_json::Value` once, then scan our
        // values for a match. Anything that fails to convert cannot match.
        let candidate: Result<serde_json::Value, _> = depythonize(&other);
        candidate.is_ok_and(|needle| self.object.object.values().any(|stored| *stored == needle))
    }
}
/// Helper class returned by `JsonObject.items()` to act as a view into the
/// items of the object.
///
/// Technically this should be a set-like view according to Python semantics,
/// unless the values are unhashable. Since the values are immutable we could
/// support it, but it's more work and nobody seems to actually use the set
/// operations on `dict_items` in practice.
#[pyclass(frozen, skip_from_py_object)]
#[derive(Clone)]
pub struct JsonObjectItemsView {
// The object whose `(key, value)` pairs this view exposes.
// `JsonObject::items` stores a clone of the originating object here.
object: JsonObject,
}
#[pymethods]
impl JsonObjectItemsView {
    /// Iterates over `(key, value)` tuples, with values converted to Python
    /// objects.
    fn __iter__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyIterator>> {
        // Build all the tuples first, then return an iterator over a
        // temporary Python list holding them.
        let mut pairs = Vec::with_capacity(self.object.object.len());
        for (name, value) in self.object.object.iter() {
            let py_name = name.as_ref().into_bound_py_any(py)?;
            let py_value = pythonize(py, value)?;
            pairs.push(PyTuple::new(py, [py_name, py_value])?);
        }
        let list = PyList::new(py, pairs)?;
        PyIterator::from_object(&list)
    }

    /// Number of items.
    fn __len__(&self) -> usize {
        self.object.__len__()
    }

    /// `(key, value) in view`. Only a 2-tuple with a string key can match;
    /// the value is compared using JSON equality.
    fn __contains__(&self, other: Bound<'_, PyAny>) -> bool {
        let Ok((key, value)) = other.extract::<(Bound<'_, PyAny>, Bound<'_, PyAny>)>() else {
            return false;
        };

        // Look the key up directly rather than scanning every item.
        let lookup = match key.extract::<&str>() {
            Ok(name) => self.object.object.get(name),
            Err(_) => None,
        };
        let Some(stored) = lookup else {
            return false;
        };

        // A value that cannot be converted to JSON cannot be equal to a
        // stored JSON value.
        let candidate: Result<serde_json::Value, _> = depythonize(&value);
        candidate.is_ok_and(|needle| *stored == needle)
    }
}