Files
synapse/tests/util/test_split_dict.py
T
Erik Johnston 5bf825c016 Limit to-device EDU sizes (#19617)
This is based on https://github.com/element-hq/synapse/pull/18416, which
got reverted (#19614) due to it incorrectly rejecting to-device messages
to users with many devices (and thus breaking message sending).

Fix https://github.com/element-hq/synapse/issues/17035

A to-device message content looks like:

```jsonc
{
  "@user:domain": {"device1": {...}, "device2": {...}},
  ...
}
```

The previous PR would split the content up into multiple EDUs, each with a
subset of the users. However, if one user's entry was too large it would not
split that entry up any further and would instead error out.

The main change in this PR is to allow splitting up a single user into
multiple EDUs.

Other changes:
1. Rename to `SOFT_MAX_EDU_SIZE` to indicate that we sometimes send EDUs
with a larger size than that, and it's more a target than a hard limit.
2. Check early if any to-device message (to a specific device) is too
large to send, even if we're not going to send it over federation. This
ensures that we catch issues where clients try to send to-device
messages that are too large.

This still means that if a client sends a large individual to-device
message it will fail, but I don't believe we ever send such large
to-device messages (normally they're in the range of a few KB).

---

I ended up changing the implementation a bunch to make it easy to reuse
the code to split up dictionaries. Instead of repeatedly splitting up
the EDU until each bit fits into the size, we now record the size of
each entry in the dict and split up based on cumulative size.
This means we call `encode_canonical_json` on each entry rather than
once on the entire struct, but it's not significantly slower to do so.

--

cc @MatMaul @MadLittleMods

---------

Co-authored-by: Mathieu Velten <matmaul@gmail.com>
Co-authored-by: mcalinghee <mcalinghee.dev@gmail.com>
Co-authored-by: Eric Eastwood <madlittlemods@gmail.com>
2026-06-02 15:57:55 +00:00

202 lines
6.4 KiB
Python

#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright (C) 2026 Element Creations Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
from canonicaljson import encode_canonical_json
from synapse.util import split_dict_to_fit_to_size
from tests.unittest import TestCase
class SplitDictTestCase(TestCase):
    """Tests for `split_dict_to_fit_to_size`.

    Every test materialises the returned iterable into a list and compares
    it against the expected `(sub_dict, size)` tuples.
    """

    def test_empty(self) -> None:
        """An empty input dict must not produce any payloads."""
        payloads = list(
            split_dict_to_fit_to_size({}, soft_max_size=10, wrapping_object_size=0)
        )

        self.assertEqual(payloads, [])

    def test_no_splitting(self) -> None:
        """A dict that fits within the limit comes back as one payload."""
        entries = {"a": {"key": "value"}, "b": {"key": "value"}}

        # The limit is exactly the encoded size of the whole dict, so
        # everything should fit into a single payload.
        limit = len(encode_canonical_json(entries))

        payloads = list(split_dict_to_fit_to_size(entries, soft_max_size=limit))

        self.assertEqual(payloads, [(entries, limit)])

    def test_no_splitting_with_wrapping_size(self) -> None:
        """The wrapping object's size counts towards the split decision."""
        entries = {"a": {"key": "value"}, "b": {"key": "value"}}

        # Size of the enclosing object with an empty payload.
        envelope_size = len(encode_canonical_json({"key": "value", "payload": {}}))

        # The limit is the encoded size of the fully wrapped output, so the
        # whole dict should only just fit.
        limit = len(encode_canonical_json({"key": "value", "payload": entries}))

        payloads = list(
            split_dict_to_fit_to_size(
                entries,
                soft_max_size=limit,
                wrapping_object_size=envelope_size,
            )
        )

        self.assertEqual(payloads, [(entries, limit)])

    def test_splitting(self) -> None:
        """A dict larger than the limit is broken into several payloads."""
        names = ("a", "b", "c")
        entries = {name: {"key": "value"} for name in names}

        # Only a single key-value pair fits within the limit, so we expect
        # one payload per key.
        limit = len(encode_canonical_json({"a": {"key": "value"}}))

        payloads = list(split_dict_to_fit_to_size(entries, soft_max_size=limit))

        expected = [({name: {"key": "value"}}, limit) for name in names]
        self.assertEqual(payloads, expected)

    def test_splitting_with_wrapping_size(self) -> None:
        """Splitting still happens per-entry once wrapping is accounted for."""
        names = ("a", "b", "c")
        entries = {name: {"key": "value"} for name in names}

        envelope_size = len(encode_canonical_json({"key": "value", "payload": {}}))
        single_entry_size = len(encode_canonical_json({"a": {"key": "value"}}))

        # Room for exactly one key-value pair inside the wrapping object.
        # Both encodings above include a pair of braces for the payload
        # dict, so two bytes are subtracted to avoid counting them twice.
        limit = single_entry_size + envelope_size - 2

        payloads = list(
            split_dict_to_fit_to_size(
                entries,
                soft_max_size=limit,
                wrapping_object_size=envelope_size,
            )
        )

        expected = [({name: {"key": "value"}}, limit) for name in names]
        self.assertEqual(payloads, expected)

    def test_oversized_entry(self) -> None:
        """An entry that alone exceeds the limit is still yielded, on its
        own, with its real size reported."""
        names = ("a", "b", "c")
        entries = {name: {"key": "value"} for name in names}

        # One byte short of a single key-value pair: every entry is too big.
        limit = len(encode_canonical_json({"a": {"key": "value"}})) - 1

        payloads = list(split_dict_to_fit_to_size(entries, soft_max_size=limit))

        expected = [
            (
                {name: {"key": "value"}},
                len(encode_canonical_json({name: {"key": "value"}})),
            )
            for name in names
        ]
        self.assertEqual(payloads, expected)

    def test_different_sized_entries(self) -> None:
        """Entries of differing sizes are grouped by cumulative size."""
        short_value = "X" * 5  # a lone entry with this value encodes to 13 bytes
        long_value = "X" * 10  # a lone entry with this value encodes to 18 bytes
        entries = {"a": short_value, "b": long_value, "c": short_value}

        # "a" and "b" together encode to exactly 30 bytes; "c" does not fit
        # alongside them.
        limit = 30

        payloads = list(split_dict_to_fit_to_size(entries, soft_max_size=limit))

        first_chunk = {"a": short_value, "b": long_value}
        second_chunk = {"c": short_value}
        expected = [
            (first_chunk, len(encode_canonical_json(first_chunk))),
            (second_chunk, len(encode_canonical_json(second_chunk))),
        ]
        self.assertEqual(payloads, expected)