mirror of
https://github.com/element-hq/synapse.git
synced 2026-06-07 17:32:45 +00:00
5bf825c016
This is based on https://github.com/element-hq/synapse/pull/18416, which got reverted (#19614) due to it incorrectly rejecting to-device messages to users with many devices (and thus breaking message sending). Fix https://github.com/element-hq/synapse/issues/17035 A to-device message content looks like: ```jsonc { "@user:domain": {"device1": {...}, "device2": {...}}, ... } ``` The previous PR would split the content up into multiple EDUs, each with a subset of the users. However, if one user's entry was too large, it would not split that entry up any further and would then error out. The main change in this PR is to allow splitting up a single user into multiple EDUs. Other changes: 1. Rename to `SOFT_MAX_EDU_SIZE` to indicate that we sometimes send EDUs with a larger size than that, and it's more of a target than a hard limit. 2. Check early if any to-device message (to a specific device) is too large to send, even if we're not going to send it over federation. This ensures that we catch issues where clients try to send to-device messages that are too large. This still means that if a client sends a large individual to-device message it will fail, but I don't believe we ever send such large to-device messages (normally they're in the range of a few KB). --- I ended up changing the implementation a bunch to make it easy to reuse the code to split up dictionaries. Instead of repeatedly splitting up the EDU until each bit fits into the size, we record the size of each entry in the dict and split up based on cumulative size. This means we call `encode_canonical_json` on each entry rather than once on the entire struct, but it's not significantly slower to do so. -- cc @MatMaul @MadLittleMods --------- Co-authored-by: Mathieu Velten <matmaul@gmail.com> Co-authored-by: mcalinghee <mcalinghee.dev@gmail.com> Co-authored-by: Eric Eastwood <madlittlemods@gmail.com>
202 lines
6.4 KiB
Python
202 lines
6.4 KiB
Python
#
|
|
# This file is licensed under the Affero General Public License (AGPL) version 3.
|
|
#
|
|
# Copyright (C) 2026 Element Creations Ltd
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
# License, or (at your option) any later version.
|
|
#
|
|
# See the GNU Affero General Public License for more details:
|
|
# <https://www.gnu.org/licenses/agpl-3.0.html>.
|
|
#
|
|
|
|
|
|
from canonicaljson import encode_canonical_json
|
|
|
|
from synapse.util import split_dict_to_fit_to_size
|
|
|
|
from tests.unittest import TestCase
|
|
|
|
|
|
class SplitDictTestCase(TestCase):
    """Tests for `split_dict_to_fit_to_size`.

    Each test materialises the generator into a list of `(payload, size)`
    tuples and compares it against the expected chunks.
    """

    def test_empty(self) -> None:
        """An empty dict produces no payloads at all."""
        payloads = list(
            split_dict_to_fit_to_size({}, soft_max_size=10, wrapping_object_size=0)
        )

        self.assertEqual(payloads, [])

    def test_no_splitting(self) -> None:
        """A dict that exactly fits the limit comes back as one payload."""
        entries = {name: {"key": "value"} for name in ("a", "b")}

        # The limit is exactly the encoded size of the whole dict, so
        # everything should fit into a single payload.
        limit = len(encode_canonical_json(entries))

        payloads = list(split_dict_to_fit_to_size(entries, soft_max_size=limit))

        self.assertEqual(payloads, [(entries, limit)])

    def test_no_splitting_with_wrapping_size(self) -> None:
        """The wrapping size counts towards the limit when nothing is split."""
        entries = {name: {"key": "value"} for name in ("a", "b")}

        # Size of the enclosing object with an empty payload.
        envelope_size = len(encode_canonical_json({"key": "value", "payload": {}}))

        # The limit is the size of the fully assembled output, i.e. the
        # enclosing object with the complete dict as its payload.
        limit = len(encode_canonical_json({"key": "value", "payload": entries}))

        payloads = list(
            split_dict_to_fit_to_size(
                entries,
                soft_max_size=limit,
                wrapping_object_size=envelope_size,
            )
        )

        self.assertEqual(payloads, [(entries, limit)])

    def test_splitting(self) -> None:
        """A dict larger than the limit is broken up into several payloads."""
        names = ("a", "b", "c")
        entries = {name: {"key": "value"} for name in names}

        # Only one key-value pair fits under the limit, so we expect one
        # payload per entry.
        limit = len(encode_canonical_json({"a": {"key": "value"}}))

        payloads = list(split_dict_to_fit_to_size(entries, soft_max_size=limit))

        expected = [({name: {"key": "value"}}, limit) for name in names]
        self.assertEqual(payloads, expected)

    def test_splitting_with_wrapping_size(self) -> None:
        """The wrapping size counts towards the limit when splitting."""
        names = ("a", "b", "c")
        entries = {name: {"key": "value"} for name in names}

        # Size of the enclosing object with an empty payload.
        envelope_size = len(encode_canonical_json({"key": "value", "payload": {}}))

        # Room for the enclosing object plus exactly one key-value pair, so we
        # expect one payload per entry. Both encoded sizes include a pair of
        # braces for the payload dict, hence the `- 2` to avoid counting them
        # twice.
        single_entry_size = len(encode_canonical_json({"a": {"key": "value"}}))
        limit = single_entry_size + envelope_size - 2

        payloads = list(
            split_dict_to_fit_to_size(
                entries,
                soft_max_size=limit,
                wrapping_object_size=envelope_size,
            )
        )

        expected = [({name: {"key": "value"}}, limit) for name in names]
        self.assertEqual(payloads, expected)

    def test_oversized_entry(self) -> None:
        """An entry that alone exceeds the limit is still yielded, in a
        payload of its own."""
        names = ("a", "b", "c")
        entries = {name: {"key": "value"} for name in names}

        # One byte less than a single key-value pair: every entry is over the
        # limit on its own.
        limit = len(encode_canonical_json({"a": {"key": "value"}})) - 1

        payloads = list(split_dict_to_fit_to_size(entries, soft_max_size=limit))

        # Each payload reports its real encoded size rather than the limit.
        expected = [
            (
                {name: {"key": "value"}},
                len(encode_canonical_json({name: {"key": "value"}})),
            )
            for name in names
        ]
        self.assertEqual(payloads, expected)

    def test_different_sized_entries(self) -> None:
        """Entries with differing sizes are grouped by cumulative size."""
        short_value = "X" * 5  # encodes to 13 bytes as a one-entry dict
        long_value = "X" * 10  # encodes to 18 bytes as a one-entry dict
        entries = {"a": short_value, "b": long_value, "c": short_value}

        limit = 30

        payloads = list(split_dict_to_fit_to_size(entries, soft_max_size=limit))

        first_chunk = {"a": "X" * 5, "b": "X" * 10}
        second_chunk = {"c": "X" * 5}
        expected = [
            (first_chunk, len(encode_canonical_json(first_chunk))),
            (second_chunk, len(encode_canonical_json(second_chunk))),
        ]
        self.assertEqual(payloads, expected)