Remove redacted_because from internal unsigned. (#19581)

This is a simplification so that `unsigned` only includes "simple"
values, to make it easier to port to Rust.

Reviewable commit-by-commit

Summary:

1. **Add `recheck` column to `redactions` table**
   
A new boolean `recheck` column (default true) is added to the
`redactions` table. This captures whether a redaction needs its sender
domain checked at read time — required for room v3+ where redactions are
accepted speculatively and later validated. When persisting a new
redaction, `recheck` is set directly from
`event.internal_metadata.need_to_check_redaction()`.
     
It's fine if initially we recheck all redactions, as it only results in
a little more CPU overhead (as we always pull out the redaction event
regardless).
                                                      
2. **Backfill `recheck` via background update**
   
A background update (`redactions_recheck`) backfills the new column for
existing rows by reading `recheck_redaction` from each event's
`internal_metadata` JSON. This avoids loading full event objects by
reading `event_json` directly via a SQL JOIN.
                              
3. **Don't fetch confirmed redaction events from the DB**
                              
Previously, when loading events, Synapse recursively fetched all
redaction events regardless of whether they needed domain rechecking.
Now `_fetch_event_rows` reads the `recheck` column and splits redactions
into two lists:
        - `unconfirmed_redactions` — need fetching and domain validation
- `confirmed_redactions` — already validated, applied directly without
fetching the event
      
This avoids unnecessary DB reads for the common case of
already-confirmed redactions.
4. **Move `redacted_because` population to `EventClientSerializer`**
Previously, `redacted_because` (the full redaction event object) was
stored in `event.unsigned` at DB fetch time, coupling storage-layer code
to client serialization concerns. This is removed from
`_maybe_redact_event_row` and moved into
`EventClientSerializer.serialize_event`, which fetches the redaction
event on demand. The storage layer now only sets
`unsigned["redacted_by"]` (the redaction event ID).
5. **Always use `EventClientSerializer`**
   
The standalone `serialize_event` function was made private
(`_serialize_event`). All external callers — `rest/client/room.py`,
`rest/admin/events.py, appservice/api.py`, and `tests` — were updated to
use `EventClientSerializer.serialize_event` / `serialize_events`,
ensuring
  `redacted_because` is always populated correctly via the serializer.
6. **Batch-fetch redaction events in `serialize_events`**
   
`serialize_events` now collects all `redacted_by` IDs from the event
batch upfront and fetches them in a single `get_events` call, passing
the result as a `redaction_map` to each `serialize_event` call. This
reduces N individual DB round-trips to one when serializing a batch of
events that includes redacted events.

---------

Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Erik Johnston
2026-03-26 09:18:08 +00:00
committed by GitHub
parent f545aa4f33
commit 539f708f32
20 changed files with 466 additions and 121 deletions
+1
View File
@@ -0,0 +1 @@
Remove `redacted_because` from internal unsigned.
+6
View File
@@ -261,6 +261,11 @@ pub struct EventInternalMetadata {
#[pyo3(get, set)]
instance_name: Option<String>,
/// The event ID of the redaction event, if this event has been redacted.
/// This is set dynamically at load time and is not persisted to the database.
#[pyo3(get, set)]
redacted_by: Option<String>,
/// whether this event is an outlier (ie, whether we have the state at that
/// point in the DAG)
#[pyo3(get, set)]
@@ -289,6 +294,7 @@ impl EventInternalMetadata {
data,
stream_ordering: None,
instance_name: None,
redacted_by: None,
outlier: false,
})
}
+1 -1
View File
@@ -122,7 +122,7 @@ BOOLEAN_COLUMNS = {
"presence_stream": ["currently_active"],
"public_room_list_stream": ["visibility"],
"pushers": ["enabled"],
"redactions": ["have_censored"],
"redactions": ["have_censored", "recheck"],
"remote_media_cache": ["authenticated"],
"room_memberships": ["participant"],
"room_stats_state": ["is_federatable"],
+21 -27
View File
@@ -32,7 +32,7 @@ from typing import (
from prometheus_client import Counter
from typing_extensions import ParamSpec, TypeGuard
from synapse.api.constants import EventTypes, Membership, ThirdPartyEntityKind
from synapse.api.constants import ThirdPartyEntityKind
from synapse.api.errors import CodeMessageException, HttpResponseException
from synapse.appservice import (
ApplicationService,
@@ -40,7 +40,7 @@ from synapse.appservice import (
TransactionUnusedFallbackKeys,
)
from synapse.events import EventBase
from synapse.events.utils import SerializeEventConfig, serialize_event
from synapse.events.utils import SerializeEventConfig
from synapse.http.client import SimpleHttpClient, is_unknown_endpoint
from synapse.logging import opentracing
from synapse.metrics import SERVER_NAME_LABEL
@@ -128,6 +128,7 @@ class ApplicationServiceApi(SimpleHttpClient):
self.server_name = hs.hostname
self.clock = hs.get_clock()
self.config = hs.config.appservice
self._event_serializer = hs.get_event_client_serializer()
self.protocol_meta_cache: ResponseCache[tuple[str, str]] = ResponseCache(
clock=hs.get_clock(),
@@ -343,7 +344,7 @@ class ApplicationServiceApi(SimpleHttpClient):
# This is required by the configuration.
assert service.hs_token is not None
serialized_events = self._serialize(service, events)
serialized_events = await self._serialize(service, events)
if txn_id is None:
logger.warning(
@@ -539,30 +540,23 @@ class ApplicationServiceApi(SimpleHttpClient):
return response
def _serialize(
async def _serialize(
self, service: "ApplicationService", events: Iterable[EventBase]
) -> list[JsonDict]:
time_now = self.clock.time_msec()
return [
serialize_event(
e,
time_now,
config=SerializeEventConfig(
as_client_event=True,
# If this is an invite or a knock membership event, and we're interested
# in this user, then include any stripped state alongside the event.
include_stripped_room_state=(
e.type == EventTypes.Member
and (
e.membership == Membership.INVITE
or e.membership == Membership.KNOCK
)
and service.is_interested_in_user(e.state_key)
),
# Appservices are considered 'trusted' by the admin and should have
# applicable metadata on their events.
include_admin_metadata=True,
),
)
for e in events
]
return await self._event_serializer.serialize_events(
list(events),
time_now,
config=SerializeEventConfig(
as_client_event=True,
# If this is an invite or a knock membership event, then include
# any stripped state alongside the event. We could narrow this
# down to only users the appservice is "interested in", however
# it's not worth the complexity of doing so, and it's simpler to
# just include it for all users.
include_stripped_room_state=True,
# Appservices are considered 'trusted' by the admin and should have
# applicable metadata on their events.
include_admin_metadata=True,
),
)
+63 -18
View File
@@ -88,6 +88,7 @@ def prune_event(event: EventBase) -> EventBase:
)
pruned_event.internal_metadata.instance_name = event.internal_metadata.instance_name
pruned_event.internal_metadata.outlier = event.internal_metadata.outlier
pruned_event.internal_metadata.redacted_by = event.internal_metadata.redacted_by
# Mark the event as redacted
pruned_event.internal_metadata.redacted = True
@@ -123,6 +124,7 @@ def clone_event(event: EventBase) -> EventBase:
)
new_event.internal_metadata.instance_name = event.internal_metadata.instance_name
new_event.internal_metadata.outlier = event.internal_metadata.outlier
new_event.internal_metadata.redacted_by = event.internal_metadata.redacted_by
return new_event
@@ -423,7 +425,7 @@ class SerializeEventConfig:
# the transaction_id and delay_id in the unsigned section of the event.
requester: Requester | None = None
# List of event fields to include. If empty, all fields will be returned.
only_event_fields: list[str] | None = None
only_event_fields: list[str] | None = attr.ib(default=None)
# Some events can have stripped room state stored in the `unsigned` field.
# This is required for invite and knock functionality. If this option is
# False, that state will be removed from the event before it is returned.
@@ -434,6 +436,16 @@ class SerializeEventConfig:
# whether an event was soft failed by the server.
include_admin_metadata: bool = False
@only_event_fields.validator
def _validate_only_event_fields(
self, attribute: attr.Attribute, value: Any
) -> None:
if value is None:
return
if not isinstance(value, list) or not all(isinstance(f, str) for f in value):
raise TypeError("only_event_fields must be a list of strings")
_DEFAULT_SERIALIZE_EVENT_CONFIG = SerializeEventConfig()
@@ -444,7 +456,7 @@ def make_config_for_admin(existing: SerializeEventConfig) -> SerializeEventConfi
return attr.evolve(existing, include_admin_metadata=True)
def serialize_event(
def _serialize_event(
e: JsonDict | EventBase,
time_now_ms: int,
*,
@@ -476,13 +488,6 @@ def serialize_event(
d["unsigned"]["age"] = time_now_ms - d["unsigned"]["age_ts"]
del d["unsigned"]["age_ts"]
if "redacted_because" in e.unsigned:
d["unsigned"]["redacted_because"] = serialize_event(
e.unsigned["redacted_because"],
time_now_ms,
config=config,
)
# If we have applicable fields saved in the internal_metadata, include them in the
# unsigned section of the event if the event was sent by the same session (or when
# appropriate, just the same sender) as the one requesting the event.
@@ -559,14 +564,6 @@ def serialize_event(
if e.internal_metadata.policy_server_spammy:
d["unsigned"]["io.element.synapse.policy_server_spammy"] = True
only_event_fields = config.only_event_fields
if only_event_fields:
if not isinstance(only_event_fields, list) or not all(
isinstance(f, str) for f in only_event_fields
):
raise TypeError("only_event_fields must be a list of strings")
d = only_fields(d, only_event_fields)
return d
@@ -591,6 +588,7 @@ class EventClientSerializer:
*,
config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG,
bundle_aggregations: dict[str, "BundledAggregations"] | None = None,
redaction_map: Mapping[str, "EventBase"] | None = None,
) -> JsonDict:
"""Serializes a single event.
@@ -600,6 +598,8 @@ class EventClientSerializer:
config: Event serialization config
bundle_aggregations: A map from event_id to the aggregations to be bundled
into the event.
redaction_map: Optional pre-fetched map from redaction event_id to event,
used to avoid per-event DB lookups when serializing many events.
Returns:
The serialized event
@@ -617,7 +617,34 @@ class EventClientSerializer:
):
config = make_config_for_admin(config)
serialized_event = serialize_event(event, time_now, config=config)
serialized_event = _serialize_event(event, time_now, config=config)
# If the event was redacted, fetch the redaction event from the database
# and include it in the serialized event's unsigned section.
redacted_by: str | None = event.internal_metadata.redacted_by
if redacted_by is not None:
serialized_event.setdefault("unsigned", {})["redacted_by"] = redacted_by
if redaction_map is not None:
redaction_event: EventBase | None = redaction_map.get(redacted_by)
else:
redaction_event = await self._store.get_event(
redacted_by,
allow_none=True,
)
if redaction_event is not None:
serialized_redaction = _serialize_event(
redaction_event, time_now, config=config
)
serialized_event.setdefault("unsigned", {})["redacted_because"] = (
serialized_redaction
)
# format_event_for_client_v1 copies redacted_because to the
# top level, but since we add it after that runs, do it here.
if (
config.as_client_event
and config.event_format is format_event_for_client_v1
):
serialized_event["redacted_because"] = serialized_redaction
new_unsigned = {}
for callback in self._add_extra_fields_to_unsigned_client_event_callbacks:
@@ -630,6 +657,13 @@ class EventClientSerializer:
new_unsigned.update(serialized_event["unsigned"])
serialized_event["unsigned"] = new_unsigned
# Only include fields that the client has requested.
#
# Note: we always return bundled aggregations, though it is unclear why.
only_event_fields = config.only_event_fields
if only_event_fields:
serialized_event = only_fields(serialized_event, only_event_fields)
# Check if there are any bundled aggregations to include with the event.
if bundle_aggregations:
if event.event_id in bundle_aggregations:
@@ -745,12 +779,23 @@ class EventClientSerializer:
str(len(events)),
)
# Batch-fetch all redaction events in one go rather than one per event.
redaction_ids = {
e.internal_metadata.redacted_by
for e in events
if isinstance(e, EventBase) and e.internal_metadata.redacted_by is not None
}
redaction_map = (
await self._store.get_events(redaction_ids) if redaction_ids else {}
)
return [
await self.serialize_event(
event,
time_now,
config=config,
bundle_aggregations=bundle_aggregations,
redaction_map=redaction_map,
)
for event in events
]
+6 -2
View File
@@ -5,7 +5,6 @@ from synapse.api.errors import NotFoundError
from synapse.events.utils import (
SerializeEventConfig,
format_event_raw,
serialize_event,
)
from synapse.http.servlet import RestServlet
from synapse.http.site import SynapseRequest
@@ -40,6 +39,7 @@ class EventRestServlet(RestServlet):
self._auth = hs.get_auth()
self._store = hs.get_datastores().main
self._clock = hs.get_clock()
self._event_serializer = hs.get_event_client_serializer()
async def on_GET(
self, request: SynapseRequest, event_id: str
@@ -64,6 +64,10 @@ class EventRestServlet(RestServlet):
include_stripped_room_state=True,
include_admin_metadata=True,
)
res = {"event": serialize_event(event, self._clock.time_msec(), config=config)}
res = {
"event": await self._event_serializer.serialize_event(
event, self._clock.time_msec(), config=config
)
}
return HTTPStatus.OK, res
+2 -2
View File
@@ -55,7 +55,6 @@ from synapse.events.utils import (
EventClientSerializer,
SerializeEventConfig,
format_event_for_client_v2,
serialize_event,
)
from synapse.handlers.pagination import GetMessagesResult
from synapse.http.server import HttpServer
@@ -214,6 +213,7 @@ class RoomStateEventRestServlet(RestServlet):
self.delayed_events_handler = hs.get_delayed_events_handler()
self.auth = hs.get_auth()
self.clock = hs.get_clock()
self._event_serializer = hs.get_event_client_serializer()
self._max_event_delay_ms = hs.config.server.max_event_delay_ms
self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
self._msc4354_enabled = hs.config.experimental.msc4354_enabled
@@ -285,7 +285,7 @@ class RoomStateEventRestServlet(RestServlet):
raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND)
if format == "event":
event = serialize_event(
event = await self._event_serializer.serialize_event(
data,
self.clock.time_msec(),
config=SerializeEventConfig(
+1
View File
@@ -2944,6 +2944,7 @@ class PersistEventsStore:
values={
"redacts": event.redacts,
"received_ts": self._clock.time_msec(),
"recheck": event.internal_metadata.need_to_check_redaction(),
},
)
@@ -159,6 +159,11 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
"redactions_received_ts", self._redactions_received_ts
)
self.db_pool.updates.register_background_update_handler(
_BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE,
self._redactions_recheck_bg_update,
)
# This index gets deleted in `event_fix_redactions_bytes` update
self.db_pool.updates.register_background_index_update(
"event_fix_redactions_bytes_create_index",
@@ -747,6 +752,66 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
return count
async def _redactions_recheck_bg_update(
self, progress: JsonDict, batch_size: int
) -> int:
"""Fills in the `recheck` column of the `redactions` table based on
the `recheck_redaction` field in each event's internal metadata."""
last_event_id = progress.get("last_event_id", "")
def _txn(txn: LoggingTransaction) -> int:
sql = """
SELECT r.event_id, ej.internal_metadata
FROM redactions AS r
LEFT JOIN event_json AS ej USING (event_id)
WHERE r.event_id > ?
ORDER BY r.event_id ASC
LIMIT ?
"""
txn.execute(sql, (last_event_id, batch_size))
rows = txn.fetchall()
if not rows:
return 0
updates = []
for event_id, internal_metadata_json in rows:
if internal_metadata_json is not None:
internal_metadata = db_to_json(internal_metadata_json)
recheck = bool(internal_metadata.get("recheck_redaction", False))
else:
recheck = False
if not recheck:
# Column defaults to true, so we only need to update rows
# where recheck should be false.
updates.append((event_id, recheck))
self.db_pool.simple_update_many_txn(
txn,
table="redactions",
key_names=("event_id",),
key_values=[(event_id,) for event_id, _ in updates],
value_names=("recheck",),
value_values=[(recheck,) for _, recheck in updates],
)
upper_event_id = rows[-1][0]
self.db_pool.updates._background_update_progress_txn(
txn,
_BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE,
{"last_event_id": upper_event_id},
)
return len(rows)
count = await self.db_pool.runInteraction("_redactions_recheck_bg_update", _txn)
if not count:
await self.db_pool.updates._end_background_update(
_BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE
)
return count
async def _event_fix_redactions_bytes(
self, progress: JsonDict, batch_size: int
) -> int:
+46 -21
View File
@@ -179,7 +179,11 @@ class _EventRow:
rejected_reason: if the event was rejected, the reason why.
redactions: a list of event-ids which (claim to) redact this event.
unconfirmed_redactions: a list of event-ids which (claim to) redact this event
and need to be rechecked.
confirmed_redactions: a list of event-ids which redact this event and have been
confirmed as valid redactions.
outlier: True if this event is an outlier.
"""
@@ -192,7 +196,8 @@ class _EventRow:
format_version: int | None
room_version_id: str | None
rejected_reason: str | None
redactions: list[str]
unconfirmed_redactions: list[str]
confirmed_redactions: list[str]
outlier: bool
@@ -1359,14 +1364,20 @@ class EventsWorkerStore(SQLBaseStore):
)
row_map = await self._enqueue_events(event_ids_to_fetch)
# we need to recursively fetch any redactions of those events
# we need to recursively fetch redaction events that require
# rechecking, so we can validate them
redaction_ids: set[str] = set()
for event_id in event_ids_to_fetch:
row = row_map.get(event_id)
fetched_event_ids.add(event_id)
if row:
fetched_events[event_id] = row
redaction_ids.update(row.redactions)
# If this event only has unconfirmed redactions we fetch
# them from the DB so that we check them to see if any are
# valid.
if not row.confirmed_redactions:
redaction_ids.update(row.unconfirmed_redactions)
event_ids_to_fetch = redaction_ids.difference(fetched_event_ids)
return event_ids_to_fetch
@@ -1510,9 +1521,12 @@ class EventsWorkerStore(SQLBaseStore):
# the cache entries.
result_map: dict[str, EventCacheEntry] = {}
for event_id, original_ev in event_map.items():
redactions = fetched_events[event_id].redactions
row = fetched_events[event_id]
redacted_event = self._maybe_redact_event_row(
original_ev, redactions, event_map
original_ev,
row.unconfirmed_redactions,
row.confirmed_redactions,
event_map,
)
cache_entry = EventCacheEntry(
@@ -1606,21 +1620,25 @@ class EventsWorkerStore(SQLBaseStore):
format_version=row[5],
room_version_id=row[6],
rejected_reason=row[7],
redactions=[],
unconfirmed_redactions=[],
confirmed_redactions=[],
outlier=bool(row[8]), # This is an int in SQLite3
)
# check for redactions
redactions_sql = "SELECT event_id, redacts FROM redactions WHERE "
redactions_sql = "SELECT event_id, redacts, recheck FROM redactions WHERE "
clause, args = make_in_list_sql_clause(txn.database_engine, "redacts", evs)
txn.execute(redactions_sql + clause, args)
for redacter, redacted in txn:
for redacter, redacted, recheck in txn:
d = event_dict.get(redacted)
if d:
d.redactions.append(redacter)
if recheck:
d.unconfirmed_redactions.append(redacter)
else:
d.confirmed_redactions.append(redacter)
# check for MSC4293 redactions
to_check = []
@@ -1669,24 +1687,28 @@ class EventsWorkerStore(SQLBaseStore):
# backfilled events, as they have a negative stream ordering
if e_row.stream_ordering >= redact_end_ordering:
continue
e_row.redactions.append(redacting_event_id)
e_row.unconfirmed_redactions.append(redacting_event_id)
return event_dict
def _maybe_redact_event_row(
self,
original_ev: EventBase,
redactions: Iterable[str],
unconfirmed_redactions: Iterable[str],
confirmed_redactions: Iterable[str],
event_map: dict[str, EventBase],
) -> EventBase | None:
"""Given an event object and a list of possible redacting event ids,
"""Given an event object and lists of possible redacting event ids,
determine whether to honour any of those redactions and if so return a redacted
event.
Args:
original_ev: The original event.
redactions: list of event ids of potential redaction events
unconfirmed_redactions: list of event ids of redaction events that need
domain rechecking (room v3+).
confirmed_redactions: list of event ids of redaction events that have
already been validated and do not need rechecking.
event_map: other events which have been fetched, in which we can
look up the redaaction events. Map from event id to event.
look up the redaction events. Map from event id to event.
Returns:
If the event should be redacted, a pruned event object. Otherwise, None.
@@ -1695,7 +1717,12 @@ class EventsWorkerStore(SQLBaseStore):
# we choose to ignore redactions of m.room.create events.
return None
for redaction_id in redactions:
for redaction_id in confirmed_redactions:
redacted_event = prune_event(original_ev)
redacted_event.internal_metadata.redacted_by = redaction_id
return redacted_event
for redaction_id in unconfirmed_redactions:
redaction_event = event_map.get(redaction_id)
if not redaction_event or redaction_event.rejected_reason:
# we don't have the redaction event, or the redaction event was not
@@ -1736,12 +1763,10 @@ class EventsWorkerStore(SQLBaseStore):
# we found a good redaction event. Redact!
redacted_event = prune_event(original_ev)
redacted_event.unsigned["redacted_by"] = redaction_id
# It's fine to add the event directly, since get_pdu_json
# will serialise this field correctly
redacted_event.unsigned["redacted_because"] = redaction_event
redacted_event.internal_metadata.redacted_by = redaction_id
# Note: The `redacted_because` field will later be populated by
# `EventClientSerializer.serialize_event`.
return redacted_event
# no valid redaction found for this event
+4 -1
View File
@@ -19,7 +19,7 @@
#
#
SCHEMA_VERSION = 93 # remember to update the list below when updating
SCHEMA_VERSION = 94 # remember to update the list below when updating
"""Represents the expectations made by the codebase about the database schema
This should be incremented whenever the codebase changes its requirements on the
@@ -171,6 +171,9 @@ Changes in SCHEMA_VERSION = 92
Changes in SCHEMA_VERSION = 93
- MSC4140: Set delayed events to be uniquely identifiable by their delay ID.
Changes in SCHEMA_VERSION = 94
- Add `recheck` column (boolean, default true) to the `redactions` table.
"""
@@ -0,0 +1,15 @@
--
-- This file is licensed under the Affero General Public License (AGPL) version 3.
--
-- Copyright (C) 2026 Element Creations, Ltd
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Affero General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- See the GNU Affero General Public License for more details:
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
ALTER TABLE redactions ADD COLUMN recheck boolean NOT NULL DEFAULT true;
@@ -0,0 +1,15 @@
--
-- This file is licensed under the Affero General Public License (AGPL) version 3.
--
-- Copyright (C) 2026 Element Creations, Ltd
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Affero General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- See the GNU Affero General Public License for more details:
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(9402, 'redactions_recheck', '{}');
+2
View File
@@ -21,6 +21,8 @@ class EventInternalMetadata:
"""the stream ordering of this event. None, until it has been persisted."""
instance_name: str | None
"""the instance name of the server that persisted this event. None, until it has been persisted."""
redacted_by: str | None
"""the event ID of the redaction event, if this event has been redacted. Set dynamically at load time, not persisted."""
outlier: bool
"""whether this event is an outlier (ie, whether we have the state at that
+2
View File
@@ -64,3 +64,5 @@ class _BackgroundUpdates:
)
FIXUP_MAX_DEPTH_CAP = "fixup_max_depth_cap"
REDACTIONS_RECHECK_BG_UPDATE = "redactions_recheck"
+69 -12
View File
@@ -20,7 +20,7 @@
#
import unittest as stdlib_unittest
from typing import Any, Mapping
from typing import TYPE_CHECKING, Any, Mapping
from parameterized import parameterized
@@ -37,11 +37,15 @@ from synapse.events.utils import (
make_config_for_admin,
maybe_upsert_event_field,
prune_event,
serialize_event,
)
from synapse.types import JsonDict, create_requester
from synapse.util.frozenutils import freeze
from tests.unittest import HomeserverTestCase
if TYPE_CHECKING:
from synapse.server import HomeServer
def MockEvent(**kwargs: Any) -> EventBase:
if "event_id" not in kwargs:
@@ -638,19 +642,27 @@ class CloneEventTestCase(stdlib_unittest.TestCase):
self.assertEqual(cloned.internal_metadata.txn_id, "txn")
class SerializeEventTestCase(stdlib_unittest.TestCase):
class SerializeEventTestCase(HomeserverTestCase):
def prepare(self, reactor: Any, clock: Any, hs: "HomeServer") -> None:
self._event_serializer = hs.get_event_client_serializer()
def serialize(
self,
ev: EventBase,
fields: list[str] | None,
include_admin_metadata: bool = False,
redaction_map: Mapping[str, EventBase] | None = None,
) -> JsonDict:
return serialize_event(
ev,
1479807801915,
config=SerializeEventConfig(
only_event_fields=fields, include_admin_metadata=include_admin_metadata
),
return self.get_success(
self._event_serializer.serialize_event(
ev,
1479807801915,
config=SerializeEventConfig(
only_event_fields=fields,
include_admin_metadata=include_admin_metadata,
),
redaction_map=redaction_map,
)
)
def test_event_fields_works_with_keys(self) -> None:
@@ -764,9 +776,8 @@ class SerializeEventTestCase(stdlib_unittest.TestCase):
def test_event_fields_fail_if_fields_not_str(self) -> None:
with self.assertRaises(TypeError):
self.serialize(
MockEvent(room_id="!foo:bar", content={"foo": "bar"}),
["room_id", 4], # type: ignore[list-item]
SerializeEventConfig(
only_event_fields=["room_id", 4], # type: ignore[list-item]
)
def test_default_serialize_config_excludes_admin_metadata(self) -> None:
@@ -867,6 +878,52 @@ class SerializeEventTestCase(stdlib_unittest.TestCase):
)
self.assertTrue(admin_config.include_admin_metadata)
def test_redacted_because_is_filtered_out(self) -> None:
"""If an event's unsigned dict has a `redacted_by` field, then the
`redacted_because` should be filtered out if not specified in
`only_event_fields`."""
redaction_id = "$redaction_event_id"
event = MockEvent(
type="foo",
event_id="test",
room_id="!foo:bar",
content={"foo": "bar"},
)
event.internal_metadata.redacted_by = redaction_id
redaction_event = MockEvent(
type="m.room.redaction",
event_id=redaction_id,
content={"redacts": "test"},
)
self.assertEqual(
self.serialize(
event,
["content.foo"],
redaction_map={redaction_id: redaction_event},
),
{
"content": {"foo": "bar"},
},
)
self.assertEqual(
self.serialize(
event,
["content.foo", "unsigned.redacted_because"],
redaction_map={redaction_id: redaction_event},
),
{
"content": {"foo": "bar"},
"unsigned": {
"redacted_because": self.serialize(redaction_event, fields=None),
},
},
)
class CopyPowerLevelsContentTestCase(stdlib_unittest.TestCase):
def setUp(self) -> None:
+2 -4
View File
@@ -101,11 +101,10 @@ class EventsWorkerStoreTestCase(BaseWorkerStoreTestCase):
msg_dict = msg.get_dict()
msg_dict["content"] = {}
msg_dict["unsigned"]["redacted_by"] = redaction.event_id
msg_dict["unsigned"]["redacted_because"] = redaction
redacted = make_event_from_dict(
msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict()
)
redacted.internal_metadata.redacted_by = redaction.event_id
self.check(
"get_event", [msg.event_id], redacted, asserter=self.assertEventsEqual
)
@@ -125,11 +124,10 @@ class EventsWorkerStoreTestCase(BaseWorkerStoreTestCase):
msg_dict = msg.get_dict()
msg_dict["content"] = {}
msg_dict["unsigned"]["redacted_by"] = redaction.event_id
msg_dict["unsigned"]["redacted_because"] = redaction
redacted = make_event_from_dict(
msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict()
)
redacted.internal_metadata.redacted_by = redaction.event_id
self.check(
"get_event", [msg.event_id], redacted, asserter=self.assertEventsEqual
)
+5 -5
View File
@@ -4746,10 +4746,10 @@ class MSC4293RedactOnBanKickTestCase(unittest.FederatingHomeserverTestCase):
original = self.get_success(self.store.get_event(message.event_id))
if not original:
self.fail("Expected to find remote message in DB")
redacted_because = original.unsigned.get("redacted_because")
if not redacted_because:
self.fail("Did not find redacted_because field")
self.assertEqual(redacted_because.event_id, ban_event_id)
redacted_by = original.internal_metadata.redacted_by
if not redacted_by:
self.fail("Did not find redacted_by field")
self.assertEqual(redacted_by, ban_event_id)
def test_unbanning_remote_user_stops_redaction_action(self) -> None:
bad_user = "@remote_bad_user:" + self.OTHER_SERVER_NAME
@@ -5111,7 +5111,7 @@ class MSC4293RedactOnBanKickTestCase(unittest.FederatingHomeserverTestCase):
original = self.get_success(self.store.get_event(message.event_id))
if not original:
self.fail("Expected to find remote message in DB")
self.assertEqual(original.unsigned["redacted_by"], ban_event_id)
self.assertEqual(original.internal_metadata.redacted_by, ban_event_id)
def test_rejoining_kicked_remote_user_stops_redaction_action(self) -> None:
bad_user = "@remote_bad_user:" + self.OTHER_SERVER_NAME
+133
View File
@@ -14,11 +14,14 @@
#
from canonicaljson import encode_canonical_json
from twisted.internet.testing import MemoryReactor
from synapse.api.constants import MAX_DEPTH
from synapse.api.room_versions import RoomVersion, RoomVersions
from synapse.server import HomeServer
from synapse.types.storage import _BackgroundUpdates
from synapse.util.clock import Clock
from tests.unittest import HomeserverTestCase
@@ -154,3 +157,133 @@ class TestFixupMaxDepthCapBgUpdate(HomeserverTestCase):
# Assert that the topological_ordering of events has not been changed
# from their depth.
self.assertDictEqual(event_id_to_depth, dict(rows))
class TestRedactionsRecheckBgUpdate(HomeserverTestCase):
"""Test the background update that backfills the `recheck` column in redactions."""
def prepare(
self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
) -> None:
self.store = self.hs.get_datastores().main
self.db_pool = self.store.db_pool
# Re-insert the background update, since it already ran during setup.
self.get_success(
self.db_pool.simple_insert(
table="background_updates",
values={
"update_name": _BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE,
"progress_json": "{}",
},
)
)
self.db_pool.updates._all_done = False
def _insert_redaction(
self,
event_id: str,
redacts: str,
recheck_redaction: bool | None = None,
insert_event_json: bool = True,
) -> None:
"""Insert a row into `redactions` and optionally a matching `event_json` row.
Args:
event_id: The event ID of the redaction event.
redacts: The event ID being redacted.
recheck_redaction: The value of `recheck_redaction` in internal metadata.
If None, the key is omitted from internal metadata.
insert_event_json: Whether to insert a corresponding row in `event_json`.
"""
self.get_success(
self.db_pool.simple_insert(
table="redactions",
values={
"event_id": event_id,
"redacts": redacts,
"have_censored": False,
"received_ts": 0,
},
)
)
if insert_event_json:
internal_metadata: dict = {}
if recheck_redaction is not None:
internal_metadata["recheck_redaction"] = recheck_redaction
self.get_success(
self.db_pool.simple_insert(
table="event_json",
values={
"event_id": event_id,
"room_id": "!room:test",
"internal_metadata": encode_canonical_json(
internal_metadata
).decode("utf-8"),
"json": "{}",
"format_version": 3,
},
)
)
def _get_recheck(self, event_id: str) -> bool:
row = self.get_success(
self.db_pool.simple_select_one(
table="redactions",
keyvalues={"event_id": event_id},
retcols=["recheck"],
)
)
return bool(row[0])
def test_recheck_true(self) -> None:
"""A redaction with recheck_redaction=True in internal metadata gets recheck=True."""
self._insert_redaction("$redact1:test", "$target1:test", recheck_redaction=True)
self.wait_for_background_updates()
self.assertTrue(self._get_recheck("$redact1:test"))
def test_recheck_false(self) -> None:
"""A redaction with recheck_redaction=False in internal metadata gets recheck=False."""
self._insert_redaction(
"$redact2:test", "$target2:test", recheck_redaction=False
)
self.wait_for_background_updates()
self.assertFalse(self._get_recheck("$redact2:test"))
def test_recheck_absent_from_metadata(self) -> None:
"""A redaction with no recheck_redaction key in internal metadata gets recheck=False."""
self._insert_redaction("$redact3:test", "$target3:test", recheck_redaction=None)
self.wait_for_background_updates()
self.assertFalse(self._get_recheck("$redact3:test"))
def test_recheck_no_event_json(self) -> None:
"""A redaction with no event_json row gets recheck=False."""
self._insert_redaction(
"$redact4:test", "$target4:test", insert_event_json=False
)
self.wait_for_background_updates()
self.assertFalse(self._get_recheck("$redact4:test"))
def test_batching(self) -> None:
"""The update processes rows in batches, completing when all are done."""
self._insert_redaction("$redact5:test", "$target5:test", recheck_redaction=True)
self._insert_redaction(
"$redact6:test", "$target6:test", recheck_redaction=False
)
self._insert_redaction("$redact7:test", "$target7:test", recheck_redaction=True)
self.wait_for_background_updates()
self.assertTrue(self._get_recheck("$redact5:test"))
self.assertFalse(self._get_recheck("$redact6:test"))
self.assertTrue(self._get_recheck("$redact7:test"))
+7 -28
View File
@@ -158,7 +158,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
event,
)
self.assertFalse("redacted_because" in event.unsigned)
self.assertIsNone(event.internal_metadata.redacted_by)
# Redact event
reason = "Because I said so"
@@ -168,7 +168,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
self.assertEqual(msg_event.event_id, event.event_id)
self.assertTrue("redacted_because" in event.unsigned)
self.assertIsNotNone(event.internal_metadata.redacted_by)
self.assertObjectHasAttributes(
{
@@ -179,15 +179,6 @@ class RedactionTestCase(unittest.HomeserverTestCase):
event,
)
self.assertObjectHasAttributes(
{
"type": EventTypes.Redaction,
"user_id": self.u_alice.to_string(),
"content": {"reason": reason},
},
event.unsigned["redacted_because"],
)
def test_redact_join(self) -> None:
self.inject_room_member(self.room1, self.u_alice, Membership.JOIN)
@@ -206,7 +197,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
event,
)
self.assertFalse(hasattr(event, "redacted_because"))
self.assertIsNone(event.internal_metadata.redacted_by)
# Redact event
reason = "Because I said so"
@@ -216,7 +207,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
event = self.get_success(self.store.get_event(msg_event.event_id))
self.assertTrue("redacted_because" in event.unsigned)
self.assertIsNotNone(event.internal_metadata.redacted_by)
self.assertObjectHasAttributes(
{
@@ -227,15 +218,6 @@ class RedactionTestCase(unittest.HomeserverTestCase):
event,
)
self.assertObjectHasAttributes(
{
"type": EventTypes.Redaction,
"user_id": self.u_alice.to_string(),
"content": {"reason": reason},
},
event.unsigned["redacted_because"],
)
def test_circular_redaction(self) -> None:
redaction_event_id1 = "$redaction1_id:test"
redaction_event_id2 = "$redaction2_id:test"
@@ -331,10 +313,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
fetched = self.get_success(self.store.get_event(redaction_event_id1))
# it should have been redacted
self.assertEqual(fetched.unsigned["redacted_by"], redaction_event_id2)
self.assertEqual(
fetched.unsigned["redacted_because"].event_id, redaction_event_id2
)
self.assertEqual(fetched.internal_metadata.redacted_by, redaction_event_id2)
def test_redact_censor(self) -> None:
"""Test that a redacted event gets censored in the DB after a month"""
@@ -355,7 +334,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
event,
)
self.assertFalse("redacted_because" in event.unsigned)
self.assertIsNone(event.internal_metadata.redacted_by)
# Redact event
reason = "Because I said so"
@@ -363,7 +342,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
event = self.get_success(self.store.get_event(msg_event.event_id))
self.assertTrue("redacted_because" in event.unsigned)
self.assertIsNotNone(event.internal_metadata.redacted_by)
self.assertObjectHasAttributes(
{