diff --git a/changelog.d/19581.misc b/changelog.d/19581.misc new file mode 100644 index 0000000000..02856f3497 --- /dev/null +++ b/changelog.d/19581.misc @@ -0,0 +1 @@ +Remove `redacted_because` from internal unsigned. diff --git a/rust/src/events/internal_metadata.rs b/rust/src/events/internal_metadata.rs index 595f9cf7eb..a53c1e771b 100644 --- a/rust/src/events/internal_metadata.rs +++ b/rust/src/events/internal_metadata.rs @@ -261,6 +261,11 @@ pub struct EventInternalMetadata { #[pyo3(get, set)] instance_name: Option, + /// The event ID of the redaction event, if this event has been redacted. + /// This is set dynamically at load time and is not persisted to the database. + #[pyo3(get, set)] + redacted_by: Option, + /// whether this event is an outlier (ie, whether we have the state at that /// point in the DAG) #[pyo3(get, set)] @@ -289,6 +294,7 @@ impl EventInternalMetadata { data, stream_ordering: None, instance_name: None, + redacted_by: None, outlier: false, }) } diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 79b2a0c528..eedceb170e 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -122,7 +122,7 @@ BOOLEAN_COLUMNS = { "presence_stream": ["currently_active"], "public_room_list_stream": ["visibility"], "pushers": ["enabled"], - "redactions": ["have_censored"], + "redactions": ["have_censored", "recheck"], "remote_media_cache": ["authenticated"], "room_memberships": ["participant"], "room_stats_state": ["is_federatable"], diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 2bbf77a352..d4e9d50b96 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -32,7 +32,7 @@ from typing import ( from prometheus_client import Counter from typing_extensions import ParamSpec, TypeGuard -from synapse.api.constants import EventTypes, Membership, ThirdPartyEntityKind +from synapse.api.constants import ThirdPartyEntityKind from synapse.api.errors import CodeMessageException, HttpResponseException from synapse.appservice import ( ApplicationService, @@ -40,7 +40,7 @@ from synapse.appservice import ( TransactionUnusedFallbackKeys, ) from synapse.events import EventBase -from synapse.events.utils import SerializeEventConfig, serialize_event +from synapse.events.utils import SerializeEventConfig from synapse.http.client import SimpleHttpClient, is_unknown_endpoint from synapse.logging import opentracing from synapse.metrics import SERVER_NAME_LABEL @@ -128,6 +128,7 @@ class ApplicationServiceApi(SimpleHttpClient): self.server_name = hs.hostname self.clock = hs.get_clock() self.config = hs.config.appservice + self._event_serializer = hs.get_event_client_serializer() self.protocol_meta_cache: ResponseCache[tuple[str, str]] = ResponseCache( clock=hs.get_clock(), @@ -343,7 +344,7 @@ class ApplicationServiceApi(SimpleHttpClient): # This is required by the configuration. assert service.hs_token is not None - serialized_events = self._serialize(service, events) + serialized_events = await self._serialize(service, events) if txn_id is None: logger.warning( @@ -539,30 +540,23 @@ class ApplicationServiceApi(SimpleHttpClient): return response - def _serialize( + async def _serialize( self, service: "ApplicationService", events: Iterable[EventBase] ) -> list[JsonDict]: time_now = self.clock.time_msec() - return [ - serialize_event( - e, - time_now, - config=SerializeEventConfig( - as_client_event=True, - # If this is an invite or a knock membership event, and we're interested - # in this user, then include any stripped state alongside the event. - include_stripped_room_state=( - e.type == EventTypes.Member - and ( - e.membership == Membership.INVITE - or e.membership == Membership.KNOCK - ) - and service.is_interested_in_user(e.state_key) - ), - # Appservices are considered 'trusted' by the admin and should have - # applicable metadata on their events. - include_admin_metadata=True, - ), - ) - for e in events - ] + return await self._event_serializer.serialize_events( + list(events), + time_now, + config=SerializeEventConfig( + as_client_event=True, + # If this is an invite or a knock membership event, then include + # any stripped state alongside the event. We could narrow this + # down to only users the appservice is "interested in", however + # it's not worth the complexity of doing so, and it's simpler to + # just include it for all users. + include_stripped_room_state=True, + # Appservices are considered 'trusted' by the admin and should have + # applicable metadata on their events. + include_admin_metadata=True, + ), + ) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 89eb2182af..76ebac8b17 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -88,6 +88,7 @@ def prune_event(event: EventBase) -> EventBase: ) pruned_event.internal_metadata.instance_name = event.internal_metadata.instance_name pruned_event.internal_metadata.outlier = event.internal_metadata.outlier + pruned_event.internal_metadata.redacted_by = event.internal_metadata.redacted_by # Mark the event as redacted pruned_event.internal_metadata.redacted = True @@ -123,6 +124,7 @@ def clone_event(event: EventBase) -> EventBase: ) new_event.internal_metadata.instance_name = event.internal_metadata.instance_name new_event.internal_metadata.outlier = event.internal_metadata.outlier + new_event.internal_metadata.redacted_by = event.internal_metadata.redacted_by return new_event @@ -423,7 +425,7 @@ class SerializeEventConfig: # the transaction_id and delay_id in the unsigned section of the event. requester: Requester | None = None # List of event fields to include. If empty, all fields will be returned. - only_event_fields: list[str] | None = None + only_event_fields: list[str] | None = attr.ib(default=None) # Some events can have stripped room state stored in the `unsigned` field. # This is required for invite and knock functionality. If this option is # False, that state will be removed from the event before it is returned. @@ -434,6 +436,16 @@ class SerializeEventConfig: # whether an event was soft failed by the server. include_admin_metadata: bool = False + @only_event_fields.validator + def _validate_only_event_fields( + self, attribute: attr.Attribute, value: Any + ) -> None: + if value is None: + return + + if not isinstance(value, list) or not all(isinstance(f, str) for f in value): + raise TypeError("only_event_fields must be a list of strings") + _DEFAULT_SERIALIZE_EVENT_CONFIG = SerializeEventConfig() @@ -444,7 +456,7 @@ def make_config_for_admin(existing: SerializeEventConfig) -> SerializeEventConfi return attr.evolve(existing, include_admin_metadata=True) -def serialize_event( +def _serialize_event( e: JsonDict | EventBase, time_now_ms: int, *, @@ -476,13 +488,6 @@ def serialize_event( d["unsigned"]["age"] = time_now_ms - d["unsigned"]["age_ts"] del d["unsigned"]["age_ts"] - if "redacted_because" in e.unsigned: - d["unsigned"]["redacted_because"] = serialize_event( - e.unsigned["redacted_because"], - time_now_ms, - config=config, - ) - # If we have applicable fields saved in the internal_metadata, include them in the # unsigned section of the event if the event was sent by the same session (or when # appropriate, just the same sender) as the one requesting the event. @@ -559,14 +564,6 @@ def serialize_event( if e.internal_metadata.policy_server_spammy: d["unsigned"]["io.element.synapse.policy_server_spammy"] = True - only_event_fields = config.only_event_fields - if only_event_fields: - if not isinstance(only_event_fields, list) or not all( - isinstance(f, str) for f in only_event_fields - ): - raise TypeError("only_event_fields must be a list of strings") - d = only_fields(d, only_event_fields) - return d @@ -591,6 +588,7 @@ class EventClientSerializer: *, config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG, bundle_aggregations: dict[str, "BundledAggregations"] | None = None, + redaction_map: Mapping[str, "EventBase"] | None = None, ) -> JsonDict: """Serializes a single event. @@ -600,6 +598,8 @@ class EventClientSerializer: config: Event serialization config bundle_aggregations: A map from event_id to the aggregations to be bundled into the event. + redaction_map: Optional pre-fetched map from redaction event_id to event, + used to avoid per-event DB lookups when serializing many events. Returns: The serialized event @@ -617,7 +617,34 @@ class EventClientSerializer: ): config = make_config_for_admin(config) - serialized_event = serialize_event(event, time_now, config=config) + serialized_event = _serialize_event(event, time_now, config=config) + + # If the event was redacted, fetch the redaction event from the database + # and include it in the serialized event's unsigned section. + redacted_by: str | None = event.internal_metadata.redacted_by + if redacted_by is not None: + serialized_event.setdefault("unsigned", {})["redacted_by"] = redacted_by + if redaction_map is not None: + redaction_event: EventBase | None = redaction_map.get(redacted_by) + else: + redaction_event = await self._store.get_event( + redacted_by, + allow_none=True, + ) + if redaction_event is not None: + serialized_redaction = _serialize_event( + redaction_event, time_now, config=config + ) + serialized_event.setdefault("unsigned", {})["redacted_because"] = ( + serialized_redaction + ) + # format_event_for_client_v1 copies redacted_because to the + # top level, but since we add it after that runs, do it here. + if ( + config.as_client_event + and config.event_format is format_event_for_client_v1 + ): + serialized_event["redacted_because"] = serialized_redaction new_unsigned = {} for callback in self._add_extra_fields_to_unsigned_client_event_callbacks: @@ -630,6 +657,13 @@ class EventClientSerializer: new_unsigned.update(serialized_event["unsigned"]) serialized_event["unsigned"] = new_unsigned + # Only include fields that the client has requested. + # + # Note: we always return bundled aggregations, though it is unclear why. + only_event_fields = config.only_event_fields + if only_event_fields: + serialized_event = only_fields(serialized_event, only_event_fields) + # Check if there are any bundled aggregations to include with the event. if bundle_aggregations: if event.event_id in bundle_aggregations: @@ -745,12 +779,23 @@ class EventClientSerializer: str(len(events)), ) + # Batch-fetch all redaction events in one go rather than one per event. + redaction_ids = { + e.internal_metadata.redacted_by + for e in events + if isinstance(e, EventBase) and e.internal_metadata.redacted_by is not None + } + redaction_map = ( + await self._store.get_events(redaction_ids) if redaction_ids else {} + ) + return [ await self.serialize_event( event, time_now, config=config, bundle_aggregations=bundle_aggregations, + redaction_map=redaction_map, ) for event in events ] diff --git a/synapse/rest/admin/events.py b/synapse/rest/admin/events.py index 1c39d5caf3..8da7a67820 100644 --- a/synapse/rest/admin/events.py +++ b/synapse/rest/admin/events.py @@ -5,7 +5,6 @@ from synapse.api.errors import NotFoundError from synapse.events.utils import ( SerializeEventConfig, format_event_raw, - serialize_event, ) from synapse.http.servlet import RestServlet from synapse.http.site import SynapseRequest @@ -40,6 +39,7 @@ class EventRestServlet(RestServlet): self._auth = hs.get_auth() self._store = hs.get_datastores().main self._clock = hs.get_clock() + self._event_serializer = hs.get_event_client_serializer() async def on_GET( self, request: SynapseRequest, event_id: str @@ -64,6 +64,10 @@ class EventRestServlet(RestServlet): include_stripped_room_state=True, include_admin_metadata=True, ) - res = {"event": serialize_event(event, self._clock.time_msec(), config=config)} + res = { + "event": await self._event_serializer.serialize_event( + event, self._clock.time_msec(), config=config + ) + } return HTTPStatus.OK, res diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 9172bfcb4e..65d9c130ef 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -55,7 +55,6 @@ from synapse.events.utils import ( EventClientSerializer, SerializeEventConfig, format_event_for_client_v2, - serialize_event, ) from synapse.handlers.pagination import GetMessagesResult from synapse.http.server import HttpServer @@ -214,6 +213,7 @@ class RoomStateEventRestServlet(RestServlet): self.delayed_events_handler = hs.get_delayed_events_handler() self.auth = hs.get_auth() self.clock = hs.get_clock() + self._event_serializer = hs.get_event_client_serializer() self._max_event_delay_ms = hs.config.server.max_event_delay_ms self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker self._msc4354_enabled = hs.config.experimental.msc4354_enabled @@ -285,7 +285,7 @@ class RoomStateEventRestServlet(RestServlet): raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND) if format == "event": - event = serialize_event( + event = await self._event_serializer.serialize_event( data, self.clock.time_msec(), config=SerializeEventConfig( diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index cb452dbc9b..941a5f9f3a 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2944,6 +2944,7 @@ class PersistEventsStore: values={ "redacts": event.redacts, "received_ts": self._clock.time_msec(), + "recheck": event.internal_metadata.need_to_check_redaction(), }, ) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index f8300e016b..934cd157ca 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -159,6 +159,11 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS "redactions_received_ts", self._redactions_received_ts ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE, + self._redactions_recheck_bg_update, + ) + # This index gets deleted in `event_fix_redactions_bytes` update self.db_pool.updates.register_background_index_update( "event_fix_redactions_bytes_create_index", @@ -747,6 +752,66 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS return count + async def _redactions_recheck_bg_update( + self, progress: JsonDict, batch_size: int + ) -> int: + """Fills in the `recheck` column of the `redactions` table based on + the `recheck_redaction` field in each event's internal metadata.""" + last_event_id = progress.get("last_event_id", "") + + def _txn(txn: LoggingTransaction) -> int: + sql = """ + SELECT r.event_id, ej.internal_metadata + FROM redactions AS r + LEFT JOIN event_json AS ej USING (event_id) + WHERE r.event_id > ? + ORDER BY r.event_id ASC + LIMIT ? + """ + txn.execute(sql, (last_event_id, batch_size)) + rows = txn.fetchall() + if not rows: + return 0 + + updates = [] + for event_id, internal_metadata_json in rows: + if internal_metadata_json is not None: + internal_metadata = db_to_json(internal_metadata_json) + recheck = bool(internal_metadata.get("recheck_redaction", False)) + else: + recheck = False + if not recheck: + # Column defaults to true, so we only need to update rows + # where recheck should be false. + updates.append((event_id, recheck)) + + self.db_pool.simple_update_many_txn( + txn, + table="redactions", + key_names=("event_id",), + key_values=[(event_id,) for event_id, _ in updates], + value_names=("recheck",), + value_values=[(recheck,) for _, recheck in updates], + ) + + upper_event_id = rows[-1][0] + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE, + {"last_event_id": upper_event_id}, + ) + + return len(rows) + + count = await self.db_pool.runInteraction("_redactions_recheck_bg_update", _txn) + + if not count: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE + ) + + return count + async def _event_fix_redactions_bytes( self, progress: JsonDict, batch_size: int ) -> int: diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index cb0764feb8..cc79b8042b 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -179,7 +179,11 @@ class _EventRow: rejected_reason: if the event was rejected, the reason why. - redactions: a list of event-ids which (claim to) redact this event. + unconfirmed_redactions: a list of event-ids which (claim to) redact this event + and need to be rechecked. + + confirmed_redactions: a list of event-ids which redact this event and have been + confirmed as valid redactions. outlier: True if this event is an outlier. """ @@ -192,7 +196,8 @@ class _EventRow: format_version: int | None room_version_id: str | None rejected_reason: str | None - redactions: list[str] + unconfirmed_redactions: list[str] + confirmed_redactions: list[str] outlier: bool @@ -1359,14 +1364,20 @@ class EventsWorkerStore(SQLBaseStore): ) row_map = await self._enqueue_events(event_ids_to_fetch) - # we need to recursively fetch any redactions of those events + # we need to recursively fetch redaction events that require + # rechecking, so we can validate them redaction_ids: set[str] = set() for event_id in event_ids_to_fetch: row = row_map.get(event_id) fetched_event_ids.add(event_id) if row: fetched_events[event_id] = row - redaction_ids.update(row.redactions) + + # If this event only has unconfirmed redactions we fetch + # them from the DB so that we check them to see if any are + # valid. + if not row.confirmed_redactions: + redaction_ids.update(row.unconfirmed_redactions) event_ids_to_fetch = redaction_ids.difference(fetched_event_ids) return event_ids_to_fetch @@ -1510,9 +1521,12 @@ class EventsWorkerStore(SQLBaseStore): # the cache entries. result_map: dict[str, EventCacheEntry] = {} for event_id, original_ev in event_map.items(): - redactions = fetched_events[event_id].redactions + row = fetched_events[event_id] redacted_event = self._maybe_redact_event_row( - original_ev, redactions, event_map + original_ev, + row.unconfirmed_redactions, + row.confirmed_redactions, + event_map, ) cache_entry = EventCacheEntry( @@ -1606,21 +1620,25 @@ class EventsWorkerStore(SQLBaseStore): format_version=row[5], room_version_id=row[6], rejected_reason=row[7], - redactions=[], + unconfirmed_redactions=[], + confirmed_redactions=[], outlier=bool(row[8]), # This is an int in SQLite3 ) # check for redactions - redactions_sql = "SELECT event_id, redacts FROM redactions WHERE " + redactions_sql = "SELECT event_id, redacts, recheck FROM redactions WHERE " clause, args = make_in_list_sql_clause(txn.database_engine, "redacts", evs) txn.execute(redactions_sql + clause, args) - for redacter, redacted in txn: + for redacter, redacted, recheck in txn: d = event_dict.get(redacted) if d: - d.redactions.append(redacter) + if recheck: + d.unconfirmed_redactions.append(redacter) + else: + d.confirmed_redactions.append(redacter) # check for MSC4293 redactions to_check = [] @@ -1669,24 +1687,28 @@ class EventsWorkerStore(SQLBaseStore): # backfilled events, as they have a negative stream ordering if e_row.stream_ordering >= redact_end_ordering: continue - e_row.redactions.append(redacting_event_id) + e_row.unconfirmed_redactions.append(redacting_event_id) return event_dict def _maybe_redact_event_row( self, original_ev: EventBase, - redactions: Iterable[str], + unconfirmed_redactions: Iterable[str], + confirmed_redactions: Iterable[str], event_map: dict[str, EventBase], ) -> EventBase | None: - """Given an event object and a list of possible redacting event ids, + """Given an event object and lists of possible redacting event ids, determine whether to honour any of those redactions and if so return a redacted event. Args: original_ev: The original event. - redactions: list of event ids of potential redaction events + unconfirmed_redactions: list of event ids of redaction events that need + domain rechecking (room v3+). + confirmed_redactions: list of event ids of redaction events that have + already been validated and do not need rechecking. event_map: other events which have been fetched, in which we can - look up the redaaction events. Map from event id to event. + look up the redaction events. Map from event id to event. Returns: If the event should be redacted, a pruned event object. Otherwise, None. @@ -1695,7 +1717,12 @@ class EventsWorkerStore(SQLBaseStore): # we choose to ignore redactions of m.room.create events. return None - for redaction_id in redactions: + for redaction_id in confirmed_redactions: + redacted_event = prune_event(original_ev) + redacted_event.internal_metadata.redacted_by = redaction_id + return redacted_event + + for redaction_id in unconfirmed_redactions: redaction_event = event_map.get(redaction_id) if not redaction_event or redaction_event.rejected_reason: # we don't have the redaction event, or the redaction event was not @@ -1736,12 +1763,10 @@ class EventsWorkerStore(SQLBaseStore): # we found a good redaction event. Redact! redacted_event = prune_event(original_ev) - redacted_event.unsigned["redacted_by"] = redaction_id - - # It's fine to add the event directly, since get_pdu_json - # will serialise this field correctly - redacted_event.unsigned["redacted_because"] = redaction_event + redacted_event.internal_metadata.redacted_by = redaction_id + # Note: The `redacted_because` field will later be populated by + # `EventClientSerializer.serialize_event`. return redacted_event # no valid redaction found for this event diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index c4c4d7bcc4..e3095a9d0d 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -19,7 +19,7 @@ # # -SCHEMA_VERSION = 93 # remember to update the list below when updating +SCHEMA_VERSION = 94 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -171,6 +171,9 @@ Changes in SCHEMA_VERSION = 92 Changes in SCHEMA_VERSION = 93 - MSC4140: Set delayed events to be uniquely identifiable by their delay ID. + +Changes in SCHEMA_VERSION = 94 + - Add `recheck` column (boolean, default true) to the `redactions` table. """ diff --git a/synapse/storage/schema/main/delta/94/01_redactions_recheck.sql b/synapse/storage/schema/main/delta/94/01_redactions_recheck.sql new file mode 100644 index 0000000000..e99aa52f1f --- /dev/null +++ b/synapse/storage/schema/main/delta/94/01_redactions_recheck.sql @@ -0,0 +1,15 @@ +-- +-- This file is licensed under the Affero General Public License (AGPL) version 3. +-- +-- Copyright (C) 2026 Element Creations, Ltd +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- See the GNU Affero General Public License for more details: +-- . + + +ALTER TABLE redactions ADD COLUMN recheck boolean NOT NULL DEFAULT true; diff --git a/synapse/storage/schema/main/delta/94/02_redactions_recheck_bg_update.sql b/synapse/storage/schema/main/delta/94/02_redactions_recheck_bg_update.sql new file mode 100644 index 0000000000..6367afd318 --- /dev/null +++ b/synapse/storage/schema/main/delta/94/02_redactions_recheck_bg_update.sql @@ -0,0 +1,15 @@ +-- +-- This file is licensed under the Affero General Public License (AGPL) version 3. +-- +-- Copyright (C) 2026 Element Creations, Ltd +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- See the GNU Affero General Public License for more details: +-- . + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (9402, 'redactions_recheck', '{}'); diff --git a/synapse/synapse_rust/events.pyi b/synapse/synapse_rust/events.pyi index 185f29694b..e6753bbbad 100644 --- a/synapse/synapse_rust/events.pyi +++ b/synapse/synapse_rust/events.pyi @@ -21,6 +21,8 @@ class EventInternalMetadata: """the stream ordering of this event. None, until it has been persisted.""" instance_name: str | None """the instance name of the server that persisted this event. None, until it has been persisted.""" + redacted_by: str | None + """the event ID of the redaction event, if this event has been redacted. Set dynamically at load time, not persisted.""" outlier: bool """whether this event is an outlier (ie, whether we have the state at that diff --git a/synapse/types/storage/__init__.py b/synapse/types/storage/__init__.py index b01653246a..992c36caba 100644 --- a/synapse/types/storage/__init__.py +++ b/synapse/types/storage/__init__.py @@ -64,3 +64,5 @@ class _BackgroundUpdates: ) FIXUP_MAX_DEPTH_CAP = "fixup_max_depth_cap" + + REDACTIONS_RECHECK_BG_UPDATE = "redactions_recheck" diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py index f511f577d3..af44b5dec1 100644 --- a/tests/events/test_utils.py +++ b/tests/events/test_utils.py @@ -20,7 +20,7 @@ # import unittest as stdlib_unittest -from typing import Any, Mapping +from typing import TYPE_CHECKING, Any, Mapping from parameterized import parameterized @@ -37,11 +37,15 @@ from synapse.events.utils import ( make_config_for_admin, maybe_upsert_event_field, prune_event, - serialize_event, ) from synapse.types import JsonDict, create_requester from synapse.util.frozenutils import freeze +from tests.unittest import HomeserverTestCase + +if TYPE_CHECKING: + from synapse.server import HomeServer + def MockEvent(**kwargs: Any) -> EventBase: if "event_id" not in kwargs: @@ -638,19 +642,27 @@ class CloneEventTestCase(stdlib_unittest.TestCase): self.assertEqual(cloned.internal_metadata.txn_id, "txn") -class SerializeEventTestCase(stdlib_unittest.TestCase): +class SerializeEventTestCase(HomeserverTestCase): + def prepare(self, reactor: Any, clock: Any, hs: "HomeServer") -> None: + self._event_serializer = hs.get_event_client_serializer() + def serialize( self, ev: EventBase, fields: list[str] | None, include_admin_metadata: bool = False, + redaction_map: Mapping[str, EventBase] | None = None, ) -> JsonDict: - return serialize_event( - ev, - 1479807801915, - config=SerializeEventConfig( - only_event_fields=fields, include_admin_metadata=include_admin_metadata - ), + return self.get_success( + self._event_serializer.serialize_event( + ev, + 1479807801915, + config=SerializeEventConfig( + only_event_fields=fields, + include_admin_metadata=include_admin_metadata, + ), + redaction_map=redaction_map, + ) ) def test_event_fields_works_with_keys(self) -> None: @@ -764,9 +776,8 @@ class SerializeEventTestCase(stdlib_unittest.TestCase): def test_event_fields_fail_if_fields_not_str(self) -> None: with self.assertRaises(TypeError): - self.serialize( - MockEvent(room_id="!foo:bar", content={"foo": "bar"}), - ["room_id", 4], # type: ignore[list-item] + SerializeEventConfig( + only_event_fields=["room_id", 4], # type: ignore[list-item] ) def test_default_serialize_config_excludes_admin_metadata(self) -> None: @@ -867,6 +878,52 @@ class SerializeEventTestCase(stdlib_unittest.TestCase): ) self.assertTrue(admin_config.include_admin_metadata) + def test_redacted_because_is_filtered_out(self) -> None: + """If an event's unsigned dict has a `redacted_by` field, then the + `redacted_because` should be filtered out if not specified in + `only_event_fields`.""" + + redaction_id = "$redaction_event_id" + + event = MockEvent( + type="foo", + event_id="test", + room_id="!foo:bar", + content={"foo": "bar"}, + ) + event.internal_metadata.redacted_by = redaction_id + + redaction_event = MockEvent( + type="m.room.redaction", + event_id=redaction_id, + content={"redacts": "test"}, + ) + + self.assertEqual( + self.serialize( + event, + ["content.foo"], + redaction_map={redaction_id: redaction_event}, + ), + { + "content": {"foo": "bar"}, + }, + ) + + self.assertEqual( + self.serialize( + event, + ["content.foo", "unsigned.redacted_because"], + redaction_map={redaction_id: redaction_event}, + ), + { + "content": {"foo": "bar"}, + "unsigned": { + "redacted_because": self.serialize(redaction_event, fields=None), + }, + }, + ) + class CopyPowerLevelsContentTestCase(stdlib_unittest.TestCase): def setUp(self) -> None: diff --git a/tests/replication/storage/test_events.py b/tests/replication/storage/test_events.py index 28bfb8b8ea..b7b94482ef 100644 --- a/tests/replication/storage/test_events.py +++ b/tests/replication/storage/test_events.py @@ -101,11 +101,10 @@ class EventsWorkerStoreTestCase(BaseWorkerStoreTestCase): msg_dict = msg.get_dict() msg_dict["content"] = {} - msg_dict["unsigned"]["redacted_by"] = redaction.event_id - msg_dict["unsigned"]["redacted_because"] = redaction redacted = make_event_from_dict( msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict() ) + redacted.internal_metadata.redacted_by = redaction.event_id self.check( "get_event", [msg.event_id], redacted, asserter=self.assertEventsEqual ) @@ -125,11 +124,10 @@ class EventsWorkerStoreTestCase(BaseWorkerStoreTestCase): msg_dict = msg.get_dict() msg_dict["content"] = {} - msg_dict["unsigned"]["redacted_by"] = redaction.event_id - msg_dict["unsigned"]["redacted_because"] = redaction redacted = make_event_from_dict( msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict() ) + redacted.internal_metadata.redacted_by = redaction.event_id self.check( "get_event", [msg.event_id], redacted, asserter=self.assertEventsEqual ) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index f85c9939ce..221121007d 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -4746,10 +4746,10 @@ class MSC4293RedactOnBanKickTestCase(unittest.FederatingHomeserverTestCase): original = self.get_success(self.store.get_event(message.event_id)) if not original: self.fail("Expected to find remote message in DB") - redacted_because = original.unsigned.get("redacted_because") - if not redacted_because: - self.fail("Did not find redacted_because field") - self.assertEqual(redacted_because.event_id, ban_event_id) + redacted_by = original.internal_metadata.redacted_by + if not redacted_by: + self.fail("Did not find redacted_by field") + self.assertEqual(redacted_by, ban_event_id) def test_unbanning_remote_user_stops_redaction_action(self) -> None: bad_user = "@remote_bad_user:" + self.OTHER_SERVER_NAME @@ -5111,7 +5111,7 @@ class MSC4293RedactOnBanKickTestCase(unittest.FederatingHomeserverTestCase): original = self.get_success(self.store.get_event(message.event_id)) if not original: self.fail("Expected to find remote message in DB") - self.assertEqual(original.unsigned["redacted_by"], ban_event_id) + self.assertEqual(original.internal_metadata.redacted_by, ban_event_id) def test_rejoining_kicked_remote_user_stops_redaction_action(self) -> None: bad_user = "@remote_bad_user:" + self.OTHER_SERVER_NAME diff --git a/tests/storage/test_events_bg_updates.py b/tests/storage/test_events_bg_updates.py index d1a794c5a1..a5b53de77f 100644 --- a/tests/storage/test_events_bg_updates.py +++ b/tests/storage/test_events_bg_updates.py @@ -14,11 +14,14 @@ # +from canonicaljson import encode_canonical_json + from twisted.internet.testing import MemoryReactor from synapse.api.constants import MAX_DEPTH from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.server import HomeServer +from synapse.types.storage import _BackgroundUpdates from synapse.util.clock import Clock from tests.unittest import HomeserverTestCase @@ -154,3 +157,133 @@ class TestFixupMaxDepthCapBgUpdate(HomeserverTestCase): # Assert that the topological_ordering of events has not been changed # from their depth. self.assertDictEqual(event_id_to_depth, dict(rows)) + + +class TestRedactionsRecheckBgUpdate(HomeserverTestCase): + """Test the background update that backfills the `recheck` column in redactions.""" + + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: + self.store = self.hs.get_datastores().main + self.db_pool = self.store.db_pool + + # Re-insert the background update, since it already ran during setup. + self.get_success( + self.db_pool.simple_insert( + table="background_updates", + values={ + "update_name": _BackgroundUpdates.REDACTIONS_RECHECK_BG_UPDATE, + "progress_json": "{}", + }, + ) + ) + self.db_pool.updates._all_done = False + + def _insert_redaction( + self, + event_id: str, + redacts: str, + recheck_redaction: bool | None = None, + insert_event_json: bool = True, + ) -> None: + """Insert a row into `redactions` and optionally a matching `event_json` row. + + Args: + event_id: The event ID of the redaction event. + redacts: The event ID being redacted. + recheck_redaction: The value of `recheck_redaction` in internal metadata. + If None, the key is omitted from internal metadata. + insert_event_json: Whether to insert a corresponding row in `event_json`. + """ + self.get_success( + self.db_pool.simple_insert( + table="redactions", + values={ + "event_id": event_id, + "redacts": redacts, + "have_censored": False, + "received_ts": 0, + }, + ) + ) + + if insert_event_json: + internal_metadata: dict = {} + if recheck_redaction is not None: + internal_metadata["recheck_redaction"] = recheck_redaction + + self.get_success( + self.db_pool.simple_insert( + table="event_json", + values={ + "event_id": event_id, + "room_id": "!room:test", + "internal_metadata": encode_canonical_json( + internal_metadata + ).decode("utf-8"), + "json": "{}", + "format_version": 3, + }, + ) + ) + + def _get_recheck(self, event_id: str) -> bool: + row = self.get_success( + self.db_pool.simple_select_one( + table="redactions", + keyvalues={"event_id": event_id}, + retcols=["recheck"], + ) + ) + return bool(row[0]) + + def test_recheck_true(self) -> None: + """A redaction with recheck_redaction=True in internal metadata gets recheck=True.""" + self._insert_redaction("$redact1:test", "$target1:test", recheck_redaction=True) + + self.wait_for_background_updates() + + self.assertTrue(self._get_recheck("$redact1:test")) + + def test_recheck_false(self) -> None: + """A redaction with recheck_redaction=False in internal metadata gets recheck=False.""" + self._insert_redaction( + "$redact2:test", "$target2:test", recheck_redaction=False + ) + + self.wait_for_background_updates() + + self.assertFalse(self._get_recheck("$redact2:test")) + + def test_recheck_absent_from_metadata(self) -> None: + """A redaction with no recheck_redaction key in internal metadata gets recheck=False.""" + self._insert_redaction("$redact3:test", "$target3:test", recheck_redaction=None) + + self.wait_for_background_updates() + + self.assertFalse(self._get_recheck("$redact3:test")) + + def test_recheck_no_event_json(self) -> None: + """A redaction with no event_json row gets recheck=False.""" + self._insert_redaction( + "$redact4:test", "$target4:test", insert_event_json=False + ) + + self.wait_for_background_updates() + + self.assertFalse(self._get_recheck("$redact4:test")) + + def test_batching(self) -> None: + """The update processes rows in batches, completing when all are done.""" + self._insert_redaction("$redact5:test", "$target5:test", recheck_redaction=True) + self._insert_redaction( + "$redact6:test", "$target6:test", recheck_redaction=False + ) + self._insert_redaction("$redact7:test", "$target7:test", recheck_redaction=True) + + self.wait_for_background_updates() + + self.assertTrue(self._get_recheck("$redact5:test")) + self.assertFalse(self._get_recheck("$redact6:test")) + self.assertTrue(self._get_recheck("$redact7:test")) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 92eb99f1d5..c82ccf1600 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -158,7 +158,7 @@ class RedactionTestCase(unittest.HomeserverTestCase): event, ) - self.assertFalse("redacted_because" in event.unsigned) + self.assertIsNone(event.internal_metadata.redacted_by) # Redact event reason = "Because I said so" @@ -168,7 +168,7 @@ class RedactionTestCase(unittest.HomeserverTestCase): self.assertEqual(msg_event.event_id, event.event_id) - self.assertTrue("redacted_because" in event.unsigned) + self.assertIsNotNone(event.internal_metadata.redacted_by) self.assertObjectHasAttributes( { @@ -179,15 +179,6 @@ class RedactionTestCase(unittest.HomeserverTestCase): event, ) - self.assertObjectHasAttributes( - { - "type": EventTypes.Redaction, - "user_id": self.u_alice.to_string(), - "content": {"reason": reason}, - }, - event.unsigned["redacted_because"], - ) - def test_redact_join(self) -> None: self.inject_room_member(self.room1, self.u_alice, Membership.JOIN) @@ -206,7 +197,7 @@ class RedactionTestCase(unittest.HomeserverTestCase): event, ) - self.assertFalse(hasattr(event, "redacted_because")) + self.assertIsNone(event.internal_metadata.redacted_by) # Redact event reason = "Because I said so" @@ -216,7 +207,7 @@ class RedactionTestCase(unittest.HomeserverTestCase): event = self.get_success(self.store.get_event(msg_event.event_id)) - self.assertTrue("redacted_because" in event.unsigned) + self.assertIsNotNone(event.internal_metadata.redacted_by) self.assertObjectHasAttributes( { @@ -227,15 +218,6 @@ class RedactionTestCase(unittest.HomeserverTestCase): event, ) - self.assertObjectHasAttributes( - { - "type": EventTypes.Redaction, - "user_id": self.u_alice.to_string(), - "content": {"reason": reason}, - }, - event.unsigned["redacted_because"], - ) - def test_circular_redaction(self) -> None: redaction_event_id1 = "$redaction1_id:test" redaction_event_id2 = "$redaction2_id:test" @@ -331,10 +313,7 @@ class RedactionTestCase(unittest.HomeserverTestCase): fetched = self.get_success(self.store.get_event(redaction_event_id1)) # it should have been redacted - self.assertEqual(fetched.unsigned["redacted_by"], redaction_event_id2) - self.assertEqual( - fetched.unsigned["redacted_because"].event_id, redaction_event_id2 - ) + self.assertEqual(fetched.internal_metadata.redacted_by, redaction_event_id2) def test_redact_censor(self) -> None: """Test that a redacted event gets censored in the DB after a month""" @@ -355,7 +334,7 @@ class RedactionTestCase(unittest.HomeserverTestCase): event, ) - self.assertFalse("redacted_because" in event.unsigned) + self.assertIsNone(event.internal_metadata.redacted_by) # Redact event reason = "Because I said so" @@ -363,7 +342,7 @@ class RedactionTestCase(unittest.HomeserverTestCase): event = self.get_success(self.store.get_event(msg_event.event_id)) - self.assertTrue("redacted_because" in event.unsigned) + self.assertIsNotNone(event.internal_metadata.redacted_by) self.assertObjectHasAttributes( {