mirror of
https://github.com/element-hq/synapse.git
synced 2026-05-11 17:34:45 +00:00
fe742653ff
Fixes https://github.com/element-hq/synapse/issues/19352 (See issue for history of this feature and previous PRs) > First, a [naive implementation](https://github.com/element-hq/synapse/pull/19268) of the endpoint was introduced, but it quickly ran into [performance issues on query](https://github.com/element-hq/synapse/pull/19312) and [long startup times](https://github.com/element-hq/synapse/issues/19349), leading to its [removal](https://github.com/element-hq/synapse/pull/19351). It also didn't actually work, and would fail to expose media when it was "unquarantined", so a [partial fix](https://github.com/element-hq/synapse/pull/19308) was attempted, where the suggested direction is to use a [stream](https://element-hq.github.io/synapse/latest/development/synapse_architecture/streams.html#cheatsheet-for-creating-a-new-stream) instead of a timestamp column. This PR re-introduces the API building on the previous feedback: * Adds a stream which tracks when media becomes (un)quarantined. * Runs a background update to capture already-quarantined media. * Adds a new admin API to return rows from the stream table. We track both quarantine and unquarantine actions in the stream to allow downstream consumers to process the records appropriately. Namely, to allow our Synapse exchange in HMA to remove hashes for unquarantined media (use case further explained in the [issue](https://github.com/element-hq/synapse/issues/19352)). **Note**: This knowingly does not capture all cases of media being quarantined. Other call sites are lower priority for T&S, and can be addressed in a future PR. ~~An issue will be created after this PR is merged to track those sites.~~ https://github.com/element-hq/synapse/issues/19672 ### Pull Request Checklist <!-- Please read https://element-hq.github.io/synapse/latest/development/contributing_guide.html before submitting your pull request --> * [x] Pull request is based on the develop branch * [x] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [x] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) --------- Co-authored-by: turt2live <1190097+turt2live@users.noreply.github.com> Co-authored-by: Eric Eastwood <madlittlemods@gmail.com> Co-authored-by: Eric Eastwood <erice@element.io>
159 lines
6.3 KiB
Python
159 lines
6.3 KiB
Python
#
|
|
# This file is licensed under the Affero General Public License (AGPL) version 3.
|
|
#
|
|
# Copyright 2014-2021 The Matrix.org Foundation C.I.C.
|
|
# Copyright (C) 2023 New Vector, Ltd
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
# License, or (at your option) any later version.
|
|
#
|
|
# See the GNU Affero General Public License for more details:
|
|
# <https://www.gnu.org/licenses/agpl-3.0.html>.
|
|
#
|
|
# Originally licensed under the Apache License, Version 2.0:
|
|
# <http://www.apache.org/licenses/LICENSE-2.0>.
|
|
#
|
|
# [This file includes modifications made by New Vector Limited]
|
|
#
|
|
#
|
|
|
|
from twisted.internet.testing import MemoryReactor
|
|
|
|
from synapse.api.room_versions import RoomVersions
|
|
from synapse.server import HomeServer
|
|
from synapse.storage.databases.main.room import _BackgroundUpdates
|
|
from synapse.storage.types import Cursor
|
|
from synapse.types import RoomAlias, RoomID, UserID
|
|
from synapse.util.clock import Clock
|
|
|
|
from tests.rest.admin.test_media import _AdminMediaTests
|
|
from tests.unittest import HomeserverTestCase
|
|
|
|
|
|
class RoomStoreTestCase(HomeserverTestCase):
|
|
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
|
|
# We can't test RoomStore on its own without the DirectoryStore, for
|
|
# management of the 'room_aliases' table
|
|
self.store = hs.get_datastores().main
|
|
|
|
self.room = RoomID.from_string("!abcde:test")
|
|
self.alias = RoomAlias.from_string("#a-room-name:test")
|
|
self.u_creator = UserID.from_string("@creator:test")
|
|
|
|
self.get_success(
|
|
self.store.store_room(
|
|
self.room.to_string(),
|
|
room_creator_user_id=self.u_creator.to_string(),
|
|
is_public=True,
|
|
room_version=RoomVersions.V1,
|
|
)
|
|
)
|
|
|
|
def test_get_room(self) -> None:
|
|
room = self.get_success(self.store.get_room(self.room.to_string()))
|
|
assert room is not None
|
|
self.assertTrue(room[0])
|
|
|
|
def test_get_room_unknown_room(self) -> None:
|
|
self.assertIsNone(self.get_success(self.store.get_room("!uknown:test")))
|
|
|
|
def test_get_room_with_stats(self) -> None:
|
|
res = self.get_success(self.store.get_room_with_stats(self.room.to_string()))
|
|
assert res is not None
|
|
self.assertEqual(res.room_id, self.room.to_string())
|
|
self.assertEqual(res.creator, self.u_creator.to_string())
|
|
self.assertTrue(res.public)
|
|
|
|
def test_get_room_with_stats_unknown_room(self) -> None:
|
|
self.assertIsNone(
|
|
self.get_success(self.store.get_room_with_stats("!uknown:test"))
|
|
)
|
|
|
|
|
|
class FlagExistingQuarantinedMediaBackgroundUpdatesTestCase(_AdminMediaTests):
|
|
"""
|
|
Test the `flag_existing_quarantined_media` background update.
|
|
"""
|
|
|
|
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
|
|
self.store = hs.get_datastores().main
|
|
|
|
def test_populates_quarantined_only(self) -> None:
|
|
self.register_user("admin", "pass", admin=True)
|
|
admin_user_tok = self.login("admin", "pass")
|
|
|
|
# Upload two distinct media items so we can quarantine one. If they shared content,
|
|
# then the quarantine-by-hash code would hit both.
|
|
_unaffected_media_upload_response = self.helper.upload_media(
|
|
b"first content", tok=admin_user_tok, expect_code=200
|
|
)
|
|
# Upload the media we're going to quarantine
|
|
media_upload_response = self.helper.upload_media(
|
|
b"second content", tok=admin_user_tok, expect_code=200
|
|
)
|
|
# Extract media ID from the response
|
|
quarantined_media_origin_and_media_id = media_upload_response["content_uri"][
|
|
6:
|
|
] # cut off 'mxc://'
|
|
quarantined_media_origin, quarantined_media_id = (
|
|
quarantined_media_origin_and_media_id.split("/")
|
|
)
|
|
|
|
# Ideally we'd also upload remote media to ensure that `remote_media_cache` gets picked up, but that's
|
|
# a little tricky to set up in the test here. We hope that local and remote media
|
|
# are treated similarly during the background update.
|
|
|
|
# Quarantine the media like an admin would. Because the quarantine API also inserts
|
|
# a record into the database for us, we'll clear out the `quarantined_media_changes`
|
|
# table before running the background update. This will simulate already-quarantined
|
|
# media being in the database prior to the background update.
|
|
channel = self.make_request(
|
|
"POST",
|
|
"/_synapse/admin/v1/media/quarantine/%s/%s"
|
|
% (
|
|
quarantined_media_origin,
|
|
quarantined_media_id,
|
|
),
|
|
access_token=admin_user_tok,
|
|
)
|
|
self.assertEqual(200, channel.code, msg=channel.json_body)
|
|
|
|
# Do that table clear we mentioned above
|
|
def _wipe_table(txn: Cursor) -> None:
|
|
txn.execute("DELETE FROM quarantined_media_changes")
|
|
|
|
self.get_success(
|
|
self.store.db_pool.runInteraction(
|
|
"test_populates_quarantined_only._wipe_table", _wipe_table
|
|
)
|
|
)
|
|
|
|
# Insert and run the background update
|
|
self.get_success(
|
|
self.store.db_pool.simple_insert(
|
|
table="background_updates",
|
|
values={
|
|
"update_name": _BackgroundUpdates.FLAG_EXISTING_QUARANTINED_MEDIA,
|
|
"progress_json": "{}",
|
|
},
|
|
)
|
|
)
|
|
self.store.db_pool.updates._all_done = False
|
|
self.wait_for_background_updates()
|
|
|
|
# Check that the changes table is now populated, and has exactly 1 quarantined
|
|
# media object in it (the one we quarantined).
|
|
changes: list[tuple[str | None, str, bool]] = self.get_success(
|
|
self.store.db_pool.simple_select_list(
|
|
"quarantined_media_changes",
|
|
None,
|
|
retcols=("origin", "media_id", "quarantined"),
|
|
)
|
|
)
|
|
self.assertEqual(len(changes), 1)
|
|
self.assertEqual(changes[0][0], None) # origin (local media)
|
|
self.assertEqual(changes[0][1], quarantined_media_id) # media_id
|
|
self.assertEqual(changes[0][2], True) # quarantined flag
|