diff --git a/changelog.d/19458.feature b/changelog.d/19458.feature index fa016eaacb..e98cf5a1a8 100644 --- a/changelog.d/19458.feature +++ b/changelog.d/19458.feature @@ -1 +1 @@ -Add support for MSC4417 URL Previews via Appservices. +Add support for [MSC4417: URL Previews via Appservices](https://github.com/matrix-org/matrix-spec-proposals/pull/4417). diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py index 72477598e7..cc525628b6 100644 --- a/synapse/appservice/__init__.py +++ b/synapse/appservice/__init__.py @@ -85,9 +85,6 @@ class ApplicationService: NS_ALIASES = "aliases" NS_ROOMS = "rooms" NS_PREVIEW_URLS = "uk.half-shot.msc4417.preview_urls" - # The ordering here is important as it is used to map database values (which - # are stored as ints representing the position in this list) to namespace - # values. NS_LIST = [NS_USERS, NS_ALIASES, NS_ROOMS, NS_PREVIEW_URLS] def __init__( @@ -149,7 +146,7 @@ class ApplicationService: # users: [ {regex: "[A-z]+.*", exclusive: true}, ...], # aliases: [ {regex: "[A-z]+.*", exclusive: true}, ...], # rooms: [ {regex: "[A-z]+.*", exclusive: true}, ...], - # uk.half-shot.msc4417.preview_urls: [ {regex: "[A-z]+.*", exclusive: true}, ...], + # uk.half-shot.msc4417.preview_urls: [ {regex: "https?://(www.)?example.org/?.*", exclusive: true}, ...], # } if namespaces is None: namespaces = {} diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index ad0f74a6fa..c604bb58a3 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -558,7 +558,7 @@ class ApplicationServiceApi(SimpleHttpClient): user_id: The user who is requesting the preview. Returns: - A unstructrued map of data from the application service OR + A unstructured map of data from the application service OR None if the service could not be contacted, or returned any other response. """ @@ -578,9 +578,8 @@ class ApplicationServiceApi(SimpleHttpClient): # The response *must* be a JsonDict if successful. assert isinstance(response, dict) except HttpResponseException as e: - # The appservice doesn't support this endpoint. - # Since the AS has opted into these requests, we warn if the AS squarks about unknown - # endpoints. + if is_unknown_endpoint(e): + return None logger.warning("query_url_preview to %s received %s", uri, e.code) return None except Exception as ex: diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index ab0bb1894c..321fb24d71 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -21,13 +21,14 @@ import logging from typing import ( TYPE_CHECKING, + Any, Collection, Iterable, Mapping, ) -import attr from prometheus_client import Counter +from pydantic import model_validator from twisted.internet import defer @@ -58,6 +59,7 @@ from synapse.types import ( ) from synapse.util.async_helpers import Linearizer from synapse.util.metrics import Measure +from synapse.util.pydantic_models import ParseModel if TYPE_CHECKING: from synapse.server import HomeServer @@ -68,11 +70,47 @@ events_processed_counter = Counter( "synapse_handlers_appservice_events_processed", "", labelnames=[SERVER_NAME_LABEL] ) +url_previews_counter = Counter( + "synapse_handlers_appservice_url_previews", + "", + labelnames=[SERVER_NAME_LABEL], +) -@attr.s(slots=True, frozen=True, auto_attribs=True) -class ApplicationServiceUrlPreviewResult: + +class ApplicationServiceUrlPreviewResult(ParseModel): + """ + The response from a request for an appservice to generate a URL preview. + """ + + # Is the preview request exclusive to this appservice. If so, no requests should be made + # and the result should be treated as final. exclusive: bool - result: JsonDict | None + + # The OpenGraph response. If None then the preview request did not complete sucessfully. + result: dict[str, Any] | None + + @model_validator(mode="after") + def check_basic_fields(self) -> Self: + def check_str(key): + if self.result: + v = self.result.get(key) + if v is not None and isinstance(v, str): + raise ValueError(f"'{key}' must be a string if defined") + + # Non-exhaustive list of keys that should be strings based on https://ogp.me/. + # The spec does not set a hard requirement on return types all possible values, + # but these should be checked for consistency. + check_str("og:audio") + check_str("og:description") + check_str("og:determiner") + check_str("og:image") + check_str("og:locale") + check_str("og:site_name") + check_str("og:title") + check_str("og:type") + check_str("og:url") + check_str("og:video") + return self class ApplicationServicesHandler: @@ -738,16 +776,24 @@ class ApplicationServicesHandler: result = await self.appservice_api.query_preview_url( exclusive_appservice, url, user_id ) + url_previews_counter.labels(**{SERVER_NAME_LABEL: self.server_name}).inc(1) return ApplicationServiceUrlPreviewResult(exclusive=True, result=result) for appservice in self._get_services_for_preview_url(url=url): result = await self.appservice_api.query_preview_url( appservice, url, user_id ) if result: + url_previews_counter.labels( + **{SERVER_NAME_LABEL: self.server_name} + ).inc(1) return ApplicationServiceUrlPreviewResult( - exclusive=False, result=result + exclusive=False, + result=result, ) - return ApplicationServiceUrlPreviewResult(exclusive=False, result=None) + return ApplicationServiceUrlPreviewResult( + exclusive=False, + result=None, + ) async def query_user_exists(self, user_id: str) -> bool: """Check if any application service knows this user_id exists. diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py index adefefe744..2178721ca0 100644 --- a/synapse/media/url_previewer.py +++ b/synapse/media/url_previewer.py @@ -227,14 +227,17 @@ class UrlPreviewer: if self.allow_as_previews: as_og_data = await self.as_services.query_preview_url(url, user) if as_og_data.result: - # This data is never cached in the homeserver, but the appservice - # is expected to cache the data. + # This result should NEVER be cached (e.g. via ResponseCache) as + # the result is dependant on which user requested the preview. + # Any caching performed should be done on the application service + # side. return json_encoder.encode(as_og_data.result).encode("utf8") if as_og_data.exclusive: # XXX: _do_preview typically returns a 500 for any failed # requests but in this case we know the appservice requested # exclusivity over this URL pattern, so we can say it's a 404. + # https://github.com/matrix-org/matrix-spec/issues/2327 raise SynapseError( 404, "Not able to preview URL", @@ -513,6 +516,7 @@ class UrlPreviewer: ) except Exception as e: # FIXME: pass through 404s and other error messages nicely + # https://github.com/matrix-org/matrix-spec/issues/2327 logger.warning("Error downloading %s: %r", url, e) raise SynapseError( diff --git a/tests/appservice/test_appservice.py b/tests/appservice/test_appservice.py index 872b9946a5..ca2ee1c924 100644 --- a/tests/appservice/test_appservice.py +++ b/tests/appservice/test_appservice.py @@ -184,7 +184,7 @@ class ApplicationServiceTestCase(unittest.TestCase): def test_non_exclusive_url(self) -> None: self.service.namespaces[ApplicationService.NS_PREVIEW_URLS].append( - _regex("https:\\/\\/matrix.org.*", exclusive=False) + _regex(r"https:\/\/matrix.org.*", exclusive=False) ) self.assertFalse(self.service.is_exclusive_preview_url("https://matrix.org")) self.assertFalse(