From 2439990efce76f159cfabc9a07c4b1b67efeb21a Mon Sep 17 00:00:00 2001
From: Will Hunt <2072976+Half-Shot@users.noreply.github.com>
Date: Fri, 10 Apr 2026 18:04:11 +0100
Subject: [PATCH] Allow 'article' and 'profile' opengraph fields on URL previews. (#19659)

---
 changelog.d/19659.feature        |  1 +
 synapse/media/preview_html.py    | 18 ++++++++----------
 tests/media/test_html_preview.py | 22 ++++++++++++++++++++++
 3 files changed, 31 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/19659.feature

diff --git a/changelog.d/19659.feature b/changelog.d/19659.feature
new file mode 100644
index 0000000000..575b603951
--- /dev/null
+++ b/changelog.d/19659.feature
@@ -0,0 +1 @@
+Passthrough 'article' and 'profile' opengraph metadata on url preview requests.
diff --git a/synapse/media/preview_html.py b/synapse/media/preview_html.py
index 22ad581f82..4f9315f4c9 100644
--- a/synapse/media/preview_html.py
+++ b/synapse/media/preview_html.py
@@ -278,17 +278,15 @@ def parse_html_to_open_graph(tree: "etree._Element") -> dict[str, str | None]:
     # "og:video:height" : "720",
     # "og:video:secure_url": "https://www.youtube.com/v/LXDBoHyjmtw?version=3",
 
-    og = _get_meta_tags(tree, "property", "og")
+    og_root = _get_meta_tags(tree, "property", "og")
 
-    # TODO: Search for properties specific to the different Open Graph types,
-    # such as article: meta tags, e.g.:
-    #
-    # "article:publisher" : "https://www.facebook.com/thethudonline" />
-    # "article:author" content="https://www.facebook.com/thethudonline" />
-    # "article:tag" content="baby" />
-    # "article:section" content="Breaking News" />
-    # "article:published_time" content="2016-03-31T19:58:24+00:00" />
-    # "article:modified_time" content="2016-04-01T18:31:53+00:00" />
+    # https://ogp.me/#type_article
+    og_article = _get_meta_tags(tree, "property", "article")
+    # https://ogp.me/#type_profile
+    og_profile = _get_meta_tags(tree, "property", "profile")
+
+    # Merge as-is; on a duplicate key the right-most dict wins.
+    og = og_root | og_article | og_profile
 
     # Search for Twitter Card (twitter:) meta tags, e.g.:
     #
diff --git a/tests/media/test_html_preview.py b/tests/media/test_html_preview.py
index d3f1e8833a..ddcbbee897 100644
--- a/tests/media/test_html_preview.py
+++ b/tests/media/test_html_preview.py
@@ -433,6 +433,28 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
             },
         )
 
+    def test_extended_opengraph(self) -> None:
+        """Ensure we pull in profile and article data from opengraph."""
+        html = b"""
+        <html>
+        <meta property="og:description" content="My description" />
+        <meta property="profile:username" content="myname" />
+        <meta property="article:published_time" content="2026-04-07T10:07:37Z" />
+        </html>
+        """
+        tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
+        og = parse_html_to_open_graph(tree)
+        self.assertEqual(
+            og,
+            {
+                "og:title": None,
+                "og:description": "My description",
+                "profile:username": "myname",
+                "article:published_time": "2026-04-07T10:07:37Z",
+            },
+        )
+
     def test_nested_nodes(self) -> None:
         """A body with some nested nodes. Tests that we iterate over children
         in the right order (and don't reverse the order of the text)."""