From 53ab01c4e2918fa6fd67ca8642efb21f5fc1a63a Mon Sep 17 00:00:00 2001 From: Rory& Date: Thu, 26 Mar 2026 15:14:18 +0100 Subject: [PATCH] EmbedHandlers: more configuration for embeds, switch to storing all returned embeds --- nix/testVm/configuration.nix | 5 + src/api/util/utility/EmbedHandlers.ts | 95 ++++++++++++------- src/util/config/Config.ts | 2 + src/util/config/types/EmbedConfiguration.ts | 29 ++++++ src/util/config/types/index.ts | 1 + src/util/entities/EmbedCache.ts | 9 +- .../1772404321403-EmbedCacheMultiEmbed.ts | 16 ++++ 7 files changed, 117 insertions(+), 40 deletions(-) create mode 100644 src/util/config/types/EmbedConfiguration.ts create mode 100644 src/util/migration/postgres/1772404321403-EmbedCacheMultiEmbed.ts diff --git a/nix/testVm/configuration.nix b/nix/testVm/configuration.nix index f9df7426d..1e70da99a 100644 --- a/nix/testVm/configuration.nix +++ b/nix/testVm/configuration.nix @@ -51,6 +51,11 @@ in sendMessage.enabled = false; }; }; + embeds = { + youtube = { + userAgent = "Mozilla/5.0 (compatible; Discordbot/2.0; +https://discordapp.com)"; + }; + }; }; offload = { diff --git a/src/api/util/utility/EmbedHandlers.ts b/src/api/util/utility/EmbedHandlers.ts index e851c33f2..679424f34 100644 --- a/src/api/util/utility/EmbedHandlers.ts +++ b/src/api/util/utility/EmbedHandlers.ts @@ -16,7 +16,7 @@ along with this program. If not, see . */ -import { arrayDistinctBy, arrayGroupBy, arrayRemove, Config, EmbedCache, emitEvent, Message, MessageUpdateEvent, normalizeUrl, OrmUtils } from "@spacebar/util"; +import { arrayDistinctBy, arrayGroupBy, Config, EmbedCache, emitEvent, Message, MessageUpdateEvent, normalizeUrl, OrmUtils } from "@spacebar/util"; import { Embed, EmbedImage, EmbedType } from "@spacebar/schemas"; import * as cheerio from "cheerio"; import crypto from "crypto"; @@ -24,15 +24,17 @@ import { yellow } from "picocolors"; import probe from "probe-image-size"; import { FindOptionsWhere, In } from "typeorm"; -export const DEFAULT_FETCH_OPTIONS: RequestInit = { - redirect: "follow", - headers: { - "user-agent": "Mozilla/5.0 (compatible; Spacebar/1.0; +https://github.com/spacebarchat/server)", - "accept-language": "en-US,en;q=0.9", - }, - // size: 1024 * 1024 * 5, // grabbed from config later - method: "GET", -}; +export function getDefaultFetchOptions(): RequestInit { + return { + redirect: "follow", + headers: { + "user-agent": Config.get().embeds.defaultUserAgent ?? "Mozilla/5.0 (compatible; Spacebar/1.0; +https://github.com/spacebarchat/server)", + "accept-language": "en-US,en;q=0.9", + }, + // size: 1024 * 1024 * 5, // grabbed from config later + method: "GET", + }; +} const makeEmbedImage = (url: string | undefined, width: number | undefined, height: number | undefined): Required | undefined => { if (!url || !width || !height) return undefined; @@ -109,7 +111,7 @@ export const getMetaDescriptions = (text: string) => { const doFetch = async (url: URL, opts?: RequestInit) => { try { - const res = await fetch(url, OrmUtils.mergeDeep({ ...DEFAULT_FETCH_OPTIONS }, opts ?? {})); + const res = await fetch(url, OrmUtils.mergeDeep({ ...getDefaultFetchOptions() }, opts ?? {})); if (res.headers.get("content-length")) { const contentLength = parseInt(res.headers.get("content-length")!); if (Config.get().limits.message.maxEmbedDownloadSize && contentLength > Config.get().limits.message.maxEmbedDownloadSize) { @@ -124,7 +126,7 @@ const doFetch = async (url: URL, opts?: RequestInit) => { const genericImageHandler = async (url: URL): Promise => { const type = await fetch(url, { - ...DEFAULT_FETCH_OPTIONS, + ...getDefaultFetchOptions(), method: "HEAD", }); @@ -159,7 +161,7 @@ export const EmbedHandlers: { // the url does not have a special handler default: async (url: URL) => { const type = await fetch(url, { - ...DEFAULT_FETCH_OPTIONS, + ...getDefaultFetchOptions(), method: "HEAD", }); if (type.headers.get("content-type")?.indexOf("image") !== -1) return await genericImageHandler(url); @@ -244,7 +246,7 @@ export const EmbedHandlers: { `&user.fields=profile_image_url`; const response = await fetch(endpointUrl, { - ...DEFAULT_FETCH_OPTIONS, + ...getDefaultFetchOptions(), headers: { authorization: `Bearer ${token}`, }, @@ -450,7 +452,16 @@ export const EmbedHandlers: { "youtube.com": (url) => EmbedHandlers["www.youtube.com"](url), "music.youtube.com": (url) => EmbedHandlers["www.youtube.com"](url), "www.youtube.com": async (url: URL): Promise => { - const response = await doFetch(url, { headers: { cookie: "CONSENT=PENDING+999; hl=en" } }); + const response = await doFetch(url, { + headers: { + cookie: Config.get().embeds.youtube.cookie ?? "CONSENT=PENDING+999; hl=en", + // TODO: dynamically obtain current system curl's user agent, ie. via https://ifconfig.me/ua + ...(Config.get().embeds.youtube.useCurlUserAgent ? { "user-agent": "curl/8.18.0" } : {}), + ...(Config.get().embeds.youtube.userAgent != null + ? { "user-agent": Config.get().embeds.youtube.userAgent ?? undefined /* type check fails for some reason otherwise */ } + : {}), + }, + }); if (!response) return null; const metas = getMetaDescriptions(await response.text()); @@ -548,7 +559,17 @@ export async function dropDuplicateCacheEntries(entries: EmbedCache[]): Promise< await EmbedCache.delete({ id: In(fullToDeleteIds) } as FindOptionsWhere); // console.log("[EmbedCache] Cached embeds:", Array.from(grouped.map((x) => x[0].url))); - return Array.from(grouped.map((x) => x[0])); + return await Promise.all( + Array.from(grouped.map((x) => x[0])).map(async (e) => { + if (e.embed != undefined && e.embeds == undefined) { + console.warn("[EmbedCache] Converting old embed to new embeds array for url", e.url); + e.embeds = [e.embed]; + e.embed = undefined; + return await e.save(); + } + return e; + }), + ); } async function sleep(ms: number) { @@ -579,7 +600,10 @@ export async function getOrUpdateEmbedCache(urls: string[], cb?: (url: string, e embeds.push(...cachedEmbeds); cb?.( "cached", - cachedEmbeds.map((e) => e.embed), + cachedEmbeds + .map((e) => e.embeds) + .flat() + .filter((e) => e !== undefined), ); const urlsToGenerate = urls.filter((url) => { @@ -597,39 +621,37 @@ export async function getOrUpdateEmbedCache(urls: string[], cb?: (url: string, e const generatedEmbeds = await Promise.all( urlsToGenerate.map(async (link) => { await sleep(getSlowdownFactor(off++)); // ...or nodejs gets overwhelmed and times out - return await getOrUpdateEmbedCacheSingle(link, cb); + return await generateEmbedSingle(link, cb); }), ); - embeds.push(...generatedEmbeds.filter((e): e is EmbedCache[] => e !== null).flat()); + embeds.push(...generatedEmbeds.filter((e) => e != null)); return embeds; } -async function getOrUpdateEmbedCacheSingle(link: string, cb?: (url: string, embeds: Embed[]) => Promise): Promise { +async function generateEmbedSingle(link: string, cb?: (url: string, embeds: Embed[]) => Promise): Promise { const url = new URL(link); const handler = url.hostname === new URL(Config.get().cdn.endpointPublic!).hostname ? EmbedHandlers["self"] : (EmbedHandlers[url.hostname] ?? EmbedHandlers["default"]); - const results: EmbedCache[] = []; try { let res = await handler(url); if (!res) return null; if (!Array.isArray(res)) res = [res]; - for (const embed of res) { - // Cache with normalized URL - const cache = await EmbedCache.create({ - url: normalizeUrl(url.href), - embed: embed, - createdAt: new Date(), - }).save(); - results.push(cache); - console.log("[Embeds] Generated embed for", link); - } + // Cache with normalized URL + const cache = await EmbedCache.create({ + url: normalizeUrl(url.href), + embeds: res, + createdAt: new Date(), + }).save(); + + console.log("[Embeds] Generated embed for", link); await cb?.(link, res); + return cache; } catch (e) { console.error(`[Embeds] Error while generating embed for ${link}`, e); } - return results.length == 0 ? null : results; + return null; } export async function fillMessageUrlEmbeds(message: Message) { @@ -644,16 +666,18 @@ export async function fillMessageUrlEmbeds(message: Message) { if (uniqueLinks.length === 0) { // No valid unique links found, update message to remove old embeds - message.embeds = message.embeds?.filter((embed) => embed.type === "rich"); + message.embeds = message.embeds.filter((embed) => embed.type === "rich"); await saveAndEmitMessageUpdate(message); return message; } // avoid a race condition updating the same row let messageUpdateLock = saveAndEmitMessageUpdate(message); - await getOrUpdateEmbedCache(uniqueLinks, async (_, embeds) => { - if (message.embeds.length + embeds.length > Config.get().limits.message.maxEmbeds) return; + await getOrUpdateEmbedCache(uniqueLinks, async (url, embeds) => { + if (url !== "cached" && message.embeds.length + embeds.length > Config.get().limits.message.maxEmbeds) return; message.embeds.push(...embeds); + if (message.embeds.length > Config.get().limits.message.maxEmbeds) message.embeds = message.embeds.slice(0, Config.get().limits.message.maxEmbeds); + try { await messageUpdateLock; } catch { @@ -667,6 +691,7 @@ export async function fillMessageUrlEmbeds(message: Message) { } async function saveAndEmitMessageUpdate(message: Message) { + // console.warn("Emitting message update for", message.id, "with embeds", message.embeds); await Message.update({ id: message.id, channel_id: message.channel_id }, { embeds: message.embeds }); await emitEvent({ event: "MESSAGE_UPDATE", diff --git a/src/util/config/Config.ts b/src/util/config/Config.ts index 7fe879db6..7549b8b6a 100644 --- a/src/util/config/Config.ts +++ b/src/util/config/Config.ts @@ -22,6 +22,7 @@ import { ComponentConfiguration, DefaultsConfiguration, EmailConfiguration, + EmbedConfiguration, EndpointConfiguration, ExternalTokensConfiguration, GeneralConfiguration, @@ -61,4 +62,5 @@ export class ConfigValue { user: UserConfiguration = new UserConfiguration(); offload: OffloadConfiguration = new OffloadConfiguration(); components = new ComponentConfiguration(); + embeds = new EmbedConfiguration(); } diff --git a/src/util/config/types/EmbedConfiguration.ts b/src/util/config/types/EmbedConfiguration.ts new file mode 100644 index 000000000..943008310 --- /dev/null +++ b/src/util/config/types/EmbedConfiguration.ts @@ -0,0 +1,29 @@ +/* + Spacebar: A FOSS re-implementation and extension of the Discord.com backend. + Copyright (C) 2025 Spacebar and Spacebar Contributors + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +export class EmbedConfiguration { + defaultUserAgent: string | null = null; + + youtube = new YoutubeEmbedConfiguration(); +} + +export class YoutubeEmbedConfiguration { + useCurlUserAgent: boolean = false; + userAgent: string | null = null; + cookie: string | null = null; +} diff --git a/src/util/config/types/index.ts b/src/util/config/types/index.ts index 9e689c242..872b590b2 100644 --- a/src/util/config/types/index.ts +++ b/src/util/config/types/index.ts @@ -37,3 +37,4 @@ export * from "./subconfigurations"; export * from "./TemplateConfiguration"; export * from "./UsersConfiguration"; export * from "./ComponentConfiguration"; +export * from "./EmbedConfiguration"; diff --git a/src/util/entities/EmbedCache.ts b/src/util/entities/EmbedCache.ts index a9ffd063c..6a7738052 100644 --- a/src/util/entities/EmbedCache.ts +++ b/src/util/entities/EmbedCache.ts @@ -27,12 +27,11 @@ export class EmbedCache extends BaseClass { @Column() url: string; - @Column({ type: "simple-json" }) - embed: Embed; + @Column({ type: "simple-json", nullable: true }) + embed?: Embed; - // TODO: store all returned embed objects from a handler - // @Column({ type: "simple-json" }) - // embeds: Embed[]; + @Column({ type: "simple-json", nullable: true }) + embeds?: Embed[]; @Column({ name: "created_at", type: "timestamp with time zone" }) createdAt: Date; diff --git a/src/util/migration/postgres/1772404321403-EmbedCacheMultiEmbed.ts b/src/util/migration/postgres/1772404321403-EmbedCacheMultiEmbed.ts new file mode 100644 index 000000000..55e80fdcf --- /dev/null +++ b/src/util/migration/postgres/1772404321403-EmbedCacheMultiEmbed.ts @@ -0,0 +1,16 @@ +import { MigrationInterface, QueryRunner } from "typeorm"; + +export class EmbedCacheCreatedAt1772404321403 implements MigrationInterface { + name = "EmbedCacheCreatedAt1772404321403"; + + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query(`ALTER TABLE "embed_cache" ALTER COLUMN "embed" DROP NOT NULL;`); + await queryRunner.query(`ALTER TABLE "embed_cache" ADD "embeds" text NULL;`); + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`ALTER TABLE "embed_cache" DROP COLUMN "embeds"`); + await queryRunner.query(`UPDATE "embed_cache" SET "embed" = '{}' WHERE "embed" IS NULL;`); + await queryRunner.query(`ALTER TABLE "embed_cache" ALTER COLUMN "embed" SET NOT NULL;`); + } +}