diff --git a/src/api/util/handlers/Message.ts b/src/api/util/handlers/Message.ts
index ed41341e4..1fb9a080d 100644
--- a/src/api/util/handlers/Message.ts
+++ b/src/api/util/handlers/Message.ts
@@ -16,7 +16,7 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import { EmbedHandlers, randomString } from "@spacebar/api";
+import { EmbedHandlers, randomString, fillMessageUrlEmbeds } from "@spacebar/api";
import {
Application,
Attachment,
@@ -69,13 +69,12 @@ import {
UnfurledMediaItem,
BaseMessageComponents,
v1CompTypes,
- PartialUser
+ PartialUser,
} from "@spacebar/schemas";
const allow_empty = false;
// TODO: check webhook, application, system author, stickers
// TODO: embed gifs/videos/images
-const LINK_REGEX = /<?https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_+.~#?&/=]*)>?/g;
function checkActionRow(row: ActionRowComponent, knownComponentIds: string[], errors: Record, rowIndex: number) {
if (!row.components) {
return;
@@ -774,139 +773,15 @@ export async function handleMessage(opts: MessageOptions): Promise<Message> {
// TODO: cache link result in db
export async function postHandleMessage(message: Message) {
- const conf = Config.get();
- const content = message.content?.replace(/ *`[^)]*` */g, ""); // remove markdown
-
- const linkMatches = content?.match(LINK_REGEX) || [];
message.clean_data();
- const data = { ...message.toJSON() };
- const currentNormalizedUrls = new Set();
- for (const link of linkMatches) {
- // Don't process links in <>
- if (link.startsWith("<") && link.endsWith(">")) {
- continue;
- }
- try {
- const normalized = normalizeUrl(link);
- currentNormalizedUrls.add(normalized);
- } catch (e) {
- /* empty */
- }
- }
- if (data.embeds != undefined) {
- data.embeds?.forEach((embed) => {
- if (!embed.type) {
- embed.type = EmbedType.rich;
- }
- });
- }
- // Filter out embeds that could be links, start from scratch
- if (data.embeds != undefined) {
- data.embeds = data.embeds?.filter((embed) => embed.type === "rich");
- }
+ message.embeds ??= [];
+ message.embeds.forEach((embed) => {
+ // we need to handle false-y values (empty string) here, so cant use ??=
+ embed.type ||= EmbedType.rich;
+ });
- const seenNormalizedUrls = new Set();
- const uniqueLinks: string[] = [];
-
- for (const link of linkMatches.slice(0, 20)) {
- // embed max 20 links - TODO: make this configurable with instance policies
- // Don't embed links in <>
- if (link.startsWith("<") && link.endsWith(">")) continue;
-
- try {
- const normalized = normalizeUrl(link);
-
- if (!seenNormalizedUrls.has(normalized)) {
- seenNormalizedUrls.add(normalized);
- uniqueLinks.push(link);
- }
- } catch (e) {
- // Invalid URL, skip
- }
- }
-
- if (uniqueLinks.length === 0) {
- // No valid unique links found, update message to remove old embeds
- if (data.embeds != undefined) {
- data.embeds = data.embeds?.filter((embed) => embed.type === "rich");
- }
- // author value is already included in message.toJSON()
- const event = {
- event: "MESSAGE_UPDATE",
- channel_id: message.channel_id,
- data: {
- ...message.toJSON(),
- embeds: data.embeds == undefined ? message.embeds || [] : data.embeds,
- },
- } satisfies MessageUpdateEvent;
- const embeds = data.embeds == undefined ? [] : data.embeds;
- await Promise.all([emitEvent(event), Message.update({ id: message.id, channel_id: message.channel_id }, { embeds: embeds })]);
- return;
- }
-
- const cachePromises = [];
-
- for (const link of uniqueLinks) {
- let url: URL;
- try {
- url = new URL(link);
- } catch (e) {
- // Skip invalid URLs
- continue;
- }
-
- const normalizedUrl = normalizeUrl(link);
-
- // Check cache using normalized URL
- const cached = await EmbedCache.findOne({
- where: { url: normalizedUrl },
- });
-
- if (cached) {
- if (data.embeds == undefined) {
- data.embeds = [];
- }
- data.embeds?.push(cached.embed);
- continue;
- }
-
- // bit gross, but whatever!
- const endpointPublic = conf.cdn.endpointPublic; // lol
- const handler = url.hostname === new URL(endpointPublic!).hostname ? EmbedHandlers["self"] : EmbedHandlers[url.hostname] || EmbedHandlers["default"];
-
- try {
- let res = await handler(url);
- if (!res) continue;
- // tried to use shorthand but types didn't like me L
- if (!Array.isArray(res)) res = [res];
-
- for (const embed of res) {
- // Cache with normalized URL
- const cache = EmbedCache.create({
- url: normalizedUrl,
- embed: embed,
- });
- cachePromises.push(cache.save());
- if (data.embeds == undefined) {
- data.embeds = [];
- }
- data.embeds?.push(embed);
- }
- } catch (e) {
- console.error(`[Embeds] Error while generating embed for ${link}`, e);
- }
- }
- const embeds = data.embeds == undefined ? [] : data.embeds;
- await Promise.all([
- emitEvent({
- event: "MESSAGE_UPDATE",
- channel_id: message.channel_id,
- data: message.toJSON(),
- } satisfies MessageUpdateEvent),
- Message.update({ id: message.id, channel_id: message.channel_id }, { embeds: embeds }),
- ...cachePromises,
- ]);
+ if ((await getPermission(message.author_id, message.channel.guild_id, message.channel_id)).has(Permissions.FLAGS.EMBED_LINKS)) await fillMessageUrlEmbeds(message);
}
export async function sendMessage(opts: MessageOptions) {
diff --git a/src/api/util/utility/EmbedHandlers.ts b/src/api/util/utility/EmbedHandlers.ts
index 77d0c8f0c..da6a74294 100644
--- a/src/api/util/utility/EmbedHandlers.ts
+++ b/src/api/util/utility/EmbedHandlers.ts
@@ -16,12 +16,13 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import { Config } from "@spacebar/util";
+import { arrayDistinctBy, arrayGroupBy, arrayRemove, Config, EmbedCache, emitEvent, Message, MessageUpdateEvent, normalizeUrl } from "@spacebar/util";
import { Embed, EmbedImage, EmbedType } from "@spacebar/schemas";
import * as cheerio from "cheerio";
import crypto from "crypto";
import { yellow } from "picocolors";
import probe from "probe-image-size";
+import { FindOptionsWhere, In } from "typeorm";
export const DEFAULT_FETCH_OPTIONS: RequestInit = {
redirect: "follow",
@@ -517,3 +518,161 @@ export const EmbedHandlers: {
};
},
};
+
+const LINK_REGEX = /<?https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_+.~#?&/=]*)>?/g;
+
+export function getMessageContentUrls(message: Message) {
+ const content = message.content?.replace(/ *`[^)]*` */g, ""); // remove markdown
+
+ return content?.match(LINK_REGEX) ?? [];
+}
+
+export async function dropDuplicateCacheEntries(entries: EmbedCache[]): Promise<EmbedCache[]> {
+ const grouped = Array.from(arrayGroupBy(entries, (e) => e.url).values()).map((g) =>
+ g.toSorted((e1, e2) => {
+ let diff = e2.createdAt.getTime() - e1.createdAt.getTime();
+ if (diff == 0) diff = Number(BigInt(e2.id) - BigInt(e1.id));
+ return diff;
+ }),
+ );
+
+ const fullToDeleteIds: string[] = [];
+ for (const group of grouped) {
+ if (group.length <= 1) continue;
+ // console.log("[EmbedCache] Removing all but first from cache:", group);
+ // this might be backwards, sort always confuses me lol
+ const toDelete = group.slice(1);
+ const toDeleteIds = toDelete.map((x) => x.id);
+ fullToDeleteIds.push(...toDeleteIds);
+ console.warn("[EmbedCache] Removing duplicate IDs for", toDelete[0].url, " - ", toDeleteIds);
+ }
+
+ await EmbedCache.delete({ id: In(fullToDeleteIds) } as FindOptionsWhere<EmbedCache>);
+
+ // console.log("[EmbedCache] Cached embeds:", Array.from(grouped.map((x) => x[0].url)));
+ return Array.from(grouped.map((x) => x[0]));
+}
+
+async function sleep(ms: number) {
+ return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+// hack to make nodejs not die
+function getSlowdownFactor(off: number) {
+ if (off < 10) return off;
+ if (off < 25) return 100 + off * 2;
+ if (off < 50) return 200 + off * 10;
+ if (off < 100) return 500 + off * 15;
+ if (off < 250) return 750 + off * 20;
+ return 1000 + off * 150;
+}
+
+export async function getOrUpdateEmbedCache(urls: string[], cb?: (url: string, embeds: Embed[]) => Promise<void>): Promise<EmbedCache[]> {
+ urls = arrayDistinctBy(urls, (x) => x);
+ const embeds: EmbedCache[] = [];
+
+ const cachedEmbeds = await dropDuplicateCacheEntries(
+ await EmbedCache.find({
+ where: {
+ url: In(urls.map(normalizeUrl)),
+ },
+ }),
+ );
+ embeds.push(...cachedEmbeds);
+ cb?.(
+ "cached",
+ cachedEmbeds.map((e) => e.embed),
+ );
+
+ const urlsToGenerate = urls.filter((url) => {
+ return !cachedEmbeds.some((e) => e.url == normalizeUrl(url));
+ });
+
+ if (urlsToGenerate.length > 0) console.log("[Embeds] Need to generate embeds for urls:", urlsToGenerate);
+ if (cachedEmbeds.length > 0)
+ console.log(
+ "[Embeds] Already had embeds for urls:",
+ cachedEmbeds.map((e) => e.url),
+ );
+
+ let off = 0;
+ const generatedEmbeds = await Promise.all(
+ urlsToGenerate.map(async (link) => {
+ await sleep(getSlowdownFactor(off++)); // ...or nodejs gets overwhelmed and times out
+ return await getOrUpdateEmbedCacheSingle(link, cb);
+ }),
+ );
+
+ embeds.push(...generatedEmbeds.filter((e): e is EmbedCache[] => e !== null).flat());
+
+ return embeds;
+}
+
+async function getOrUpdateEmbedCacheSingle(link: string, cb?: (url: string, embeds: Embed[]) => Promise<void>): Promise<EmbedCache[] | null> {
+ const url = new URL(link);
+ const handler = url.hostname === new URL(Config.get().cdn.endpointPublic!).hostname ? EmbedHandlers["self"] : (EmbedHandlers[url.hostname] ?? EmbedHandlers["default"]);
+ const results: EmbedCache[] = [];
+ try {
+ let res = await handler(url);
+ if (!res) return null;
+ if (!Array.isArray(res)) res = [res];
+
+ for (const embed of res) {
+ // Cache with normalized URL
+ const cache = await EmbedCache.create({
+ url: normalizeUrl(url.href),
+ embed: embed,
+ createdAt: new Date(),
+ }).save();
+ results.push(cache);
+ console.log("[Embeds] Generated embed for", link);
+ }
+ await cb?.(link, res);
+ } catch (e) {
+ console.error(`[Embeds] Error while generating embed for ${link}`, e);
+ }
+ return results.length == 0 ? null : results;
+}
+
+export async function fillMessageUrlEmbeds(message: Message) {
+ const linkMatches = getMessageContentUrls(message).filter((l) => !l.startsWith("<") && !l.endsWith(">"));
+
+ // Filter out embeds that could be links, start from scratch
+ message.embeds = message.embeds.filter((embed) => embed.type === "rich");
+
+ if (linkMatches.length == 0) return message;
+
+ const uniqueLinks: string[] = arrayDistinctBy(linkMatches, normalizeUrl);
+
+ if (uniqueLinks.length === 0) {
+ // No valid unique links found, update message to remove old embeds
+ message.embeds = message.embeds?.filter((embed) => embed.type === "rich");
+ await saveAndEmitMessageUpdate(message);
+ return message;
+ }
+
+ // avoid a race condition updating the same row
+ let messageUpdateLock = saveAndEmitMessageUpdate(message);
+ await getOrUpdateEmbedCache(uniqueLinks, async (_, embeds) => {
+ if (message.embeds.length + embeds.length > Config.get().limits.message.maxEmbeds) return;
+ message.embeds.push(...embeds);
+ try {
+ await messageUpdateLock;
+ } catch {
+ /* empty */
+ }
+ messageUpdateLock = saveAndEmitMessageUpdate(message);
+ });
+
+ await saveAndEmitMessageUpdate(message);
+ return message;
+}
+
+async function saveAndEmitMessageUpdate(message: Message) {
+ await Message.update({ id: message.id, channel_id: message.channel_id }, { embeds: message.embeds });
+ await emitEvent({
+ event: "MESSAGE_UPDATE",
+ channel_id: message.channel_id,
+ data: message.toJSON(),
+ } satisfies MessageUpdateEvent);
+}
diff --git a/src/util/config/types/subconfigurations/limits/MessageLimits.ts b/src/util/config/types/subconfigurations/limits/MessageLimits.ts
index f61c0b651..50ea32d22 100644
--- a/src/util/config/types/subconfigurations/limits/MessageLimits.ts
+++ b/src/util/config/types/subconfigurations/limits/MessageLimits.ts
@@ -24,4 +24,5 @@ export class MessageLimits {
maxBulkDelete: number = 1000;
maxEmbedDownloadSize: number = 1024 * 1024 * 5;
maxPreloadCount: number = 100;
+ maxEmbeds: number = 20;
}
diff --git a/src/util/entities/EmbedCache.ts b/src/util/entities/EmbedCache.ts
index db1fbba67..a9ffd063c 100644
--- a/src/util/entities/EmbedCache.ts
+++ b/src/util/entities/EmbedCache.ts
@@ -29,4 +29,11 @@ export class EmbedCache extends BaseClass {
@Column({ type: "simple-json" })
embed: Embed;
+
+ // TODO: store all returned embed objects from a handler
+ // @Column({ type: "simple-json" })
+ // embeds: Embed[];
+
+ @Column({ name: "created_at", type: "timestamp with time zone" })
+ createdAt: Date;
}
diff --git a/src/util/migration/postgres/1772404321402-EmbedCacheCreatedAt.ts b/src/util/migration/postgres/1772404321402-EmbedCacheCreatedAt.ts
new file mode 100644
index 000000000..bf845ff74
--- /dev/null
+++ b/src/util/migration/postgres/1772404321402-EmbedCacheCreatedAt.ts
@@ -0,0 +1,13 @@
+import { MigrationInterface, QueryRunner } from "typeorm";
+
+export class EmbedCacheCreatedAt1772404321402 implements MigrationInterface {
+ name = "EmbedCacheCreatedAt1772404321402";
+
+ public async up(queryRunner: QueryRunner): Promise<void> {
+ await queryRunner.query(`ALTER TABLE "embed_cache" ADD "created_at" timestamp with time zone DEFAULT now();`);
+ }
+
+ public async down(queryRunner: QueryRunner): Promise<void> {
+ await queryRunner.query(`ALTER TABLE "embed_cache" DROP COLUMN "created_at"`);
+ }
+}
diff --git a/src/util/util/extensions/Array.ts b/src/util/util/extensions/Array.ts
index bb5f3362a..61cf469b2 100644
--- a/src/util/util/extensions/Array.ts
+++ b/src/util/util/extensions/Array.ts
@@ -42,3 +42,16 @@ export function arrayDistinctBy<T, M>(array: T[], selector: (elem: T) => M): T[]
return true;
});
}
+
+export function arrayGroupBy<T, M>(array: T[], selector: (elem: T) => M): Map<M, T[]> {
+ const map = new Map<M, T[]>();
+
+ array.forEach((item) => {
+ const mappedValue = selector(item);
+ const existing = map.get(mappedValue);
+ if (existing) existing.push(item);
+ else map.set(mappedValue, [item]);
+ });
+
+ return map;
+}