mirror of
https://github.com/spacebarchat/server.git
synced 2026-03-30 22:35:40 +00:00
Embeds: factor out from postMessage
This commit is contained in:
@@ -16,7 +16,7 @@
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import { EmbedHandlers, randomString } from "@spacebar/api";
|
||||
import { EmbedHandlers, randomString, fillMessageUrlEmbeds } from "@spacebar/api";
|
||||
import {
|
||||
Application,
|
||||
Attachment,
|
||||
@@ -69,13 +69,12 @@ import {
|
||||
UnfurledMediaItem,
|
||||
BaseMessageComponents,
|
||||
v1CompTypes,
|
||||
PartialUser
|
||||
PartialUser,
|
||||
} from "@spacebar/schemas";
|
||||
const allow_empty = false;
|
||||
// TODO: check webhook, application, system author, stickers
|
||||
// TODO: embed gifs/videos/images
|
||||
|
||||
const LINK_REGEX = /<?https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)>?/g;
|
||||
function checkActionRow(row: ActionRowComponent, knownComponentIds: string[], errors: Record<string, { code?: string; message: string }>, rowIndex: number) {
|
||||
if (!row.components) {
|
||||
return;
|
||||
@@ -774,139 +773,15 @@ export async function handleMessage(opts: MessageOptions): Promise<Message> {
|
||||
|
||||
// TODO: cache link result in db
|
||||
export async function postHandleMessage(message: Message) {
|
||||
const conf = Config.get();
|
||||
const content = message.content?.replace(/ *`[^)]*` */g, ""); // remove markdown
|
||||
|
||||
const linkMatches = content?.match(LINK_REGEX) || [];
|
||||
message.clean_data();
|
||||
const data = { ...message.toJSON() };
|
||||
|
||||
const currentNormalizedUrls = new Set<string>();
|
||||
for (const link of linkMatches) {
|
||||
// Don't process links in <>
|
||||
if (link.startsWith("<") && link.endsWith(">")) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
const normalized = normalizeUrl(link);
|
||||
currentNormalizedUrls.add(normalized);
|
||||
} catch (e) {
|
||||
/* empty */
|
||||
}
|
||||
}
|
||||
if (data.embeds != undefined) {
|
||||
data.embeds?.forEach((embed) => {
|
||||
if (!embed.type) {
|
||||
embed.type = EmbedType.rich;
|
||||
}
|
||||
});
|
||||
}
|
||||
// Filter out embeds that could be links, start from scratch
|
||||
if (data.embeds != undefined) {
|
||||
data.embeds = data.embeds?.filter((embed) => embed.type === "rich");
|
||||
}
|
||||
message.embeds ??= [];
|
||||
message.embeds.forEach((embed) => {
|
||||
// we need to handle false-y values (empty string) here, so cant use ??=
|
||||
embed.type ||= EmbedType.rich;
|
||||
});
|
||||
|
||||
const seenNormalizedUrls = new Set<string>();
|
||||
const uniqueLinks: string[] = [];
|
||||
|
||||
for (const link of linkMatches.slice(0, 20)) {
|
||||
// embed max 20 links - TODO: make this configurable with instance policies
|
||||
// Don't embed links in <>
|
||||
if (link.startsWith("<") && link.endsWith(">")) continue;
|
||||
|
||||
try {
|
||||
const normalized = normalizeUrl(link);
|
||||
|
||||
if (!seenNormalizedUrls.has(normalized)) {
|
||||
seenNormalizedUrls.add(normalized);
|
||||
uniqueLinks.push(link);
|
||||
}
|
||||
} catch (e) {
|
||||
// Invalid URL, skip
|
||||
}
|
||||
}
|
||||
|
||||
if (uniqueLinks.length === 0) {
|
||||
// No valid unique links found, update message to remove old embeds
|
||||
if (data.embeds != undefined) {
|
||||
data.embeds = data.embeds?.filter((embed) => embed.type === "rich");
|
||||
}
|
||||
// author value is already included in message.toJSON()
|
||||
const event = {
|
||||
event: "MESSAGE_UPDATE",
|
||||
channel_id: message.channel_id,
|
||||
data: {
|
||||
...message.toJSON(),
|
||||
embeds: data.embeds == undefined ? message.embeds || [] : data.embeds,
|
||||
},
|
||||
} satisfies MessageUpdateEvent;
|
||||
const embeds = data.embeds == undefined ? [] : data.embeds;
|
||||
await Promise.all([emitEvent(event), Message.update({ id: message.id, channel_id: message.channel_id }, { embeds: embeds })]);
|
||||
return;
|
||||
}
|
||||
|
||||
const cachePromises = [];
|
||||
|
||||
for (const link of uniqueLinks) {
|
||||
let url: URL;
|
||||
try {
|
||||
url = new URL(link);
|
||||
} catch (e) {
|
||||
// Skip invalid URLs
|
||||
continue;
|
||||
}
|
||||
|
||||
const normalizedUrl = normalizeUrl(link);
|
||||
|
||||
// Check cache using normalized URL
|
||||
const cached = await EmbedCache.findOne({
|
||||
where: { url: normalizedUrl },
|
||||
});
|
||||
|
||||
if (cached) {
|
||||
if (data.embeds == undefined) {
|
||||
data.embeds = [];
|
||||
}
|
||||
data.embeds?.push(cached.embed);
|
||||
continue;
|
||||
}
|
||||
|
||||
// bit gross, but whatever!
|
||||
const endpointPublic = conf.cdn.endpointPublic; // lol
|
||||
const handler = url.hostname === new URL(endpointPublic!).hostname ? EmbedHandlers["self"] : EmbedHandlers[url.hostname] || EmbedHandlers["default"];
|
||||
|
||||
try {
|
||||
let res = await handler(url);
|
||||
if (!res) continue;
|
||||
// tried to use shorthand but types didn't like me L
|
||||
if (!Array.isArray(res)) res = [res];
|
||||
|
||||
for (const embed of res) {
|
||||
// Cache with normalized URL
|
||||
const cache = EmbedCache.create({
|
||||
url: normalizedUrl,
|
||||
embed: embed,
|
||||
});
|
||||
cachePromises.push(cache.save());
|
||||
if (data.embeds == undefined) {
|
||||
data.embeds = [];
|
||||
}
|
||||
data.embeds?.push(embed);
|
||||
}
|
||||
} catch (e) {
|
||||
console.error(`[Embeds] Error while generating embed for ${link}`, e);
|
||||
}
|
||||
}
|
||||
const embeds = data.embeds == undefined ? [] : data.embeds;
|
||||
await Promise.all([
|
||||
emitEvent({
|
||||
event: "MESSAGE_UPDATE",
|
||||
channel_id: message.channel_id,
|
||||
data: message.toJSON(),
|
||||
} satisfies MessageUpdateEvent),
|
||||
Message.update({ id: message.id, channel_id: message.channel_id }, { embeds: embeds }),
|
||||
...cachePromises,
|
||||
]);
|
||||
if ((await getPermission(message.author_id, message.channel.guild_id, message.channel_id)).has(Permissions.FLAGS.EMBED_LINKS)) await fillMessageUrlEmbeds(message);
|
||||
}
|
||||
|
||||
export async function sendMessage(opts: MessageOptions) {
|
||||
|
||||
@@ -16,12 +16,13 @@
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import { Config } from "@spacebar/util";
|
||||
import { arrayDistinctBy, arrayGroupBy, arrayRemove, Config, EmbedCache, emitEvent, Message, MessageUpdateEvent, normalizeUrl } from "@spacebar/util";
|
||||
import { Embed, EmbedImage, EmbedType } from "@spacebar/schemas";
|
||||
import * as cheerio from "cheerio";
|
||||
import crypto from "crypto";
|
||||
import { yellow } from "picocolors";
|
||||
import probe from "probe-image-size";
|
||||
import { FindOptionsWhere, In } from "typeorm";
|
||||
|
||||
export const DEFAULT_FETCH_OPTIONS: RequestInit = {
|
||||
redirect: "follow",
|
||||
@@ -517,3 +518,161 @@ export const EmbedHandlers: {
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
// Matches http(s) links, optionally wrapped in <> (the wrapped form is how users suppress embeds).
const LINK_REGEX = /<?https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)>?/g;

/**
 * Extracts every link-looking substring from a message's text content.
 *
 * Inline code spans (`` `like this` ``) are stripped first so that links inside them are ignored.
 * Each span is matched individually: the character class excludes backticks, so the text
 * between two separate code spans is kept, and spans containing `)` are still removed.
 *
 * @param message - message whose `content` is scanned; `content` may be missing
 * @returns the raw matches in order of appearance (possibly still wrapped in `<>`), or an empty array
 */
export function getMessageContentUrls(message: Message) {
	const content = message.content?.replace(/ *`[^`]*` */g, ""); // remove inline code spans

	return content?.match(LINK_REGEX) ?? [];
}
|
||||
|
||||
/**
 * Collapses cache rows that share the same URL down to a single row per URL.
 *
 * Within each URL group the newest row wins (latest `createdAt`, ties broken by the
 * numerically largest `id`); every other row in the group is deleted from the database.
 *
 * @param entries - cache rows, possibly containing several rows for one URL
 * @returns one surviving row per distinct URL, in first-seen URL order
 */
export async function dropDuplicateCacheEntries(entries: EmbedCache[]): Promise<EmbedCache[]> {
	const survivors: EmbedCache[] = [];
	const idsToDelete: string[] = [];

	for (const group of arrayGroupBy(entries, (e) => e.url).values()) {
		// Sort descending (newest first) on a copy, so index 0 is the row we keep.
		const newestFirst = [...group].sort((e1, e2) => {
			// The created_at column is added as nullable, so tolerate rows without a timestamp.
			const byAge = (e2.createdAt?.getTime() ?? 0) - (e1.createdAt?.getTime() ?? 0);
			if (byAge !== 0) return byAge;

			// Compare ids as bigints directly; converting a large difference to Number loses precision.
			const id1 = BigInt(e1.id);
			const id2 = BigInt(e2.id);
			if (id2 === id1) return 0;
			return id2 > id1 ? 1 : -1;
		});

		const [keep, ...duplicates] = newestFirst;
		survivors.push(keep);
		if (duplicates.length === 0) continue;

		const duplicateIds = duplicates.map((x) => x.id);
		idsToDelete.push(...duplicateIds);
		console.warn("[EmbedCache] Removing duplicate IDs for", keep.url, " - ", duplicateIds);
	}

	// Skip the round trip (and an empty IN list) when there is nothing to remove.
	if (idsToDelete.length > 0) {
		await EmbedCache.delete({ id: In(idsToDelete) } as FindOptionsWhere<EmbedCache>);
	}

	return survivors;
}
|
||||
|
||||
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param ms - delay passed to `setTimeout`
 */
async function sleep(ms: number) {
	return new Promise((done) => {
		setTimeout(done, ms);
	});
}
|
||||
|
||||
// Hack: stagger embed fetches with a growing delay so Node.js is not overwhelmed by simultaneous requests and times out
|
||||
/**
 * Maps a request's position in a batch to the delay (in ms) to wait before starting it.
 * Later positions fall into progressively steeper tiers.
 *
 * @param off - zero-based position of the request within the batch
 * @returns delay in milliseconds
 */
function getSlowdownFactor(off: number) {
	if (off < 10) return off;

	// [exclusive upper bound, base delay, extra delay per position]
	const tiers: ReadonlyArray<readonly [number, number, number]> = [
		[25, 100, 2],
		[50, 200, 10],
		[100, 500, 15],
		[250, 750, 20],
	];

	for (const [upperBound, base, perPosition] of tiers) {
		if (off < upperBound) return base + off * perPosition;
	}

	return 1000 + off * 150;
}
|
||||
|
||||
/**
 * Returns cache rows for the given links, generating and storing embeds for links that are not cached yet.
 *
 * Links are deduplicated by their normalized form, so two spellings of the same URL are only
 * looked up / generated once. Links that `normalizeUrl` rejects are skipped instead of failing the batch.
 *
 * @param urls - raw links to resolve
 * @param cb - optional progress callback. Called once with the literal url `"cached"` and all embeds
 *             found in the cache, then once per newly generated link (by `getOrUpdateEmbedCacheSingle`).
 *             A rejection from the `"cached"` call is logged and does not abort generation.
 * @returns cached rows followed by newly generated rows; empty when no usable link was given
 */
export async function getOrUpdateEmbedCache(urls: string[], cb?: (url: string, embeds: Embed[]) => Promise<void>): Promise<EmbedCache[]> {
	// Normalize every link exactly once and drop the ones that cannot be normalized.
	const candidates: { link: string; normalized: string }[] = [];
	for (const link of urls) {
		try {
			candidates.push({ link, normalized: normalizeUrl(link) });
		} catch {
			// Invalid URL, skip
		}
	}

	const uniqueCandidates = arrayDistinctBy(candidates, (c) => c.normalized);
	// Nothing to look up: avoid querying with an empty IN list.
	if (uniqueCandidates.length === 0) return [];

	const cachedEmbeds = await dropDuplicateCacheEntries(
		await EmbedCache.find({
			where: {
				url: In(uniqueCandidates.map((c) => c.normalized)),
			},
		}),
	);
	const embeds: EmbedCache[] = [...cachedEmbeds];

	// Await the callback so a rejection is handled here rather than surfacing as an unhandled rejection.
	try {
		await cb?.(
			"cached",
			cachedEmbeds.map((e) => e.embed),
		);
	} catch (e) {
		console.error("[Embeds] Error in callback for cached embeds", e);
	}

	const cachedUrls = new Set(cachedEmbeds.map((e) => e.url));
	const urlsToGenerate = uniqueCandidates.filter((c) => !cachedUrls.has(c.normalized)).map((c) => c.link);

	if (urlsToGenerate.length > 0) console.log("[Embeds] Need to generate embeds for urls:", urlsToGenerate);
	if (cachedEmbeds.length > 0)
		console.log(
			"[Embeds] Already had embeds for urls:",
			cachedEmbeds.map((e) => e.url),
		);

	const generatedEmbeds = await Promise.all(
		urlsToGenerate.map(async (link, off) => {
			// Stagger the requests by position ...or nodejs gets overwhelmed and times out
			await sleep(getSlowdownFactor(off));
			return await getOrUpdateEmbedCacheSingle(link, cb);
		}),
	);

	embeds.push(...generatedEmbeds.filter((e): e is EmbedCache[] => e !== null).flat());

	return embeds;
}
|
||||
|
||||
/**
 * Generates the embed(s) for one link, stores each of them in the embed cache, and reports them via `cb`.
 *
 * The handler is chosen by hostname: the `self` handler for links pointing at the configured public CDN
 * endpoint, otherwise a hostname-specific handler, otherwise the `default` handler.
 *
 * Never rejects: an unparseable link, a handler failure, a failed save or a rejecting callback is logged,
 * and whatever was stored up to that point is returned.
 *
 * @param link - raw link to generate embeds for
 * @param cb - optional callback invoked with the link and all embeds the handler produced
 * @returns the stored cache rows, or `null` when nothing was stored
 */
async function getOrUpdateEmbedCacheSingle(link: string, cb?: (url: string, embeds: Embed[]) => Promise<void>): Promise<EmbedCache[] | null> {
	const results: EmbedCache[] = [];
	try {
		// Parsing happens inside the try so one malformed link cannot reject the caller's whole batch.
		const url = new URL(link);

		// Without a configured public CDN endpoint no link can be a "self" link.
		const endpointPublic = Config.get().cdn.endpointPublic;
		const isSelf = !!endpointPublic && url.hostname === new URL(endpointPublic).hostname;
		const handler = isSelf ? EmbedHandlers["self"] : (EmbedHandlers[url.hostname] ?? EmbedHandlers["default"]);

		let res = await handler(url);
		if (!res) return null;
		if (!Array.isArray(res)) res = [res];

		// Cache with normalized URL
		const normalizedUrl = normalizeUrl(url.href);
		for (const embed of res) {
			const cache = await EmbedCache.create({
				url: normalizedUrl,
				embed: embed,
				createdAt: new Date(),
			}).save();
			results.push(cache);
			console.log("[Embeds] Generated embed for", link);
		}
		await cb?.(link, res);
	} catch (e) {
		console.error(`[Embeds] Error while generating embed for ${link}`, e);
	}
	return results.length == 0 ? null : results;
}
|
||||
|
||||
/**
 * Rebuilds the link embeds of a message from the URLs in its content.
 *
 * Existing non-`rich` embeds are discarded, then embeds for every distinct, valid, non-suppressed link
 * are fetched (from cache or freshly generated) and appended. The message row is updated and a
 * `MESSAGE_UPDATE` event is emitted as results arrive, and once more when everything is done.
 *
 * Links wrapped in `<`/`>` are treated as suppressed and ignored. A batch of embeds that would push
 * the message past `limits.message.maxEmbeds` is dropped as a whole.
 *
 * @param message - message to update; its `embeds` array is mutated in place
 * @returns the same message instance
 */
export async function fillMessageUrlEmbeds(message: Message) {
	const linkMatches = getMessageContentUrls(message).filter((l) => !l.startsWith("<") && !l.endsWith(">"));

	// Filter out embeds that could be links, start from scratch
	message.embeds = (message.embeds ?? []).filter((embed) => embed.type === "rich");

	if (linkMatches.length == 0) return message;

	// Drop links that cannot be parsed/normalized so one bad link does not abort the whole update.
	const validLinks = linkMatches.filter((link) => {
		try {
			new URL(link);
			normalizeUrl(link);
			return true;
		} catch {
			return false;
		}
	});
	const uniqueLinks: string[] = arrayDistinctBy(validLinks, (link) => normalizeUrl(link));

	if (uniqueLinks.length === 0) {
		// No valid unique links found, update message to remove old embeds
		await saveAndEmitMessageUpdate(message);
		return message;
	}

	// Serialize writes to the message row: each update starts only after the previous one has settled.
	// Failures of intermediate updates are logged and do not break the chain; the final update below
	// writes the complete state and is allowed to reject.
	const logUpdateFailure = (e: unknown) => console.error("[Embeds] Error while updating message embeds", e);
	let pendingUpdate: Promise<void> = saveAndEmitMessageUpdate(message).catch(logUpdateFailure);
	const queueUpdate = () => {
		pendingUpdate = pendingUpdate.then(() => saveAndEmitMessageUpdate(message)).catch(logUpdateFailure);
	};

	await getOrUpdateEmbedCache(uniqueLinks, async (_, embeds) => {
		if (embeds.length === 0) return; // nothing new to publish
		if (message.embeds.length + embeds.length > Config.get().limits.message.maxEmbeds) return;
		message.embeds.push(...embeds);
		queueUpdate();
	});

	// Let queued updates drain so the final write cannot race with them.
	await pendingUpdate;
	await saveAndEmitMessageUpdate(message);
	return message;
}
|
||||
|
||||
/**
 * Writes the message's current `embeds` to its database row, then emits a `MESSAGE_UPDATE`
 * event for the message's channel carrying the message's JSON form.
 *
 * @param message - message whose embeds should be stored and announced
 */
async function saveAndEmitMessageUpdate(message: Message) {
	const rowCriteria = { id: message.id, channel_id: message.channel_id };
	await Message.update(rowCriteria, { embeds: message.embeds });

	// Build the payload only after the row update, so it reflects the message state at emit time.
	const updateEvent = {
		event: "MESSAGE_UPDATE",
		channel_id: message.channel_id,
		data: message.toJSON(),
	} satisfies MessageUpdateEvent;
	await emitEvent(updateEvent);
}
|
||||
|
||||
@@ -24,4 +24,5 @@ export class MessageLimits {
|
||||
maxBulkDelete: number = 1000;
|
||||
maxEmbedDownloadSize: number = 1024 * 1024 * 5;
|
||||
maxPreloadCount: number = 100;
|
||||
maxEmbeds: number = 20;
|
||||
}
|
||||
|
||||
@@ -29,4 +29,11 @@ export class EmbedCache extends BaseClass {
|
||||
|
||||
@Column({ type: "simple-json" })
|
||||
embed: Embed;
|
||||
|
||||
// TODO: store all returned embed objects from a handler
|
||||
// @Column({ type: "simple-json" })
|
||||
// embeds: Embed[];
|
||||
|
||||
@Column({ name: "created_at", type: "timestamp with time zone" })
|
||||
createdAt: Date;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
import { MigrationInterface, QueryRunner } from "typeorm";
|
||||
|
||||
/**
 * Migration that adds a `created_at` column (timestamp with time zone, defaulting to `now()`)
 * to the `embed_cache` table; reverting drops the column again.
 */
export class EmbedCacheCreatedAt1772404321402 implements MigrationInterface {
	name = "EmbedCacheCreatedAt1772404321402";

	/** Adds the `created_at` column. */
	public async up(queryRunner: QueryRunner): Promise<void> {
		const addCreatedAt = `ALTER TABLE "embed_cache" ADD "created_at" timestamp with time zone DEFAULT now();`;
		await queryRunner.query(addCreatedAt);
	}

	/** Removes the `created_at` column. */
	public async down(queryRunner: QueryRunner): Promise<void> {
		const dropCreatedAt = `ALTER TABLE "embed_cache" DROP COLUMN "created_at"`;
		await queryRunner.query(dropCreatedAt);
	}
}
|
||||
@@ -42,3 +42,16 @@ export function arrayDistinctBy<T, M>(array: T[], selector: (elem: T) => M): T[]
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Groups the elements of an array by a derived key.
 *
 * @param array - elements to group
 * @param selector - computes the grouping key for an element
 * @returns a map from key to the elements that produced it; keys appear in first-seen order and
 *          elements keep their original relative order within each group
 */
export function arrayGroupBy<T, M>(array: T[], selector: (elem: T) => M): Map<M, T[]> {
	return array.reduce((groups, elem) => {
		const key = selector(elem);
		const bucket = groups.get(key);

		if (bucket) {
			bucket.push(elem);
		} else {
			groups.set(key, [elem]);
		}

		return groups;
	}, new Map<M, T[]>());
}
|
||||
|
||||
Reference in New Issue
Block a user