mirror of
https://github.com/the-draupnir-project/Draupnir.git
synced 2026-05-14 11:25:11 +00:00
419 lines
18 KiB
TypeScript
419 lines
18 KiB
TypeScript
/*
|
|
Copyright 2019-2021 The Matrix.org Foundation C.I.C.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
import {
|
|
extractRequestError,
|
|
LogLevel,
|
|
LogService,
|
|
MatrixClient,
|
|
MatrixGlob,
|
|
MessageType,
|
|
Permalinks,
|
|
TextualMessageEventContent,
|
|
UserID,
|
|
getRequestFn,
|
|
setRequestFn,
|
|
} from "matrix-bot-sdk";
|
|
import { logMessage } from "./LogProxy";
|
|
import config from "./config";
|
|
import { ClientRequest, IncomingMessage } from "http";
|
|
|
|
export function htmlEscape(input: string): string {
|
|
return input.replace(/["&<>]/g, (char: string) => ({
|
|
['"'.charCodeAt(0)]: """,
|
|
["&".charCodeAt(0)]: "&",
|
|
["<".charCodeAt(0)]: "<",
|
|
[">".charCodeAt(0)]: ">"
|
|
})[char.charCodeAt(0)]);
|
|
}
|
|
|
|
export function setToArray<T>(set: Set<T>): T[] {
|
|
const arr: T[] = [];
|
|
for (const v of set) {
|
|
arr.push(v);
|
|
}
|
|
return arr;
|
|
}
|
|
|
|
export function isTrueJoinEvent(event: any): boolean {
|
|
const membership = event['content']['membership'] || 'join';
|
|
let prevMembership = "leave";
|
|
if (event['unsigned'] && event['unsigned']['prev_content']) {
|
|
prevMembership = event['unsigned']['prev_content']['membership'] || 'leave';
|
|
}
|
|
|
|
// We look at the previous membership to filter out profile changes
|
|
return membership === 'join' && prevMembership !== "join";
|
|
}
|
|
|
|
export async function redactUserMessagesIn(client: MatrixClient, userIdOrGlob: string, targetRoomIds: string[], limit = 1000) {
|
|
for (const targetRoomId of targetRoomIds) {
|
|
await logMessage(LogLevel.DEBUG, "utils#redactUserMessagesIn", `Fetching sent messages for ${userIdOrGlob} in ${targetRoomId} to redact...`, targetRoomId);
|
|
|
|
await getMessagesByUserIn(client, userIdOrGlob, targetRoomId, limit, async (eventsToRedact) => {
|
|
for (const victimEvent of eventsToRedact) {
|
|
await logMessage(LogLevel.DEBUG, "utils#redactUserMessagesIn", `Redacting ${victimEvent['event_id']} in ${targetRoomId}`, targetRoomId);
|
|
if (!config.noop) {
|
|
await client.redactEvent(targetRoomId, victimEvent['event_id']);
|
|
} else {
|
|
await logMessage(LogLevel.WARN, "utils#redactUserMessagesIn", `Tried to redact ${victimEvent['event_id']} in ${targetRoomId} but Mjolnir is running in no-op mode`, targetRoomId);
|
|
}
|
|
}
|
|
});
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Gets all the events sent by a user (or users if using wildcards) in a given room ID, since
|
|
* the time they joined.
|
|
* @param {MatrixClient} client The client to use.
|
|
* @param {string} sender The sender. A matrix user id or a wildcard to match multiple senders e.g. *.example.com.
|
|
* Can also be used to generically search the sender field e.g. *bob* for all events from senders with "bob" in them.
|
|
* See `MatrixGlob` in matrix-bot-sdk.
|
|
* @param {string} roomId The room ID to search in.
|
|
* @param {number} limit The maximum number of messages to search. Defaults to 1000. This will be a greater or equal
|
|
* number of events that are provided to the callback if a wildcard is used, as not all events paginated
|
|
* will match the glob. The reason the limit is calculated this way is so that a caller cannot accidentally
|
|
* traverse the entire room history.
|
|
* @param {function} cb Callback function to handle the events as they are received.
|
|
* The callback will only be called if there are any relevant events.
|
|
* @returns {Promise<void>} Resolves when either: the limit has been reached, no relevant events could be found or there is no more timeline to paginate.
|
|
*/
|
|
export async function getMessagesByUserIn(client: MatrixClient, sender: string, roomId: string, limit: number, cb: (events: any[]) => void): Promise<void> {
|
|
const isGlob = sender.includes("*");
|
|
const roomEventFilter = {
|
|
rooms: [roomId],
|
|
... isGlob ? {} : {senders: [sender]}
|
|
};
|
|
|
|
const matcher = new MatrixGlob(sender);
|
|
|
|
function testUser(userId: string): boolean {
|
|
if (isGlob) {
|
|
return matcher.test(userId);
|
|
} else {
|
|
return userId === sender;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Note: `rooms/initialSync` is deprecated. However, there is no replacement for this API for the time being.
|
|
* While previous versions of this function used `/sync`, experience shows that it can grow extremely
|
|
* slow (4-5 minutes long) when we need to sync many large rooms, which leads to timeouts and
|
|
* breakage in Mjolnir, see https://github.com/matrix-org/synapse/issues/10842.
|
|
*/
|
|
function roomInitialSync() {
|
|
return client.doRequest("GET", `/_matrix/client/r0/rooms/${encodeURIComponent(roomId)}/initialSync`);
|
|
}
|
|
|
|
function backfill(from: string) {
|
|
const qs = {
|
|
filter: JSON.stringify(roomEventFilter),
|
|
from: from,
|
|
dir: "b",
|
|
};
|
|
LogService.info("utils", "Backfilling with token: " + from);
|
|
return client.doRequest("GET", `/_matrix/client/r0/rooms/${encodeURIComponent(roomId)}/messages`, qs);
|
|
}
|
|
|
|
// Do an initial sync first to get the batch token
|
|
const response = await roomInitialSync();
|
|
|
|
let processed = 0;
|
|
/**
|
|
* Filter events from the timeline to events that are from a matching sender and under the limit that can be processed by the callback.
|
|
* @param events Events from the room timeline.
|
|
* @returns Events that can safely be processed by the callback.
|
|
*/
|
|
function filterEvents(events: any[]) {
|
|
const messages: any[] = [];
|
|
for (const event of events) {
|
|
if (processed >= limit) return messages; // we have provided enough events.
|
|
processed++;
|
|
|
|
if (testUser(event['sender'])) messages.push(event);
|
|
}
|
|
return messages;
|
|
}
|
|
|
|
// The recommended APIs for fetching events from a room is to use both rooms/initialSync then /messages.
|
|
// Unfortunately, this results in code that is rather hard to read, as these two APIs employ very different data structures.
|
|
// We prefer discarding the results from rooms/initialSync and reading only from /messages,
|
|
// even if it's a little slower, for the sake of code maintenance.
|
|
const timeline = response['messages']
|
|
if (timeline) {
|
|
// The end of the PaginationChunk has the most recent events from rooms/initialSync.
|
|
// This token is required be present in the PagintionChunk from rooms/initialSync.
|
|
let token = timeline['end']!;
|
|
// We check that we have the token because rooms/messages is not required to provide one
|
|
// and will not provide one when there is no more history to paginate.
|
|
while (token && processed < limit) {
|
|
const bfMessages = await backfill(token);
|
|
let lastToken = token;
|
|
token = bfMessages['end'];
|
|
if (lastToken === token) {
|
|
LogService.debug("utils", "Backfill returned same end token - returning early.");
|
|
return;
|
|
}
|
|
const events = filterEvents(bfMessages['chunk'] || []);
|
|
// If we are using a glob, there may be no relevant events in this chunk.
|
|
if (events.length > 0) {
|
|
await cb(events);
|
|
}
|
|
}
|
|
} else {
|
|
throw new Error(`Internal Error: rooms/initialSync did not return a pagination chunk for ${roomId}, this is not normal and if it is we need to stop using it. See roomInitialSync() for why we are using it.`);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Take an arbitrary string and a set of room IDs, and return a
|
|
* TextualMessageEventContent whose plaintext component replaces those room
|
|
* IDs with their canonical aliases, and whose html component replaces those
|
|
* room IDs with their matrix.to room pills.
|
|
*
|
|
* @param client The matrix client on which to query for room aliases
|
|
* @param text An arbitrary string to rewrite with room aliases and pills
|
|
* @param roomIds A set of room IDs to find and replace in `text`
|
|
* @param msgtype The desired message type of the returned TextualMessageEventContent
|
|
* @returns A TextualMessageEventContent with replaced room IDs
|
|
*/
|
|
export async function replaceRoomIdsWithPills(client: MatrixClient, text: string, roomIds: Set<string>, msgtype: MessageType = "m.text"): Promise<TextualMessageEventContent> {
|
|
const content: TextualMessageEventContent = {
|
|
body: text,
|
|
formatted_body: htmlEscape(text),
|
|
msgtype: msgtype,
|
|
format: "org.matrix.custom.html",
|
|
};
|
|
|
|
const escapeRegex = (v: string): string => {
|
|
return v.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
|
|
};
|
|
|
|
const viaServers = [(new UserID(await client.getUserId())).domain];
|
|
for (const roomId of roomIds) {
|
|
let alias = roomId;
|
|
try {
|
|
alias = (await client.getPublishedAlias(roomId)) || roomId;
|
|
} catch (e) {
|
|
// This is a recursive call, so tell the function not to try and call us
|
|
await logMessage(LogLevel.WARN, "utils", `Failed to resolve room alias for ${roomId} - see console for details`, null, true);
|
|
LogService.warn("utils", extractRequestError(e));
|
|
}
|
|
const regexRoomId = new RegExp(escapeRegex(roomId), "g");
|
|
content.body = content.body.replace(regexRoomId, alias);
|
|
if (content.formatted_body) {
|
|
content.formatted_body = content.formatted_body.replace(regexRoomId, `<a href="${Permalinks.forRoom(alias, viaServers)}">${alias}</a>`);
|
|
}
|
|
}
|
|
|
|
return content;
|
|
}
|
|
|
|
let isMatrixClientPatchedForConciseExceptions = false;
|
|
|
|
/**
|
|
* Patch `MatrixClient` into something that throws concise exceptions.
|
|
*
|
|
* By default, instances of `MatrixClient` throw instances of `IncomingMessage`
|
|
* in case of many errors. Unfortunately, these instances are unusable:
|
|
*
|
|
* - they are logged as ~800 *lines of code*;
|
|
* - there is no error message;
|
|
* - they offer no stack.
|
|
*
|
|
* This method configures `MatrixClient` to ensure that methods that may throw
|
|
* instead throws more reasonable insetances of `Error`.
|
|
*/
|
|
function patchMatrixClientForConciseExceptions() {
|
|
if (isMatrixClientPatchedForConciseExceptions) {
|
|
return;
|
|
}
|
|
let originalRequestFn = getRequestFn();
|
|
setRequestFn((params: { [k: string]: any }, cb: any) => {
|
|
// Store an error early, to maintain *some* semblance of stack.
|
|
// We'll only throw the error if there is one.
|
|
let error = new Error("STACK CAPTURE");
|
|
originalRequestFn(params, function conciseExceptionRequestFn(
|
|
err: { [key: string]: any }, response: { [key: string]: any }, resBody: string
|
|
) {
|
|
if (!err && (response?.statusCode < 200 || response?.statusCode >= 300)) {
|
|
// Normally, converting HTTP Errors into rejections is done by the caller
|
|
// of `requestFn` within matrix-bot-sdk. However, this always ends up rejecting
|
|
// with an `IncomingMessage` - exactly what we wish to avoid here.
|
|
err = response;
|
|
|
|
// Safety note: In the calling code within matrix-bot-sdk, if we return
|
|
// an IncomingMessage as an error, we end up logging an unredacted response,
|
|
// which may include tokens, passwords, etc. This could be a grave privacy
|
|
// leak. The matrix-bot-sdk typically handles this by sanitizing the data
|
|
// before logging it but, by converting the HTTP Error into a rejection
|
|
// earlier than expected by the matrix-bot-sdk, we skip this step of
|
|
// sanitization.
|
|
//
|
|
// However, since the error we're creating is an `IncomingMessage`, we
|
|
// rewrite it into an `Error` ourselves in this function. Our `Error`
|
|
// is even more sanitized (we only include the URL, HTTP method and
|
|
// the error response) so we are NOT causing a privacy leak.
|
|
if (!(err instanceof IncomingMessage)) {
|
|
// Safety check.
|
|
throw new TypeError("Internal error: at this stage, the error should be an IncomingMessage");
|
|
}
|
|
}
|
|
if (!(err instanceof IncomingMessage)) {
|
|
// In most cases, we're happy with the result.
|
|
return cb(err, response, resBody);
|
|
}
|
|
// However, MatrixClient has a tendency of throwing
|
|
// instances of `IncomingMessage` instead of instances
|
|
// of `Error`. The former take ~800 lines of log and
|
|
// provide no stack trace, which makes them typically
|
|
// useless.
|
|
let method: string | null = null;
|
|
let path = '';
|
|
let body: string | null = null;
|
|
if (err.method) {
|
|
method = err.method;
|
|
}
|
|
if (err.url) {
|
|
path = err.url;
|
|
}
|
|
if ("req" in err && (err as any).req instanceof ClientRequest) {
|
|
if (!method) {
|
|
method = (err as any).req.method;
|
|
}
|
|
if (!path) {
|
|
path = (err as any).req.path;
|
|
}
|
|
}
|
|
if ("body" in err) {
|
|
body = (err as any).body;
|
|
}
|
|
let message = `Error during MatrixClient request ${method} ${path}: ${err.statusCode} ${err.statusMessage} -- ${body}`;
|
|
error.message = message;
|
|
if (body) {
|
|
// Calling code may use `body` to check for errors, so let's
|
|
// make sure that we're providing it.
|
|
try {
|
|
body = JSON.parse(body);
|
|
} catch (ex) {
|
|
// Not JSON.
|
|
}
|
|
// Define the property but don't make it visible during logging.
|
|
Object.defineProperty(error, "body", {
|
|
value: body,
|
|
enumerable: false,
|
|
});
|
|
}
|
|
// Calling code may use `statusCode` to check for errors, so let's
|
|
// make sure that we're providing it.
|
|
if ("statusCode" in err) {
|
|
// Define the property but don't make it visible during logging.
|
|
Object.defineProperty(error, "statusCode", {
|
|
value: err.statusCode,
|
|
enumerable: false,
|
|
});
|
|
}
|
|
return cb(error, response, resBody);
|
|
})
|
|
});
|
|
isMatrixClientPatchedForConciseExceptions = true;
|
|
}
|
|
|
|
const MAX_REQUEST_ATTEMPTS = 15;
|
|
const REQUEST_RETRY_BASE_DURATION_MS = 100;
|
|
|
|
const TRACE_CONCURRENT_REQUESTS = false;
|
|
let numberOfConcurrentRequests = 0;
|
|
let isMatrixClientPatchedForRetryWhenThrottled = false;
|
|
/**
|
|
* Patch instances of MatrixClient to make sure that it retries requests
|
|
* in case of throttling.
|
|
*
|
|
* Note: As of this writing, we do not re-attempt requests that timeout,
|
|
* only request that are throttled by the server. The rationale is that,
|
|
* in case of DoS, we do not wish to make the situation even worse.
|
|
*/
|
|
function patchMatrixClientForRetry() {
|
|
if (isMatrixClientPatchedForRetryWhenThrottled) {
|
|
return;
|
|
}
|
|
let originalRequestFn = getRequestFn();
|
|
setRequestFn(async (params: { [k: string]: any }, cb: any) => {
|
|
let attempt = 1;
|
|
numberOfConcurrentRequests += 1;
|
|
if (TRACE_CONCURRENT_REQUESTS) {
|
|
console.trace("Current number of concurrent requests", numberOfConcurrentRequests);
|
|
}
|
|
try {
|
|
while (true) {
|
|
try {
|
|
let result: any[] = await new Promise((resolve, reject) => {
|
|
originalRequestFn(params, function requestFnWithRetry(
|
|
err: { [key: string]: any }, response: { [key: string]: any }, resBody: string
|
|
) {
|
|
// Note: There is no data race on `attempt` as we `await` before continuing
|
|
// to the next iteration of the loop.
|
|
if (attempt < MAX_REQUEST_ATTEMPTS && err?.body?.errcode === 'M_LIMIT_EXCEEDED') {
|
|
// We need to retry.
|
|
reject(err);
|
|
} else {
|
|
// No need-to-retry error? Lucky us!
|
|
// Note that this may very well be an error, just not
|
|
// one we need to retry.
|
|
resolve([err, response, resBody]);
|
|
}
|
|
});
|
|
});
|
|
// This is our final result.
|
|
// Pass result, whether success or error.
|
|
return cb(...result);
|
|
} catch (err) {
|
|
// Need to retry.
|
|
let retryAfterMs = attempt * attempt * REQUEST_RETRY_BASE_DURATION_MS;
|
|
if ("retry_after_ms" in err) {
|
|
try {
|
|
retryAfterMs = Number.parseInt(err.retry_after_ms, 10);
|
|
} catch (ex) {
|
|
// Use default value.
|
|
}
|
|
}
|
|
LogService.debug("Mjolnir.client", `Waiting ${retryAfterMs}ms before retrying ${params.method} ${params.uri}`);
|
|
await new Promise(resolve => setTimeout(resolve, retryAfterMs));
|
|
attempt += 1;
|
|
}
|
|
}
|
|
} finally {
|
|
numberOfConcurrentRequests -= 1;
|
|
}
|
|
});
|
|
isMatrixClientPatchedForRetryWhenThrottled = true;
|
|
}
|
|
|
|
/**
|
|
* Perform any patching deemed necessary to MatrixClient.
|
|
*/
|
|
export function patchMatrixClient() {
|
|
// Note that the order of patches is meaningful.
|
|
//
|
|
// - `patchMatrixClientForConciseExceptions` converts all `IncomingMessage`
|
|
// errors into instances of `Error` handled as errors;
|
|
// - `patchMatrixClientForRetry` expects that all errors are returned as
|
|
// errors.
|
|
patchMatrixClientForConciseExceptions();
|
|
patchMatrixClientForRetry();
|
|
}
|