mirror of
https://github.com/the-draupnir-project/Draupnir.git
synced 2026-04-26 19:05:15 +00:00
Add health endpoint to appservice and add metrics via prometheus (#70)
This adds a `/healthz` endpoint to the appservice, which allows it to work more nicely in Kubernetes. It also adds some metrics for tracking the provisioning state. Grafana result: (image not shown here). Note: The ts-ignore comments are sadly required since the `_getValue` method is not public :/ I didn't find another solution, apart from maybe tracking the value elsewhere. * Add health endpoint to appservice and add metrics via prometheus * Ensure that we don't have duplicate metrics when the appservice is registered multiple times * Move gauge modifications to utils function * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix typo
This commit is contained in:
@@ -25,7 +25,7 @@ limitations under the License.
|
||||
* are NOT distributed, contributed, committed, or licensed under the Apache License.
|
||||
*/
|
||||
|
||||
import { AppServiceRegistration, Bridge, Request, WeakEvent, BridgeContext, MatrixUser, Logger } from "matrix-appservice-bridge";
|
||||
import { AppServiceRegistration, Bridge, Request, WeakEvent, BridgeContext, MatrixUser, Logger, setBridgeVersion, PrometheusMetrics } from "matrix-appservice-bridge";
|
||||
import { MjolnirManager } from ".//MjolnirManager";
|
||||
import { DataStore } from ".//datastore";
|
||||
import { PgDataStore } from "./postgres/PgDataStore";
|
||||
@@ -33,6 +33,8 @@ import { Api } from "./Api";
|
||||
import { IConfig } from "./config/config";
|
||||
import { AccessControl } from "./AccessControl";
|
||||
import { AppserviceCommandHandler } from "./bot/AppserviceCommandHandler";
|
||||
import { SOFTWARE_VERSION } from "../config";
|
||||
import { Registry } from 'prom-client';
|
||||
|
||||
const log = new Logger("AppService");
|
||||
/**
|
||||
@@ -54,6 +56,7 @@ export class MjolnirAppService {
|
||||
public readonly mjolnirManager: MjolnirManager,
|
||||
public readonly accessControl: AccessControl,
|
||||
private readonly dataStore: DataStore,
|
||||
private readonly prometheusMetrics: PrometheusMetrics
|
||||
) {
|
||||
this.api = new Api(config.homeserver.url, mjolnirManager);
|
||||
this.commands = new AppserviceCommandHandler(this);
|
||||
@@ -75,21 +78,35 @@ export class MjolnirAppService {
|
||||
// It also allows us to combine constructor/initialize logic
|
||||
// to make the code base much simpler. A small hack to pay for an overall less hacky code base.
|
||||
controller: {
|
||||
onUserQuery: () => {throw new Error("Mjolnir uninitialized")},
|
||||
onEvent: () => {throw new Error("Mjolnir uninitialized")},
|
||||
onUserQuery: () => { throw new Error("Mjolnir uninitialized") },
|
||||
onEvent: () => { throw new Error("Mjolnir uninitialized") },
|
||||
},
|
||||
suppressEcho: false,
|
||||
});
|
||||
await bridge.initialise();
|
||||
const accessControlListId = await bridge.getBot().getClient().resolveRoom(config.adminRoom);
|
||||
const accessControl = await AccessControl.setupAccessControl(accessControlListId, bridge);
|
||||
const mjolnirManager = await MjolnirManager.makeMjolnirManager(dataStore, bridge, accessControl);
|
||||
// Activate /metrics endpoint for Prometheus
|
||||
|
||||
// This should happen automatically but in testing this didn't happen in the docker image
|
||||
setBridgeVersion(SOFTWARE_VERSION);
|
||||
|
||||
// Due to the way the tests and this prom library work, we need to explicitly create a new Registry each time.
|
||||
const prometheus = bridge.getPrometheusMetrics(true, new Registry());
|
||||
const instanceCountGauge = prometheus.addGauge({
|
||||
name: "draupnir_instances",
|
||||
help: "Count of Draupnir Instances",
|
||||
labels: ["status", "uuid"],
|
||||
});
|
||||
|
||||
const mjolnirManager = await MjolnirManager.makeMjolnirManager(dataStore, bridge, accessControl, instanceCountGauge);
|
||||
const appService = new MjolnirAppService(
|
||||
config,
|
||||
bridge,
|
||||
mjolnirManager,
|
||||
accessControl,
|
||||
dataStore
|
||||
dataStore,
|
||||
prometheus
|
||||
);
|
||||
bridge.opts.controller = {
|
||||
onUserQuery: appService.onUserQuery.bind(appService),
|
||||
@@ -114,7 +131,7 @@ export class MjolnirAppService {
|
||||
return service;
|
||||
}
|
||||
|
||||
public onUserQuery (queriedUser: MatrixUser) {
|
||||
public onUserQuery(queriedUser: MatrixUser) {
|
||||
return {}; // auto-provision users with no additonal data
|
||||
}
|
||||
|
||||
@@ -160,6 +177,15 @@ export class MjolnirAppService {
|
||||
log.info("Starting MjolnirAppService, Matrix-side to listen on port", port);
|
||||
this.api.start(this.config.webAPI.port);
|
||||
await this.bridge.listen(port);
|
||||
this.prometheusMetrics.addAppServicePath(this.bridge);
|
||||
this.bridge.addAppServicePath({
|
||||
method: "GET",
|
||||
path: "/healthz",
|
||||
authenticate: false,
|
||||
handler: async (_req, res) => {
|
||||
res.status(200).send('ok');
|
||||
}
|
||||
});
|
||||
log.info("MjolnirAppService started successfully");
|
||||
}
|
||||
|
||||
|
||||
@@ -11,6 +11,8 @@ import EventEmitter from "events";
|
||||
import { MatrixEmitter } from "../MatrixEmitter";
|
||||
import { Permalinks } from "../commands/interface-manager/Permalinks";
|
||||
import { MatrixRoomReference } from "../commands/interface-manager/MatrixRoomReference";
|
||||
import { Gauge } from "prom-client";
|
||||
import { decrementGaugeValue, incrementGaugeValue } from "../utils";
|
||||
|
||||
const log = new Logger('MjolnirManager');
|
||||
|
||||
@@ -30,7 +32,8 @@ export class MjolnirManager {
|
||||
private constructor(
|
||||
private readonly dataStore: DataStore,
|
||||
private readonly bridge: Bridge,
|
||||
private readonly accessControl: AccessControl
|
||||
private readonly accessControl: AccessControl,
|
||||
private readonly instanceCountGauge: Gauge<"status" | "uuid">
|
||||
) {
|
||||
|
||||
}
|
||||
@@ -42,8 +45,8 @@ export class MjolnirManager {
|
||||
* @param accessControl Who has access to the bridge.
|
||||
* @returns A new mjolnir manager.
|
||||
*/
|
||||
public static async makeMjolnirManager(dataStore: DataStore, bridge: Bridge, accessControl: AccessControl): Promise<MjolnirManager> {
|
||||
const mjolnirManager = new MjolnirManager(dataStore, bridge, accessControl);
|
||||
public static async makeMjolnirManager(dataStore: DataStore, bridge: Bridge, accessControl: AccessControl, instanceCountGauge: Gauge<"status" | "uuid">): Promise<MjolnirManager> {
|
||||
const mjolnirManager = new MjolnirManager(dataStore, bridge, accessControl, instanceCountGauge);
|
||||
await mjolnirManager.startMjolnirs(await dataStore.list());
|
||||
return mjolnirManager;
|
||||
}
|
||||
@@ -55,7 +58,7 @@ export class MjolnirManager {
|
||||
* @param client A client for the appservice virtual user that the new mjolnir should use.
|
||||
* @returns A new managed mjolnir.
|
||||
*/
|
||||
public async makeInstance(requestingUserId: string, managementRoomId: string, client: MatrixClient): Promise<ManagedMjolnir> {
|
||||
public async makeInstance(localPart: string, requestingUserId: string, managementRoomId: string, client: MatrixClient): Promise<ManagedMjolnir> {
|
||||
const mxid = await client.getUserId();
|
||||
const intentListener = new MatrixIntentListener(mxid);
|
||||
const managedMjolnir = new ManagedMjolnir(
|
||||
@@ -70,6 +73,9 @@ export class MjolnirManager {
|
||||
await managedMjolnir.start();
|
||||
this.mjolnirs.set(mxid, managedMjolnir);
|
||||
this.unstartedMjolnirs.delete(mxid);
|
||||
incrementGaugeValue(this.instanceCountGauge, "offline", localPart);
|
||||
decrementGaugeValue(this.instanceCountGauge, "disabled", localPart);
|
||||
incrementGaugeValue(this.instanceCountGauge, "online", localPart);
|
||||
return managedMjolnir;
|
||||
}
|
||||
|
||||
@@ -79,7 +85,7 @@ export class MjolnirManager {
|
||||
* @param ownerId The owner of the mjolnir. We ask for it explicitly to not leak access to another user's mjolnir.
|
||||
* @returns The matching managed mjolnir instance.
|
||||
*/
|
||||
public getMjolnir(mjolnirId: string, ownerId: string): ManagedMjolnir|undefined {
|
||||
public getMjolnir(mjolnirId: string, ownerId: string): ManagedMjolnir | undefined {
|
||||
const mjolnir = this.mjolnirs.get(mjolnirId);
|
||||
if (mjolnir) {
|
||||
if (mjolnir.ownerId !== ownerId) {
|
||||
@@ -141,7 +147,7 @@ export class MjolnirManager {
|
||||
}
|
||||
});
|
||||
|
||||
const mjolnir = await this.makeInstance(requestingUserId, managementRoomId, mjIntent.matrixClient);
|
||||
const mjolnir = await this.makeInstance(mjolnirLocalPart, requestingUserId, managementRoomId, mjIntent.matrixClient);
|
||||
await mjolnir.createFirstList(requestingUserId, "list");
|
||||
|
||||
await this.dataStore.store({
|
||||
@@ -164,7 +170,7 @@ export class MjolnirManager {
|
||||
return [...this.unstartedMjolnirs.values()];
|
||||
}
|
||||
|
||||
public findUnstartedMjolnir(localPart: string): UnstartedMjolnir|undefined {
|
||||
public findUnstartedMjolnir(localPart: string): UnstartedMjolnir | undefined {
|
||||
return [...this.unstartedMjolnirs.values()].find(unstarted => unstarted.mjolnirRecord.local_part === localPart);
|
||||
}
|
||||
|
||||
@@ -195,8 +201,11 @@ export class MjolnirManager {
|
||||
// Don't await, we don't want to clobber initialization just because we can't tell someone they're no longer allowed.
|
||||
mjIntent.matrixClient.sendNotice(mjolnirRecord.management_room, `Your mjolnir has been disabled by the administrator: ${access.rule?.reason ?? "no reason supplied"}`);
|
||||
this.reportUnstartedMjolnir(UnstartedMjolnir.FailCode.Unauthorized, access.outcome, mjolnirRecord, mjIntent.userId);
|
||||
decrementGaugeValue(this.instanceCountGauge, "online", mjolnirRecord.local_part);
|
||||
incrementGaugeValue(this.instanceCountGauge, "disabled", mjolnirRecord.local_part);
|
||||
} else {
|
||||
await this.makeInstance(
|
||||
mjolnirRecord.local_part,
|
||||
mjolnirRecord.owner,
|
||||
mjolnirRecord.management_room,
|
||||
mjIntent.matrixClient,
|
||||
@@ -205,6 +214,8 @@ export class MjolnirManager {
|
||||
// Don't await, we don't want to clobber initialization if this fails.
|
||||
mjIntent.matrixClient.sendNotice(mjolnirRecord.management_room, `Your mjolnir could not be started. Please alert the administrator`);
|
||||
this.reportUnstartedMjolnir(UnstartedMjolnir.FailCode.StartError, e, mjolnirRecord, mjIntent.userId);
|
||||
decrementGaugeValue(this.instanceCountGauge, "online", mjolnirRecord.local_part);
|
||||
incrementGaugeValue(this.instanceCountGauge, "offline", mjolnirRecord.local_part);
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -279,7 +290,7 @@ export class MatrixIntentListener extends EventEmitter implements MatrixEmitter
|
||||
public handleEvent(mxEvent: WeakEvent) {
|
||||
// These are ordered to be the same as matrix-bot-sdk's MatrixClient
|
||||
// They shouldn't need to be, but they are just in case it matters.
|
||||
if (mxEvent['type'] === 'm.room.member' && mxEvent.state_key === this.mjolnirId) {
|
||||
if (mxEvent['type'] === 'm.room.member' && mxEvent.state_key === this.mjolnirId) {
|
||||
if (mxEvent['content']['membership'] === 'leave') {
|
||||
this.emit('room.leave', mxEvent.room_id, mxEvent);
|
||||
}
|
||||
|
||||
+46
-9
@@ -41,6 +41,7 @@ import * as _ from '@sentry/tracing'; // Performing the import activates tracing
|
||||
import ManagementRoomOutput from "./ManagementRoomOutput";
|
||||
import { IConfig } from "./config";
|
||||
import { MatrixSendClient } from "./MatrixEmitter";
|
||||
import { Gauge } from "prom-client";
|
||||
|
||||
// Define a few aliases to simplify parsing durations.
|
||||
|
||||
@@ -70,6 +71,42 @@ export function setToArray<T>(set: Set<T>): T[] {
|
||||
return arr;
|
||||
}
|
||||
|
||||
/**
 * Raises a prometheus gauge for one status/instance label pair. Used in the Appservice MjolnirManager.
 *
 * The gauge is only incremented when its current value for the given labels is falsy,
 * so calling this repeatedly for the same labels does not keep raising the value.
 *
 * The ts-ignore is mandatory since we access a private method due to lack of a public one.
 *
 * See https://github.com/Gnuxie/Draupnir/pull/70#discussion_r1299188922
 *
 * @param gauge The Gauge to be modified
 * @param status The status value that should be modified
 * @param uuid The UUID of the instance. (Usually the localPart)
 */
export function incrementGaugeValue(gauge: Gauge<"status" | "uuid">, status: "offline" | "disabled" | "online", uuid: string): void {
    const labels = { status, uuid };
    // @ts-ignore `_getValue` is private to the gauge; there is no public getter for the current value.
    const currentValue = gauge._getValue(labels);
    // Only touch the gauge when nothing is recorded yet, so the value is not raised twice.
    if (!currentValue) {
        gauge.inc(labels);
    }
}
|
||||
|
||||
/**
 * Lowers a prometheus gauge for one status/instance label pair. Used in the Appservice MjolnirManager.
 *
 * The gauge is only decremented when its current value for the given labels is truthy,
 * so calling this for labels whose value is already falsy leaves the gauge untouched.
 *
 * The ts-ignore is mandatory since we access a private method due to lack of a public one.
 *
 * See https://github.com/Gnuxie/Draupnir/pull/70#discussion_r1299188922
 *
 * @param gauge The Gauge to be modified
 * @param status The status value that should be modified
 * @param uuid The UUID of the instance. (Usually the localPart)
 */
export function decrementGaugeValue(gauge: Gauge<"status" | "uuid">, status: "offline" | "disabled" | "online", uuid: string): void {
    const labels = { status, uuid };
    // @ts-ignore `_getValue` is private to the gauge; there is no public getter for the current value.
    const currentValue = gauge._getValue(labels);
    // Only touch the gauge when something is recorded, so a falsy value is never lowered further.
    if (currentValue) {
        gauge.dec(labels);
    }
}
|
||||
|
||||
export function isTrueJoinEvent(event: any): boolean {
|
||||
const membership = event['content']['membership'] || 'join';
|
||||
let prevMembership = "leave";
|
||||
@@ -134,7 +171,7 @@ export async function getMessagesByUserIn(client: MatrixSendClient, sender: stri
|
||||
const isGlob = sender.includes("*");
|
||||
const roomEventFilter = {
|
||||
rooms: [roomId],
|
||||
... isGlob ? {} : {senders: [sender]}
|
||||
...isGlob ? {} : { senders: [sender] }
|
||||
};
|
||||
|
||||
const matcher = new MatrixGlob(sender);
|
||||
@@ -167,11 +204,11 @@ export async function getMessagesByUserIn(client: MatrixSendClient, sender: stri
|
||||
* if `null`, start from the most recent point in the timeline.
|
||||
* @returns The response part of the `/messages` API, see `BackfillResponse`.
|
||||
*/
|
||||
async function backfill(from: string|null): Promise<BackfillResponse> {
|
||||
async function backfill(from: string | null): Promise<BackfillResponse> {
|
||||
const qs = {
|
||||
filter: JSON.stringify(roomEventFilter),
|
||||
dir: "b",
|
||||
... from ? { from } : {}
|
||||
...from ? { from } : {}
|
||||
};
|
||||
LogService.info("utils", "Backfilling with token: " + from);
|
||||
return client.doRequest("GET", `/_matrix/client/v3/rooms/${encodeURIComponent(roomId)}/messages`, qs);
|
||||
@@ -195,10 +232,10 @@ export async function getMessagesByUserIn(client: MatrixSendClient, sender: stri
|
||||
}
|
||||
// We check that we have the token because rooms/messages is not required to provide one
|
||||
// and will not provide one when there is no more history to paginate.
|
||||
let token: string|null = null;
|
||||
let token: string | null = null;
|
||||
do {
|
||||
const bfMessages: BackfillResponse = await backfill(token);
|
||||
const previousToken: string|null = token;
|
||||
const previousToken: string | null = token;
|
||||
token = bfMessages['end'] ?? null;
|
||||
const events = filterEvents(bfMessages['chunk'] || []);
|
||||
// If we are using a glob, there may be no relevant events in this chunk.
|
||||
@@ -287,13 +324,13 @@ function patchMatrixClientForConciseExceptions() {
|
||||
const method: string | undefined = err.method
|
||||
? err.method
|
||||
: "req" in err && err.req instanceof ClientRequest
|
||||
? err.req.method
|
||||
: params.method;
|
||||
? err.req.method
|
||||
: params.method;
|
||||
const path: string = err.url
|
||||
? err.url
|
||||
: "req" in err && err.req instanceof ClientRequest
|
||||
? err.req.path
|
||||
: params.uri ?? '';
|
||||
? err.req.path
|
||||
: params.uri ?? '';
|
||||
let body: unknown = null;
|
||||
if ("body" in err) {
|
||||
body = err.body;
|
||||
|
||||
Reference in New Issue
Block a user