Add health endpoint to appservice and add metrics via prometheus (#70)

This adds a `/healthz` endpoint to the appservice, which allows it to work more nicely in Kubernetes.

It also adds some metrics for tracking the provisioning state.

Grafana result:
![image](https://github.com/Gnuxie/Draupnir/assets/1374914/9426c8e6-2c1c-469c-8902-1b9e2b6db529)

Note: The `ts-ignore` comments are sadly required since the `_getValue` method is not public :/ I didn't find another solution, apart from maybe tracking the value elsewhere.

* Add health endpoint to appservice and add metrics via prometheus

* Ensure that we don't have duplicate metrics when the appservice is registered multiple times

* Move gauge modifications to utils function

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix typo
This commit is contained in:
Marcel
2023-09-04 13:32:26 +02:00
committed by GitHub
parent 2ea8ddf864
commit f55d8a453d
3 changed files with 97 additions and 23 deletions
+32 -6
View File
@@ -25,7 +25,7 @@ limitations under the License.
* are NOT distributed, contributed, committed, or licensed under the Apache License.
*/
import { AppServiceRegistration, Bridge, Request, WeakEvent, BridgeContext, MatrixUser, Logger } from "matrix-appservice-bridge";
import { AppServiceRegistration, Bridge, Request, WeakEvent, BridgeContext, MatrixUser, Logger, setBridgeVersion, PrometheusMetrics } from "matrix-appservice-bridge";
import { MjolnirManager } from ".//MjolnirManager";
import { DataStore } from ".//datastore";
import { PgDataStore } from "./postgres/PgDataStore";
@@ -33,6 +33,8 @@ import { Api } from "./Api";
import { IConfig } from "./config/config";
import { AccessControl } from "./AccessControl";
import { AppserviceCommandHandler } from "./bot/AppserviceCommandHandler";
import { SOFTWARE_VERSION } from "../config";
import { Registry } from 'prom-client';
const log = new Logger("AppService");
/**
@@ -54,6 +56,7 @@ export class MjolnirAppService {
public readonly mjolnirManager: MjolnirManager,
public readonly accessControl: AccessControl,
private readonly dataStore: DataStore,
private readonly prometheusMetrics: PrometheusMetrics
) {
this.api = new Api(config.homeserver.url, mjolnirManager);
this.commands = new AppserviceCommandHandler(this);
@@ -75,21 +78,35 @@ export class MjolnirAppService {
// It also allows us to combine constructor/initialize logic
// to make the code base much simpler. A small hack to pay for an overall less hacky code base.
controller: {
onUserQuery: () => {throw new Error("Mjolnir uninitialized")},
onEvent: () => {throw new Error("Mjolnir uninitialized")},
onUserQuery: () => { throw new Error("Mjolnir uninitialized") },
onEvent: () => { throw new Error("Mjolnir uninitialized") },
},
suppressEcho: false,
});
await bridge.initialise();
const accessControlListId = await bridge.getBot().getClient().resolveRoom(config.adminRoom);
const accessControl = await AccessControl.setupAccessControl(accessControlListId, bridge);
const mjolnirManager = await MjolnirManager.makeMjolnirManager(dataStore, bridge, accessControl);
// Activate /metrics endpoint for Prometheus
// This should happen automatically but in testing this didn't happen in the docker image
setBridgeVersion(SOFTWARE_VERSION);
// Due to the way the tests and this prom library works we need to explicitly create a new one each time.
const prometheus = bridge.getPrometheusMetrics(true, new Registry());
const instanceCountGauge = prometheus.addGauge({
name: "draupnir_instances",
help: "Count of Draupnir Instances",
labels: ["status", "uuid"],
});
const mjolnirManager = await MjolnirManager.makeMjolnirManager(dataStore, bridge, accessControl, instanceCountGauge);
const appService = new MjolnirAppService(
config,
bridge,
mjolnirManager,
accessControl,
dataStore
dataStore,
prometheus
);
bridge.opts.controller = {
onUserQuery: appService.onUserQuery.bind(appService),
@@ -114,7 +131,7 @@ export class MjolnirAppService {
return service;
}
public onUserQuery (queriedUser: MatrixUser) {
public onUserQuery(queriedUser: MatrixUser) {
return {}; // auto-provision users with no additonal data
}
@@ -160,6 +177,15 @@ export class MjolnirAppService {
log.info("Starting MjolnirAppService, Matrix-side to listen on port", port);
this.api.start(this.config.webAPI.port);
await this.bridge.listen(port);
this.prometheusMetrics.addAppServicePath(this.bridge);
this.bridge.addAppServicePath({
method: "GET",
path: "/healthz",
authenticate: false,
handler: async (_req, res) => {
res.status(200).send('ok');
}
});
log.info("MjolnirAppService started successfully");
}
+19 -8
View File
@@ -11,6 +11,8 @@ import EventEmitter from "events";
import { MatrixEmitter } from "../MatrixEmitter";
import { Permalinks } from "../commands/interface-manager/Permalinks";
import { MatrixRoomReference } from "../commands/interface-manager/MatrixRoomReference";
import { Gauge } from "prom-client";
import { decrementGaugeValue, incrementGaugeValue } from "../utils";
const log = new Logger('MjolnirManager');
@@ -30,7 +32,8 @@ export class MjolnirManager {
private constructor(
private readonly dataStore: DataStore,
private readonly bridge: Bridge,
private readonly accessControl: AccessControl
private readonly accessControl: AccessControl,
private readonly instanceCountGauge: Gauge<"status" | "uuid">
) {
}
@@ -42,8 +45,8 @@ export class MjolnirManager {
* @param accessControl Who has access to the bridge.
* @returns A new mjolnir manager.
*/
public static async makeMjolnirManager(dataStore: DataStore, bridge: Bridge, accessControl: AccessControl): Promise<MjolnirManager> {
const mjolnirManager = new MjolnirManager(dataStore, bridge, accessControl);
public static async makeMjolnirManager(dataStore: DataStore, bridge: Bridge, accessControl: AccessControl, instanceCountGauge: Gauge<"status" | "uuid">): Promise<MjolnirManager> {
const mjolnirManager = new MjolnirManager(dataStore, bridge, accessControl, instanceCountGauge);
await mjolnirManager.startMjolnirs(await dataStore.list());
return mjolnirManager;
}
@@ -55,7 +58,7 @@ export class MjolnirManager {
* @param client A client for the appservice virtual user that the new mjolnir should use.
* @returns A new managed mjolnir.
*/
public async makeInstance(requestingUserId: string, managementRoomId: string, client: MatrixClient): Promise<ManagedMjolnir> {
public async makeInstance(localPart: string, requestingUserId: string, managementRoomId: string, client: MatrixClient): Promise<ManagedMjolnir> {
const mxid = await client.getUserId();
const intentListener = new MatrixIntentListener(mxid);
const managedMjolnir = new ManagedMjolnir(
@@ -70,6 +73,9 @@ export class MjolnirManager {
await managedMjolnir.start();
this.mjolnirs.set(mxid, managedMjolnir);
this.unstartedMjolnirs.delete(mxid);
incrementGaugeValue(this.instanceCountGauge, "offline", localPart);
decrementGaugeValue(this.instanceCountGauge, "disabled", localPart);
incrementGaugeValue(this.instanceCountGauge, "online", localPart);
return managedMjolnir;
}
@@ -79,7 +85,7 @@ export class MjolnirManager {
* @param ownerId The owner of the mjolnir. We ask for it explicitly to not leak access to another user's mjolnir.
* @returns The matching managed mjolnir instance.
*/
public getMjolnir(mjolnirId: string, ownerId: string): ManagedMjolnir|undefined {
public getMjolnir(mjolnirId: string, ownerId: string): ManagedMjolnir | undefined {
const mjolnir = this.mjolnirs.get(mjolnirId);
if (mjolnir) {
if (mjolnir.ownerId !== ownerId) {
@@ -141,7 +147,7 @@ export class MjolnirManager {
}
});
const mjolnir = await this.makeInstance(requestingUserId, managementRoomId, mjIntent.matrixClient);
const mjolnir = await this.makeInstance(mjolnirLocalPart, requestingUserId, managementRoomId, mjIntent.matrixClient);
await mjolnir.createFirstList(requestingUserId, "list");
await this.dataStore.store({
@@ -164,7 +170,7 @@ export class MjolnirManager {
return [...this.unstartedMjolnirs.values()];
}
public findUnstartedMjolnir(localPart: string): UnstartedMjolnir|undefined {
public findUnstartedMjolnir(localPart: string): UnstartedMjolnir | undefined {
return [...this.unstartedMjolnirs.values()].find(unstarted => unstarted.mjolnirRecord.local_part === localPart);
}
@@ -195,8 +201,11 @@ export class MjolnirManager {
// Don't await, we don't want to clobber initialization just because we can't tell someone they're no longer allowed.
mjIntent.matrixClient.sendNotice(mjolnirRecord.management_room, `Your mjolnir has been disabled by the administrator: ${access.rule?.reason ?? "no reason supplied"}`);
this.reportUnstartedMjolnir(UnstartedMjolnir.FailCode.Unauthorized, access.outcome, mjolnirRecord, mjIntent.userId);
decrementGaugeValue(this.instanceCountGauge, "online", mjolnirRecord.local_part);
incrementGaugeValue(this.instanceCountGauge, "disabled", mjolnirRecord.local_part);
} else {
await this.makeInstance(
mjolnirRecord.local_part,
mjolnirRecord.owner,
mjolnirRecord.management_room,
mjIntent.matrixClient,
@@ -205,6 +214,8 @@ export class MjolnirManager {
// Don't await, we don't want to clobber initialization if this fails.
mjIntent.matrixClient.sendNotice(mjolnirRecord.management_room, `Your mjolnir could not be started. Please alert the administrator`);
this.reportUnstartedMjolnir(UnstartedMjolnir.FailCode.StartError, e, mjolnirRecord, mjIntent.userId);
decrementGaugeValue(this.instanceCountGauge, "online", mjolnirRecord.local_part);
incrementGaugeValue(this.instanceCountGauge, "offline", mjolnirRecord.local_part);
});
}
}
@@ -279,7 +290,7 @@ export class MatrixIntentListener extends EventEmitter implements MatrixEmitter
public handleEvent(mxEvent: WeakEvent) {
// These are ordered to be the same as matrix-bot-sdk's MatrixClient
// They shouldn't need to be, but they are just in case it matters.
if (mxEvent['type'] === 'm.room.member' && mxEvent.state_key === this.mjolnirId) {
if (mxEvent['type'] === 'm.room.member' && mxEvent.state_key === this.mjolnirId) {
if (mxEvent['content']['membership'] === 'leave') {
this.emit('room.leave', mxEvent.room_id, mxEvent);
}
+46 -9
View File
@@ -41,6 +41,7 @@ import * as _ from '@sentry/tracing'; // Performing the import activates tracing
import ManagementRoomOutput from "./ManagementRoomOutput";
import { IConfig } from "./config";
import { MatrixSendClient } from "./MatrixEmitter";
import { Gauge } from "prom-client";
// Define a few aliases to simplify parsing durations.
@@ -70,6 +71,42 @@ export function setToArray<T>(set: Set<T>): T[] {
return arr;
}
/**
 * Bumps one labelled series of a prometheus gauge by one, but only while the
 * value currently reported for that `status`/`uuid` pair is falsy. Once the
 * series holds a non-zero value, further calls for the same labels are no-ops.
 * Used in the Appservice MjolnirManager.
 *
 * Reading the current value goes through the gauge's private `_getValue`
 * method because there is no public one, hence the mandatory ts-ignore.
 *
 * See https://github.com/Gnuxie/Draupnir/pull/70#discussion_r1299188922
 *
 * @param gauge The Gauge to be modified
 * @param status The status label of the series that should be incremented
 * @param uuid The UUID of the instance. (Usually the localPart)
 */
export function incrementGaugeValue(gauge: Gauge<"status" | "uuid">, status: "offline" | "disabled" | "online", uuid: string) {
    const labels = { status, uuid };
    // Private accessor: the directive below must stay directly above the call.
    // @ts-ignore
    const currentValue = gauge._getValue(labels);
    if (currentValue) {
        // Already counted for this status/uuid pair; leave the series untouched.
        return;
    }
    gauge.inc(labels);
}
/**
 * Lowers one labelled series of a prometheus gauge by one, but only while the
 * value currently reported for that `status`/`uuid` pair is truthy. A series
 * whose value is zero (or falsy in any other way) is left untouched.
 * Used in the Appservice MjolnirManager.
 *
 * Reading the current value goes through the gauge's private `_getValue`
 * method because there is no public one, hence the mandatory ts-ignore.
 *
 * See https://github.com/Gnuxie/Draupnir/pull/70#discussion_r1299188922
 *
 * @param gauge The Gauge to be modified
 * @param status The status label of the series that should be decremented
 * @param uuid The UUID of the instance. (Usually the localPart)
 */
export function decrementGaugeValue(gauge: Gauge<"status" | "uuid">, status: "offline" | "disabled" | "online", uuid: string) {
    const labels = { status, uuid };
    // Private accessor: the directive below must stay directly above the call.
    // @ts-ignore
    const currentValue = gauge._getValue(labels);
    if (!currentValue) {
        // Nothing is counted for this status/uuid pair, so there is nothing to remove.
        return;
    }
    gauge.dec(labels);
}
export function isTrueJoinEvent(event: any): boolean {
const membership = event['content']['membership'] || 'join';
let prevMembership = "leave";
@@ -134,7 +171,7 @@ export async function getMessagesByUserIn(client: MatrixSendClient, sender: stri
const isGlob = sender.includes("*");
const roomEventFilter = {
rooms: [roomId],
... isGlob ? {} : {senders: [sender]}
...isGlob ? {} : { senders: [sender] }
};
const matcher = new MatrixGlob(sender);
@@ -167,11 +204,11 @@ export async function getMessagesByUserIn(client: MatrixSendClient, sender: stri
* if `null`, start from the most recent point in the timeline.
* @returns The response part of the `/messages` API, see `BackfillResponse`.
*/
async function backfill(from: string|null): Promise<BackfillResponse> {
async function backfill(from: string | null): Promise<BackfillResponse> {
const qs = {
filter: JSON.stringify(roomEventFilter),
dir: "b",
... from ? { from } : {}
...from ? { from } : {}
};
LogService.info("utils", "Backfilling with token: " + from);
return client.doRequest("GET", `/_matrix/client/v3/rooms/${encodeURIComponent(roomId)}/messages`, qs);
@@ -195,10 +232,10 @@ export async function getMessagesByUserIn(client: MatrixSendClient, sender: stri
}
// We check that we have the token because rooms/messages is not required to provide one
// and will not provide one when there is no more history to paginate.
let token: string|null = null;
let token: string | null = null;
do {
const bfMessages: BackfillResponse = await backfill(token);
const previousToken: string|null = token;
const previousToken: string | null = token;
token = bfMessages['end'] ?? null;
const events = filterEvents(bfMessages['chunk'] || []);
// If we are using a glob, there may be no relevant events in this chunk.
@@ -287,13 +324,13 @@ function patchMatrixClientForConciseExceptions() {
const method: string | undefined = err.method
? err.method
: "req" in err && err.req instanceof ClientRequest
? err.req.method
: params.method;
? err.req.method
: params.method;
const path: string = err.url
? err.url
: "req" in err && err.req instanceof ClientRequest
? err.req.path
: params.uri ?? '';
? err.req.path
: params.uri ?? '';
let body: unknown = null;
if ("body" in err) {
body = err.body;