diff --git a/src/admin/debug/commands.rs b/src/admin/debug/commands.rs index 83763ea3e..b5fdaeacd 100644 --- a/src/admin/debug/commands.rs +++ b/src/admin/debug/commands.rs @@ -1,29 +1,29 @@ use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, fmt::Write, iter::once, time::{Instant, SystemTime}, }; use conduwuit::{ - debug_error, err, info, matrix::{ - pdu::{PduEvent, PduId, RawPduId}, + Err, Result, debug_error, err, info, + matrix::{ Event, - }, trace, - utils, + pdu::{PduEvent, PduId, RawPduId}, + }, + trace, utils, utils::{ stream::{IterStream, ReadyExt}, string::EMPTY, - }, warn, - Err, - Result, + }, + warn, }; use futures::{FutureExt, StreamExt, TryStreamExt}; use lettre::message::Mailbox; use ruma::{ - api::federation::event::get_room_state, events::AnyStateEvent, serde::Raw, CanonicalJsonObject, CanonicalJsonValue, - EventId, OwnedEventId, OwnedRoomId, OwnedRoomOrAliasId, - OwnedServerName, RoomId, RoomVersionId, + CanonicalJsonObject, CanonicalJsonValue, EventId, OwnedEventId, OwnedRoomId, + OwnedRoomOrAliasId, OwnedServerName, RoomId, RoomVersionId, UInt, + api::federation::event::get_room_state, events::AnyStateEvent, serde::Raw, }; use service::rooms::{ short::{ShortEventId, ShortRoomId}, @@ -69,6 +69,189 @@ pub(super) async fn get_auth_chain(&self, event_id: OwnedEventId) -> Result { self.write_str(&out).await } +#[derive(Clone, Copy, Eq, PartialEq)] +enum NodeStatus { + Normal(bool), + SoftFailed(bool), + Rejected(bool), +} + +struct AuthChild { + node_id: String, + event_id: OwnedEventId, + depth: UInt, + ts: UInt, + first_seen: bool, + pdu: Option, +} + +fn render_node( + graph: &mut String, + node_id: &str, + event_id: &EventId, + status: NodeStatus, +) -> Result { + let evt_str = event_id.to_string(); + + let status_label = match status { + | NodeStatus::Normal(false) => evt_str, + | NodeStatus::Normal(true) => format!("{evt_str} (missing locally)"), + | NodeStatus::SoftFailed(false) => format!("{evt_str} (soft-failed)"), + | NodeStatus::SoftFailed(true) => format!("{evt_str} (soft-failed & missing locally)"), + | NodeStatus::Rejected(false) => format!("{evt_str} (rejected)"), + | NodeStatus::Rejected(true) => format!("{evt_str} (rejected & missing locally)"), + }; + + writeln!(graph, "{node_id}[\"{}\"]", status_label.as_str())?; + + match status { + | NodeStatus::Rejected(_) => writeln!(graph, "class {node_id} rejected;")?, + | NodeStatus::SoftFailed(_) => writeln!(graph, "class {node_id} soft_failed;")?, + | NodeStatus::Normal(_) => {}, + } + + Ok(()) +} + +#[admin_command] +pub(super) async fn show_auth_chain(&self, event_id: OwnedEventId) -> Result { + let node_status = async |event_id: &EventId, missing: bool| -> NodeStatus { + if self + .services + .rooms + .pdu_metadata + .is_event_rejected(event_id) + .await + { + NodeStatus::Rejected(missing) + } else if self + .services + .rooms + .pdu_metadata + .is_event_soft_failed(event_id) + .await + { + NodeStatus::SoftFailed(missing) + } else { + NodeStatus::Normal(missing) + } + }; + + let Ok(root) = self.services.rooms.timeline.get_pdu(&event_id).await else { + return Err!("Event not found."); + }; + + let mut graph = String::from( + "```mermaid\n%% This is a mermaid graph. You can plug this output into\n\ + %% https://mermaid.live/edit to visualise it on-the-fly.\nflowchart TD\n\ + classDef rejected fill:#ffe5e5,stroke:#cc0000,stroke-width:2px,color:#000;\n\ + classDef soft_failed fill:#fff6cc,stroke:#c9a400,stroke-width:2px,color:#000;\n" + ); + + let mut node_ids: HashMap = HashMap::new(); + let mut cached_events: HashMap = + HashMap::from([(event_id.clone(), root.clone())]); + let mut scheduled: HashSet = HashSet::from([event_id.clone()]); + let mut visited: HashSet = HashSet::new(); + let mut stack = vec![root]; + let mut next_node_id = 0_usize; + + let node_id_for = |event_id: &OwnedEventId, + node_ids: &mut HashMap, + next_node_id: &mut usize| { + node_ids + .entry(event_id.clone()) + .or_insert_with(|| { + let id = format!("n{}", *next_node_id); + *next_node_id = next_node_id.saturating_add(1); + id + }) + .clone() + }; + + while let Some(event) = stack.pop() { + let current_event_id = event.event_id().to_owned(); + if !visited.insert(current_event_id.clone()) { + continue; + } + + let current_node_id = node_id_for(¤t_event_id, &mut node_ids, &mut next_node_id); + let current_status = node_status(¤t_event_id, false).await; + + render_node(&mut graph, ¤t_node_id, ¤t_event_id, current_status)?; + + let mut children = Vec::with_capacity(event.auth_events.len()); + for auth_event_id in event.auth_events().rev() { + let auth_event_id = auth_event_id.to_owned(); + let auth_node_id = node_id_for(&auth_event_id, &mut node_ids, &mut next_node_id); + writeln!(graph, "{current_node_id} --> {auth_node_id}")?; + + let first_seen = scheduled.insert(auth_event_id.clone()); + let auth_pdu = if let Some(auth_pdu) = cached_events.get(&auth_event_id) { + // NOTE: events might be referenced multiple times (like the create event) + // so this saves some cheeky db lookup time + Some(auth_pdu.clone()) + } else if first_seen { + match self.services.rooms.timeline.get_pdu(&auth_event_id).await { + | Ok(auth_event) => { + cached_events.insert(auth_event_id.clone(), auth_event.clone()); + Some(auth_event) + }, + | Err(_) => None, + } + } else { + None + }; + + // NOTE: Depth is used as the primary sorting key here, even though it has no + // bearing on state resolution or anything. Timestamp is used as a + // tiebreaker, failing back to lexicographical comparison. + let (depth, ts) = auth_pdu + .as_ref() + .map_or((UInt::MAX, UInt::MAX), |pdu| (pdu.depth, pdu.origin_server_ts)); + + children.push(AuthChild { + node_id: auth_node_id, + event_id: auth_event_id, + depth, + ts, + first_seen, + pdu: auth_pdu, + }); + } + + children.sort_by(|a, b| { + a.depth + .cmp(&b.depth) + .then(a.ts.cmp(&b.ts)) + .then(a.event_id.as_str().cmp(b.event_id.as_str())) + }); + + for child in children.into_iter().rev() { + if !child.first_seen { + continue; + } + + if let Some(child_pdu) = child.pdu { + // We have this PDU so will want to traverse it. + stack.push(child_pdu); + } else { + // We don't have this PDU locally so we can't traverse its auth events, + // but we can still render it as a node. + render_node( + &mut graph, + &child.node_id, + &child.event_id, + node_status(&child.event_id, true).await, + )?; + } + } + } + + graph.push_str("```\n"); + self.write_str(&graph).await +} + #[admin_command] pub(super) async fn parse_pdu(&self) -> Result { if self.body.len() < 2 @@ -111,8 +294,18 @@ pub(super) async fn get_pdu(&self, event_id: OwnedEventId) -> Result { outlier = true; pdu_json = self.services.rooms.timeline.get_pdu_json(&event_id).await; } - let rejected = self.services.rooms.pdu_metadata.is_event_rejected(&event_id).await; - let soft_failed = self.services.rooms.pdu_metadata.is_event_soft_failed(&event_id).await; + let rejected = self + .services + .rooms + .pdu_metadata + .is_event_rejected(&event_id) + .await; + let soft_failed = self + .services + .rooms + .pdu_metadata + .is_event_soft_failed(&event_id) + .await; match pdu_json { | Err(_) => return Err!("PDU not found locally."), diff --git a/src/admin/debug/mod.rs b/src/admin/debug/mod.rs index f06c6d0ce..a9478ce82 100644 --- a/src/admin/debug/mod.rs +++ b/src/admin/debug/mod.rs @@ -17,12 +17,21 @@ pub enum DebugCommand { message: Vec, }, - /// Get the auth_chain of a PDU + /// Loads the auth_chain of a PDU, reporting how long it took. GetAuthChain { /// An event ID (the $ character followed by the base64 reference hash) event_id: OwnedEventId, }, + /// Walks & displays the auth_chain of a PDU in a mermaid graph format. + /// + /// This is useless to basically anyone but developers, and is also probably + /// slow and memory hungry. + ShowAuthChain { + /// The root event ID to start walking back from. + event_id: OwnedEventId, + }, + /// Parse and print a PDU from a JSON /// /// The PDU event is only checked for validity and is not added to the