From f1e4dc8fe5dd7e8993a257f24ca2002fcdc2644d Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Fri, 31 Jan 2025 12:21:27 +0000 Subject: [PATCH] For performance, switch to a row count estimate for users and devices --- crates/syn2mas/src/migration.rs | 6 ++++-- crates/syn2mas/src/synapse_reader/mod.rs | 15 +++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/crates/syn2mas/src/migration.rs b/crates/syn2mas/src/migration.rs index 19e46a06c..525350579 100644 --- a/crates/syn2mas/src/migration.rs +++ b/crates/syn2mas/src/migration.rs @@ -142,8 +142,10 @@ pub async fn migrate( let state = MigrationState { server_name, - users: HashMap::with_capacity(counts.users), - devices_to_compat_sessions: HashMap::with_capacity(counts.devices), + // We oversize the hashmaps, as the estimates are inaccurate, and we would like to avoid + // reallocations. + users: HashMap::with_capacity(counts.users * 9 / 8), + devices_to_compat_sessions: HashMap::with_capacity(counts.devices * 9 / 8), provider_id_mapping, }; diff --git a/crates/syn2mas/src/synapse_reader/mod.rs b/crates/syn2mas/src/synapse_reader/mod.rs index 6646af1b1..860ea936b 100644 --- a/crates/syn2mas/src/synapse_reader/mod.rs +++ b/crates/syn2mas/src/synapse_reader/mod.rs @@ -336,28 +336,31 @@ impl<'conn> SynapseReader<'conn> { /// /// - An underlying database error pub async fn count_rows(&mut self) -> Result { + // We don't get to filter out application service users by using this estimate, + // which is a shame, but on a large database this is way faster. + // On matrix.org, counting users and devices properly takes around 1m10s, + // which is unnecessary extra downtime during the migration, just to + // show a more accurate progress bar and size a hash map accurately. 
let users: usize = sqlx::query_scalar::<_, i64>( " - SELECT COUNT(1) FROM users - WHERE appservice_id IS NULL + SELECT reltuples::bigint AS estimate FROM pg_class WHERE oid = 'users'::regclass; ", ) .fetch_one(&mut *self.txn) .await - .into_database("counting Synapse users")? + .into_database("estimating count of users")? .max(0) .try_into() .unwrap_or(usize::MAX); let devices = sqlx::query_scalar::<_, i64>( " - SELECT COUNT(1) FROM devices - WHERE NOT hidden + SELECT reltuples::bigint AS estimate FROM pg_class WHERE oid = 'devices'::regclass; ", ) .fetch_one(&mut *self.txn) .await - .into_database("counting Synapse devices")? + .into_database("estimating count of devices")? .max(0) .try_into() .unwrap_or(usize::MAX);