For performance, switch to a row count estimate for users and devices

This commit is contained in:
Olivier 'reivilibre
2025-01-31 12:21:27 +00:00
parent 487c53cab3
commit f1e4dc8fe5
2 changed files with 13 additions and 8 deletions
+4 -2
View File
@@ -142,8 +142,10 @@ pub async fn migrate(
let state = MigrationState {
server_name,
users: HashMap::with_capacity(counts.users),
devices_to_compat_sessions: HashMap::with_capacity(counts.devices),
// We oversize the hashmaps, as the estimates are inaccurate, and we would like to avoid
// reallocations.
users: HashMap::with_capacity(counts.users * 9 / 8),
devices_to_compat_sessions: HashMap::with_capacity(counts.devices * 9 / 8),
provider_id_mapping,
};
+9 -6
View File
@@ -336,28 +336,31 @@ impl<'conn> SynapseReader<'conn> {
///
/// - An underlying database error
pub async fn count_rows(&mut self) -> Result<SynapseRowCounts, Error> {
// We don't get to filter out application service users by using this estimate,
// which is a shame, but on a large database this is way faster.
// On matrix.org, counting users and devices properly takes around 1m10s,
// which is unnecessary extra downtime during the migration, just to
// show a more accurate progress bar and size a hash map accurately.
let users: usize = sqlx::query_scalar::<_, i64>(
"
SELECT COUNT(1) FROM users
WHERE appservice_id IS NULL
SELECT reltuples::bigint AS estimate FROM pg_class WHERE oid = 'users'::regclass;
",
)
.fetch_one(&mut *self.txn)
.await
.into_database("counting Synapse users")?
.into_database("estimating count of users")?
.max(0)
.try_into()
.unwrap_or(usize::MAX);
let devices = sqlx::query_scalar::<_, i64>(
"
SELECT COUNT(1) FROM devices
WHERE NOT hidden
SELECT reltuples::bigint AS estimate FROM pg_class WHERE oid = 'devices'::regclass;
",
)
.fetch_one(&mut *self.txn)
.await
.into_database("counting Synapse devices")?
.into_database("estimating count of devices")?
.max(0)
.try_into()
.unwrap_or(usize::MAX);