Fix race in new pruning of device lists tables. (#19709)

Follows on from #19473.

We should record where we have deleted up to in the same transaction as
we perform the delete, rather than only once at the end. This code
only starts deleting rows after a month (and the original PR isn't in a
release yet), so no server should have run into this problem yet.
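
To make the problem concrete: if pruning is interrupted after some batches
have committed but before the final bookkeeping update runs, the tracking
table understates how much has actually been deleted, defeating the safety
check that relies on it. Below is a minimal sketch of the fixed pattern,
using `sqlite3` and a toy schema (a `changes` table with a `stream_id`
column, and a one-row `max_pruned` table) rather than Synapse's real tables
and `db_pool` helpers:

```python
# A minimal sketch, assuming a toy schema; not Synapse's real code.
import sqlite3

BATCH_SIZE = 100

def prune_batch(conn: sqlite3.Connection) -> int:
    """Delete one batch of old rows, recording the high-water mark in the
    SAME transaction, so the watermark can never lag behind the deletes."""
    with conn:  # one transaction: the delete and the bookkeeping commit together
        rows = conn.execute(
            "SELECT stream_id FROM changes ORDER BY stream_id LIMIT ?",
            (BATCH_SIZE,),
        ).fetchall()
        if not rows:
            return 0
        max_id = rows[-1][0]
        conn.execute("DELETE FROM changes WHERE stream_id <= ?", (max_id,))
        # Recording progress here, inside the transaction, is the fix: an
        # interruption can no longer separate the delete from the bookkeeping.
        conn.execute("UPDATE max_pruned SET stream_id = ?", (max_id,))
    return len(rows)
```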

Also let's log more regularly, as the initial set of deletions will
likely take a long time.
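
A sketch of the batched loop with that periodic logging follows; the batch
size, the ~10,000-row log threshold, and the 100 ms back-off mirror the diff
below, while the plumbing is simplified (a hypothetical `delete_batch`
callable standing in for the per-transaction helper, and `time.sleep`
replacing Synapse's reactor-aware clock):

```python
# Sketch of the batched prune loop with periodic progress logging.
import logging
import time
from typing import Callable

logger = logging.getLogger(__name__)

BATCH_SIZE = 100
LOG_EVERY_N_ROWS = 10_000

def prune_all(delete_batch: Callable[[], int]) -> None:
    progress = 0
    while True:
        batch_deleted = delete_batch()
        # A short batch means we have drained everything there is to delete.
        finished = batch_deleted < BATCH_SIZE
        progress += batch_deleted

        # Log every ~10k rows, and once more at the end, so a long initial
        # prune shows steady progress instead of going silent in the logs.
        if finished or progress > LOG_EVERY_N_ROWS:
            logger.info("Pruned %d rows", progress)
            progress = 0

        if finished:
            break

        time.sleep(0.1)  # brief pause between batches to go easy on the DB
```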
Commit 3cdae2e278 by Erik Johnston (committed via GitHub), 2026-04-21 11:39:39 +01:00
Parent commit: a9361c4f51
2 changed files with 28 additions and 17 deletions
Changelog entry (new file, +1):
@@ -0,0 +1 @@
+Reduce database disk space usage by pruning old rows from `device_lists_changes_in_room`.
Source diff (+27 -17):
@@ -2583,36 +2583,46 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
                 num_deleted += 1
                 min_stream_id = max(min_stream_id, row[0])
 
+            if num_deleted:
+                # Update the max pruned stream ID tracking table so that the
+                # safety check knows data up to this point has been deleted.
+                self.db_pool.simple_update_one_txn(
+                    txn,
+                    table="device_lists_changes_in_room_max_pruned_stream_id",
+                    keyvalues={},
+                    updatevalues={"stream_id": min_stream_id},
+                )
+
             return num_deleted
 
-        num_rows_deleted = 0
+        progress_num_rows_deleted = 0
         while True:
             batch_deleted = await self.db_pool.runInteraction(
                 "prune_device_lists_changes_in_room",
                 prune_device_lists_changes_in_room_txn,
             )
-            num_rows_deleted += batch_deleted
 
-            if batch_deleted < PRUNE_DEVICE_LISTS_BATCH_SIZE:
+            finished = batch_deleted < PRUNE_DEVICE_LISTS_BATCH_SIZE
+            progress_num_rows_deleted += batch_deleted
+
+            # Periodically report progress in the logs. We do this either when
+            # we've deleted a significant number of rows or when we've finished
+            # deleting all rows in this round.
+            if finished or progress_num_rows_deleted > 10000:
+                logger.info(
+                    "Pruned %d rows from device_lists_changes_in_room",
+                    progress_num_rows_deleted,
+                )
+                progress_num_rows_deleted = 0
+
+            if finished:
                 break
 
             # Sleep for a short time to avoid hammering the database too much if
             # there are a lot of rows to delete.
             await self.clock.sleep(Duration(milliseconds=100))
 
-        if num_rows_deleted:
-            # Update the max pruned stream ID tracking table so that the
-            # safety check knows data up to this point has been deleted.
-            await self.db_pool.simple_update_one(
-                table="device_lists_changes_in_room_max_pruned_stream_id",
-                keyvalues={},
-                updatevalues={"stream_id": prune_before_stream_id},
-                desc="prune_device_lists_changes_in_room_update_max_pruned",
-            )
-
-            logger.info(
-                "Pruned %d rows from device_lists_changes_in_room", num_rows_deleted
-            )
-
 
 class DeviceBackgroundUpdateStore(SQLBaseStore):
     _instance_name: str