Slim scheduler, add maintenance module

- Introduced a new `maintenance` module to handle data retention, log rotation, and nightly email tasks.
- Updated the `scheduler` to utilize the `MaintenanceRunner` for executing maintenance tasks, improving code organization and clarity.
- Enhanced documentation to reflect changes in logging configuration and data retention processes.
- Adjusted tests to accommodate the refactored scheduler methods and ensure proper functionality.
This commit is contained in:
agessaman
2026-03-19 19:40:33 -07:00
parent 219de10afd
commit 07a2db4ede
9 changed files with 920 additions and 519 deletions

View File

@@ -20,6 +20,14 @@ The main sections include:
| `[Weather]` | Units and settings shared by `wx` / `gwx` and Weather Service |
| `[Logging]` | Log file path and level |
### Logging and log rotation
- **Startup (config.ini):** Under `[Logging]`, `log_file`, `log_max_bytes`, and `log_backup_count` are read when the bot starts. They control the initial `RotatingFileHandler` for the bot log file (see `config.ini.example`).
- **Live changes (web viewer):** The Config tab can store **`maint.log_max_bytes`** and **`maint.log_backup_count`** in the database (`bot_metadata`). The scheduler's maintenance loop applies those values to the existing rotating file handler **without restarting** the bot—**but only after** you save rotation settings from the web UI (which writes the metadata keys). Editing `config.ini` alone does not update `bot_metadata`, so hot-apply will not see a change until you save from the viewer (or set the keys another way).
If you rely on config-file-only workflows, restart the bot after changing `[Logging]` rotation options.
## Channels section
`[Channels]` controls where the bot responds:

View File

@@ -34,7 +34,7 @@ Shorter retention (e.g. 2–3 days for `packet_stream`) is enough for the web viewer.
## How cleanup runs
1. The **scheduler** (in the main bot process) runs a single data-retention task on a **24-hour interval** after startup (the first run is not immediate on boot; it aligns with the nightly maintenance email cadence).
2. That task:
- Cleans **packet_stream** (via web viewer integration when enabled).
- Cleans **purging_log**, **daily_stats**, **unique_advert_packets**, and **observed_paths** (repeater manager).

593
modules/maintenance.py Normal file
View File

@@ -0,0 +1,593 @@
#!/usr/bin/env python3
"""Periodic maintenance: data retention, nightly digest email, log rotation, DB backups."""
from __future__ import annotations
import datetime
import json
import os
import sqlite3
import time
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable
if TYPE_CHECKING:
pass
def _iso_week_key_from_ran_at(ran_at: str) -> str:
"""Derive YYYY-Www from maint.status db_backup_ran_at ISO string for weekly dedup."""
if not ran_at:
return ''
try:
s = ran_at.strip()
if s.endswith('Z'):
s = s[:-1] + '+00:00'
dt = datetime.datetime.fromisoformat(s)
if dt.tzinfo is not None:
dt = dt.replace(tzinfo=None)
y, w, _ = dt.isocalendar()
return f'{y}-W{w}'
except (ValueError, TypeError):
return ''
def _row_n(cur: sqlite3.Cursor) -> int:
row = cur.fetchone()
if row is None:
return 0
if isinstance(row, dict):
return int(row['n'])
try:
return int(dict(row)['n'])
except (KeyError, TypeError, ValueError):
return int(row['n'])
def _count_log_errors_last_24h(log_path: Path) -> tuple[int | str, int | str]:
"""Count ERROR / CRITICAL log lines from the last 24 hours.
Supports default text format (`YYYY-MM-DD HH:MM:SS - name - LEVEL - msg`) and
JSON lines from json_logging (`_JsonFormatter` in core).
"""
cutoff_local = datetime.datetime.now() - datetime.timedelta(hours=24)
cutoff_utc = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=24)
err = crit = 0
try:
with open(log_path, encoding='utf-8', errors='replace') as fh:
for raw in fh:
ln = raw.rstrip('\n')
if not ln:
continue
if ln.startswith('{'):
try:
obj = json.loads(ln)
except json.JSONDecodeError:
continue
level = str(obj.get('level', ''))
ts = obj.get('timestamp', '')
if not ts:
continue
ts_s = str(ts).replace('Z', '+00:00')
try:
line_dt = datetime.datetime.fromisoformat(ts_s)
except ValueError:
continue
if line_dt.tzinfo is None:
line_dt = line_dt.replace(tzinfo=datetime.timezone.utc)
else:
line_dt = line_dt.astimezone(datetime.timezone.utc)
if line_dt < cutoff_utc:
continue
if level == 'ERROR':
err += 1
elif level == 'CRITICAL':
crit += 1
continue
if len(ln) < 19:
continue
try:
line_dt = datetime.datetime.strptime(ln[:19], '%Y-%m-%d %H:%M:%S')
except ValueError:
continue
if line_dt < cutoff_local:
continue
if ' - CRITICAL - ' in ln:
crit += 1
elif ' - ERROR - ' in ln:
err += 1
return err, crit
except OSError:
return 'n/a', 'n/a'
class MaintenanceRunner:
"""Runs data retention, nightly email, log rotation hot-apply, and DB backups."""
def __init__(self, bot: Any, get_current_time: Callable[[], datetime.datetime]) -> None:
    """Store bot references and seed per-run status caches.

    Args:
        bot: Bot instance; must expose `logger`, `config`, and `db_manager`.
        get_current_time: Callable returning "now" in the scheduler's configured
            timezone; used by `maybe_run_db_backup` to evaluate the backup window.
    """
    self.bot = bot
    self.logger = bot.logger
    self._get_current_time = get_current_time
    # Most recent data-retention outcome (ran_at / error), reused by the nightly digest.
    self._last_retention_stats: dict[str, Any] = {}
    # Most recent DB backup outcome (ran_at / path / week_key), used for schedule dedup.
    self._last_db_backup_stats: dict[str, Any] = {}
    # Last log-rotation settings applied from bot_metadata (hot-apply change detection).
    self._last_log_rotation_applied: dict[str, str] = {}
@property
def last_retention_stats(self) -> dict[str, Any]:
    """The live dict holding the most recent data-retention run stats (not a copy)."""
    return self._last_retention_stats
def get_notif(self, key: str) -> str:
try:
val = self.bot.db_manager.get_metadata(f'notif.{key}')
return val if val is not None else ''
except Exception:
return ''
def get_maint(self, key: str) -> str:
try:
val = self.bot.db_manager.get_metadata(f'maint.{key}')
return val if val is not None else ''
except Exception:
return ''
def run_data_retention(self) -> None:
    """Run data retention cleanup: packet_stream, repeater tables, stats, caches, mesh_connections.

    Runs on the scheduler thread. Retention windows come from config.ini
    (`[Data_Retention]` / `[Stats_Command]`), with hard-coded defaults.
    Outcome is recorded in `_last_retention_stats` and (best-effort) in
    `bot_metadata` under `maint.status.data_retention_*`.
    """
    import asyncio

    def get_retention_days(section: str, key: str, default: int) -> int:
        # Read an int from config; any missing section/key or parse error -> default.
        try:
            if self.bot.config.has_section(section) and self.bot.config.has_option(section, key):
                return self.bot.config.getint(section, key)
        except Exception:
            pass
        return default

    packet_stream_days = get_retention_days('Data_Retention', 'packet_stream_retention_days', 3)
    purging_log_days = get_retention_days('Data_Retention', 'purging_log_retention_days', 90)
    daily_stats_days = get_retention_days('Data_Retention', 'daily_stats_retention_days', 90)
    observed_paths_days = get_retention_days('Data_Retention', 'observed_paths_retention_days', 90)
    mesh_connections_days = get_retention_days('Data_Retention', 'mesh_connections_retention_days', 7)
    stats_days = get_retention_days('Stats_Command', 'data_retention_days', 7)
    try:
        # Packet stream rows are cleaned through the web viewer integration when present.
        if hasattr(self.bot, 'web_viewer_integration') and self.bot.web_viewer_integration:
            bi = getattr(self.bot.web_viewer_integration, 'bot_integration', None)
            if bi and hasattr(bi, 'cleanup_old_data'):
                bi.cleanup_old_data(packet_stream_days)
        # Repeater cleanup is async: run it on the bot's main loop when it is alive,
        # otherwise drive it on a loop owned by this thread.
        if hasattr(self.bot, 'repeater_manager') and self.bot.repeater_manager:
            if hasattr(self.bot, 'main_event_loop') and self.bot.main_event_loop and self.bot.main_event_loop.is_running():
                future = asyncio.run_coroutine_threadsafe(
                    self.bot.repeater_manager.cleanup_database(purging_log_days),
                    self.bot.main_event_loop
                )
                try:
                    # Bounded wait so a wedged event loop cannot stall the scheduler thread.
                    future.result(timeout=60)
                except Exception as e:
                    self.logger.error(f"Error in repeater_manager.cleanup_database: {e}")
            else:
                try:
                    loop = asyncio.get_event_loop()
                except RuntimeError:
                    loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(loop)
                loop.run_until_complete(self.bot.repeater_manager.cleanup_database(purging_log_days))
            # Optional extra retention pass (daily_stats / observed_paths tables).
            if hasattr(self.bot.repeater_manager, 'cleanup_repeater_retention'):
                self.bot.repeater_manager.cleanup_repeater_retention(
                    daily_stats_days=daily_stats_days,
                    observed_paths_days=observed_paths_days
                )
        # Command-level stats tables, via the 'stats' command if it is registered.
        if hasattr(self.bot, 'command_manager') and self.bot.command_manager:
            stats_cmd = self.bot.command_manager.commands.get('stats') if getattr(self.bot.command_manager, 'commands', None) else None
            if stats_cmd and hasattr(stats_cmd, 'cleanup_old_stats'):
                stats_cmd.cleanup_old_stats(stats_days)
        # Expired cache rows.
        if hasattr(self.bot, 'db_manager') and self.bot.db_manager and hasattr(self.bot.db_manager, 'cleanup_expired_cache'):
            self.bot.db_manager.cleanup_expired_cache()
        # Mesh graph edges pruned from DB to match in-memory expiration.
        if hasattr(self.bot, 'mesh_graph') and self.bot.mesh_graph and hasattr(self.bot.mesh_graph, 'delete_expired_edges_from_db'):
            self.bot.mesh_graph.delete_expired_edges_from_db(mesh_connections_days)
        ran_at = datetime.datetime.utcnow().isoformat()
        self._last_retention_stats['ran_at'] = ran_at
        try:
            # Best-effort status breadcrumbs for the web UI.
            self.bot.db_manager.set_metadata('maint.status.data_retention_ran_at', ran_at)
            self.bot.db_manager.set_metadata('maint.status.data_retention_outcome', 'ok')
        except Exception:
            pass
    except Exception as e:
        self.logger.exception(f"Error during data retention cleanup: {e}")
        self._last_retention_stats['error'] = str(e)
        try:
            ran_at = datetime.datetime.utcnow().isoformat()
            self.bot.db_manager.set_metadata('maint.status.data_retention_ran_at', ran_at)
            self.bot.db_manager.set_metadata('maint.status.data_retention_outcome', f'error: {e}')
        except Exception:
            pass
def collect_email_stats(self) -> dict[str, Any]:
    """Gather summary stats for the nightly digest.

    Every section is best-effort: a failure records a sentinel ('unknown',
    'n/a', or a *_error key) instead of raising, so a partial digest can
    still be sent.
    """
    stats: dict[str, Any] = {}
    # Bot uptime from connection_time (epoch seconds), formatted like "2d 3h 15m".
    try:
        start = getattr(self.bot, 'connection_time', None)
        if start:
            delta = datetime.timedelta(seconds=int(time.time() - start))
            hours, rem = divmod(delta.seconds, 3600)
            minutes = rem // 60
            parts = []
            if delta.days:
                parts.append(f"{delta.days}d")
            parts.append(f"{hours}h {minutes}m")
            stats['uptime'] = ' '.join(parts)
        else:
            stats['uptime'] = 'unknown'
    except Exception:
        stats['uptime'] = 'unknown'
    # Contact counts: total tracked, heard in last 24h, first heard in last 24h.
    try:
        with self.bot.db_manager.connection() as conn:
            cur = conn.cursor()
            cur.execute("SELECT COUNT(*) AS n FROM complete_contact_tracking")
            stats['contacts_total'] = _row_n(cur)
            cur.execute(
                "SELECT COUNT(*) AS n FROM complete_contact_tracking "
                "WHERE last_heard >= datetime('now', '-1 day')"
            )
            stats['contacts_24h'] = _row_n(cur)
            cur.execute(
                "SELECT COUNT(*) AS n FROM complete_contact_tracking "
                "WHERE first_heard >= datetime('now', '-1 day')"
            )
            stats['contacts_new_24h'] = _row_n(cur)
    except Exception as e:
        stats['contacts_error'] = str(e)
    # Database file size in MiB (1_048_576 bytes).
    try:
        db_path = str(self.bot.db_manager.db_path)
        size_bytes = os.path.getsize(db_path)
        stats['db_size_mb'] = f'{size_bytes / 1_048_576:.1f}'
        stats['db_path'] = db_path
    except Exception:
        stats['db_size_mb'] = 'unknown'
    # Log file size, 24h ERROR/CRITICAL counts, and recent-rotation detection.
    try:
        log_file = self.bot.config.get('Logging', 'log_file', fallback='').strip()
        if log_file:
            log_path = Path(log_file)
            stats['log_file'] = str(log_path)
            if log_path.exists():
                stats['log_size_mb'] = f'{log_path.stat().st_size / 1_048_576:.1f}'
                err_ct, crit_ct = _count_log_errors_last_24h(log_path)
                stats['errors_24h'] = err_ct
                stats['criticals_24h'] = crit_ct
                # A `<log>.1` backup modified within 24h implies RotatingFileHandler rolled over.
                backup = Path(str(log_path) + '.1')
                if backup.exists() and (time.time() - backup.stat().st_mtime) < 86400:
                    stats['log_rotated_24h'] = True
                    stats['log_backup_size_mb'] = f'{backup.stat().st_size / 1_048_576:.1f}'
                else:
                    stats['log_rotated_24h'] = False
    except Exception:
        pass
    # Snapshot (copy) of the last data-retention outcome.
    stats['retention'] = self._last_retention_stats.copy()
    return stats
def format_email_body(self, stats: dict[str, Any], period_start: str, period_end: str) -> str:
    """Render the plain-text nightly digest from `collect_email_stats()` output.

    NOTE(review): several separator literals below read as `'' * 30` / `'' * 44`
    (an empty string repeated, i.e. ''), and the Period line concatenates
    start/end with no visible separator — these look like Unicode characters
    (e.g. '─', '→') lost in transfer; confirm against the original file.
    """
    lines = [
        'MeshCore Bot — Nightly Maintenance Report',
        '=' * 44,
        f'Period : {period_start}{period_end}',
        '',
        'BOT STATUS',
        '' * 30,
        f" Uptime : {stats.get('uptime', 'unknown')}",
        f" Connected : {'yes' if getattr(self.bot, 'connected', False) else 'no'}",
        '',
        'NETWORK ACTIVITY (past 24 h)',
        '' * 30,
        f" Active contacts : {stats.get('contacts_24h', 'n/a')}",
        f" New contacts : {stats.get('contacts_new_24h', 'n/a')}",
        f" Total tracked : {stats.get('contacts_total', 'n/a')}",
        '',
        'DATABASE',
        '' * 30,
        f" Size : {stats.get('db_size_mb', 'n/a')} MB",
    ]
    # Retention lines come from the runner's own cache, not the `stats` argument.
    if self._last_retention_stats.get('ran_at'):
        lines.append(f" Last retention run : {self._last_retention_stats['ran_at']} UTC")
    if self._last_retention_stats.get('error'):
        lines.append(f" Retention error : {self._last_retention_stats['error']}")
    lines += [
        '',
        'ERRORS (past 24 h, log file)',
        '' * 30,
        f" ERROR : {stats.get('errors_24h', 'n/a')}",
        f" CRITICAL : {stats.get('criticals_24h', 'n/a')}",
    ]
    # LOG FILES section is only emitted when a log file is configured.
    if stats.get('log_file'):
        lines += [
            '',
            'LOG FILES',
            '' * 30,
            f" Current : {stats.get('log_file')} ({stats.get('log_size_mb', '?')} MB)",
        ]
        if stats.get('log_rotated_24h'):
            lines.append(
                f" Rotated : yes — backup is {stats.get('log_backup_size_mb', '?')} MB"
            )
        else:
            lines.append(' Rotated : no')
    lines += [
        '',
        '' * 44,
        'Manage notification settings: /config',
    ]
    return '\n'.join(lines)
def send_nightly_email(self) -> None:
    """Build and dispatch the nightly maintenance digest if enabled.

    All settings are read live from bot_metadata (`notif.*` / `maint.*`).
    Outcome is recorded best-effort under `maint.status.nightly_email_*`.
    """
    import smtplib
    import ssl as _ssl
    from email.message import EmailMessage
    # Feature gate: notif.nightly_enabled must be exactly the string 'true'.
    if self.get_notif('nightly_enabled') != 'true':
        return
    smtp_host = self.get_notif('smtp_host')
    smtp_security = self.get_notif('smtp_security') or 'starttls'
    smtp_user = self.get_notif('smtp_user')
    smtp_password = self.get_notif('smtp_password')
    from_name = self.get_notif('from_name') or 'MeshCore Bot'
    from_email = self.get_notif('from_email')
    # Comma-separated recipients; blank entries are dropped.
    recipients = [r.strip() for r in self.get_notif('recipients').split(',') if r.strip()]
    if not smtp_host or not from_email or not recipients:
        self.logger.warning(
            "Nightly email enabled but SMTP settings incomplete "
            f"(host={smtp_host!r}, from={from_email!r}, recipients={recipients})"
        )
        return
    # Default port follows the security mode: 465 for implicit SSL, else 587.
    try:
        smtp_port = int(self.get_notif('smtp_port') or (465 if smtp_security == 'ssl' else 587))
    except ValueError:
        smtp_port = 587
    now_utc = datetime.datetime.utcnow()
    yesterday = now_utc - datetime.timedelta(days=1)
    period_start = yesterday.strftime('%Y-%m-%d %H:%M UTC')
    period_end = now_utc.strftime('%Y-%m-%d %H:%M UTC')
    try:
        stats = self.collect_email_stats()
        body = self.format_email_body(stats, period_start, period_end)
        msg = EmailMessage()
        msg['Subject'] = f'MeshCore Bot — Nightly Report {now_utc.strftime("%Y-%m-%d")}'
        msg['From'] = f'{from_name} <{from_email}>'
        msg['To'] = ', '.join(recipients)
        msg.set_content(body)
        # Optionally attach the live log file, capped at 5 MiB to keep the mail deliverable.
        if self.get_maint('email_attach_log') == 'true':
            log_file = self.bot.config.get('Logging', 'log_file', fallback='').strip()
            if log_file:
                log_path = Path(log_file)
                max_attach = 5 * 1024 * 1024
                if log_path.exists() and log_path.stat().st_size <= max_attach:
                    try:
                        with open(log_path, 'rb') as fh:
                            msg.add_attachment(fh.read(), maintype='text', subtype='plain',
                                               filename=log_path.name)
                    except Exception as attach_err:
                        # Attachment failure is non-fatal; the digest still goes out.
                        self.logger.warning(f"Could not attach log file to nightly email: {attach_err}")
        context = _ssl.create_default_context()
        _smtp_timeout = 30
        if smtp_security == 'ssl':
            # Implicit TLS from the first byte.
            with smtplib.SMTP_SSL(smtp_host, smtp_port, context=context, timeout=_smtp_timeout) as s:
                if smtp_user and smtp_password:
                    s.login(smtp_user, smtp_password)
                s.send_message(msg)
        else:
            # Plain connection, upgraded via STARTTLS when configured.
            with smtplib.SMTP(smtp_host, smtp_port, timeout=_smtp_timeout) as s:
                if smtp_security == 'starttls':
                    s.ehlo()
                    s.starttls(context=context)
                    s.ehlo()
                if smtp_user and smtp_password:
                    s.login(smtp_user, smtp_password)
                s.send_message(msg)
        self.logger.info(
            f"Nightly maintenance email sent to {recipients} "
            f"(contacts_24h={stats.get('contacts_24h')}, "
            f"errors={stats.get('errors_24h')})"
        )
        # Best-effort status breadcrumbs for the web UI.
        try:
            ran_at = datetime.datetime.utcnow().isoformat()
            self.bot.db_manager.set_metadata('maint.status.nightly_email_ran_at', ran_at)
            self.bot.db_manager.set_metadata('maint.status.nightly_email_outcome', 'ok')
        except Exception:
            pass
    except Exception as e:
        self.logger.error(f"Failed to send nightly maintenance email: {e}")
        try:
            ran_at = datetime.datetime.utcnow().isoformat()
            self.bot.db_manager.set_metadata('maint.status.nightly_email_ran_at', ran_at)
            self.bot.db_manager.set_metadata('maint.status.nightly_email_outcome', f'error: {e}')
        except Exception:
            pass
def apply_log_rotation_config(self) -> None:
    """Check bot_metadata for log rotation settings and replace the RotatingFileHandler if changed.

    Hot-applies `maint.log_max_bytes` / `maint.log_backup_count` without a
    restart. If neither key is set in bot_metadata, the config.ini values
    stay in effect and nothing happens.
    """
    from logging.handlers import RotatingFileHandler as _RFH
    max_bytes_str = self.get_maint('log_max_bytes')
    backup_count_str = self.get_maint('log_backup_count')
    if not max_bytes_str and not backup_count_str:
        return
    new_cfg = {'max_bytes': max_bytes_str, 'backup_count': backup_count_str}
    # Change detection: compare the raw metadata strings against the last applied set.
    if new_cfg == self._last_log_rotation_applied:
        return
    # Defaults when only one of the two keys is stored: 5 MiB / 3 backups.
    try:
        max_bytes = int(max_bytes_str) if max_bytes_str else 5 * 1024 * 1024
        backup_count = int(backup_count_str) if backup_count_str else 3
    except ValueError:
        self.logger.warning(f"Invalid log rotation config in bot_metadata: {new_cfg}")
        return
    logger = self.bot.logger
    for i, handler in enumerate(logger.handlers):
        if isinstance(handler, _RFH):
            # Swap the handler in place, preserving its path, formatter, and level.
            log_path = handler.baseFilename
            formatter = handler.formatter
            level = handler.level
            handler.close()
            new_handler = _RFH(log_path, maxBytes=max_bytes, backupCount=backup_count, encoding='utf-8')
            new_handler.setFormatter(formatter)
            new_handler.setLevel(level)
            logger.handlers[i] = new_handler
            self._last_log_rotation_applied = new_cfg
            self.logger.info(f"Log rotation config applied: maxBytes={max_bytes}, backupCount={backup_count}")
            try:
                ran_at = datetime.datetime.utcnow().isoformat()
                self.bot.db_manager.set_metadata('maint.status.log_rotation_applied_at', ran_at)
            except Exception:
                pass
            # Only the first RotatingFileHandler on the logger is replaced.
            break
def maybe_run_db_backup(self) -> None:
    """Check if a scheduled DB backup is due and run it.

    Fires only inside a 2-minute window after `maint.db_backup_time`
    (default 02:00, evaluated in the scheduler's configured timezone),
    deduplicated per day (or per ISO week for the 'weekly' schedule).
    """
    if self.get_maint('db_backup_enabled') != 'true':
        return
    sched = self.get_maint('db_backup_schedule') or 'daily'
    # 'manual' means backups only run via run_db_backup() (UI/HTTP), never here.
    if sched == 'manual':
        return
    backup_time_str = self.get_maint('db_backup_time') or '02:00'
    now = self._get_current_time()
    try:
        bh, bm = [int(x) for x in backup_time_str.split(':')]
    except Exception:
        bh, bm = 2, 0
    scheduled_today = now.replace(hour=bh, minute=bm, second=0, microsecond=0)
    # Caller polls roughly every 5 minutes; the dedup below prevents repeat
    # fires when several polls land inside the same window.
    fire_window_end = scheduled_today + datetime.timedelta(minutes=2)
    if now < scheduled_today or now > fire_window_end:
        return
    # Weekly backups only fire on Monday (weekday() == 0).
    if sched == 'weekly' and now.weekday() != 0:
        return
    # After a restart, seed dedup state from the persisted metadata breadcrumb.
    if not self._last_db_backup_stats:
        try:
            db_ran_at = self.bot.db_manager.get_metadata('maint.status.db_backup_ran_at') or ''
            if db_ran_at:
                self._last_db_backup_stats['ran_at'] = db_ran_at
                wk = _iso_week_key_from_ran_at(db_ran_at)
                if wk:
                    self._last_db_backup_stats['week_key'] = wk
        except Exception:
            pass
    date_key = now.strftime('%Y-%m-%d')
    # NOTE(review): uses calendar year with the ISO week number, whereas
    # _iso_week_key_from_ran_at uses the ISO year — these disagree in the
    # year-boundary weeks; confirm intended behavior.
    week_key = f"{now.year}-W{now.isocalendar()[1]}"
    last_ran = self._last_db_backup_stats.get('ran_at', '')
    # NOTE(review): ran_at is recorded in UTC while date_key comes from the
    # configured local time — the prefix compare can mismatch near midnight
    # with a non-UTC timezone; confirm.
    if sched == 'daily' and last_ran.startswith(date_key):
        return
    if sched == 'weekly':
        seeded_week = self._last_db_backup_stats.get('week_key', '')
        if seeded_week == week_key:
            return
    self.run_db_backup()
    if sched == 'weekly':
        # Record the week even if the backup errored, so we don't retry all week.
        self._last_db_backup_stats['week_key'] = week_key
def run_db_backup(self) -> None:
    """Backup the SQLite database using sqlite3.Connection.backup(), then prune old backups.

    Destination directory and retention count come from `maint.db_backup_dir`
    (default /data/backups) and `maint.db_backup_retention_count` (default 7).
    Outcome is recorded in `_last_db_backup_stats` and, best-effort, under
    `maint.status.db_backup_*` in bot_metadata.
    """
    import sqlite3 as _sqlite3
    backup_dir_str = self.get_maint('db_backup_dir') or '/data/backups'
    try:
        retention_count = int(self.get_maint('db_backup_retention_count') or '7')
    except ValueError:
        retention_count = 7
    backup_dir = Path(backup_dir_str)
    ran_at = datetime.datetime.utcnow().isoformat()
    try:
        backup_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        # Can't even create the directory: record the failure and bail out.
        self.logger.error(f"DB backup: cannot create backup directory {backup_dir}: {e}")
        wk = _iso_week_key_from_ran_at(ran_at)
        self._last_db_backup_stats = {'ran_at': ran_at, 'error': str(e), **({'week_key': wk} if wk else {})}
        try:
            self.bot.db_manager.set_metadata('maint.status.db_backup_ran_at', ran_at)
            self.bot.db_manager.set_metadata('maint.status.db_backup_outcome', f'error: {e}')
        except Exception:
            pass
        return
    db_path = Path(str(self.bot.db_manager.db_path))
    # Timestamped filename: <stem>_YYYYMMDDTHHMMSS.db
    ts = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%S')
    backup_path = backup_dir / f"{db_path.stem}_{ts}.db"
    try:
        # check_same_thread=False: this runs on the scheduler thread, not the
        # thread that owns the bot's primary DB connection.
        src = _sqlite3.connect(str(db_path), check_same_thread=False)
        dst = _sqlite3.connect(str(backup_path))
        try:
            # pages=200 copies incrementally so writers are not blocked for long.
            src.backup(dst, pages=200)
        finally:
            dst.close()
            src.close()
        size_mb = backup_path.stat().st_size / 1_048_576
        self.logger.info(f"DB backup created: {backup_path} ({size_mb:.1f} MB)")
        # Prune oldest backups (by mtime) beyond the retention count; the count
        # includes the backup just created.
        stem = db_path.stem
        backups = sorted(backup_dir.glob(f"{stem}_*.db"), key=lambda p: p.stat().st_mtime)
        while len(backups) > retention_count:
            oldest = backups.pop(0)
            try:
                oldest.unlink()
                self.logger.info(f"DB backup pruned: {oldest}")
            except OSError:
                pass
        ran_at = datetime.datetime.utcnow().isoformat()
        wk = _iso_week_key_from_ran_at(ran_at)
        self._last_db_backup_stats = {
            'ran_at': ran_at,
            'path': str(backup_path),
            'size_mb': f'{size_mb:.1f}',
            **({'week_key': wk} if wk else {}),
        }
        try:
            self.bot.db_manager.set_metadata('maint.status.db_backup_ran_at', ran_at)
            self.bot.db_manager.set_metadata('maint.status.db_backup_outcome', 'ok')
            self.bot.db_manager.set_metadata('maint.status.db_backup_path', str(backup_path))
        except Exception:
            pass
    except Exception as e:
        self.logger.error(f"DB backup failed: {e}")
        wk = _iso_week_key_from_ran_at(ran_at)
        self._last_db_backup_stats = {'ran_at': ran_at, 'error': str(e), **({'week_key': wk} if wk else {})}
        try:
            self.bot.db_manager.set_metadata('maint.status.db_backup_ran_at', ran_at)
            self.bot.db_manager.set_metadata('maint.status.db_backup_outcome', f'error: {e}')
        except Exception:
            pass

View File

@@ -16,6 +16,7 @@ from typing import Any, Optional
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from .maintenance import MaintenanceRunner
from .utils import decode_escape_sequences, format_keyword_response_with_placeholders, get_config_timezone
@@ -31,14 +32,13 @@ class MessageScheduler:
self.last_channel_ops_check_time = 0
self.last_message_queue_check_time = 0
self.last_radio_ops_check_time = 0
self.last_data_retention_run = 0
# Align with nightly email: first retention run after ~24h uptime (not immediately on boot).
self.last_data_retention_run = time.time()
self._data_retention_interval_seconds = 86400 # 24 hours
self.last_nightly_email_time = time.time() # don't send immediately on startup
self._last_retention_stats: dict[str, Any] = {}
self.last_db_backup_run = 0
self._last_db_backup_stats: dict[str, Any] = {}
self.last_log_rotation_check_time = 0
self._last_log_rotation_applied: dict[str, str] = {}
self.maintenance = MaintenanceRunner(bot, get_current_time=self.get_current_time)
def get_current_time(self):
"""Get current time in configured timezone"""
@@ -476,110 +476,28 @@ class MessageScheduler:
# Data retention: run daily (packet_stream, repeater tables, stats, caches, mesh_connections)
if time.time() - self.last_data_retention_run >= self._data_retention_interval_seconds:
self._run_data_retention()
self.maintenance.run_data_retention()
self.last_data_retention_run = time.time()
# Nightly maintenance email (24 h interval, after retention so stats are fresh)
if time.time() - self.last_nightly_email_time >= self._data_retention_interval_seconds:
self._send_nightly_email()
self.maintenance.send_nightly_email()
self.last_nightly_email_time = time.time()
# Log rotation live-apply: check bot_metadata for config changes every 60 s
if time.time() - self.last_log_rotation_check_time >= 60:
self._apply_log_rotation_config()
self.maintenance.apply_log_rotation_config()
self.last_log_rotation_check_time = time.time()
# DB backup: evaluate schedule every 5 minutes
if time.time() - self.last_db_backup_run >= 300:
self._maybe_run_db_backup()
self.maintenance.maybe_run_db_backup()
self.last_db_backup_run = time.time()
time.sleep(1)
self.logger.info("Scheduler thread stopped")
def _run_data_retention(self):
    """Run data retention cleanup: packet_stream, repeater tables, stats, caches, mesh_connections.

    Delegates to MaintenanceRunner.run_data_retention(); this body previously
    duplicated that implementation line-for-line, and keeping two copies of
    the cleanup logic risks silent divergence. All retention-window config
    reads and error/status bookkeeping now live in modules.maintenance.
    """
    self.maintenance.run_data_retention()
def check_interval_advertising(self):
"""Check if it's time to send an interval-based advert"""
try:
@@ -914,424 +832,62 @@ class MessageScheduler:
self.logger.error(f"Firmware write failed: {e}")
return False, {'error': str(e)}
# ── Nightly maintenance email ────────────────────────────────────────────
# ── Maintenance (delegates to MaintenanceRunner) ─────────────────────────
@property
def _last_retention_stats(self) -> dict[str, Any]:
    # Back-compat shim: pre-refactor code/tests read these stats off the scheduler.
    return self.maintenance._last_retention_stats

@_last_retention_stats.setter
def _last_retention_stats(self, value: dict[str, Any]) -> None:
    # Mutate in place so the MaintenanceRunner keeps the same dict identity.
    self.maintenance._last_retention_stats.clear()
    self.maintenance._last_retention_stats.update(value)

@property
def _last_db_backup_stats(self) -> dict[str, Any]:
    # Back-compat shim mirroring MaintenanceRunner's backup-outcome cache.
    return self.maintenance._last_db_backup_stats

@_last_db_backup_stats.setter
def _last_db_backup_stats(self, value: dict[str, Any]) -> None:
    # Mutate in place so the MaintenanceRunner keeps the same dict identity.
    self.maintenance._last_db_backup_stats.clear()
    self.maintenance._last_db_backup_stats.update(value)

@property
def _last_log_rotation_applied(self) -> dict[str, str]:
    # Back-compat shim mirroring MaintenanceRunner's hot-apply change-detection state.
    return self.maintenance._last_log_rotation_applied

@_last_log_rotation_applied.setter
def _last_log_rotation_applied(self, value: dict[str, str]) -> None:
    # Mutate in place so the MaintenanceRunner keeps the same dict identity.
    self.maintenance._last_log_rotation_applied.clear()
    self.maintenance._last_log_rotation_applied.update(value)
def run_db_backup(self) -> None:
    """Run a DB backup immediately (manual / HTTP)."""
    # Delegates to MaintenanceRunner so manual and scheduled backups share one code path.
    self.maintenance.run_db_backup()
def _run_data_retention(self) -> None:
    """Run the daily data-retention cleanup (delegates to MaintenanceRunner)."""
    self.maintenance.run_data_retention()
def _get_notif(self, key: str) -> str:
    """Read a notification setting from bot_metadata.

    Delegates to MaintenanceRunner.get_notif(); the previous body kept the
    old inline metadata lookup ahead of the delegation line, leaving the
    delegation unreachable dead code. Returns '' when the key is unset or
    the metadata store is unavailable.
    """
    return self.maintenance.get_notif(key)
def _collect_email_stats(self) -> dict[str, Any]:
    """Gather 24h summary stats for the nightly digest.

    Delegates to MaintenanceRunner.collect_email_stats(); the previous body
    kept the old inline implementation ahead of the delegation line, leaving
    it unreachable dead code. The old copy also called `.get` on rows that
    may be sqlite3.Row objects (which have no `.get`), a bug the runner's
    `_row_n` helper avoids.
    """
    return self.maintenance.collect_email_stats()
def _format_email_body(self, stats: dict[str, Any], period_start: str, period_end: str) -> str:
    """Render the plain-text body of the nightly maintenance email.

    Args:
        stats: Snapshot dict produced by the maintenance stats collector
            (contact counts, DB size, error counts, log-rotation info).
        period_start: Human-readable start of the reporting period.
        period_end: Human-readable end of the reporting period.

    Returns:
        The fully formatted report text.

    The formatting logic now lives in the maintenance module; this thin
    wrapper is kept so existing callers (and tests) can keep invoking the
    scheduler method. The superseded inline implementation has been removed —
    it was unreachable duplicate code left beside the delegation.
    """
    return self.maintenance.format_email_body(stats, period_start, period_end)
def _send_nightly_email(self) -> None:
    """Build and dispatch the nightly maintenance digest if enabled.

    All SMTP configuration, message assembly, optional log attachment, and
    maint.status.* outcome bookkeeping moved to MaintenanceRunner; the
    scheduler only delegates so its public surface stays stable. Keeping
    the old inline body here alongside the delegation would have sent the
    digest twice per trigger, so only the delegation remains.
    """
    self.maintenance.send_nightly_email()

# ── Maintenance helpers ──────────────────────────────────────────────────
def _get_maint(self, key: str) -> str:
    """Read a maintenance setting (``maint.<key>``) from bot_metadata.

    Args:
        key: Setting name without the ``maint.`` prefix.

    Returns:
        The stored value, or '' when the key is absent or the lookup fails.

    Delegates to MaintenanceRunner; the previous inline implementation was
    unreachable duplicate code once the delegation line was added.
    """
    return self.maintenance.get_maint(key)
def _apply_log_rotation_config(self) -> None:
    """Hot-apply log rotation settings stored in bot_metadata.

    When ``maint.log_max_bytes`` / ``maint.log_backup_count`` change, the
    runner swaps the bot's RotatingFileHandler in place (no restart needed)
    and records ``maint.status.log_rotation_applied_at``. The handler
    replacement logic lives in MaintenanceRunner; executing the old inline
    body and then delegating would have rebuilt the handler twice, so only
    the delegation remains.
    """
    self.maintenance.apply_log_rotation_config()
def _maybe_run_db_backup(self) -> None:
    """Run a scheduled database backup if one is currently due.

    The enable flag, daily/weekly schedule, 2-minute fire window, and
    restart-safe dedup (seeded from ``maint.status.db_backup_ran_at``) are
    all evaluated inside MaintenanceRunner. Delegating keeps a single
    source of truth — running the old inline check and the runner's check
    back-to-back could trigger a duplicate backup.
    """
    self.maintenance.maybe_run_db_backup()
def _run_db_backup(self) -> None:
    """Backup the SQLite database and prune old backups.

    MaintenanceRunner.run_db_backup() performs the sqlite3 online backup,
    prunes backups beyond the retention count, and writes its outcome to
    the ``maint.status.db_backup_*`` metadata keys that the web viewer
    reads. The old inline body followed by the delegation would have
    produced two backup files per invocation, so only the delegation
    remains.
    """
    self.maintenance.run_db_backup()

View File

@@ -1440,9 +1440,9 @@ class BotDataViewer:
try:
bot = getattr(self, 'bot', None)
scheduler = getattr(bot, 'scheduler', None) if bot else None
if scheduler is None or not hasattr(scheduler, '_run_db_backup'):
if scheduler is None or not hasattr(scheduler, 'run_db_backup'):
return jsonify({'success': False, 'error': 'Scheduler not available'}), 503
scheduler._run_db_backup()
scheduler.run_db_backup()
# Read outcome written by _run_db_backup
path = self.db_manager.get_metadata('maint.status.db_backup_path') or ''
outcome = self.db_manager.get_metadata('maint.status.db_backup_outcome') or ''

View File

@@ -141,6 +141,7 @@ module = [
"modules.feed_manager",
"modules.mesh_graph",
"modules.scheduler",
"modules.maintenance",
"modules.transmission_tracker",
"modules.commands.alert_command",
"modules.commands.announcements_command",

250
tests/test_maintenance.py Normal file
View File

@@ -0,0 +1,250 @@
"""Unit tests for modules.maintenance helpers and MaintenanceRunner."""
from __future__ import annotations
import datetime
import json
import sqlite3
from configparser import ConfigParser
import tempfile
import time
from pathlib import Path
from unittest.mock import MagicMock, Mock, patch
import pytest
from modules.maintenance import (
MaintenanceRunner,
_count_log_errors_last_24h,
_iso_week_key_from_ran_at,
_row_n,
)
from modules.scheduler import MessageScheduler
# ---------------------------------------------------------------------------
# _iso_week_key_from_ran_at
# ---------------------------------------------------------------------------
class TestIsoWeekKeyFromRanAt:
    """ISO-week key derivation from stored ``ran_at`` timestamps."""

    def test_empty_returns_empty(self):
        assert _iso_week_key_from_ran_at("") == ""
        assert _iso_week_key_from_ran_at(" ") == ""

    def test_invalid_returns_empty(self):
        assert _iso_week_key_from_ran_at("not-a-date") == ""

    def test_naive_iso_matches_isocalendar(self):
        # 2026-03-17 is a Monday
        year, week, _ = datetime.date(2026, 3, 17).isocalendar()
        assert _iso_week_key_from_ran_at("2026-03-17T02:00:00") == f"{year}-W{week}"

    def test_z_suffix_parsed(self):
        year, week, _ = datetime.date(2026, 3, 17).isocalendar()
        assert _iso_week_key_from_ran_at("2026-03-17T02:00:00Z") == f"{year}-W{week}"

    def test_same_calendar_week_same_key(self):
        tuesday_key = _iso_week_key_from_ran_at("2026-03-17T08:00:00")
        wednesday_key = _iso_week_key_from_ran_at("2026-03-18T15:30:00")
        assert tuesday_key == wednesday_key
# ---------------------------------------------------------------------------
# _row_n
# ---------------------------------------------------------------------------
class TestRowN:
    """``_row_n`` must read the ``n`` column from both sqlite3.Row and dict rows."""

    def test_sqlite_row(self):
        conn = sqlite3.connect(":memory:")
        try:
            conn.row_factory = sqlite3.Row
            conn.execute("CREATE TABLE t (n INTEGER)")
            conn.execute("INSERT INTO t VALUES (42)")
            cur = conn.cursor()
            cur.execute("SELECT n AS n FROM t")
            assert _row_n(cur) == 42
        finally:
            # close even if the assertion fails — the original leaked on failure
            conn.close()

    def test_dict_row(self):
        # No real database needed: _row_n only calls cursor.fetchone().
        # (The original opened an sqlite connection here, never used it,
        # and never closed it — dead code and a resource leak.)
        class _Cur:
            def fetchone(self):
                return {"n": 7}

        assert _row_n(_Cur()) == 7
# ---------------------------------------------------------------------------
# _count_log_errors_last_24h
# ---------------------------------------------------------------------------
class TestCountLogErrorsLast24h:
    """ERROR/CRITICAL counting over the trailing 24 h, for text and JSON logs."""

    def _write(self, path: Path, lines: list[str]) -> None:
        content = "\n".join(lines) + "\n"
        path.write_text(content, encoding="utf-8")

    def test_counts_recent_text_errors(self, tmp_path: Path):
        now = datetime.datetime.now()
        stale = (now - datetime.timedelta(hours=25)).strftime("%Y-%m-%d %H:%M:%S")
        fresh = (now - datetime.timedelta(hours=1)).strftime("%Y-%m-%d %H:%M:%S")
        log = tmp_path / "bot.log"
        entries = [
            f'{stale} - MeshCoreBot - ERROR - stale',
            f'{fresh} - MeshCoreBot - ERROR - fresh',
            f'{fresh} - MeshCoreBot - CRITICAL - bad',
        ]
        self._write(log, entries)
        assert _count_log_errors_last_24h(log) == (1, 1)

    def test_skips_old_text_lines(self, tmp_path: Path):
        ancient = (datetime.datetime.now() - datetime.timedelta(days=2)).strftime("%Y-%m-%d %H:%M:%S")
        log = tmp_path / "bot.log"
        self._write(log, [f'{ancient} - MeshCoreBot - ERROR - ancient'])
        assert _count_log_errors_last_24h(log) == (0, 0)

    def test_json_recent_error(self, tmp_path: Path):
        now = datetime.datetime.now(datetime.timezone.utc)
        fresh = (now - datetime.timedelta(hours=1)).strftime("%Y-%m-%dT%H:%M:%S.000Z")
        stale = (now - datetime.timedelta(hours=48)).strftime("%Y-%m-%dT%H:%M:%S.000Z")
        log = tmp_path / "json.log"
        records = [
            {"timestamp": stale, "level": "ERROR", "message": "x"},
            {"timestamp": fresh, "level": "ERROR", "message": "y"},
            {"timestamp": fresh, "level": "CRITICAL", "message": "z"},
        ]
        self._write(log, [json.dumps(record) for record in records])
        assert _count_log_errors_last_24h(log) == (1, 1)

    def test_missing_file_returns_na(self, tmp_path: Path):
        assert _count_log_errors_last_24h(tmp_path / "nope.log") == ("n/a", "n/a")
# ---------------------------------------------------------------------------
# MaintenanceRunner.maybe_run_db_backup — weekly dedup after restart
# ---------------------------------------------------------------------------
class TestMaybeRunDbBackupWeeklyDedup:
    """DB metadata seeds week_key so weekly backup does not repeat same ISO week."""

    def _make_runner(self, now: datetime.datetime, db_ran_at: str):
        fake_bot = MagicMock()
        fake_bot.logger = Mock()
        settings = {
            "db_backup_enabled": "true",
            "db_backup_schedule": "weekly",
            "db_backup_time": f"{now.hour:02d}:{now.minute:02d}",
            "db_backup_retention_count": "7",
            "db_backup_dir": "/tmp",
        }

        def metadata_lookup(key: str):
            if key == "maint.status.db_backup_ran_at":
                return db_ran_at
            return None

        fake_bot.db_manager.get_metadata = Mock(side_effect=metadata_lookup)
        runner = MaintenanceRunner(fake_bot, get_current_time=lambda: now)
        runner.get_maint = Mock(side_effect=lambda key: settings.get(key, ""))
        return runner

    def test_weekly_skips_when_db_ran_same_iso_week(self):
        # Monday 10:01, window 10:00–10:02; DB says backup already ran this Monday morning
        monday = datetime.datetime(2026, 3, 16, 10, 1, 0)
        assert monday.weekday() == 0
        runner = self._make_runner(monday, "2026-03-16T09:30:00")
        with patch.object(runner, "run_db_backup") as backup_spy:
            runner.maybe_run_db_backup()
        backup_spy.assert_not_called()
        assert runner._last_db_backup_stats.get("ran_at", "").startswith("2026-03-16")
        expected_week = f"{monday.year}-W{monday.isocalendar()[1]}"
        assert runner._last_db_backup_stats.get("week_key") == expected_week

    def test_weekly_runs_when_db_ran_previous_week(self):
        monday = datetime.datetime(2026, 3, 16, 10, 1, 0)  # Monday
        # Prior Monday falls in a different ISO week, so the backup is due.
        runner = self._make_runner(monday, "2026-03-09T09:00:00")
        with patch.object(runner, "run_db_backup") as backup_spy:
            runner.maybe_run_db_backup()
        backup_spy.assert_called_once()
# ---------------------------------------------------------------------------
# MessageScheduler — retention timer not immediate
# ---------------------------------------------------------------------------
class TestSchedulerRetentionTimer:
    """The retention/email timers start 'recently run' so nothing fires at boot."""

    def test_last_data_retention_run_is_recent_at_init(self):
        fake_bot = Mock()
        fake_bot.logger = Mock()
        fake_bot.config = ConfigParser()
        fake_bot.config.add_section("Bot")
        scheduler = MessageScheduler(fake_bot)
        checked_at = time.time()
        assert checked_at - scheduler.last_data_retention_run < 3.0
        assert checked_at - scheduler.last_nightly_email_time < 3.0
# ---------------------------------------------------------------------------
# Nice-to-have: run_db_backup with temp SQLite (integration-style)
# ---------------------------------------------------------------------------
class TestRunDbBackupIntegration:
    """run_db_backup against a real temp SQLite file produces a readable copy."""

    def test_creates_backup_file(self, tmp_path: Path):
        live_db = tmp_path / "live.db"
        conn = sqlite3.connect(str(live_db))
        with conn:  # commits on exit
            conn.execute("CREATE TABLE x (i INTEGER)")
            conn.execute("INSERT INTO x VALUES (1)")
        conn.close()

        fake_bot = MagicMock()
        fake_bot.logger = Mock()
        fake_bot.db_manager.db_path = live_db
        fake_bot.db_manager.get_metadata = Mock(return_value=None)
        fake_bot.db_manager.set_metadata = Mock()
        settings = {
            "db_backup_dir": str(tmp_path / "bk"),
            "db_backup_retention_count": "3",
        }
        runner = MaintenanceRunner(fake_bot, get_current_time=lambda: datetime.datetime.now())
        runner.get_maint = Mock(side_effect=lambda key: settings.get(key, ""))

        runner.run_db_backup()

        created = list((tmp_path / "bk").glob("live_*.db"))
        assert len(created) == 1
        copy = sqlite3.connect(str(created[0]))
        try:
            assert copy.execute("SELECT i FROM x").fetchone()[0] == 1
        finally:
            copy.close()

View File

@@ -294,18 +294,18 @@ class TestMaybeRunDbBackup:
"db_backup_retention_count": "7",
"db_backup_dir": "/tmp/backup",
}.get(key, "")
scheduler._get_maint = Mock(side_effect=maint)
scheduler.maintenance.get_maint = Mock(side_effect=maint)
scheduler._last_db_backup_stats = {"ran_at": last_ran}
def test_disabled_does_not_run(self, scheduler):
self._setup(scheduler, enabled="false")
with patch.object(scheduler, "_run_db_backup") as mock_run:
with patch.object(scheduler.maintenance, "run_db_backup") as mock_run:
scheduler._maybe_run_db_backup()
mock_run.assert_not_called()
def test_manual_schedule_does_not_run(self, scheduler):
self._setup(scheduler, schedule="manual")
with patch.object(scheduler, "_run_db_backup") as mock_run:
with patch.object(scheduler.maintenance, "run_db_backup") as mock_run:
scheduler._maybe_run_db_backup()
mock_run.assert_not_called()
@@ -316,7 +316,7 @@ class TestMaybeRunDbBackup:
sched_time = now - datetime.timedelta(minutes=1)
time_str = sched_time.strftime("%H:%M")
self._setup(scheduler, time_str=time_str, last_ran=f"{today}T00:01:00")
with patch.object(scheduler, "_run_db_backup") as mock_run:
with patch.object(scheduler.maintenance, "run_db_backup") as mock_run:
scheduler._maybe_run_db_backup()
mock_run.assert_not_called()
@@ -328,7 +328,7 @@ class TestMaybeRunDbBackup:
time_str = sched_time.strftime("%H:%M")
yesterday = (now - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
self._setup(scheduler, time_str=time_str, last_ran=f"{yesterday}T00:01:00")
with patch.object(scheduler, "_run_db_backup") as mock_run:
with patch.object(scheduler.maintenance, "run_db_backup") as mock_run:
scheduler._maybe_run_db_backup()
mock_run.assert_called_once()
@@ -340,7 +340,7 @@ class TestMaybeRunDbBackup:
time_str = sched_time.strftime("%H:%M")
yesterday = (now - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
self._setup(scheduler, time_str=time_str, last_ran=f"{yesterday}T00:01:00")
with patch.object(scheduler, "_run_db_backup") as mock_run:
with patch.object(scheduler.maintenance, "run_db_backup") as mock_run:
scheduler._maybe_run_db_backup()
mock_run.assert_not_called()
@@ -350,7 +350,7 @@ class TestMaybeRunDbBackup:
sched_time = now + datetime.timedelta(minutes=30)
time_str = sched_time.strftime("%H:%M")
self._setup(scheduler, time_str=time_str, last_ran="")
with patch.object(scheduler, "_run_db_backup") as mock_run:
with patch.object(scheduler.maintenance, "run_db_backup") as mock_run:
scheduler._maybe_run_db_backup()
mock_run.assert_not_called()
@@ -372,7 +372,7 @@ class TestMaybeRunDbBackup:
fake_now.strftime = now.strftime
fake_now.isocalendar.return_value = (2026, 11, 2)
with patch.object(scheduler, "get_current_time", return_value=fake_now):
with patch.object(scheduler, "_run_db_backup") as mock_run:
with patch.object(scheduler.maintenance, "run_db_backup") as mock_run:
scheduler._maybe_run_db_backup()
mock_run.assert_not_called()
@@ -560,9 +560,9 @@ class TestDbBackupIntervalGuard:
"db_backup_retention_count": "7",
"db_backup_dir": "/tmp",
}.get(key, "")
scheduler._get_maint = Mock(side_effect=maint)
scheduler.maintenance.get_maint = Mock(side_effect=maint)
with patch.object(scheduler, "_run_db_backup") as mock_run:
with patch.object(scheduler.maintenance, "run_db_backup") as mock_run:
scheduler._maybe_run_db_backup()
# Should NOT run because DB says it already ran today
mock_run.assert_not_called()
@@ -1209,15 +1209,8 @@ class TestCollectEmailStats:
conn_mock.__enter__ = Mock(return_value=conn_mock)
conn_mock.__exit__ = Mock(return_value=False)
row_total = MagicMock()
row_total.get = Mock(side_effect=lambda k, d=0: {"n": 50}.get(k, d))
row_24h = MagicMock()
row_24h.get = Mock(side_effect=lambda k, d=0: {"n": 10}.get(k, d))
row_new = MagicMock()
row_new.get = Mock(side_effect=lambda k, d=0: {"n": 3}.get(k, d))
cursor_mock = MagicMock()
cursor_mock.fetchone.side_effect = [row_total, row_24h, row_new]
cursor_mock.fetchone.side_effect = [{"n": 50}, {"n": 10}, {"n": 3}]
conn_mock.cursor.return_value = cursor_mock
scheduler.bot.db_manager.connection.return_value = conn_mock

View File

@@ -1843,13 +1843,13 @@ class TestBackupNowRoute:
mock_scheduler = MagicMock()
def fake_run_db_backup():
# Simulate what _run_db_backup writes to metadata
# Simulate what run_db_backup writes to metadata
viewer.db_manager.set_metadata(
'maint.status.db_backup_path', str(tmp_path / "test.db")
)
viewer.db_manager.set_metadata('maint.status.db_backup_outcome', 'ok')
mock_scheduler._run_db_backup = fake_run_db_backup
mock_scheduler.run_db_backup = fake_run_db_backup
mock_bot = MagicMock()
mock_bot.scheduler = mock_scheduler
@@ -1874,7 +1874,7 @@ class TestBackupNowRoute:
'maint.status.db_backup_outcome', 'error: cannot create dir'
)
mock_scheduler._run_db_backup = fake_run_db_backup
mock_scheduler.run_db_backup = fake_run_db_backup
mock_bot = MagicMock()
mock_bot.scheduler = mock_scheduler