diff --git a/config.ini.example b/config.ini.example index 1b3ec62..e9202cc 100644 --- a/config.ini.example +++ b/config.ini.example @@ -625,6 +625,25 @@ track_command_details = true # false: Keep actual user IDs in stats anonymize_users = false +[Data_Retention] +# Data retention controls how long the bot keeps data in the database. +# The scheduler runs cleanup daily so retention is enforced even when the +# standalone web viewer is not running. Shorter retention reduces DB size. +# +# Packet stream (web viewer real-time display and transmission_tracker) +# 2-3 days is enough for most deployments; 7 days if you need longer history. +packet_stream_retention_days = 3 +# +# Repeater/stats tables: daily_stats, unique_advert_packets, observed_paths +daily_stats_retention_days = 90 +observed_paths_retention_days = 90 +# +# Purging log (audit trail for repeater purges) +purging_log_retention_days = 90 +# +# Mesh connections (path graph edges). Should be >= Path_Command graph_edge_expiration_days. +mesh_connections_retention_days = 7 + [Path_Command] # Enable or disable the path command enabled = true diff --git a/docs/configuration.md b/docs/configuration.md index 2375cc7..61313aa 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -56,6 +56,10 @@ Examples of sections that configure specific commands or features: Full reference: see `config.ini.example` in the repository for every section and option, with inline comments. +## Data retention + +Database tables (packet stream, stats, repeater data, mesh graph) are pruned automatically. Retention periods and defaults are described in **[Data retention](data-retention.md)**. The bot’s scheduler runs cleanup daily even when the standalone web viewer is not running. + ## Path Command configuration The Path command has many options (presets, proximity, graph validation, etc.). 
All are documented in: diff --git a/docs/data-retention.md b/docs/data-retention.md new file mode 100644 index 0000000..340eaaf --- /dev/null +++ b/docs/data-retention.md @@ -0,0 +1,45 @@ +# Data retention + +The bot stores data in a SQLite database for the web viewer, stats, repeater management, and path routing. To limit database size, **data retention** controls how long rows are kept. Cleanup runs **daily** from the bot’s scheduler, so retention is enforced even when the standalone web viewer is not running. + +## Configuration + +All retention options live in the **`[Data_Retention]`** section of `config.ini`. Example (see `config.ini.example` for full comments): + +```ini +[Data_Retention] +packet_stream_retention_days = 3 +daily_stats_retention_days = 90 +observed_paths_retention_days = 90 +purging_log_retention_days = 90 +mesh_connections_retention_days = 7 +``` + +Stats tables (message_stats, command_stats, path_stats) use **`[Stats_Command]`** `data_retention_days` (default 7); the scheduler runs that cleanup daily as well. + +## Tables and defaults + +| Table / data | Purpose | Default retention | +|--------------|---------|--------------------| +| **packet_stream** | Real-time packets, commands, routing in the web viewer; transmission_tracker repeat counts | 3 days | +| **daily_stats** | Daily repeater/advert stats | 90 days | +| **unique_advert_packets** | Unique packet hashes for advert stats | 90 days (same as daily_stats) | +| **observed_paths** | Path strings from adverts and messages | 90 days | +| **purging_log** | Audit trail for repeater purges | 90 days | +| **mesh_connections** | Path graph edges (in-memory + DB); should be ≥ Path_Command `graph_edge_expiration_days` | 7 days | +| **message_stats, command_stats, path_stats** | Stats command data | 7 days (`[Stats_Command]` `data_retention_days`) | +| **geocoding_cache, generic_cache** | Expired entries removed by scheduler | By expiry time | + +Shorter retention (e.g. 
2–3 days for `packet_stream`) is enough for the web viewer and transmission_tracker; longer retention is only needed if you want more history. + +## How cleanup runs + +1. The **scheduler** (in the main bot process) runs a single data-retention task once every 24 hours. +2. That task: + - Cleans **packet_stream** (via web viewer integration when enabled). + - Cleans **purging_log**, **daily_stats**, **unique_advert_packets**, and **observed_paths** (repeater manager). + - Cleans **message_stats**, **command_stats**, **path_stats** (stats command’s `cleanup_old_stats`). + - Removes expired rows from **geocoding_cache** and **generic_cache** (DB manager). + - Deletes old rows from **mesh_connections** (mesh graph). + +So as long as the bot is running, the database is pruned on a schedule regardless of whether you run the standalone web viewer or the stats command. diff --git a/mkdocs.yml b/mkdocs.yml index 9339457..aab72d8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -56,6 +56,7 @@ nav: - Service: service-installation.md - Configuration: - Overview: configuration.md + - Data retention: data-retention.md - Path Command: path-command-config.md - Config validation: config-validation.md - Web Viewer: web-viewer.md diff --git a/modules/mesh_graph.py b/modules/mesh_graph.py index a61e9ae..d69ba5a 100644 --- a/modules/mesh_graph.py +++ b/modules/mesh_graph.py @@ -717,6 +717,28 @@ class MeshGraph: self.logger.debug(f"Pruned {len(expired_keys)} expired graph edges (older than {self.edge_expiration_days} days)") return len(expired_keys) + def delete_expired_edges_from_db(self, days: int) -> int: + """Delete mesh_connections rows older than the given days. + Keeps the on-disk table aligned with in-memory pruning and prevents unbounded growth. + Called from the scheduler (e.g. daily). Use Data_Retention mesh_connections_retention_days + or Path_Command graph_edge_expiration_days. + Returns: + int: Number of rows deleted. 
def cleanup_repeater_retention(
    self,
    daily_stats_days: int = 90,
    observed_paths_days: int = 90
) -> None:
    """Delete old rows from daily_stats, unique_advert_packets and observed_paths.

    Called from the scheduler so retention is enforced even when the stats
    command is never run.

    Args:
        daily_stats_days: Retention window (days) applied to both
            ``daily_stats`` and ``unique_advert_packets`` (compared against
            their ``date`` column).
        observed_paths_days: Retention window (days) applied to
            ``observed_paths`` (compared against ``last_seen``).

    A non-positive window disables cleanup for the affected table(s), matching
    ``MeshGraph.delete_expired_edges_from_db``; otherwise the cutoff would land
    on today or in the future and every row would be deleted.

    Errors are logged, never raised. Each table is cleaned independently so a
    failure on one table does not prevent the others from being pruned.
    """
    try:
        now = datetime.now()
        jobs = []

        if daily_stats_days > 0:
            # daily_stats and unique_advert_packets share a date-only cutoff.
            cutoff_date = (now - timedelta(days=daily_stats_days)).date().isoformat()
            jobs.append((
                'daily_stats',
                'DELETE FROM daily_stats WHERE date < ?',
                cutoff_date,
                daily_stats_days,
            ))
            jobs.append((
                'unique_advert_packets',
                'DELETE FROM unique_advert_packets WHERE date < ?',
                cutoff_date,
                daily_stats_days,
            ))

        if observed_paths_days > 0:
            # NOTE(review): the cutoff is an ISO-8601 string with a 'T' separator;
            # this assumes observed_paths.last_seen is stored in the same format
            # (a 'YYYY-MM-DD HH:MM:SS' column would compare differently) - confirm.
            cutoff_ts = (now - timedelta(days=observed_paths_days)).isoformat()
            jobs.append((
                'observed_paths',
                'DELETE FROM observed_paths WHERE last_seen < ?',
                cutoff_ts,
                observed_paths_days,
            ))
    except Exception as e:
        # Bad argument types or out-of-range day counts end up here.
        self.logger.error(f"Error cleaning up repeater retention tables: {e}")
        return

    for table, sql, cutoff, days in jobs:
        try:
            deleted = self.db_manager.execute_update(sql, (cutoff,))
        except Exception as e:
            # Keep going: one broken/missing table must not block the rest.
            self.logger.error(f"Error cleaning up repeater retention tables: {e}")
            continue
        if deleted and deleted > 0:
            self.logger.info(f"Cleaned up {deleted} old {table} entries (older than {days} days)")
def _run_data_retention(self):
    """Run every data-retention cleanup step once.

    Steps, in order: packet_stream (web viewer integration), purging_log
    (``repeater_manager.cleanup_database``), daily_stats / unique_advert_packets /
    observed_paths (``repeater_manager.cleanup_repeater_retention``), stats tables
    (stats command ``cleanup_old_stats``), expired caches
    (``db_manager.cleanup_expired_cache``) and mesh_connections
    (``mesh_graph.delete_expired_edges_from_db``).

    Retention windows are read from ``[Data_Retention]`` (and
    ``[Stats_Command] data_retention_days``), falling back to built-in defaults.
    A value that cannot be parsed as an integer is reported with a warning
    instead of being silently ignored.

    Each step runs in isolation: an exception in one step is logged and the
    remaining steps still execute. Components that are missing or falsy on the
    bot are skipped. Nothing is raised to the caller.
    """
    import asyncio

    bot = self.bot

    def get_retention_days(section: str, key: str, default: int) -> int:
        """Return the configured integer for section/key, or default."""
        try:
            if bot.config.has_section(section) and bot.config.has_option(section, key):
                return bot.config.getint(section, key)
        except Exception as e:
            self.logger.warning(
                f"Invalid [{section}] {key} value ({e}); using default of {default} days"
            )
        return default

    packet_stream_days = get_retention_days('Data_Retention', 'packet_stream_retention_days', 3)
    purging_log_days = get_retention_days('Data_Retention', 'purging_log_retention_days', 90)
    daily_stats_days = get_retention_days('Data_Retention', 'daily_stats_retention_days', 90)
    observed_paths_days = get_retention_days('Data_Retention', 'observed_paths_retention_days', 90)
    mesh_connections_days = get_retention_days('Data_Retention', 'mesh_connections_retention_days', 7)
    stats_days = get_retention_days('Stats_Command', 'data_retention_days', 7)

    def clean_packet_stream():
        # Packet stream lives behind the web viewer integration, when enabled.
        integration = getattr(bot, 'web_viewer_integration', None)
        if not integration:
            return
        bi = getattr(integration, 'bot_integration', None)
        if bi and hasattr(bi, 'cleanup_old_data'):
            bi.cleanup_old_data(packet_stream_days)

    def clean_purging_log():
        repeater_manager = getattr(bot, 'repeater_manager', None)
        if not repeater_manager:
            return
        main_loop = getattr(bot, 'main_event_loop', None)
        if main_loop and main_loop.is_running():
            # cleanup_database is a coroutine: hand it to the bot's running loop
            # and wait (bounded) from this scheduler thread.
            future = asyncio.run_coroutine_threadsafe(
                repeater_manager.cleanup_database(purging_log_days),
                main_loop
            )
            try:
                future.result(timeout=60)
            except Exception as e:
                self.logger.error(f"Error in repeater_manager.cleanup_database: {e}")
            return
        # No running bot loop: drive the coroutine on this thread's own loop.
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        loop.run_until_complete(repeater_manager.cleanup_database(purging_log_days))

    def clean_repeater_tables():
        repeater_manager = getattr(bot, 'repeater_manager', None)
        if repeater_manager and hasattr(repeater_manager, 'cleanup_repeater_retention'):
            repeater_manager.cleanup_repeater_retention(
                daily_stats_days=daily_stats_days,
                observed_paths_days=observed_paths_days
            )

    def clean_stats_tables():
        # message_stats, command_stats, path_stats are owned by the stats command.
        command_manager = getattr(bot, 'command_manager', None)
        if not command_manager:
            return
        commands = getattr(command_manager, 'commands', None)
        stats_cmd = commands.get('stats') if commands else None
        if stats_cmd and hasattr(stats_cmd, 'cleanup_old_stats'):
            stats_cmd.cleanup_old_stats(stats_days)

    def clean_expired_caches():
        db_manager = getattr(bot, 'db_manager', None)
        if db_manager and hasattr(db_manager, 'cleanup_expired_cache'):
            db_manager.cleanup_expired_cache()

    def clean_mesh_connections():
        mesh_graph = getattr(bot, 'mesh_graph', None)
        if mesh_graph and hasattr(mesh_graph, 'delete_expired_edges_from_db'):
            mesh_graph.delete_expired_edges_from_db(mesh_connections_days)

    steps = (
        clean_packet_stream,
        clean_purging_log,
        clean_repeater_tables,
        clean_stats_tables,
        clean_expired_caches,
        clean_mesh_connections,
    )
    for step in steps:
        try:
            step()
        except Exception as e:
            # Isolate failures so e.g. a locked web-viewer DB cannot stop the
            # remaining tables from being pruned; the traceback names the step.
            self.logger.exception(f"Error during data retention cleanup: {e}")
cleanups) - self._cleanup_old_data(days_to_keep=7) + self._cleanup_old_data() except Exception as e: self.logger.error(f"Error in cleanup scheduler: {e}", exc_info=True) @@ -2856,13 +2856,22 @@ class BotDataViewer: except Exception as e: self.logger.error(f"Error cleaning up stale clients: {e}") - def _cleanup_old_data(self, days_to_keep: int = 7): - """Clean up old packet stream data to prevent database bloat""" + def _cleanup_old_data(self, days_to_keep: Optional[int] = None): + """Clean up old packet stream data to prevent database bloat. + Uses [Data_Retention] packet_stream_retention_days when days_to_keep is not provided.""" conn = None try: import sqlite3 import time - + + if days_to_keep is None: + days_to_keep = 3 + if self.config.has_section('Data_Retention') and self.config.has_option('Data_Retention', 'packet_stream_retention_days'): + try: + days_to_keep = self.config.getint('Data_Retention', 'packet_stream_retention_days') + except (ValueError, TypeError): + pass + cutoff_time = time.time() - (days_to_keep * 24 * 60 * 60) # Use DEFERRED isolation; longer timeout to wait out bot writes diff --git a/modules/web_viewer/integration.py b/modules/web_viewer/integration.py index 9acb6d1..4b273d4 100644 --- a/modules/web_viewer/integration.py +++ b/modules/web_viewer/integration.py @@ -11,6 +11,7 @@ import sys import os import re from pathlib import Path +from typing import Optional from ..utils import resolve_path @@ -295,12 +296,21 @@ class BotIntegration: except Exception as e: self.bot.logger.debug(f"Error storing routing data: {e}") - def cleanup_old_data(self, days_to_keep: int = 7): - """Clean up old packet stream data to prevent database bloat""" + def cleanup_old_data(self, days_to_keep: Optional[int] = None): + """Clean up old packet stream data to prevent database bloat. 
+ Uses [Data_Retention] packet_stream_retention_days when days_to_keep is not provided.""" try: import sqlite3 import time - + + if days_to_keep is None: + days_to_keep = 3 + if self.bot.config.has_section('Data_Retention') and self.bot.config.has_option('Data_Retention', 'packet_stream_retention_days'): + try: + days_to_keep = self.bot.config.getint('Data_Retention', 'packet_stream_retention_days') + except (ValueError, TypeError): + pass + cutoff_time = time.time() - (days_to_keep * 24 * 60 * 60) db_path = self._get_web_viewer_db_path()