mirror of
https://github.com/agessaman/meshcore-bot.git
synced 2026-05-25 00:35:18 +00:00
Add data retention configuration and cleanup functionality
- Introduced a new `[Data_Retention]` section in `config.ini.example` to manage retention periods for various database tables, including packet stream, daily stats, and observed paths.
- Updated `mkdocs.yml` and `configuration.md` to include documentation for the new data retention settings.
- Implemented data retention cleanup methods in `mesh_graph.py`, `repeater_manager.py`, and `scheduler.py` to enforce retention policies automatically.
- Enhanced the web viewer's data cleanup logic to utilize the new retention settings, ensuring efficient database management.
This commit is contained in:
@@ -717,6 +717,28 @@ class MeshGraph:
|
||||
self.logger.debug(f"Pruned {len(expired_keys)} expired graph edges (older than {self.edge_expiration_days} days)")
|
||||
return len(expired_keys)
|
||||
|
||||
def delete_expired_edges_from_db(self, days: int) -> int:
    """Delete mesh_connections rows whose last_seen is older than ``days`` days.

    Keeps the on-disk table aligned with in-memory pruning and prevents
    unbounded growth. Called from the scheduler (e.g. daily). Use
    Data_Retention mesh_connections_retention_days or Path_Command
    graph_edge_expiration_days.

    Args:
        days: Retention window in days. A value <= 0 disables the cleanup.

    Returns:
        int: Number of rows deleted; 0 when the cleanup is disabled or fails.
    """
    try:
        # The guard lives inside the try so that a non-numeric ``days``
        # (e.g. None from a bad caller) is logged and reported as 0 rows
        # instead of escaping this best-effort method.
        if days <= 0:
            return 0

        # The age is bound as a parameter and applied by SQLite as a
        # modifier relative to 'now'.
        # NOTE(review): SQLite's datetime('now') is UTC in
        # 'YYYY-MM-DD HH:MM:SS' form - confirm last_seen is stored in a
        # format that compares correctly against it.
        deleted = self.db_manager.execute_update(
            "DELETE FROM mesh_connections WHERE last_seen < datetime('now', ?)",
            (f'-{days} days',)
        ) or 0  # normalise a missing row count so the return type stays int

        if deleted > 0:
            self.logger.info(f"Cleaned up {deleted} old mesh_connections entries (older than {days} days)")
        return deleted
    except Exception as e:
        self.logger.error(f"Error cleaning up mesh_connections: {e}")
        return 0
|
||||
|
||||
def _start_batch_writer(self):
|
||||
"""Start background task for batched writes."""
|
||||
def batch_writer_loop():
|
||||
|
||||
@@ -3032,7 +3032,48 @@ class RepeaterManager:
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error cleaning up database: {e}")
|
||||
|
||||
|
||||
def cleanup_repeater_retention(
    self,
    daily_stats_days: int = 90,
    observed_paths_days: int = 90
) -> None:
    """Clean up old daily_stats, unique_advert_packets, and observed_paths rows.

    Called from the scheduler so retention is enforced even when the stats
    command is not run.

    Args:
        daily_stats_days: Retention window (days) applied to daily_stats and
            unique_advert_packets, compared against their ``date`` column.
            A value <= 0 disables cleanup of both tables.
        observed_paths_days: Retention window (days) applied to
            observed_paths, compared against its ``last_seen`` column.
            A value <= 0 disables cleanup of that table.

    Errors are logged and never raised; a failure on one table does not
    prevent the remaining tables from being cleaned.
    """
    try:
        now = datetime.now()
        jobs = []  # (table label, DELETE statement, cutoff value, retention days)

        # A non-positive window would put the cutoff at or after "now" and
        # wipe the table, so treat it as "retention disabled" instead.
        if daily_stats_days > 0:
            # daily_stats and unique_advert_packets use a date column
            cutoff_date = (now - timedelta(days=daily_stats_days)).date().isoformat()
            jobs.append((
                'daily_stats',
                'DELETE FROM daily_stats WHERE date < ?',
                cutoff_date,
                daily_stats_days,
            ))
            jobs.append((
                'unique_advert_packets',
                'DELETE FROM unique_advert_packets WHERE date < ?',
                cutoff_date,
                daily_stats_days,
            ))

        if observed_paths_days > 0:
            # observed_paths uses last_seen (timestamp)
            cutoff_ts = (now - timedelta(days=observed_paths_days)).isoformat()
            jobs.append((
                'observed_paths',
                'DELETE FROM observed_paths WHERE last_seen < ?',
                cutoff_ts,
                observed_paths_days,
            ))
    except Exception as e:
        self.logger.error(f"Error cleaning up repeater retention tables: {e}")
        return

    for table, statement, cutoff, days in jobs:
        # Each table gets its own try so one failing DELETE cannot skip the rest.
        try:
            n = self.db_manager.execute_update(statement, (cutoff,))
            if n > 0:
                self.logger.info(f"Cleaned up {n} old {table} entries (older than {days} days)")
        except Exception as e:
            self.logger.error(f"Error cleaning up repeater retention tables: {e}")
|
||||
|
||||
# Delegate geocoding cache methods to db_manager
|
||||
def get_cached_geocoding(self, query: str) -> Tuple[Optional[float], Optional[float]]:
|
||||
"""Get cached geocoding result for a query"""
|
||||
|
||||
@@ -26,6 +26,8 @@ class MessageScheduler:
|
||||
self.scheduler_thread = None
|
||||
self.last_channel_ops_check_time = 0
|
||||
self.last_message_queue_check_time = 0
|
||||
self.last_data_retention_run = 0
|
||||
self._data_retention_interval_seconds = 86400 # 24 hours
|
||||
|
||||
def get_current_time(self):
|
||||
"""Get current time in configured timezone"""
|
||||
@@ -425,11 +427,83 @@ class MessageScheduler:
|
||||
loop.run_until_complete(self.bot.feed_manager.process_message_queue())
|
||||
self.last_message_queue_check_time = time.time()
|
||||
|
||||
# Data retention: run daily (packet_stream, repeater tables, stats, caches, mesh_connections)
|
||||
if time.time() - self.last_data_retention_run >= self._data_retention_interval_seconds:
|
||||
self._run_data_retention()
|
||||
self.last_data_retention_run = time.time()
|
||||
|
||||
schedule.run_pending()
|
||||
time.sleep(1)
|
||||
|
||||
self.logger.info("Scheduler thread stopped")
|
||||
|
||||
def _run_data_retention(self):
|
||||
"""Run data retention cleanup: packet_stream, repeater tables, stats, caches, mesh_connections."""
|
||||
import asyncio
|
||||
|
||||
def get_retention_days(section: str, key: str, default: int) -> int:
|
||||
try:
|
||||
if self.bot.config.has_section(section) and self.bot.config.has_option(section, key):
|
||||
return self.bot.config.getint(section, key)
|
||||
except Exception:
|
||||
pass
|
||||
return default
|
||||
|
||||
packet_stream_days = get_retention_days('Data_Retention', 'packet_stream_retention_days', 3)
|
||||
purging_log_days = get_retention_days('Data_Retention', 'purging_log_retention_days', 90)
|
||||
daily_stats_days = get_retention_days('Data_Retention', 'daily_stats_retention_days', 90)
|
||||
observed_paths_days = get_retention_days('Data_Retention', 'observed_paths_retention_days', 90)
|
||||
mesh_connections_days = get_retention_days('Data_Retention', 'mesh_connections_retention_days', 7)
|
||||
stats_days = get_retention_days('Stats_Command', 'data_retention_days', 7)
|
||||
|
||||
try:
|
||||
# Packet stream (web viewer integration)
|
||||
if hasattr(self.bot, 'web_viewer_integration') and self.bot.web_viewer_integration:
|
||||
bi = getattr(self.bot.web_viewer_integration, 'bot_integration', None)
|
||||
if bi and hasattr(bi, 'cleanup_old_data'):
|
||||
bi.cleanup_old_data(packet_stream_days)
|
||||
|
||||
# Repeater manager: purging_log and optional daily_stats / unique_advert / observed_paths
|
||||
if hasattr(self.bot, 'repeater_manager') and self.bot.repeater_manager:
|
||||
if hasattr(self.bot, 'main_event_loop') and self.bot.main_event_loop and self.bot.main_event_loop.is_running():
|
||||
future = asyncio.run_coroutine_threadsafe(
|
||||
self.bot.repeater_manager.cleanup_database(purging_log_days),
|
||||
self.bot.main_event_loop
|
||||
)
|
||||
try:
|
||||
future.result(timeout=60)
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error in repeater_manager.cleanup_database: {e}")
|
||||
else:
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
except RuntimeError:
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
loop.run_until_complete(self.bot.repeater_manager.cleanup_database(purging_log_days))
|
||||
if hasattr(self.bot.repeater_manager, 'cleanup_repeater_retention'):
|
||||
self.bot.repeater_manager.cleanup_repeater_retention(
|
||||
daily_stats_days=daily_stats_days,
|
||||
observed_paths_days=observed_paths_days
|
||||
)
|
||||
|
||||
# Stats tables (message_stats, command_stats, path_stats)
|
||||
if hasattr(self.bot, 'command_manager') and self.bot.command_manager:
|
||||
stats_cmd = self.bot.command_manager.commands.get('stats') if getattr(self.bot.command_manager, 'commands', None) else None
|
||||
if stats_cmd and hasattr(stats_cmd, 'cleanup_old_stats'):
|
||||
stats_cmd.cleanup_old_stats(stats_days)
|
||||
|
||||
# Expired caches (geocoding_cache, generic_cache)
|
||||
if hasattr(self.bot, 'db_manager') and self.bot.db_manager and hasattr(self.bot.db_manager, 'cleanup_expired_cache'):
|
||||
self.bot.db_manager.cleanup_expired_cache()
|
||||
|
||||
# Mesh connections (DB prune to match in-memory expiration)
|
||||
if hasattr(self.bot, 'mesh_graph') and self.bot.mesh_graph and hasattr(self.bot.mesh_graph, 'delete_expired_edges_from_db'):
|
||||
self.bot.mesh_graph.delete_expired_edges_from_db(mesh_connections_days)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.exception(f"Error during data retention cleanup: {e}")
|
||||
|
||||
def check_interval_advertising(self):
|
||||
"""Check if it's time to send an interval-based advert"""
|
||||
try:
|
||||
|
||||
@@ -2824,7 +2824,7 @@ class BotDataViewer:
|
||||
self._cleanup_stale_clients()
|
||||
|
||||
# Clean up old data every hour (after 12 stale client cleanups)
|
||||
self._cleanup_old_data(days_to_keep=7)
|
||||
self._cleanup_old_data()
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error in cleanup scheduler: {e}", exc_info=True)
|
||||
@@ -2856,13 +2856,22 @@ class BotDataViewer:
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error cleaning up stale clients: {e}")
|
||||
|
||||
def _cleanup_old_data(self, days_to_keep: int = 7):
|
||||
"""Clean up old packet stream data to prevent database bloat"""
|
||||
def _cleanup_old_data(self, days_to_keep: Optional[int] = None):
|
||||
"""Clean up old packet stream data to prevent database bloat.
|
||||
Uses [Data_Retention] packet_stream_retention_days when days_to_keep is not provided."""
|
||||
conn = None
|
||||
try:
|
||||
import sqlite3
|
||||
import time
|
||||
|
||||
|
||||
if days_to_keep is None:
|
||||
days_to_keep = 3
|
||||
if self.config.has_section('Data_Retention') and self.config.has_option('Data_Retention', 'packet_stream_retention_days'):
|
||||
try:
|
||||
days_to_keep = self.config.getint('Data_Retention', 'packet_stream_retention_days')
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
cutoff_time = time.time() - (days_to_keep * 24 * 60 * 60)
|
||||
|
||||
# Use DEFERRED isolation; longer timeout to wait out bot writes
|
||||
|
||||
@@ -11,6 +11,7 @@ import sys
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from ..utils import resolve_path
|
||||
|
||||
@@ -295,12 +296,21 @@ class BotIntegration:
|
||||
except Exception as e:
|
||||
self.bot.logger.debug(f"Error storing routing data: {e}")
|
||||
|
||||
def cleanup_old_data(self, days_to_keep: int = 7):
|
||||
"""Clean up old packet stream data to prevent database bloat"""
|
||||
def cleanup_old_data(self, days_to_keep: Optional[int] = None):
|
||||
"""Clean up old packet stream data to prevent database bloat.
|
||||
Uses [Data_Retention] packet_stream_retention_days when days_to_keep is not provided."""
|
||||
try:
|
||||
import sqlite3
|
||||
import time
|
||||
|
||||
|
||||
if days_to_keep is None:
|
||||
days_to_keep = 3
|
||||
if self.bot.config.has_section('Data_Retention') and self.bot.config.has_option('Data_Retention', 'packet_stream_retention_days'):
|
||||
try:
|
||||
days_to_keep = self.bot.config.getint('Data_Retention', 'packet_stream_retention_days')
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
cutoff_time = time.time() - (days_to_keep * 24 * 60 * 60)
|
||||
|
||||
db_path = self._get_web_viewer_db_path()
|
||||
|
||||
Reference in New Issue
Block a user