From 6a9e0eccbe58badc06ae7dc26164d3edd9fba31e Mon Sep 17 00:00:00 2001 From: lincomatic Date: Thu, 2 Apr 2026 22:17:53 -0700 Subject: [PATCH] add reconnect logic --- config.ini.example | 14 +++++++++++ modules/core.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/config.ini.example b/config.ini.example index 1c778fb..2c88861 100644 --- a/config.ini.example +++ b/config.ini.example @@ -21,6 +21,20 @@ serial_port = /dev/ttyUSB0 # Connection timeout in seconds timeout = 30 +# Automatic reconnection settings +# Reconnect when the connection is lost (applies to all connection types: serial, BLE, TCP) + +# Maximum number of reconnect attempts before giving up and shutting down +# 0 = unlimited (keep trying forever) — recommended for unattended deployments +reconnect_max_retries = 0 + +# Initial delay in seconds between reconnect attempts +# Doubles after each failed attempt (exponential backoff) up to reconnect_max_delay_seconds +reconnect_delay_seconds = 5 + +# Maximum delay in seconds between reconnect attempts (cap on exponential backoff) +reconnect_max_delay_seconds = 60 + [Bot] # Bot name for identification and logging bot_name = MeshCoreBot diff --git a/modules/core.py b/modules/core.py index d2b2bbe..8592f63 100644 --- a/modules/core.py +++ b/modules/core.py @@ -943,6 +943,60 @@ long_jokes = false signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) + async def _attempt_reconnect(self) -> bool: + """Attempt to reconnect to the MeshCore node with exponential backoff. + + Reads reconnect settings from [Connection]: + reconnect_max_retries – max attempts before giving up (0 = unlimited, default 0) + reconnect_delay_seconds – initial wait between attempts (default 5) + reconnect_max_delay_seconds – cap on wait time (default 60) + + Returns: + bool: True if reconnection succeeded, False if max retries exhausted or shutdown. + """ + max_retries = self.config.getint('Connection', 'reconnect_max_retries', fallback=0) + delay = self.config.getfloat('Connection', 'reconnect_delay_seconds', fallback=5.0) + max_delay = self.config.getfloat('Connection', 'reconnect_max_delay_seconds', fallback=60.0) + + attempt = 0 + while not self._shutdown_event.is_set(): + if max_retries > 0 and attempt >= max_retries: + self.logger.error(f"Reconnect failed after {max_retries} attempt(s), giving up") + return False + + attempt += 1 + retry_label = f"{attempt}/{max_retries}" if max_retries > 0 else str(attempt) + self.logger.info(f"Reconnect attempt {retry_label}...") + + # Clean up the stale connection object + old_meshcore = self.meshcore + self.meshcore = None + self.connected = False + if old_meshcore is not None: + try: + await asyncio.wait_for(old_meshcore.disconnect(), timeout=5.0) + except Exception: + pass + + if await self.connect(): + self.logger.info("Reconnected successfully") + if hasattr(self, 'transmission_tracker') and self.transmission_tracker: + self.transmission_tracker._update_bot_prefix() + return True + + self.logger.warning( + f"Reconnect attempt {retry_label} failed, retrying in {delay:.0f}s..." + ) + # Interruptible sleep so shutdown isn't delayed + elapsed = 0.0 + while elapsed < delay and not self._shutdown_event.is_set(): + await asyncio.sleep(1.0) + elapsed += 1.0 + + delay = min(delay * 2, max_delay) + + return False + async def connect(self) -> bool: """Connect to MeshCore node using official package. @@ -1265,6 +1319,14 @@ long_jokes = false self.logger.info("Bot is running. Press Ctrl+C to stop.") try: while self.connected and not self._shutdown_event.is_set(): + # Check if the underlying connection dropped + if self.meshcore and not self.meshcore.is_connected: + self.logger.warning("Connection lost, attempting to reconnect...") + if not await self._attempt_reconnect(): + self.logger.error("Could not reconnect, shutting down") + break + continue + # Monitor web viewer process and health if self.web_viewer_integration and self.web_viewer_integration.enabled: # Check if process died