#!/usr/bin/env python3
"""
Mesh Graph Module

Tracks observed connections between repeaters to improve path guessing accuracy.
Persists graph state across bot restarts for development scenarios.
"""

import sqlite3
import sys
import time
import threading
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Set
from collections import defaultdict


class MeshGraph:
    """Graph structure tracking observed connections between mesh nodes."""

    def __init__(self, bot):
        """Initialize the mesh graph.

        Args:
            bot: Bot instance with db_manager and config access.
        """
        self.bot = bot
        self.logger = bot.logger
        self.db_manager = bot.db_manager

        # Capture/validation feature flag.
        # graph_capture_enabled controls whether new edge data is collected from
        # packets. When False, no new edges are added and the batch writer
        # thread is not started.
        self.capture_enabled = bot.config.getboolean('Path_Command', 'graph_capture_enabled', fallback=True)

        # In-memory graph storage: {(from_prefix, to_prefix): edge_data}
        self.edges: Dict[Tuple[str, str], Dict] = {}

        # Adjacency indexes for O(1) neighbour lookups (derived from self.edges)
        self._outgoing_index: Dict[str, Set[str]] = defaultdict(set)  # from_prefix -> set of to_prefixes
        self._incoming_index: Dict[str, Set[str]] = defaultdict(set)  # to_prefix -> set of from_prefixes

        # Per-edge last-notification timestamps for web viewer throttling (unix float)
        self._notification_timestamps: Dict[Tuple[str, str], float] = {}

        # Track pending updates for batched writes
        self.pending_updates: Set[Tuple[str, str]] = set()
        self.pending_lock = threading.Lock()

        # Write strategy configuration
        self.write_strategy = bot.config.get('Path_Command', 'graph_write_strategy', fallback='hybrid')
        self.batch_interval = bot.config.getint('Path_Command', 'graph_batch_interval_seconds', fallback=30)
        self.batch_max_pending = bot.config.getint('Path_Command', 'graph_batch_max_pending', fallback=100)

        # Default 14 days: edges older than this carry near-zero recency confidence anyway.
        # Set to 0 in config.ini to load all historical edges (e.g. on servers with ample RAM).
        self.startup_load_days = bot.config.getint('Path_Command', 'graph_startup_load_days', fallback=14)
        self.edge_expiration_days = bot.config.getint('Path_Command', 'graph_edge_expiration_days', fallback=7)

        # Background task for batched writes
        self._batch_task = None
        self._shutdown_event = threading.Event()

        # Load graph from database on startup
        self._load_from_database()

        # Start background batch writer only when capture is active
        if self.capture_enabled and self.write_strategy in ('batched', 'hybrid'):
            self._start_batch_writer()
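    # Example [Path_Command] configuration (illustrative; option names match the
    # config lookups in __init__ and the values shown are the code fallbacks):
    #
    #   [Path_Command]
    #   graph_capture_enabled = true
    #   graph_write_strategy = hybrid            ; immediate | batched | hybrid
    #   graph_batch_interval_seconds = 30
    #   graph_batch_max_pending = 100
    #   graph_startup_load_days = 14             ; 0 = load all non-expired history
    #   graph_edge_expiration_days = 7           ; 0 = never expire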
    def _load_from_database(self):
        """Load graph edges from database on startup."""
        try:
            query = '''
                SELECT from_prefix, to_prefix, from_public_key, to_public_key,
                       observation_count, first_seen, last_seen,
                       avg_hop_position, geographic_distance
                FROM mesh_connections
            '''

            # Build WHERE clause combining startup_load_days and edge_expiration_days.
            # startup_load_days: explicit cap on how far back to load (0 = no cap).
            # edge_expiration_days: always applied — never load edges we would immediately
            # evict as expired (this bounds memory even when startup_load_days=0).
            where_parts = []
            if self.startup_load_days > 0:
                cutoff_date = datetime.now() - timedelta(days=self.startup_load_days)
                where_parts.append(f"last_seen >= '{cutoff_date.isoformat()}'")
            if self.edge_expiration_days > 0:
                expiry_date = datetime.now() - timedelta(days=self.edge_expiration_days)
                where_parts.append(f"last_seen >= '{expiry_date.isoformat()}'")
            if where_parts:
                # ANDing both conditions means the most restrictive (most recent) cutoff wins
                query += " WHERE " + " AND ".join(where_parts)
            query += " ORDER BY last_seen DESC"

            results = self.db_manager.execute_query(query)
            edge_count = 0

            for row in results:
                from_prefix = row['from_prefix']
                to_prefix = row['to_prefix']
                edge_key = (from_prefix, to_prefix)

                # Intern public key strings so identical keys across many edges share
                # a single string object in memory rather than duplicating bytes.
                from_pk = row.get('from_public_key')
                to_pk = row.get('to_public_key')
                if from_pk:
                    from_pk = sys.intern(from_pk)
                if to_pk:
                    to_pk = sys.intern(to_pk)

                self.edges[edge_key] = {
                    'from_prefix': from_prefix,
                    'to_prefix': to_prefix,
                    'from_public_key': from_pk,
                    'to_public_key': to_pk,
                    'observation_count': row.get('observation_count', 1),
                    'first_seen': row.get('first_seen'),
                    'last_seen': row.get('last_seen'),
                    'avg_hop_position': row.get('avg_hop_position'),
                    'geographic_distance': row.get('geographic_distance')
                }

                # Maintain adjacency indexes
                self._outgoing_index[from_prefix].add(to_prefix)
                self._incoming_index[to_prefix].add(from_prefix)
                edge_count += 1

            self.logger.info(f"Loaded {edge_count} graph edges from database")

            # Log statistics
            if edge_count > 0:
                total_observations = sum(e['observation_count'] for e in self.edges.values())
                self.logger.info(f"Graph statistics: {edge_count} edges, {total_observations} total observations")

            # Belt-and-suspenders: prune any edges that slipped through the SQL filter
            # (e.g. timezone edge cases or edges loaded before expiration_days was set)
            self.prune_expired_edges()

        except Exception as e:
            self.logger.warning(f"Error loading graph from database: {e}")
            # Continue with empty graph
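    # Expected shape of the backing table, inferred from the columns selected
    # above and the ON CONFLICT target used in _write_edge_to_db (the
    # authoritative DDL lives wherever db_manager creates its tables; the
    # column types shown here are assumptions):
    #
    #   CREATE TABLE IF NOT EXISTS mesh_connections (
    #       from_prefix          TEXT NOT NULL,
    #       to_prefix            TEXT NOT NULL,
    #       from_public_key      TEXT,
    #       to_public_key        TEXT,
    #       observation_count    INTEGER DEFAULT 1,
    #       first_seen           TEXT,      -- ISO-8601 local time
    #       last_seen            TEXT,      -- ISO-8601 local time
    #       avg_hop_position     REAL,
    #       geographic_distance  REAL,      -- km
    #       UNIQUE (from_prefix, to_prefix)
    #   );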
""" if not from_prefix or not to_prefix: return # Respect the capture kill-switch — allow reads but block writes if not self.capture_enabled: return # Normalize prefixes to lowercase (2-char keys; 2-byte trace only sets confirmed_2byte flag) from_prefix = from_prefix.lower()[:2] to_prefix = to_prefix.lower()[:2] # Intern public key strings so repeated identical keys share one object in RAM if from_public_key: from_public_key = sys.intern(from_public_key) if to_public_key: to_public_key = sys.intern(to_public_key) edge_key = (from_prefix, to_prefix) now = datetime.now() # Update or create edge if edge_key in self.edges: edge = self.edges[edge_key] edge['observation_count'] += 1 edge['last_seen'] = now # Update average hop position if hop_position is not None: current_avg = edge.get('avg_hop_position') count = edge['observation_count'] if current_avg is not None: # Weighted average: (old_avg * (count-1) + new_pos) / count edge['avg_hop_position'] = ((current_avg * (count - 1)) + hop_position) / count else: # First time setting hop position edge['avg_hop_position'] = hop_position # Update public keys if provided (always update if we have a better key) # This allows us to fill in missing keys on existing edges if from_public_key: edge['from_public_key'] = from_public_key if to_public_key: edge['to_public_key'] = to_public_key # Update geographic distance if provided if geographic_distance is not None: edge['geographic_distance'] = geographic_distance # 2-byte trace confirmation (for path weighting when supported) if prefix_bytes == 2: edge['confirmed_2byte'] = True is_new_edge = False else: # New edge — also update adjacency indexes self.edges[edge_key] = { 'from_prefix': from_prefix, 'to_prefix': to_prefix, 'from_public_key': from_public_key, 'to_public_key': to_public_key, 'observation_count': 1, 'first_seen': now, 'last_seen': now, 'avg_hop_position': hop_position if hop_position is not None else None, 'geographic_distance': geographic_distance, 'confirmed_2byte': True if prefix_bytes == 2 else False, } self._outgoing_index[from_prefix].add(to_prefix) self._incoming_index[to_prefix].add(from_prefix) is_new_edge = True # Persist according to write strategy self.logger.debug(f"Mesh graph: Edge {edge_key} - new={is_new_edge}, strategy={self.write_strategy}") if self.write_strategy == 'immediate': self._write_edge_to_db(edge_key, is_new_edge) elif self.write_strategy == 'batched': with self.pending_lock: self.pending_updates.add(edge_key) if len(self.pending_updates) >= self.batch_max_pending: # Force flush if too many pending self._flush_pending_updates_sync() elif self.write_strategy == 'hybrid': if is_new_edge: # Immediate write for new edges self._write_edge_to_db(edge_key, True) else: # Batched for updates with self.pending_lock: self.pending_updates.add(edge_key) if len(self.pending_updates) >= self.batch_max_pending: # Force flush if too many pending self._flush_pending_updates_sync() # Notify web viewer of edge update self._notify_web_viewer_edge(edge_key, is_new_edge) def _notify_web_viewer_edge(self, edge_key: Tuple[str, str], is_new: bool): """Notify web viewer of edge update via bot integration. New edges always trigger an immediate notification. Updates to existing edges are throttled to at most once every 10 seconds to reduce HTTP traffic on busy meshes. 
""" try: if not hasattr(self.bot, 'web_viewer_integration') or not self.bot.web_viewer_integration: return if not hasattr(self.bot.web_viewer_integration, 'bot_integration'): return edge = self.edges.get(edge_key) if not edge: return # Throttle repeated updates for the same edge (new edges always notify) now_ts = time.time() if not is_new: last_notified = self._notification_timestamps.get(edge_key, 0.0) if (now_ts - last_notified) < 10.0: return # Skip — notified recently enough self._notification_timestamps[edge_key] = now_ts # Prepare edge data for web viewer edge_data = { 'from_prefix': edge['from_prefix'], 'to_prefix': edge['to_prefix'], 'from_public_key': edge.get('from_public_key'), 'to_public_key': edge.get('to_public_key'), 'observation_count': edge['observation_count'], 'first_seen': edge['first_seen'].isoformat() if isinstance(edge['first_seen'], datetime) else str(edge['first_seen']), 'last_seen': edge['last_seen'].isoformat() if isinstance(edge['last_seen'], datetime) else str(edge['last_seen']), 'avg_hop_position': edge.get('avg_hop_position'), 'geographic_distance': edge.get('geographic_distance'), 'is_new': is_new } # Send update asynchronously self.bot.web_viewer_integration.bot_integration.send_mesh_edge_update(edge_data) except Exception as e: self.logger.debug(f"Error notifying web viewer of edge update: {e}") def _recalculate_distance_if_needed( self, edge: Dict, conn: Optional[sqlite3.Connection] = None, location_cache: Optional[Dict[str, Tuple[float, float]]] = None, ) -> Optional[float]: """Recalculate geographic distance using full public keys if available. This ensures we get the correct location when there are prefix collisions. Args: edge: Edge dictionary with prefix and optional public keys. conn: Optional existing DB connection for batch operations. location_cache: Optional cache for location lookups within a flush (keyed by pk: or prefix:). Returns: Optional[float]: Recalculated distance in km, or None if can't calculate. """ from .utils import calculate_distance # Get location for 'from' node (conn optional for single-connection batch flush) if edge.get('from_public_key'): from_location = self._get_location_by_public_key( edge['from_public_key'], conn=conn, location_cache=location_cache ) else: from_location = None if not from_location: to_location_temp = None if edge.get('to_public_key'): to_location_temp = self._get_location_by_public_key( edge['to_public_key'], conn=conn, location_cache=location_cache ) if not to_location_temp: to_location_temp = self._get_location_by_prefix( edge['to_prefix'], conn=conn, location_cache=location_cache ) from_location = self._get_location_by_prefix( edge['from_prefix'], to_location_temp, conn=conn, location_cache=location_cache ) # Get location for 'to' node if edge.get('to_public_key'): to_location = self._get_location_by_public_key( edge['to_public_key'], conn=conn, location_cache=location_cache ) else: to_location = None if not to_location: to_location = self._get_location_by_prefix( edge['to_prefix'], from_location, conn=conn, location_cache=location_cache ) # Calculate distance if we have both locations if from_location and to_location: return calculate_distance( from_location[0], from_location[1], to_location[0], to_location[1] ) return None def _get_location_by_public_key( self, public_key: str, conn: Optional[sqlite3.Connection] = None, location_cache: Optional[Dict[str, Tuple[float, float]]] = None, ) -> Optional[Tuple[float, float]]: """Get location for a full public key (more accurate than prefix lookup). 
    def _get_location_by_public_key(
        self,
        public_key: str,
        conn: Optional[sqlite3.Connection] = None,
        location_cache: Optional[Dict[str, Tuple[float, float]]] = None,
    ) -> Optional[Tuple[float, float]]:
        """Get location for a full public key (more accurate than prefix lookup).

        Prefers starred repeaters if there are somehow multiple entries
        (shouldn't happen with a full key).
        """
        cache_key = f"pk:{public_key}" if location_cache is not None else None
        if cache_key is not None and cache_key in location_cache:
            return location_cache[cache_key]

        try:
            query = '''
                SELECT latitude, longitude
                FROM complete_contact_tracking
                WHERE public_key = ?
                  AND latitude IS NOT NULL AND longitude IS NOT NULL
                  AND latitude != 0 AND longitude != 0
                  AND role IN ('repeater', 'roomserver')
                ORDER BY is_starred DESC, COALESCE(last_advert_timestamp, last_heard) DESC
                LIMIT 1
            '''
            if conn is not None:
                results = self.db_manager.execute_query_on_connection(conn, query, (public_key,))
            else:
                results = self.db_manager.execute_query(query, (public_key,))

            if results:
                row = results[0]
                lat = row.get('latitude')
                lon = row.get('longitude')
                if lat is not None and lon is not None:
                    result = (float(lat), float(lon))
                    if cache_key is not None:
                        location_cache[cache_key] = result
                    return result
        except Exception as e:
            self.logger.debug(f"Error getting location by public key {public_key[:16]}...: {e}")
        return None
    def _get_location_by_prefix(
        self,
        prefix: str,
        reference_location: Optional[Tuple[float, float]] = None,
        conn: Optional[sqlite3.Connection] = None,
        location_cache: Optional[Dict[str, Tuple[float, float]]] = None,
    ) -> Optional[Tuple[float, float]]:
        """Get location for a prefix (fallback when the full public key is not available).

        For LoRa networks, prefers shorter distances when there are prefix
        collisions, as LoRa range is limited by the curvature of the earth.

        Args:
            prefix: 2-character hex prefix.
            reference_location: Optional (lat, lon) to calculate distance from
                for the LoRa shorter-is-likelier preference.
            conn: Optional existing DB connection for batch operations.
            location_cache: Optional cache for location lookups within a flush.
        """
        if location_cache is not None:
            if reference_location is not None:
                cache_key = f"prefix:{prefix}:{reference_location[0]}:{reference_location[1]}"
            else:
                cache_key = f"prefix:{prefix}"
            if cache_key in location_cache:
                return location_cache[cache_key]

        try:
            prefix_pattern = f"{prefix}%"

            # Get all candidates with locations
            query = '''
                SELECT latitude, longitude, is_starred,
                       COALESCE(last_advert_timestamp, last_heard) as last_seen
                FROM complete_contact_tracking
                WHERE public_key LIKE ?
                  AND latitude IS NOT NULL AND longitude IS NOT NULL
                  AND latitude != 0 AND longitude != 0
                  AND role IN ('repeater', 'roomserver')
            '''
            if conn is not None:
                results = self.db_manager.execute_query_on_connection(conn, query, (prefix_pattern,))
            else:
                results = self.db_manager.execute_query(query, (prefix_pattern,))

            if not results:
                return None

            # If we have a reference location, prefer shorter distances (LoRa range limitation)
            if reference_location and len(results) > 1:
                from .utils import calculate_distance
                ref_lat, ref_lon = reference_location

                # Calculate distances for each candidate
                candidates_with_distance = []
                for row in results:
                    lat = row.get('latitude')
                    lon = row.get('longitude')
                    if lat is not None and lon is not None:
                        distance = calculate_distance(ref_lat, ref_lon, float(lat), float(lon))
                        is_starred = row.get('is_starred', False)
                        last_seen = row.get('last_seen', '')
                        candidates_with_distance.append((distance, is_starred, last_seen, row))

                if candidates_with_distance:
                    # Stable multi-pass sort; the last pass is the primary key.
                    # Final order: starred first, then shorter distance
                    # (better for LoRa), then most recent. ISO timestamps sort
                    # correctly as strings, so reverse=True gives newest first.
                    candidates_with_distance.sort(key=lambda x: x[2] or '', reverse=True)  # most recent first
                    candidates_with_distance.sort(key=lambda x: x[0])                      # shorter distance first
                    candidates_with_distance.sort(key=lambda x: not x[1])                  # starred first

                    # Take the best candidate
                    best_row = candidates_with_distance[0][3]
                    lat = best_row.get('latitude')
                    lon = best_row.get('longitude')
                    if lat is not None and lon is not None:
                        result = (float(lat), float(lon))
                        if location_cache is not None and reference_location is not None:
                            cache_key = f"prefix:{prefix}:{reference_location[0]}:{reference_location[1]}"
                            location_cache[cache_key] = result
                        return result

            # No reference location or single result — prefer starred, then most recent
            results.sort(key=lambda x: x.get('last_seen') or '', reverse=True)  # most recent first
            results.sort(key=lambda x: not x.get('is_starred', False))          # starred first

            row = results[0]
            lat = row.get('latitude')
            lon = row.get('longitude')
            if lat is not None and lon is not None:
                result = (float(lat), float(lon))
                if location_cache is not None:
                    cache_key = f"prefix:{prefix}" if reference_location is None else f"prefix:{prefix}:{reference_location[0]}:{reference_location[1]}"
                    location_cache[cache_key] = result
                return result
        except Exception as e:
            self.logger.debug(f"Error getting location by prefix {prefix}: {e}")
        return None
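    # Disambiguation example (hypothetical data): if prefix "a1" matches two
    # repeaters — one 12 km from the reference location and one 400 km away —
    # the 12 km candidate wins unless the distant one is starred, since the
    # sort above ranks starred > closer > more recent.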
    def _write_edge_to_db(
        self,
        edge_key: Tuple[str, str],
        is_new: bool,
        conn: Optional[sqlite3.Connection] = None,
        location_cache: Optional[Dict[str, Tuple[float, float]]] = None,
        skip_distance_recalc: bool = False,
    ):
        """Write a single edge to the database.

        Args:
            edge_key: (from_prefix, to_prefix) tuple.
            is_new: True if this is a new edge, False if updating an existing one.
            conn: Optional existing DB connection for batch operations (caller commits).
            location_cache: Optional cache for location lookups within a flush.
            skip_distance_recalc: If True, skip distance recalculation (used by
                _flush_pending_updates_sync, which already recalculates before
                calling here).
        """
        if edge_key not in self.edges:
            return

        edge = self.edges[edge_key]

        # Recalculate distance using full public keys if available (more accurate).
        # Skipped when called from the batch flush loop, which already recalculated.
        if not skip_distance_recalc and (edge.get('from_public_key') or edge.get('to_public_key')):
            recalculated_distance = self._recalculate_distance_if_needed(
                edge, conn=conn, location_cache=location_cache
            )
            if recalculated_distance is not None:
                edge['geographic_distance'] = recalculated_distance
                self.logger.debug(f"Mesh graph: Recalculated distance for {edge_key} using public keys: {recalculated_distance:.1f} km")

        try:
            if is_new:
                # Upsert new edge.
                # Use INSERT ... ON CONFLICT DO UPDATE so that if the row already exists
                # in the database (e.g. it was filtered out of the in-memory graph at
                # startup by startup_load_days / edge_expiration_days, or written by a
                # concurrent process), we merge rather than fail with a UNIQUE constraint.
                query = '''
                    INSERT INTO mesh_connections
                        (from_prefix, to_prefix, from_public_key, to_public_key,
                         observation_count, first_seen, last_seen,
                         avg_hop_position, geographic_distance)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                    ON CONFLICT(from_prefix, to_prefix) DO UPDATE SET
                        observation_count = MAX(observation_count, excluded.observation_count),
                        last_seen = MAX(last_seen, excluded.last_seen),
                        avg_hop_position = excluded.avg_hop_position,
                        geographic_distance = COALESCE(excluded.geographic_distance, geographic_distance),
                        from_public_key = COALESCE(excluded.from_public_key, from_public_key),
                        to_public_key = COALESCE(excluded.to_public_key, to_public_key)
                '''
                params = (
                    edge['from_prefix'],
                    edge['to_prefix'],
                    edge.get('from_public_key'),
                    edge.get('to_public_key'),
                    edge['observation_count'],
                    edge['first_seen'].isoformat() if isinstance(edge['first_seen'], datetime) else edge['first_seen'],
                    edge['last_seen'].isoformat() if isinstance(edge['last_seen'], datetime) else edge['last_seen'],
                    edge.get('avg_hop_position'),
                    edge.get('geographic_distance')
                )
            else:
                # Update existing edge — recalculate distance if we now have public keys,
                # but only when not already done by the caller (skip_distance_recalc=False).
                current_distance = edge.get('geographic_distance')
                if not skip_distance_recalc and (edge.get('from_public_key') or edge.get('to_public_key')) and current_distance:
                    recalculated = self._recalculate_distance_if_needed(
                        edge, conn=conn, location_cache=location_cache
                    )
                    if recalculated is not None:
                        # Only correct the stored value when the recalculated distance
                        # differs significantly (more than 20%)
                        if abs(recalculated - current_distance) / max(current_distance, 1.0) > 0.2:
                            edge['geographic_distance'] = recalculated
                            self.logger.info(f"Mesh graph: Corrected distance for {edge_key}: {current_distance:.1f} -> {recalculated:.1f} km")

                # Update existing edge.
                # Public keys are always written when present, which fills in
                # missing keys on existing edges.
                from_key = edge.get('from_public_key')
                to_key = edge.get('to_public_key')
                query = self._MESH_EDGE_UPDATE_QUERY
                params = (
                    edge['observation_count'],
                    edge['last_seen'].isoformat() if isinstance(edge['last_seen'], datetime) else edge['last_seen'],
                    edge.get('avg_hop_position'),
                    edge.get('geographic_distance'),
                    from_key,  # First occurrence for CASE WHEN check
                    from_key,  # Second occurrence for value assignment
                    to_key,    # First occurrence for CASE WHEN check
                    to_key,    # Second occurrence for value assignment
                    edge['from_prefix'],
                    edge['to_prefix']
                )

            if conn is not None:
                rows_affected = self.db_manager.execute_update_on_connection(conn, query, params)
            else:
                rows_affected = self.db_manager.execute_update(query, params)

            if rows_affected > 0:
                self.logger.debug(f"Mesh graph: Successfully wrote edge {edge_key} to database ({'INSERT' if is_new else 'UPDATE'}, {rows_affected} rows)")
            else:
                self.logger.warning(f"Mesh graph: Edge write returned 0 rows affected for {edge_key}")
        except Exception as e:
            self.logger.warning(f"Error writing edge to database: {e}")
            import traceback
            self.logger.debug(traceback.format_exc())

    # UPDATE statement used for both single-edge writes and batch executemany
    _MESH_EDGE_UPDATE_QUERY = '''
        UPDATE mesh_connections
        SET observation_count = ?,
            last_seen = ?,
            avg_hop_position = ?,
            geographic_distance = ?,
            from_public_key = CASE WHEN ? IS NOT NULL THEN ? ELSE from_public_key END,
            to_public_key = CASE WHEN ? IS NOT NULL THEN ? ELSE to_public_key END
        WHERE from_prefix = ? AND to_prefix = ?
    '''
    def _build_update_params_for_edge(
        self,
        edge_key: Tuple[str, str],
        conn: Optional[sqlite3.Connection],
        location_cache: Optional[Dict[str, Tuple[float, float]]],
    ) -> Optional[Tuple]:
        """Build UPDATE params for an edge (for batch executemany).

        Returns None to skip.
        """
        if edge_key not in self.edges:
            return None
        try:
            edge = self.edges[edge_key]

            # Recalculate distance if we have public keys (same logic as _write_edge_to_db).
            # A second threshold-gated recalculation here would be dead code: the value
            # was just overwritten, so the 20% check could never fire.
            if edge.get('from_public_key') or edge.get('to_public_key'):
                recalculated_distance = self._recalculate_distance_if_needed(
                    edge, conn=conn, location_cache=location_cache
                )
                if recalculated_distance is not None:
                    edge['geographic_distance'] = recalculated_distance

            from_key = edge.get('from_public_key')
            to_key = edge.get('to_public_key')
            last_seen = edge['last_seen']
            if isinstance(last_seen, datetime):
                last_seen = last_seen.isoformat()
            return (
                edge['observation_count'],
                last_seen,
                edge.get('avg_hop_position'),
                edge.get('geographic_distance'),
                from_key, from_key,
                to_key, to_key,
                edge['from_prefix'],
                edge['to_prefix'],
            )
        except Exception as e:
            self.logger.debug(f"Error building update params for {edge_key}: {e}")
            return None

    def prune_expired_edges(self) -> int:
        """Remove edges from the in-memory graph that have exceeded
        graph_edge_expiration_days.

        Only evicts from RAM — the database rows are kept so that historical
        data is preserved and can be reloaded if the expiration window is later
        widened.

        Returns:
            int: Number of edges evicted.
        """
        if self.edge_expiration_days <= 0:
            return 0

        cutoff = datetime.now() - timedelta(days=self.edge_expiration_days)
        expired_keys = []
        for edge_key, edge in self.edges.items():
            last_seen = edge.get('last_seen')
            if last_seen is None:
                continue
            if isinstance(last_seen, str):
                try:
                    last_seen = datetime.fromisoformat(last_seen.replace('Z', '+00:00'))
                except ValueError:
                    continue
            # Timestamps are written naive-local; normalize any tz-aware value
            # so the comparison below can't raise TypeError
            if last_seen.tzinfo is not None:
                last_seen = last_seen.astimezone().replace(tzinfo=None)
            if last_seen < cutoff:
                expired_keys.append(edge_key)

        for edge_key in expired_keys:
            from_prefix, to_prefix = edge_key
            del self.edges[edge_key]
            # Clean up adjacency indexes
            if from_prefix in self._outgoing_index:
                self._outgoing_index[from_prefix].discard(to_prefix)
                if not self._outgoing_index[from_prefix]:
                    del self._outgoing_index[from_prefix]
            if to_prefix in self._incoming_index:
                self._incoming_index[to_prefix].discard(from_prefix)
                if not self._incoming_index[to_prefix]:
                    del self._incoming_index[to_prefix]
            # Drop stale notification timestamp if present
            self._notification_timestamps.pop(edge_key, None)

        if expired_keys:
            self.logger.debug(f"Pruned {len(expired_keys)} expired graph edges (older than {self.edge_expiration_days} days)")
        return len(expired_keys)
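    # Retention split: prune_expired_edges() only evicts from RAM, while
    # delete_expired_edges_from_db() below removes rows on disk. A usage
    # sketch (the scheduler wiring itself lives outside this module):
    #
    #   graph.prune_expired_edges()                   # cheap, runs often
    #   graph.delete_expired_edges_from_db(days=30)   # e.g. a daily job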
""" if days <= 0: return 0 try: deleted = self.db_manager.execute_update( "DELETE FROM mesh_connections WHERE last_seen < datetime('now', ?)", (f'-{days} days',) ) if deleted > 0: self.logger.info(f"Cleaned up {deleted} old mesh_connections entries (older than {days} days)") return deleted except Exception as e: self.logger.error(f"Error cleaning up mesh_connections: {e}") return 0 def _start_batch_writer(self): """Start background task for batched writes.""" def batch_writer_loop(): while not self._shutdown_event.is_set(): self._shutdown_event.wait(self.batch_interval) if not self._shutdown_event.is_set(): # Flush synchronously (database operations are synchronous) self._flush_pending_updates_sync() # Periodically evict expired edges from RAM self.prune_expired_edges() import threading batch_thread = threading.Thread(target=batch_writer_loop, daemon=True) batch_thread.start() self._batch_thread = batch_thread def _flush_pending_updates_sync(self): """Flush all pending edge updates to database (synchronous version). Uses a single connection for the entire batch to avoid 'unable to open database file' when many edges are written in quick succession. Handles both new edges (INSERT) and existing edges (UPDATE). """ with self.pending_lock: if not self.pending_updates: return updates = list(self.pending_updates) self.pending_updates.clear() conn = None location_cache: Dict[str, Tuple[float, float]] = {} try: conn = self.db_manager.get_connection() cursor = conn.cursor() for edge_key in updates: if edge_key not in self.edges: continue edge = self.edges[edge_key] # Recalculate distance if we have public keys if edge.get('from_public_key') or edge.get('to_public_key'): recalculated = self._recalculate_distance_if_needed( edge, conn=conn, location_cache=location_cache ) if recalculated is not None: edge['geographic_distance'] = recalculated # Check if edge exists in DB cursor.execute( 'SELECT 1 FROM mesh_connections WHERE from_prefix = ? AND to_prefix = ?', (edge_key[0], edge_key[1]), ) is_new = cursor.fetchone() is None # Distance was already recalculated above — tell _write_edge_to_db to skip it self._write_edge_to_db(edge_key, is_new, conn=conn, location_cache=location_cache, skip_distance_recalc=True) if conn: conn.commit() except Exception as e: self.logger.warning(f"Error flushing graph updates: {e}") if conn: try: conn.rollback() except Exception: pass finally: if conn: try: conn.close() except Exception: pass if updates: self.logger.debug(f"Flushed {len(updates)} pending graph edge updates") async def _flush_pending_updates(self): """Flush all pending edge updates to database (async wrapper).""" self._flush_pending_updates_sync() def has_edge(self, from_prefix: str, to_prefix: str) -> bool: """Check if an edge exists in the graph. Args: from_prefix: Source node prefix. to_prefix: Destination node prefix. Returns: bool: True if edge exists. """ from_prefix = from_prefix.lower()[:2] to_prefix = to_prefix.lower()[:2] return (from_prefix, to_prefix) in self.edges def get_edge(self, from_prefix: str, to_prefix: str) -> Optional[Dict]: """Get edge data if it exists. Args: from_prefix: Source node prefix. to_prefix: Destination node prefix. Returns: Dict with edge data or None if not found. """ from_prefix = from_prefix.lower()[:2] to_prefix = to_prefix.lower()[:2] return self.edges.get((from_prefix, to_prefix)) def get_outgoing_edges(self, prefix: str) -> List[Dict]: """Get all edges originating from a node. Uses the adjacency index for O(1) lookup instead of a full scan. 
    def get_outgoing_edges(self, prefix: str) -> List[Dict]:
        """Get all edges originating from a node.

        Uses the adjacency index for O(1) lookup instead of a full scan.

        Args:
            prefix: Node prefix.

        Returns:
            List of edge dictionaries.
        """
        prefix = prefix.lower()[:2]
        to_prefixes = self._outgoing_index.get(prefix)
        if not to_prefixes:
            return []
        result = []
        for to_prefix in to_prefixes:
            edge = self.edges.get((prefix, to_prefix))
            if edge is not None:
                result.append(edge)
        return result

    def get_incoming_edges(self, prefix: str) -> List[Dict]:
        """Get all edges ending at a node.

        Uses the adjacency index for O(1) lookup instead of a full scan.

        Args:
            prefix: Node prefix.

        Returns:
            List of edge dictionaries.
        """
        prefix = prefix.lower()[:2]
        from_prefixes = self._incoming_index.get(prefix)
        if not from_prefixes:
            return []
        result = []
        for from_prefix in from_prefixes:
            edge = self.edges.get((from_prefix, prefix))
            if edge is not None:
                result.append(edge)
        return result

    def validate_path_segment(self, from_prefix: str, to_prefix: str,
                              min_observations: int = 1,
                              check_bidirectional: bool = False) -> Tuple[bool, float]:
        """Validate a path segment using graph data.

        Args:
            from_prefix: Source node prefix.
            to_prefix: Destination node prefix.
            min_observations: Minimum observations required for confidence.
            check_bidirectional: If True, check whether the reverse edge exists
                and boost confidence.

        Returns:
            Tuple of (is_valid, confidence_score) where confidence is 0.0-1.0.
        """
        edge = self.get_edge(from_prefix, to_prefix)
        if not edge:
            return (False, 0.0)
        if edge['observation_count'] < min_observations:
            return (False, 0.0)

        # Confidence based on observation count and recency
        obs_count = edge['observation_count']
        last_seen = edge['last_seen']
        if isinstance(last_seen, str):
            last_seen = datetime.fromisoformat(last_seen.replace('Z', '+00:00'))
        if last_seen.tzinfo is not None:
            # Timestamps are written naive-local; normalize so subtraction can't raise
            last_seen = last_seen.astimezone().replace(tzinfo=None)
        hours_ago = (datetime.now() - last_seen).total_seconds() / 3600.0

        # Observation count confidence (saturating curve: approaches 1.0 as observations grow)
        obs_confidence = min(1.0, 0.3 + (0.7 * (1.0 - 1.0 / (1.0 + obs_count / 10.0))))

        # Recency confidence (exponential decay, 48-hour half-life for longer advert intervals)
        recency_confidence = 1.0 if hours_ago < 1 else max(0.0, 2.0 ** (-hours_ago / 48.0))

        # Combined confidence
        confidence = (obs_confidence * 0.6) + (recency_confidence * 0.4)

        # Bidirectional edge bonus
        if check_bidirectional:
            reverse_edge = self.get_edge(to_prefix, from_prefix)
            if reverse_edge and reverse_edge['observation_count'] >= min_observations:
                # A bidirectional connection is more reliable
                confidence = min(1.0, confidence + 0.15)

        return (True, confidence)
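    # Worked example for the confidence formula above: an edge with 10
    # observations last seen 48 hours ago scores
    #   obs_confidence     = 0.3 + 0.7 * (1 - 1 / (1 + 10/10)) = 0.65
    #   recency_confidence = 2 ** (-48/48)                     = 0.50
    #   confidence         = 0.65 * 0.6 + 0.50 * 0.4           = 0.59
    # before any +0.15 bidirectional bonus.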
    def validate_path(self, path_nodes: List[str], min_observations: int = 1) -> Tuple[bool, float]:
        """Validate an entire path using graph data.

        Args:
            path_nodes: List of node prefixes in path order.
            min_observations: Minimum observations required per edge.

        Returns:
            Tuple of (is_valid, average_confidence).
        """
        if len(path_nodes) < 2:
            return (True, 1.0)  # A single-node or empty path is always valid

        validations = []
        for i in range(len(path_nodes) - 1):
            from_node = path_nodes[i]
            to_node = path_nodes[i + 1]
            is_valid, confidence = self.validate_path_segment(from_node, to_node, min_observations)
            if not is_valid:
                return (False, 0.0)
            validations.append(confidence)

        # Return average confidence
        avg_confidence = sum(validations) / len(validations) if validations else 0.0
        return (True, avg_confidence)

    def get_candidate_score(self, candidate_prefix: str,
                            prev_prefix: Optional[str],
                            next_prefix: Optional[str],
                            min_observations: int = 1,
                            hop_position: Optional[int] = None,
                            use_bidirectional: bool = True,
                            use_hop_position: bool = True) -> float:
        """Get a graph-based score for a candidate node in a path.

        Args:
            candidate_prefix: The candidate node prefix.
            prev_prefix: Previous node in path (if available).
            next_prefix: Next node in path (if available).
            min_observations: Minimum observations required.
            hop_position: Current position in path (0-based index) for hop position validation.
            use_bidirectional: If True, check bidirectional edges for higher confidence.
            use_hop_position: If True, validate against avg_hop_position if available.

        Returns:
            Score from 0.0 to 1.0 based on graph evidence.
        """
        score = 0.0
        evidence_count = 0

        # Check edge from previous node
        if prev_prefix:
            is_valid, confidence = self.validate_path_segment(
                prev_prefix, candidate_prefix, min_observations,
                check_bidirectional=use_bidirectional
            )
            if is_valid:
                score += confidence
                evidence_count += 1

        # Check edge to next node
        if next_prefix:
            is_valid, confidence = self.validate_path_segment(
                candidate_prefix, next_prefix, min_observations,
                check_bidirectional=use_bidirectional
            )
            if is_valid:
                score += confidence
                evidence_count += 1

        if evidence_count == 0:
            return 0.0

        # Base score is the average over available evidence
        base_score = score / evidence_count

        # Hop position validation bonus: check whether the candidate appears in
        # the expected position based on avg_hop_position, looking at both the
        # incoming and outgoing edges
        if use_hop_position and hop_position is not None:
            hop_position_match = False
            if prev_prefix:
                edge = self.get_edge(prev_prefix, candidate_prefix)
                if edge and edge.get('avg_hop_position') is not None:
                    # Allow some tolerance (within 0.5 of the expected position)
                    expected_pos = edge['avg_hop_position']
                    if abs(hop_position - expected_pos) <= 0.5:
                        hop_position_match = True
            if not hop_position_match and next_prefix:
                edge = self.get_edge(candidate_prefix, next_prefix)
                if edge and edge.get('avg_hop_position') is not None:
                    # For the outgoing edge, the expected position is one less
                    # (since the candidate is the from node)
                    expected_pos = edge['avg_hop_position'] - 1
                    if abs(hop_position - expected_pos) <= 0.5:
                        hop_position_match = True
            if hop_position_match:
                base_score = min(1.0, base_score + 0.1)

        # Geographic distance data (if available) is informational: we don't
        # penalize based on distance alone, but its presence indicates a
        # well-tracked edge and earns a small tie-breaker bonus
        if prev_prefix or next_prefix:
            geographic_available = False
            if prev_prefix:
                edge = self.get_edge(prev_prefix, candidate_prefix)
                if edge and edge.get('geographic_distance') is not None:
                    geographic_available = True
            if not geographic_available and next_prefix:
                edge = self.get_edge(candidate_prefix, next_prefix)
                if edge and edge.get('geographic_distance') is not None:
                    geographic_available = True
            if geographic_available:
                base_score = min(1.0, base_score + 0.05)

        return base_score
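    # Scoring sketch (hypothetical data): candidate "b2" at hop_position 2 with
    # segment confidences 0.6 (from prev) and 0.8 (to next) starts at
    # (0.6 + 0.8) / 2 = 0.7; if the prev->candidate edge has avg_hop_position
    # 2.3 (|2 - 2.3| <= 0.5), the score becomes 0.8, and a stored
    # geographic_distance on either edge lifts it to 0.85.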
    def find_intermediate_nodes(self, from_prefix: str, to_prefix: str,
                                min_observations: int = 1,
                                max_hops: int = 2) -> List[Tuple[str, float]]:
        """Find intermediate nodes that connect from_prefix to to_prefix.

        Uses multi-hop path inference to find nodes that connect two prefixes
        when a direct edge may not exist or has low confidence.

        Args:
            from_prefix: Source node prefix.
            to_prefix: Destination node prefix.
            min_observations: Minimum observations required per edge.
            max_hops: Maximum number of hops to search (default: 2, with fallback to 3).

        Returns:
            List of (candidate_prefix, score) tuples sorted by score (highest first).
            Score is 0.0-1.0 based on path strength.
        """
        from_prefix = from_prefix.lower()[:2]
        to_prefix = to_prefix.lower()[:2]
        candidates: Dict[str, float] = {}

        # Try 2-hop paths first: from_prefix -> intermediate -> to_prefix
        outgoing_edges = self.get_outgoing_edges(from_prefix)
        for edge in outgoing_edges:
            intermediate_prefix = edge['to_prefix']

            # Skip if this is the destination (direct edge case)
            if intermediate_prefix == to_prefix:
                continue

            # Check if the intermediate connects to the destination
            to_edge = self.get_edge(intermediate_prefix, to_prefix)
            if not to_edge or to_edge['observation_count'] < min_observations:
                continue

            # Validate both edges
            from_valid, from_confidence = self.validate_path_segment(
                from_prefix, intermediate_prefix, min_observations, check_bidirectional=True
            )
            to_valid, to_confidence = self.validate_path_segment(
                intermediate_prefix, to_prefix, min_observations, check_bidirectional=True
            )

            if from_valid and to_valid:
                # Score is the minimum of both edges (weakest link)
                path_score = min(from_confidence, to_confidence)

                # Bidirectional path bonus
                reverse_from = self.get_edge(intermediate_prefix, from_prefix)
                reverse_to = self.get_edge(to_prefix, intermediate_prefix)
                bidirectional_bonus = 1.0
                if reverse_from and reverse_from['observation_count'] >= min_observations:
                    if reverse_to and reverse_to['observation_count'] >= min_observations:
                        # Both edges are bidirectional — strong evidence
                        bidirectional_bonus = 1.2
                    else:
                        bidirectional_bonus = 1.1
                elif reverse_to and reverse_to['observation_count'] >= min_observations:
                    bidirectional_bonus = 1.1
                path_score = min(1.0, path_score * bidirectional_bonus)

                # Keep the best score if we've seen this candidate before
                if intermediate_prefix not in candidates or path_score > candidates[intermediate_prefix]:
                    candidates[intermediate_prefix] = path_score

        # If no 2-hop paths were found and max_hops >= 3, try 3-hop paths:
        # from_prefix -> intermediate1 -> intermediate2 -> to_prefix
        if not candidates and max_hops >= 3:
            for edge1 in outgoing_edges:
                intermediate1 = edge1['to_prefix']
                if intermediate1 == to_prefix:
                    continue

                # Get edges from intermediate1
                intermediate1_edges = self.get_outgoing_edges(intermediate1)
                for edge2 in intermediate1_edges:
                    intermediate2 = edge2['to_prefix']
                    if intermediate2 == from_prefix or intermediate2 == intermediate1:
                        continue

                    # Check if intermediate2 connects to the destination
                    to_edge = self.get_edge(intermediate2, to_prefix)
                    if not to_edge or to_edge['observation_count'] < min_observations:
                        continue

                    # Validate all three edges
                    valid1, conf1 = self.validate_path_segment(from_prefix, intermediate1, min_observations)
                    valid2, conf2 = self.validate_path_segment(intermediate1, intermediate2, min_observations)
                    valid3, conf3 = self.validate_path_segment(intermediate2, to_prefix, min_observations)

                    if valid1 and valid2 and valid3:
                        # Score is the minimum of all three edges, discounted
                        # because 3-hop inference is less reliable
                        path_score = min(conf1, conf2, conf3) * 0.8

                        # Use intermediate2 as the candidate (the node before the destination)
                        if intermediate2 not in candidates or path_score > candidates[intermediate2]:
                            candidates[intermediate2] = path_score

        # Sort by score (highest first) and return
        return sorted(candidates.items(), key=lambda x: x[1], reverse=True)
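    # Usage sketch (hypothetical prefixes and scores): suggest which node likely
    # bridges "a1" to "c7" when no direct edge was observed:
    #
    #   for prefix, score in graph.find_intermediate_nodes('a1', 'c7', max_hops=3):
    #       print(f"{prefix}: {score:.2f}")   # e.g. "f3: 0.72" — weakest-link scored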
    def shutdown(self):
        """Shut down the graph, flushing all pending writes."""
        self.logger.info("Shutting down mesh graph, flushing pending writes...")

        # Signal shutdown
        self._shutdown_event.set()

        # Flush pending updates
        try:
            self._flush_pending_updates_sync()
        except Exception as e:
            self.logger.warning(f"Error flushing graph updates on shutdown: {e}")

        # Log final statistics
        if self.edges:
            total_observations = sum(e['observation_count'] for e in self.edges.values())
            self.logger.info(f"Graph shutdown complete: {len(self.edges)} edges, {total_observations} total observations")