Files
meshcore-bot/shared/geocoding.py
T
agessaman 7042059d6d refactor(shared): extract geocoding and text utilities into shared/
Move geocoding functions (calculate_distance, Nominatim wrappers, geocode_city/zipcode,
location normalization) from modules/utils.py to shared/geocoding.py.

Move text formatting functions (abbreviate_location, truncate_string,
decode_escape_sequences, format_location_for_display, format_elapsed_display)
to shared/text_utils.py.

utils.py shrinks from 2,447 to ~1,125 lines. Update imports across ~25 files.
No logic changes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-18 17:36:11 -07:00

1156 lines
51 KiB
Python

#!/usr/bin/env python3
"""
Geocoding utilities shared by the bot and web viewer.
Includes Haversine distance, Nominatim wrappers (rate-limited, sync and async),
ZIP/city geocoding with caching, and location-string normalization helpers.
"""
import asyncio
from typing import Any, Optional
def get_major_city_queries(city: str, state_abbr: Optional[str] = None) -> list[str]:
"""Get prioritized geocoding queries for major cities that have multiple locations.
This helps ensure that common city names resolve to the most likely major city
rather than a small town with the same name.
Args:
city: City name (normalized, lowercase).
state_abbr: Optional state abbreviation (e.g., "CA", "NY").
Returns:
List[str]: List of geocoding query strings in priority order.
"""
city_lower = city.lower().strip()
# Comprehensive mapping of major cities with multiple locations
# Format: 'city_name': [list of queries in priority order]
major_city_mappings = {
'new york': ['New York, NY, USA', 'New York City, NY, USA'],
'los angeles': ['Los Angeles, CA, USA'],
'chicago': ['Chicago, IL, USA'],
'houston': ['Houston, TX, USA'],
'phoenix': ['Phoenix, AZ, USA'],
'philadelphia': ['Philadelphia, PA, USA'],
'san antonio': ['San Antonio, TX, USA'],
'san diego': ['San Diego, CA, USA'],
'dallas': ['Dallas, TX, USA'],
'san jose': ['San Jose, CA, USA'],
'austin': ['Austin, TX, USA'],
'jacksonville': ['Jacksonville, FL, USA'],
'san francisco': ['San Francisco, CA, USA'],
'columbus': ['Columbus, OH, USA'],
'fort worth': ['Fort Worth, TX, USA'],
'charlotte': ['Charlotte, NC, USA'],
'seattle': ['Seattle, WA, USA'],
'denver': ['Denver, CO, USA'],
'washington': ['Washington, DC, USA'],
'boston': ['Boston, MA, USA'],
'el paso': ['El Paso, TX, USA'],
'detroit': ['Detroit, MI, USA'],
'nashville': ['Nashville, TN, USA'],
'portland': ['Portland, OR, USA', 'Portland, ME, USA'],
'oklahoma city': ['Oklahoma City, OK, USA'],
'las vegas': ['Las Vegas, NV, USA'],
'memphis': ['Memphis, TN, USA'],
'louisville': ['Louisville, KY, USA'],
'baltimore': ['Baltimore, MD, USA'],
'milwaukee': ['Milwaukee, WI, USA'],
'albuquerque': ['Albuquerque, NM, USA'],
'tucson': ['Tucson, AZ, USA'],
'fresno': ['Fresno, CA, USA'],
'sacramento': ['Sacramento, CA, USA'],
'kansas city': ['Kansas City, MO, USA', 'Kansas City, KS, USA'],
'mesa': ['Mesa, AZ, USA'],
'atlanta': ['Atlanta, GA, USA'],
'omaha': ['Omaha, NE, USA'],
'colorado springs': ['Colorado Springs, CO, USA'],
'raleigh': ['Raleigh, NC, USA'],
'virginia beach': ['Virginia Beach, VA, USA'],
'miami': ['Miami, FL, USA'],
'oakland': ['Oakland, CA, USA'],
'minneapolis': ['Minneapolis, MN, USA'],
'tulsa': ['Tulsa, OK, USA'],
'cleveland': ['Cleveland, OH, USA'],
'wichita': ['Wichita, KS, USA'],
'arlington': ['Arlington, TX, USA', 'Arlington, VA, USA'],
'new orleans': ['New Orleans, LA, USA'],
'honolulu': ['Honolulu, HI, USA'],
# Cities with multiple locations that need disambiguation
'albany': ['Albany, NY, USA', 'Albany, OR, USA', 'Albany, CA, USA'],
'springfield': ['Springfield, IL, USA', 'Springfield, MO, USA', 'Springfield, MA, USA'],
'franklin': ['Franklin, TN, USA', 'Franklin, MA, USA'],
'georgetown': ['Georgetown, TX, USA', 'Georgetown, SC, USA'],
'madison': ['Madison, WI, USA', 'Madison, AL, USA'],
'auburn': ['Auburn, AL, USA', 'Auburn, WA, USA'],
'troy': ['Troy, NY, USA', 'Troy, MI, USA'],
'clinton': ['Clinton, IA, USA', 'Clinton, MS, USA'],
'paris': ['Paris, TX, USA', 'Paris, IL, USA', 'Paris, TN, USA'],
}
# Check if this is a major city
if city_lower in major_city_mappings:
queries = major_city_mappings[city_lower].copy()
# If state abbreviation was provided, prioritize queries with that state
if state_abbr:
state_upper = state_abbr.upper()
# Move matching state queries to the front
matching = [q for q in queries if f', {state_upper},' in q or q.endswith(f', {state_upper}')]
non_matching = [q for q in queries if q not in matching]
if matching:
return matching + non_matching
return queries
# Not a major city - return empty list (caller should use standard geocoding)
return []
def calculate_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Calculate haversine distance between two points in kilometers.
Args:
lat1: Latitude of first point in degrees.
lon1: Longitude of first point in degrees.
lat2: Latitude of second point in degrees.
lon2: Longitude of second point in degrees.
Returns:
float: Distance in kilometers.
"""
import math
# Convert latitude and longitude from degrees to radians
lat1_rad = math.radians(lat1)
lon1_rad = math.radians(lon1)
lat2_rad = math.radians(lat2)
lon2_rad = math.radians(lon2)
# Haversine formula
dlat = lat2_rad - lat1_rad
dlon = lon2_rad - lon1_rad
a = math.sin(dlat/2)**2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon/2)**2
c = 2 * math.asin(math.sqrt(a))
# Earth's radius in kilometers
earth_radius = 6371.0
return earth_radius * c
# Optional geocoding helper libraries
try:
import pycountry
PYCOUNTRY_AVAILABLE = True
except ImportError:
PYCOUNTRY_AVAILABLE = False
try:
import us
US_AVAILABLE = True
except ImportError:
US_AVAILABLE = False
def normalize_country_name(country_input: str) -> tuple[Optional[str], Optional[str]]:
"""Normalize country name to ISO code and standard name.
Args:
country_input: Country name or code (e.g., "Sweden", "SE", "United States", "USA", "US")
Returns:
tuple: (iso_code, standard_name) or (None, None) if not found
Example: ("SE", "Sweden") or ("US", "United States")
"""
if not PYCOUNTRY_AVAILABLE:
return None, None
if not country_input:
return None, None
country_input = country_input.strip()
# Try to find by alpha_2 code (e.g., "US", "SE")
if len(country_input) == 2:
try:
country = pycountry.countries.get(alpha_2=country_input.upper())
if country:
return country.alpha_2, country.name
except (KeyError, AttributeError):
pass
# Try to find by alpha_3 code (e.g., "USA", "SWE")
if len(country_input) == 3:
try:
country = pycountry.countries.get(alpha_3=country_input.upper())
if country:
return country.alpha_2, country.name
except (KeyError, AttributeError):
pass
# Try to find by name (case-insensitive, handles common variants)
country_input_lower = country_input.lower()
# Handle common variants
country_variants = {
'usa': 'United States',
'u.s.a.': 'United States',
'u.s.': 'United States',
'uk': 'United Kingdom',
'u.k.': 'United Kingdom',
'great britain': 'United Kingdom',
}
search_name = country_variants.get(country_input_lower, country_input)
try:
# Try exact match first
country = pycountry.countries.get(name=search_name)
if country:
return country.alpha_2, country.name
# Try fuzzy search
for country in pycountry.countries:
if country.name.lower() == search_name.lower():
return country.alpha_2, country.name
except (KeyError, AttributeError):
pass
return None, None
def normalize_us_state(state_input: str) -> tuple[Optional[str], Optional[str]]:
"""Normalize US state name to abbreviation and full name.
Args:
state_input: State name or abbreviation (e.g., "Washington", "WA", "California", "CA")
Returns:
tuple: (abbreviation, full_name) or (None, None) if not found
Example: ("WA", "Washington") or ("CA", "California")
"""
if not US_AVAILABLE:
return None, None
if not state_input:
return None, None
state_input = state_input.strip()
# Try to find by abbreviation
if len(state_input) == 2:
try:
state = us.states.lookup(state_input.upper())
if state:
return state.abbr, state.name
except (AttributeError, KeyError):
pass
# Try to find by name
try:
state = us.states.lookup(state_input)
if state:
return state.abbr, state.name
except (AttributeError, KeyError):
pass
return None, None
def is_country_name(text: str) -> bool:
"""Check if text is likely a country name.
Args:
text: Text to check
Returns:
bool: True if text appears to be a country name
"""
if not text:
return False
if PYCOUNTRY_AVAILABLE:
iso_code, _ = normalize_country_name(text)
if iso_code is not None:
return True
if US_AVAILABLE:
state_abbr, _ = normalize_us_state(text)
if state_abbr:
return False # It's a US state, not a country
if len(text) <= 2:
return False # Unknown 2-char (not a known country or US state)
return len(text) > 2 # Longer text, assume country
def is_us_state(text: str) -> bool:
"""Check if text is likely a US state name or abbreviation.
Args:
text: Text to check
Returns:
bool: True if text appears to be a US state
"""
if not text:
return False
if US_AVAILABLE:
state_abbr, _ = normalize_us_state(text)
return state_abbr is not None
return False
def parse_location_string(location: str) -> tuple[str, Optional[str], Optional[str]]:
"""Parse a location string into city, state/country parts.
Args:
location: Location string (e.g., "Stockholm, Sweden" or "Seattle, WA")
Returns:
tuple: (city, state_or_country, type) where type is "state", "country", or None
Example: ("Stockholm", "Sweden", "country") or ("Seattle", "WA", "state")
"""
if ',' not in location:
return location.strip(), None, None
parts = [p.strip() for p in location.rsplit(',', 1)]
if len(parts) != 2:
return location.strip(), None, None
city, second_part = parts
# Check if it's a US state
if is_us_state(second_part):
state_abbr, _ = normalize_us_state(second_part)
return city, state_abbr, "state"
# Check if it's a country
if is_country_name(second_part):
iso_code, country_name = normalize_country_name(second_part)
if iso_code:
return city, country_name, "country"
# If 2 chars or less, assume state abbreviation
if len(second_part) <= 2:
return city, second_part.upper(), "state"
# Otherwise, assume country
return city, second_part, "country"
def get_nominatim_geocoder(user_agent: str = "meshcore-bot", timeout: int = 10) -> Any:
"""Get a Nominatim geocoder instance with proper User-Agent.
Args:
user_agent: User-Agent string for Nominatim (required by their policy).
timeout: Request timeout in seconds.
Returns:
Any: Nominatim geocoder instance (from geopy).
"""
from geopy.geocoders import Nominatim
return Nominatim(user_agent=user_agent, timeout=timeout)
async def rate_limited_nominatim_geocode(bot: Any, query: str, timeout: int = 10) -> Optional[Any]:
"""Perform rate-limited Nominatim geocoding (forward geocoding).
Args:
bot: Bot instance (must have nominatim_rate_limiter attribute).
query: Location query string.
timeout: Request timeout in seconds.
Returns:
Optional[Any]: Geocoding result or None if failed/timed out.
"""
if not hasattr(bot, 'nominatim_rate_limiter'):
# Fallback if rate limiter not initialized
geolocator = get_nominatim_geocoder(timeout=timeout)
return geolocator.geocode(query, timeout=timeout)
# Wait for rate limiter
await bot.nominatim_rate_limiter.wait_for_request()
# Make the request
geolocator = get_nominatim_geocoder(timeout=timeout)
result = geolocator.geocode(query, timeout=timeout)
# Record the request
bot.nominatim_rate_limiter.record_request()
return result
async def rate_limited_nominatim_reverse(bot: Any, coordinates: str, timeout: int = 10) -> Optional[Any]:
"""Perform rate-limited Nominatim reverse geocoding.
Args:
bot: Bot instance (must have nominatim_rate_limiter attribute).
coordinates: Coordinates string in format "lat, lon".
timeout: Request timeout in seconds.
Returns:
Optional[Any]: Reverse geocoding result or None if failed/timed out.
"""
if not hasattr(bot, 'nominatim_rate_limiter'):
# Fallback if rate limiter not initialized
geolocator = get_nominatim_geocoder(timeout=timeout)
return geolocator.reverse(coordinates, timeout=timeout)
# Wait for rate limiter
await bot.nominatim_rate_limiter.wait_for_request()
# Make the request
geolocator = get_nominatim_geocoder(timeout=timeout)
result = geolocator.reverse(coordinates, timeout=timeout)
# Record the request
bot.nominatim_rate_limiter.record_request()
return result
def rate_limited_nominatim_geocode_sync(bot: Any, query: str, timeout: int = 10) -> Optional[Any]:
"""Perform rate-limited Nominatim geocoding (synchronous version).
Args:
bot: Bot instance (must have nominatim_rate_limiter attribute).
query: Location query string.
timeout: Request timeout in seconds.
Returns:
Optional[Any]: Geocoding result or None if failed/timed out.
"""
if not hasattr(bot, 'nominatim_rate_limiter'):
# Fallback if rate limiter not initialized
geolocator = get_nominatim_geocoder(timeout=timeout)
return geolocator.geocode(query, timeout=timeout)
# Wait for rate limiter
bot.nominatim_rate_limiter.wait_for_request_sync()
# Make the request
geolocator = get_nominatim_geocoder(timeout=timeout)
result = geolocator.geocode(query, timeout=timeout)
# Record the request
bot.nominatim_rate_limiter.record_request()
return result
def rate_limited_nominatim_reverse_sync(bot: Any, coordinates: str, timeout: int = 10) -> Optional[Any]:
"""Perform rate-limited Nominatim reverse geocoding (synchronous version).
Args:
bot: Bot instance (must have nominatim_rate_limiter attribute).
coordinates: Coordinates string in format "lat, lon".
timeout: Request timeout in seconds.
Returns:
Optional[Any]: Reverse geocoding result or None if failed/timed out.
"""
if not hasattr(bot, 'nominatim_rate_limiter'):
# Fallback if rate limiter not initialized
geolocator = get_nominatim_geocoder(timeout=timeout)
return geolocator.reverse(coordinates, timeout=timeout)
# Wait for rate limiter
bot.nominatim_rate_limiter.wait_for_request_sync()
# Make the request
geolocator = get_nominatim_geocoder(timeout=timeout)
result = geolocator.reverse(coordinates, timeout=timeout)
# Record the request
bot.nominatim_rate_limiter.record_request()
return result
async def geocode_zipcode(bot: Any, zipcode: str, default_country: Optional[str] = None, timeout: int = 10) -> tuple[Optional[float], Optional[float]]:
"""Shared function to geocode a ZIP code to lat/lon coordinates.
Checks cache first, then makes rate-limited API call if needed.
Args:
bot: Bot instance (must have db_manager and nominatim_rate_limiter).
zipcode: ZIP code string.
default_country: Default country code (e.g., "US"). If None, reads from bot.config.
timeout: Request timeout in seconds.
Returns:
Tuple[Optional[float], Optional[float]]: Tuple of (latitude, longitude) or (None, None) if not found.
"""
try:
# Get default country from config if not provided
if default_country is None:
default_country = bot.config.get('Weather', 'default_country', fallback='US')
# Check cache first
cache_query = f"{zipcode}, {default_country}"
cached_lat, cached_lon = bot.db_manager.get_cached_geocoding(cache_query)
if cached_lat is not None and cached_lon is not None:
return cached_lat, cached_lon
# Use rate-limited Nominatim to geocode the zipcode
location = await rate_limited_nominatim_geocode(bot, cache_query, timeout=timeout)
if location:
# Cache the result for future use
bot.db_manager.cache_geocoding(cache_query, location.latitude, location.longitude)
return location.latitude, location.longitude
else:
return None, None
except Exception as e:
bot.logger.error(f"Error geocoding zipcode {zipcode}: {e}")
return None, None
def geocode_zipcode_sync(bot: Any, zipcode: str, default_country: Optional[str] = None, timeout: int = 10) -> tuple[Optional[float], Optional[float]]:
"""Synchronous version of geocode_zipcode.
Args:
bot: Bot instance (must have db_manager and nominatim_rate_limiter).
zipcode: ZIP code string.
default_country: Default country code (e.g., "US"). If None, reads from bot.config.
timeout: Request timeout in seconds.
Returns:
Tuple[Optional[float], Optional[float]]: Tuple of (latitude, longitude) or (None, None) if not found.
"""
try:
# Get default country from config if not provided
if default_country is None:
default_country = bot.config.get('Weather', 'default_country', fallback='US')
# Check cache first
cache_query = f"{zipcode}, {default_country}"
cached_lat, cached_lon = bot.db_manager.get_cached_geocoding(cache_query)
if cached_lat is not None and cached_lon is not None:
return cached_lat, cached_lon
# Use rate-limited Nominatim to geocode the zipcode
location = rate_limited_nominatim_geocode_sync(bot, cache_query, timeout=timeout)
if location:
# Cache the result for future use
bot.db_manager.cache_geocoding(cache_query, location.latitude, location.longitude)
return location.latitude, location.longitude
else:
return None, None
except Exception as e:
bot.logger.error(f"Error geocoding zipcode {zipcode}: {e}")
return None, None
async def geocode_city(bot: Any, city: str, default_state: Optional[str] = None,
default_country: Optional[str] = None,
include_address_info: bool = False,
timeout: int = 10) -> tuple[Optional[float], Optional[float], Optional[dict]]:
"""Shared function to geocode a city name to lat/lon coordinates.
Uses intelligent fallback logic with major city prioritization.
Args:
bot: Bot instance (must have db_manager and nominatim_rate_limiter).
city: City name (may include state/country, e.g., "Seattle, WA" or "Paris, France").
default_state: Default state abbreviation (e.g., "WA"). If None, reads from bot.config.
default_country: Default country code (e.g., "US"). If None, reads from bot.config.
include_address_info: If True, also return address info via reverse geocoding.
timeout: Request timeout in seconds.
Returns:
Tuple[Optional[float], Optional[float], Optional[Dict]]:
Tuple of (latitude, longitude, address_info_dict) or (None, None, None) if not found.
address_info_dict is None if include_address_info is False.
"""
try:
# Get defaults from config if not provided
if default_state is None:
default_state = bot.config.get('Weather', 'default_state', fallback='')
if default_country is None:
default_country = bot.config.get('Weather', 'default_country', fallback='US')
city_clean = city.strip()
state_abbr = None
country_name = None
# Parse city, state/country format if present
if ',' in city_clean:
parts = [p.strip() for p in city_clean.rsplit(',', 1)]
if len(parts) == 2:
city_clean = parts[0]
second_part = parts[1]
# Use geocoding helpers to determine if it's a state or country
try:
_, parsed_part, part_type = parse_location_string(f"{city_clean}, {second_part}")
if part_type == "state":
state_abbr, _ = normalize_us_state(second_part)
if not state_abbr:
state_abbr = second_part.upper() if len(second_part) <= 2 else None
elif part_type == "country":
iso_code, country_name = normalize_country_name(second_part)
if iso_code:
# Use the normalized country name for better geocoding
country_name = country_name
else:
country_name = second_part
else:
# Fallback to original logic
if len(second_part) <= 2:
state_abbr = second_part.upper()
else:
country_name = second_part
except ImportError:
# Fallback if helpers not available
if len(second_part) <= 2:
state_abbr = second_part.upper()
else:
country_name = second_part
# Handle major cities with multiple locations (prioritize major cities).
# Skip when user specified a country (e.g. "Paris, FR") so we honor their choice.
major_city_queries = get_major_city_queries(city_clean, state_abbr)
if major_city_queries and not country_name:
# Try major city options first
for major_city_query in major_city_queries:
cached_lat, cached_lon = bot.db_manager.get_cached_geocoding(major_city_query)
if cached_lat and cached_lon:
lat, lon = cached_lat, cached_lon
else:
location = await rate_limited_nominatim_geocode(bot, major_city_query, timeout=timeout)
if location:
bot.db_manager.cache_geocoding(major_city_query, location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
else:
continue
# Get address info if requested
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
reverse_location = await rate_limited_nominatim_reverse(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
# If country name was parsed (not a state abbreviation), try geocoding with country first
if country_name:
# Try with country name directly (e.g., "Stockholm, Sweden")
country_query = f"{city_clean}, {country_name}"
cached_lat, cached_lon = bot.db_manager.get_cached_geocoding(country_query)
if cached_lat and cached_lon:
lat, lon = cached_lat, cached_lon
else:
location = await rate_limited_nominatim_geocode(bot, country_query, timeout=timeout)
if location:
bot.db_manager.cache_geocoding(country_query, location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
else:
lat, lon = None, None
if lat and lon:
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
reverse_location = await rate_limited_nominatim_reverse(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
# If state abbreviation was parsed, use it
if state_abbr:
state_query = f"{city_clean}, {state_abbr}, {default_country}"
cached_lat, cached_lon = bot.db_manager.get_cached_geocoding(state_query)
if cached_lat and cached_lon:
lat, lon = cached_lat, cached_lon
else:
location = await rate_limited_nominatim_geocode(bot, state_query, timeout=timeout)
if location:
bot.db_manager.cache_geocoding(state_query, location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
else:
lat, lon = None, None
if lat and lon:
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
reverse_location = await rate_limited_nominatim_reverse(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
# If no country/state specified, try city name alone first (finds most prominent international city)
# This handles cases like "Tokyo" -> Tokyo, Japan (not Tokyo, WA)
if not state_abbr and not country_name:
location = await rate_limited_nominatim_geocode(bot, city_clean, timeout=timeout)
if location:
# Check if result is in default country and is a small/obscure location
# If so, we'll try with default country/state as fallback
result_in_default_country = False
is_obscure_location = False
# Always get address info to check the result
try:
reverse_location = await rate_limited_nominatim_reverse(bot, f"{location.latitude}, {location.longitude}", timeout=timeout)
if reverse_location:
address = reverse_location.raw.get('address', {})
result_country = address.get('country', '').upper()
result_country_code = address.get('country_code', '').upper()
# Check if result is in default country
default_country_upper = default_country.upper()
if (result_country == default_country_upper or
result_country_code == default_country_upper or
'United States' in result_country and default_country_upper == 'US'):
result_in_default_country = True
# Check if it's an obscure location (county, township, small town)
place_type = address.get('type', '').lower()
place_name = (address.get('city') or
address.get('town') or
address.get('village') or
address.get('municipality') or
address.get('county', '')).lower()
# Obscure if it's a county, township, or if city name doesn't match the place name
if ('county' in place_type or
'township' in place_type or
(place_name and city_clean.lower() not in place_name and place_name not in city_clean.lower())):
is_obscure_location = True
except:
pass
# If result is in default country and is obscure, skip it and try with default country/state
if result_in_default_country and is_obscure_location:
# Fall through to try with default country/state
pass
else:
# Use the international result (either not in default country, or is a proper city match)
bot.db_manager.cache_geocoding(city_clean, location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
if not reverse_location:
reverse_location = await rate_limited_nominatim_reverse(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
# Try with default state (fallback for US cities when no country specified).
# Skip when default_state is empty (e.g. non-US default_country or key unset).
if default_state and default_state.strip():
cache_query = f"{city_clean}, {default_state}, {default_country}"
cached_lat, cached_lon = bot.db_manager.get_cached_geocoding(cache_query)
if cached_lat and cached_lon:
lat, lon = cached_lat, cached_lon
else:
location = await rate_limited_nominatim_geocode(bot, cache_query, timeout=timeout)
if location:
bot.db_manager.cache_geocoding(cache_query, location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
else:
lat, lon = None, None
if lat and lon:
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
reverse_location = await rate_limited_nominatim_reverse(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
# Try without state
location = await rate_limited_nominatim_geocode(bot, f"{city_clean}, {default_country}", timeout=timeout)
if location:
bot.db_manager.cache_geocoding(f"{city_clean}, {default_country}", location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
reverse_location = await rate_limited_nominatim_reverse(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
return None, None, None
except Exception as e:
bot.logger.error(f"Error geocoding city {city}: {e}")
return None, None, None
def geocode_city_sync(bot: Any, city: str, default_state: Optional[str] = None,
default_country: Optional[str] = None,
include_address_info: bool = False,
timeout: int = 10) -> tuple[Optional[float], Optional[float], Optional[dict]]:
"""Synchronous version of geocode_city.
Args:
bot: Bot instance (must have db_manager and nominatim_rate_limiter).
city: City name (may include state/country, e.g., "Seattle, WA" or "Paris, France").
default_state: Default state abbreviation (e.g., "WA"). If None, reads from bot.config.
default_country: Default country code (e.g., "US"). If None, reads from bot.config.
include_address_info: If True, also return address info via reverse geocoding.
timeout: Request timeout in seconds.
Returns:
Tuple[Optional[float], Optional[float], Optional[Dict]]:
Tuple of (latitude, longitude, address_info_dict) or (None, None, None) if not found.
address_info_dict is None if include_address_info is False.
"""
try:
# Get defaults from config if not provided
if default_state is None:
default_state = bot.config.get('Weather', 'default_state', fallback='')
if default_country is None:
default_country = bot.config.get('Weather', 'default_country', fallback='US')
city_clean = city.strip()
state_abbr = None
# Parse city, state/country format if present
state_abbr = None
country_name = None
if ',' in city_clean:
parts = [p.strip() for p in city_clean.rsplit(',', 1)]
if len(parts) == 2:
city_clean = parts[0]
second_part = parts[1]
# Use geocoding helpers to determine if it's a state or country
try:
_, parsed_part, part_type = parse_location_string(f"{city_clean}, {second_part}")
if part_type == "state":
state_abbr, _ = normalize_us_state(second_part)
if not state_abbr:
state_abbr = second_part.upper() if len(second_part) <= 2 else None
elif part_type == "country":
iso_code, country_name = normalize_country_name(second_part)
if iso_code:
# Use the normalized country name for better geocoding
country_name = country_name
else:
country_name = second_part
else:
# Fallback to original logic
if len(second_part) <= 2:
state_abbr = second_part.upper()
else:
country_name = second_part
except ImportError:
# Fallback if helpers not available
if len(second_part) <= 2:
state_abbr = second_part.upper()
else:
country_name = second_part
# Handle major cities with multiple locations (prioritize major cities).
# Skip when user specified a country (e.g. "Paris, FR") so we honor their choice.
major_city_queries = get_major_city_queries(city_clean, state_abbr)
if major_city_queries and not country_name:
# Try major city options first
for major_city_query in major_city_queries:
cached_lat, cached_lon = bot.db_manager.get_cached_geocoding(major_city_query)
if cached_lat and cached_lon:
lat, lon = cached_lat, cached_lon
else:
location = rate_limited_nominatim_geocode_sync(bot, major_city_query, timeout=timeout)
if location:
bot.db_manager.cache_geocoding(major_city_query, location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
else:
continue
# Get address info if requested
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
reverse_location = rate_limited_nominatim_reverse_sync(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
# If country name was parsed (not a state abbreviation), try geocoding with country first
if country_name:
# Try with country name directly (e.g., "Stockholm, Sweden")
country_query = f"{city_clean}, {country_name}"
cached_lat, cached_lon = bot.db_manager.get_cached_geocoding(country_query)
if cached_lat and cached_lon:
lat, lon = cached_lat, cached_lon
else:
location = rate_limited_nominatim_geocode_sync(bot, country_query, timeout=timeout)
if location:
bot.db_manager.cache_geocoding(country_query, location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
else:
lat, lon = None, None
if lat and lon:
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
reverse_location = rate_limited_nominatim_reverse_sync(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
# If state abbreviation was parsed, use it
if state_abbr:
state_query = f"{city_clean}, {state_abbr}, {default_country}"
cached_lat, cached_lon = bot.db_manager.get_cached_geocoding(state_query)
if cached_lat and cached_lon:
lat, lon = cached_lat, cached_lon
else:
location = rate_limited_nominatim_geocode_sync(bot, state_query, timeout=timeout)
if location:
bot.db_manager.cache_geocoding(state_query, location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
else:
lat, lon = None, None
if lat and lon:
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
reverse_location = rate_limited_nominatim_reverse_sync(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
# If no country/state specified, try city name alone first (finds most prominent international city)
# This handles cases like "Tokyo" -> Tokyo, Japan (not Tokyo, WA)
if not state_abbr and not country_name:
location = rate_limited_nominatim_geocode_sync(bot, city_clean, timeout=timeout)
if location:
# Check if result is in default country and is a small/obscure location
# If so, we'll try with default country/state as fallback
result_in_default_country = False
is_obscure_location = False
if include_address_info:
try:
reverse_location = rate_limited_nominatim_reverse_sync(bot, f"{location.latitude}, {location.longitude}", timeout=timeout)
if reverse_location:
address = reverse_location.raw.get('address', {})
result_country = address.get('country', '').upper()
result_country_code = address.get('country_code', '').upper()
# Check if result is in default country
default_country_upper = default_country.upper()
if (result_country == default_country_upper or
result_country_code == default_country_upper or
'United States' in result_country and default_country_upper == 'US'):
result_in_default_country = True
# Check if it's an obscure location (county, township, small town)
place_type = address.get('type', '').lower()
place_name = address.get('city') or address.get('town') or address.get('village') or ''
# Obscure if it's a county, township, or if city name doesn't match
if ('county' in place_type or
'township' in place_type or
city_clean.lower() not in place_name.lower()):
is_obscure_location = True
except:
pass
# If result is in default country and is obscure, try with default country/state
if result_in_default_country and is_obscure_location:
# Fall through to try with default country/state
pass
else:
# Use the international result
bot.db_manager.cache_geocoding(city_clean, location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
reverse_location = rate_limited_nominatim_reverse_sync(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
# Try with default state (fallback for US cities when no country specified).
# Skip when default_state is empty (e.g. non-US default_country or key unset).
if default_state and default_state.strip():
cache_query = f"{city_clean}, {default_state}, {default_country}"
cached_lat, cached_lon = bot.db_manager.get_cached_geocoding(cache_query)
if cached_lat and cached_lon:
lat, lon = cached_lat, cached_lon
else:
location = rate_limited_nominatim_geocode_sync(bot, cache_query, timeout=timeout)
if location:
bot.db_manager.cache_geocoding(cache_query, location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
else:
lat, lon = None, None
if lat and lon:
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
reverse_location = rate_limited_nominatim_reverse_sync(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
# Try without state
location = rate_limited_nominatim_geocode_sync(bot, f"{city_clean}, {default_country}", timeout=timeout)
if location:
bot.db_manager.cache_geocoding(f"{city_clean}, {default_country}", location.latitude, location.longitude)
lat, lon = location.latitude, location.longitude
address_info = None
if include_address_info:
# Check cache for reverse geocoding result
reverse_cache_key = f"reverse_{lat}_{lon}"
cached_address = bot.db_manager.get_cached_json(reverse_cache_key, "geolocation")
if cached_address:
address_info = cached_address
else:
try:
reverse_location = rate_limited_nominatim_reverse_sync(bot, f"{lat}, {lon}", timeout=timeout)
if reverse_location:
address_info = reverse_location.raw.get('address', {})
# Cache the reverse geocoding result
bot.db_manager.cache_json(reverse_cache_key, address_info, "geolocation", cache_hours=720)
except:
address_info = {}
return lat, lon, address_info
return None, None, None
except Exception as e:
bot.logger.error(f"Error geocoding city {city}: {e}")
return None, None, None