"""Geolocation resolution for the fraud engine.

Tier 2 Block G wants two things here:

1. Enrich every scored access with (country, city, lat, lon) so the host UI
   can render "Sam opened from Lagos, Nigeria" rather than a raw IPv4 string
   nobody can read.
2. Surface large geographic jumps (>500 km in <1h) as a scoring signal — the
   geo_jump feature in scoring.py. That feature needs the *previous* access's
   coordinates, which we stash on the per-guest baseline alongside
   fingerprint + IP prefix.

Design choices:

* Pluggable resolvers. The spec mentions MaxMind GeoIP2 *or* a free HTTP API
  like ipapi.com. We support both: MaxMind reads a local GeoLite2 mmdb file
  (set `GG_GEOIP_DB_PATH`), and the default falls back to ip-api.com (free,
  no auth, 45 req/min/IP — fine for a homelab demo, sized to upgrade later).
* Redis cache wrapper. Lookups are stable for ~30 days; caching avoids
  hammering the upstream and keeps the synchronous gRPC scoring path fast on
  repeat opens of the same invitation.
* Private + invalid IPs short-circuit to None. Loopback, RFC1918, IPv6
  link-local etc. would just confuse the upstream and waste a Redis miss.
* Fail-open. A resolver error (network blip, malformed response) is *not* a
  scoring signal — we score with `geo=None` and move on.
"""

from __future__ import annotations

import asyncio
import ipaddress
import json
import logging
from dataclasses import dataclass
from typing import Protocol

logger = logging.getLogger(__name__)


@dataclass
class GeoLocation:
    """Result of one geo lookup; every field is optional.

    Kept as a plain (non-slotted) dataclass on purpose: CachedGeoResolver
    serialises instances through ``__dict__`` and rebuilds them with
    ``GeoLocation(**data)``.
    """

    country: str | None = None  # ISO-3166 alpha-2 code, e.g. "NG"
    city: str | None = None
    lat: float | None = None
    lon: float | None = None

    def __bool__(self) -> bool:
        """Return True when at least one field carries information.

        Coordinates are compared against None rather than by truthiness
        because 0.0 is a legitimate latitude/longitude. Both coordinates are
        checked so that a record holding only ``lon`` is not treated as empty
        while one holding only ``lat`` is not.
        """
        return bool(
            self.country
            or self.city
            or self.lat is not None
            or self.lon is not None
        )


class GeoResolver(Protocol):
    """Structural interface shared by every resolver in this module."""

    async def resolve(self, ip: str | None) -> GeoLocation | None:
        """Return the location for *ip*, or None when it cannot be resolved."""
        ...

    async def close(self) -> None:
        """Release whatever resources the resolver holds."""
        ...
# --- helpers ---


def _canonical_public_ip(ip: str | None) -> str | None:
    """Return *ip* in canonical text form if it is worth sending upstream.

    Returns None for empty/unparseable input and for loopback, private,
    link-local, multicast, unspecified and reserved addresses.

    The canonical form (compressed IPv6, no zone id, IPv4-mapped IPv6
    unwrapped to plain IPv4) is what the resolvers use for cache keys and
    upstream URLs, so equivalent spellings of one address share a cache entry
    and no caller-supplied text reaches the URL verbatim.
    """
    if not ip:
        return None
    try:
        addr = ipaddress.ip_address(ip)
    except ValueError:
        return None

    # Dual-stack listeners report IPv4 peers as ``::ffff:a.b.c.d``. Judge
    # (and look up) the embedded IPv4 address: how the stdlib classifies the
    # mapped form itself has varied between Python releases.
    mapped = getattr(addr, "ipv4_mapped", None)
    if mapped is not None:
        addr = mapped

    if addr.is_loopback or addr.is_private or addr.is_link_local or addr.is_multicast:
        return None
    if addr.is_unspecified or addr.is_reserved:
        return None

    if getattr(addr, "scope_id", None):
        # A zone id ("%eth0") is host-local and may contain characters such
        # as "?" or "#"; rebuild the address from its integer value to drop it.
        addr = ipaddress.IPv6Address(int(addr))
    return str(addr)


def _is_resolvable(ip: str | None) -> bool:
    """Return True when *ip* parses as a publicly routable address."""
    return _canonical_public_ip(ip) is not None


async def _close_redis(client) -> None:
    """Best-effort close of a redis client; never raises.

    Prefers ``aclose()`` and falls back to ``close()`` so both newer and
    older asyncio redis clients are handled; the result is awaited only if
    it is awaitable.
    """
    import inspect  # noqa: PLC0415

    closer = getattr(client, "aclose", None) or getattr(client, "close", None)
    if closer is None:
        return
    try:
        outcome = closer()
        if inspect.isawaitable(outcome):
            await outcome
    except Exception as exc:  # noqa: BLE001 — close is best-effort
        logger.debug("geo cache close failed", extra={"err": str(exc)})


def _safe_redis_target(url: str) -> str:
    """Return ``scheme://host[:port]/path`` for logging.

    User info and the query string are dropped because either may carry the
    Redis password.
    """
    from urllib.parse import urlsplit  # noqa: PLC0415

    try:
        parts = urlsplit(url)
        host = parts.hostname or ""
        port = parts.port  # raises ValueError on a malformed port
    except ValueError:
        return "<unparseable redis url>"
    if ":" in host:
        host = f"[{host}]"
    netloc = host if port is None else f"{host}:{port}"
    return f"{parts.scheme}://{netloc}{parts.path}"


# --- null (test / disabled) ---


class NullResolver:
    """Returns None for everything. Used in tests and when geolocation is
    explicitly disabled via `GG_GEOIP_PROVIDER=null`."""

    async def resolve(self, ip: str | None) -> GeoLocation | None:
        """Always return None."""
        return None

    async def close(self) -> None:
        """Nothing to release."""
        return None


# --- ip-api.com (default for dev) ---


class IPApiResolver:
    """Resolves via http://ip-api.com — free, no auth, 45 req/min/IP.

    We deliberately stay on HTTP (not HTTPS) because the free tier redirects
    HTTPS to a 403; the request carries no credentials so the cleartext-ness
    isn't a leak. Switch to a paid tier (ipapi.co, ipinfo, MaxMind) for
    production load.
    """

    def __init__(self, timeout_seconds: float = 1.5) -> None:
        """Prepare the resolver; the HTTP session is created on first use.

        Raises ImportError if aiohttp is not installed.
        """
        # aiohttp is imported lazily so app.scoring can be unit-tested
        # without the optional HTTP dep installed.
        import aiohttp  # noqa: PLC0415

        self._aiohttp = aiohttp
        self._timeout = aiohttp.ClientTimeout(total=timeout_seconds)
        self._session = None

    def _session_or_create(self):
        """Return the live session, creating one if absent or closed."""
        if self._session is None or self._session.closed:
            self._session = self._aiohttp.ClientSession(timeout=self._timeout)
        return self._session

    async def resolve(self, ip: str | None) -> GeoLocation | None:
        """Look *ip* up on ip-api.com.

        Fail-open: returns None for non-public IPs, non-200 responses,
        transport errors, timeouts, undecodable bodies and any payload whose
        ``status`` is not ``"success"``.
        """
        canonical = _canonical_public_ip(ip)
        if canonical is None:
            return None

        session = self._session_or_create()
        url = f"http://ip-api.com/json/{canonical}?fields=status,country,countryCode,city,lat,lon"
        try:
            async with session.get(url) as resp:
                if resp.status != 200:
                    logger.debug("geoip lookup non-200", extra={"ip": ip, "status": resp.status})
                    return None
                data = await resp.json(content_type=None)
        except (self._aiohttp.ClientError, asyncio.TimeoutError, ValueError) as exc:
            # ValueError covers a body that is not valid JSON / not decodable;
            # a malformed response must not escape as an exception.
            logger.debug("geoip lookup error", extra={"ip": ip, "err": str(exc)})
            return None

        # Valid JSON that is not an object (list, string, null) is as useless
        # as a failed lookup.
        if not isinstance(data, dict) or data.get("status") != "success":
            return None
        return GeoLocation(
            country=data.get("countryCode") or data.get("country"),
            city=data.get("city"),
            lat=data.get("lat"),
            lon=data.get("lon"),
        )

    async def close(self) -> None:
        """Close the HTTP session if one is open."""
        if self._session is not None and not self._session.closed:
            await self._session.close()


# --- MaxMind GeoLite2-City (lazy import) ---


class MaxMindResolver:
    """Reads a local GeoLite2-City.mmdb.

    Lazy-imports `geoip2` so the base image doesn't carry it unless this
    resolver is actually selected. Synchronous reader; we call it in a thread
    to keep the asyncio loop unblocked.
    """

    def __init__(self, db_path: str) -> None:
        """Open the database at *db_path*; import/open errors propagate."""
        import geoip2.database  # noqa: PLC0415 — intentionally lazy

        self._reader = geoip2.database.Reader(db_path)

    async def resolve(self, ip: str | None) -> GeoLocation | None:
        """Look *ip* up in the local database; None on any lookup error."""
        canonical = _canonical_public_ip(ip)
        if canonical is None:
            return None
        try:
            rec = await asyncio.to_thread(self._reader.city, canonical)
        except Exception as exc:  # noqa: BLE001 — generic per geoip2 raise hierarchy
            logger.debug("maxmind lookup error", extra={"ip": ip, "err": str(exc)})
            return None
        return GeoLocation(
            country=rec.country.iso_code,
            city=rec.city.name,
            lat=rec.location.latitude,
            lon=rec.location.longitude,
        )

    async def close(self) -> None:
        """Close the database reader; failures are logged, not raised."""
        try:
            self._reader.close()
        except Exception as exc:  # noqa: BLE001 — close is best-effort
            logger.debug("maxmind close failed", extra={"err": str(exc)})


# --- Redis-cached wrapper ---


class CachedGeoResolver:
    """Wraps any resolver in a Redis cache.

    30-day TTL because public IPs rarely change location, and dropping the
    wrong city for a few days is cheaper than re-querying ip-api.com on every
    page load.
    """

    KEY_PREFIX = "gg:geo:v1:"
    TTL_SECONDS = 30 * 24 * 3600

    def __init__(self, inner: GeoResolver, redis_client, *, owns_client: bool = False) -> None:
        """Wrap *inner* with a cache stored in *redis_client*.

        owns_client: when True, ``close()`` also closes *redis_client*.
        Defaults to False so a client shared with other code is left alone.
        """
        self._inner = inner
        self._redis = redis_client
        self._owns_client = owns_client

    async def resolve(self, ip: str | None) -> GeoLocation | None:
        """Return the cached location for *ip*, resolving and caching on a miss.

        Cache read/write failures and corrupt entries are treated as misses.
        None results from the inner resolver are not cached.
        """
        canonical = _canonical_public_ip(ip)
        if canonical is None:
            return None

        # Keyed on the canonical form so equivalent spellings share an entry.
        key = self.KEY_PREFIX + canonical
        try:
            cached = await self._redis.get(key)
        except Exception as exc:  # noqa: BLE001
            logger.debug("geo cache get failed", extra={"err": str(exc)})
            cached = None

        if cached:
            try:
                return GeoLocation(**json.loads(cached))
            except (ValueError, TypeError):
                # Corrupt or schema-mismatched entry: fall through, re-resolve
                # and overwrite it below.
                pass

        result = await self._inner.resolve(canonical)
        if result is not None:
            try:
                await self._redis.set(
                    key,
                    json.dumps(result.__dict__),
                    ex=self.TTL_SECONDS,
                )
            except Exception as exc:  # noqa: BLE001
                logger.debug("geo cache set failed", extra={"err": str(exc)})
        return result

    async def close(self) -> None:
        """Close the inner resolver and, if owned, the redis client."""
        try:
            await self._inner.close()
        finally:
            if self._owns_client:
                await _close_redis(self._redis)


# --- factory ---


async def make_resolver(
    *,
    provider: str,
    db_path: str | None,
    redis_url: str | None,
) -> GeoResolver:
    """Build the resolver stack from settings.

    provider:
      - "null": NullResolver (geo disabled)
      - "ipapi": IPApiResolver
      - "maxmind": MaxMindResolver (requires db_path)
      - "auto": MaxMind if db_path file exists, else IPApi

    Any other value is logged and treated as "ipapi".

    Wraps in CachedGeoResolver when redis_url is set. If Redis cannot be
    reached the bare resolver is returned instead.
    """
    inner: GeoResolver
    chosen = provider.lower()
    if chosen == "auto":
        chosen = "maxmind" if (db_path and _file_exists(db_path)) else "ipapi"

    if chosen == "null":
        inner = NullResolver()
    elif chosen == "maxmind":
        if not db_path or not _file_exists(db_path):
            logger.warning("maxmind db missing — falling back to ipapi")
            inner = IPApiResolver()
        else:
            try:
                inner = MaxMindResolver(db_path)
            except Exception as exc:  # noqa: BLE001
                logger.warning("maxmind init failed — falling back to ipapi", extra={"err": str(exc)})
                inner = IPApiResolver()
    else:
        if chosen != "ipapi":
            logger.warning("unknown geoip provider — falling back to ipapi", extra={"provider": provider})
        inner = IPApiResolver()

    if not redis_url:
        return inner

    client = None
    try:
        import redis.asyncio as redislib  # noqa: PLC0415

        client = redislib.from_url(redis_url, decode_responses=True)
        await client.ping()
    except Exception as exc:  # noqa: BLE001
        logger.warning("geo cache: redis unavailable — running uncached", extra={"err": str(exc)})
        if client is not None:
            # Don't leave a half-open connection pool behind.
            await _close_redis(client)
        return inner

    # Log the target without credentials; redis URLs commonly embed a password.
    logger.info("geo cache: redis connected", extra={"url": _safe_redis_target(redis_url)})
    # The client was created here solely for the cache, so the wrapper owns it.
    return CachedGeoResolver(inner, client, owns_client=True)


def _file_exists(path: str) -> bool:
    """Return True when *path* names an existing regular file."""
    import os  # noqa: PLC0415

    return os.path.isfile(path)