"""DOLPHIN — ExF (External Factors) Live Daemon ================================================ Long-running process that wraps RealTimeExFService and streams its lag-adjusted indicator snapshot to Hazelcast every `HZ_PUSH_INTERVAL_S` seconds (default 5 s), satisfying the never-starve 5-second consumer guarantee. Architecture (from ExF_EsoF_Complete_Specification.md): - 85 indicators across 8 providers (Binance, Deribit, FRED, …) - Per-indicator polling at native rate (5m Binance → 8h funding) - Per-provider rate-limit semaphores (Binance 20/s, CoinMetrics 0.15/s) - In-memory state: IndicatorState with daily_history deque (lag support) - Consumer API: get_indicators(apply_lag=True) < 1 ms, never blocks - HZ key: DOLPHIN_FEATURES['exf_latest'] Usage: python prod/exf_fetcher_flow.py # run live python prod/exf_fetcher_flow.py --warmup 60 # override warmup seconds """ import sys import json import logging import signal import time import argparse import traceback from pathlib import Path from datetime import datetime, timezone HCM_DIR = Path(__file__).parent.parent sys.path.insert(0, str(HCM_DIR / "external_factors")) sys.path.insert(0, str(HCM_DIR)) # ── Logging Setup ────────────────────────────────────────────────────────────── # Use minimal formatting, no timestamp in formatter (handled by journald/syslog) logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s — %(message)s", ) logger = logging.getLogger("exf_fetcher") # Guard for expensive debug operations _LOG_DEBUG = logger.isEnabledFor(logging.DEBUG) _LOG_INFO = logger.isEnabledFor(logging.INFO) # ── Constants ────────────────────────────────────────────────────────────────── HZ_PUSH_INTERVAL_S = 0.5 # push to HZ every 0.5 s (Aggressive Oversampling) WARMUP_S = 30 # wait for first indicator fetch cycle HZ_KEY = "exf_latest" # Keys actually consumed by _calculate_signals() in AdaptiveCircuitBreaker. 
# Trimmed from original broader set — mcap_bc and addr_btc are not used by
# ACBv6 computation and were causing _acb_ready to be permanently False.
ACB_KEYS = frozenset([
    "funding_btc", "fund_dbt_btc", "dvol_btc", "fng", "taker",
    "funding_eth", "dvol_eth", "vix", "ls_btc", "oi_btc", "fund_dbt_eth",
])

# Log throttling - only log status every N iterations
LOG_STATUS_INTERVAL = 120  # Log status every ~60 seconds (120 * 0.5s)

from prefect import flow, task, get_run_logger

# Import persistence layer (for NPZ logging)
sys.path.insert(0, str(HCM_DIR / "prod"))
try:
    from exf_persistence import ExFPersistenceService, AlphaEngineDataChecker
    _HAS_PERSISTENCE = True
except ImportError as e:
    _HAS_PERSISTENCE = False
    print(f"Warning: Persistence layer not available: {e}")

# Pre-compiled for speed: local aliases skip repeated global lookups in the
# hot loop. NOTE(review): _isin is currently unused anywhere in this file —
# kept for backward compatibility in case an external module imports it.
_isinstance = isinstance
_isin = ACB_KEYS.__contains__


@task(name="push_indicators_to_hz", retries=3, retry_delay_seconds=1,
      cache_policy=None)  # Disable caching - client not serializable
def push_to_hz(client, payload):
    """Push `payload` to Hazelcast under HZ_KEY with automatic Prefect retry.

    Any exception propagates so Prefect's `retries=3` policy can re-run the
    task. (The original wrapped this in a no-op try/except that immediately
    re-raised — removed as dead code.)
    """
    from prod._hz_push import hz_push
    return hz_push(HZ_KEY, payload, client)


@flow(name="exf-fetcher-flow", log_prints=False)  # Disable log_prints for performance
def exf_fetcher_flow(warmup_s: int = WARMUP_S):
    """Run the ExF live daemon: start the indicator service, then push a
    snapshot to Hazelcast every HZ_PUSH_INTERVAL_S seconds forever.

    Args:
        warmup_s: seconds to sleep after service start before the first push,
            giving the per-indicator fetchers one cycle to populate state.

    Raises:
        Exception: re-raised when the service fails to start or the loop
            sees 20+ consecutive failures; resources are released in finally.
    """
    from realtime_exf_service import RealTimeExFService
    from prod._hz_push import make_hz_client

    log = get_run_logger()
    svc = None
    client = None

    # ── Startup ─────────────────────────────────────────────────────────────
    try:
        svc = RealTimeExFService()
        svc.start()
        if _LOG_INFO:
            log.info("ExF svc started — warmup %ds", warmup_s)
    except Exception as e:
        if _LOG_INFO:
            log.error("CRITICAL: ExF svc start failed: %s", e)
        raise

    # Warmup period: let the first fetch cycle complete before pushing.
    time.sleep(warmup_s)

    # ── Hazelcast Connection ────────────────────────────────────────────────
    def _connect():
        """(Re)build the HZ client; returns True on success.

        Safe to call repeatedly — an existing client is shut down first
        (shutdown errors ignored). On failure `client` is reset to None so
        the main loop knows to retry.
        """
        nonlocal client
        try:
            if client:
                try:
                    client.shutdown()
                except Exception:
                    pass  # Ignore shutdown errors
            client = make_hz_client()
            if _LOG_INFO:
                log.info("HZ connected")
            return True
        except Exception as e:
            if _LOG_INFO:
                log.warning("HZ connect failed: %s", e)
            client = None
            return False

    if not _connect() and _LOG_INFO:
        log.warning("Initial HZ conn failed — retrying in loop")

    # ── Start Persistence Service (OFF hot path) ────────────────────────────
    persistence = None
    data_checker = None
    if _HAS_PERSISTENCE:
        try:
            persistence = ExFPersistenceService()
            persistence.start()
            data_checker = AlphaEngineDataChecker()
            if _LOG_INFO:
                log.info("Persistence svc started (5min flush interval)")
        except Exception as e:
            if _LOG_INFO:
                log.warning("Persistence svc failed to start: %s", e)

    # Best-effort ClickHouse mirror. FIX: resolve the import ONCE here instead
    # of re-importing inside the 0.5 s hot loop on every successful push.
    try:
        from ch_writer import ch_put, ts_us as _ts
    except Exception:
        ch_put = None

    # ── Main Loop ───────────────────────────────────────────────────────────
    push_count = 0
    insufficient_data_warnings = 0
    fail_count = 0
    consecutive_fails = 0
    loop_iter = 0
    if _LOG_INFO:
        log.info("ExF loop live — HZ['%s'] every %.1fs", HZ_KEY, HZ_PUSH_INTERVAL_S)

    try:
        while True:
            t0 = time.monotonic()
            loop_iter += 1
            try:
                # Get dual-sampled snapshot (<1 ms) - ZERO logging here
                indicators = svc.get_indicators(dual_sample=True)

                # Build HZ payload - minimize object creation
                staleness = indicators.pop("_staleness", {})
                payload = {k: v for k, v in indicators.items()
                           if _isinstance(v, (int, float))}
                # FIX: count valid indicators BEFORE the meta keys are mixed
                # in, and accept ints as well. The original summed over the
                # whole payload (including the _staleness_s dict and the
                # _acb_ready bool) and counted floats only, so integer-valued
                # indicators were silently excluded. `v == v` filters NaN.
                ok_count = sum(1 for v in payload.values() if v == v)
                payload["_staleness_s"] = {k: round(v, 1) for k, v in staleness.items()}
                payload["_acb_ready"] = all(k in payload for k in ACB_KEYS)
                payload["_ok_count"] = ok_count

                # Ensure HZ connection (silent unless it keeps failing)
                if client is None:
                    if not _connect():
                        consecutive_fails += 1
                        if consecutive_fails > 10 and _LOG_INFO:
                            log.error("CRITICAL: HZ lost for %d+ iters", consecutive_fails)
                        time.sleep(1.0)
                        continue

                # Execute push task - ZERO logging on success path
                try:
                    success = push_to_hz.submit(client, payload).result()
                except Exception:
                    success = False

                if success:
                    push_count += 1
                    consecutive_fails = 0

                    # Mirror headline fields to ClickHouse (best effort).
                    if ch_put is not None:
                        try:
                            ch_put("exf_data", {
                                "ts": _ts(),
                                "funding_rate": float(payload.get("funding_btc") or 0),
                                "dvol": float(payload.get("dvol_btc") or 0),
                                "fear_greed": float(payload.get("fng") or 0),
                                "taker_ratio": float(payload.get("taker") or 0),
                            })
                        except Exception:
                            pass

                    # Check data sufficiency for Alpha Engine (every 60 pushes ≈ 30 s)
                    if data_checker and (push_count % 60 == 0):
                        is_sufficient, details = data_checker.check_sufficiency(payload)
                        if not is_sufficient and _LOG_INFO:
                            insufficient_data_warnings += 1
                            if insufficient_data_warnings % 10 == 1:  # Log every 5 min
                                log.warning(
                                    "Alpha Engine data INSUFFICIENT: score=%.2f, ACB=%d/%d, missing=%s",
                                    details['score'], details['acb_indicators'],
                                    len(data_checker.ACB_CRITICAL), details['missing_acb'][:3])

                    # Throttled status logging - only every LOG_STATUS_INTERVAL
                    if _LOG_INFO and (push_count % LOG_STATUS_INTERVAL == 1):
                        st = svc.status()
                        # NOTE(review): reads private _sufficiency_score — confirm
                        # the checker initializes it before the first check runs.
                        sufficiency = "SUFF" if (data_checker and data_checker._sufficiency_score > 0.7) else "INSUFF"
                        log.info("ExF#%d ok=%d/%d acb=%s data=%s",
                                 push_count, st['indicators_ok'], st['indicators_total'],
                                 payload['_acb_ready'], sufficiency)
                else:
                    fail_count += 1
                    consecutive_fails += 1
                    # Only log failures every 10th to avoid spam
                    if _LOG_INFO and (consecutive_fails % 10 == 1):
                        log.warning("HZ fail#%d (tot=%d)", consecutive_fails, fail_count)
                    if consecutive_fails >= 3 and client is not None:
                        _connect()  # Silent reconnect attempt

            except Exception as e:
                fail_count += 1
                consecutive_fails += 1
                # Only log exceptions periodically
                if _LOG_INFO and (consecutive_fails % 10 == 1):
                    log.error("Loop exc: %s", e)
                if consecutive_fails > 20:
                    if _LOG_INFO:
                        log.error("FATAL: 20+ consecutive failures")
                    raise

            # Maintain precise interval - this is critical for the
            # never-starve consumer guarantee.
            elapsed = time.monotonic() - t0
            sleep_time = HZ_PUSH_INTERVAL_S - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)
            elif _LOG_DEBUG:
                # Only log overruns in debug mode
                log.debug("Overrun: %.3fs", -sleep_time)

    except KeyboardInterrupt:
        if _LOG_INFO:
            log.info("Interrupted")
    except Exception:
        if _LOG_INFO:
            log.exception("Fatal error")
        raise
    finally:
        if _LOG_INFO:
            log.info("Shutting down — pushes=%d fails=%d", push_count, fail_count)
        if svc:
            try:
                svc.stop()
            except Exception:
                pass
        if client:
            try:
                client.shutdown()
            except Exception:
                pass
        if persistence:
            try:
                persistence.stop()
                if _LOG_INFO:
                    log.info("Persistence svc stopped (%d writes)",
                             persistence.stats.get('writes', 0))
            except Exception:
                pass


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="DOLPHIN ExF live daemon")
    parser.add_argument("--warmup", type=int, default=WARMUP_S,
                        help="Warmup seconds before first HZ push")
    args = parser.parse_args()
    exf_fetcher_flow(warmup_s=args.warmup)