initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
289
prod/exf_fetcher_flow.py
Executable file
289
prod/exf_fetcher_flow.py
Executable file
@@ -0,0 +1,289 @@
|
||||
"""DOLPHIN — ExF (External Factors) Live Daemon
|
||||
================================================
|
||||
Long-running process that wraps RealTimeExFService and streams its
|
||||
lag-adjusted indicator snapshot to Hazelcast every `HZ_PUSH_INTERVAL_S`
seconds (default 0.5 s — see the constant below), comfortably satisfying the
never-starve 5-second consumer guarantee.
|
||||
|
||||
Architecture (from ExF_EsoF_Complete_Specification.md):
|
||||
- 85 indicators across 8 providers (Binance, Deribit, FRED, …)
|
||||
- Per-indicator polling at native rate (5m Binance → 8h funding)
|
||||
- Per-provider rate-limit semaphores (Binance 20/s, CoinMetrics 0.15/s)
|
||||
- In-memory state: IndicatorState with daily_history deque (lag support)
|
||||
- Consumer API: get_indicators(apply_lag=True) < 1 ms, never blocks
|
||||
- HZ key: DOLPHIN_FEATURES['exf_latest']
|
||||
|
||||
Usage:
|
||||
python prod/exf_fetcher_flow.py # run live
|
||||
python prod/exf_fetcher_flow.py --warmup 60 # override warmup seconds
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
import signal
|
||||
import time
|
||||
import argparse
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
|
||||
HCM_DIR = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(HCM_DIR / "external_factors"))
|
||||
sys.path.insert(0, str(HCM_DIR))
|
||||
|
||||
# ── Logging Setup ──────────────────────────────────────────────────────────────
# Root logger at INFO with timestamp + level + logger name.
# NOTE(review): a previous comment said "no timestamp in formatter (handled by
# journald/syslog)", but the format string below DOES include %(asctime)s — if
# journald already stamps lines, drop %(asctime)s; until then the comment is
# corrected to match the code.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s — %(message)s",
)
logger = logging.getLogger("exf_fetcher")

# Guard flags evaluated ONCE at import time so the hot loop can skip log-call
# overhead entirely. Caveat: they will not track runtime level changes.
_LOG_DEBUG = logger.isEnabledFor(logging.DEBUG)
_LOG_INFO = logger.isEnabledFor(logging.INFO)

# ── Constants ──────────────────────────────────────────────────────────────────
HZ_PUSH_INTERVAL_S = 0.5  # push to HZ every 0.5 s (Aggressive Oversampling)
WARMUP_S = 30  # seconds to wait for the first indicator fetch cycle before pushing
HZ_KEY = "exf_latest"  # key written inside the DOLPHIN_FEATURES Hazelcast map
# Keys actually consumed by _calculate_signals() in AdaptiveCircuitBreaker.
# Trimmed from original broader set — mcap_bc and addr_btc are not used by
# ACBv6 computation and were causing _acb_ready to be permanently False.
ACB_KEYS = frozenset([
    "funding_btc", "fund_dbt_btc", "dvol_btc", "fng", "taker",
    "funding_eth", "dvol_eth", "vix", "ls_btc", "oi_btc", "fund_dbt_eth",
])

# Log throttling - only log status every N iterations
LOG_STATUS_INTERVAL = 120  # Log status every ~60 seconds (120 * 0.5s)

# Imported late (after sys.path setup above) so the prefect package resolves
# in the deployed environment.
from prefect import flow, task, get_run_logger

# Import persistence layer (for NPZ logging); optional — the daemon degrades
# gracefully to push-only mode when it is absent.
sys.path.insert(0, str(HCM_DIR / "prod"))
try:
    from exf_persistence import ExFPersistenceService, AlphaEngineDataChecker
    _HAS_PERSISTENCE = True
except ImportError as e:
    _HAS_PERSISTENCE = False
    print(f"Warning: Persistence layer not available: {e}")

# Pre-bound callables to avoid global/attribute lookups in the 0.5 s hot loop.
_isinstance = isinstance
# NOTE(review): _isin is never referenced anywhere in this file — either dead
# code from a refactor or kept for an external importer; confirm before removing.
_isin = ACB_KEYS.__contains__
|
||||
|
||||
|
||||
@task(name="push_indicators_to_hz", retries=3, retry_delay_seconds=1,
      cache_policy=None)  # Disable caching - client not serializable
def push_to_hz(client, payload):
    """Push one indicator snapshot to Hazelcast.

    Parameters
    ----------
    client : opaque Hazelcast client produced by ``make_hz_client``.
    payload : dict of numeric indicator values plus ``_``-prefixed metadata.

    Returns
    -------
    Whatever ``hz_push`` returns; the caller treats truthiness as success.

    Any exception propagates unchanged so Prefect's retry policy
    (3 retries, 1 s delay) re-runs the task and logs the traceback.
    """
    # Local import keeps the module importable when prod._hz_push is absent
    # (e.g. tooling that only introspects the flow).
    from prod._hz_push import hz_push
    # The original wrapped this call in ``try: ... except Exception: raise``,
    # a no-op — a bare re-raise with no logging/cleanup is identical to no
    # handler at all, and propagation is exactly what triggers Prefect retry.
    return hz_push(HZ_KEY, payload, client)
|
||||
|
||||
|
||||
@flow(name="exf-fetcher-flow", log_prints=False)  # Disable log_prints for performance
def exf_fetcher_flow(warmup_s: int = WARMUP_S):
    """Run the ExF live daemon: start RealTimeExFService, then push its
    lag-adjusted indicator snapshot to Hazelcast every HZ_PUSH_INTERVAL_S
    seconds until interrupted.

    Parameters
    ----------
    warmup_s : seconds to sleep after service start before the first push,
        giving the per-indicator pollers time to complete a first cycle.

    Raises
    ------
    Any exception from service startup, or a re-raised loop exception after
    more than 20 consecutive failures. KeyboardInterrupt is swallowed
    (clean shutdown). The ``finally`` block always stops the service,
    Hazelcast client, and persistence service.
    """
    # Deferred imports: resolved via the sys.path entries set up at module top.
    from realtime_exf_service import RealTimeExFService
    from prod._hz_push import make_hz_client

    log = get_run_logger()
    svc = None
    client = None

    # ── Startup ─────────────────────────────────────────────────────────────────
    # A startup failure is fatal — re-raise so Prefect marks the flow failed.
    try:
        svc = RealTimeExFService()
        svc.start()
        if _LOG_INFO:
            log.info("ExF svc started — warmup %ds", warmup_s)
    except Exception as e:
        if _LOG_INFO:
            log.error("CRITICAL: ExF svc start failed: %s", e)
        raise

    # Warmup period: let the indicator pollers populate state before pushing.
    time.sleep(warmup_s)

    # ── Hazelcast Connection ─────────────────────────────────────────────────────
    def _connect():
        # (Re)connect to Hazelcast, replacing the closure's `client`.
        # Returns True on success; on failure sets client=None and returns
        # False so the main loop keeps retrying instead of crashing.
        nonlocal client
        try:
            if client:
                try:
                    client.shutdown()
                except Exception:
                    pass  # Ignore shutdown errors
            client = make_hz_client()
            if _LOG_INFO:
                log.info("HZ connected")
            return True
        except Exception as e:
            if _LOG_INFO:
                log.warning("HZ connect failed: %s", e)
            client = None
            return False

    # A failed initial connect is non-fatal: the loop retries on each pass.
    if not _connect() and _LOG_INFO:
        log.warning("Initial HZ conn failed — retrying in loop")

    # ── Start Persistence Service (OFF hot path) ─────────────────────────────────
    persistence = None
    data_checker = None
    if _HAS_PERSISTENCE:
        try:
            persistence = ExFPersistenceService()
            persistence.start()
            data_checker = AlphaEngineDataChecker()
            if _LOG_INFO:
                log.info("Persistence svc started (5min flush interval)")
        except Exception as e:
            # Persistence is best-effort; the push loop runs without it.
            if _LOG_INFO:
                log.warning("Persistence svc failed to start: %s", e)

    # ── Main Loop ────────────────────────────────────────────────────────────────
    push_count = 0                    # successful HZ pushes
    insufficient_data_warnings = 0    # throttle counter for sufficiency warnings
    fail_count = 0                    # total failures (push + loop exceptions)
    consecutive_fails = 0             # reset on any successful push
    loop_iter = 0                     # every pass, success or not

    if _LOG_INFO:
        log.info("ExF loop live — HZ['%s'] every %.1fs", HZ_KEY, HZ_PUSH_INTERVAL_S)

    try:
        while True:
            t0 = time.monotonic()
            loop_iter += 1

            try:
                # Get dual-sampled snapshot (<1 ms) - ZERO logging here
                indicators = svc.get_indicators(dual_sample=True)

                # Build HZ payload - minimize object creation.
                # Only numeric values are forwarded; "_"-prefixed metadata
                # keys are recomputed below.
                staleness = indicators.pop("_staleness", {})
                payload = {k: v for k, v in indicators.items()
                           if _isinstance(v, (int, float))}
                payload["_staleness_s"] = {k: round(v, 1) for k, v in staleness.items()}
                payload["_acb_ready"] = all(k in payload for k in ACB_KEYS)
                # v == v filters out NaN (NaN != NaN); counts healthy floats.
                payload["_ok_count"] = sum(1 for v in payload.values()
                                           if _isinstance(v, float) and v == v)

                # Ensure HZ connection (silent unless fails 3x)
                if client is None:
                    if not _connect():
                        consecutive_fails += 1
                        if consecutive_fails > 10 and _LOG_INFO:
                            log.error("CRITICAL: HZ lost for %d+ iters", consecutive_fails)
                        time.sleep(1.0)  # back off before retrying the connect
                        continue

                # Execute push task - ZERO logging on success path.
                # .submit().result() blocks until Prefect retries are exhausted.
                try:
                    success = push_to_hz.submit(client, payload).result()
                except Exception:
                    success = False

                if success:
                    push_count += 1
                    consecutive_fails = 0
                    # Best-effort ClickHouse mirror of a few headline fields;
                    # any failure here must never affect the push loop.
                    try:
                        from ch_writer import ch_put, ts_us as _ts
                        ch_put("exf_data", {
                            "ts": _ts(),
                            "funding_rate": float(payload.get("funding_btc") or 0),
                            "dvol": float(payload.get("dvol_btc") or 0),
                            "fear_greed": float(payload.get("fng") or 0),
                            "taker_ratio": float(payload.get("taker") or 0),
                        })
                    except Exception:
                        pass

                    # Check data sufficiency for Alpha Engine (every 60 iterations = 30s)
                    if data_checker and (push_count % 60 == 0):
                        is_sufficient, details = data_checker.check_sufficiency(payload)
                        if not is_sufficient and _LOG_INFO:
                            insufficient_data_warnings += 1
                            if insufficient_data_warnings % 10 == 1:  # Log every 5 min
                                log.warning("Alpha Engine data INSUFFICIENT: score=%.2f, ACB=%d/%d, missing=%s",
                                            details['score'], details['acb_indicators'],
                                            len(data_checker.ACB_CRITICAL),
                                            details['missing_acb'][:3])

                    # Throttled status logging - only every LOG_STATUS_INTERVAL
                    # (~60 s). NOTE(review): reads data_checker._sufficiency_score,
                    # a private attribute — verify it exists before the first
                    # check_sufficiency() call, else this raises and is absorbed
                    # by the outer except as a loop failure.
                    if _LOG_INFO and (push_count % LOG_STATUS_INTERVAL == 1):
                        st = svc.status()
                        sufficiency = "SUFF" if (data_checker and data_checker._sufficiency_score > 0.7) else "INSUFF"
                        log.info("ExF#%d ok=%d/%d acb=%s data=%s",
                                 push_count, st['indicators_ok'],
                                 st['indicators_total'], payload['_acb_ready'],
                                 sufficiency)
                else:
                    fail_count += 1
                    consecutive_fails += 1
                    # Only log failures every 10th to avoid spam
                    if _LOG_INFO and (consecutive_fails % 10 == 1):
                        log.warning("HZ fail#%d (tot=%d)", consecutive_fails, fail_count)
                    if consecutive_fails >= 3 and client is not None:
                        _connect()  # Silent reconnect attempt

            except Exception as e:
                # Catch-all keeps the daemon alive through transient faults.
                fail_count += 1
                consecutive_fails += 1
                # Only log exceptions periodically
                if _LOG_INFO and (consecutive_fails % 10 == 1):
                    log.error("Loop exc: %s", e)
                if consecutive_fails > 20:
                    # Persistent failure: give up and let the supervisor restart.
                    if _LOG_INFO:
                        log.error("FATAL: 20+ consecutive failures")
                    raise

            # Maintain precise interval - this is critical: sleep only for the
            # remainder of the 0.5 s budget so push cadence does not drift.
            elapsed = time.monotonic() - t0
            sleep_time = HZ_PUSH_INTERVAL_S - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)
            elif _LOG_DEBUG:
                # Only log overruns in debug mode
                log.debug("Overrun: %.3fs", -sleep_time)

    except KeyboardInterrupt:
        if _LOG_INFO:
            log.info("Interrupted")
    except Exception:
        if _LOG_INFO:
            log.exception("Fatal error")
        raise
    finally:
        # Shutdown is strictly best-effort: each resource is released
        # independently so one failure cannot block the others.
        if _LOG_INFO:
            log.info("Shutting down — pushes=%d fails=%d", push_count, fail_count)
        if svc:
            try:
                svc.stop()
            except Exception:
                pass
        if client:
            try:
                client.shutdown()
            except Exception:
                pass
        if persistence:
            try:
                persistence.stop()
                if _LOG_INFO:
                    log.info("Persistence svc stopped (%d writes)",
                             persistence.stats.get('writes', 0))
            except Exception:
                pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: optionally override the warmup window, then run the
    # live flow in the foreground.
    cli = argparse.ArgumentParser(description="DOLPHIN ExF live daemon")
    cli.add_argument(
        "--warmup",
        type=int,
        default=WARMUP_S,
        help="Warmup seconds before first HZ push",
    )
    opts = cli.parse_args()
    exf_fetcher_flow(warmup_s=opts.warmup)
|
||||
Reference in New Issue
Block a user