initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
289
prod/exf_fetcher_flow.py
Executable file
289
prod/exf_fetcher_flow.py
Executable file
@@ -0,0 +1,289 @@
|
||||
"""DOLPHIN — ExF (External Factors) Live Daemon
|
||||
================================================
|
||||
Long-running process that wraps RealTimeExFService and streams its
|
||||
lag-adjusted indicator snapshot to Hazelcast every `HZ_PUSH_INTERVAL_S`
seconds (default 0.5 s — see the constant below), comfortably satisfying the
never-starve 5-second consumer guarantee.
|
||||
|
||||
Architecture (from ExF_EsoF_Complete_Specification.md):
|
||||
- 85 indicators across 8 providers (Binance, Deribit, FRED, …)
|
||||
- Per-indicator polling at native rate (5m Binance → 8h funding)
|
||||
- Per-provider rate-limit semaphores (Binance 20/s, CoinMetrics 0.15/s)
|
||||
- In-memory state: IndicatorState with daily_history deque (lag support)
|
||||
- Consumer API: get_indicators(apply_lag=True) < 1 ms, never blocks
|
||||
- HZ key: DOLPHIN_FEATURES['exf_latest']
|
||||
|
||||
Usage:
|
||||
python prod/exf_fetcher_flow.py # run live
|
||||
python prod/exf_fetcher_flow.py --warmup 60 # override warmup seconds
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
import signal
|
||||
import time
|
||||
import argparse
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
|
||||
HCM_DIR = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(HCM_DIR / "external_factors"))
|
||||
sys.path.insert(0, str(HCM_DIR))
|
||||
|
||||
# ── Logging Setup ──────────────────────────────────────────────────────────────
# Root logger at INFO with timestamp + level + logger name.
# NOTE(review): a previous comment said "no timestamp in formatter (handled by
# journald/syslog)", but the format string below DOES include %(asctime)s — if
# journald already stamps lines, drop %(asctime)s; until then the comment is
# corrected to match the code.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s — %(message)s",
)
logger = logging.getLogger("exf_fetcher")

# Guard flags evaluated ONCE at import time so the hot loop can skip log-call
# overhead entirely. Caveat: they will not track runtime level changes.
_LOG_DEBUG = logger.isEnabledFor(logging.DEBUG)
_LOG_INFO = logger.isEnabledFor(logging.INFO)

# ── Constants ──────────────────────────────────────────────────────────────────
HZ_PUSH_INTERVAL_S = 0.5  # push to HZ every 0.5 s (Aggressive Oversampling)
WARMUP_S = 30  # seconds to wait for the first indicator fetch cycle before pushing
HZ_KEY = "exf_latest"  # key written inside the DOLPHIN_FEATURES Hazelcast map
# Keys actually consumed by _calculate_signals() in AdaptiveCircuitBreaker.
# Trimmed from original broader set — mcap_bc and addr_btc are not used by
# ACBv6 computation and were causing _acb_ready to be permanently False.
ACB_KEYS = frozenset([
    "funding_btc", "fund_dbt_btc", "dvol_btc", "fng", "taker",
    "funding_eth", "dvol_eth", "vix", "ls_btc", "oi_btc", "fund_dbt_eth",
])

# Log throttling - only log status every N iterations
LOG_STATUS_INTERVAL = 120  # Log status every ~60 seconds (120 * 0.5s)

# Imported late (after sys.path setup above) so the prefect package resolves
# in the deployed environment.
from prefect import flow, task, get_run_logger

# Import persistence layer (for NPZ logging); optional — the daemon degrades
# gracefully to push-only mode when it is absent.
sys.path.insert(0, str(HCM_DIR / "prod"))
try:
    from exf_persistence import ExFPersistenceService, AlphaEngineDataChecker
    _HAS_PERSISTENCE = True
except ImportError as e:
    _HAS_PERSISTENCE = False
    print(f"Warning: Persistence layer not available: {e}")

# Pre-bound callables to avoid global/attribute lookups in the 0.5 s hot loop.
_isinstance = isinstance
# NOTE(review): _isin is never referenced anywhere in this file — either dead
# code from a refactor or kept for an external importer; confirm before removing.
_isin = ACB_KEYS.__contains__
|
||||
|
||||
|
||||
@task(name="push_indicators_to_hz", retries=3, retry_delay_seconds=1,
      cache_policy=None)  # Disable caching - client not serializable
def push_to_hz(client, payload):
    """Push one indicator snapshot to Hazelcast.

    Parameters
    ----------
    client : opaque Hazelcast client produced by ``make_hz_client``.
    payload : dict of numeric indicator values plus ``_``-prefixed metadata.

    Returns
    -------
    Whatever ``hz_push`` returns; the caller treats truthiness as success.

    Any exception propagates unchanged so Prefect's retry policy
    (3 retries, 1 s delay) re-runs the task and logs the traceback.
    """
    # Local import keeps the module importable when prod._hz_push is absent
    # (e.g. tooling that only introspects the flow).
    from prod._hz_push import hz_push
    # The original wrapped this call in ``try: ... except Exception: raise``,
    # a no-op — a bare re-raise with no logging/cleanup is identical to no
    # handler at all, and propagation is exactly what triggers Prefect retry.
    return hz_push(HZ_KEY, payload, client)
|
||||
|
||||
|
||||
@flow(name="exf-fetcher-flow", log_prints=False)  # Disable log_prints for performance
def exf_fetcher_flow(warmup_s: int = WARMUP_S):
    """Run the ExF live daemon: start RealTimeExFService, then push its
    lag-adjusted indicator snapshot to Hazelcast every HZ_PUSH_INTERVAL_S
    seconds until interrupted.

    Parameters
    ----------
    warmup_s : seconds to sleep after service start before the first push,
        giving the per-indicator pollers time to complete a first cycle.

    Raises
    ------
    Any exception from service startup, or a re-raised loop exception after
    more than 20 consecutive failures. KeyboardInterrupt is swallowed
    (clean shutdown). The ``finally`` block always stops the service,
    Hazelcast client, and persistence service.
    """
    # Deferred imports: resolved via the sys.path entries set up at module top.
    from realtime_exf_service import RealTimeExFService
    from prod._hz_push import make_hz_client

    log = get_run_logger()
    svc = None
    client = None

    # ── Startup ─────────────────────────────────────────────────────────────────
    # A startup failure is fatal — re-raise so Prefect marks the flow failed.
    try:
        svc = RealTimeExFService()
        svc.start()
        if _LOG_INFO:
            log.info("ExF svc started — warmup %ds", warmup_s)
    except Exception as e:
        if _LOG_INFO:
            log.error("CRITICAL: ExF svc start failed: %s", e)
        raise

    # Warmup period: let the indicator pollers populate state before pushing.
    time.sleep(warmup_s)

    # ── Hazelcast Connection ─────────────────────────────────────────────────────
    def _connect():
        # (Re)connect to Hazelcast, replacing the closure's `client`.
        # Returns True on success; on failure sets client=None and returns
        # False so the main loop keeps retrying instead of crashing.
        nonlocal client
        try:
            if client:
                try:
                    client.shutdown()
                except Exception:
                    pass  # Ignore shutdown errors
            client = make_hz_client()
            if _LOG_INFO:
                log.info("HZ connected")
            return True
        except Exception as e:
            if _LOG_INFO:
                log.warning("HZ connect failed: %s", e)
            client = None
            return False

    # A failed initial connect is non-fatal: the loop retries on each pass.
    if not _connect() and _LOG_INFO:
        log.warning("Initial HZ conn failed — retrying in loop")

    # ── Start Persistence Service (OFF hot path) ─────────────────────────────────
    persistence = None
    data_checker = None
    if _HAS_PERSISTENCE:
        try:
            persistence = ExFPersistenceService()
            persistence.start()
            data_checker = AlphaEngineDataChecker()
            if _LOG_INFO:
                log.info("Persistence svc started (5min flush interval)")
        except Exception as e:
            # Persistence is best-effort; the push loop runs without it.
            if _LOG_INFO:
                log.warning("Persistence svc failed to start: %s", e)

    # ── Main Loop ────────────────────────────────────────────────────────────────
    push_count = 0                    # successful HZ pushes
    insufficient_data_warnings = 0    # throttle counter for sufficiency warnings
    fail_count = 0                    # total failures (push + loop exceptions)
    consecutive_fails = 0             # reset on any successful push
    loop_iter = 0                     # every pass, success or not

    if _LOG_INFO:
        log.info("ExF loop live — HZ['%s'] every %.1fs", HZ_KEY, HZ_PUSH_INTERVAL_S)

    try:
        while True:
            t0 = time.monotonic()
            loop_iter += 1

            try:
                # Get dual-sampled snapshot (<1 ms) - ZERO logging here
                indicators = svc.get_indicators(dual_sample=True)

                # Build HZ payload - minimize object creation.
                # Only numeric values are forwarded; "_"-prefixed metadata
                # keys are recomputed below.
                staleness = indicators.pop("_staleness", {})
                payload = {k: v for k, v in indicators.items()
                           if _isinstance(v, (int, float))}
                payload["_staleness_s"] = {k: round(v, 1) for k, v in staleness.items()}
                payload["_acb_ready"] = all(k in payload for k in ACB_KEYS)
                # v == v filters out NaN (NaN != NaN); counts healthy floats.
                payload["_ok_count"] = sum(1 for v in payload.values()
                                           if _isinstance(v, float) and v == v)

                # Ensure HZ connection (silent unless fails 3x)
                if client is None:
                    if not _connect():
                        consecutive_fails += 1
                        if consecutive_fails > 10 and _LOG_INFO:
                            log.error("CRITICAL: HZ lost for %d+ iters", consecutive_fails)
                        time.sleep(1.0)  # back off before retrying the connect
                        continue

                # Execute push task - ZERO logging on success path.
                # .submit().result() blocks until Prefect retries are exhausted.
                try:
                    success = push_to_hz.submit(client, payload).result()
                except Exception:
                    success = False

                if success:
                    push_count += 1
                    consecutive_fails = 0
                    # Best-effort ClickHouse mirror of a few headline fields;
                    # any failure here must never affect the push loop.
                    try:
                        from ch_writer import ch_put, ts_us as _ts
                        ch_put("exf_data", {
                            "ts": _ts(),
                            "funding_rate": float(payload.get("funding_btc") or 0),
                            "dvol": float(payload.get("dvol_btc") or 0),
                            "fear_greed": float(payload.get("fng") or 0),
                            "taker_ratio": float(payload.get("taker") or 0),
                        })
                    except Exception:
                        pass

                    # Check data sufficiency for Alpha Engine (every 60 iterations = 30s)
                    if data_checker and (push_count % 60 == 0):
                        is_sufficient, details = data_checker.check_sufficiency(payload)
                        if not is_sufficient and _LOG_INFO:
                            insufficient_data_warnings += 1
                            if insufficient_data_warnings % 10 == 1:  # Log every 5 min
                                log.warning("Alpha Engine data INSUFFICIENT: score=%.2f, ACB=%d/%d, missing=%s",
                                            details['score'], details['acb_indicators'],
                                            len(data_checker.ACB_CRITICAL),
                                            details['missing_acb'][:3])

                    # Throttled status logging - only every LOG_STATUS_INTERVAL
                    # (~60 s). NOTE(review): reads data_checker._sufficiency_score,
                    # a private attribute — verify it exists before the first
                    # check_sufficiency() call, else this raises and is absorbed
                    # by the outer except as a loop failure.
                    if _LOG_INFO and (push_count % LOG_STATUS_INTERVAL == 1):
                        st = svc.status()
                        sufficiency = "SUFF" if (data_checker and data_checker._sufficiency_score > 0.7) else "INSUFF"
                        log.info("ExF#%d ok=%d/%d acb=%s data=%s",
                                 push_count, st['indicators_ok'],
                                 st['indicators_total'], payload['_acb_ready'],
                                 sufficiency)
                else:
                    fail_count += 1
                    consecutive_fails += 1
                    # Only log failures every 10th to avoid spam
                    if _LOG_INFO and (consecutive_fails % 10 == 1):
                        log.warning("HZ fail#%d (tot=%d)", consecutive_fails, fail_count)
                    if consecutive_fails >= 3 and client is not None:
                        _connect()  # Silent reconnect attempt

            except Exception as e:
                # Catch-all keeps the daemon alive through transient faults.
                fail_count += 1
                consecutive_fails += 1
                # Only log exceptions periodically
                if _LOG_INFO and (consecutive_fails % 10 == 1):
                    log.error("Loop exc: %s", e)
                if consecutive_fails > 20:
                    # Persistent failure: give up and let the supervisor restart.
                    if _LOG_INFO:
                        log.error("FATAL: 20+ consecutive failures")
                    raise

            # Maintain precise interval - this is critical: sleep only for the
            # remainder of the 0.5 s budget so push cadence does not drift.
            elapsed = time.monotonic() - t0
            sleep_time = HZ_PUSH_INTERVAL_S - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)
            elif _LOG_DEBUG:
                # Only log overruns in debug mode
                log.debug("Overrun: %.3fs", -sleep_time)

    except KeyboardInterrupt:
        if _LOG_INFO:
            log.info("Interrupted")
    except Exception:
        if _LOG_INFO:
            log.exception("Fatal error")
        raise
    finally:
        # Shutdown is strictly best-effort: each resource is released
        # independently so one failure cannot block the others.
        if _LOG_INFO:
            log.info("Shutting down — pushes=%d fails=%d", push_count, fail_count)
        if svc:
            try:
                svc.stop()
            except Exception:
                pass
        if client:
            try:
                client.shutdown()
            except Exception:
                pass
        if persistence:
            try:
                persistence.stop()
                if _LOG_INFO:
                    log.info("Persistence svc stopped (%d writes)",
                             persistence.stats.get('writes', 0))
            except Exception:
                pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: optionally override the warmup window, then run the
    # live flow in the foreground.
    cli = argparse.ArgumentParser(description="DOLPHIN ExF live daemon")
    cli.add_argument(
        "--warmup",
        type=int,
        default=WARMUP_S,
        help="Warmup seconds before first HZ push",
    )
    opts = cli.parse_args()
    exf_fetcher_flow(warmup_s=opts.warmup)
|
||||
Reference in New Issue
Block a user