290 lines
12 KiB
Python
290 lines
12 KiB
Python
|
|
"""DOLPHIN — ExF (External Factors) Live Daemon
|
||
|
|
================================================
|
||
|
|
Long-running process that wraps RealTimeExFService and streams its
|
||
|
|
lag-adjusted indicator snapshot to Hazelcast every `HZ_PUSH_INTERVAL_S`
|
||
|
|
seconds (default 5 s), satisfying the never-starve 5-second consumer guarantee.
|
||
|
|
|
||
|
|
Architecture (from ExF_EsoF_Complete_Specification.md):
|
||
|
|
- 85 indicators across 8 providers (Binance, Deribit, FRED, …)
|
||
|
|
- Per-indicator polling at native rate (5m Binance → 8h funding)
|
||
|
|
- Per-provider rate-limit semaphores (Binance 20/s, CoinMetrics 0.15/s)
|
||
|
|
- In-memory state: IndicatorState with daily_history deque (lag support)
|
||
|
|
- Consumer API: get_indicators(apply_lag=True) < 1 ms, never blocks
|
||
|
|
- HZ key: DOLPHIN_FEATURES['exf_latest']
|
||
|
|
|
||
|
|
Usage:
|
||
|
|
python prod/exf_fetcher_flow.py # run live
|
||
|
|
python prod/exf_fetcher_flow.py --warmup 60 # override warmup seconds
|
||
|
|
"""
|
||
|
|
|
||
|
|
import sys
import json
import logging
import signal
import time
import argparse
import traceback
from pathlib import Path
from datetime import datetime, timezone

# Make sibling packages importable when this file is run as a script:
# <repo>/external_factors provides realtime_exf_service, <repo> root provides
# the prod.* helpers imported lazily below.
HCM_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(HCM_DIR / "external_factors"))
sys.path.insert(0, str(HCM_DIR))
|
||
|
|
|
||
|
|
# ── Logging Setup ──────────────────────────────────────────────────────────────
# NOTE(review): the format string DOES include %(asctime)s, contradicting the
# earlier "no timestamp in formatter" note — confirm whether journald/syslog
# prepends its own timestamp (which would duplicate it).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s — %(message)s",
)
logger = logging.getLogger("exf_fetcher")

# Level checks cached once at import time to guard log-spam-prone call sites.
# NOTE(review): these are computed from THIS module's logger, but the flow body
# logs through Prefect's run logger — their effective levels may diverge.
_LOG_DEBUG = logger.isEnabledFor(logging.DEBUG)
_LOG_INFO = logger.isEnabledFor(logging.INFO)
|
||
|
|
|
||
|
|
# ── Constants ──────────────────────────────────────────────────────────────────
HZ_PUSH_INTERVAL_S = 0.5  # push to HZ every 0.5 s (aggressive oversampling vs 5 s SLA)
WARMUP_S = 30             # seconds to wait for the first indicator fetch cycle
HZ_KEY = "exf_latest"     # key within the DOLPHIN_FEATURES Hazelcast map

# Keys actually consumed by _calculate_signals() in AdaptiveCircuitBreaker.
# Trimmed from original broader set — mcap_bc and addr_btc are not used by
# ACBv6 computation and were causing _acb_ready to be permanently False.
ACB_KEYS = frozenset([
    "funding_btc", "fund_dbt_btc", "dvol_btc", "fng", "taker",
    "funding_eth", "dvol_eth", "vix", "ls_btc", "oi_btc", "fund_dbt_eth",
])

# Log throttling - only log status every N successful pushes
LOG_STATUS_INTERVAL = 120  # every ~60 seconds at the 0.5 s push interval (120 * 0.5s)
|
||
|
|
|
||
|
|
from prefect import flow, task, get_run_logger

# Import persistence layer (for NPZ logging); optional — the daemon still runs
# without it, it just skips the persistence service and data-sufficiency checks.
sys.path.insert(0, str(HCM_DIR / "prod"))
try:
    from exf_persistence import ExFPersistenceService, AlphaEngineDataChecker
    _HAS_PERSISTENCE = True
except ImportError as e:
    _HAS_PERSISTENCE = False
    print(f"Warning: Persistence layer not available: {e}")

# Pre-bound locals to skip global/attribute lookups in the hot loop.
_isinstance = isinstance
# NOTE(review): _isin is never referenced in this file (the loop uses
# `k in payload for k in ACB_KEYS`) — candidate for removal; confirm no
# external user before deleting.
_isin = ACB_KEYS.__contains__
|
||
|
|
|
||
|
|
|
||
|
|
@task(name="push_indicators_to_hz", retries=3, retry_delay_seconds=1,
|
||
|
|
cache_policy=None) # Disable caching - client not serializable
|
||
|
|
def push_to_hz(client, payload):
|
||
|
|
"""Push indicators to Hazelcast with automatic retry."""
|
||
|
|
from prod._hz_push import hz_push
|
||
|
|
try:
|
||
|
|
return hz_push(HZ_KEY, payload, client)
|
||
|
|
except Exception:
|
||
|
|
# Re-raise for Prefect retry - traceback logged by Prefect if configured
|
||
|
|
raise
|
||
|
|
|
||
|
|
|
||
|
|
@flow(name="exf-fetcher-flow", log_prints=False) # Disable log_prints for performance
|
||
|
|
def exf_fetcher_flow(warmup_s: int = WARMUP_S):
|
||
|
|
from realtime_exf_service import RealTimeExFService
|
||
|
|
from prod._hz_push import make_hz_client
|
||
|
|
|
||
|
|
log = get_run_logger()
|
||
|
|
svc = None
|
||
|
|
client = None
|
||
|
|
|
||
|
|
# ── Startup ─────────────────────────────────────────────────────────────────
|
||
|
|
try:
|
||
|
|
svc = RealTimeExFService()
|
||
|
|
svc.start()
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.info("ExF svc started — warmup %ds", warmup_s)
|
||
|
|
except Exception as e:
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.error("CRITICAL: ExF svc start failed: %s", e)
|
||
|
|
raise
|
||
|
|
|
||
|
|
# Warmup period
|
||
|
|
time.sleep(warmup_s)
|
||
|
|
|
||
|
|
# ── Hazelcast Connection ─────────────────────────────────────────────────────
|
||
|
|
def _connect():
|
||
|
|
nonlocal client
|
||
|
|
try:
|
||
|
|
if client:
|
||
|
|
try:
|
||
|
|
client.shutdown()
|
||
|
|
except Exception:
|
||
|
|
pass # Ignore shutdown errors
|
||
|
|
client = make_hz_client()
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.info("HZ connected")
|
||
|
|
return True
|
||
|
|
except Exception as e:
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.warning("HZ connect failed: %s", e)
|
||
|
|
client = None
|
||
|
|
return False
|
||
|
|
|
||
|
|
if not _connect() and _LOG_INFO:
|
||
|
|
log.warning("Initial HZ conn failed — retrying in loop")
|
||
|
|
|
||
|
|
# ── Start Persistence Service (OFF hot path) ─────────────────────────────────
|
||
|
|
persistence = None
|
||
|
|
data_checker = None
|
||
|
|
if _HAS_PERSISTENCE:
|
||
|
|
try:
|
||
|
|
persistence = ExFPersistenceService()
|
||
|
|
persistence.start()
|
||
|
|
data_checker = AlphaEngineDataChecker()
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.info("Persistence svc started (5min flush interval)")
|
||
|
|
except Exception as e:
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.warning("Persistence svc failed to start: %s", e)
|
||
|
|
|
||
|
|
# ── Main Loop ────────────────────────────────────────────────────────────────
|
||
|
|
push_count = 0
|
||
|
|
insufficient_data_warnings = 0
|
||
|
|
fail_count = 0
|
||
|
|
consecutive_fails = 0
|
||
|
|
loop_iter = 0
|
||
|
|
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.info("ExF loop live — HZ['%s'] every %.1fs", HZ_KEY, HZ_PUSH_INTERVAL_S)
|
||
|
|
|
||
|
|
try:
|
||
|
|
while True:
|
||
|
|
t0 = time.monotonic()
|
||
|
|
loop_iter += 1
|
||
|
|
|
||
|
|
try:
|
||
|
|
# Get dual-sampled snapshot (<1 ms) - ZERO logging here
|
||
|
|
indicators = svc.get_indicators(dual_sample=True)
|
||
|
|
|
||
|
|
# Build HZ payload - minimize object creation
|
||
|
|
staleness = indicators.pop("_staleness", {})
|
||
|
|
payload = {k: v for k, v in indicators.items()
|
||
|
|
if _isinstance(v, (int, float))}
|
||
|
|
payload["_staleness_s"] = {k: round(v, 1) for k, v in staleness.items()}
|
||
|
|
payload["_acb_ready"] = all(k in payload for k in ACB_KEYS)
|
||
|
|
payload["_ok_count"] = sum(1 for v in payload.values()
|
||
|
|
if _isinstance(v, float) and v == v)
|
||
|
|
|
||
|
|
# Ensure HZ connection (silent unless fails 3x)
|
||
|
|
if client is None:
|
||
|
|
if not _connect():
|
||
|
|
consecutive_fails += 1
|
||
|
|
if consecutive_fails > 10 and _LOG_INFO:
|
||
|
|
log.error("CRITICAL: HZ lost for %d+ iters", consecutive_fails)
|
||
|
|
time.sleep(1.0)
|
||
|
|
continue
|
||
|
|
|
||
|
|
# Execute push task - ZERO logging on success path
|
||
|
|
try:
|
||
|
|
success = push_to_hz.submit(client, payload).result()
|
||
|
|
except Exception:
|
||
|
|
success = False
|
||
|
|
|
||
|
|
if success:
|
||
|
|
push_count += 1
|
||
|
|
consecutive_fails = 0
|
||
|
|
try:
|
||
|
|
from ch_writer import ch_put, ts_us as _ts
|
||
|
|
ch_put("exf_data", {
|
||
|
|
"ts": _ts(),
|
||
|
|
"funding_rate": float(payload.get("funding_btc") or 0),
|
||
|
|
"dvol": float(payload.get("dvol_btc") or 0),
|
||
|
|
"fear_greed": float(payload.get("fng") or 0),
|
||
|
|
"taker_ratio": float(payload.get("taker") or 0),
|
||
|
|
})
|
||
|
|
except Exception:
|
||
|
|
pass
|
||
|
|
|
||
|
|
# Check data sufficiency for Alpha Engine (every 60 iterations = 30s)
|
||
|
|
if data_checker and (push_count % 60 == 0):
|
||
|
|
is_sufficient, details = data_checker.check_sufficiency(payload)
|
||
|
|
if not is_sufficient and _LOG_INFO:
|
||
|
|
insufficient_data_warnings += 1
|
||
|
|
if insufficient_data_warnings % 10 == 1: # Log every 5 min
|
||
|
|
log.warning("Alpha Engine data INSUFFICIENT: score=%.2f, ACB=%d/%d, missing=%s",
|
||
|
|
details['score'], details['acb_indicators'],
|
||
|
|
len(data_checker.ACB_CRITICAL),
|
||
|
|
details['missing_acb'][:3])
|
||
|
|
|
||
|
|
# Throttled status logging - only every LOG_STATUS_INTERVAL
|
||
|
|
if _LOG_INFO and (push_count % LOG_STATUS_INTERVAL == 1):
|
||
|
|
st = svc.status()
|
||
|
|
sufficiency = "SUFF" if (data_checker and data_checker._sufficiency_score > 0.7) else "INSUFF"
|
||
|
|
log.info("ExF#%d ok=%d/%d acb=%s data=%s",
|
||
|
|
push_count, st['indicators_ok'],
|
||
|
|
st['indicators_total'], payload['_acb_ready'],
|
||
|
|
sufficiency)
|
||
|
|
else:
|
||
|
|
fail_count += 1
|
||
|
|
consecutive_fails += 1
|
||
|
|
# Only log failures every 10th to avoid spam
|
||
|
|
if _LOG_INFO and (consecutive_fails % 10 == 1):
|
||
|
|
log.warning("HZ fail#%d (tot=%d)", consecutive_fails, fail_count)
|
||
|
|
if consecutive_fails >= 3 and client is not None:
|
||
|
|
_connect() # Silent reconnect attempt
|
||
|
|
|
||
|
|
except Exception as e:
|
||
|
|
fail_count += 1
|
||
|
|
consecutive_fails += 1
|
||
|
|
# Only log exceptions periodically
|
||
|
|
if _LOG_INFO and (consecutive_fails % 10 == 1):
|
||
|
|
log.error("Loop exc: %s", e)
|
||
|
|
if consecutive_fails > 20:
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.error("FATAL: 20+ consecutive failures")
|
||
|
|
raise
|
||
|
|
|
||
|
|
# Maintain precise interval - this is critical
|
||
|
|
elapsed = time.monotonic() - t0
|
||
|
|
sleep_time = HZ_PUSH_INTERVAL_S - elapsed
|
||
|
|
if sleep_time > 0:
|
||
|
|
time.sleep(sleep_time)
|
||
|
|
elif _LOG_DEBUG:
|
||
|
|
# Only log overruns in debug mode
|
||
|
|
log.debug("Overrun: %.3fs", -sleep_time)
|
||
|
|
|
||
|
|
except KeyboardInterrupt:
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.info("Interrupted")
|
||
|
|
except Exception:
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.exception("Fatal error")
|
||
|
|
raise
|
||
|
|
finally:
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.info("Shutting down — pushes=%d fails=%d", push_count, fail_count)
|
||
|
|
if svc:
|
||
|
|
try:
|
||
|
|
svc.stop()
|
||
|
|
except Exception:
|
||
|
|
pass
|
||
|
|
if client:
|
||
|
|
try:
|
||
|
|
client.shutdown()
|
||
|
|
except Exception:
|
||
|
|
pass
|
||
|
|
if persistence:
|
||
|
|
try:
|
||
|
|
persistence.stop()
|
||
|
|
if _LOG_INFO:
|
||
|
|
log.info("Persistence svc stopped (%d writes)",
|
||
|
|
persistence.stats.get('writes', 0))
|
||
|
|
except Exception:
|
||
|
|
pass
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
|
||
|
|
parser = argparse.ArgumentParser(description="DOLPHIN ExF live daemon")
|
||
|
|
parser.add_argument("--warmup", type=int, default=WARMUP_S,
|
||
|
|
help="Warmup seconds before first HZ push")
|
||
|
|
args = parser.parse_args()
|
||
|
|
exf_fetcher_flow(warmup_s=args.warmup)
|