# --- File-viewer residue from the original listing (kept for provenance) ---
# Files
# DOLPHIN/prod/exf_fetcher_flow.py
#
# 290 lines
# 12 KiB
# Python
# Raw Normal View History
"""DOLPHIN — ExF (External Factors) Live Daemon
================================================
Long-running process that wraps RealTimeExFService and streams its
lag-adjusted indicator snapshot to Hazelcast every `HZ_PUSH_INTERVAL_S`
seconds (default 5 s), satisfying the never-starve 5-second consumer guarantee.
Architecture (from ExF_EsoF_Complete_Specification.md):
- 85 indicators across 8 providers (Binance, Deribit, FRED, ...)
- Per-indicator polling at each indicator's native rate (e.g. 5m for Binance, 8h for funding)
- Per-provider rate-limit semaphores (Binance 20/s, CoinMetrics 0.15/s)
- In-memory state: IndicatorState with daily_history deque (lag support)
- Consumer API: get_indicators(apply_lag=True) < 1 ms, never blocks
- HZ key: DOLPHIN_FEATURES['exf_latest']
Usage:
python prod/exf_fetcher_flow.py # run live
python prod/exf_fetcher_flow.py --warmup 60 # override warmup seconds
"""
import sys
import json
import logging
import signal
import time
import argparse
import traceback
from pathlib import Path
from datetime import datetime, timezone
HCM_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(HCM_DIR / "external_factors"))
sys.path.insert(0, str(HCM_DIR))
# ── Logging Setup ──────────────────────────────────────────────────────────────
# Minimal formatting. NOTE: %(asctime)s is included here — drop it from the
# format string if journald/syslog already stamps each line.
logging.basicConfig(
    level=logging.INFO,
    # Fix: the original format was "%(name)s%(message)s" (no separator),
    # which fused the logger name into the message text.
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
logger = logging.getLogger("exf_fetcher")
# Cached level checks — guard log calls on the hot path so argument
# construction is skipped entirely when the level is disabled.
_LOG_DEBUG = logger.isEnabledFor(logging.DEBUG)
_LOG_INFO = logger.isEnabledFor(logging.INFO)

# ── Constants ──────────────────────────────────────────────────────────────────
HZ_PUSH_INTERVAL_S = 0.5  # push to HZ every 0.5 s (Aggressive Oversampling)
WARMUP_S = 30             # wait for first indicator fetch cycle
HZ_KEY = "exf_latest"

# Keys actually consumed by _calculate_signals() in AdaptiveCircuitBreaker.
# Trimmed from original broader set — mcap_bc and addr_btc are not used by
# ACBv6 computation and were causing _acb_ready to be permanently False.
ACB_KEYS = frozenset([
    "funding_btc", "fund_dbt_btc", "dvol_btc", "fng", "taker",
    "funding_eth", "dvol_eth", "vix", "ls_btc", "oi_btc", "fund_dbt_eth",
])

# Log throttling — only log status every N iterations.
LOG_STATUS_INTERVAL = 120  # every ~60 seconds (120 iterations * 0.5 s)
from prefect import flow, task, get_run_logger
# Import persistence layer (for NPZ logging)
sys.path.insert(0, str(HCM_DIR / "prod"))
# Optional persistence layer — the daemon degrades gracefully (no NPZ
# logging, no sufficiency checks) if the module is absent.
try:
    from exf_persistence import ExFPersistenceService, AlphaEngineDataChecker
    _HAS_PERSISTENCE = True
except ImportError as e:
    _HAS_PERSISTENCE = False
    print(f"Warning: Persistence layer not available: {e}")
# Pre-compiled for speed
# Bind builtins/bound-methods to module-level names so hot-loop lookups
# are a single global fetch instead of builtin/attribute resolution.
_isinstance = isinstance
# NOTE(review): _isin is not referenced in this visible chunk — possibly
# used elsewhere in the file, or dead; confirm before removing.
_isin = ACB_KEYS.__contains__
@task(name="push_indicators_to_hz", retries=3, retry_delay_seconds=1,
      cache_policy=None)  # Disable caching - client not serializable
def push_to_hz(client, payload):
    """Push the indicator payload to Hazelcast under HZ_KEY.

    Prefect handles transient failures via retries (3x, 1 s apart); any
    exception from hz_push propagates unchanged so the retry machinery
    and Prefect's logging fire.

    Args:
        client: Hazelcast client instance (passed straight to hz_push).
        payload: dict of indicator values plus meta keys.

    Returns:
        Whatever hz_push returns (truthy on success).
    """
    # Local import keeps the module importable even when the HZ helper's
    # dependencies aren't present at collection time.
    from prod._hz_push import hz_push
    # Fix: the original wrapped this in `try: ... except Exception: raise`,
    # which is a no-op — an uncaught exception already propagates to Prefect.
    return hz_push(HZ_KEY, payload, client)
@flow(name="exf-fetcher-flow", log_prints=False)  # Disable log_prints for performance
def exf_fetcher_flow(warmup_s: int = WARMUP_S) -> None:
    """Run the ExF live daemon: poll indicators and push to Hazelcast forever.

    Lifecycle: start RealTimeExFService → warmup sleep → connect HZ →
    start optional persistence → loop every HZ_PUSH_INTERVAL_S pushing a
    numeric snapshot to HZ['exf_latest'] until interrupted or 20+
    consecutive failures, then tear everything down in `finally`.

    Args:
        warmup_s: seconds to sleep after service start before the first push.

    Raises:
        Exception: re-raised on service start failure or on 20+ consecutive
            loop failures; KeyboardInterrupt is swallowed (clean shutdown).
    """
    # Project-local imports, resolved via the sys.path inserts at file top.
    from realtime_exf_service import RealTimeExFService
    from prod._hz_push import make_hz_client
    log = get_run_logger()
    svc = None
    client = None
    # ── Startup ─────────────────────────────────────────────────────────────────
    try:
        svc = RealTimeExFService()
        svc.start()
        if _LOG_INFO:
            log.info("ExF svc started — warmup %ds", warmup_s)
    except Exception as e:
        if _LOG_INFO:
            log.error("CRITICAL: ExF svc start failed: %s", e)
        raise  # cannot run without the indicator service
    # Warmup period — give the service one fetch cycle before first push.
    time.sleep(warmup_s)
    # ── Hazelcast Connection ─────────────────────────────────────────────────────
    def _connect() -> bool:
        # (Re)connect to Hazelcast, shutting down any stale client first.
        # On failure, leaves `client` as None so the main loop keeps retrying.
        nonlocal client
        try:
            if client:
                try:
                    client.shutdown()
                except Exception:
                    pass  # Ignore shutdown errors
            client = make_hz_client()
            if _LOG_INFO:
                log.info("HZ connected")
            return True
        except Exception as e:
            if _LOG_INFO:
                log.warning("HZ connect failed: %s", e)
            client = None
            return False
    if not _connect() and _LOG_INFO:
        log.warning("Initial HZ conn failed — retrying in loop")
    # ── Start Persistence Service (OFF hot path) ─────────────────────────────────
    # Best-effort: the loop runs fine with persistence/data_checker as None.
    persistence = None
    data_checker = None
    if _HAS_PERSISTENCE:
        try:
            persistence = ExFPersistenceService()
            persistence.start()
            data_checker = AlphaEngineDataChecker()
            if _LOG_INFO:
                log.info("Persistence svc started (5min flush interval)")
        except Exception as e:
            if _LOG_INFO:
                log.warning("Persistence svc failed to start: %s", e)
    # ── Main Loop ────────────────────────────────────────────────────────────────
    push_count = 0                    # successful HZ pushes
    insufficient_data_warnings = 0    # throttle counter for sufficiency warnings
    fail_count = 0                    # total failures (push + loop exceptions)
    consecutive_fails = 0             # reset on every successful push
    loop_iter = 0
    if _LOG_INFO:
        log.info("ExF loop live — HZ['%s'] every %.1fs", HZ_KEY, HZ_PUSH_INTERVAL_S)
    try:
        while True:
            t0 = time.monotonic()
            loop_iter += 1
            try:
                # Get dual-sampled snapshot (<1 ms) - ZERO logging here.
                # NOTE(review): docstring mentions apply_lag=True but the call
                # uses dual_sample=True — confirm semantics in RealTimeExFService.
                indicators = svc.get_indicators(dual_sample=True)
                # Build HZ payload - minimize object creation.
                # Only numeric values survive; "_staleness" is split out.
                staleness = indicators.pop("_staleness", {})
                payload = {k: v for k, v in indicators.items()
                           if _isinstance(v, (int, float))}
                payload["_staleness_s"] = {k: round(v, 1) for k, v in staleness.items()}
                payload["_acb_ready"] = all(k in payload for k in ACB_KEYS)
                # `v == v` filters NaN (NaN != NaN), so _ok_count = finite floats.
                payload["_ok_count"] = sum(1 for v in payload.values()
                                           if _isinstance(v, float) and v == v)
                # Ensure HZ connection (silent unless fails repeatedly).
                if client is None:
                    if not _connect():
                        consecutive_fails += 1
                        if consecutive_fails > 10 and _LOG_INFO:
                            log.error("CRITICAL: HZ lost for %d+ iters", consecutive_fails)
                        time.sleep(1.0)  # back off before the next connect attempt
                        continue
                # Execute push task - ZERO logging on success path.
                try:
                    success = push_to_hz.submit(client, payload).result()
                except Exception:
                    success = False  # exhausted Prefect retries → treat as failed push
                if success:
                    push_count += 1
                    consecutive_fails = 0
                    # Best-effort ClickHouse mirror of a few headline values;
                    # never allowed to break the push loop.
                    try:
                        from ch_writer import ch_put, ts_us as _ts
                        ch_put("exf_data", {
                            "ts": _ts(),
                            "funding_rate": float(payload.get("funding_btc") or 0),
                            "dvol": float(payload.get("dvol_btc") or 0),
                            "fear_greed": float(payload.get("fng") or 0),
                            "taker_ratio": float(payload.get("taker") or 0),
                        })
                    except Exception:
                        pass
                    # Check data sufficiency for Alpha Engine (every 60 pushes ≈ 30 s).
                    if data_checker and (push_count % 60 == 0):
                        is_sufficient, details = data_checker.check_sufficiency(payload)
                        if not is_sufficient and _LOG_INFO:
                            insufficient_data_warnings += 1
                            if insufficient_data_warnings % 10 == 1:  # Log every 5 min
                                log.warning("Alpha Engine data INSUFFICIENT: score=%.2f, ACB=%d/%d, missing=%s",
                                            details['score'], details['acb_indicators'],
                                            len(data_checker.ACB_CRITICAL),
                                            details['missing_acb'][:3])
                    # Throttled status logging - only every LOG_STATUS_INTERVAL pushes.
                    if _LOG_INFO and (push_count % LOG_STATUS_INTERVAL == 1):
                        st = svc.status()
                        sufficiency = "SUFF" if (data_checker and data_checker._sufficiency_score > 0.7) else "INSUFF"
                        log.info("ExF#%d ok=%d/%d acb=%s data=%s",
                                 push_count, st['indicators_ok'],
                                 st['indicators_total'], payload['_acb_ready'],
                                 sufficiency)
                else:
                    fail_count += 1
                    consecutive_fails += 1
                    # Only log failures every 10th to avoid spam.
                    if _LOG_INFO and (consecutive_fails % 10 == 1):
                        log.warning("HZ fail#%d (tot=%d)", consecutive_fails, fail_count)
                    if consecutive_fails >= 3 and client is not None:
                        _connect()  # Silent reconnect attempt
            except Exception as e:
                fail_count += 1
                consecutive_fails += 1
                # Only log exceptions periodically.
                if _LOG_INFO and (consecutive_fails % 10 == 1):
                    log.error("Loop exc: %s", e)
                if consecutive_fails > 20:
                    if _LOG_INFO:
                        log.error("FATAL: 20+ consecutive failures")
                    raise  # escalate to outer handler → shutdown via finally
            # Maintain precise interval - this is critical for the
            # never-starve 5-second consumer guarantee.
            elapsed = time.monotonic() - t0
            sleep_time = HZ_PUSH_INTERVAL_S - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)
            elif _LOG_DEBUG:
                # Only log overruns in debug mode.
                log.debug("Overrun: %.3fs", -sleep_time)
    except KeyboardInterrupt:
        if _LOG_INFO:
            log.info("Interrupted")
    except Exception:
        if _LOG_INFO:
            log.exception("Fatal error")
        raise
    finally:
        # Tear down in reverse-ish order; each step is independent and
        # best-effort so one failure never blocks the rest.
        if _LOG_INFO:
            log.info("Shutting down — pushes=%d fails=%d", push_count, fail_count)
        if svc:
            try:
                svc.stop()
            except Exception:
                pass
        if client:
            try:
                client.shutdown()
            except Exception:
                pass
        if persistence:
            try:
                persistence.stop()
                if _LOG_INFO:
                    log.info("Persistence svc stopped (%d writes)",
                             persistence.stats.get('writes', 0))
            except Exception:
                pass
if __name__ == "__main__":
    # CLI entry point: one optional flag to override the warmup delay.
    cli = argparse.ArgumentParser(description="DOLPHIN ExF live daemon")
    cli.add_argument(
        "--warmup",
        type=int,
        default=WARMUP_S,
        help="Warmup seconds before first HZ push",
    )
    ns = cli.parse_args()
    exf_fetcher_flow(warmup_s=ns.warmup)