590 lines
25 KiB
Python
590 lines
25 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
DOLPHIN OBF — Order Book Feature Subsystem under Prefect
|
|||
|
|
=========================================================
|
|||
|
|
|
|||
|
|
Architecture mirrors ExF/EsoF flows exactly:
|
|||
|
|
- Async OBStreamService runs in a background thread (its own event loop).
|
|||
|
|
- LiveOBFeatureEngine computes all 4 sub-systems incrementally.
|
|||
|
|
- Per-asset raw OB pushed to HZ as asset_{ASSET}_ob.
|
|||
|
|
- Consolidated features pushed to HZ as obf_latest.
|
|||
|
|
- Local JSON cache written atomically for DOLPHIN scanner to read.
|
|||
|
|
- Parquet persistence via OBFPersistenceService (5-min flush).
|
|||
|
|
|
|||
|
|
Rate limits respected:
|
|||
|
|
- Binance Futures WS @depth@100ms: push stream, no REST rate limit concern.
|
|||
|
|
- Binance REST depth limit=1000: weight=20, used ONLY on init/reconnect.
|
|||
|
|
|
|||
|
|
Timing:
|
|||
|
|
- WS stream updates internal book at 100 ms granularity.
|
|||
|
|
- Feature extraction + HZ push: every HZ_PUSH_INTERVAL_S (0.5 s).
|
|||
|
|
- Parquet flush: every 300 s (background thread).
|
|||
|
|
|
|||
|
|
Fixes applied:
|
|||
|
|
P0-1 HZ circuit breaker — opens after N failures, resets after cooldown
|
|||
|
|
P0-2 WS stall watchdog — warns if OBStreamService.is_stale() > 30 s
|
|||
|
|
P0-4 Per-asset dark-streak counter — logs immediately after 5 consecutive None
|
|||
|
|
P1-1 Per-asset HZ pushes are fire-and-forget (no .result() block)
|
|||
|
|
P1-6 push_errors is Dict[key, int] — per-key breakdown in status log
|
|||
|
|
P1-7 _write_local_cache logs failures (not silent pass)
|
|||
|
|
P1-8 HZ connectivity probe before entering hot loop
|
|||
|
|
P2-5 AsyncOBThread exposes stop() for clean shutdown + is_stale() passthrough
|
|||
|
|
P3-3 Dead-man's switch — CRITICAL log if all assets dark > 60 s
|
|||
|
|
|
|||
|
|
Launch:
|
|||
|
|
cd /mnt/dolphinng5_predict/prod
|
|||
|
|
PREFECT_API_URL=http://localhost:4200/api \\
|
|||
|
|
nohup python3 obf_prefect_flow.py > /tmp/obf_prefect.log 2>&1 &
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import asyncio
|
|||
|
|
import collections
|
|||
|
|
import json
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
import threading
|
|||
|
|
import time
|
|||
|
|
from datetime import datetime, timezone
|
|||
|
|
from pathlib import Path
|
|||
|
|
from typing import Dict, List, Optional
|
|||
|
|
|
|||
|
|
from prefect import flow, task, get_run_logger
|
|||
|
|
from prefect.cache_policies import NO_CACHE
|
|||
|
|
|
|||
|
|
_HERE = Path(__file__).parent
|
|||
|
|
sys.path.insert(0, str(_HERE))
|
|||
|
|
sys.path.insert(0, str(_HERE.parent))
|
|||
|
|
|
|||
|
|
from _hz_push import make_hz_client, hz_push
|
|||
|
|
|
|||
|
|
# ===========================================================================
# CONSTANTS
# ===========================================================================
HZ_KEY_CONSOLIDATED = "obf_latest"        # authoritative consolidated-features key
HZ_KEY_PER_ASSET = "asset_{asset}_ob"     # template: per-asset raw order-book key
HZ_MAP = "DOLPHIN_FEATURES"               # Hazelcast map all OBF keys live in

HZ_PUSH_INTERVAL_S = 0.5                  # hot-loop cadence: 2 Hz feature push
WARMUP_S = 8                              # seconds to let WS + REST book sync settle
LOG_STATUS_EVERY = 120  # every 60 s at 2 Hz

ASSETS = ["BTCUSDT", "ETHUSDT", "SOLUSDT"]  # default universe (flow arg can override)
OB_CACHE_DIR = _HERE.parent / "ob_cache"    # local JSON cache dir for DOLPHIN scanner
OB_CACHE_FILE = OB_CACHE_DIR / "latest_ob_features.json"

MAX_DEPTH_PCT = 5                         # depth-bucket span around mid (percent)

# P0-1: circuit breaker thresholds
_HZ_CIRCUIT_OPEN_AFTER = 5    # consecutive failures before opening
_HZ_CIRCUIT_RESET_AFTER = 30  # cycles before trying again (~15 s)

# P0-4 / P3-3: dark-streak thresholds
_DARK_WARN_AFTER = 5          # log warning after 5 dark cycles (2.5 s)
_DARK_CRITICAL_AFTER = 120    # log CRITICAL if ALL assets dark 60 s (P3-3)

# P1-7: local cache failure logging
_CACHE_LOG_EVERY = 60         # log once per 30 s at 2 Hz
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ===========================================================================
|
|||
|
|
# ASYNC OB STREAM THREAD
|
|||
|
|
# ===========================================================================
|
|||
|
|
|
|||
|
|
class AsyncOBThread(threading.Thread):
    """
    Runs OBStreamService in a background asyncio event loop.

    Exposes a sync API safe to call from the Prefect flow's synchronous context.

    Fixes applied:
      P0-2 is_stale() passthrough to OBStreamService.is_stale()
      P2-5 stop() for clean shutdown; _stop_future replaces create_future() park
      (fix) _stop_future is pre-initialized to None in __init__ so a stop()
           that races _run_forever() cannot raise AttributeError.
    """

    def __init__(self, assets: List[str], max_depth_pct: int = MAX_DEPTH_PCT):
        super().__init__(daemon=True, name="ob-stream-thread")
        # Lazy import: importing this module must not pull in external_factors.
        from external_factors.ob_stream_service import OBStreamService

        self.service = OBStreamService(assets=assets, max_depth_pct=max_depth_pct)
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._ready = threading.Event()
        self._stop_ev = threading.Event()  # P2-5
        # Created by _run_forever() once the loop parks; None until then.
        self._stop_future: Optional["asyncio.Future"] = None

    def run(self):
        # Thread entry point: own a fresh event loop and drive _run_forever().
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)
        self._loop.run_until_complete(self._run_forever())

    async def _run_forever(self):
        # Start the WS stream as a background task, signal readiness, then
        # park on a future that stop() resolves (P2-5).
        asyncio.create_task(self.service.stream())
        self._ready.set()
        self._stop_future = self._loop.create_future()
        try:
            await self._stop_future
        except asyncio.CancelledError:
            pass

    def wait_ready(self, timeout: float = 10.0) -> bool:
        """Block until the event loop has started. Returns False on timeout."""
        return self._ready.wait(timeout=timeout)

    def stop(self, timeout: float = 5.0):
        """P2-5: request clean shutdown and join."""
        if self._loop and self._loop.is_running():

            def _resolve() -> None:
                # Runs on the loop thread. Guard both "not yet created"
                # (stop() raced _run_forever) and "already done" (double stop).
                fut = self._stop_future
                if fut is not None and not fut.done():
                    fut.set_result(None)

            self._loop.call_soon_threadsafe(_resolve)
        self.join(timeout=timeout)

    def is_stale(self, threshold_s: float = 30.0) -> bool:
        """P0-2: True if OBStreamService has received no WS events for threshold_s."""
        return self.service.is_stale(threshold_s=threshold_s)

    def get_depth_buckets_sync(self, asset: str) -> Optional[dict]:
        """Fetch a depth-bucket snapshot from the loop thread.

        Returns None when the loop is not running or the fetch does not
        complete within one push interval (best-effort by design).
        """
        if self._loop is None or not self._loop.is_running():
            return None
        try:
            future = asyncio.run_coroutine_threadsafe(
                self.service.get_depth_buckets(asset), self._loop
            )
            return future.result(timeout=HZ_PUSH_INTERVAL_S)
        except Exception:
            return None

    @property
    def initialized(self) -> Dict[str, bool]:
        """Snapshot copy of the service's per-asset book-initialization flags."""
        return dict(self.service.initialized)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ===========================================================================
|
|||
|
|
# HZ PUSH TASK
|
|||
|
|
# ===========================================================================
|
|||
|
|
|
|||
|
|
@task(name="hz_push_obf", retries=3, retry_delay_seconds=2, cache_policy=NO_CACHE)
def hz_push_obf_task(client, key: str, payload: dict) -> bool:
    """Write *payload* (stamped with push metadata) to the HZ map under *key*.

    Returns True on success, False on any failure; Prefect's retry policy
    (3 retries, 2 s apart) handles transient errors.
    """
    try:
        stamped = dict(payload)
        stamped["_pushed_at"] = datetime.now(timezone.utc).isoformat()
        stamped["_push_seq"] = int(time.time() * 1000)
        target_map = client.get_map(HZ_MAP).blocking()
        target_map.put(key, json.dumps(stamped))
    except Exception:
        return False
    return True
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ===========================================================================
|
|||
|
|
# LOCAL CACHE WRITE (P1-7: logs failures instead of silent pass)
|
|||
|
|
# ===========================================================================
|
|||
|
|
|
|||
|
|
_cache_write_failures = 0  # module-level counter for throttled failure logging (P1-7)


def _write_local_cache(payload: dict) -> bool:
    """Atomically write *payload* as JSON to OB_CACHE_FILE.

    Writes to a pid-suffixed temp file first, then replaces the target, so
    the DOLPHIN scanner never observes a half-written file. Returns True on
    success. Failures are counted and logged once every _CACHE_LOG_EVERY
    occurrences (P1-7) instead of being silently swallowed.
    """
    global _cache_write_failures
    try:
        OB_CACHE_DIR.mkdir(parents=True, exist_ok=True)
        tmp = OB_CACHE_DIR / f".ob_cache_{os.getpid()}.tmp"
        tmp.write_text(json.dumps(payload, default=str), encoding="utf-8")
        # Path.replace() (os.replace) atomically overwrites the destination on
        # both POSIX and Windows; Path.rename() raises FileExistsError on
        # Windows when the target already exists, which would make every
        # write after the first one fail there.
        tmp.replace(OB_CACHE_FILE)
        return True
    except Exception as exc:
        _cache_write_failures += 1
        if _cache_write_failures % _CACHE_LOG_EVERY == 1:
            import logging

            logging.getLogger(__name__).warning(
                "OBF local cache write failed (%d times): %s", _cache_write_failures, exc
            )
        return False
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ===========================================================================
|
|||
|
|
# HZ PRE-FLIGHT CHECK (P1-8)
|
|||
|
|
# ===========================================================================
|
|||
|
|
|
|||
|
|
def _hz_preflight(client, log, retries: int = 5, delay_s: float = 3.0) -> bool:
|
|||
|
|
"""Verify HZ connectivity before entering hot loop. Returns True if OK."""
|
|||
|
|
for attempt in range(1, retries + 1):
|
|||
|
|
try:
|
|||
|
|
client.get_map(HZ_MAP).blocking().put(
|
|||
|
|
"_obf_heartbeat",
|
|||
|
|
json.dumps({"ts": time.time(), "source": "obf_preflight"}),
|
|||
|
|
)
|
|||
|
|
log.info("HZ connectivity verified (attempt %d)", attempt)
|
|||
|
|
return True
|
|||
|
|
except Exception as e:
|
|||
|
|
log.warning("HZ preflight failed (attempt %d/%d): %s", attempt, retries, e)
|
|||
|
|
if attempt < retries:
|
|||
|
|
time.sleep(delay_s)
|
|||
|
|
return False
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ===========================================================================
|
|||
|
|
# PREFECT FLOW
|
|||
|
|
# ===========================================================================
|
|||
|
|
|
|||
|
|
@flow(name="obf-prefect-flow", log_prints=True)
def obf_prefect_flow(
    warmup_s: float = float(WARMUP_S),
    poll_interval_s: float = float(HZ_PUSH_INTERVAL_S),
    assets: Optional[list] = None,
):
    """
    Order Book Feature subsystem daemon.

    Runs indefinitely under Prefect:
    1. Start AsyncOBThread (WS stream + REST sync).
    2. Warm up for warmup_s seconds.
    3. Start OBFPersistenceService.
    4. HZ connectivity pre-flight check.
    5. Loop at poll_interval_s:
       a. Extract depth buckets per asset.
       b. Compute 4-subsystem OB features.
       c. Push per-asset raw OB to HZ (fire-and-forget).
       d. Push consolidated features to HZ (blocking — authoritative key).
       e. Write local JSON cache.
       f. Feed persistence buffer.
       g. Stall watchdog + dark-streak + dead-man's switch checks.

    Args:
        warmup_s: seconds to sleep after starting the WS thread so books sync.
        poll_interval_s: hot-loop cadence (defaults to HZ_PUSH_INTERVAL_S = 0.5 s).
        assets: symbol list; falls back to module-level ASSETS when None/empty.
    """
    log = get_run_logger()

    # `assets or ASSETS` also maps an explicitly passed empty list to ASSETS.
    _assets = list(assets or ASSETS)
    log.info("=== OBF PREFECT FLOW STARTING ===")
    log.info("Assets: %s | push_interval=%.1fs | warmup=%ss", _assets, poll_interval_s, warmup_s)

    # -----------------------------------------------------------------------
    # 1. Start WebSocket stream thread
    # -----------------------------------------------------------------------
    ob_thread = AsyncOBThread(assets=_assets, max_depth_pct=MAX_DEPTH_PCT)
    ob_thread.start()
    if not ob_thread.wait_ready(timeout=10.0):
        log.error("AsyncOBThread event loop did not start in time — aborting")
        return
    log.info("OB WebSocket stream thread started")

    # -----------------------------------------------------------------------
    # 2. Warm up
    # -----------------------------------------------------------------------
    log.info("Warming up for %.0f s (WS + REST book sync)...", warmup_s)
    time.sleep(warmup_s)

    init_status = ob_thread.initialized
    log.info("Book init status after warmup: %s", init_status)
    n_ready = sum(v for v in init_status.values())  # bools sum as 0/1
    if n_ready == 0:
        log.warning("No assets initialized after warmup — will retry in hot loop")

    # -----------------------------------------------------------------------
    # 3. Start persistence service
    # -----------------------------------------------------------------------
    # Lazy import: keeps module import light and avoids circulars at load time.
    from obf_persistence import OBFPersistenceService, LiveOBFeatureEngine

    persist = OBFPersistenceService(assets=_assets, flush_interval_s=300)
    persist.start()
    log.info("OBFPersistenceService started")

    # -----------------------------------------------------------------------
    # 4. Initialize LiveOBFeatureEngine
    # -----------------------------------------------------------------------
    feature_engine = LiveOBFeatureEngine(assets=_assets)
    log.info("LiveOBFeatureEngine initialized")

    # -----------------------------------------------------------------------
    # 5. Connect to Hazelcast + pre-flight check (P1-8)
    # -----------------------------------------------------------------------
    log.info("Connecting to Hazelcast...")
    client = make_hz_client()

    if not _hz_preflight(client, log):
        log.error("HZ preflight failed after retries — aborting flow")
        persist.stop()
        ob_thread.stop()
        return
    log.info("Hazelcast ready")

    # -----------------------------------------------------------------------
    # 6. Hot loop state
    # -----------------------------------------------------------------------
    pushes = 0                                                   # successful consolidated pushes
    push_errors: Dict[str, int] = collections.defaultdict(int)   # P1-6: per-key failure counts

    # P0-1: circuit breaker state
    _hz_consec_failures = 0
    _hz_circuit_open = False
    _hz_cooldown = 0

    # P0-4: per-asset dark streak (consecutive cycles with a None snapshot)
    _none_streak: Dict[str, int] = {a: 0 for a in _assets}

    # P3-3: all-assets-dark counter
    _all_dark_cycles = 0

    # P0-2: stale watchdog — checked inside the periodic status block,
    # i.e. once every LOG_STATUS_EVERY cycles
    _stale_logged = False

    last_lag_s = 0.0
    _push_seq = 0

    log.info("=== OBF HOT LOOP STARTED ===")

    try:
        while True:
            t0_mono = time.monotonic()   # monotonic base for interval pacing
            t0_wall = time.time()        # wall-clock mark (currently informational)

            # ------------------------------------------------------------------
            # A. Extract current OB snapshots
            # ------------------------------------------------------------------
            raw_snaps: Dict[str, Optional[dict]] = {}
            for asset in _assets:
                # None when the loop is down or the fetch timed out (best-effort)
                raw_snaps[asset] = ob_thread.get_depth_buckets_sync(asset)

            local_ts = time.time()

            # ------------------------------------------------------------------
            # P0-4: per-asset dark streak detection
            # ------------------------------------------------------------------
            n_init = 0
            for asset in _assets:
                if raw_snaps[asset] is None:
                    _none_streak[asset] += 1
                    # == (not >=): warn exactly once per streak, at its start
                    if _none_streak[asset] == _DARK_WARN_AFTER:
                        log.warning(
                            "OBF: %s book dark for %d consecutive cycles (%.1f s)",
                            asset, _none_streak[asset],
                            _none_streak[asset] * poll_interval_s,
                        )
                else:
                    if _none_streak[asset] >= _DARK_WARN_AFTER:
                        log.info(
                            "OBF: %s book restored after %d dark cycles",
                            asset, _none_streak[asset],
                        )
                    _none_streak[asset] = 0
                    n_init += 1

            # ------------------------------------------------------------------
            # P3-3: dead-man's switch — all assets dark
            # ------------------------------------------------------------------
            if n_init == 0:
                _all_dark_cycles += 1
                # == so the CRITICAL fires once per outage, not every cycle
                if _all_dark_cycles == _DARK_CRITICAL_AFTER:
                    log.critical(
                        "OBF DEAD-MAN: ALL %d assets dark for %.0f s — "
                        "alpha engine is receiving neutral OB features",
                        len(_assets), _all_dark_cycles * poll_interval_s,
                    )
            else:
                _all_dark_cycles = 0

            # ------------------------------------------------------------------
            # B. Compute 4-subsystem OB features
            # ------------------------------------------------------------------
            feature_snaps = {a: raw_snaps[a] for a in _assets}
            # NOTE(review): assumes update() returns {"per_asset", "market",
            # "macro"} keys and tolerates None snapshots — defined in
            # obf_persistence; confirm there.
            feature_result = feature_engine.update(feature_snaps)
            per_asset_feat = feature_result["per_asset"]
            market_feat = feature_result["market"]
            macro_feat = feature_result["macro"]

            compute_ts = time.time()
            _push_seq += 1

            # ------------------------------------------------------------------
            # C. Build consolidated HZ payload
            # ------------------------------------------------------------------
            consolidated: dict = {
                "timestamp": datetime.fromtimestamp(local_ts, tz=timezone.utc).isoformat(),
                "local_ts": local_ts,
                "compute_ts": compute_ts,
                "assets": _assets,
                "_push_seq": _push_seq,
                # market/macro features flattened under prefixed keys
                **{f"market_{k}": v for k, v in market_feat.items()},
                **{f"macro_{k}": v for k, v in macro_feat.items()},
            }
            for asset in _assets:
                paf = per_asset_feat.get(asset)
                pfx = f"{asset.lower()}_"
                if paf:
                    for k, v in paf.items():
                        consolidated[f"{pfx}{k}"] = v
                else:
                    # explicit marker so consumers can tell "dark" from "missing"
                    consolidated[f"{pfx}initialized"] = False

            consolidated["_n_assets_live"] = n_init
            consolidated["_n_assets_total"] = len(_assets)
            consolidated["_all_live"] = n_init == len(_assets)

            # ------------------------------------------------------------------
            # D. HZ push (P0-1: circuit breaker; P1-1: per-asset fire-and-forget)
            # ------------------------------------------------------------------
            if _hz_circuit_open:
                # Circuit open — skip HZ pushes, still persist locally
                _hz_cooldown -= 1
                if _hz_cooldown <= 0:
                    log.info("OBF circuit breaker resetting — retrying HZ")
                    _hz_circuit_open = False
                    _hz_consec_failures = 0
            else:
                # P1-1: per-asset pushes are fire-and-forget (no .result() block)
                for asset in _assets:
                    snap = raw_snaps.get(asset)
                    if snap is None:
                        continue
                    asset_payload = {
                        "timestamp": snap["timestamp"],
                        "asset": snap["asset"],
                        # list() so deques/arrays serialize as JSON arrays
                        "bid_notional": list(snap["bid_notional"]),
                        "ask_notional": list(snap["ask_notional"]),
                        "bid_depth": list(snap["bid_depth"]),
                        "ask_depth": list(snap["ask_depth"]),
                        "best_bid": snap["best_bid"],
                        "best_ask": snap["best_ask"],
                        "spread_bps": snap["spread_bps"],
                    }
                    key = HZ_KEY_PER_ASSET.format(asset=asset)
                    try:
                        hz_push_obf_task.submit(client, key, asset_payload)
                        # fire-and-forget: no .result() — don't block hot loop
                    except Exception:
                        push_errors[key] += 1

                # Consolidated push blocks (authoritative key — consumers depend on it)
                consol_ok = False  # currently informational only
                try:
                    hz_push_obf_task.submit(
                        client, HZ_KEY_CONSOLIDATED, consolidated
                    ).result(timeout=1.5)
                    consol_ok = True
                    _hz_consec_failures = 0
                    pushes += 1
                except Exception:
                    push_errors[HZ_KEY_CONSOLIDATED] += 1
                    _hz_consec_failures += 1
                    if _hz_consec_failures >= _HZ_CIRCUIT_OPEN_AFTER:
                        log.error(
                            "OBF HZ CIRCUIT OPEN after %d consecutive failures — "
                            "skipping HZ pushes for %d cycles",
                            _hz_consec_failures, _HZ_CIRCUIT_RESET_AFTER,
                        )
                        _hz_circuit_open = True
                        _hz_cooldown = _HZ_CIRCUIT_RESET_AFTER

            # ------------------------------------------------------------------
            # E. Write local JSON cache (P1-7: logs failures)
            # ------------------------------------------------------------------
            _write_local_cache(consolidated)

            # ------------------------------------------------------------------
            # F. Feed persistence buffers
            # ------------------------------------------------------------------
            pushed_at_ts = time.time()
            last_lag_s = pushed_at_ts - local_ts   # snapshot-to-push latency

            for asset in _assets:
                snap = raw_snaps.get(asset)
                paf = per_asset_feat.get(asset)
                if snap is None or paf is None:
                    continue

                bid_n = snap["bid_notional"]
                ask_n = snap["ask_notional"]

                # One flat parquet row per asset per cycle; market/macro columns
                # are duplicated across assets by design (denormalized schema).
                row = {
                    "exchange_ts": float(snap["timestamp"]),
                    "local_ts": local_ts,
                    "pushed_at": pushed_at_ts,
                    "lag_s": float(last_lag_s),
                    "asset": asset,
                    "best_bid": float(snap["best_bid"]),
                    "best_ask": float(snap["best_ask"]),
                    "spread_bps": float(snap["spread_bps"]),
                    # assumes at least 5 notional buckets per side — TODO confirm
                    # against OBStreamService.get_depth_buckets()
                    "bid_notional_0": float(bid_n[0]),
                    "bid_notional_1": float(bid_n[1]),
                    "bid_notional_2": float(bid_n[2]),
                    "bid_notional_3": float(bid_n[3]),
                    "bid_notional_4": float(bid_n[4]),
                    "ask_notional_0": float(ask_n[0]),
                    "ask_notional_1": float(ask_n[1]),
                    "ask_notional_2": float(ask_n[2]),
                    "ask_notional_3": float(ask_n[3]),
                    "ask_notional_4": float(ask_n[4]),
                    "depth_1pct_usd": paf["depth_1pct_usd"],
                    "depth_quality": paf["depth_quality"],
                    "fill_probability": paf["fill_probability"],
                    "spread_proxy_bps": paf["spread_proxy_bps"],
                    "imbalance": paf["imbalance"],
                    "imbalance_ma5": paf["imbalance_ma5"],
                    "imbalance_persistence": paf["imbalance_persistence"],
                    "depth_asymmetry": paf["depth_asymmetry"],
                    "withdrawal_velocity": paf["withdrawal_velocity"],
                    "median_imbalance": market_feat["median_imbalance"],
                    "agreement_pct": market_feat["agreement_pct"],
                    "depth_pressure": market_feat["depth_pressure"],
                    "depth_velocity": macro_feat["depth_velocity"],
                    "cascade_count": macro_feat["cascade_count"],
                    "acceleration": macro_feat["acceleration"],
                    "regime_signal": macro_feat["regime_signal"],
                }
                persist.update_snapshot(asset, row)

            # ------------------------------------------------------------------
            # G. Periodic status log + watchdogs
            # ------------------------------------------------------------------
            if pushes % LOG_STATUS_EVERY == 0 and pushes > 0:
                stats = persist.get_stats()

                # P1-6: report top error keys
                top_errs = sorted(push_errors.items(), key=lambda x: -x[1])[:3]

                log.info(
                    "OBF status | pushes=%d lag_s=%.3f assets_live=%d/%d "
                    "files=%d hz_circuit=%s top_errors=%s",
                    pushes, last_lag_s, n_init, len(_assets),
                    stats.get("files_written", 0),
                    "OPEN" if _hz_circuit_open else "closed",
                    top_errs,
                )

                if last_lag_s > poll_interval_s * 2:
                    log.warning(
                        "OBF LAG DRIFT: lag_s=%.3f > 2×poll=%.3f",
                        last_lag_s, poll_interval_s * 2,
                    )

                # P0-2: WS stall watchdog (_stale_logged debounces the error so
                # it fires once per stall episode)
                if ob_thread.is_stale(threshold_s=30.0):
                    if not _stale_logged:
                        log.error(
                            "OBF WS STALL: no events received for > 30 s — "
                            "book data may be frozen"
                        )
                        _stale_logged = True
                else:
                    _stale_logged = False

            # ------------------------------------------------------------------
            # H. Maintain push interval (monotonic clock)
            # ------------------------------------------------------------------
            elapsed = time.monotonic() - t0_mono
            sleep_time = max(0.0, poll_interval_s - elapsed)
            if sleep_time > 0:
                time.sleep(sleep_time)

    except KeyboardInterrupt:
        log.info("OBF flow interrupted by KeyboardInterrupt")
    finally:
        # Shutdown order: stop flushing before tearing down the stream thread.
        persist.stop()
        ob_thread.stop()  # P2-5: clean shutdown
        total_errors = sum(push_errors.values())
        log.info(
            "OBF flow done — pushes=%d total_errors=%d error_breakdown=%s",
            pushes, total_errors, dict(push_errors),
        )
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ===========================================================================
# ENTRY POINT
# ===========================================================================

if __name__ == "__main__":
    # Direct invocation runs the flow as a plain function call under the
    # Prefect API configured via PREFECT_API_URL (see launch notes above).
    obf_prefect_flow()
|