initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
589
prod/obf_prefect_flow.py
Executable file
589
prod/obf_prefect_flow.py
Executable file
@@ -0,0 +1,589 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
DOLPHIN OBF — Order Book Feature Subsystem under Prefect
|
||||
=========================================================
|
||||
|
||||
Architecture mirrors ExF/EsoF flows exactly:
|
||||
- Async OBStreamService runs in a background thread (its own event loop).
|
||||
- LiveOBFeatureEngine computes all 4 sub-systems incrementally.
|
||||
- Per-asset raw OB pushed to HZ as asset_{ASSET}_ob.
|
||||
- Consolidated features pushed to HZ as obf_latest.
|
||||
- Local JSON cache written atomically for DOLPHIN scanner to read.
|
||||
- Parquet persistence via OBFPersistenceService (5-min flush).
|
||||
|
||||
Rate limits respected:
|
||||
- Binance Futures WS @depth@100ms: push stream, no REST rate limit concern.
|
||||
- Binance REST depth limit=1000: weight=20, used ONLY on init/reconnect.
|
||||
|
||||
Timing:
|
||||
- WS stream updates internal book at 100 ms granularity.
|
||||
- Feature extraction + HZ push: every HZ_PUSH_INTERVAL_S (0.5 s).
|
||||
- Parquet flush: every 300 s (background thread).
|
||||
|
||||
Fixes applied:
|
||||
P0-1 HZ circuit breaker — opens after N failures, resets after cooldown
|
||||
P0-2 WS stall watchdog — warns if OBStreamService.is_stale() > 30 s
|
||||
P0-4 Per-asset dark-streak counter — logs immediately after 5 consecutive None
|
||||
P1-1 Per-asset HZ pushes are fire-and-forget (no .result() block)
|
||||
P1-6 push_errors is Dict[key, int] — per-key breakdown in status log
|
||||
P1-7 _write_local_cache logs failures (not silent pass)
|
||||
P1-8 HZ connectivity probe before entering hot loop
|
||||
P2-5 AsyncOBThread exposes stop() for clean shutdown + is_stale() passthrough
|
||||
P3-3 Dead-man's switch — CRITICAL log if all assets dark > 60 s
|
||||
|
||||
Launch:
|
||||
cd /mnt/dolphinng5_predict/prod
|
||||
PREFECT_API_URL=http://localhost:4200/api \\
|
||||
nohup python3 obf_prefect_flow.py > /tmp/obf_prefect.log 2>&1 &
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import collections
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from prefect import flow, task, get_run_logger
|
||||
from prefect.cache_policies import NO_CACHE
|
||||
|
||||
_HERE = Path(__file__).parent
|
||||
sys.path.insert(0, str(_HERE))
|
||||
sys.path.insert(0, str(_HERE.parent))
|
||||
|
||||
from _hz_push import make_hz_client, hz_push
|
||||
|
||||
# ===========================================================================
# CONSTANTS
# ===========================================================================
# Hazelcast key the DOLPHIN consumers read consolidated features from.
HZ_KEY_CONSOLIDATED = "obf_latest"
# Per-asset raw order-book key template; filled via .format(asset=...).
HZ_KEY_PER_ASSET = "asset_{asset}_ob"
# Name of the shared Hazelcast map all OBF keys live in.
HZ_MAP = "DOLPHIN_FEATURES"

HZ_PUSH_INTERVAL_S = 0.5   # hot-loop cadence: feature extraction + HZ push (2 Hz)
WARMUP_S = 8               # seconds to let WS + REST book sync settle before the loop
LOG_STATUS_EVERY = 120     # status log every 120 successful pushes = 60 s at 2 Hz

ASSETS = ["BTCUSDT", "ETHUSDT", "SOLUSDT"]
OB_CACHE_DIR = _HERE.parent / "ob_cache"
OB_CACHE_FILE = OB_CACHE_DIR / "latest_ob_features.json"

# Order-book depth window, in percent around mid, kept by OBStreamService.
MAX_DEPTH_PCT = 5

# P0-1: circuit breaker thresholds
_HZ_CIRCUIT_OPEN_AFTER = 5    # consecutive consolidated-push failures before opening
_HZ_CIRCUIT_RESET_AFTER = 30  # cycles before trying again (~15 s at 2 Hz)

# P0-4 / P3-3: dark-streak thresholds
_DARK_WARN_AFTER = 5        # log warning after 5 dark cycles (2.5 s)
_DARK_CRITICAL_AFTER = 120  # log CRITICAL if ALL assets dark 60 s (P3-3)

# P1-7: local cache failure logging
_CACHE_LOG_EVERY = 60  # log 1st, 61st, 121st... failure = once per 30 s at 2 Hz
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# ASYNC OB STREAM THREAD
|
||||
# ===========================================================================
|
||||
|
||||
class AsyncOBThread(threading.Thread):
    """
    Runs OBStreamService in a background asyncio event loop.

    Exposes a sync API safe to call from the Prefect flow's synchronous context.

    Fixes applied:
        P0-2 is_stale() passthrough to OBStreamService.is_stale()
        P2-5 stop() for clean shutdown (resolves the park future and joins)

    Review fixes:
        - The park future is now created *before* _ready is set, so a caller
          that returns from wait_ready() and immediately calls stop() can no
          longer race a not-yet-created ``_stop_future`` (previously an
          AttributeError inside the loop callback left the future unresolved
          and join() hung until its timeout).
        - The stream task is cancelled and awaited on shutdown instead of
          being abandoned, so the event loop exits cleanly.
        - Removed the unused ``_stop_ev`` Event (dead code that contradicted
          the docstring — shutdown parks on a future, not an Event).
    """

    def __init__(self, assets: List[str], max_depth_pct: int = MAX_DEPTH_PCT):
        super().__init__(daemon=True, name="ob-stream-thread")
        # Lazy import: merely importing this module must not pull in the WS stack.
        from external_factors.ob_stream_service import OBStreamService
        self.service = OBStreamService(assets=assets, max_depth_pct=max_depth_pct)
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._ready = threading.Event()
        # Created on the loop thread in _run_forever(), before _ready is set.
        self._stop_future: Optional[asyncio.Future] = None
        self._stream_task: Optional[asyncio.Task] = None

    def run(self):
        """Thread entry point: dedicated event loop, runs until stop() resolves the park future."""
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)
        try:
            self._loop.run_until_complete(self._run_forever())
        finally:
            self._loop.close()

    async def _run_forever(self):
        """Start the WS stream, signal readiness, then park until stop()."""
        self._stream_task = asyncio.ensure_future(self.service.stream())
        # P2-5: park on a future that stop() resolves. Must exist BEFORE
        # _ready is set so stop() always finds it (race fix, see class doc).
        self._stop_future = self._loop.create_future()
        self._ready.set()
        try:
            await self._stop_future
        except asyncio.CancelledError:
            pass
        finally:
            # Tear down the stream task so run_until_complete() returns with
            # no pending tasks on the loop.
            self._stream_task.cancel()
            try:
                await self._stream_task
            except asyncio.CancelledError:
                pass
            except Exception:
                pass  # stream errors during shutdown are irrelevant

    def wait_ready(self, timeout: float = 10.0) -> bool:
        """Block until the event loop is running (True) or timeout elapses (False)."""
        return self._ready.wait(timeout=timeout)

    def stop(self, timeout: float = 5.0):
        """P2-5: request clean shutdown and join the thread (best-effort, bounded by timeout)."""
        loop = self._loop
        if loop is not None and loop.is_running():
            def _resolve():
                fut = self._stop_future
                if fut is not None and not fut.done():
                    fut.set_result(None)
            loop.call_soon_threadsafe(_resolve)
        self.join(timeout=timeout)

    def is_stale(self, threshold_s: float = 30.0) -> bool:
        """P0-2: True if OBStreamService has received no WS events for threshold_s."""
        return self.service.is_stale(threshold_s=threshold_s)

    def get_depth_buckets_sync(self, asset: str) -> Optional[dict]:
        """Fetch the current depth-bucket snapshot for *asset* from the loop thread.

        Returns None if the loop is not running, the coroutine fails, or it
        does not complete within one push interval (keeps the hot loop on time).
        """
        if self._loop is None or not self._loop.is_running():
            return None
        try:
            future = asyncio.run_coroutine_threadsafe(
                self.service.get_depth_buckets(asset), self._loop
            )
            return future.result(timeout=HZ_PUSH_INTERVAL_S)
        except Exception:
            return None

    @property
    def initialized(self) -> Dict[str, bool]:
        """Snapshot (copy) of per-asset book-initialization flags."""
        return dict(self.service.initialized)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# HZ PUSH TASK
|
||||
# ===========================================================================
|
||||
|
||||
@task(name="hz_push_obf", retries=3, retry_delay_seconds=2, cache_policy=NO_CACHE)
def hz_push_obf_task(client, key: str, payload: dict) -> bool:
    """
    Push one JSON payload into the DOLPHIN_FEATURES Hazelcast map.

    Stamps ``_pushed_at`` (UTC ISO timestamp) and ``_push_seq`` (epoch ms)
    onto a copy of *payload* before serializing, so the caller's dict is
    never mutated. Returns True on success.

    Review fix: failures now PROPAGATE instead of being swallowed into a
    ``return False``. The previous version made ``retries=3`` dead config
    (Prefect only retries on a raised exception) and silently defeated the
    flow's P0-1 circuit breaker and error accounting, both of which rely on
    ``.result()`` raising on failure.
    """
    data = dict(payload)
    data["_pushed_at"] = datetime.now(timezone.utc).isoformat()
    data["_push_seq"] = int(time.time() * 1000)
    client.get_map(HZ_MAP).blocking().put(key, json.dumps(data))
    return True
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# LOCAL CACHE WRITE (P1-7: logs failures instead of silent pass)
|
||||
# ===========================================================================
|
||||
|
||||
_cache_write_failures = 0  # process-lifetime count of failed cache writes (P1-7)


def _write_local_cache(payload: dict) -> bool:
    """
    Atomically publish *payload* as JSON to OB_CACHE_FILE.

    Write-to-temp-then-replace, so the DOLPHIN scanner never observes a
    partially written file. Returns True on success. On failure increments
    the module-level counter and logs every _CACHE_LOG_EVERY-th failure
    (P1-7 — failures are rate-limited, not silently swallowed).
    """
    global _cache_write_failures
    try:
        OB_CACHE_DIR.mkdir(parents=True, exist_ok=True)
        # PID-suffixed temp name keeps concurrent writers from clobbering
        # each other's in-progress file.
        tmp = OB_CACHE_DIR / f".ob_cache_{os.getpid()}.tmp"
        tmp.write_text(json.dumps(payload, default=str), encoding="utf-8")
        # Review fix: replace() instead of rename() — rename() raises
        # FileExistsError on Windows when the target already exists, which
        # is the steady-state case here. replace() atomically overwrites on
        # both POSIX and Windows.
        tmp.replace(OB_CACHE_FILE)
        return True
    except Exception as exc:
        _cache_write_failures += 1
        # Log the 1st, 61st, 121st... failure only — one line per ~30 s at 2 Hz.
        if _cache_write_failures % _CACHE_LOG_EVERY == 1:
            import logging
            logging.getLogger(__name__).warning(
                "OBF local cache write failed (%d times): %s", _cache_write_failures, exc
            )
        return False
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# HZ PRE-FLIGHT CHECK (P1-8)
|
||||
# ===========================================================================
|
||||
|
||||
def _hz_preflight(client, log, retries: int = 5, delay_s: float = 3.0) -> bool:
|
||||
"""Verify HZ connectivity before entering hot loop. Returns True if OK."""
|
||||
for attempt in range(1, retries + 1):
|
||||
try:
|
||||
client.get_map(HZ_MAP).blocking().put(
|
||||
"_obf_heartbeat",
|
||||
json.dumps({"ts": time.time(), "source": "obf_preflight"}),
|
||||
)
|
||||
log.info("HZ connectivity verified (attempt %d)", attempt)
|
||||
return True
|
||||
except Exception as e:
|
||||
log.warning("HZ preflight failed (attempt %d/%d): %s", attempt, retries, e)
|
||||
if attempt < retries:
|
||||
time.sleep(delay_s)
|
||||
return False
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# PREFECT FLOW
|
||||
# ===========================================================================
|
||||
|
||||
@flow(name="obf-prefect-flow", log_prints=True)
def obf_prefect_flow(
    warmup_s: float = float(WARMUP_S),
    poll_interval_s: float = float(HZ_PUSH_INTERVAL_S),
    assets: list = None,
):
    """
    Order Book Feature subsystem daemon.

    Runs indefinitely under Prefect:
      1. Start AsyncOBThread (WS stream + REST sync).
      2. Warm up for warmup_s seconds.
      3. Start OBFPersistenceService.
      4. HZ connectivity pre-flight check.
      5. Loop at poll_interval_s:
         a. Extract depth buckets per asset.
         b. Compute 4-subsystem OB features.
         c. Push per-asset raw OB to HZ (fire-and-forget).
         d. Push consolidated features to HZ (blocking — authoritative key).
         e. Write local JSON cache.
         f. Feed persistence buffer.
         g. Stall watchdog + dark-streak + dead-man's switch checks.
    """
    log = get_run_logger()

    # Copy so a caller-supplied list is never mutated; None -> module default.
    _assets = list(assets or ASSETS)
    log.info("=== OBF PREFECT FLOW STARTING ===")
    log.info("Assets: %s | push_interval=%.1fs | warmup=%ss", _assets, poll_interval_s, warmup_s)

    # -----------------------------------------------------------------------
    # 1. Start WebSocket stream thread
    # -----------------------------------------------------------------------
    ob_thread = AsyncOBThread(assets=_assets, max_depth_pct=MAX_DEPTH_PCT)
    ob_thread.start()
    if not ob_thread.wait_ready(timeout=10.0):
        log.error("AsyncOBThread event loop did not start in time — aborting")
        return
    log.info("OB WebSocket stream thread started")

    # -----------------------------------------------------------------------
    # 2. Warm up — give the WS + REST book sync time to populate depth buckets
    # -----------------------------------------------------------------------
    log.info("Warming up for %.0f s (WS + REST book sync)...", warmup_s)
    time.sleep(warmup_s)

    init_status = ob_thread.initialized
    log.info("Book init status after warmup: %s", init_status)
    n_ready = sum(v for v in init_status.values())  # bools sum to a live-count
    if n_ready == 0:
        # Not fatal: books can still initialize while the hot loop runs.
        log.warning("No assets initialized after warmup — will retry in hot loop")

    # -----------------------------------------------------------------------
    # 3. Start persistence service (5-min Parquet flush in background thread)
    # -----------------------------------------------------------------------
    from obf_persistence import OBFPersistenceService, LiveOBFeatureEngine

    persist = OBFPersistenceService(assets=_assets, flush_interval_s=300)
    persist.start()
    log.info("OBFPersistenceService started")

    # -----------------------------------------------------------------------
    # 4. Initialize LiveOBFeatureEngine (incremental 4-subsystem features)
    # -----------------------------------------------------------------------
    feature_engine = LiveOBFeatureEngine(assets=_assets)
    log.info("LiveOBFeatureEngine initialized")

    # -----------------------------------------------------------------------
    # 5. Connect to Hazelcast + pre-flight check (P1-8)
    # -----------------------------------------------------------------------
    log.info("Connecting to Hazelcast...")
    client = make_hz_client()

    if not _hz_preflight(client, log):
        # Abort rather than loop against a dead HZ; stop what we started.
        log.error("HZ preflight failed after retries — aborting flow")
        persist.stop()
        ob_thread.stop()
        return
    log.info("Hazelcast ready")

    # -----------------------------------------------------------------------
    # 6. Hot loop state
    # -----------------------------------------------------------------------
    pushes = 0
    push_errors: Dict[str, int] = collections.defaultdict(int)  # P1-6: per-key error counts

    # P0-1: circuit breaker state (driven by the consolidated push only)
    _hz_consec_failures = 0
    _hz_circuit_open = False
    _hz_cooldown = 0

    # P0-4: per-asset consecutive-None streak
    _none_streak: Dict[str, int] = {a: 0 for a in _assets}

    # P3-3: consecutive cycles with ZERO live assets
    _all_dark_cycles = 0

    # P0-2: stale watchdog latch — checked every cycle, logged edge-triggered
    # (once per stall episode, reset when events resume)
    _stale_logged = False

    last_lag_s = 0.0
    _push_seq = 0

    log.info("=== OBF HOT LOOP STARTED ===")

    try:
        while True:
            t0_mono = time.monotonic()  # monotonic base for interval pacing
            t0_wall = time.time()  # NOTE(review): unused — candidate for removal

            # ------------------------------------------------------------------
            # A. Extract current OB snapshots (None = book dark/uninitialized)
            # ------------------------------------------------------------------
            raw_snaps: Dict[str, Optional[dict]] = {}
            for asset in _assets:
                raw_snaps[asset] = ob_thread.get_depth_buckets_sync(asset)

            local_ts = time.time()

            # ------------------------------------------------------------------
            # P0-4: per-asset dark streak detection — warn exactly once when a
            # streak reaches the threshold, note recovery when it ends
            # ------------------------------------------------------------------
            n_init = 0
            for asset in _assets:
                if raw_snaps[asset] is None:
                    _none_streak[asset] += 1
                    if _none_streak[asset] == _DARK_WARN_AFTER:
                        log.warning(
                            "OBF: %s book dark for %d consecutive cycles (%.1f s)",
                            asset, _none_streak[asset],
                            _none_streak[asset] * poll_interval_s,
                        )
                else:
                    if _none_streak[asset] >= _DARK_WARN_AFTER:
                        log.info(
                            "OBF: %s book restored after %d dark cycles",
                            asset, _none_streak[asset],
                        )
                    _none_streak[asset] = 0
                    n_init += 1

            # ------------------------------------------------------------------
            # P3-3: dead-man's switch — all assets dark (CRITICAL fires once,
            # when the counter exactly hits the threshold)
            # ------------------------------------------------------------------
            if n_init == 0:
                _all_dark_cycles += 1
                if _all_dark_cycles == _DARK_CRITICAL_AFTER:
                    log.critical(
                        "OBF DEAD-MAN: ALL %d assets dark for %.0f s — "
                        "alpha engine is receiving neutral OB features",
                        len(_assets), _all_dark_cycles * poll_interval_s,
                    )
            else:
                _all_dark_cycles = 0

            # ------------------------------------------------------------------
            # B. Compute 4-subsystem OB features (engine handles None snaps)
            # ------------------------------------------------------------------
            feature_snaps = {a: raw_snaps[a] for a in _assets}
            feature_result = feature_engine.update(feature_snaps)
            per_asset_feat = feature_result["per_asset"]
            market_feat = feature_result["market"]
            macro_feat = feature_result["macro"]

            compute_ts = time.time()
            _push_seq += 1

            # ------------------------------------------------------------------
            # C. Build consolidated HZ payload (flat key space: market_*,
            # macro_*, then per-asset {asset}_* prefixed features)
            # ------------------------------------------------------------------
            consolidated: dict = {
                "timestamp": datetime.fromtimestamp(local_ts, tz=timezone.utc).isoformat(),
                "local_ts": local_ts,
                "compute_ts": compute_ts,
                "assets": _assets,
                "_push_seq": _push_seq,
                **{f"market_{k}": v for k, v in market_feat.items()},
                **{f"macro_{k}": v for k, v in macro_feat.items()},
            }
            for asset in _assets:
                paf = per_asset_feat.get(asset)
                pfx = f"{asset.lower()}_"
                if paf:
                    for k, v in paf.items():
                        consolidated[f"{pfx}{k}"] = v
                else:
                    # Dark asset: publish an explicit flag instead of stale values.
                    consolidated[f"{pfx}initialized"] = False

            consolidated["_n_assets_live"] = n_init
            consolidated["_n_assets_total"] = len(_assets)
            consolidated["_all_live"] = n_init == len(_assets)

            # ------------------------------------------------------------------
            # D. HZ push (P0-1: circuit breaker; P1-1: per-asset fire-and-forget)
            # ------------------------------------------------------------------
            if _hz_circuit_open:
                # Circuit open — skip HZ pushes, still persist locally
                _hz_cooldown -= 1
                if _hz_cooldown <= 0:
                    log.info("OBF circuit breaker resetting — retrying HZ")
                    _hz_circuit_open = False
                    _hz_consec_failures = 0
            else:
                # P1-1: per-asset pushes are fire-and-forget (no .result() block)
                for asset in _assets:
                    snap = raw_snaps.get(asset)
                    if snap is None:
                        continue
                    asset_payload = {
                        "timestamp": snap["timestamp"],
                        "asset": snap["asset"],
                        "bid_notional": list(snap["bid_notional"]),
                        "ask_notional": list(snap["ask_notional"]),
                        "bid_depth": list(snap["bid_depth"]),
                        "ask_depth": list(snap["ask_depth"]),
                        "best_bid": snap["best_bid"],
                        "best_ask": snap["best_ask"],
                        "spread_bps": snap["spread_bps"],
                    }
                    key = HZ_KEY_PER_ASSET.format(asset=asset)
                    try:
                        hz_push_obf_task.submit(client, key, asset_payload)
                        # fire-and-forget: no .result() — don't block hot loop
                    except Exception:
                        push_errors[key] += 1

                # Consolidated push blocks (authoritative key — consumers depend on it)
                # NOTE(review): consol_ok is assigned but never read afterwards.
                consol_ok = False
                try:
                    hz_push_obf_task.submit(
                        client, HZ_KEY_CONSOLIDATED, consolidated
                    ).result(timeout=1.5)
                    consol_ok = True
                    _hz_consec_failures = 0
                    pushes += 1
                except Exception:
                    push_errors[HZ_KEY_CONSOLIDATED] += 1
                    _hz_consec_failures += 1
                    if _hz_consec_failures >= _HZ_CIRCUIT_OPEN_AFTER:
                        log.error(
                            "OBF HZ CIRCUIT OPEN after %d consecutive failures — "
                            "skipping HZ pushes for %d cycles",
                            _hz_consec_failures, _HZ_CIRCUIT_RESET_AFTER,
                        )
                        _hz_circuit_open = True
                        _hz_cooldown = _HZ_CIRCUIT_RESET_AFTER

            # ------------------------------------------------------------------
            # E. Write local JSON cache (P1-7: logs failures) — runs even when
            # the HZ circuit is open, so the scanner keeps getting fresh data
            # ------------------------------------------------------------------
            _write_local_cache(consolidated)

            # ------------------------------------------------------------------
            # F. Feed persistence buffers (one flat row per live asset)
            # ------------------------------------------------------------------
            pushed_at_ts = time.time()
            last_lag_s = pushed_at_ts - local_ts  # snapshot-to-publish latency

            for asset in _assets:
                snap = raw_snaps.get(asset)
                paf = per_asset_feat.get(asset)
                if snap is None or paf is None:
                    continue

                bid_n = snap["bid_notional"]
                ask_n = snap["ask_notional"]

                # assumes bid/ask notional arrays have >= 5 buckets — matches
                # OBStreamService depth bucketing; TODO confirm against producer
                row = {
                    "exchange_ts": float(snap["timestamp"]),
                    "local_ts": local_ts,
                    "pushed_at": pushed_at_ts,
                    "lag_s": float(last_lag_s),
                    "asset": asset,
                    "best_bid": float(snap["best_bid"]),
                    "best_ask": float(snap["best_ask"]),
                    "spread_bps": float(snap["spread_bps"]),
                    "bid_notional_0": float(bid_n[0]),
                    "bid_notional_1": float(bid_n[1]),
                    "bid_notional_2": float(bid_n[2]),
                    "bid_notional_3": float(bid_n[3]),
                    "bid_notional_4": float(bid_n[4]),
                    "ask_notional_0": float(ask_n[0]),
                    "ask_notional_1": float(ask_n[1]),
                    "ask_notional_2": float(ask_n[2]),
                    "ask_notional_3": float(ask_n[3]),
                    "ask_notional_4": float(ask_n[4]),
                    "depth_1pct_usd": paf["depth_1pct_usd"],
                    "depth_quality": paf["depth_quality"],
                    "fill_probability": paf["fill_probability"],
                    "spread_proxy_bps": paf["spread_proxy_bps"],
                    "imbalance": paf["imbalance"],
                    "imbalance_ma5": paf["imbalance_ma5"],
                    "imbalance_persistence": paf["imbalance_persistence"],
                    "depth_asymmetry": paf["depth_asymmetry"],
                    "withdrawal_velocity": paf["withdrawal_velocity"],
                    "median_imbalance": market_feat["median_imbalance"],
                    "agreement_pct": market_feat["agreement_pct"],
                    "depth_pressure": market_feat["depth_pressure"],
                    "depth_velocity": macro_feat["depth_velocity"],
                    "cascade_count": macro_feat["cascade_count"],
                    "acceleration": macro_feat["acceleration"],
                    "regime_signal": macro_feat["regime_signal"],
                }
                persist.update_snapshot(asset, row)

            # ------------------------------------------------------------------
            # G. Periodic status log + watchdogs
            # NOTE(review): keyed on successful pushes, so while the HZ circuit
            # is open no status line is emitted.
            # ------------------------------------------------------------------
            if pushes % LOG_STATUS_EVERY == 0 and pushes > 0:
                stats = persist.get_stats()

                # P1-6: report top error keys
                top_errs = sorted(push_errors.items(), key=lambda x: -x[1])[:3]

                log.info(
                    "OBF status | pushes=%d lag_s=%.3f assets_live=%d/%d "
                    "files=%d hz_circuit=%s top_errors=%s",
                    pushes, last_lag_s, n_init, len(_assets),
                    stats.get("files_written", 0),
                    "OPEN" if _hz_circuit_open else "closed",
                    top_errs,
                )

                if last_lag_s > poll_interval_s * 2:
                    log.warning(
                        "OBF LAG DRIFT: lag_s=%.3f > 2×poll=%.3f",
                        last_lag_s, poll_interval_s * 2,
                    )

            # P0-2: WS stall watchdog — checked every cycle; the _stale_logged
            # latch makes the ERROR fire once per stall episode
            if ob_thread.is_stale(threshold_s=30.0):
                if not _stale_logged:
                    log.error(
                        "OBF WS STALL: no events received for > 30 s — "
                        "book data may be frozen"
                    )
                    _stale_logged = True
            else:
                _stale_logged = False

            # ------------------------------------------------------------------
            # H. Maintain push interval (monotonic clock — immune to wall jumps)
            # ------------------------------------------------------------------
            elapsed = time.monotonic() - t0_mono
            sleep_time = max(0.0, poll_interval_s - elapsed)
            if sleep_time > 0:
                time.sleep(sleep_time)

    except KeyboardInterrupt:
        log.info("OBF flow interrupted by KeyboardInterrupt")
    finally:
        # Always tear down background services, even on unexpected exceptions.
        persist.stop()
        ob_thread.stop()  # P2-5: clean shutdown
        total_errors = sum(push_errors.values())
        log.info(
            "OBF flow done — pushes=%d total_errors=%d error_breakdown=%s",
            pushes, total_errors, dict(push_errors),
        )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# ENTRY POINT
|
||||
# ===========================================================================
|
||||
|
||||
if __name__ == "__main__":
    # Direct launch (see module docstring for the PREFECT_API_URL/nohup line).
    obf_prefect_flow()
|
||||
Reference in New Issue
Block a user