Files
DOLPHIN/external_factors/ob_stream_service.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

352 lines
14 KiB
Python
Executable File

import asyncio
import aiohttp
import json
import time
import logging
import numpy as np
from typing import Dict, List, Optional
from collections import defaultdict
# Setup basic logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
logger = logging.getLogger("OBStreamService")

# websockets is imported lazily again inside OBStreamService.stream(); a missing
# package therefore only warns at module load instead of crashing the import.
try:
    import websockets
except ImportError:
    logger.warning("websockets package not found. Run pip install websockets aiohttp")

# Reconnect back-off constants (P1-2)
_RECONNECT_BASE_S = 3.0     # initial delay before the first reconnect attempt
_RECONNECT_MAX_S = 120.0    # back-off ceiling
_RECONNECT_STABLE_S = 60.0  # reset back-off if connected this long without error

# Stall detection (P0-2): warn if no WS message for this many seconds
_STALE_THRESHOLD_S = 30.0
class OBStreamService:
    """
    Real-Time Order Book Streamer for Binance Futures.

    Maintains a local L2 book per asset by combining a REST depth snapshot
    with the ``<symbol>@depth@100ms`` diff stream, and exposes bucketed
    notional depth via :meth:`get_depth_buckets`.

    Fixes applied:
        P0-2 last_event_ts for WS stall detection (is_stale())
        P0-3 Crossed-book guard in get_depth_buckets()
        P1-2 Exponential back-off on reconnect (max 120s, jitter)
        P1-5 Shared aiohttp.ClientSession for REST calls (no new session per call)
        P2-1 Unknown asset symbol in WS event ignored, no KeyError

    Review fixes (this pass):
        - Snapshot tasks are held by strong reference (asyncio keeps only weak
          refs to tasks; unreferenced tasks can be garbage-collected mid-run)
        - Generic stream errors that occur *after* a successful connect now
          trigger a full book resync, same as ConnectionClosed
        - Removed unreachable back-off reset after the inner recv loop
    """

    def __init__(self, assets: List[str], max_depth_pct: int = 5):
        """
        Args:
            assets: Symbols to track (e.g. ["BTCUSDT"]); normalized to upper-case.
            max_depth_pct: Number of 1%-wide depth buckets per book side.
        """
        self.assets = [a.upper() for a in assets]
        self.streams = [f"{a.lower()}@depth@100ms" for a in self.assets]
        self.max_depth_pct = max_depth_pct
        # In-memory Order Book caches (Price -> Quantity)
        self.bids: Dict[str, Dict[float, float]] = {a: {} for a in self.assets}
        self.asks: Dict[str, Dict[float, float]] = {a: {} for a in self.assets}
        # Synchronization mechanisms
        self.last_update_id: Dict[str, int] = {a: 0 for a in self.assets}
        self.buffer: Dict[str, List] = {a: [] for a in self.assets}
        self.initialized: Dict[str, bool] = {a: False for a in self.assets}
        # Per-asset asyncio lock (P2-1: keyed only on known assets)
        self.locks: Dict[str, asyncio.Lock] = {a: asyncio.Lock() for a in self.assets}
        # P0-2: WS stall detection — updated on every received message
        self.last_event_ts: float = 0.0
        # P1-5: shared session — created lazily in stream(), closed on exit
        self._session: Optional["aiohttp.ClientSession"] = None
        # FIX: strong references to in-flight snapshot tasks so the event loop
        # cannot garbage-collect them (asyncio.create_task keeps weak refs only)
        self._snapshot_tasks: set = set()
        # Gold Path: Rate Limit Monitoring (AGENT-SPEC-GOLDPATH)
        self.rate_limits: Dict[str, str] = {}

    # ------------------------------------------------------------------
    # P0-2: stale check callable from AsyncOBThread
    # ------------------------------------------------------------------
    def is_stale(self, threshold_s: Optional[float] = None) -> bool:
        """Return True if no WS event has been received for threshold_s seconds.

        Args:
            threshold_s: Staleness threshold; defaults to _STALE_THRESHOLD_S
                (resolved at call time, not at def time).
        """
        if self.last_event_ts == 0.0:
            return False  # hasn't started yet — not stale, just cold
        if threshold_s is None:
            threshold_s = _STALE_THRESHOLD_S
        return (time.time() - self.last_event_ts) > threshold_s

    # ------------------------------------------------------------------
    # Snapshot task management
    # ------------------------------------------------------------------
    def _spawn_snapshot(self, asset: str) -> None:
        """Schedule a REST snapshot fetch, holding a strong task reference.

        asyncio only keeps weak references to scheduled tasks; without pinning
        the task here it could be collected before completing.
        """
        task = asyncio.create_task(self.fetch_snapshot(asset))
        self._snapshot_tasks.add(task)
        task.add_done_callback(self._snapshot_tasks.discard)

    def _resync_all(self) -> None:
        """Mark every book stale and re-fire REST snapshots (post-disconnect)."""
        for a in self.assets:
            self.initialized[a] = False
            self._spawn_snapshot(a)

    # ------------------------------------------------------------------
    # REST snapshot (P1-5: reuses shared session)
    # ------------------------------------------------------------------
    async def fetch_snapshot(self, asset: str):
        """Fetch REST snapshot of the Order Book to initialize local state."""
        url = f"https://fapi.binance.com/fapi/v1/depth?symbol={asset}&limit=1000"
        try:
            session = self._session
            if session is None or session.closed:
                # Fallback: create a temporary session if shared one not ready
                async with aiohttp.ClientSession() as tmp_session:
                    await self._do_fetch(tmp_session, asset, url)
                return
            await self._do_fetch(session, asset, url)
        except Exception as e:
            logger.error(f"Error initializing snapshot for {asset}: {e}")

    async def _do_fetch(self, session: "aiohttp.ClientSession", asset: str, url: str):
        """Perform the snapshot GET, seed the book, and replay buffered diffs.

        Buffered WS events that arrived while REST was in flight are applied
        if their final update id ('u') is newer than the snapshot's.
        """
        async with session.get(url) as resp:
            # Capture Rate Limits (Gold Spec)
            for k, v in resp.headers.items():
                if k.lower().startswith("x-mbx-used-weight-"):
                    self.rate_limits[k] = v
            data = await resp.json()
            if 'lastUpdateId' not in data:
                logger.error(f"Failed to fetch snapshot for {asset}: {data}")
                return
            last_id = data['lastUpdateId']
            async with self.locks[asset]:
                self.bids[asset] = {float(p): float(q) for p, q in data['bids']}
                self.asks[asset] = {float(p): float(q) for p, q in data['asks']}
                self.last_update_id[asset] = last_id
                # Apply any buffered updates that arrived while REST was in flight
                for event in self.buffer[asset]:
                    if event['u'] <= last_id:
                        continue  # already reflected in snapshot
                    self._apply_event(asset, event)
                self.buffer[asset].clear()
                self.initialized[asset] = True
                logger.info(f"Synchronized L2 book for {asset} (UpdateId: {last_id})")

    # ------------------------------------------------------------------
    # Book maintenance
    # ------------------------------------------------------------------
    def _apply_event(self, asset: str, event: dict):
        """Apply a streaming diff event to the local book.

        A quantity of 0 deletes the price level; any other quantity replaces it.
        Caller must hold self.locks[asset].
        TODO(review): the futures stream's 'pu' (previous update id) field is
        not validated here — a dropped diff would silently drift the book until
        the next resync. Confirm whether upstream gap detection exists.
        """
        bids = self.bids[asset]
        asks = self.asks[asset]
        for p_str, q_str in event['b']:
            p, q = float(p_str), float(q_str)
            if q == 0.0:
                bids.pop(p, None)
            else:
                bids[p] = q
        for p_str, q_str in event['a']:
            p, q = float(p_str), float(q_str)
            if q == 0.0:
                asks.pop(p, None)
            else:
                asks[p] = q
        self.last_update_id[asset] = event['u']

    # ------------------------------------------------------------------
    # Main WS loop (P1-2: exp backoff; P1-5: shared session; P2-1: unknown symbol guard)
    # ------------------------------------------------------------------
    async def stream(self):
        """Main loop: connect to WebSocket streams and maintain books.

        Runs forever. On disconnect it reconnects with jittered exponential
        back-off (P1-2) and resynchronizes all books via REST snapshots.
        """
        import websockets
        import random

        stream_url = (
            "wss://fstream.binance.com/stream?streams=" + "/".join(self.streams)
        )
        logger.info(f"Connecting to Binance Stream: {stream_url}")
        reconnect_delay = _RECONNECT_BASE_S
        # P1-5: create shared session for lifetime of stream()
        async with aiohttp.ClientSession() as session:
            self._session = session
            # Fire REST snapshots concurrently using shared session
            for a in self.assets:
                self._spawn_snapshot(a)
            while True:
                connect_start = time.monotonic()
                was_connected = False  # True once the WS handshake succeeded
                try:
                    async with websockets.connect(
                        stream_url, ping_interval=20, ping_timeout=20
                    ) as ws:
                        was_connected = True
                        logger.info("WebSocket connected. Streaming depth diffs...")
                        while True:
                            msg = await ws.recv()
                            # P0-2: stamp every received message
                            self.last_event_ts = time.time()
                            data = json.loads(msg)
                            if 'data' not in data:
                                continue
                            ev = data['data']
                            # P2-1: ignore events for unknown symbols — no KeyError
                            asset = ev.get('s', '').upper()
                            if asset not in self.locks:
                                logger.debug("Ignoring WS event for untracked symbol: %s", asset)
                                continue
                            async with self.locks[asset]:
                                if not self.initialized[asset]:
                                    self.buffer[asset].append(ev)
                                else:
                                    self._apply_event(asset, ev)
                except websockets.exceptions.ConnectionClosed as e:
                    connected_s = time.monotonic() - connect_start
                    logger.warning(
                        "WebSocket closed (%s). Connected %.1fs. Reconnecting in %.1fs...",
                        e, connected_s, reconnect_delay,
                    )
                    # P1-2: reset back-off if connection was stable long enough
                    if connected_s >= _RECONNECT_STABLE_S:
                        reconnect_delay = _RECONNECT_BASE_S
                    # Re-init all assets, re-fire REST snapshots
                    self._resync_all()
                    await asyncio.sleep(reconnect_delay + random.uniform(0, 1))
                    # P1-2: double delay for next failure, cap at max
                    reconnect_delay = min(reconnect_delay * 2, _RECONNECT_MAX_S)
                except Exception as e:
                    logger.error("Stream error: %s", e)
                    # FIX: an error raised mid-stream can leave a diff gap in the
                    # books — resync exactly as on ConnectionClosed. Pure connect
                    # failures (never streamed) skip the pointless REST refetch.
                    if was_connected:
                        self._resync_all()
                    await asyncio.sleep(reconnect_delay + random.uniform(0, 1))
                    reconnect_delay = min(reconnect_delay * 2, _RECONNECT_MAX_S)
        self._session = None  # stream() exited; shared session is closed

    # ------------------------------------------------------------------
    # Depth bucket extraction (P0-3: crossed book guard)
    # ------------------------------------------------------------------
    async def get_depth_buckets(self, asset: str) -> Optional[dict]:
        """
        Extract the Notional Depth vectors matching OBSnapshot.
        Creates 5 elements summing USD depth between 0-1%, 1-2%, ..., 4-5% from mid.
        Returns None if:
            - Book not yet initialized
            - Empty bids or asks
            - Crossed book (best_bid >= best_ask) <- P0-3
        """
        async with self.locks[asset]:
            if not self.initialized[asset]:
                return None
            bids = sorted(self.bids[asset].items(), key=lambda x: -x[0])
            asks = sorted(self.asks[asset].items(), key=lambda x: x[0])
            if not bids or not asks:
                return None
            best_bid = bids[0][0]
            best_ask = asks[0][0]
            # P0-3: crossed book produces corrupted features — reject entirely
            if best_bid >= best_ask:
                logger.warning(
                    "Crossed book for %s (bid=%.5f >= ask=%.5f) — skipping snapshot",
                    asset, best_bid, best_ask,
                )
                return None
            mid = (best_bid + best_ask) / 2.0
            bid_not = np.zeros(self.max_depth_pct, dtype=np.float64)
            ask_not = np.zeros(self.max_depth_pct, dtype=np.float64)
            bid_dep = np.zeros(self.max_depth_pct, dtype=np.float64)
            ask_dep = np.zeros(self.max_depth_pct, dtype=np.float64)
            # Levels are sorted away from mid, so the first out-of-range level
            # lets us break instead of scanning the whole book.
            for p, q in bids:
                dist_pct = (mid - p) / mid * 100
                idx = int(dist_pct)
                if idx < self.max_depth_pct:
                    bid_not[idx] += p * q
                    bid_dep[idx] += q
                else:
                    break
            for p, q in asks:
                dist_pct = (p - mid) / mid * 100
                idx = int(dist_pct)
                if idx < self.max_depth_pct:
                    ask_not[idx] += p * q
                    ask_dep[idx] += q
                else:
                    break
            return {
                "timestamp": time.time(),
                "asset": asset,
                "bid_notional": bid_not,
                "ask_notional": ask_not,
                "bid_depth": bid_dep,
                "ask_depth": ask_dep,
                "best_bid": best_bid,
                "best_ask": best_ask,
                "spread_bps": (best_ask - best_bid) / mid * 10_000,
            }
# -----------------------------------------------------------------------------
# Standalone run/test hook
# -----------------------------------------------------------------------------
import hazelcast
async def main():
    """Standalone runner: stream books for a fixed asset set and publish
    bucketed depth snapshots to the Hazelcast DOLPHIN_FEATURES map.

    Exits early (after cancelling the stream task) if Hazelcast is unreachable.
    """
    assets_to_track = ["BTCUSDT", "ETHUSDT", "SOLUSDT"]
    service = OBStreamService(assets=assets_to_track)
    # FIX: hold a strong reference — asyncio keeps only weak refs to tasks,
    # so an unreferenced stream task could be garbage-collected mid-run.
    stream_task = asyncio.create_task(service.stream())
    await asyncio.sleep(4)  # give WS connect + REST snapshots time to sync
    try:
        hz_client = hazelcast.HazelcastClient(
            cluster_name="dolphin",
            cluster_members=["localhost:5701"]
        )
        hz_map = hz_client.get_map('DOLPHIN_FEATURES').blocking()
        logger.info("Connected to Hazelcast DOLPHIN_FEATURES map.")
    except Exception as e:
        logger.error(f"Hazelcast connection failed: {e}")
        stream_task.cancel()  # FIX: don't leave the stream running headless
        return
    while True:
        try:
            for asset in assets_to_track:
                snap = await service.get_depth_buckets(asset)
                if snap:
                    hz_payload = {
                        "timestamp": snap["timestamp"],
                        "asset": snap["asset"],
                        # numpy arrays are not JSON-serializable — flatten to lists
                        "bid_notional": list(snap["bid_notional"]),
                        "ask_notional": list(snap["ask_notional"]),
                        "bid_depth": list(snap["bid_depth"]),
                        "ask_depth": list(snap["ask_depth"]),
                        "best_bid": snap["best_bid"],
                        "best_ask": snap["best_ask"],
                        "spread_bps": snap["spread_bps"],
                    }
                    # NOTE(review): blocking put on the event-loop thread —
                    # tolerable at this cadence; consider asyncio.to_thread.
                    hz_map.put(f"asset_{asset}_ob", json.dumps(hz_payload))
            await asyncio.sleep(0.5)
        except Exception as e:
            logger.error(f"Error in stream writing loop: {e}")
            await asyncio.sleep(5)
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C: asyncio.run() cancels pending tasks on its way out.
        print("OB Streamer shut down manually.")