# OB Stream Service — real-time Binance Futures order-book streamer.
#
# Repo scope note (from commit description): includes core prod plus
# GREEN/BLUE subsystems — prod/ (BLUE harness, configs, scripts, docs),
# nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved),
# adaptive_exit/ (AEM engine + models/bucket_assignments.pkl),
# Observability/ (EsoF advisor, TUI, dashboards), external_factors/
# (EsoF producer), mc_forewarning_qlabs_fork/ (MC regime/envelope).
# Excludes runtime caches, logs, backups, and reproducible artifacts
# per .gitignore.
import asyncio
|
|
import aiohttp
|
|
import json
|
|
import time
|
|
import logging
|
|
import numpy as np
|
|
from typing import Dict, List, Optional
|
|
from collections import defaultdict
|
|
|
|
# Setup basic logging
|
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
|
|
logger = logging.getLogger("OBStreamService")
|
|
|
|
try:
|
|
import websockets
|
|
except ImportError:
|
|
logger.warning("websockets package not found. Run pip install websockets aiohttp")
|
|
|
|
# Reconnect back-off constants (P1-2)
|
|
_RECONNECT_BASE_S = 3.0
|
|
_RECONNECT_MAX_S = 120.0
|
|
_RECONNECT_STABLE_S = 60.0 # reset back-off if connected this long without error
|
|
|
|
# Stall detection (P0-2): warn if no WS message for this many seconds
|
|
_STALE_THRESHOLD_S = 30.0
|
|
|
|
|
|
class OBStreamService:
    """
    Real-Time Order Book Streamer for Binance Futures.

    Maintains one local L2 book per asset from the combined ``@depth@100ms``
    diff stream, initialized (and re-initialized after disconnects) via REST
    depth snapshots.

    Fixes applied:
    P0-2 last_event_ts for WS stall detection (is_stale())
    P0-3 Crossed-book guard in get_depth_buckets()
    P1-2 Exponential back-off on reconnect (max 120s, jitter)
    P1-5 Shared aiohttp.ClientSession for REST calls (no new session per call)
    P2-1 Unknown asset symbol in WS event ignored, no KeyError
    P2-2 Generic stream errors now re-sync books (previously only
         ConnectionClosed did, leaving books silently stale after other
         failures), and snapshot tasks are strongly referenced so the event
         loop cannot garbage-collect them mid-flight.
    """

    def __init__(self, assets: List[str], max_depth_pct: int = 5):
        """
        Args:
            assets: Symbols to track (e.g. ["BTCUSDT"]); upper-cased internally.
            max_depth_pct: Number of 1%-wide depth buckets per book side.
        """
        self.assets = [a.upper() for a in assets]
        self.streams = [f"{a.lower()}@depth@100ms" for a in self.assets]
        self.max_depth_pct = max_depth_pct

        # In-memory Order Book caches (Price -> Quantity)
        self.bids: Dict[str, Dict[float, float]] = {a: {} for a in self.assets}
        self.asks: Dict[str, Dict[float, float]] = {a: {} for a in self.assets}

        # Synchronization mechanisms: snapshot update id, pre-snapshot event
        # buffer, and per-asset "book is usable" flag.
        self.last_update_id: Dict[str, int] = {a: 0 for a in self.assets}
        self.buffer: Dict[str, List] = {a: [] for a in self.assets}
        self.initialized: Dict[str, bool] = {a: False for a in self.assets}

        # Per-asset asyncio lock (P2-1: keyed only on known assets)
        self.locks: Dict[str, asyncio.Lock] = {a: asyncio.Lock() for a in self.assets}

        # P0-2: WS stall detection — updated on every received message
        self.last_event_ts: float = 0.0

        # P1-5: shared session — created lazily in stream(), closed on exit
        self._session: Optional[aiohttp.ClientSession] = None

        # P2-2: strong refs to in-flight snapshot tasks. The event loop keeps
        # only weak references to tasks, so a bare create_task() result can be
        # garbage-collected before it finishes.
        self._snapshot_tasks: set = set()

        # Gold Path: Rate Limit Monitoring (AGENT-SPEC-GOLDPATH)
        self.rate_limits: Dict[str, str] = {}

    # ------------------------------------------------------------------
    # P2-2: fire-and-forget snapshot helper with strong task reference
    # ------------------------------------------------------------------
    def _spawn_snapshot(self, asset: str) -> None:
        """Schedule fetch_snapshot(asset), holding a strong ref until done."""
        task = asyncio.create_task(self.fetch_snapshot(asset))
        self._snapshot_tasks.add(task)
        task.add_done_callback(self._snapshot_tasks.discard)

    # ------------------------------------------------------------------
    # P0-2: stale check callable from AsyncOBThread
    # ------------------------------------------------------------------
    def is_stale(self, threshold_s: float = _STALE_THRESHOLD_S) -> bool:
        """Return True if no WS event has been received for threshold_s seconds."""
        if self.last_event_ts == 0.0:
            return False  # hasn't started yet — not stale, just cold
        return (time.time() - self.last_event_ts) > threshold_s

    # ------------------------------------------------------------------
    # REST snapshot (P1-5: reuses shared session)
    # ------------------------------------------------------------------
    async def fetch_snapshot(self, asset: str):
        """Fetch REST snapshot of the Order Book to initialize local state.

        Errors are logged, not raised: a failed snapshot simply leaves the
        asset uninitialized until the next reconnect cycle retries it.
        """
        url = f"https://fapi.binance.com/fapi/v1/depth?symbol={asset}&limit=1000"
        try:
            session = self._session
            if session is None or session.closed:
                # Fallback: create a temporary session if shared one not ready
                async with aiohttp.ClientSession() as tmp_session:
                    await self._do_fetch(tmp_session, asset, url)
                return
            await self._do_fetch(session, asset, url)
        except Exception as e:
            logger.error(f"Error initializing snapshot for {asset}: {e}")

    async def _do_fetch(self, session: aiohttp.ClientSession, asset: str, url: str):
        """Perform the snapshot GET, install the book, and replay buffered diffs."""
        async with session.get(url) as resp:
            # Capture Rate Limits (Gold Spec)
            for k, v in resp.headers.items():
                if k.lower().startswith("x-mbx-used-weight-"):
                    self.rate_limits[k] = v

            data = await resp.json()

            if 'lastUpdateId' not in data:
                logger.error(f"Failed to fetch snapshot for {asset}: {data}")
                return

            last_id = data['lastUpdateId']

            async with self.locks[asset]:
                self.bids[asset] = {float(p): float(q) for p, q in data['bids']}
                self.asks[asset] = {float(p): float(q) for p, q in data['asks']}
                self.last_update_id[asset] = last_id

                # Apply any buffered updates that arrived while REST was in flight.
                # NOTE(review): Binance's sync procedure also checks first-event
                # coverage (U <= lastUpdateId+1 <= u); only the "already in
                # snapshot" case is filtered here — TODO confirm gap handling.
                for event in self.buffer[asset]:
                    if event['u'] <= last_id:
                        continue  # already reflected in snapshot
                    self._apply_event(asset, event)

                self.buffer[asset].clear()
                self.initialized[asset] = True

            logger.info(f"Synchronized L2 book for {asset} (UpdateId: {last_id})")

    # ------------------------------------------------------------------
    # Book maintenance
    # ------------------------------------------------------------------
    def _apply_event(self, asset: str, event: dict):
        """Apply a streaming diff event to the local book.

        Caller must hold self.locks[asset]. Quantity 0 deletes the level;
        anything else overwrites it. Advances last_update_id to event['u'].
        """
        bids = self.bids[asset]
        asks = self.asks[asset]

        for p_str, q_str in event['b']:
            p, q = float(p_str), float(q_str)
            if q == 0.0:
                bids.pop(p, None)
            else:
                bids[p] = q

        for p_str, q_str in event['a']:
            p, q = float(p_str), float(q_str)
            if q == 0.0:
                asks.pop(p, None)
            else:
                asks[p] = q

        self.last_update_id[asset] = event['u']

    # ------------------------------------------------------------------
    # Main WS loop (P1-2: exp backoff; P1-5: shared session; P2-1: unknown
    # symbol guard; P2-2: all error paths re-sync books)
    # ------------------------------------------------------------------
    async def stream(self):
        """Main loop: connect to WebSocket streams and maintain books.

        Runs forever; reconnects with jittered exponential back-off on any
        failure, re-fetching REST snapshots because diffs missed while
        disconnected make the local books unrecoverable otherwise.
        """
        import websockets

        stream_url = (
            "wss://fstream.binance.com/stream?streams=" + "/".join(self.streams)
        )
        logger.info(f"Connecting to Binance Stream: {stream_url}")

        reconnect_delay = _RECONNECT_BASE_S
        import random

        def _resync_all_books() -> None:
            # Mark every book dirty and re-fire REST snapshots: diffs that
            # arrived while the connection was down are gone for good.
            for a in self.assets:
                self.initialized[a] = False
                self._spawn_snapshot(a)

        # P1-5: create shared session for lifetime of stream()
        async with aiohttp.ClientSession() as session:
            self._session = session

            # Fire REST snapshots concurrently using shared session
            for a in self.assets:
                self._spawn_snapshot(a)

            while True:
                connect_start = time.monotonic()
                try:
                    async with websockets.connect(
                        stream_url, ping_interval=20, ping_timeout=20
                    ) as ws:
                        logger.info("WebSocket connected. Streaming depth diffs...")
                        while True:
                            msg = await ws.recv()

                            # P0-2: stamp every received message
                            self.last_event_ts = time.time()

                            data = json.loads(msg)
                            if 'data' not in data:
                                continue

                            ev = data['data']
                            # P2-1: ignore events for unknown symbols — no KeyError
                            asset = ev.get('s', '').upper()
                            if asset not in self.locks:
                                logger.debug("Ignoring WS event for untracked symbol: %s", asset)
                                continue

                            async with self.locks[asset]:
                                if not self.initialized[asset]:
                                    self.buffer[asset].append(ev)
                                else:
                                    self._apply_event(asset, ev)

                    # If we reach here the connection lasted stably:
                    # reset back-off on a clean exit (never normally reached)
                    reconnect_delay = _RECONNECT_BASE_S

                except websockets.exceptions.ConnectionClosed as e:
                    connected_s = time.monotonic() - connect_start
                    logger.warning(
                        "WebSocket closed (%s). Connected %.1fs. Reconnecting in %.1fs...",
                        e, connected_s, reconnect_delay,
                    )
                    # P1-2: reset back-off if connection was stable long enough
                    if connected_s >= _RECONNECT_STABLE_S:
                        reconnect_delay = _RECONNECT_BASE_S

                    # Re-init all assets, re-fire REST snapshots
                    _resync_all_books()

                    await asyncio.sleep(reconnect_delay + random.uniform(0, 1))
                    # P1-2: double delay for next failure, cap at max
                    reconnect_delay = min(reconnect_delay * 2, _RECONNECT_MAX_S)

                except Exception as e:
                    # P2-2 fix: this path previously neither logged a traceback
                    # nor re-synced the books — after e.g. a DNS or JSON error
                    # the loop reconnected onto silently stale books.
                    logger.exception("Stream error: %s", e)
                    _resync_all_books()
                    await asyncio.sleep(reconnect_delay + random.uniform(0, 1))
                    reconnect_delay = min(reconnect_delay * 2, _RECONNECT_MAX_S)

        self._session = None  # stream() exited cleanly

    # ------------------------------------------------------------------
    # Depth bucket extraction (P0-3: crossed book guard)
    # ------------------------------------------------------------------
    async def get_depth_buckets(self, asset: str) -> Optional[dict]:
        """
        Extract the Notional Depth vectors matching OBSnapshot.
        Creates max_depth_pct elements summing USD depth between 0-1%, 1-2%,
        ..., (max_depth_pct-1)-max_depth_pct% from mid.

        Returns None if:
        - Book not yet initialized
        - Empty bids or asks
        - Crossed book (best_bid >= best_ask) ← P0-3
        """
        async with self.locks[asset]:
            if not self.initialized[asset]:
                return None

            bids = sorted(self.bids[asset].items(), key=lambda x: -x[0])
            asks = sorted(self.asks[asset].items(), key=lambda x: x[0])

            if not bids or not asks:
                return None

            best_bid = bids[0][0]
            best_ask = asks[0][0]

            # P0-3: crossed book produces corrupted features — reject entirely
            if best_bid >= best_ask:
                logger.warning(
                    "Crossed book for %s (bid=%.5f >= ask=%.5f) — skipping snapshot",
                    asset, best_bid, best_ask,
                )
                return None

            mid = (best_bid + best_ask) / 2.0

            bid_not = np.zeros(self.max_depth_pct, dtype=np.float64)
            ask_not = np.zeros(self.max_depth_pct, dtype=np.float64)
            bid_dep = np.zeros(self.max_depth_pct, dtype=np.float64)
            ask_dep = np.zeros(self.max_depth_pct, dtype=np.float64)

            # Bids are sorted by increasing distance from mid, so the first
            # level past the last bucket lets us stop scanning entirely.
            for p, q in bids:
                dist_pct = (mid - p) / mid * 100
                idx = int(dist_pct)
                if idx < self.max_depth_pct:
                    bid_not[idx] += p * q
                    bid_dep[idx] += q
                else:
                    break

            # Same early-exit logic on the ask side (ascending prices).
            for p, q in asks:
                dist_pct = (p - mid) / mid * 100
                idx = int(dist_pct)
                if idx < self.max_depth_pct:
                    ask_not[idx] += p * q
                    ask_dep[idx] += q
                else:
                    break

            return {
                "timestamp": time.time(),
                "asset": asset,
                "bid_notional": bid_not,
                "ask_notional": ask_not,
                "bid_depth": bid_dep,
                "ask_depth": ask_dep,
                "best_bid": best_bid,
                "best_ask": best_ask,
                "spread_bps": (best_ask - best_bid) / mid * 10_000,
            }
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Standalone run/test hook
|
|
# -----------------------------------------------------------------------------
|
|
import hazelcast
|
|
|
|
async def main():
    """Standalone driver: stream order books and publish depth snapshots
    to the Hazelcast DOLPHIN_FEATURES map every 0.5s.

    Returns early (after cancelling the streamer) if Hazelcast is unreachable.
    """
    assets_to_track = ["BTCUSDT", "ETHUSDT", "SOLUSDT"]
    service = OBStreamService(assets=assets_to_track)

    # Fix: keep a strong reference to the streamer task. The event loop holds
    # only weak references to tasks, so a bare create_task() result can be
    # garbage-collected mid-flight, silently killing the stream.
    stream_task = asyncio.create_task(service.stream())
    await asyncio.sleep(4)  # give the streamer time to sync initial snapshots

    try:
        hz_client = hazelcast.HazelcastClient(
            cluster_name="dolphin",
            cluster_members=["localhost:5701"]
        )
        hz_map = hz_client.get_map('DOLPHIN_FEATURES').blocking()
        logger.info("Connected to Hazelcast DOLPHIN_FEATURES map.")
    except Exception as e:
        logger.error(f"Hazelcast connection failed: {e}")
        stream_task.cancel()  # don't leave the streamer running headless
        return

    while True:
        try:
            for asset in assets_to_track:
                snap = await service.get_depth_buckets(asset)
                if snap:
                    # numpy arrays are not JSON-serializable — convert to lists.
                    hz_payload = {
                        "timestamp": snap["timestamp"],
                        "asset": snap["asset"],
                        "bid_notional": list(snap["bid_notional"]),
                        "ask_notional": list(snap["ask_notional"]),
                        "bid_depth": list(snap["bid_depth"]),
                        "ask_depth": list(snap["ask_depth"]),
                        "best_bid": snap["best_bid"],
                        "best_ask": snap["best_ask"],
                        "spread_bps": snap["spread_bps"],
                    }
                    # NOTE(review): blocking() .put() stalls the event loop for
                    # the duration of the network call; consider
                    # loop.run_in_executor if put latency matters.
                    hz_map.put(f"asset_{asset}_ob", json.dumps(hz_payload))
            await asyncio.sleep(0.5)
        except Exception as e:
            # Fix: logger.exception preserves the traceback (logger.error lost it).
            logger.exception(f"Error in stream writing loop: {e}")
            await asyncio.sleep(5)
|
|
|
|
if __name__ == "__main__":
|
|
try:
|
|
asyncio.run(main())
|
|
except KeyboardInterrupt:
|
|
print("OB Streamer shut down manually.")
|