2026-06-12 14:59:49 +02:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
"""
|
|
|
|
|
|
DOLPHIN Nautilus Event-Driven Trader
|
|
|
|
|
|
"""
|
|
|
|
|
|
import sys
|
|
|
|
|
|
import json
|
|
|
|
|
|
import hashlib
|
|
|
|
|
|
import math
|
|
|
|
|
|
import os
|
|
|
|
|
|
import time
|
|
|
|
|
|
import signal
|
|
|
|
|
|
import threading
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
import traceback
|
2026-06-12 14:59:49 +02:00
|
|
|
|
import urllib.request
|
|
|
|
|
|
import uuid
|
|
|
|
|
|
from dataclasses import replace
|
|
|
|
|
|
from typing import Any, Mapping, Optional
|
|
|
|
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from collections import deque
|
|
|
|
|
|
|
|
|
|
|
|
# Stablecoins / pegged assets that must never be traded
|
|
|
|
|
|
_STABLECOIN_SYMBOLS = frozenset({
|
|
|
|
|
|
'USDCUSDT', 'BUSDUSDT', 'FDUSDUSDT', 'USDTUSDT', 'TUSDUSDT',
|
|
|
|
|
|
'DAIUSDT', 'FRAXUSDT', 'USDDUSDT', 'USTCUSDT', 'EURUSDT',
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
sys.path.insert(0, '/mnt/dolphinng5_predict')
|
|
|
|
|
|
sys.path.insert(0, '/mnt/dolphinng5_predict/nautilus_dolphin')
|
|
|
|
|
|
|
|
|
|
|
|
from nautilus_dolphin.nautilus.proxy_boost_engine import create_d_liq_engine
|
|
|
|
|
|
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDPosition
|
|
|
|
|
|
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
|
|
|
|
|
|
from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
|
|
|
|
|
|
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
|
|
|
|
|
|
from nautilus_dolphin.nautilus.esof_size_gate import (
|
|
|
|
|
|
parse_esof_payload, esof_gate_from_payload, esof_score_from_payload,
|
|
|
|
|
|
esof_size_mult_from_score, ESOF_STALE_FALLBACK_MULT, ESOF_FRESHNESS_S,
|
|
|
|
|
|
)
|
|
|
|
|
|
from prod.clean_arch.adapters.eigen_scan_normalizer import normalize_ng7_scan
|
|
|
|
|
|
from prod.clean_arch.obf_tp_observation import inject_obf_midprice
|
|
|
|
|
|
from prod.clean_arch.tp_curve import compute_our_leverage, compute_soft_tp_pct
|
|
|
|
|
|
try:
|
|
|
|
|
|
sys.path.insert(0, '/mnt/dolphinng5_predict/Observability')
|
|
|
|
|
|
from esof_advisor import compute_esof as _compute_esof_inline
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
_compute_esof_inline = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
from adaptive_exit.market_state_runtime import MarketStateRuntime
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
MarketStateRuntime = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
from adaptive_exit.advanced_sl import AdvancedSLRuntime
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
AdvancedSLRuntime = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
from adaptive_exit.sc_threshold_advisor import SCThresholdAdvisor
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
SCThresholdAdvisor = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
from adaptive_exit.sc_gauge_advisor import SCGaugeAdvisor, build_obf_snapshot_from_engine
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
SCGaugeAdvisor = None
|
|
|
|
|
|
build_obf_snapshot_from_engine = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
from adaptive_exit.bounce_advisor import BounceAdvisor
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
BounceAdvisor = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
from adaptive_exit.post_win_long_overlay import PostWinExecutionFSM
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
PostWinExecutionFSM = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
from nautilus_dolphin.nautilus.alpha_exit_v7_engine import AlphaExitEngineV7, TradeContextV7
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
AlphaExitEngineV7 = None
|
|
|
|
|
|
TradeContextV7 = None
|
|
|
|
|
|
|
|
|
|
|
|
BLUE_CH_DB = "dolphin"
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
from prod.ch_writer import ch_put, ts_us as _ch_ts_us
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
def ch_put(*a, **kw): pass
|
|
|
|
|
|
def _ch_ts_us(): return 0
|
|
|
|
|
|
try:
|
|
|
|
|
|
from prod.execution_quality import build_execution_quality_record
|
|
|
|
|
|
from prod.execution_quality import build_trade_execution_quality_summary
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
build_execution_quality_record = None
|
|
|
|
|
|
build_trade_execution_quality_summary = None
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
from announcement_router import build_announcement_center
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
from prod.announcement_router import build_announcement_center
|
|
|
|
|
|
|
|
|
|
|
|
sys.path.insert(0, '/mnt/dolphinng5_predict/prod')
|
|
|
|
|
|
from dolphin_exit_handler import install_exit_handler
|
|
|
|
|
|
install_exit_handler("nautilus_trader")
|
|
|
|
|
|
from prod.clean_arch.runtime.runner_heartbeat import (
|
|
|
|
|
|
build_runner_heartbeat_payload,
|
|
|
|
|
|
write_runner_heartbeat,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
HZ_CLUSTER = "dolphin"
|
|
|
|
|
|
HZ_HOST = "127.0.0.1:5701"
|
|
|
|
|
|
EIGEN_DIR = Path('/mnt/dolphinng6_data/eigenvalues')
|
|
|
|
|
|
|
|
|
|
|
|
CAPITAL_DISK_CHECKPOINT = Path("/tmp/dolphin_capital_checkpoint.json")
|
|
|
|
|
|
CAPITAL_CORRECTIVE_REPLAY = Path("/tmp/dolphin_latest_nautilus_replay.json")
|
|
|
|
|
|
CAPITAL_UPDATE_LEDGER = Path("/tmp/dolphin_capital_update_ledger.json")
|
|
|
|
|
|
CAPITAL_CORRECTIVE_REPLAY_HZ_KEY = "capital_correction_replay"
|
|
|
|
|
|
ANNOUNCEMENT_CONFIG = Path("/mnt/dolphinng5_predict/prod/configs/position_notifications_blue.json")
|
|
|
|
|
|
ANNOUNCEMENT_RUNTIME_ENV = Path("/mnt/dolphin_training/observability_notifications_blue.runtime.json")
|
|
|
|
|
|
|
|
|
|
|
|
# Economic dust floor for OPEN position_state rows and retract remainders.
|
|
|
|
|
|
# A remainder at/below this is a FULL CLOSE, never an OPEN snapshot. The
|
|
|
|
|
|
# lifecycle invariant "OPEN ⇒ size > dust" is enforced at the single write
|
|
|
|
|
|
# gate (_ps_write_open); zero/dust-size OPEN rows are the malformed class
|
|
|
|
|
|
# behind the 2026-06-11 restore restart-loop (MALFORMED_OPEN_RESTORE_BUG.md).
|
|
|
|
|
|
# $0.01 sits far above the round(notional,4)=0 boundary (5e-5), so a row
|
|
|
|
|
|
# that passes the gate can never round to a zero notional on disk.
|
|
|
|
|
|
POSITION_DUST_NOTIONAL_USD = 0.01
|
|
|
|
|
|
|
|
|
|
|
|
ENGINE_KWARGS = dict(
|
|
|
|
|
|
initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
|
|
|
|
|
|
min_leverage=0.5, max_leverage=8.0, # note: create_d_liq_engine overrides to D_LIQ_SOFT_CAP=8.0
|
|
|
|
|
|
leverage_convexity=3.0,
|
|
|
|
|
|
fraction=0.20, fixed_tp_pct=0.0020, stop_pct=1.0, max_hold_bars=250, # TP research 2026-05-11: 0.95→0.20%
|
|
|
|
|
|
use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
|
|
|
|
|
|
dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
|
|
|
|
|
|
use_asset_selection=True, min_irp_alignment=0.0, # gold spec: no IRP filter
|
|
|
|
|
|
use_sp_fees=True, use_sp_slippage=True,
|
|
|
|
|
|
sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
|
|
|
|
|
|
use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
|
|
|
|
|
|
lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
|
|
|
|
|
|
allow_subday_acb_exit=False,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _env_bool(name: str, default: bool) -> bool:
|
|
|
|
|
|
raw = os.environ.get(name)
|
|
|
|
|
|
if raw is None:
|
|
|
|
|
|
return default
|
|
|
|
|
|
return str(raw).strip().lower() in {"1", "true", "yes", "on"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _env_float(name: str, default: float) -> float:
|
|
|
|
|
|
raw = os.environ.get(name)
|
|
|
|
|
|
if raw is None:
|
|
|
|
|
|
return default
|
|
|
|
|
|
try:
|
|
|
|
|
|
value = float(raw)
|
|
|
|
|
|
except (TypeError, ValueError):
|
|
|
|
|
|
return default
|
|
|
|
|
|
return value if math.isfinite(value) else default
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _env_int(name: str, default: int) -> int:
|
|
|
|
|
|
raw = os.environ.get(name)
|
|
|
|
|
|
if raw is None:
|
|
|
|
|
|
return default
|
|
|
|
|
|
try:
|
|
|
|
|
|
value = int(float(raw))
|
|
|
|
|
|
except (TypeError, ValueError):
|
|
|
|
|
|
return default
|
|
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _direction_from_env(value: Optional[str] = None) -> int:
|
|
|
|
|
|
raw = os.environ.get("DOLPHIN_DIRECTION", "short_only") if value is None else value
|
|
|
|
|
|
text = str(raw or "short_only").strip().lower()
|
|
|
|
|
|
if text in {"short", "short_only", "sell", "-1"}:
|
|
|
|
|
|
return -1
|
|
|
|
|
|
if text in {"long", "long_only", "buy", "+1", "1"}:
|
|
|
|
|
|
return 1
|
|
|
|
|
|
raise ValueError(
|
|
|
|
|
|
f"Unsupported DOLPHIN_DIRECTION={raw!r}; use short_only or long_only"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _direction_label(direction: int) -> str:
|
|
|
|
|
|
return "LONG" if int(direction) == 1 else "SHORT"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _normalize_v7_exit_reason(reason: str) -> str:
|
|
|
|
|
|
text = str(reason or "").strip()
|
|
|
|
|
|
if text == "V7_MAE_SL_VOL_NORM":
|
|
|
|
|
|
return "V7.1_MAE_SL_VOL_NORM"
|
|
|
|
|
|
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _safe_float(value, default: float = 0.0) -> float:
|
|
|
|
|
|
try:
|
|
|
|
|
|
out = float(value)
|
|
|
|
|
|
except (TypeError, ValueError):
|
|
|
|
|
|
return default
|
|
|
|
|
|
return out if math.isfinite(out) else default
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _flatten_env_payload(payload, prefix: str = "") -> dict:
|
|
|
|
|
|
flat = {}
|
|
|
|
|
|
if not isinstance(payload, dict):
|
|
|
|
|
|
return flat
|
|
|
|
|
|
for key, value in payload.items():
|
|
|
|
|
|
if not isinstance(key, str) or not key.strip():
|
|
|
|
|
|
continue
|
|
|
|
|
|
full_key = f"{prefix}_{key}" if prefix else key
|
|
|
|
|
|
if isinstance(value, dict):
|
|
|
|
|
|
flat.update(_flatten_env_payload(value, full_key))
|
|
|
|
|
|
else:
|
|
|
|
|
|
flat[full_key.upper()] = value
|
|
|
|
|
|
return flat
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _seed_runtime_env(path: Path) -> None:
    """Populate missing environment variables from a JSON file.

    The file is parsed as JSON and flattened with ``_flatten_env_payload``.
    Each resulting key is exported via ``os.environ`` unless the variable is
    already set or the value is a placeholder (``None``, ``""``,
    ``"__CHANGE_ME__"``, ``"__REPLACE_ME__"``). A missing or unreadable file
    is ignored silently.

    Args:
        path: Location of the JSON file.
    """
    if not path.exists():
        return
    try:
        payload = json.loads(path.read_text())
    except Exception:
        # Best-effort seeding: a broken file leaves the environment untouched.
        return

    placeholders = (None, "", "__CHANGE_ME__", "__REPLACE_ME__")
    for env_name, env_value in _flatten_env_payload(payload).items():
        # Values already present in the environment always win.
        if env_name in os.environ or env_value in placeholders:
            continue
        os.environ[env_name] = str(env_value)
|
|
|
|
|
|
|
|
|
|
|
|
BTC_VOL_WINDOW = 50
|
|
|
|
|
|
|
|
|
|
|
|
# Per-bucket SL % used when HIBERNATE fires while a position is open.
|
|
|
|
|
|
# Instead of immediate HIBERNATE_HALT, we arm TP (existing fixed_tp_pct) +
|
|
|
|
|
|
# a per-bucket stop-loss so the position exits cleanly rather than being
|
|
|
|
|
|
# force-closed at whatever price the halt fires at.
|
|
|
|
|
|
# Values derived from AE shadow data + bucket trade analysis (2026-04-19).
|
|
|
|
|
|
# B3 wide: shadow shows mae_norm 5-5.1 before FIXED_TP; 3.5×ATR fires on noise.
|
|
|
|
|
|
# B4 tight: 34.8% WR, 0.80 R:R — cut fast, no recovery value.
|
|
|
|
|
|
# B6 widest: extreme vol (vol_daily_pct 760-864); normal ATR excursions are large.
|
|
|
|
|
|
_BUCKET_SL_PCT: dict = {
|
|
|
|
|
|
0: 0.015, # Low-vol high-corr nano-cap
|
|
|
|
|
|
1: 0.012, # Med-vol low-corr mid-price (XRP/XLM class)
|
|
|
|
|
|
2: 0.015, # Mega-cap BTC/ETH — default (not traded)
|
|
|
|
|
|
3: 0.025, # High-vol mid-corr STAR bucket (ENJ/ADA/DOGE) — needs room
|
|
|
|
|
|
4: 0.008, # Worst bucket (BNB/LTC/LINK) — cut fast
|
|
|
|
|
|
5: 0.018, # High-vol low-corr micro-price (ATOM/TRX class)
|
|
|
|
|
|
6: 0.030, # Extreme-vol mid-corr (FET/ZRX) — widest
|
|
|
|
|
|
'default': 0.015,
|
|
|
|
|
|
}
|
|
|
|
|
|
# Gold-calibrated from full 5-year BTC history: 0.00026414 (stricter, ~2.7x tighter).
|
|
|
|
|
|
# 2026-04-07: switched to 56-day gold window value (0.00009868) — the exact threshold
|
|
|
|
|
|
# used in the T=2155 ROI=+189% backtest. More permissive; paper trading to gather data.
|
|
|
|
|
|
# 2026-05-09 weekend mode: runtime-configurable lower gate for low-vol tape.
|
|
|
|
|
|
#
|
|
|
|
|
|
# Legacy references preserved:
|
|
|
|
|
|
# VOL_P60_THRESHOLD_LEGACY_MAIN = 0.00026414
|
|
|
|
|
|
# VOL_P60_THRESHOLD_GOLD_56D = 0.00009868
|
|
|
|
|
|
VOL_P60_THRESHOLD_LEGACY_MAIN = 0.00026414
|
|
|
|
|
|
VOL_P60_THRESHOLD_GOLD_56D = 0.00009868
|
|
|
|
|
|
VOL_P60_THRESHOLD_WEEKEND_DEFAULT = 0.00003
|
|
|
|
|
|
VOL_P60_THRESHOLD_RELAXED_TEMP = 0.00015838
|
|
|
|
|
|
# Backward-compatible alias retained for older tests and tooling.
|
|
|
|
|
|
VOL_P60_THRESHOLD = VOL_P60_THRESHOLD_LEGACY_MAIN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _vol_p60_threshold_from_env(default: float = VOL_P60_THRESHOLD_LEGACY_MAIN) -> float:
    """Read the volatility gate threshold from ``DOLPHIN_VOL_P60_THRESHOLD``.

    Args:
        default: Threshold used when the variable is unset, unparsable,
            non-finite, or not strictly positive.

    Returns:
        The override as a float when it is a finite value > 0, otherwise
        ``float(default)``.
    """
    raw = os.environ.get("DOLPHIN_VOL_P60_THRESHOLD")
    if raw is not None:
        try:
            parsed = float(str(raw).strip())
        except Exception:
            parsed = None
        # A zero, negative, or non-finite threshold is rejected.
        if parsed is not None and math.isfinite(parsed) and parsed > 0.0:
            return float(parsed)
    return float(default)
|
|
|
|
|
|
|
|
|
|
|
|
# Algorithm Versioning
|
|
|
|
|
|
# v1_shakedown: v50-v150 (noise bug), loose vol gate
|
|
|
|
|
|
# v2_gold_fix: CORRECTED v50-v750 macro divergence (matches parquet backtest)
|
|
|
|
|
|
ALGO_VERSION = "v2_gold_fix_v50-v750"
|
|
|
|
|
|
|
|
|
|
|
|
# Persistent, version-tagged trade log (survives reboots; sorts by date).
|
|
|
|
|
|
# Keep a local fallback path so mount hiccups never break runtime callbacks.
|
|
|
|
|
|
_LOG_DIR_PRIMARY = "/mnt/dolphinng5_predict/prod/logs"
|
|
|
|
|
|
_LOG_DIR_FALLBACK = "/tmp/dolphin_logs/trader"
|
|
|
|
|
|
_LOG_IO_LAST_WARN_TS = 0.0
|
|
|
|
|
|
running = True
|
|
|
|
|
|
_PROCESS_BOOT_TS = time.time()
|
|
|
|
|
|
_SIGTERM_STARTUP_GRACE_S = 20.0
|
|
|
|
|
|
|
|
|
|
|
|
# ── Scan-flow watchdog (2026-06-10) ──────────────────────────────────────────
|
|
|
|
|
|
# BLUE went deaf 3× on 2026-06-09 (scan listener/worker stalled silently while
|
|
|
|
|
|
# supervisord showed RUNNING) and lost most of a trading session. The watchdog
|
|
|
|
|
|
# detects a stalled scan path and self-exits with WATCHDOG_EXIT_CODE so
|
|
|
|
|
|
# supervisord (autorestart=true) brings the process back clean. Restore of
|
|
|
|
|
|
# capital + position state on boot is the proven recovery path.
|
|
|
|
|
|
SCAN_STALL_S = 120.0 # scan path considered stalled after this
|
|
|
|
|
|
WATCHDOG_RESTART_MIN_UPTIME_S = 600.0 # never self-restart during warm-up
|
|
|
|
|
|
WATCHDOG_PROBE_INTERVAL_S = 30.0 # spacing between HZ deafness probes
|
|
|
|
|
|
UPSTREAM_DARK_LOG_EVERY_S = 300.0 # CRITICAL reminder cadence when dark
|
|
|
|
|
|
WATCHDOG_EXIT_CODE = 86
|
|
|
|
|
|
# Scanner restarts reset scan_number to 0. A backwards jump larger than this
|
|
|
|
|
|
# is a restart (accept + re-anchor ratchet), not a stale duplicate (drop).
|
|
|
|
|
|
SCAN_NUMBER_RESET_GAP = 1000
|
|
|
|
|
|
|
|
|
|
|
|
def _trade_log_paths(ts_dt: datetime) -> tuple[str, str]:
    """Build the primary and fallback trade-log paths for a timestamp.

    The file name embeds the date of ``ts_dt`` (``YYYYMMDD``) and
    ``ALGO_VERSION``.

    Returns:
        ``(primary_path, fallback_path)`` under ``_LOG_DIR_PRIMARY`` and
        ``_LOG_DIR_FALLBACK`` respectively.
    """
    log_date = ts_dt.strftime("%Y%m%d")
    fname = f"nautilus_trader_{log_date}_{ALGO_VERSION}.log"
    primary, fallback = (
        os.path.join(directory, fname)
        for directory in (_LOG_DIR_PRIMARY, _LOG_DIR_FALLBACK)
    )
    return primary, fallback
|
|
|
|
|
|
|
|
|
|
|
|
def log(msg):
    """Print a UTC-timestamped line and append it to the daily trade log.

    The line always goes to stdout first. It is then appended to the primary
    log file; if that fails with an ``OSError`` a throttled (at most once per
    60 s) warning is printed and the line is appended to the fallback log
    file instead. Failures on the fallback path are swallowed because stdout
    already carries the line.

    Args:
        msg: Message text (anything formattable into an f-string).
    """
    global _LOG_IO_LAST_WARN_TS
    ts_dt = datetime.now(timezone.utc)
    ts = ts_dt.isoformat()
    line = f"[{ts}] {msg}"
    print(line, flush=True)
    primary_path, fallback_path = _trade_log_paths(ts_dt)
    try:
        os.makedirs(_LOG_DIR_PRIMARY, exist_ok=True)
        # Pin UTF-8: log lines contain non-ASCII text, and with the locale
        # default encoding a UnicodeEncodeError (a ValueError, not an OSError)
        # would bypass the fallback handler and propagate to the caller.
        # backslashreplace keeps even unencodable code points loggable.
        with open(primary_path, 'a', encoding="utf-8", errors="backslashreplace") as f:
            f.write(line + '\n')
        return
    except OSError as e:
        now = time.time()
        if now - _LOG_IO_LAST_WARN_TS >= 60.0:
            _LOG_IO_LAST_WARN_TS = now
            print(f"[{ts}] LOG_PATH_FALLBACK: primary log write failed: {e}", flush=True)
    try:
        os.makedirs(_LOG_DIR_FALLBACK, exist_ok=True)
        with open(fallback_path, 'a', encoding="utf-8", errors="backslashreplace") as f:
            f.write(line + '\n')
    except Exception:
        # Last-resort: stdout still has the log line.
        pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _chain_digest(payload: dict) -> str:
|
|
|
|
|
|
"""Stable digest for BLUE exit-chain state."""
|
|
|
|
|
|
body = json.dumps(payload, sort_keys=True, separators=(",", ":"), default=str).encode()
|
|
|
|
|
|
return hashlib.sha256(body).hexdigest()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _build_chain_state(
    *,
    trade_id: str,
    asset: str,
    side: str,
    entry_price: float,
    quantity: float,
    notional: float,
    entry_bar: int,
    entry_ts: int,
    retraction_legs: int = 0,
    realized_pnl_legs_total: float = 0.0,
    chain_root_trade_id: str | None = None,
    chain_head_leg_id: str | None = None,
    chain_prev_leg_id: str = "",
    chain_mode: str = "LIVE",
) -> dict:
    """Assemble the chain snapshot dict for the current open trade head.

    All inputs are normalised (strings coerced, floats rounded to 12 places,
    the leg count clamped at zero). ``chain_token`` is the ``_chain_digest``
    of the normalised fields only; ``chain_token``, ``chain_version`` and
    ``chain_kind`` are added after the digest is taken and are not part of it.

    Defaults applied when optional ids are falsy:
        * ``chain_root_trade_id`` -> ``trade_id``
        * ``chain_head_leg_id`` -> ``"<trade_id>:open"`` for a root (no legs),
          otherwise ``"<trade_id>:xNNN"`` with the zero-padded leg count.

    Returns:
        The snapshot dict, with ``chain_kind`` "ROOT" when there are no
        retraction legs and "LEG" otherwise.
    """
    root = str(chain_root_trade_id or trade_id or "")
    seq = max(0, int(retraction_legs))
    is_root = seq <= 0

    if chain_head_leg_id:
        head = str(chain_head_leg_id)
    elif is_root:
        head = str(f"{trade_id}:open")
    else:
        head = str(f"{trade_id}:x{seq:03d}")
    prev = str(chain_prev_leg_id or "")

    def _rounded(number) -> float:
        # Falsy inputs (None, 0) collapse to 0.0 before rounding.
        return round(float(number or 0.0), 12)

    anchor = {
        "trade_id": str(trade_id or ""),
        "chain_root_trade_id": root,
        "chain_head_leg_id": head,
        "chain_prev_leg_id": prev,
        "chain_seq": seq,
        "chain_mode": str(chain_mode or "LIVE"),
        "asset": str(asset or ""),
        "side": str(side or "").upper(),
        "entry_price": _rounded(entry_price),
        "quantity": _rounded(quantity),
        "notional": _rounded(notional),
        "entry_bar": int(entry_bar or 0),
        "entry_ts": int(entry_ts or 0),
        "retraction_legs": seq,
        "realized_pnl_legs_total": _rounded(realized_pnl_legs_total),
    }
    # The digest argument is evaluated before update() mutates the dict, so
    # the token covers only the fields above.
    anchor.update(
        chain_token=_chain_digest(anchor),
        chain_version=1,
        chain_kind="ROOT" if is_root else "LEG",
    )
    return anchor
|
|
|
|
|
|
|
|
|
|
|
|
class DolphinLiveTrader:
|
|
|
|
|
|
def __init__(self):
    """Set default runtime state and read env-tunable knobs.

    Engine and Hazelcast handles start as ``None``. Optional runtimes
    (market state, advanced SL, post-win FSM) are instantiated only when
    their import succeeded at module load. The announcement center is
    built when its config file exists; a failure there is logged and
    leaves it disabled.
    """

    def _non_negative(env_name: str, fallback: float) -> float:
        # Env-tunable float clamped at zero.
        return max(0.0, _env_float(env_name, fallback))

    # Engine + Hazelcast handles.
    self.eng = None
    self.hz_client = None
    self.features_map = self.safety_map = self.pnl_map = None
    self.state_map = self.heartbeat_map = self.control_map = None

    # Locks, stop events and the single-worker scan executor.
    self.eng_lock = threading.Lock()
    self._heartbeat_stop = threading.Event()
    self._runtime_command_lock = threading.Lock()
    self._dedup_lock = threading.Lock()  # guards atomic check-and-set on last_scan_number
    self._scan_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="scan")
    self.last_scan_number = -1

    # Scan-flow watchdog state. Event ts proves the HZ listener is alive;
    # accept ts proves the worker thread is draining; the dupe counter
    # separates "worker stuck" from "upstream flooding duplicates".
    self._last_scan_event_ts = time.time()
    self._last_scan_accept_ts = time.time()
    self._dupe_drops_total = 0
    self._watchdog_stop = threading.Event()
    self._probe_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="wdprobe")

    # Counters and per-session caches.
    self.last_file_mtime = 0
    self.bar_idx = 0
    self.current_day = None
    self.trades_executed = 0
    self.scans_processed = 0
    self.btc_prices = deque(maxlen=BTC_VOL_WINDOW + 2)
    self.cached_posture = "APEX"
    self.posture_cache_time = 0
    self.ob_assets = []
    self.ob_eng = None
    self.acb = None
    self.last_w750_vel = None
    self._pending_entries: dict = {}  # trade_id → entry snapshot (for CH trade_events)
    self._last_exf: dict = {}
    self._last_engine_snapshot_payload = None
    self._exf_log_time = 0.0  # throttle for on_exf_update logging

    # Exit engines and their journal.
    self._ae = None  # AdaptiveExitEngine shadow (parallel, never real exits)
    self._v7_exit_engine = None  # AlphaExitEngineV7 live BLUE exit control + journal
    self._v7_contexts: dict = {}  # trade_id → TradeContextV7
    self._v7_decisions: dict = {}  # trade_id → latest v7 decision
    self._v7_decision_seq: dict = {}  # trade_id → monotonic eval sequence
    self._v7_journal_enabled: bool = _env_bool("DOLPHIN_ENABLE_V7_JOURNAL", True)
    self._v7_journal_db: str = BLUE_CH_DB
    self._v7_journal_table: str = "v7_decision_events"
    self._v7_live_exit_enabled: bool = False

    # Shadow-only advisors.
    self._sc_advisor = None  # SC threshold advisor (shadow-only)
    self._sc_advisor_last_log = 0.0
    self._sc_gauge = None  # SC bucket gauge advisor (shadow-only)
    self._sc_gauge_last_log = 0.0
    self._bounce_advisor = None  # inverse-ARS bounce advisor (shadow-only)
    self._bounce_advisor_last_log = 0.0
    self._bounce_price_history: dict[str, deque] = {}
    self._last_prices_dict: dict[str, float] = {}

    if MarketStateRuntime is not None:
        self._market_state_runtime = MarketStateRuntime()
    else:
        self._market_state_runtime = None
    self._tp_base_pct = float(ENGINE_KWARGS.get("fixed_tp_pct", 0.0020))

    # Advanced SL: the env flag switches the loaded config to enabled.
    if AdvancedSLRuntime is not None:
        self._advanced_sl = AdvancedSLRuntime.load()
    else:
        self._advanced_sl = None
    self._advanced_sl_live_exit_enabled: bool = _env_bool("DOLPHIN_ENABLE_ADVANCED_SL_LIVE", False)
    if self._advanced_sl is not None and self._advanced_sl_live_exit_enabled:
        self._advanced_sl.config = replace(self._advanced_sl.config, enabled=True)

    # Catastrophic floors and overlay AdvSL thresholds (all clamped >= 0).
    self._catastrophic_floor_pct: float = _non_negative(
        "DOLPHIN_CATASTROPHIC_FLOOR_PCT", 0.0120
    )
    self._overlay_catastrophic_floor_pct: float = _non_negative(
        "DOLPHIN_OVERLAY_CATASTROPHIC_FLOOR_PCT", 0.0050
    )
    self._overlay_catastrophic_max_loss_usd: float = _non_negative(
        "DOLPHIN_OVERLAY_CATASTROPHIC_MAX_LOSS_USD", 500.0
    )
    self._overlay_advsl_live_exit_enabled: bool = _env_bool("DOLPHIN_OVERLAY_ADVSL_LIVE", True)
    self._overlay_advsl_min_bars: int = max(0, _env_int("DOLPHIN_OVERLAY_ADVSL_MIN_BARS", 6))
    self._overlay_advsl_mfe_max_pct: float = _non_negative("DOLPHIN_OVERLAY_ADVSL_MFE_MAX_PCT", 0.0020)
    self._overlay_advsl_pressure_min: float = _non_negative("DOLPHIN_OVERLAY_ADVSL_PRESSURE_MIN", 1.85)
    self._overlay_advsl_mae_risk_min: float = _non_negative("DOLPHIN_OVERLAY_ADVSL_MAE_RISK_MIN", 0.50)

    self._hibernate_protect_active: str | None = None  # trade_id being protected
    self._bucket_assignments: dict = {}  # asset → KMeans bucket_id (loaded from pkl)
    self._last_esof_size_mult: float = 1.0

    # Restore bookkeeping.
    self._restore_failed: bool = False
    self._restore_failure_reason: str = ""
    self._restore_source: str = ""

    # Direction and volatility gate come from the environment.
    self.trade_direction: int = _direction_from_env()
    self.vol_p60_threshold: float = _vol_p60_threshold_from_env()
    self._runtime_direction: int = self.trade_direction
    if PostWinExecutionFSM is not None:
        self._efsm = PostWinExecutionFSM()
    else:
        self._efsm = None

    self._trade_announcement_center = None
    self._processed_retract_commands: deque = deque(maxlen=5000)
    self._processed_retract_set: set[str] = set()

    # Seed announcement-related env vars before building the center.
    _seed_runtime_env(ANNOUNCEMENT_RUNTIME_ENV)
    if ANNOUNCEMENT_CONFIG.exists():
        try:
            self._trade_announcement_center = build_announcement_center(
                ANNOUNCEMENT_CONFIG,
                hz_getter=self._get_hz,
                logger=None,
            )
            log(" Position announcements: loaded")
        except Exception as e:
            log(f" Position announcements: {e}")
            self._trade_announcement_center = None

    if self._efsm is not None:
        log(" EFSM: loaded (post-win LONG overlay)")
    if self._advanced_sl is not None:
        log(" AdvancedSL: loaded (shadow prototype)")
|
|
|
|
|
|
|
|
|
|
|
|
def _get_hz(self):
|
|
|
|
|
|
"""Return a live Hazelcast client for announcement channels."""
|
|
|
|
|
|
hz = self.hz_client
|
|
|
|
|
|
if hz is None:
|
|
|
|
|
|
return None
|
|
|
|
|
|
try:
|
|
|
|
|
|
if not hz.lifecycle_service.is_running():
|
|
|
|
|
|
return None
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
return None
|
|
|
|
|
|
return hz
|
|
|
|
|
|
|
|
|
|
|
|
def _latest_maras_context(self) -> dict:
|
|
|
|
|
|
"""Best-effort MARAS context for meta exit gates."""
|
|
|
|
|
|
try:
|
|
|
|
|
|
if self.features_map is None:
|
|
|
|
|
|
return {}
|
|
|
|
|
|
raw = self.features_map.blocking().get("maras_latest")
|
|
|
|
|
|
if not raw:
|
|
|
|
|
|
return {}
|
|
|
|
|
|
payload = json.loads(raw) if isinstance(raw, str) else raw
|
|
|
|
|
|
if not isinstance(payload, dict):
|
|
|
|
|
|
return {}
|
|
|
|
|
|
return {
|
|
|
|
|
|
"composite_hash": payload.get("composite_hash", payload.get("hash", 0)),
|
|
|
|
|
|
"scalar_hash": payload.get("scalar_hash", 0),
|
|
|
|
|
|
"regime": payload.get("regime", ""),
|
|
|
|
|
|
"final_score": payload.get("final_score", 0.0),
|
|
|
|
|
|
"confidence": payload.get("confidence", 0.0),
|
|
|
|
|
|
}
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
return {}
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_runtime_direction(self) -> int:
|
|
|
|
|
|
"""Resolve active trade direction for the next eligible entry."""
|
|
|
|
|
|
base = int(self.trade_direction)
|
|
|
|
|
|
if base != -1 or self._efsm is None:
|
|
|
|
|
|
return base
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
has_open_position = getattr(self.eng, "position", None) is not None
|
|
|
|
|
|
if has_open_position:
|
|
|
|
|
|
return base
|
|
|
|
|
|
return 1 if int(self._efsm.pending_slots) > 0 else base
|
|
|
|
|
|
|
|
|
|
|
|
def _apply_runtime_direction(self) -> None:
|
|
|
|
|
|
"""Apply current runtime direction to the engine regime."""
|
|
|
|
|
|
resolved = self._resolve_runtime_direction()
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
if getattr(self.eng, "regime_direction", self.trade_direction) != resolved:
|
|
|
|
|
|
self.eng.regime_direction = resolved
|
|
|
|
|
|
self._runtime_direction = resolved
|
|
|
|
|
|
|
|
|
|
|
|
def _build_engine(self):
|
|
|
|
|
|
log("Building NDAlphaEngine...")
|
|
|
|
|
|
engine_kwargs = dict(ENGINE_KWARGS)
|
|
|
|
|
|
engine_kwargs["allow_subday_acb_exit"] = _env_bool(
|
|
|
|
|
|
"DOLPHIN_ALLOW_ACB_SUBDAY_EXIT",
|
|
|
|
|
|
bool(engine_kwargs.get("allow_subday_acb_exit", False)),
|
|
|
|
|
|
)
|
|
|
|
|
|
self.eng = create_d_liq_engine(**engine_kwargs)
|
|
|
|
|
|
# TP profit-floor ratchet (LINK 5e05eeeb, 2026-06-11): once the BASE
|
|
|
|
|
|
# 0.20% TP has been crossed, regression back to base exits (TP_FLOOR)
|
|
|
|
|
|
# instead of riding the OB-widened threshold back to a loss. Class
|
|
|
|
|
|
# default is OFF (backtest/champion parity); live default is ON.
|
|
|
|
|
|
# Kill switch: DOLPHIN_TP_FLOOR=0.
|
|
|
|
|
|
self.eng.exit_manager.tp_floor_enabled = _env_bool("DOLPHIN_TP_FLOOR", True)
|
|
|
|
|
|
log(f" Engine: {type(self.eng).__name__}")
|
|
|
|
|
|
log(f" TP profit-floor: {'ON' if self.eng.exit_manager.tp_floor_enabled else 'OFF'}")
|
|
|
|
|
|
log(f" Direction: {_direction_label(self.trade_direction)} ({self.trade_direction:+d})")
|
|
|
|
|
|
log(
|
|
|
|
|
|
" VOL gate threshold: "
|
|
|
|
|
|
f"{self.vol_p60_threshold:.8f} "
|
|
|
|
|
|
f"(legacy_main={VOL_P60_THRESHOLD_LEGACY_MAIN:.8f}, gold_56d={VOL_P60_THRESHOLD_GOLD_56D:.8f}, "
|
|
|
|
|
|
f"relaxed_temp={VOL_P60_THRESHOLD_RELAXED_TEMP:.7f})"
|
|
|
|
|
|
)
|
|
|
|
|
|
log(f" ACB subday exits: {'ON' if engine_kwargs['allow_subday_acb_exit'] else 'OFF'}")
|
|
|
|
|
|
log(f" Leverage: soft={self.eng.base_max_leverage}x abs={self.eng.abs_max_leverage}x")
|
|
|
|
|
|
|
|
|
|
|
|
if EIGEN_DIR.exists():
|
|
|
|
|
|
try:
|
|
|
|
|
|
date_strings = sorted([d.name for d in EIGEN_DIR.iterdir() if d.is_dir()])
|
|
|
|
|
|
self.acb = AdaptiveCircuitBreaker()
|
|
|
|
|
|
self.acb.preload_w750(date_strings)
|
|
|
|
|
|
self.eng.set_acb(self.acb)
|
|
|
|
|
|
log(" ACBv6: loaded")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ACBv6: {e}")
|
|
|
|
|
|
else:
|
|
|
|
|
|
self.acb = AdaptiveCircuitBreaker()
|
|
|
|
|
|
self.eng.set_acb(self.acb)
|
|
|
|
|
|
log(" ACBv6: loaded (no preload dates)")
|
|
|
|
|
|
|
|
|
|
|
|
self.eng.set_esoteric_hazard_multiplier(0.0) # gold spec: init guard, MUST precede set_mc_forewarner
|
|
|
|
|
|
log(f" Hazard: set_esoteric_hazard_multiplier(0.0) — soft={self.eng.base_max_leverage}x")
|
|
|
|
|
|
|
|
|
|
|
|
MC_MODELS_DIR = '/mnt/dolphinng5_predict/nautilus_dolphin/mc_results/models'
|
|
|
|
|
|
MC_BASE_CFG = {
|
|
|
|
|
|
'trial_id': 0, 'vel_div_threshold': -0.020, 'vel_div_extreme': -0.050,
|
|
|
|
|
|
'use_direction_confirm': True, 'dc_lookback_bars': 7,
|
|
|
|
|
|
'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True,
|
|
|
|
|
|
'dc_leverage_boost': 1.00, 'dc_leverage_reduce': 0.50,
|
|
|
|
|
|
'vd_trend_lookback': 10, 'min_leverage': 0.50, 'max_leverage': 8.00, # gold spec
|
|
|
|
|
|
'leverage_convexity': 3.00, 'fraction': 0.20, 'use_alpha_layers': True,
|
|
|
|
|
|
'use_dynamic_leverage': True, 'fixed_tp_pct': 0.0020, 'stop_pct': 1.00,
|
|
|
|
|
|
'max_hold_bars': 250, 'use_sp_fees': True, 'use_sp_slippage': True, # gold spec
|
|
|
|
|
|
'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.50,
|
|
|
|
|
|
'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': 0.40,
|
|
|
|
|
|
'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.00,
|
|
|
|
|
|
'use_asset_selection': True, 'min_irp_alignment': 0.0,
|
|
|
|
|
|
'asset_selector_lookback': 10, 'lookback': 100, # gold spec
|
|
|
|
|
|
'acb_beta_high': 0.80, 'acb_beta_low': 0.20, 'acb_w750_threshold_pct': 60,
|
|
|
|
|
|
}
|
|
|
|
|
|
if Path(MC_MODELS_DIR).exists():
|
|
|
|
|
|
try:
|
|
|
|
|
|
from mc.mc_ml import DolphinForewarner
|
|
|
|
|
|
forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
|
|
|
|
|
|
self.eng.set_mc_forewarner(forewarner, MC_BASE_CFG)
|
|
|
|
|
|
log(" MC-Forewarner: wired")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" MC-Forewarner: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
from adaptive_exit.adaptive_exit_engine import AdaptiveExitEngine
|
|
|
|
|
|
self._ae = AdaptiveExitEngine.load()
|
|
|
|
|
|
log(" AdaptiveExitEngine: loaded (shadow mode — no real exits)")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" AdaptiveExitEngine: {e} — shadow disabled")
|
|
|
|
|
|
|
|
|
|
|
|
if AlphaExitEngineV7 is not None and self._v7_journal_enabled:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._v7_exit_engine = AlphaExitEngineV7(bar_duration_sec=11.0)
|
|
|
|
|
|
self._ensure_v7_journal_table()
|
|
|
|
|
|
log(" AlphaExitEngineV7: loaded (live BLUE exit control + journal)")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" AlphaExitEngineV7: {e} — shadow disabled")
|
|
|
|
|
|
self._v7_exit_engine = None
|
|
|
|
|
|
self._v7_live_exit_enabled = self._v7_exit_engine is not None
|
|
|
|
|
|
if self.eng is not None:
|
|
|
|
|
|
self.eng.exit_decision_provider = self._v7_live_exit_decision if self._v7_live_exit_enabled else None
|
|
|
|
|
|
|
|
|
|
|
|
self._load_bucket_assignments()
|
|
|
|
|
|
|
|
|
|
|
|
if SCThresholdAdvisor is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._sc_advisor = SCThresholdAdvisor.load(
|
|
|
|
|
|
strategy="blue",
|
|
|
|
|
|
shadow_db=BLUE_CH_DB,
|
|
|
|
|
|
)
|
|
|
|
|
|
log(" SCThresholdAdvisor: loaded (shadow mode — no sizing changes)")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" SCThresholdAdvisor: {e} — shadow disabled")
|
|
|
|
|
|
self._sc_advisor = None
|
|
|
|
|
|
|
|
|
|
|
|
if SCGaugeAdvisor is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._sc_gauge = SCGaugeAdvisor.load(
|
|
|
|
|
|
strategy="blue",
|
|
|
|
|
|
shadow_db=BLUE_CH_DB,
|
|
|
|
|
|
)
|
|
|
|
|
|
log(" SCGaugeAdvisor: loaded (shadow mode — no sizing changes)")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" SCGaugeAdvisor: {e} — shadow disabled")
|
|
|
|
|
|
self._sc_gauge = None
|
|
|
|
|
|
|
|
|
|
|
|
if BounceAdvisor is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._bounce_advisor = BounceAdvisor.load(
|
|
|
|
|
|
strategy="blue",
|
|
|
|
|
|
shadow_db=BLUE_CH_DB,
|
|
|
|
|
|
)
|
|
|
|
|
|
log(" BounceAdvisor: loaded (shadow mode — no execution changes)")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" BounceAdvisor: {e} — shadow disabled")
|
|
|
|
|
|
self._bounce_advisor = None
|
|
|
|
|
|
|
|
|
|
|
|
def _load_bucket_assignments(self):
    """Load the KMeans asset→bucket_id mapping for hibernate protection SL levels.

    Unpickles ``bucket_assignments.pkl`` and stores its ``'assignments'`` entry
    (``{}`` when the key is absent) on ``self._bucket_assignments``. Any failure
    is logged and swallowed; the log line reports the default SL percentage
    taken from ``_BUCKET_SL_PCT['default']``.
    """
    try:
        import pickle

        model_file = Path('/mnt/dolphinng5_predict/adaptive_exit/models/bucket_assignments.pkl')
        # NOTE(review): pickle.load can execute arbitrary code embedded in the
        # file, so this path must only ever point at a trusted artifact.
        with open(model_file, 'rb') as handle:
            blob = pickle.load(handle)
        mapping = blob.get('assignments', {})
        # Stored before the success log so a logging failure cannot undo the load.
        self._bucket_assignments = mapping
        log(f" BucketAssignments: {len(mapping)} assets loaded for hibernate protection")
    except Exception as e:
        fallback_sl = _BUCKET_SL_PCT['default'] * 100
        log(f" BucketAssignments: {e} — hibernate protect will use default SL={fallback_sl:.1f}%")
|
|
|
|
|
|
|
|
|
|
|
|
def _announce_position_event(
|
|
|
|
|
|
self,
|
|
|
|
|
|
*,
|
|
|
|
|
|
kind: str,
|
|
|
|
|
|
severity: str,
|
|
|
|
|
|
title: str,
|
|
|
|
|
|
message: str,
|
|
|
|
|
|
metadata: dict | None = None,
|
|
|
|
|
|
) -> None:
|
|
|
|
|
|
center = getattr(self, "_trade_announcement_center", None)
|
|
|
|
|
|
if center is None:
|
|
|
|
|
|
return
|
|
|
|
|
|
try:
|
|
|
|
|
|
center.note_event(
|
|
|
|
|
|
kind=kind,
|
|
|
|
|
|
severity=severity,
|
|
|
|
|
|
title=title,
|
|
|
|
|
|
message=message,
|
|
|
|
|
|
metadata=metadata or {},
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" Position announcement failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def _read_esof_payload(self) -> dict | None:
|
|
|
|
|
|
"""Read the freshest EsoF advisory payload from HZ, if available."""
|
|
|
|
|
|
if not self.features_map:
|
|
|
|
|
|
return None
|
|
|
|
|
|
for key in ("esof_latest", "esof_advisor_latest"):
|
|
|
|
|
|
try:
|
|
|
|
|
|
raw = self.features_map.blocking().get(key)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
continue
|
|
|
|
|
|
payload = parse_esof_payload(raw)
|
|
|
|
|
|
if payload:
|
|
|
|
|
|
return payload
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def _sync_esof_size_gate(self) -> None:
    """Push the current continuous EsoF size multiplier into the shared engine.

    The score is taken from the HZ payload (subject to ``ESOF_FRESHNESS_S``).
    When that yields no score and ``_compute_esof_inline`` is available, the
    score is recomputed inline with no age limit; failures of the inline path
    are ignored. The score is handed to ``eng.set_esof_advisory_score`` when the
    engine has that method, and a change of multiplier is recorded on
    ``self._last_esof_size_mult`` and logged.
    """
    hz_payload = self._read_esof_payload()
    score = esof_score_from_payload(hz_payload, max_age_s=ESOF_FRESHNESS_S)
    used_inline = False

    if score is None and _compute_esof_inline is not None:
        try:
            inline_payload = _compute_esof_inline()
            score = esof_score_from_payload(inline_payload, max_age_s=None)
            used_inline = score is not None
        except Exception:
            # Best-effort fallback: a failed inline computation leaves score as-is.
            pass

    mult = esof_size_mult_from_score(score)
    with self.eng_lock:
        if hasattr(self.eng, "set_esof_advisory_score"):
            self.eng.set_esof_advisory_score(score)
        if mult == self._last_esof_size_mult:
            return
        self._last_esof_size_mult = mult
        if score is None:
            line = f"EsoF size gate: STALE-FALLBACK mult={mult:.2f} (no HZ + no inline)"
        elif used_inline:
            line = f"EsoF size gate: INLINE sc={score:+.3f} mult={mult:.2f} (HZ stale)"
        else:
            line = f"EsoF size gate: sc={score:+.3f} mult={mult:.2f}"
        log(line)
|
|
|
|
|
|
|
|
|
|
|
|
def _tp_curve_context(self, *, notional: float | None = None) -> dict[str, Any]:
    """Build the soft take-profit context for the current (or a given) notional.

    Args:
        notional: Position notional to evaluate. When None it is derived from
            the engine's open position: its ``notional`` attribute, or
            ``size * entry_price`` when that is not positive; 0.0 with no
            position.

    Returns:
        Dict with ``tp_base_pct``, ``tp_effective_pct``, ``our_leverage`` (all
        floats) and ``market_state_bundle_json`` (the latest market-state
        bundle serialised as JSON, or ``"{}"`` when there is none).
    """
    engine = self.eng
    open_pos = getattr(engine, "position", None)
    equity = float(getattr(engine, "capital", 0.0) or 0.0)

    if notional is None:
        notional = 0.0
        if open_pos is not None:
            notional = _safe_float(getattr(open_pos, "notional", 0.0), 0.0)
            if notional <= 0.0:
                # Position carries no usable notional: rebuild it from size and entry.
                notional = _safe_float(
                    getattr(open_pos, "size", 0.0) * getattr(open_pos, "entry_price", 0.0),
                    0.0,
                )

    leverage_now = compute_our_leverage(notional=notional, capital=equity)
    effective_tp = compute_soft_tp_pct(self._tp_base_pct, leverage_now)

    runtime = self._market_state_runtime
    latest = getattr(runtime, "latest_bundle_dict", None) if runtime is not None else None
    snapshot = dict(latest) if latest else {}
    bundle_json = json.dumps(snapshot, default=str, sort_keys=True) if snapshot else "{}"

    return {
        "tp_base_pct": float(self._tp_base_pct),
        "tp_effective_pct": float(effective_tp),
        "our_leverage": float(leverage_now),
        "market_state_bundle_json": bundle_json,
    }
|
|
|
|
|
|
|
|
|
|
|
|
def _sync_tp_threshold(self) -> None:
|
|
|
|
|
|
"""Read live TP threshold from HZ control plane and propagate to engine.
|
|
|
|
|
|
|
|
|
|
|
|
HZ key: DOLPHIN_FEATURES["live_tp_threshold"] → JSON {"tp_pct": 0.0020, "ts": ...}
|
|
|
|
|
|
If absent or stale, keeps the current default (0.0020 from ENGINE_KWARGS).
|
|
|
|
|
|
A tighter TP cuts open positions immediately; a wider TP extends the hold.
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
ctx = self._tp_curve_context()
|
|
|
|
|
|
tp_pct = float(ctx.get("tp_effective_pct", 0.0) or 0.0)
|
|
|
|
|
|
if tp_pct <= 0:
|
|
|
|
|
|
return
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
old = self.eng.set_live_tp_pct(tp_pct)
|
|
|
|
|
|
if abs(old - tp_pct) > 1e-6:
|
|
|
|
|
|
log(
|
|
|
|
|
|
f"TP threshold: {old*100:.2f}% → {tp_pct*100:.2f}% "
|
|
|
|
|
|
f"(soft curve, lev={ctx.get('our_leverage', 0.0):.2f}x)"
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
def _inject_obf_midprice(self, prices_dict: dict) -> dict:
|
|
|
|
|
|
"""Override scan price for the open position's asset with live OB mid-price.
|
|
|
|
|
|
|
|
|
|
|
|
Scan prices are quantized to ~4 decimal places (e.g. 0.1255 vs 0.1256),
|
|
|
|
|
|
which is too coarse for a 0.20% TP on low-priced assets. The OBF universe
|
|
|
|
|
|
service has live WebSocket bid/ask at ~0.5s resolution with full precision.
|
|
|
|
|
|
This method substitutes the scan price with (best_bid + best_ask) / 2 for
|
|
|
|
|
|
the position's asset only, so TP evaluation sees the freshest available
|
|
|
|
|
|
observation without changing the TP threshold itself.
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
pos = self.eng.position
|
|
|
|
|
|
if pos is None or not pos.asset:
|
|
|
|
|
|
return prices_dict
|
|
|
|
|
|
raw = self.features_map.blocking().get("obf_universe_latest")
|
|
|
|
|
|
return inject_obf_midprice(
|
|
|
|
|
|
prices_dict,
|
|
|
|
|
|
position_asset=str(pos.asset or ""),
|
|
|
|
|
|
obf_payload=raw,
|
|
|
|
|
|
max_age_s=3.0,
|
|
|
|
|
|
now_s=time.time(),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
return prices_dict
|
|
|
|
|
|
|
|
|
|
|
|
def _sync_sc_threshold_advisor(self, scan_number: int, vel_div: float) -> None:
|
|
|
|
|
|
"""Shadow-only advisory layer for tracking / future threshold learning."""
|
|
|
|
|
|
if self._sc_advisor is None:
|
|
|
|
|
|
return
|
|
|
|
|
|
try:
|
|
|
|
|
|
payload = self._read_esof_payload()
|
|
|
|
|
|
trade_history = getattr(self.eng, "trade_history", [])
|
|
|
|
|
|
open_tid = next(iter(self._pending_entries.keys()), "")
|
|
|
|
|
|
pending = self._pending_entries.get(open_tid, {}) if open_tid else {}
|
|
|
|
|
|
rec = self._sc_advisor.evaluate(
|
|
|
|
|
|
trade_id=str(open_tid or ""),
|
|
|
|
|
|
asset=str(pending.get("asset", "")),
|
|
|
|
|
|
sc=_safe_float(payload.get("advisory_score", payload.get("score", 0.0)) if payload else None),
|
|
|
|
|
|
vel_div=float(vel_div or 0.0),
|
|
|
|
|
|
exf_snapshot=getattr(self, "_last_exf", {}) or {},
|
|
|
|
|
|
trade_history=trade_history,
|
|
|
|
|
|
current_mult=float(self._last_esof_size_mult or 1.0),
|
|
|
|
|
|
esof_payload=payload,
|
|
|
|
|
|
scan_number=int(scan_number or 0),
|
|
|
|
|
|
bar_idx=int(self.bar_idx),
|
|
|
|
|
|
strategy="blue",
|
|
|
|
|
|
log_shadow=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
if open_tid:
|
|
|
|
|
|
pending["sc_threshold_advisor"] = rec
|
|
|
|
|
|
pending["sc_exec_mult"] = float(self._last_esof_size_mult or 1.0)
|
|
|
|
|
|
self._pending_entries[open_tid] = pending
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._record_sc_haircut(trade_id=open_tid, pending=pending, source="sc_threshold")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"SC haircut record failed for {open_tid}: {e}")
|
|
|
|
|
|
now = time.time()
|
|
|
|
|
|
if now - self._sc_advisor_last_log >= 300:
|
|
|
|
|
|
self._sc_advisor_last_log = now
|
|
|
|
|
|
log(
|
|
|
|
|
|
f"SC_ADVISOR: sc={rec['sc']:+.3f} cur={rec['current_mult']:.2f} "
|
|
|
|
|
|
f"rec={rec['recommended_mult']:.2f} cut={rec['recommended_sc_cut']:+.2f} "
|
|
|
|
|
|
f"conf={rec['confidence']:.2f} src={rec['decision_source']}"
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"SC_ADVISOR error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def _current_obf_snapshot(self, asset: str, bar_idx: int) -> dict[str, dict]:
    """Return the OBF snapshot for ``asset`` at ``bar_idx``, or ``{}``.

    An empty dict is returned when the snapshot builder is unavailable, there
    is no order-book engine, ``asset`` is empty, or the builder raises.
    """
    available = (
        build_obf_snapshot_from_engine is not None
        and self.ob_eng is not None
        and bool(asset)
    )
    if available:
        try:
            return build_obf_snapshot_from_engine(self.ob_eng, asset, bar_idx)
        except Exception:
            pass
    return {}
|
|
|
|
|
|
|
|
|
|
|
|
def _record_bounce_prices(self, prices_dict: dict[str, float]) -> None:
|
|
|
|
|
|
"""Maintain rolling price histories for the bounce advisor."""
|
|
|
|
|
|
if not prices_dict:
|
|
|
|
|
|
return
|
|
|
|
|
|
for asset, px in prices_dict.items():
|
|
|
|
|
|
try:
|
|
|
|
|
|
price = float(px)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
continue
|
|
|
|
|
|
if not math.isfinite(price) or price <= 0.0:
|
|
|
|
|
|
continue
|
|
|
|
|
|
hist = self._bounce_price_history.get(asset)
|
|
|
|
|
|
if hist is None:
|
|
|
|
|
|
hist = deque(maxlen=512)
|
|
|
|
|
|
self._bounce_price_history[asset] = hist
|
|
|
|
|
|
hist.append(price)
|
|
|
|
|
|
|
|
|
|
|
|
def _bounce_price_path(self, asset: str) -> list[float]:
|
|
|
|
|
|
hist = self._bounce_price_history.get(asset)
|
|
|
|
|
|
if not hist:
|
|
|
|
|
|
return []
|
|
|
|
|
|
return [float(px) for px in hist if math.isfinite(float(px))]
|
|
|
|
|
|
|
|
|
|
|
|
def _bounce_eval(
|
|
|
|
|
|
self,
|
|
|
|
|
|
*,
|
|
|
|
|
|
trade_id: str,
|
|
|
|
|
|
asset: str,
|
|
|
|
|
|
side: str,
|
|
|
|
|
|
source: str,
|
|
|
|
|
|
scan_number: int,
|
|
|
|
|
|
entry_ts: datetime | None,
|
|
|
|
|
|
current_price: float,
|
|
|
|
|
|
entry_price: float,
|
|
|
|
|
|
quantity: float,
|
|
|
|
|
|
notional: float,
|
|
|
|
|
|
leverage: float,
|
|
|
|
|
|
vel_div: float,
|
|
|
|
|
|
current_mult: float,
|
|
|
|
|
|
bars_held: int,
|
|
|
|
|
|
log_shadow: bool = True,
|
|
|
|
|
|
) -> dict | None:
|
|
|
|
|
|
"""Evaluate the bounce advisor on a rolling price path and persist the row."""
|
|
|
|
|
|
if self._bounce_advisor is None or not trade_id or not asset:
|
|
|
|
|
|
return None
|
|
|
|
|
|
price_path = self._bounce_price_path(asset)
|
|
|
|
|
|
if len(price_path) < 3:
|
|
|
|
|
|
return None
|
|
|
|
|
|
rec = self._bounce_advisor.evaluate(
|
|
|
|
|
|
trade_id=str(trade_id),
|
|
|
|
|
|
asset=str(asset),
|
|
|
|
|
|
side=str(side or "SHORT"),
|
|
|
|
|
|
price_path=price_path,
|
|
|
|
|
|
entry_ts=entry_ts or datetime.now(timezone.utc),
|
|
|
|
|
|
entry_price=float(entry_price or 0.0),
|
|
|
|
|
|
current_price=float(current_price or 0.0),
|
|
|
|
|
|
quantity=float(quantity or 0.0),
|
|
|
|
|
|
notional=float(notional or 0.0),
|
|
|
|
|
|
leverage=float(leverage or 0.0),
|
|
|
|
|
|
current_mult=float(current_mult or 1.0),
|
|
|
|
|
|
vel_div=float(vel_div or 0.0),
|
|
|
|
|
|
scan_number=int(scan_number or 0),
|
|
|
|
|
|
bar_idx=int(self.bar_idx),
|
|
|
|
|
|
bars_held=int(max(0, bars_held)),
|
|
|
|
|
|
source=str(source or "entry"),
|
|
|
|
|
|
obf_snapshot=self._current_obf_snapshot(asset, self.bar_idx),
|
|
|
|
|
|
log_shadow=log_shadow,
|
|
|
|
|
|
use_ta=True,
|
|
|
|
|
|
use_obf=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
if rec:
|
|
|
|
|
|
rec["price_path"] = price_path[-128:]
|
|
|
|
|
|
return rec
|
|
|
|
|
|
|
|
|
|
|
|
def _ensure_v7_journal_table(self) -> None:
    """Create the V7 decision journal table if it does not already exist.

    Posts a ``CREATE TABLE IF NOT EXISTS`` statement for
    ``{self._v7_journal_db}.{self._v7_journal_table}`` to the local ClickHouse
    HTTP endpoint with a 5 second timeout. Any failure is logged, never raised.
    """
    create_sql = f"""
    CREATE TABLE IF NOT EXISTS {self._v7_journal_db}.{self._v7_journal_table}
    (
        ts DateTime64(6, 'UTC'),
        ts_day Date MATERIALIZED toDate(ts),
        strategy LowCardinality(String),
        source LowCardinality(String),
        trade_id String,
        asset LowCardinality(String),
        side LowCardinality(String),
        entry_price Float64,
        current_price Float64,
        quantity Float64,
        notional Float64,
        leverage Float32,
        bar_idx UInt32,
        decision_seq UInt32,
        bars_held UInt16,
        action LowCardinality(String),
        reason LowCardinality(String),
        pnl_pct Float32,
        mfe Float32,
        mae Float32,
        mfe_risk Float32,
        mae_risk Float32,
        exit_pressure Float32,
        rv_comp Float32,
        mae_thresh1 Float32,
        bounce_score Float32,
        bounce_risk Float32,
        ob_imbalance Float32,
        vel_div_entry Float32,
        vel_div_now Float32,
        v50_vel Float32,
        v750_vel Float32,
        exf_funding Float32,
        exf_dvol Float32,
        exf_fear_greed Float32,
        exf_taker Float32,
        posture LowCardinality(String),
        tp_base_pct Float32 DEFAULT 0,
        dynamic_tp_pct Float32 DEFAULT 0,
        tp_mod_factor Float32 DEFAULT 0,
        cascade_count UInt16 DEFAULT 0,
        ob_regime_signal Int8 DEFAULT 0,
        tp_floor_armed UInt8 DEFAULT 0
    )
    ENGINE = MergeTree
    PARTITION BY toYYYYMM(ts)
    ORDER BY (ts_day, trade_id, decision_seq, ts)
    TTL ts_day + toIntervalDay(180)
    """
    try:
        request = urllib.request.Request(
            "http://localhost:8123/",
            data=create_sql.encode(),
            headers={
                "X-ClickHouse-User": "dolphin",
                "X-ClickHouse-Key": "dolphin_ch_2026",
            },
            method="POST",
        )
        # The response body is not needed; the context manager just closes it.
        with urllib.request.urlopen(request, timeout=5):
            pass
    except Exception as exc:
        log(f"[V7_JOURNAL] table ensure failed: {exc}")
|
|
|
|
|
|
|
|
|
|
|
|
def _record_v7_decision(
    self,
    *,
    trade_id: str,
    asset: str,
    side: str,
    decision: dict,
    current_price: float,
    ob_imbalance: float,
    vel_div_now: float,
    v50_vel: float,
    v750_vel: float,
    source: str = "scan_eval",
    bar_idx: int | None = None,
) -> None:
    """Persist a V7 evaluation for observability and offline comparison.

    No-op when the journal is disabled or no V7 exit engine is loaded.
    Otherwise bumps the per-trade decision sequence, assembles one journal row
    from the decision dict, the pending entry and the last ExF snapshot, and
    writes it with ``ch_put``. A failed write is logged, never raised.

    Args:
        trade_id: Trade the decision belongs to; also keys the pending entry.
        asset: Asset symbol; falls back to the pending entry's asset when empty.
        side: Position side; falls back to the pending entry's side when empty.
        decision: V7 decision dict (action, reason, pnl/mfe/mae metrics, ...).
        current_price: Price the decision was evaluated at.
        ob_imbalance: Order-book imbalance used for the evaluation.
        vel_div_now: Current velocity divergence.
        v50_vel: Current 50-window velocity.
        v750_vel: Current 750-window velocity.
        source: Journal ``source`` tag for the row.
        bar_idx: Bar index to record; defaults to ``self.bar_idx - 1``.
    """
    if not self._v7_journal_enabled or self._v7_exit_engine is None:
        return

    def _u16(value) -> int:
        # The journal columns are UInt16: a value outside [0, 65535] cannot be
        # stored, and a negative bars_held (stale entry_bar frame) has been
        # seen upstream, so clamp instead of emitting an unwritable row.
        return max(0, min(65535, int(value or 0)))

    pending = self._pending_entries.get(trade_id, {})
    seq = int(self._v7_decision_seq.get(trade_id, 0)) + 1
    self._v7_decision_seq[trade_id] = seq
    entry_price = float(pending.get("entry_price", 0.0) or 0.0)
    quantity = float(pending.get("quantity", 0.0) or 0.0)
    row = {
        "ts": _ch_ts_us(),
        "strategy": "blue",
        "source": source,
        "trade_id": str(trade_id or ""),
        "asset": str(asset or pending.get("asset", "")),
        "side": str(side or pending.get("side", "")),
        "entry_price": entry_price,
        "current_price": float(current_price or 0.0),
        "quantity": quantity,
        "notional": float(quantity * entry_price),
        "leverage": float(pending.get("leverage", 0.0) or 0.0),
        "bar_idx": int(max(0, self.bar_idx - 1 if bar_idx is None else bar_idx)),
        "decision_seq": seq,
        "bars_held": _u16(decision.get("bars_held", 0)),
        "action": str(decision.get("action", "UNKNOWN") or "UNKNOWN"),
        "reason": _normalize_v7_exit_reason(decision.get("reason") or ""),
        "pnl_pct": float(decision.get("pnl_pct", 0.0) or 0.0),
        "mfe": float(decision.get("mfe", 0.0) or 0.0),
        "mae": float(decision.get("mae", 0.0) or 0.0),
        "mfe_risk": float(decision.get("mfe_risk", 0.0) or 0.0),
        "mae_risk": float(decision.get("mae_risk", 0.0) or 0.0),
        "exit_pressure": float(decision.get("exit_pressure", 0.0) or 0.0),
        "rv_comp": float(decision.get("rv_comp", 0.0) or 0.0),
        "mae_thresh1": float(decision.get("mae_thresh1", 0.0) or 0.0),
        "bounce_score": float(decision.get("bounce_score", 0.0) or 0.0),
        "bounce_risk": float(decision.get("bounce_risk", 0.0) or 0.0),
        "ob_imbalance": float(ob_imbalance or 0.0),
        "vel_div_entry": float(pending.get("vel_div_entry", 0.0) or 0.0),
        "vel_div_now": float(vel_div_now or 0.0),
        "v50_vel": float(v50_vel or 0.0),
        "v750_vel": float(v750_vel or 0.0),
        "exf_funding": float(self._last_exf.get("funding", 0.0) or 0.0),
        "exf_dvol": float(self._last_exf.get("dvol", 0.0) or 0.0),
        "exf_fear_greed": float(self._last_exf.get("fear_greed", 0.0) or 0.0),
        "exf_taker": float(self._last_exf.get("taker", 0.0) or 0.0),
        "posture": str(pending.get("posture", self.cached_posture) or ""),
    }
    # TP-threshold observability (LINK 5e05eeeb incident, 2026-06-11):
    # the EFFECTIVE TP gate is OB-modulated (cascade ×1.40 etc.) and was
    # never logged — making the miss undiagnosable from the tape. Pull
    # the exit manager's last evaluation for this trade; fall back to
    # any diag fields carried on the decision dict itself.
    try:
        _tp_diag = dict(getattr(self.eng.exit_manager, "last_eval", {}) or {})
        # Ignore a last_eval that belongs to a different trade.
        if str(_tp_diag.get("trade_id") or "") != str(trade_id or ""):
            _tp_diag = {}
    except Exception:
        _tp_diag = {}

    def _dg(key, default=0.0):
        # The decision dict wins over the exit manager's diagnostics; None means default.
        v = decision.get(key, _tp_diag.get(key, default))
        return v if v is not None else default

    row.update({
        "tp_base_pct": float(_dg("tp_base_pct")),
        "dynamic_tp_pct": float(_dg("dynamic_tp_pct")),
        "tp_mod_factor": float(_dg("tp_mod_factor")),
        "cascade_count": _u16(_dg("cascade_count", 0)),
        "ob_regime_signal": int(_dg("ob_regime_signal", 0)),
        "tp_floor_armed": 1 if _dg("tp_floor_armed", False) else 0,
    })
    try:
        ch_put(self._v7_journal_table, row)
    except Exception as exc:
        log(f"[V7_JOURNAL] write failed: {exc}")
|
|
|
|
|
|
|
|
|
|
|
|
def _v7_live_exit_decision(
|
|
|
|
|
|
self,
|
|
|
|
|
|
*,
|
|
|
|
|
|
pos,
|
|
|
|
|
|
bar_idx: int,
|
|
|
|
|
|
prices: dict,
|
|
|
|
|
|
vel_div: float,
|
|
|
|
|
|
v50_vel: float,
|
|
|
|
|
|
v750_vel: float,
|
|
|
|
|
|
) -> dict | None:
|
|
|
|
|
|
"""Live BLUE exit hook backed by AlphaExitEngineV7.
|
|
|
|
|
|
|
|
|
|
|
|
The orchestrator calls this before falling back to the base exit manager.
|
|
|
|
|
|
Returns a V7 decision dict or None if the trade cannot yet be evaluated.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if self._v7_exit_engine is None or pos is None:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
trade_id = str(getattr(pos, "trade_id", "") or "")
|
|
|
|
|
|
asset = str(getattr(pos, "asset", "") or "")
|
|
|
|
|
|
if not trade_id or not asset:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
pending = self._pending_entries.get(trade_id, {})
|
|
|
|
|
|
ctx_v7 = self._v7_contexts.get(trade_id)
|
|
|
|
|
|
eval_bar = max(0, int(bar_idx) - 1)
|
|
|
|
|
|
|
|
|
|
|
|
if ctx_v7 is None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
ctx_v7 = self._v7_exit_engine.make_context(
|
|
|
|
|
|
entry_price=float(
|
|
|
|
|
|
pending.get("entry_price", getattr(pos, "entry_price", 0.0))
|
|
|
|
|
|
or getattr(pos, "entry_price", 0.0)
|
|
|
|
|
|
or 0.0
|
|
|
|
|
|
),
|
|
|
|
|
|
entry_bar=int(pending.get("entry_bar", eval_bar) or eval_bar),
|
|
|
|
|
|
side=1 if str(pending.get("side", "SHORT") or "SHORT") == "SHORT" else 0,
|
|
|
|
|
|
)
|
|
|
|
|
|
if self._last_exf:
|
|
|
|
|
|
ctx_v7.set_exf(
|
|
|
|
|
|
funding=float(self._last_exf.get("funding", 0.0) or 0.0),
|
|
|
|
|
|
dvol=float(self._last_exf.get("dvol", 0.0) or 0.0),
|
|
|
|
|
|
fear_greed=float(self._last_exf.get("fear_greed", 0.0) or 0.0),
|
|
|
|
|
|
taker=float(self._last_exf.get("taker", 0.0) or 0.0),
|
|
|
|
|
|
)
|
|
|
|
|
|
self._v7_contexts[trade_id] = ctx_v7
|
|
|
|
|
|
self._v7_decision_seq.setdefault(trade_id, 0)
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
log(f" V7 live context init failed for {trade_id}: {exc}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
elif self._last_exf:
|
|
|
|
|
|
try:
|
|
|
|
|
|
ctx_v7.set_exf(
|
|
|
|
|
|
funding=float(self._last_exf.get("funding", 0.0) or 0.0),
|
|
|
|
|
|
dvol=float(self._last_exf.get("dvol", 0.0) or 0.0),
|
|
|
|
|
|
fear_greed=float(self._last_exf.get("fear_greed", 0.0) or 0.0),
|
|
|
|
|
|
taker=float(self._last_exf.get("taker", 0.0) or 0.0),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
ob_imb = 0.0
|
|
|
|
|
|
if self.ob_eng is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
ob_sig = self.ob_eng.get_signal(asset, float(eval_bar))
|
|
|
|
|
|
ob_imb = float(getattr(ob_sig, "imbalance_ma5", 0.0) or 0.0)
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
log(f" V7 live OB signal failed for {trade_id}: {exc}")
|
|
|
|
|
|
|
|
|
|
|
|
cur_px = float(
|
|
|
|
|
|
prices.get(asset, getattr(pos, "current_price", 0.0))
|
|
|
|
|
|
or getattr(pos, "current_price", 0.0)
|
|
|
|
|
|
or 0.0
|
|
|
|
|
|
)
|
|
|
|
|
|
if cur_px <= 0.0:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
decision = self._v7_exit_engine.evaluate(
|
|
|
|
|
|
ctx_v7,
|
|
|
|
|
|
cur_px,
|
|
|
|
|
|
eval_bar,
|
|
|
|
|
|
ob_imb,
|
|
|
|
|
|
asset=asset,
|
|
|
|
|
|
)
|
|
|
|
|
|
self._v7_decisions[trade_id] = decision
|
|
|
|
|
|
self._record_v7_decision(
|
|
|
|
|
|
trade_id=trade_id,
|
|
|
|
|
|
asset=asset,
|
|
|
|
|
|
side=str(pending.get("side", "SHORT") or "SHORT"),
|
|
|
|
|
|
decision=decision,
|
|
|
|
|
|
current_price=cur_px,
|
|
|
|
|
|
ob_imbalance=ob_imb,
|
|
|
|
|
|
vel_div_now=vel_div,
|
|
|
|
|
|
v50_vel=v50_vel,
|
|
|
|
|
|
v750_vel=v750_vel,
|
|
|
|
|
|
source="live_exit",
|
|
|
|
|
|
bar_idx=eval_bar,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
action = str(decision.get("action", "HOLD") or "HOLD")
|
|
|
|
|
|
if action != "HOLD":
|
|
|
|
|
|
log(
|
|
|
|
|
|
" V7 live decision: "
|
|
|
|
|
|
f"{trade_id} {asset} action={action} reason={decision.get('reason', '')} "
|
|
|
|
|
|
f"pressure={float(decision.get('exit_pressure', 0.0) or 0.0):+.3f} "
|
|
|
|
|
|
f"pnl_pct={float(decision.get('pnl_pct', 0.0) or 0.0):+.3f}"
|
|
|
|
|
|
)
|
|
|
|
|
|
return decision
|
|
|
|
|
|
|
|
|
|
|
|
def _sync_sc_gauge_advisor(self, scan_number: int, vel_div: float) -> None:
|
|
|
|
|
|
"""Shadow-only bucket gauge advisory surface."""
|
|
|
|
|
|
if self._sc_gauge is None:
|
|
|
|
|
|
return
|
|
|
|
|
|
try:
|
|
|
|
|
|
payload = self._read_esof_payload()
|
|
|
|
|
|
trade_history = getattr(self.eng, "trade_history", [])
|
|
|
|
|
|
open_tid = next(iter(self._pending_entries.keys()), "")
|
|
|
|
|
|
pending = self._pending_entries.get(open_tid, {}) if open_tid else {}
|
|
|
|
|
|
asset = str(pending.get("asset", ""))
|
|
|
|
|
|
rec = self._sc_gauge.evaluate(
|
|
|
|
|
|
trade_id=str(open_tid or ""),
|
|
|
|
|
|
asset=asset,
|
|
|
|
|
|
sc=_safe_float(payload.get("advisory_score", payload.get("score", 0.0)) if payload else None),
|
|
|
|
|
|
vel_div=float(vel_div or 0.0),
|
|
|
|
|
|
exf_snapshot=getattr(self, "_last_exf", {}) or {},
|
|
|
|
|
|
obf_snapshot=self._current_obf_snapshot(asset, self.bar_idx),
|
|
|
|
|
|
trade_history=trade_history,
|
|
|
|
|
|
current_mult=float(self._last_esof_size_mult or 1.0),
|
|
|
|
|
|
esof_payload=payload,
|
|
|
|
|
|
scan_number=int(scan_number or 0),
|
|
|
|
|
|
bar_idx=int(self.bar_idx),
|
|
|
|
|
|
strategy="blue",
|
|
|
|
|
|
log_shadow=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
if open_tid:
|
|
|
|
|
|
pending["sc_bucket_gauge"] = rec
|
|
|
|
|
|
pending["sc_bucket_gauge_exec_mult"] = float(self._last_esof_size_mult or 1.0)
|
|
|
|
|
|
self._pending_entries[open_tid] = pending
|
|
|
|
|
|
now = time.time()
|
|
|
|
|
|
if now - self._sc_gauge_last_log >= 300:
|
|
|
|
|
|
self._sc_gauge_last_log = now
|
|
|
|
|
|
log(
|
|
|
|
|
|
f"SC_GAUGE: sc={rec['sc']:+.3f} bucket={rec['bucket_id']} "
|
|
|
|
|
|
f"cur={rec['current_mult']:.2f} rec={rec['recommended_size_mult']:.2f} "
|
|
|
|
|
|
f"tp={rec['recommended_tp_mult']:.2f} hold={rec['recommended_hold_mult']:.2f} "
|
|
|
|
|
|
f"cut={rec['recommended_sc_cut']:+.2f} conf={rec['confidence']:.2f}"
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"SC_GAUGE error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_trade_id(self, explicit: str | None = None, *, create_if_missing: bool = False) -> str:
|
|
|
|
|
|
"""Resolve a trade_id from the event, live position, or pending entry."""
|
|
|
|
|
|
tid = str(explicit or "").strip()
|
|
|
|
|
|
if tid:
|
|
|
|
|
|
return tid
|
|
|
|
|
|
pos = getattr(self.eng, "position", None)
|
|
|
|
|
|
if pos is not None:
|
|
|
|
|
|
pos_tid = str(getattr(pos, "trade_id", "") or "").strip()
|
|
|
|
|
|
if pos_tid:
|
|
|
|
|
|
return pos_tid
|
|
|
|
|
|
if len(self._pending_entries) == 1:
|
|
|
|
|
|
pending_tid = next(iter(self._pending_entries.keys()))
|
|
|
|
|
|
if pending_tid:
|
|
|
|
|
|
return pending_tid
|
|
|
|
|
|
if create_if_missing:
|
|
|
|
|
|
return uuid.uuid4().hex[:16]
|
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
|
|
def _query_clickhouse_tsv(
|
|
|
|
|
|
self,
|
|
|
|
|
|
sql: str,
|
|
|
|
|
|
*,
|
|
|
|
|
|
db_candidates: tuple[str, ...] = ("dolphin", "dolphin_prodgreen"),
|
|
|
|
|
|
timeout: float = 5.0,
|
|
|
|
|
|
) -> tuple[str, str]:
|
|
|
|
|
|
"""Run a small ClickHouse HTTP query and return (raw_text, db_used)."""
|
|
|
|
|
|
import base64 as _b64
|
|
|
|
|
|
|
|
|
|
|
|
auth = "Basic " + _b64.b64encode(b"dolphin:dolphin_ch_2026").decode()
|
|
|
|
|
|
last_exc: Exception | None = None
|
|
|
|
|
|
for db in db_candidates:
|
|
|
|
|
|
try:
|
|
|
|
|
|
req = urllib.request.Request(
|
|
|
|
|
|
f"http://localhost:8123/?database={db}",
|
|
|
|
|
|
data=sql.encode(),
|
|
|
|
|
|
headers={"Authorization": auth},
|
|
|
|
|
|
)
|
|
|
|
|
|
with urllib.request.urlopen(req, timeout=timeout) as r:
|
|
|
|
|
|
return r.read().decode().strip(), db
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
last_exc = exc
|
|
|
|
|
|
raise last_exc or RuntimeError("ClickHouse query failed")
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_capital_blob(self, raw, source: str) -> tuple[float, dict] | None:
|
|
|
|
|
|
"""Parse a HZ/JSON state blob and validate the capital payload."""
|
|
|
|
|
|
if not raw:
|
|
|
|
|
|
return None
|
|
|
|
|
|
try:
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
data = json.loads(raw) if isinstance(raw, str) else raw
|
|
|
|
|
|
if isinstance(data, list):
|
|
|
|
|
|
# ledger-style payload (list of update rows): use the latest row
|
|
|
|
|
|
data = next((e for e in reversed(data) if isinstance(e, dict)), {})
|
|
|
|
|
|
if not isinstance(data, dict):
|
|
|
|
|
|
data = {}
|
2026-06-12 14:59:49 +02:00
|
|
|
|
capital = float(data.get("capital", 0) or 0)
|
|
|
|
|
|
if capital >= 1.0 and math.isfinite(capital):
|
|
|
|
|
|
return capital, data
|
|
|
|
|
|
log(f" restore candidate rejected from {source}: capital={capital!r}")
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
log(f" restore candidate parse failed from {source}: {exc}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_timestamp_seconds(self, value) -> float | None:
|
|
|
|
|
|
"""Parse epoch/ISO timestamps into UTC epoch seconds."""
|
|
|
|
|
|
if value is None:
|
|
|
|
|
|
return None
|
|
|
|
|
|
try:
|
|
|
|
|
|
if isinstance(value, (int, float)):
|
|
|
|
|
|
ts = float(value)
|
|
|
|
|
|
elif isinstance(value, str):
|
|
|
|
|
|
text = value.strip()
|
|
|
|
|
|
if not text:
|
|
|
|
|
|
return None
|
|
|
|
|
|
try:
|
|
|
|
|
|
ts = float(text)
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
dt = datetime.fromisoformat(text.replace("Z", "+00:00"))
|
|
|
|
|
|
if dt.tzinfo is None:
|
|
|
|
|
|
dt = dt.replace(tzinfo=timezone.utc)
|
|
|
|
|
|
ts = dt.timestamp()
|
|
|
|
|
|
else:
|
|
|
|
|
|
return None
|
|
|
|
|
|
if not math.isfinite(ts):
|
|
|
|
|
|
return None
|
|
|
|
|
|
# Normalize millisecond / microsecond / nanosecond epochs down to seconds.
|
|
|
|
|
|
# CH event clocks are often stored as ts_us, while HZ blobs tend to be seconds.
|
|
|
|
|
|
scale_hops = 0
|
|
|
|
|
|
while ts > 1.0e11 and scale_hops < 4:
|
|
|
|
|
|
ts /= 1000.0
|
|
|
|
|
|
scale_hops += 1
|
|
|
|
|
|
return ts if ts > 0 else None
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_state_timestamp(self, blob: dict) -> float | None:
|
|
|
|
|
|
"""Extract the best timestamp from a state blob."""
|
|
|
|
|
|
if not isinstance(blob, dict):
|
|
|
|
|
|
return None
|
|
|
|
|
|
for key in ("updated_at", "timestamp", "ts", "iso"):
|
|
|
|
|
|
if key not in blob:
|
|
|
|
|
|
continue
|
|
|
|
|
|
parsed = self._parse_timestamp_seconds(blob.get(key))
|
|
|
|
|
|
if parsed is not None:
|
|
|
|
|
|
return parsed
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def _mark_restore_failure(self, reason: str) -> None:
    """Mark restore as failed and force the trader into halt mode.

    Records the failure flag and reason on ``self``, then, under
    ``eng_lock``, sets the engine's ``regime_dd_halt`` flag and switches its
    day posture to ``"HIBERNATE"`` when an engine exists.

    Args:
        reason: Human-readable explanation, stored and logged.
    """
    self._restore_failed = True
    self._restore_failure_reason = reason
    try:
        with self.eng_lock:
            if self.eng is not None:
                self.eng.regime_dd_halt = True
                self.eng._day_posture = "HIBERNATE"
    except Exception as exc:
        # Still best-effort (never raises), but a failure to apply the halt
        # must be visible to the operator rather than silently swallowed.
        log(f" RESTORE HALT: could not force engine halt state: {exc}")
    log(f"RESTORE HALT: {reason}")
|
|
|
|
|
|
|
|
|
|
|
|
def _restore_capital_from_legacy_checkpoint(self) -> bool:
    """Legacy escape hatch for the old scalar checkpoint path.

    Only active when ``DOLPHIN_ALLOW_LEGACY_CAPITAL_CHECKPOINT`` is enabled.
    Tries the HZ ``capital_checkpoint`` entry first, then the on-disk
    checkpoint file. Returns ``True`` as soon as one of them yields a valid
    capital, ``False`` otherwise.
    """
    legacy_allowed = _env_bool("DOLPHIN_ALLOW_LEGACY_CAPITAL_CHECKPOINT", False)
    if not legacy_allowed:
        return False

    def _try_load(raw, source):
        # Validate the payload and, on success, adopt it as engine capital.
        outcome = self._parse_capital_blob(raw, source)
        if outcome is not None:
            capital = outcome[0]
            self.eng.capital = capital
            self._restore_source = source
            log(f" Capital restored from legacy {source}: ${capital:,.2f}")
            return True
        return False

    # 1) Hazelcast checkpoint entry.
    try:
        hz_payload = self.state_map.blocking().get("capital_checkpoint")
        restored = _try_load(hz_payload, "HZ capital_checkpoint")
        if restored:
            return True
    except Exception as e:
        log(f" capital HZ legacy restore failed: {e}")

    # 2) Disk checkpoint file.
    try:
        if CAPITAL_DISK_CHECKPOINT.exists():
            disk_payload = CAPITAL_DISK_CHECKPOINT.read_text()
            restored = _try_load(disk_payload, "disk capital_checkpoint")
            if restored:
                return True
    except Exception as e:
        log(f" capital disk legacy restore failed: {e}")

    return False
|
|
|
|
|
|
|
|
|
|
|
|
def _restore_capital_from_state(self) -> bool:
    """Restore capital from live HZ state or ledger-backed snapshots.

    Candidates are collected into ``parsed_state`` as
    ``key -> (label, capital, blob, timestamp_seconds_or_None)`` from:

    * the local corrective-replay file and the local capital-update ledger,
    * the HZ ``capital_update_ledger`` entry and several HZ state keys,
    * today's entry in the HZ pnl map,
    * the latest ClickHouse ``status_snapshots`` / ``trade_events`` rows.

    Selection (see ``_select_restore_candidate``): a forced
    ``latest_nautilus`` seed wins when enabled, then the local ledger, then
    the candidate with the newest timestamp (ties broken by ``source_rank``).
    When nothing qualifies the legacy checkpoint is tried, and finally the
    restore is marked failed.

    Returns:
        ``True`` when ``self.eng.capital`` was set from some source.
    """
    parsed_state = {}
    self._restore_state_snapshots = {}
    source_rank = {
        "capital_update_ledger": 65,
        "status_snapshots": 50,
        "latest_nautilus": 40,
        "engine_snapshot": 30,
        "pnl_day": 25,
        "correction_replay_local": 20,
        "correction_replay_hz": 10,
        "trade_events": 5,
    }

    # ── Local corrective replay file ──────────────────────────────────────
    if CAPITAL_CORRECTIVE_REPLAY.exists():
        try:
            replay_blob = json.loads(CAPITAL_CORRECTIVE_REPLAY.read_text())
            replay_capital = _safe_float(replay_blob.get("capital", 0.0), 0.0)
            replay_ts = replay_blob.get("updated_at") or replay_blob.get("ts")
            # Use the shared parser so this timestamp is on the same scale
            # (UTC epoch seconds, ms/us/ns normalised, naive ISO = UTC) as
            # every other candidate it is ranked against.
            replay_ts_f = self._parse_timestamp_seconds(replay_ts)
            if replay_capital >= 1.0:
                parsed_state["correction_replay_local"] = (
                    "local corrective replay",
                    replay_capital,
                    replay_blob,
                    replay_ts_f,
                )
        except Exception as e:
            log(f" capital corrective replay read failed: {e}")

    # ── Local capital-update ledger (last row) ────────────────────────────
    try:
        if CAPITAL_UPDATE_LEDGER.exists():
            raw = CAPITAL_UPDATE_LEDGER.read_text()
            ledger_rows = json.loads(raw) if raw else []
            if isinstance(ledger_rows, list) and ledger_rows:
                last = ledger_rows[-1] if isinstance(ledger_rows[-1], dict) else None
                if isinstance(last, dict):
                    capital_after = _safe_float(last.get("capital_after", last.get("capital", 0.0)), 0.0)
                    if capital_after >= 1.0 and math.isfinite(capital_after):
                        parsed_state["capital_update_ledger_local"] = (
                            "local capital_update_ledger",
                            capital_after,
                            dict(last),
                            self._extract_state_timestamp(last),
                        )
    except Exception as e:
        log(f" capital ledger disk read failed: {e}")

    # ── HZ capital-update ledger (last row) ───────────────────────────────
    try:
        raw_ledger = None
        if self.state_map is not None:
            raw_ledger = self.state_map.blocking().get("capital_update_ledger")
        ledger_rows = json.loads(raw_ledger) if isinstance(raw_ledger, str) and raw_ledger else list(raw_ledger or [])
        if isinstance(ledger_rows, list) and ledger_rows:
            last = ledger_rows[-1] if isinstance(ledger_rows[-1], dict) else None
            if isinstance(last, dict):
                capital_after = _safe_float(last.get("capital_after", last.get("capital", 0.0)), 0.0)
                if capital_after >= 1.0:
                    parsed_state["capital_update_ledger"] = (
                        "capital_update_ledger",
                        capital_after,
                        dict(last),
                        self._extract_state_timestamp(last),
                    )
    except Exception as e:
        log(f" capital ledger restore failed: {e}")

    # ── HZ state-map keys ─────────────────────────────────────────────────
    # NOTE(review): the "*_local" keys are also looked up in HZ here; a hit
    # replaces the entry built from the local file above — confirm intended.
    for key, label in (
        ("capital_update_ledger_local", "local capital_update_ledger"),
        ("capital_update_ledger", "capital_update_ledger"),
        ("correction_replay_local", "local corrective replay"),
        (CAPITAL_CORRECTIVE_REPLAY_HZ_KEY, "HZ corrective replay"),
        ("latest_nautilus", "HZ latest_nautilus"),
        ("engine_snapshot", "HZ engine_snapshot"),
    ):
        try:
            raw = self.state_map.blocking().get(key)
        except Exception as e:
            log(f" capital {key} read failed: {e}")
            raw = None
        parsed = self._parse_capital_blob(raw, label)
        if parsed is not None:
            capital, blob = parsed
            parsed_key = (
                "correction_replay_local"
                if key == "correction_replay_local"
                else "correction_replay_hz" if key == CAPITAL_CORRECTIVE_REPLAY_HZ_KEY else key
            )
            parsed_state[parsed_key] = (
                label,
                capital,
                blob,
                self._extract_state_timestamp(blob),
            )
            if key in ("latest_nautilus", "engine_snapshot") and isinstance(blob, dict):
                # Cached so the position-restore fallback can reuse them.
                self._restore_state_snapshots[key] = dict(blob)

    # ── Today's HZ pnl entry ──────────────────────────────────────────────
    day_key = datetime.now(timezone.utc).strftime('%Y-%m-%d')
    if self.pnl_map is not None:
        try:
            raw = self.pnl_map.blocking().get(day_key)
        except Exception as e:
            log(f" capital pnl_map[{day_key}] read failed: {e}")
            raw = None
        parsed = self._parse_capital_blob(raw, f"HZ pnl[{day_key}]")
        if parsed is not None:
            capital, blob = parsed
            parsed_state["pnl_day"] = (
                f"HZ pnl[{day_key}]",
                capital,
                blob,
                self._extract_state_timestamp(blob),
            )

    def _select_restore_candidate() -> tuple[str, str, float, dict, float | None] | None:
        """Pick the winning (key, label, capital, blob, ts) or None."""
        candidates: list[tuple[float, int, str, str, float, dict, float | None]] = []
        for key, (label, capital, blob, ts) in parsed_state.items():
            if not (math.isfinite(capital) and capital >= 1.0):
                continue
            candidates.append(
                (
                    # Missing timestamps sort behind every dated candidate.
                    ts if ts is not None else float("-inf"),
                    source_rank.get(key, 0),
                    key,
                    label,
                    capital,
                    blob,
                    ts,
                )
            )
        if not candidates:
            return None
        force_latest_seed = _env_bool("DOLPHIN_FORCE_LATEST_NAUTILUS_RESTORE", False)
        if force_latest_seed and "latest_nautilus" in parsed_state:
            label, capital, blob, ts = parsed_state["latest_nautilus"]
            if math.isfinite(capital) and capital >= 1.0:
                return "latest_nautilus", label, capital, blob, ts
        if "capital_update_ledger_local" in parsed_state:
            label, capital, blob, ts = parsed_state["capital_update_ledger_local"]
            if math.isfinite(capital) and capital >= 1.0:
                return "capital_update_ledger_local", label, capital, blob, ts
        candidates.sort(key=lambda item: (item[0], item[1]), reverse=True)
        _, _, key, label, capital, blob, ts = candidates[0]
        return key, label, capital, blob, ts

    # ── ClickHouse: latest status snapshot and latest BLUE trade event ────
    for sql, label in (
        (
            "SELECT ts, capital, trades_executed, posture, phase "
            "FROM status_snapshots ORDER BY ts DESC LIMIT 1 FORMAT TabSeparated",
            "status_snapshots",
        ),
        (
            "SELECT ts, capital_after, capital_before, pnl, exit_reason, trade_id "
            "FROM trade_events "
            "WHERE strategy='blue' AND capital_after > 0 "
            "ORDER BY ts DESC LIMIT 1 FORMAT TabSeparated",
            "trade_events",
        ),
    ):
        try:
            raw, db = self._query_clickhouse_tsv(sql)
            if not raw:
                continue
            cols = raw.split("\t")
            capital = None
            if label == "status_snapshots" and len(cols) >= 2:
                capital = float(cols[1])
                parsed_state["status_snapshots"] = (
                    f"status_snapshots[{db}]",
                    capital,
                    {"capital": capital, "ts": cols[0]},
                    self._parse_timestamp_seconds(cols[0]),
                )
            elif label == "trade_events" and len(cols) >= 4:
                cap_after = float(cols[1])
                cap_before = float(cols[2])
                pnl = float(cols[3])
                expected = cap_before + pnl
                if math.isfinite(cap_after) and math.isfinite(expected):
                    # Accept the row only when capital_after is consistent
                    # with capital_before + pnl (within max($1, 0.2%)).
                    if abs(cap_after - expected) <= max(1.0, abs(expected) * 0.002):
                        capital = cap_after
                    else:
                        log(
                            f" restore candidate rejected from {db}.{label}: "
                            f"capital_after={cap_after:.2f} expected={expected:.2f} "
                            f"exit_reason={cols[4] if len(cols) > 4 else ''}"
                        )
                # Registered only inside the trade_events branch so a
                # status_snapshots row can never land under this key.
                if capital is not None and math.isfinite(capital) and capital >= 1.0:
                    parsed_state["trade_events"] = (
                        f"{db}.{label}",
                        capital,
                        {"capital": capital, "ts": cols[0], "trade_id": cols[5] if len(cols) > 5 else ""},
                        self._parse_timestamp_seconds(cols[0]),
                    )
        except Exception as e:
            log(f" capital {label} replay failed: {e}")

    chosen = _select_restore_candidate()
    if chosen is not None:
        key, label, capital, replay_blob, _ = chosen
        self.eng.capital = capital
        self._restore_source = label
        if key in ("correction_replay_local", "correction_replay_hz"):
            self._publish_corrective_replay(replay_blob)
        log(f" Capital restored from {label}: ${capital:,.2f}")
        return True

    if self._restore_capital_from_legacy_checkpoint():
        return True

    self._mark_restore_failure("no sane capital source found (HZ state and ledger replay unavailable)")
    return False
|
|
|
|
|
|
|
|
|
|
|
|
# ── CH position-state persistence ─────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
def _ps_write_open(
    self,
    tid: str,
    entry: dict,
    *,
    ts: int | None = None,
    entry_bar: int | None = None,
    bars_held: int = 0,
    pnl: float = 0.0,
) -> bool:
    """Persist OPEN row to position_state. SINGLE write gate for OPEN rows.

    Lifecycle invariant (MALFORMED_OPEN_RESTORE_BUG.md, distal fix):
    an OPEN row MUST represent a position with economic size. Writes with
    quantity <= 0 or notional <= POSITION_DUST_NOTIONAL_USD are REFUSED —
    a dust/zero remainder is a lifecycle CLOSE and must go through
    _ps_write_closed. Non-finite (NaN/inf) quantity or notional is refused
    the same way. Returns True if the row was emitted.

    The keyword overrides let the partial-retract path persist the
    remaining leg through this same gate (ts=now, continued entry_bar,
    accumulated bars_held / realized pnl) instead of bypassing it with a
    raw ch_put — the bypass is how zero-size OPEN snapshots were born.

    Args:
        tid: Trade id of the row.
        entry: Pending-entry dict; reads quantity, entry_price, entry_ts,
            asset, side, leverage and the optional tp_*/our_leverage fields.
        ts: Row timestamp override; defaults to ``entry['entry_ts']``.
        entry_bar: Entry-bar override; defaults to ``self.bar_idx``.
        bars_held: Bars held so far; negative values are written as 0.
        pnl: Realized pnl to record on the row.
    """
    try:
        quantity = float(entry.get('quantity', 0.0) or 0.0)
        entry_price = float(entry.get('entry_price', 0.0) or 0.0)
        notional = round(quantity * entry_price, 4)
        # NaN compares False against everything, so "<=" guards alone would
        # let a NaN-sized position through; require finite values explicitly.
        has_economic_size = (
            math.isfinite(quantity)
            and math.isfinite(notional)
            and quantity > 0.0
            and notional > POSITION_DUST_NOTIONAL_USD
        )
        if not has_economic_size:
            log(
                " position_state OPEN write REFUSED (lifecycle invariant): "
                f"trade={tid} qty={quantity} notional={notional} — "
                "dust/zero remainders must close, not snapshot as OPEN"
            )
            return False
        market_state_bundle_json = str(entry.get("market_state_bundle_json", "") or "")
        ch_put("position_state", {
            "ts": int(ts if ts is not None else entry['entry_ts']),
            "trade_id": tid,
            "asset": entry['asset'],
            "direction": -1 if entry['side'] == 'SHORT' else 1,
            "entry_price": entry_price,
            "quantity": quantity,
            "notional": notional,
            "leverage": entry['leverage'],
            "bucket_id": int(getattr(self, "_bucket_assignments", {}).get(entry['asset'], -1)),
            "entry_bar": int(entry_bar if entry_bar is not None else self.bar_idx),
            "status": "OPEN",
            "exit_reason": "",
            "pnl": float(pnl),
            # A hold duration is never negative; clamp so a stale clock frame
            # cannot emit a negative count (presumably an unsigned CH column,
            # as for the journal tables — TODO confirm schema).
            "bars_held": max(0, int(bars_held)),
            "market_state_bundle_json": market_state_bundle_json,
            "tp_base_pct": float(entry.get("tp_base_pct", 0.0) or 0.0),
            "tp_effective_pct": float(entry.get("tp_effective_pct", 0.0) or 0.0),
            "our_leverage": float(entry.get("our_leverage", 0.0) or 0.0),
        })
        return True
    except Exception as e:
        log(f" position_state OPEN write failed: {e}")
        return False
|
|
|
|
|
|
|
|
|
|
|
|
def _ps_write_closed(self, tid: str, pending: dict, x: dict):
    """Persist CLOSED row to position_state on exit (supersedes OPEN row via ReplacingMergeTree).

    Args:
        tid: Trade id of the position being closed.
        pending: Pending-entry metadata (asset, side, entry_price, quantity,
            leverage, optional tp_*/our_leverage fields).
        x: Exit payload; reads ``reason``, ``net_pnl`` and ``bars_held``.

    Failures are logged, never raised.
    """
    try:
        market_state_bundle_json = str(pending.get("market_state_bundle_json", "") or "")
        asset = pending.get('asset', '')
        # Coerce None/missing to 0.0: a None here used to raise inside
        # round(), the CLOSED row was dropped, and the OPEN row stayed the
        # latest state for this trade.
        entry_price = float(pending.get('entry_price', 0.0) or 0.0)
        quantity = float(pending.get('quantity', 0.0) or 0.0)
        ch_put("position_state", {
            "ts": _ch_ts_us(),
            "trade_id": tid,
            "asset": asset,
            "direction": -1 if pending.get('side') == 'SHORT' else 1,
            "entry_price": entry_price,
            "quantity": quantity,
            "notional": round(quantity * entry_price, 4),
            "leverage": pending.get('leverage', 0.0),
            "bucket_id": int(getattr(self, "_bucket_assignments", {}).get(asset, -1)),
            "entry_bar": 0,
            "status": "CLOSED",
            "exit_reason": str(x.get('reason', 'UNKNOWN')),
            "pnl": float(x.get('net_pnl', 0) or 0),
            # Never emit a negative hold duration (presumably an unsigned CH
            # column, as for the journal tables — TODO confirm schema).
            "bars_held": max(0, int(x.get('bars_held', 0) or 0)),
            "market_state_bundle_json": market_state_bundle_json,
            "tp_base_pct": float(pending.get("tp_base_pct", 0.0) or 0.0),
            "tp_effective_pct": float(pending.get("tp_effective_pct", 0.0) or 0.0),
            "our_leverage": float(pending.get("our_leverage", 0.0) or 0.0),
        })
    except Exception as e:
        log(f" position_state CLOSED write failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def _fallback_pending_for_close(self, trade_id: str, exit_payload: Mapping[str, Any]) -> dict:
    """Build a best-effort pending snapshot from an exit payload.

    Used when the in-memory pending metadata for a trade is unavailable.
    Side comes from ``side`` when it is SHORT/LONG, otherwise from
    ``direction`` (-1 means SHORT, anything else LONG). Non-positive numeric
    fields are reported as 0.0, and the entry-time signal fields are filled
    with neutral placeholders.
    """
    side = str(exit_payload.get("side", "") or "").upper()
    if side != "SHORT" and side != "LONG":
        # No usable side label: derive it from the numeric direction.
        direction = int(_safe_float(exit_payload.get("direction", -1), -1))
        side = "LONG" if direction != -1 else "SHORT"

    raw_price = _safe_float(exit_payload.get("entry_price", 0.0), 0.0)
    raw_qty = _safe_float(exit_payload.get("quantity", 0.0), 0.0)
    raw_lev = _safe_float(exit_payload.get("leverage", 0.0), 0.0)
    asset = str(exit_payload.get("asset", "") or "")

    price_ok = raw_price > 0
    qty_ok = raw_qty > 0

    snapshot = {}
    snapshot["trade_id"] = str(trade_id or "")
    snapshot["asset"] = asset
    snapshot["side"] = side
    snapshot["entry_price"] = raw_price if price_ok else 0.0
    snapshot["quantity"] = raw_qty if qty_ok else 0.0
    snapshot["notional"] = (raw_price * raw_qty) if price_ok and qty_ok else 0.0
    snapshot["leverage"] = raw_lev if raw_lev > 0 else 0.0
    snapshot["entry_date"] = str(self.current_day or "")
    snapshot["posture"] = "FALLBACK_CLOSE"
    snapshot["vel_div_entry"] = 0.0
    snapshot["boost_at_entry"] = 1.0
    snapshot["beta_at_entry"] = 1.0
    return snapshot
|
|
|
|
|
|
|
|
|
|
|
|
def _restore_open_max_age_seconds(self) -> float:
    """Return the max tolerated age, in seconds, of an OPEN row at restore.

    ``DOLPHIN_RESTORE_OPEN_MAX_AGE_SEC`` overrides the value when it parses
    to a finite positive number; otherwise the default is 12 hours.
    """
    override = _safe_float(os.environ.get("DOLPHIN_RESTORE_OPEN_MAX_AGE_SEC"), float("nan"))
    override_usable = math.isfinite(override) and override > 0
    return float(override) if override_usable else 12.0 * 3600.0
|
|
|
|
|
|
|
|
|
|
|
|
def _restore_position_state(self):
|
|
|
|
|
|
"""On startup: check CH for an OPEN position and restore engine state."""
|
|
|
|
|
|
try:
|
|
|
|
|
|
import urllib.request, base64 as _b64
|
|
|
|
|
|
# IMPORTANT:
|
|
|
|
|
|
# Never filter status='OPEN' first, otherwise stale historical OPEN rows
|
|
|
|
|
|
# can be resurrected forever even after a newer CLOSED row exists.
|
|
|
|
|
|
# Resolve latest row per trade_id first, then keep only currently-OPEN.
|
|
|
|
|
|
sql = (
|
|
|
|
|
|
"SELECT trade_id, asset, direction, entry_price, quantity, "
|
|
|
|
|
|
"notional, leverage, bucket_id, bars_held, last_ts "
|
|
|
|
|
|
"FROM ("
|
|
|
|
|
|
" SELECT "
|
|
|
|
|
|
" trade_id, "
|
|
|
|
|
|
" argMax(asset, ts) AS asset, "
|
|
|
|
|
|
" argMax(direction, ts) AS direction, "
|
|
|
|
|
|
" argMax(entry_price, ts) AS entry_price, "
|
|
|
|
|
|
" argMax(quantity, ts) AS quantity, "
|
|
|
|
|
|
" argMax(notional, ts) AS notional, "
|
|
|
|
|
|
" argMax(leverage, ts) AS leverage, "
|
|
|
|
|
|
" argMax(bucket_id, ts) AS bucket_id, "
|
|
|
|
|
|
" argMax(bars_held, ts) AS bars_held, "
|
|
|
|
|
|
" argMax(status, ts) AS status, "
|
|
|
|
|
|
" argMax(ts, ts) AS last_ts "
|
|
|
|
|
|
" FROM dolphin.position_state "
|
|
|
|
|
|
" GROUP BY trade_id"
|
|
|
|
|
|
") "
|
|
|
|
|
|
"WHERE status = 'OPEN' AND quantity > 0 AND notional > 0 "
|
|
|
|
|
|
"ORDER BY last_ts DESC LIMIT 1 FORMAT TabSeparated"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def _restore_from_hz_snapshot(reason: str) -> bool:
|
|
|
|
|
|
"""Fallback restore path when ClickHouse is unavailable or empty.
|
|
|
|
|
|
|
|
|
|
|
|
We prefer latest_nautilus/engine_snapshot because these are the live
|
|
|
|
|
|
BLUE state surfaces and can still be coherent even if CH restore
|
|
|
|
|
|
is temporarily unavailable. The restored open leg is re-seeded back
|
|
|
|
|
|
into position_state so future restarts can recover without replaying
|
|
|
|
|
|
the entire incident.
|
|
|
|
|
|
"""
|
|
|
|
|
|
snapshot_sources = (
|
|
|
|
|
|
("latest_nautilus", "HZ latest_nautilus"),
|
|
|
|
|
|
("engine_snapshot", "HZ engine_snapshot"),
|
|
|
|
|
|
)
|
|
|
|
|
|
cached_snapshots = getattr(self, "_restore_state_snapshots", {}) or {}
|
|
|
|
|
|
for key, label in snapshot_sources:
|
|
|
|
|
|
blob = cached_snapshots.get(key)
|
|
|
|
|
|
if not isinstance(blob, dict):
|
|
|
|
|
|
try:
|
|
|
|
|
|
raw = self.state_map.blocking().get(key)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" {label} read failed during restore fallback: {e}")
|
|
|
|
|
|
raw = None
|
|
|
|
|
|
parsed = self._parse_capital_blob(raw, label)
|
|
|
|
|
|
if parsed is None:
|
|
|
|
|
|
continue
|
|
|
|
|
|
_, blob = parsed
|
|
|
|
|
|
if not isinstance(blob, dict):
|
|
|
|
|
|
continue
|
|
|
|
|
|
open_positions = blob.get("open_positions")
|
|
|
|
|
|
if not isinstance(open_positions, list) or len(open_positions) != 1:
|
|
|
|
|
|
continue
|
|
|
|
|
|
pos_blob = open_positions[0]
|
|
|
|
|
|
if not isinstance(pos_blob, dict):
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
trade_id = str(pos_blob.get("trade_id", "") or "").strip()
|
|
|
|
|
|
asset = str(pos_blob.get("asset", "") or "").strip()
|
|
|
|
|
|
side = str(pos_blob.get("side", "") or "").upper()
|
|
|
|
|
|
direction = -1 if side == "SHORT" else 1 if side == "LONG" else 0
|
|
|
|
|
|
entry_price = float(pos_blob.get("entry_price", 0.0) or 0.0)
|
|
|
|
|
|
quantity = float(pos_blob.get("quantity", 0.0) or 0.0)
|
|
|
|
|
|
notional = float(pos_blob.get("notional", quantity * entry_price) or 0.0)
|
|
|
|
|
|
leverage = float(pos_blob.get("leverage", 0.0) or 0.0)
|
|
|
|
|
|
stored_bars = int(pos_blob.get("bars_held", 0) or blob.get("bars_held", 0) or 0)
|
|
|
|
|
|
# Continuity formula identical to the CH path: anchor on
|
|
|
|
|
|
# THIS session's bar counter (negative entry_bar is fine,
|
|
|
|
|
|
# Int32 in CH) so bars_held resumes at stored_bars. The
|
|
|
|
|
|
# old snapshot_bar-based form anchored on the PREVIOUS
|
|
|
|
|
|
# session's counter, producing entry_bar >> bar_idx and
|
|
|
|
|
|
# therefore NEGATIVE bars_held after a restart.
|
|
|
|
|
|
restored_entry_bar = self.bar_idx - max(0, stored_bars)
|
|
|
|
|
|
snapshot_ts = self._extract_state_timestamp(blob)
|
|
|
|
|
|
entry_ts_us = int((snapshot_ts if snapshot_ts is not None else time.time()) * 1_000_000)
|
|
|
|
|
|
|
|
|
|
|
|
if not trade_id:
|
|
|
|
|
|
continue
|
|
|
|
|
|
if not asset:
|
|
|
|
|
|
continue
|
|
|
|
|
|
if direction not in (-1, 1):
|
|
|
|
|
|
continue
|
|
|
|
|
|
if not (math.isfinite(entry_price) and entry_price > 0):
|
|
|
|
|
|
continue
|
|
|
|
|
|
if not (math.isfinite(quantity) and quantity > 0):
|
|
|
|
|
|
continue
|
|
|
|
|
|
if not (math.isfinite(notional) and notional > 0):
|
|
|
|
|
|
notional = quantity * entry_price
|
|
|
|
|
|
if not (math.isfinite(leverage) and leverage > 0):
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
chain_recon = self._load_chain_ledger_state(trade_id)
|
|
|
|
|
|
chain_meta = {}
|
|
|
|
|
|
if chain_recon:
|
|
|
|
|
|
chain_meta.update(chain_recon)
|
|
|
|
|
|
nested_chain = chain_recon.get("chain")
|
|
|
|
|
|
if isinstance(nested_chain, dict):
|
|
|
|
|
|
chain_meta.update(nested_chain)
|
|
|
|
|
|
chain_seed_pending = {
|
|
|
|
|
|
"asset": asset,
|
|
|
|
|
|
"side": side or ("SHORT" if direction == -1 else "LONG"),
|
|
|
|
|
|
"entry_price": entry_price,
|
|
|
|
|
|
"quantity": quantity,
|
|
|
|
|
|
"notional": notional,
|
|
|
|
|
|
"notional_entry": notional,
|
|
|
|
|
|
"leverage": leverage,
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
# NEVER take entry_bar from chain_meta: trade_reconstruction
|
|
|
|
|
|
# payloads carry the DEAD session's bar counter, so the
|
|
|
|
|
|
# override reinstated the stale clock frame the re-anchor
|
|
|
|
|
|
# exists to fix (negative bars_held → UInt16 spool poison,
|
|
|
|
|
|
# incident 2026-06-12). restored_entry_bar already encodes
|
|
|
|
|
|
# hold continuity via stored_bars in THIS session's frame.
|
|
|
|
|
|
"entry_bar": int(restored_entry_bar),
|
2026-06-12 14:59:49 +02:00
|
|
|
|
"entry_ts": int(chain_meta.get("entry_ts", entry_ts_us) or entry_ts_us) if chain_recon else entry_ts_us,
|
|
|
|
|
|
"retraction_legs": int(chain_meta.get("retraction_legs", chain_meta.get("chain_seq", 0)) or 0) if chain_recon else 0,
|
|
|
|
|
|
"realized_pnl_legs_total": float(chain_meta.get("realized_pnl_legs_total", 0.0) or 0.0) if chain_recon else 0.0,
|
|
|
|
|
|
}
|
|
|
|
|
|
try:
|
|
|
|
|
|
chain_state = self._chain_state_from_reconstruction(trade_id, chain_seed_pending, chain_recon)
|
|
|
|
|
|
except Exception as chain_err:
|
|
|
|
|
|
log(f" position_state HZ fallback chain restore failed: {chain_err}")
|
|
|
|
|
|
self._mark_restore_failure(str(chain_err))
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
pos = NDPosition(
|
|
|
|
|
|
trade_id=trade_id,
|
|
|
|
|
|
asset=asset,
|
|
|
|
|
|
direction=direction,
|
|
|
|
|
|
entry_price=entry_price,
|
|
|
|
|
|
entry_bar=restored_entry_bar,
|
|
|
|
|
|
notional=notional,
|
|
|
|
|
|
leverage=leverage,
|
|
|
|
|
|
fraction=notional / max(self.eng.capital * leverage, 1.0),
|
|
|
|
|
|
entry_vel_div=0.0,
|
|
|
|
|
|
bucket_idx=0,
|
|
|
|
|
|
current_price=entry_price,
|
|
|
|
|
|
)
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
self.eng.position = pos
|
|
|
|
|
|
self.eng.exit_manager.setup_position(
|
|
|
|
|
|
trade_id,
|
|
|
|
|
|
entry_price,
|
|
|
|
|
|
direction,
|
|
|
|
|
|
restored_entry_bar,
|
|
|
|
|
|
stop_pct_override=float(getattr(self, "_catastrophic_floor_pct", 0.0120) or 0.0120),
|
|
|
|
|
|
)
|
|
|
|
|
|
self._pending_entries[trade_id] = {
|
|
|
|
|
|
"trade_id": trade_id,
|
|
|
|
|
|
"asset": asset,
|
|
|
|
|
|
"side": side or ("SHORT" if direction == -1 else "LONG"),
|
|
|
|
|
|
"entry_price": entry_price,
|
|
|
|
|
|
"quantity": quantity,
|
|
|
|
|
|
"notional": notional,
|
|
|
|
|
|
"notional_entry": notional,
|
|
|
|
|
|
"leverage": leverage,
|
|
|
|
|
|
"vel_div_entry": 0.0,
|
|
|
|
|
|
"boost_at_entry": 1.0,
|
|
|
|
|
|
"beta_at_entry": 1.0,
|
|
|
|
|
|
"posture": "RESTORED",
|
|
|
|
|
|
"entry_ts": entry_ts_us,
|
|
|
|
|
|
"entry_date": (self.current_day or ""),
|
|
|
|
|
|
"retraction_legs": int(chain_state.get("chain_seq", 0) or 0),
|
|
|
|
|
|
"realized_pnl_legs_total": float(chain_state.get("realized_pnl_legs_total", 0.0) or 0.0),
|
|
|
|
|
|
"chain_root_trade_id": chain_state.get("chain_root_trade_id", trade_id),
|
|
|
|
|
|
"chain_head_leg_id": chain_state.get("chain_head_leg_id", f"{trade_id}:open"),
|
|
|
|
|
|
"chain_prev_leg_id": chain_state.get("chain_prev_leg_id", ""),
|
|
|
|
|
|
"chain_seq": int(chain_state.get("chain_seq", 0) or 0),
|
|
|
|
|
|
"chain_token": chain_state.get("chain_token", ""),
|
|
|
|
|
|
"chain_mode": chain_state.get("chain_mode", "LIVE"),
|
|
|
|
|
|
"chain_version": int(chain_state.get("chain_version", 1) or 1),
|
|
|
|
|
|
"chain_kind": chain_state.get("chain_kind", "ROOT"),
|
|
|
|
|
|
}
|
|
|
|
|
|
v7_exit_engine = getattr(self, "_v7_exit_engine", None)
|
|
|
|
|
|
if v7_exit_engine is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
ctx = v7_exit_engine.make_context(
|
|
|
|
|
|
entry_price=entry_price,
|
|
|
|
|
|
entry_bar=restored_entry_bar,
|
|
|
|
|
|
side=1 if direction == -1 else 0,
|
|
|
|
|
|
)
|
|
|
|
|
|
self._v7_contexts[trade_id] = ctx
|
|
|
|
|
|
self._v7_decision_seq[trade_id] = 0
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" V7 live restore context failed (HZ fallback): {e}")
|
|
|
|
|
|
self._seed_posture_for_restored_position()
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
self._apply_catastrophic_floor_to_open_position()
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._ps_write_open(trade_id, self._pending_entries[trade_id])
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" position_state HZ fallback OPEN write failed: {e}")
|
|
|
|
|
|
self._restore_source = label
|
|
|
|
|
|
self._restore_failed = False
|
|
|
|
|
|
self._restore_failure_reason = ""
|
|
|
|
|
|
log(
|
|
|
|
|
|
f" position_state RESTORED from {label} ({reason}): "
|
|
|
|
|
|
f"{asset} {side or ('SHORT' if direction == -1 else 'LONG')} "
|
|
|
|
|
|
f"entry={entry_price} notional={notional:.0f} bars_held≈{stored_bars} trade={trade_id}"
|
|
|
|
|
|
)
|
|
|
|
|
|
return True
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
def _hz_snapshot_is_flat(reason: str) -> bool:
    """Report whether a Hazelcast-side snapshot shows no open positions.

    Used as a last-resort proof of flatness when the CH restore path could
    not produce a usable open position.

    Two snapshot keys are consulted in order: ``latest_nautilus`` then
    ``engine_snapshot``. A copy already cached on
    ``self._restore_state_snapshots`` is preferred; otherwise the key is
    read from ``self.state_map`` and decoded with
    ``self._parse_capital_blob``.

    Args:
        reason: Why the primary restore was unavailable; only used in the
            log line emitted when a flat snapshot is found.

    Returns:
        True as soon as one snapshot carries ``open_positions`` as an empty
        list; False when no snapshot proves flatness.
    """
    prefetched = getattr(self, "_restore_state_snapshots", {}) or {}
    for map_key, source_label in (
        ("latest_nautilus", "HZ latest_nautilus"),
        ("engine_snapshot", "HZ engine_snapshot"),
    ):
        snapshot = prefetched.get(map_key)
        if not isinstance(snapshot, dict):
            # No usable cached copy: fall back to a live map read. A failed
            # read is logged and treated as "nothing stored".
            try:
                payload = self.state_map.blocking().get(map_key)
            except Exception as read_err:
                log(f" {source_label} flat-check read failed during restore fallback: {read_err}")
                payload = None
            decoded = self._parse_capital_blob(payload, source_label)
            if decoded is None:
                continue
            # presumably a (capital, blob) pair — only the second item is used here
            _unused, snapshot = decoded
            if not isinstance(snapshot, dict):
                continue
        positions = snapshot.get("open_positions")
        # Only an explicit empty list counts as proof; a missing key or any
        # other type is treated as "unknown", not "flat".
        if isinstance(positions, list) and not positions:
            log(f" position_state: CH restore unavailable ({reason}); {source_label} is flat")
            return True
    return False
|
|
|
|
|
|
|
|
|
|
|
|
req = urllib.request.Request(
|
|
|
|
|
|
"http://localhost:8123/?database=dolphin",
|
|
|
|
|
|
data=sql.encode(),
|
|
|
|
|
|
headers={"Authorization": "Basic " +
|
|
|
|
|
|
_b64.b64encode(b"dolphin:dolphin_ch_2026").decode()})
|
|
|
|
|
|
with urllib.request.urlopen(req, timeout=5) as r:
|
|
|
|
|
|
row = r.read().decode().strip()
|
|
|
|
|
|
if not row:
|
|
|
|
|
|
log(" position_state: no open position to restore in CH; trying HZ fallback")
|
|
|
|
|
|
if _restore_from_hz_snapshot("CH empty"):
|
|
|
|
|
|
return
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
def _reject_restore_candidate(message: str, *, halt_on_exhaustion: bool = True) -> bool:
    """Reject a CH open-position candidate and walk the HZ fallbacks.

    The rejection is logged, then ``_restore_from_hz_snapshot`` is tried,
    followed by ``_hz_snapshot_is_flat``.

    Args:
        message: Human-readable rejection reason; also passed to both
            fallbacks and, on exhaustion, to ``_mark_restore_failure``.
        halt_on_exhaustion: When True (default), a failure of both
            fallbacks is recorded via ``self._mark_restore_failure``.

    Returns:
        True when either fallback resolved the situation, False when both
        were exhausted.
    """
    log(f" position_state open candidate rejected: {message}")
    # Short-circuit keeps the original order: position restore first, then
    # the flat-proof, and the second is skipped once the first succeeds.
    if _restore_from_hz_snapshot(message) or _hz_snapshot_is_flat(message):
        return True
    # Fallbacks exhausted: no HZ position AND no HZ flat-proof.
    # Two garbage classes diverge here:
    #   - zero-size OPEN rows are the DOCUMENTED malformed/tombstone
    #     class (MALFORMED_OPEN_RESTORE_BUG.md): definitionally not
    #     live positions → flat continuation is correct
    #     (halt_on_exhaustion=False at those call sites);
    #   - corrupt direction/entry_price/leverage is UNKNOWN state —
    #     trading from flat over it risks a single-slot violation
    #     (XTZ 863c21da class) → halt via restore-failure.
    if halt_on_exhaustion:
        self._mark_restore_failure(message)
    return False
|
|
|
|
|
|
|
|
|
|
|
|
cols = row.split('\t')
|
|
|
|
|
|
if len(cols) < 10:
|
|
|
|
|
|
log(f" position_state: unexpected row format: {row}")
|
|
|
|
|
|
if _restore_from_hz_snapshot("CH malformed"):
|
|
|
|
|
|
return
|
|
|
|
|
|
self._mark_restore_failure("position_state row malformed")
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
trade_id = cols[0]
|
|
|
|
|
|
asset = cols[1]
|
|
|
|
|
|
direction = int(cols[2])
|
|
|
|
|
|
entry_price = float(cols[3])
|
|
|
|
|
|
quantity = float(cols[4])
|
|
|
|
|
|
notional = float(cols[5])
|
|
|
|
|
|
leverage = float(cols[6])
|
|
|
|
|
|
bucket_id = int(cols[7])
|
|
|
|
|
|
stored_bars = int(cols[8])
|
|
|
|
|
|
last_ts = self._parse_timestamp_seconds(cols[9])
|
|
|
|
|
|
|
|
|
|
|
|
if not trade_id.strip():
|
|
|
|
|
|
self._mark_restore_failure("position_state row missing trade_id")
|
|
|
|
|
|
return
|
|
|
|
|
|
if not asset.strip():
|
|
|
|
|
|
self._mark_restore_failure(f"position_state row missing asset for trade {trade_id}")
|
|
|
|
|
|
return
|
|
|
|
|
|
if direction not in (-1, 1):
|
|
|
|
|
|
if _reject_restore_candidate(f"position_state row invalid direction for trade {trade_id}: {direction}"):
|
|
|
|
|
|
return
|
|
|
|
|
|
return
|
|
|
|
|
|
if not (math.isfinite(entry_price) and entry_price > 0):
|
|
|
|
|
|
if _reject_restore_candidate(f"position_state row invalid entry_price for trade {trade_id}: {entry_price}"):
|
|
|
|
|
|
return
|
|
|
|
|
|
return
|
|
|
|
|
|
if not (math.isfinite(quantity) and quantity > 0):
|
|
|
|
|
|
# zero/dust size = documented tombstone class → no halt
|
|
|
|
|
|
if _reject_restore_candidate(
|
|
|
|
|
|
f"position_state row invalid quantity for trade {trade_id}: {quantity}",
|
|
|
|
|
|
halt_on_exhaustion=False):
|
|
|
|
|
|
return
|
|
|
|
|
|
return
|
|
|
|
|
|
if not (math.isfinite(notional) and notional > 0):
|
|
|
|
|
|
# zero/dust size = documented tombstone class → no halt
|
|
|
|
|
|
if _reject_restore_candidate(
|
|
|
|
|
|
f"position_state row invalid notional for trade {trade_id}: {notional}",
|
|
|
|
|
|
halt_on_exhaustion=False):
|
|
|
|
|
|
return
|
|
|
|
|
|
return
|
|
|
|
|
|
if not (math.isfinite(leverage) and leverage > 0):
|
|
|
|
|
|
if _reject_restore_candidate(f"position_state row invalid leverage for trade {trade_id}: {leverage}"):
|
|
|
|
|
|
return
|
|
|
|
|
|
return
|
|
|
|
|
|
if stored_bars < 0:
|
|
|
|
|
|
self._mark_restore_failure(f"position_state row invalid bars_held for trade {trade_id}: {stored_bars}")
|
|
|
|
|
|
return
|
|
|
|
|
|
if last_ts is not None:
|
|
|
|
|
|
age_sec = max(0.0, time.time() - last_ts)
|
|
|
|
|
|
max_age_sec = self._restore_open_max_age_seconds()
|
|
|
|
|
|
if age_sec > max_age_sec:
|
|
|
|
|
|
log(
|
|
|
|
|
|
" position_state stale OPEN candidate rejected: "
|
|
|
|
|
|
f"trade={trade_id} age={age_sec:.0f}s limit={max_age_sec:.0f}s "
|
|
|
|
|
|
f"asset={asset} side={'SHORT' if direction == -1 else 'LONG'}"
|
|
|
|
|
|
)
|
|
|
|
|
|
stale_pending = {
|
|
|
|
|
|
"asset": asset,
|
|
|
|
|
|
"side": "SHORT" if direction == -1 else "LONG",
|
|
|
|
|
|
"entry_price": entry_price,
|
|
|
|
|
|
"quantity": quantity,
|
|
|
|
|
|
"leverage": leverage,
|
|
|
|
|
|
}
|
|
|
|
|
|
self._ps_write_closed(
|
|
|
|
|
|
trade_id,
|
|
|
|
|
|
stale_pending,
|
|
|
|
|
|
{
|
|
|
|
|
|
"reason": "RESTORE_STALE_OPEN_REJECT",
|
|
|
|
|
|
"net_pnl": 0.0,
|
|
|
|
|
|
"bars_held": stored_bars,
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
return
|
|
|
|
|
|
derived_notional = quantity * entry_price
|
|
|
|
|
|
if math.isfinite(derived_notional) and derived_notional > 0:
|
|
|
|
|
|
if abs(notional - derived_notional) > max(1.0, abs(derived_notional) * 0.01):
|
|
|
|
|
|
log(
|
|
|
|
|
|
" position_state notional mismatch: "
|
|
|
|
|
|
f"stored={notional:.6f} derived={derived_notional:.6f} trade={trade_id} "
|
|
|
|
|
|
"— using derived value"
|
|
|
|
|
|
)
|
|
|
|
|
|
notional = derived_notional
|
|
|
|
|
|
|
|
|
|
|
|
# entry_bar so the MAX_HOLD countdown CONTINUES from where it left
|
|
|
|
|
|
# off. At boot self.bar_idx is 0, so this is typically negative —
|
|
|
|
|
|
# that is intentional: bars_held = bar_idx − entry_bar then equals
|
|
|
|
|
|
# stored_bars immediately. The old max(0, …) clamp zeroed the
|
|
|
|
|
|
# clock on every restore (XTZ 863c21da "bars_held≈0"; MAX_HOLD
|
|
|
|
|
|
# never fired and the phantom rode 1h to STOP_LOSS).
|
|
|
|
|
|
# position_state.entry_bar is Int32 — negative is storable.
|
|
|
|
|
|
restored_entry_bar = self.bar_idx - max(0, stored_bars)
|
|
|
|
|
|
chain_recon = self._load_chain_ledger_state(trade_id)
|
|
|
|
|
|
chain_meta = {}
|
|
|
|
|
|
if chain_recon:
|
|
|
|
|
|
chain_meta.update(chain_recon)
|
|
|
|
|
|
nested_chain = chain_recon.get("chain")
|
|
|
|
|
|
if isinstance(nested_chain, dict):
|
|
|
|
|
|
chain_meta.update(nested_chain)
|
|
|
|
|
|
chain_seed_pending = {
|
|
|
|
|
|
"asset": asset,
|
|
|
|
|
|
"side": 'SHORT' if direction == -1 else 'LONG',
|
|
|
|
|
|
"entry_price": entry_price,
|
|
|
|
|
|
"quantity": quantity,
|
|
|
|
|
|
"notional": notional,
|
|
|
|
|
|
"notional_entry": notional,
|
|
|
|
|
|
"leverage": leverage,
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
# NEVER take entry_bar from chain_meta: trade_reconstruction
|
|
|
|
|
|
# payloads carry the DEAD session's bar counter — the override
|
|
|
|
|
|
# reinstated the stale clock frame the re-anchor exists to fix
|
|
|
|
|
|
# (negative bars_held → UInt16 spool poison, incident 2026-06-12).
|
|
|
|
|
|
"entry_bar": int(restored_entry_bar),
|
2026-06-12 14:59:49 +02:00
|
|
|
|
"entry_ts": int(chain_meta.get("entry_ts", 0) or 0) if chain_recon else 0,
|
|
|
|
|
|
"retraction_legs": int(chain_meta.get("retraction_legs", chain_meta.get("chain_seq", 0)) or 0) if chain_recon else 0,
|
|
|
|
|
|
"realized_pnl_legs_total": float(chain_meta.get("realized_pnl_legs_total", 0.0) or 0.0) if chain_recon else 0.0,
|
|
|
|
|
|
}
|
|
|
|
|
|
try:
|
|
|
|
|
|
chain_state = self._chain_state_from_reconstruction(trade_id, chain_seed_pending, chain_recon)
|
|
|
|
|
|
except Exception as chain_err:
|
|
|
|
|
|
self._mark_restore_failure(str(chain_err))
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
pos = NDPosition(
|
|
|
|
|
|
trade_id = trade_id,
|
|
|
|
|
|
asset = asset,
|
|
|
|
|
|
direction = direction,
|
|
|
|
|
|
entry_price = entry_price,
|
|
|
|
|
|
entry_bar = restored_entry_bar,
|
|
|
|
|
|
notional = notional,
|
|
|
|
|
|
leverage = leverage,
|
|
|
|
|
|
fraction = notional / max(self.eng.capital * leverage, 1.0),
|
|
|
|
|
|
entry_vel_div = 0.0,
|
|
|
|
|
|
bucket_idx = 0, # signal-strength bucket (not KMeans); 0=safe default
|
|
|
|
|
|
current_price = entry_price,
|
|
|
|
|
|
)
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
self.eng.position = pos
|
|
|
|
|
|
self.eng.exit_manager.setup_position(
|
|
|
|
|
|
trade_id,
|
|
|
|
|
|
entry_price,
|
|
|
|
|
|
direction,
|
|
|
|
|
|
restored_entry_bar,
|
|
|
|
|
|
stop_pct_override=float(getattr(self, "_catastrophic_floor_pct", 0.0120) or 0.0120),
|
|
|
|
|
|
)
|
|
|
|
|
|
# NOTE: do NOT arm hibernate protect here.
|
|
|
|
|
|
# _day_posture starts as 'APEX' — the posture sync block on the
|
|
|
|
|
|
# first incoming scan will detect the APEX→HIBERNATE transition
|
|
|
|
|
|
# and call _hibernate_protect_position() at the right moment.
|
|
|
|
|
|
|
|
|
|
|
|
# Rebuild _pending_entries so the exit CH write fires correctly
|
|
|
|
|
|
side = 'SHORT' if direction == -1 else 'LONG'
|
|
|
|
|
|
self._pending_entries[trade_id] = {
|
|
|
|
|
|
'trade_id': trade_id,
|
|
|
|
|
|
'asset': asset,
|
|
|
|
|
|
'side': side,
|
|
|
|
|
|
'entry_price': entry_price,
|
|
|
|
|
|
'quantity': quantity,
|
|
|
|
|
|
'notional': float(quantity * entry_price),
|
|
|
|
|
|
'notional_entry': float(quantity * entry_price),
|
|
|
|
|
|
'leverage': leverage,
|
|
|
|
|
|
'vel_div_entry': 0.0,
|
|
|
|
|
|
'boost_at_entry': 1.0,
|
|
|
|
|
|
'beta_at_entry': 1.0,
|
|
|
|
|
|
'posture': 'RESTORED',
|
|
|
|
|
|
'entry_ts': int(chain_meta.get("entry_ts", _ch_ts_us()) or _ch_ts_us()) if chain_recon else _ch_ts_us(),
|
|
|
|
|
|
'entry_date': (self.current_day or ''),
|
|
|
|
|
|
'retraction_legs': int(chain_state.get("chain_seq", 0) or 0),
|
|
|
|
|
|
'realized_pnl_legs_total': float(chain_state.get("realized_pnl_legs_total", 0.0) or 0.0),
|
|
|
|
|
|
'chain_root_trade_id': chain_state.get("chain_root_trade_id", trade_id),
|
|
|
|
|
|
'chain_head_leg_id': chain_state.get("chain_head_leg_id", f"{trade_id}:open"),
|
|
|
|
|
|
'chain_prev_leg_id': chain_state.get("chain_prev_leg_id", ""),
|
|
|
|
|
|
'chain_seq': int(chain_state.get("chain_seq", 0) or 0),
|
|
|
|
|
|
'chain_token': chain_state.get("chain_token", ""),
|
|
|
|
|
|
'chain_mode': chain_state.get("chain_mode", "LIVE"),
|
|
|
|
|
|
'chain_version': int(chain_state.get("chain_version", 1) or 1),
|
|
|
|
|
|
'chain_kind': chain_state.get("chain_kind", "ROOT"),
|
|
|
|
|
|
}
|
|
|
|
|
|
if self._v7_exit_engine is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
ctx = self._v7_exit_engine.make_context(
|
|
|
|
|
|
entry_price=entry_price,
|
|
|
|
|
|
entry_bar=restored_entry_bar,
|
|
|
|
|
|
side=1 if direction == -1 else 0,
|
|
|
|
|
|
)
|
|
|
|
|
|
self._v7_contexts[trade_id] = ctx
|
|
|
|
|
|
self._v7_decision_seq[trade_id] = 0
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" V7 live restore context failed: {e}")
|
|
|
|
|
|
self._seed_posture_for_restored_position()
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
self._apply_catastrophic_floor_to_open_position()
|
|
|
|
|
|
log(f" position_state RESTORED: {asset} {side} entry={entry_price} "
|
|
|
|
|
|
f"notional={notional:.0f} bars_held≈{stored_bars} trade={trade_id}")
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" position_state restore error: {e}")
|
|
|
|
|
|
if _restore_from_hz_snapshot(str(e)):
|
|
|
|
|
|
return
|
|
|
|
|
|
if _hz_snapshot_is_flat(str(e)):
|
|
|
|
|
|
return
|
|
|
|
|
|
self._mark_restore_failure(f"position_state restore error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def _seed_posture_for_restored_position(self) -> None:
    """Reset the engine's day posture so the next scan sees a transition.

    Only acts when the engine exists, holds a position, and its
    ``_day_posture`` is currently ``"HIBERNATE"``; the posture is then set
    back to ``"APEX"``. Any exception is logged and swallowed, so the
    caller's restore flow is never interrupted by this step.
    """
    try:
        engine = self.eng
        if engine is None:
            return
        if getattr(engine, "position", None) is None:
            return
        if getattr(engine, "_day_posture", "APEX") != "HIBERNATE":
            return
        # Starting from APEX lets the posture-sync logic observe an
        # APEX→HIBERNATE change on the first scan after restore.
        engine._day_posture = "APEX"
        log(" position_state restore: re-seeded day posture to APEX for hibernate sync")
    except Exception as reseed_err:
        log(f" position_state posture reseed failed: {reseed_err}")
|
|
|
|
|
|
|
|
|
|
|
|
def _rehydrate_engine_position_from_bingx(self, *, source: str = "startup") -> None:
    """Keep the local engine slot aligned with the exchange slot when live on BingX.

    This is intentionally conservative in BLUE: when exchange is flat, clear any
    stale local slot artifacts. Projection of non-flat exchange state is handled
    by the execution/runtime layer.

    The method is a no-op unless the execution venue is ``BINGX``,
    ``live_mode`` is truthy, an engine with a position is present, and the
    live-position lookup returns nothing. In that case the engine position
    is cleared and the stale trade id is removed from
    ``_exec_open_positions``, ``_pending_entries`` and the real-time exit
    manager (when those exist).

    The whole routine is best-effort: it never raises for failures inside
    the cleanup. Failures that used to be dropped silently are now reported
    through the optional instance logger.

    Args:
        source: Free-form label naming the call site; only used in the log
            message.
    """
    # NOTE(review): this method resolves the engine via ``self.engine`` while
    # sibling methods in this file use ``self.eng`` — confirm the attribute
    # exists, otherwise this routine never gets past the engine check.
    # NOTE(review): a missing/non-callable ``_get_bingx_live_positions`` or a
    # non-dict result is treated the same as "exchange flat" — confirm that
    # is intended before relying on it.

    def _emit(level: str, message: str) -> None:
        # Logging is optional: only a logger object exposing the requested
        # level method is used; anything else is ignored.
        logger = getattr(self, "log", None) or getattr(self, "_log", None)
        if logger is not None and hasattr(logger, level):
            getattr(logger, level)(message)

    try:
        exec_venue_name = getattr(self, "_exec_venue_name", None)
        exec_venue = exec_venue_name() if callable(exec_venue_name) else ""
        if str(exec_venue).upper() != "BINGX" or not bool(getattr(self, "live_mode", False)):
            return

        engine = getattr(self, "engine", None)
        if engine is None:
            return

        get_live_positions = getattr(self, "_get_bingx_live_positions", None)
        live_positions = get_live_positions() if callable(get_live_positions) else {}
        if not isinstance(live_positions, dict):
            live_positions = {}

        current_pos = getattr(engine, "position", None)
        if live_positions or current_pos is None:
            # Exchange holds something, or there is nothing local to clear.
            return

        stale_tid = str(getattr(current_pos, "trade_id", "") or "")
        state = engine.get_state() if hasattr(engine, "get_state") else {}
        if not isinstance(state, dict):
            state = {}
        state["position"] = None
        try:
            if hasattr(engine, "restore_state"):
                engine.restore_state(state)
            else:
                engine.position = None
        except Exception as restore_err:
            # Fall back to clearing the slot directly, but leave a trace:
            # the engine may be left with otherwise un-restored state.
            engine.position = None
            _emit(
                "warning",
                f"[BINGX_REHYDRATE] restore_state failed; cleared position directly: {restore_err}",
            )

        open_positions = getattr(self, "_exec_open_positions", None)
        if isinstance(open_positions, dict):
            open_positions.pop(stale_tid, None)
        pending_entries = getattr(self, "_pending_entries", None)
        if isinstance(pending_entries, dict):
            pending_entries.pop(stale_tid, None)

        rt_exit_mgr = getattr(self, "_rt_exit_mgr", None)
        if rt_exit_mgr is not None and stale_tid:
            unregister = getattr(rt_exit_mgr, "unregister", None)
            if callable(unregister):
                try:
                    unregister(stale_tid)
                except Exception as unregister_err:
                    # Still best-effort, but no longer invisible.
                    _emit(
                        "debug",
                        f"[BINGX_REHYDRATE] exit-manager unregister failed for "
                        f"trade_id={stale_tid}: {unregister_err}",
                    )

        _emit(
            "warning",
            f"[BINGX_REHYDRATE] cleared stale engine slot from {source}: "
            f"exchange flat, trade_id={stale_tid or '<missing>'}",
        )
    except Exception as exc:
        _emit("debug", f"[BINGX_REHYDRATE] stale-slot cleanup failed: {exc}")
|
|
|
|
|
|
|
|
|
|
|
|
def _hibernate_protect_position(self):
    """Arm per-bucket TP+SL instead of immediate HIBERNATE_HALT.

    Must be called under eng_lock with an open position.

    The stop-loss percentage for the position's bucket is written as
    ``stop_pct_override`` on the exit manager's live state for the trade,
    so the position exits via FIXED_TP or STOP_LOSS rather than being
    force-closed. The trade id is recorded in ``_hibernate_protect_active``
    so the exit path can re-label the reason and finalize posture once the
    position closes. Returns immediately when no position is open.
    """
    position = self.eng.position
    if position is None:
        return

    assignments = getattr(self, "_bucket_assignments", {})
    bucket = assignments.get(position.asset, 'default')
    # Unknown buckets fall back to the 'default' stop-loss entry.
    sl_pct = _BUCKET_SL_PCT.get(bucket, _BUCKET_SL_PCT['default'])
    # Read only for the log line below; the take-profit itself is not changed here.
    tp_pct = self.eng.exit_manager.fixed_tp_pct

    # Patch the live exit_manager state for this trade_id.
    tracked = self.eng.exit_manager._positions.get(position.trade_id)
    if tracked is None:
        # Position not registered in exit_manager (shouldn't happen, but be safe).
        log(f" HIBERNATE_PROTECT: trade {position.trade_id} not in exit_manager — arming anyway via re-setup")
        self.eng.exit_manager.setup_position(
            position.trade_id,
            position.entry_price,
            position.direction,
            position.entry_bar,
            stop_pct_override=sl_pct,
        )
    else:
        tracked['stop_pct_override'] = sl_pct

    self._hibernate_protect_active = position.trade_id
    log(
        f"HIBERNATE_PROTECT armed: {position.asset} B{bucket} "
        f"SL={sl_pct*100:.2f}% TP={tp_pct*100:.2f}% trade={position.trade_id}"
    )
|
|
|
|
|
|
|
|
|
|
|
|
def _apply_catastrophic_floor_to_open_position(self):
    """Keep a bounded live loss floor armed on the current BLUE position.

    The floor percentage and its label come from
    ``_catastrophic_floor_for_open_position``. Nothing happens when the
    floor is non-positive, the engine is missing, no position is open, or
    the position has no trade id.

    If the exit manager does not track the trade yet, it is registered
    with the floor as ``stop_pct_override``. If it does, the override is
    replaced only when it is unset (<= 0) or looser than the floor; a
    tighter existing stop is left alone. Failures are logged, never raised.
    """
    floor_pct, floor_label = self._catastrophic_floor_for_open_position()
    if floor_pct <= 0.0 or self.eng is None:
        return
    position = getattr(self.eng, "position", None)
    if position is None:
        return
    tid = str(getattr(position, "trade_id", "") or "")
    if not tid:
        return

    try:
        exit_mgr = self.eng.exit_manager
        tracked = exit_mgr._positions.get(tid)
        if tracked is None:
            exit_mgr.setup_position(
                tid,
                position.entry_price,
                position.direction,
                position.entry_bar,
                stop_pct_override=floor_pct,
            )
        else:
            existing = _safe_float(tracked.get("stop_pct_override"), 0.0)
            # Same predicate as before, negated: keep a valid stop that is
            # already at least as tight as the floor.
            if not (existing <= 0.0 or existing > floor_pct):
                return
            tracked["stop_pct_override"] = floor_pct
        log(
            f"CATASTROPHIC_FLOOR armed: {position.asset} "
            f"SL={floor_pct*100:.2f}% mode={floor_label} trade={tid}"
        )
    except Exception as arm_err:
        log(f" CATASTROPHIC_FLOOR failed: {arm_err}")
|
|
|
|
|
|
|
|
|
|
|
|
def _catastrophic_floor_for_open_position(self) -> tuple[float, str]:
    """Compute the loss-floor percentage and a label for the open position.

    Returns:
        ``(floor_pct, label)``.

        * ``(base_floor, "base")`` when there is no engine, no open
          position, or the position's pending entry is not flagged
          ``overlay_flip``. ``base_floor`` is ``_catastrophic_floor_pct``
          (0.0 when unset).
        * Otherwise ``(pct, "overlay:<reason>")`` where ``pct`` is the
          smallest positive value among the base floor, the overlay floor
          (``_overlay_catastrophic_floor_pct``) and, when both notional and
          ``_overlay_catastrophic_max_loss_usd`` are positive, the USD cap
          expressed as a fraction of notional. ``pct`` is clamped to be
          non-negative; ``<reason>`` is the entry's ``overlay_reason`` or
          ``"overlay"``.
    """
    base_floor = float(getattr(self, "_catastrophic_floor_pct", 0.0) or 0.0)
    base_result = (base_floor, "base")

    if self.eng is None:
        return base_result
    position = getattr(self.eng, "position", None)
    if position is None:
        return base_result

    tid = str(getattr(position, "trade_id", "") or "")
    entry = self._pending_entries.get(tid, {}) if tid else {}
    if not bool(entry.get("overlay_flip", False)):
        return base_result

    # Overlay trades: start from the tightest configured positive floor.
    overlay_floor = float(getattr(self, "_overlay_catastrophic_floor_pct", 0.0) or 0.0)
    floor_pct = min(
        [pct for pct in (base_floor, overlay_floor) if pct > 0.0],
        default=0.0,
    )

    # Notional preference: entry-time notional, then current notional on
    # the pending entry, then the position object's own value.
    fallback_notional = entry.get("notional", getattr(position, "notional", 0.0))
    notional = _safe_float(entry.get("notional_entry", fallback_notional), 0.0)
    max_loss_usd = float(getattr(self, "_overlay_catastrophic_max_loss_usd", 0.0) or 0.0)
    if notional > 0.0 and max_loss_usd > 0.0:
        usd_cap_pct = max_loss_usd / notional
        # The USD cap can only tighten an existing floor, or stand in when
        # no percentage floor is configured.
        floor_pct = min(floor_pct, usd_cap_pct) if floor_pct > 0.0 else usd_cap_pct

    reason = str(entry.get("overlay_reason", "") or "overlay")
    return max(0.0, floor_pct), f"overlay:{reason}"
|
|
|
|
|
|
|
|
|
|
|
|
def _overlay_advsl_should_exit(
    self,
    trade_id: str,
    pending: Mapping[str, Any],
    v7_decision: Mapping[str, Any],
    bars_held: int,
    current_price: float,
) -> tuple[bool, str]:
    """Decide whether an overlay trade should be closed at its loss floor.

    Gates are evaluated in order; the first one that fails returns
    ``(False, <label>)``:

    * ``"disabled"``    – ``_overlay_advsl_live_exit_enabled`` is falsy;
    * ``"not_overlay"`` – the pending entry is not flagged ``overlay_flip``;
    * ``"min_hold"``    – ``bars_held`` is below ``_overlay_advsl_min_bars``;
    * ``"bad_price"``   – ``current_price`` or the entry price is not a
      finite, positive number;
    * ``"hold"``        – the exit condition below is not met.

    An exit is signalled when the adverse excursion (as a fraction of the
    entry price) has reached the floor from
    ``_catastrophic_floor_for_open_position`` AND either the trade never
    showed meaningful favourable excursion (``mfe`` <=
    ``_overlay_advsl_mfe_max_pct``) or both ``exit_pressure`` and
    ``mae_risk`` meet their configured minimums.

    Args:
        trade_id: Accepted for interface compatibility; not used here.
        pending: Pending-entry mapping for the trade (``overlay_flip``,
            ``entry_price``, ``side`` are read).
        v7_decision: Mapping providing ``mfe``, ``exit_pressure`` and
            ``mae_risk``.
        bars_held: Bars the position has been held.
        current_price: Latest price for the position's asset.

    Returns:
        ``(should_exit, label)``; on exit the label encodes the floor
        label plus the adverse/mfe/pressure/mae_risk values.
    """
    if not bool(getattr(self, "_overlay_advsl_live_exit_enabled", False)):
        return False, "disabled"
    if not bool(pending.get("overlay_flip", False)):
        return False, "not_overlay"
    if int(bars_held or 0) < int(getattr(self, "_overlay_advsl_min_bars", 0) or 0):
        return False, "min_hold"

    # A plain `<= 0.0` test lets NaN and +inf through; +inf on a SHORT
    # yields an infinite adverse move and would force an exit off a
    # garbage tick. Require finite, positive prices on both sides.
    if not (math.isfinite(current_price) and current_price > 0.0):
        return False, "bad_price"
    entry = _safe_float(pending.get("entry_price"), 0.0)
    if not (math.isfinite(entry) and entry > 0.0):
        return False, "bad_price"

    side = str(pending.get("side", "SHORT") or "SHORT").upper()
    # Signed move in the trade's favour, as a fraction of entry price.
    favorable = ((current_price - entry) / entry) if side == "LONG" else ((entry - current_price) / entry)
    adverse = max(0.0, -favorable)
    lifetime_mfe = max(0.0, _safe_float(v7_decision.get("mfe"), 0.0))
    pressure = _safe_float(v7_decision.get("exit_pressure"), 0.0)
    mae_risk = _safe_float(v7_decision.get("mae_risk"), 0.0)

    floor_pct, floor_label = self._catastrophic_floor_for_open_position()
    meaningful_mfe = float(getattr(self, "_overlay_advsl_mfe_max_pct", 0.0) or 0.0)
    pressure_min = float(getattr(self, "_overlay_advsl_pressure_min", 0.0) or 0.0)
    mae_risk_min = float(getattr(self, "_overlay_advsl_mae_risk_min", 0.0) or 0.0)
    no_meaningful_mfe = lifetime_mfe <= meaningful_mfe
    pressure_gate = pressure >= pressure_min and mae_risk >= mae_risk_min

    if adverse >= floor_pct and floor_pct > 0.0 and (no_meaningful_mfe or pressure_gate):
        return True, (
            f"{floor_label}:adverse={adverse:.5f}:mfe={lifetime_mfe:.5f}:"
            f"pressure={pressure:.2f}:mae_risk={mae_risk:.2f}"
        )
    return False, "hold"
|
|
|
|
|
|
|
|
|
|
|
|
def _connect_hz(self):
    """Connect to Hazelcast and bind the maps this runner uses.

    Steps, in order:

    1. Route the ``hazelcast`` logger to a stream handler at INFO so
       client lifecycle events are visible.
    2. Create ``self.hz_client`` and resolve the feature, safety, PnL,
       state, heartbeat and control-plane maps onto ``self``.
    3. If an advanced-SL component is configured, bind it to the feature
       and state maps and publish its control plane (best-effort).
    4. Write an immediate "starting" heartbeat (best-effort).

    Client construction and map lookups are not guarded here, so their
    failures propagate to the caller. Steps 3 and 4 never raise from this
    method's own handlers, but their failures are now logged instead of
    being dropped silently.
    """
    log("Connecting to Hazelcast...")
    import hazelcast
    import logging as _logging

    # Client lifecycle events (connection added/removed, heartbeat,
    # reconnect attempts) at INFO to stderr — the 2026-06-12 silent-death
    # investigation found ZERO client log lines because nothing routed
    # them; without this the reactor's health is invisible.
    _hz_logger = _logging.getLogger("hazelcast")
    if not _hz_logger.handlers:
        # Guarded so repeated connects do not stack duplicate handlers.
        _h = _logging.StreamHandler()
        _h.setFormatter(_logging.Formatter("%(asctime)s HZCLIENT %(levelname)s %(name)s: %(message)s"))
        _hz_logger.addHandler(_h)
    # NOTE(review): the level is applied on every call, including when a
    # handler was installed elsewhere — confirm this matches the intended
    # placement relative to the handler guard above.
    _hz_logger.setLevel(_logging.INFO)

    self.hz_client = hazelcast.HazelcastClient(
        cluster_name=HZ_CLUSTER,
        cluster_members=[HZ_HOST],
        invocation_timeout=3.0,  # prevent indefinite scan-loop stall when HZ is unresponsive
    )
    self.features_map = self.hz_client.get_map("DOLPHIN_FEATURES")
    self.safety_map = self.hz_client.get_map("DOLPHIN_SAFETY")
    self.pnl_map = self.hz_client.get_map("DOLPHIN_PNL_BLUE")
    self.state_map = self.hz_client.get_map("DOLPHIN_STATE_BLUE")
    self.heartbeat_map = self.hz_client.get_map("DOLPHIN_HEARTBEAT")
    self.control_map = self.hz_client.get_map("DOLPHIN_CONTROL_PLANE")

    if self._advanced_sl is not None:
        try:
            self._advanced_sl.bind_hz(features_map=self.features_map, state_map=self.state_map)
            self._advanced_sl.publish_control_plane()
        except Exception as bind_err:
            # Best-effort: the connection is still usable without it, but
            # an unbound advanced-SL component should not go unnoticed.
            log(f" advanced SL HZ bind/publish failed: {bind_err}")

    # Immediate heartbeat — prevents Cat1=0 during startup gap
    try:
        write_runner_heartbeat(
            self.heartbeat_map,
            build_runner_heartbeat_payload(
                flow="nautilus_event_trader",
                phase="starting",
                run_date=self.current_day,
                runner="blue",
            ),
        )
    except Exception as hb_err:
        # Best-effort: a missed startup heartbeat must not abort the connect.
        log(f" startup heartbeat write failed: {hb_err}")
    log(" Hz connected")
|
|
|
|
|
|
|
|
|
|
|
|
def _heartbeat_loop(self):
|
|
|
|
|
|
"""Out-of-band heartbeat writer (independent of scan loop)."""
|
|
|
|
|
|
while not self._heartbeat_stop.is_set():
|
|
|
|
|
|
try:
|
|
|
|
|
|
if self.heartbeat_map is not None:
|
|
|
|
|
|
now = time.time()
|
|
|
|
|
|
write_runner_heartbeat(
|
|
|
|
|
|
self.heartbeat_map,
|
|
|
|
|
|
build_runner_heartbeat_payload(
|
|
|
|
|
|
flow="nautilus_event_trader",
|
|
|
|
|
|
phase="trading",
|
|
|
|
|
|
run_date=self.current_day,
|
|
|
|
|
|
runner="blue",
|
|
|
|
|
|
extra={
|
|
|
|
|
|
"last_scan_age_s": round(now - self._last_scan_accept_ts, 1),
|
|
|
|
|
|
"last_event_age_s": round(now - self._last_scan_event_ts, 1),
|
|
|
|
|
|
"scans_processed": self.scans_processed,
|
|
|
|
|
|
},
|
|
|
|
|
|
),
|
|
|
|
|
|
)
|
|
|
|
|
|
if self.control_map is not None:
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
# RETRACT can produce a forced terminal close which must
|
|
|
|
|
|
# run through the scan-thread close finalizer. The
|
|
|
|
|
|
# heartbeat may still apply non-exit commands while scans
|
|
|
|
|
|
# are quiet, but it must leave RETRACT queued.
|
|
|
|
|
|
self._drain_runtime_commands(allow_retract=False)
|
2026-06-12 14:59:49 +02:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
# Never route heartbeat failures through the mounted trade log:
|
|
|
|
|
|
# if that filesystem is sick, the exception handler must still
|
|
|
|
|
|
# survive so the loop can retry on the next tick.
|
|
|
|
|
|
try:
|
|
|
|
|
|
print(
|
|
|
|
|
|
f"[{datetime.now(timezone.utc).isoformat()}] "
|
|
|
|
|
|
f"Heartbeat loop put failed: {e}",
|
|
|
|
|
|
flush=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
finally:
|
|
|
|
|
|
self._heartbeat_stop.wait(10.0)
|
|
|
|
|
|
|
|
|
|
|
|
# ── Scan-flow watchdog ────────────────────────────────────────────────────
|
|
|
|
|
|
# Detection only — no alpha/engine involvement. Distinguishes three stall
|
|
|
|
|
|
# modes and recovers the two that a process restart fixes:
|
|
|
|
|
|
# 1. worker stuck (events fresh, accepts stale, no dupe churn) → restart
|
|
|
|
|
|
# 2. listener deaf (events stale but HZ key still advancing) → restart
|
|
|
|
|
|
# 3. upstream dark (HZ key frozen too) → log only
|
|
|
|
|
|
# Uses print() not log(): log() appends to the CIFS share and the watchdog
|
|
|
|
|
|
# must stay alive precisely when that mount is sick.
|
|
|
|
|
|
|
|
|
|
|
|
def _probe_latest_scan_number(self, timeout_s: float = 10.0):
|
|
|
|
|
|
"""Read latest_eigen_scan from HZ off-thread; None on timeout/error."""
|
|
|
|
|
|
try:
|
|
|
|
|
|
fut = self._probe_executor.submit(
|
|
|
|
|
|
lambda: self.features_map.blocking().get('latest_eigen_scan')
|
|
|
|
|
|
)
|
|
|
|
|
|
try:
|
|
|
|
|
|
raw = fut.result(timeout=timeout_s)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
fut.cancel()
|
|
|
|
|
|
raise
|
|
|
|
|
|
if not raw:
|
|
|
|
|
|
return None
|
|
|
|
|
|
scan = json.loads(raw) if isinstance(raw, str) else raw
|
|
|
|
|
|
if scan.get('version') == 'NG7':
|
|
|
|
|
|
inner = scan.get('scan_number')
|
|
|
|
|
|
if inner is None and isinstance(scan.get('scan'), dict):
|
|
|
|
|
|
inner = scan['scan'].get('scan_number')
|
|
|
|
|
|
return int(inner or 0)
|
|
|
|
|
|
return int(scan.get('scan_number') or 0)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
def _dump_blackbox(self, reason: str):
|
|
|
|
|
|
"""Forensic dump before a watchdog restart — answers WHY the HZ client
|
|
|
|
|
|
died (incidents: silent client death every 40min–8h, no exception ever
|
|
|
|
|
|
reaches stderr; prime suspect is the hazelcast reactor thread, which
|
|
|
|
|
|
runs I/O + future completion + event dispatch + heartbeat manager, so
|
|
|
|
|
|
its death is silent by construction). print() only — CIFS-safe."""
|
|
|
|
|
|
try:
|
|
|
|
|
|
import sys as _sys
|
|
|
|
|
|
now_iso = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
|
print(f"[{now_iso}] BLACKBOX dump ({reason}):", flush=True)
|
|
|
|
|
|
try:
|
|
|
|
|
|
running_flag = self.hz_client.lifecycle_service.is_running()
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
running_flag = f"err:{exc}"
|
|
|
|
|
|
print(f" hz_client.lifecycle.is_running={running_flag}", flush=True)
|
|
|
|
|
|
try:
|
|
|
|
|
|
cm = getattr(self.hz_client, "_connection_manager", None)
|
|
|
|
|
|
conns = getattr(cm, "active_connections", None)
|
|
|
|
|
|
print(f" active_connections={conns!r}", flush=True)
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
print(f" connection introspect failed: {exc}", flush=True)
|
|
|
|
|
|
frames = _sys._current_frames()
|
|
|
|
|
|
for th in threading.enumerate():
|
|
|
|
|
|
frame = frames.get(th.ident)
|
|
|
|
|
|
hz_mark = " <HZ?>" if "hazelcast" in th.name.lower() or "reactor" in th.name.lower() else ""
|
|
|
|
|
|
print(f" THREAD {th.name} daemon={th.daemon} alive={th.is_alive()}{hz_mark}", flush=True)
|
|
|
|
|
|
if frame is not None:
|
|
|
|
|
|
for fl in traceback.format_stack(frame):
|
|
|
|
|
|
for ln in fl.rstrip().splitlines():
|
|
|
|
|
|
print(f" {ln}", flush=True)
|
|
|
|
|
|
# any hazelcast-named thread MISSING from the enumeration = reactor died
|
|
|
|
|
|
hz_threads = [t.name for t in threading.enumerate()
|
|
|
|
|
|
if "hazelcast" in t.name.lower() or "reactor" in t.name.lower()]
|
|
|
|
|
|
print(f" hazelcast-ish threads present: {hz_threads or 'NONE — reactor thread is DEAD'}",
|
|
|
|
|
|
flush=True)
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
print(f" BLACKBOX dump failed: {exc}", flush=True)
|
|
|
|
|
|
|
2026-06-12 14:59:49 +02:00
|
|
|
|
def _watchdog_restart(self, reason: str):
    """Dump forensics, announce the restart on stdout, and hard-exit.

    The black-box dump runs first and is never allowed to block the exit.
    The process then terminates via ``os._exit(WATCHDOG_EXIT_CODE)``; per
    the printed message, supervisord is expected to respawn it.
    """
    try:
        self._dump_blackbox(reason)
    except Exception:
        pass
    stamp = datetime.now(timezone.utc).isoformat()
    notice = (
        f"[{stamp}] "
        f"WATCHDOG_RESTART: {reason} — exiting {WATCHDOG_EXIT_CODE} for "
        f"supervisord respawn (capital/position restore on boot)"
    )
    print(notice, flush=True)
    os._exit(WATCHDOG_EXIT_CODE)
|
|
|
|
|
|
|
|
|
|
|
|
def _scan_watchdog_loop(self):
|
|
|
|
|
|
last_probe_num = None
|
|
|
|
|
|
last_probe_ts = 0.0
|
|
|
|
|
|
last_dark_log_ts = 0.0
|
|
|
|
|
|
dupes_at_stall = None
|
|
|
|
|
|
while not self._watchdog_stop.is_set():
|
|
|
|
|
|
self._watchdog_stop.wait(15.0)
|
|
|
|
|
|
if self._watchdog_stop.is_set() or not running:
|
|
|
|
|
|
return
|
|
|
|
|
|
now = time.time()
|
|
|
|
|
|
acc_age = now - self._last_scan_accept_ts
|
|
|
|
|
|
ev_age = now - self._last_scan_event_ts
|
|
|
|
|
|
if acc_age < SCAN_STALL_S:
|
|
|
|
|
|
last_probe_num = None
|
|
|
|
|
|
dupes_at_stall = None
|
|
|
|
|
|
continue
|
|
|
|
|
|
uptime_ok = (now - _PROCESS_BOOT_TS) > WATCHDOG_RESTART_MIN_UPTIME_S
|
|
|
|
|
|
if ev_age < SCAN_STALL_S:
|
|
|
|
|
|
# Listener delivering but worker not accepting.
|
|
|
|
|
|
if dupes_at_stall is None:
|
|
|
|
|
|
dupes_at_stall = self._dupe_drops_total
|
|
|
|
|
|
continue
|
|
|
|
|
|
if self._dupe_drops_total > dupes_at_stall:
|
|
|
|
|
|
if now - last_dark_log_ts > UPSTREAM_DARK_LOG_EVERY_S:
|
|
|
|
|
|
last_dark_log_ts = now
|
|
|
|
|
|
print(f"[{datetime.now(timezone.utc).isoformat()}] "
|
|
|
|
|
|
f"WATCHDOG: upstream repeating duplicate scan_number "
|
|
|
|
|
|
f"{self.last_scan_number} for {acc_age:.0f}s — scanner stuck, "
|
|
|
|
|
|
f"no restart (restart will not help)", flush=True)
|
|
|
|
|
|
elif uptime_ok:
|
|
|
|
|
|
self._watchdog_restart(
|
|
|
|
|
|
f"scan worker stalled {acc_age:.0f}s with events still arriving "
|
|
|
|
|
|
f"(likely blocked I/O while holding eng_lock)")
|
|
|
|
|
|
continue
|
|
|
|
|
|
# Both event stream and accepts stale → probe HZ for deafness.
|
|
|
|
|
|
probe = self._probe_latest_scan_number()
|
|
|
|
|
|
if probe is None:
|
|
|
|
|
|
# Persistent probe failure = our HZ client is dead (listener
|
|
|
|
|
|
# and probe share it). 2026-06-10 15:18 incident: scanner kept
|
|
|
|
|
|
# writing, PINK kept receiving, but BLUE's client died — probe
|
|
|
|
|
|
# returned None forever and the old logic mislabeled it
|
|
|
|
|
|
# "upstream dark" and never restarted. Three consecutive
|
|
|
|
|
|
# failures (~45 s) with uptime past warm-up → self-restart.
|
|
|
|
|
|
self._probe_fail_streak = getattr(self, "_probe_fail_streak", 0) + 1
|
|
|
|
|
|
if self._probe_fail_streak >= 3 and uptime_ok:
|
|
|
|
|
|
self._watchdog_restart(
|
|
|
|
|
|
f"HZ probe failed {self._probe_fail_streak}x while no "
|
|
|
|
|
|
f"events for {ev_age:.0f}s — HZ client presumed dead")
|
|
|
|
|
|
else:
|
|
|
|
|
|
self._probe_fail_streak = 0
|
|
|
|
|
|
if probe is not None:
|
|
|
|
|
|
if last_probe_num is None:
|
|
|
|
|
|
last_probe_num = probe
|
|
|
|
|
|
last_probe_ts = now
|
|
|
|
|
|
elif (now - last_probe_ts) >= WATCHDOG_PROBE_INTERVAL_S:
|
|
|
|
|
|
if probe != last_probe_num and uptime_ok:
|
|
|
|
|
|
self._watchdog_restart(
|
|
|
|
|
|
f"listener deaf: HZ latest_eigen_scan advanced "
|
|
|
|
|
|
f"{last_probe_num} → {probe} but no events for {ev_age:.0f}s")
|
|
|
|
|
|
last_probe_num = probe
|
|
|
|
|
|
last_probe_ts = now
|
|
|
|
|
|
if now - last_dark_log_ts > UPSTREAM_DARK_LOG_EVERY_S:
|
|
|
|
|
|
last_dark_log_ts = now
|
|
|
|
|
|
print(f"[{datetime.now(timezone.utc).isoformat()}] "
|
|
|
|
|
|
f"WATCHDOG: NO SCANS for {acc_age:.0f}s (HZ scan_number probe="
|
|
|
|
|
|
f"{probe}) — upstream scanner appears DARK; open positions are "
|
|
|
|
|
|
f"UNMANAGED until scans resume", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
def _read_posture(self):
|
|
|
|
|
|
now = time.time()
|
|
|
|
|
|
if now - self.posture_cache_time < 10:
|
|
|
|
|
|
return self.cached_posture
|
|
|
|
|
|
try:
|
|
|
|
|
|
posture_raw = self.safety_map.blocking().get("latest") or self.safety_map.blocking().get("posture")
|
|
|
|
|
|
if posture_raw:
|
|
|
|
|
|
if isinstance(posture_raw, str):
|
|
|
|
|
|
try:
|
|
|
|
|
|
parsed = json.loads(posture_raw)
|
|
|
|
|
|
self.cached_posture = parsed.get("posture", posture_raw)
|
|
|
|
|
|
except (json.JSONDecodeError, AttributeError):
|
|
|
|
|
|
self.cached_posture = posture_raw
|
|
|
|
|
|
else:
|
|
|
|
|
|
self.cached_posture = posture_raw.get("posture", "APEX")
|
|
|
|
|
|
self.posture_cache_time = now
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return self.cached_posture
|
|
|
|
|
|
|
|
|
|
|
|
def _rollover_day(self):
|
|
|
|
|
|
today = datetime.now(timezone.utc).strftime('%Y-%m-%d')
|
|
|
|
|
|
if today == self.current_day:
|
|
|
|
|
|
return
|
|
|
|
|
|
posture = self._read_posture()
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
if today != self.current_day: # double-checked: only one thread calls begin_day
|
|
|
|
|
|
if getattr(self, 'acb', None):
|
|
|
|
|
|
try:
|
|
|
|
|
|
exf_raw = self.features_map.blocking().get('exf_latest') if self.features_map else None
|
|
|
|
|
|
es_raw = self.features_map.blocking().get('latest_eigen_scan') if self.features_map else None
|
|
|
|
|
|
|
|
|
|
|
|
exf_snapshot = json.loads(exf_raw) if isinstance(exf_raw, str) else (exf_raw or {})
|
|
|
|
|
|
eigen_scan = json.loads(es_raw) if isinstance(es_raw, str) else (es_raw or {})
|
|
|
|
|
|
|
|
|
|
|
|
w750_vel = eigen_scan.get('w750_velocity', 0.0)
|
|
|
|
|
|
|
|
|
|
|
|
if exf_snapshot:
|
|
|
|
|
|
self.acb.get_dynamic_boost_from_hz(
|
|
|
|
|
|
date_str=today,
|
|
|
|
|
|
exf_snapshot=exf_snapshot,
|
|
|
|
|
|
w750_velocity=float(w750_vel) if w750_vel else None,
|
|
|
|
|
|
direction=self.trade_direction,
|
|
|
|
|
|
)
|
|
|
|
|
|
log(f"ACB: Pre-warmed cache for {today} from HZ")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"ACB Rollover Error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
self.eng.begin_day(today, posture=posture, direction=self.trade_direction)
|
|
|
|
|
|
self.bar_idx = 0
|
|
|
|
|
|
self.current_day = today
|
|
|
|
|
|
log(
|
|
|
|
|
|
f"begin_day({today}) called with posture={posture} "
|
|
|
|
|
|
f"direction={_direction_label(self.trade_direction)}"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def _mark_retract_command_seen(self, command_id: str) -> None:
|
|
|
|
|
|
if not command_id or command_id in self._processed_retract_set:
|
|
|
|
|
|
return
|
|
|
|
|
|
self._processed_retract_commands.append(command_id)
|
|
|
|
|
|
self._processed_retract_set.add(command_id)
|
|
|
|
|
|
|
|
|
|
|
|
def _mark_runtime_command_seen(self, command_id: str) -> None:
    """Mark a runtime command id as processed for idempotency.

    Delegates to ``_mark_retract_command_seen``, so runtime and RETRACT
    command ids are recorded through the same code path.
    """
    self._mark_retract_command_seen(command_id)
|
|
|
|
|
|
|
|
|
|
|
|
def _enqueue_blue_runtime_command(self, cmd: dict) -> bool:
|
|
|
|
|
|
"""Append a command to the BLUE runtime command queue."""
|
|
|
|
|
|
if self.control_map is None:
|
|
|
|
|
|
return False
|
|
|
|
|
|
try:
|
|
|
|
|
|
raw_q = self.control_map.blocking().get("blue_runtime_commands")
|
|
|
|
|
|
q = json.loads(raw_q) if isinstance(raw_q, str) and raw_q else []
|
|
|
|
|
|
if not isinstance(q, list):
|
|
|
|
|
|
q = []
|
|
|
|
|
|
q.append(cmd)
|
|
|
|
|
|
q = q[-200:]
|
|
|
|
|
|
self.control_map.blocking().put("blue_runtime_commands", json.dumps(q))
|
|
|
|
|
|
return True
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" BLUE runtime command enqueue failed: {e}")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
def _capital_state_payload(
|
|
|
|
|
|
self,
|
|
|
|
|
|
capital: float,
|
|
|
|
|
|
*,
|
|
|
|
|
|
reason: str = "",
|
|
|
|
|
|
source: str = "",
|
|
|
|
|
|
trade_id: str = "",
|
|
|
|
|
|
asset: str = "",
|
|
|
|
|
|
replay_blob: Mapping[str, Any] | None = None,
|
|
|
|
|
|
) -> dict:
|
|
|
|
|
|
"""Build a canonical capital payload for HZ and disk persistence."""
|
|
|
|
|
|
payload = dict(replay_blob or {})
|
|
|
|
|
|
payload["capital"] = float(capital)
|
|
|
|
|
|
payload["ts"] = float(time.time())
|
|
|
|
|
|
payload["updated_at"] = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
|
payload["reason"] = str(reason or payload.get("reason", "") or "")
|
|
|
|
|
|
payload["source"] = str(source or payload.get("source", "") or "")
|
|
|
|
|
|
if trade_id:
|
|
|
|
|
|
payload["trade_id"] = str(trade_id)
|
|
|
|
|
|
if asset:
|
|
|
|
|
|
payload["asset"] = str(asset)
|
|
|
|
|
|
payload.setdefault("strategy", "nautilus-blue")
|
|
|
|
|
|
payload.setdefault("engine", "nautilus_event_trader")
|
|
|
|
|
|
return payload
|
|
|
|
|
|
|
|
|
|
|
|
def _capital_ledger_event_payload(
|
|
|
|
|
|
self,
|
|
|
|
|
|
*,
|
|
|
|
|
|
capital_before: float,
|
|
|
|
|
|
capital_after: float,
|
|
|
|
|
|
reason: str = "",
|
|
|
|
|
|
source: str = "",
|
|
|
|
|
|
trade_id: str = "",
|
|
|
|
|
|
asset: str = "",
|
|
|
|
|
|
event_ts: float | None = None,
|
|
|
|
|
|
applies_before_ts: float | None = None,
|
|
|
|
|
|
mode: str = "terminal_update",
|
|
|
|
|
|
replay_blob: Mapping[str, Any] | None = None,
|
|
|
|
|
|
) -> dict:
|
|
|
|
|
|
payload = dict(replay_blob or {})
|
|
|
|
|
|
payload["capital_before"] = float(capital_before)
|
|
|
|
|
|
payload["capital_after"] = float(capital_after)
|
|
|
|
|
|
payload["capital"] = float(capital_after)
|
|
|
|
|
|
payload["capital_delta"] = float(capital_after - capital_before)
|
|
|
|
|
|
payload["ts"] = float(event_ts if event_ts is not None else time.time())
|
|
|
|
|
|
payload["updated_at"] = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
|
payload["reason"] = str(reason or payload.get("reason", "") or "")
|
|
|
|
|
|
payload["source"] = str(source or payload.get("source", "") or "")
|
|
|
|
|
|
payload["mode"] = str(mode)
|
|
|
|
|
|
if applies_before_ts is not None:
|
|
|
|
|
|
payload["applies_before_ts"] = float(applies_before_ts)
|
|
|
|
|
|
if trade_id:
|
|
|
|
|
|
payload["trade_id"] = str(trade_id)
|
|
|
|
|
|
if asset:
|
|
|
|
|
|
payload["asset"] = str(asset)
|
|
|
|
|
|
payload.setdefault("strategy", "nautilus-blue")
|
|
|
|
|
|
payload.setdefault("engine", "nautilus_event_trader")
|
|
|
|
|
|
return payload
|
|
|
|
|
|
|
|
|
|
|
|
def _record_capital_ledger_event(self, entry: Mapping[str, Any]) -> None:
    """Append a capital event to the durable BLUE ledger surfaces.

    Loads the current ledger from the ``capital_update_ledger`` key of
    ``state_map`` (when a state map exists), appends a copy of ``entry``,
    keeps the newest 1000 rows, and writes the result to the state map,
    the latest-entry key of ``control_map`` and the on-disk ledger file,
    in that order.  Any failure along the way is logged and ends the
    attempt.

    NOTE(review): all writes share one try block, so a failing HZ put also
    skips the disk write; and with no state map the disk file is rewritten
    from an empty base.  Confirm both are intended.
    """
    try:
        stored = None
        if self.state_map is not None:
            stored = self.state_map.blocking().get("capital_update_ledger")
        rows = []
        if stored:
            rows = json.loads(stored) if isinstance(stored, str) else list(stored)
        if not isinstance(rows, list):
            rows = []
        rows.append(dict(entry))
        ledger_payload = json.dumps(rows[-1000:])
        if self.state_map is not None:
            self.state_map.blocking().put("capital_update_ledger", ledger_payload)
        if self.control_map is not None:
            latest_payload = json.dumps(dict(entry))
            self.control_map.blocking().put("blue_capital_update_ledger_latest", latest_payload)
        CAPITAL_UPDATE_LEDGER.write_text(ledger_payload)
    except Exception as e:
        log(f" capital ledger write failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def _current_capital_state_timestamp(self) -> float | None:
|
|
|
|
|
|
"""Return the freshest timestamp currently known for BLUE capital state."""
|
|
|
|
|
|
candidates: list[float] = []
|
|
|
|
|
|
|
|
|
|
|
|
def _maybe_add_blob(raw, source: str) -> None:
|
|
|
|
|
|
parsed = self._parse_capital_blob(raw, source)
|
|
|
|
|
|
if parsed is None:
|
|
|
|
|
|
return
|
|
|
|
|
|
_, blob = parsed
|
|
|
|
|
|
ts = self._extract_state_timestamp(blob)
|
|
|
|
|
|
if ts is not None:
|
|
|
|
|
|
candidates.append(ts)
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
if self.state_map is not None:
|
|
|
|
|
|
for key in ("latest_nautilus", "engine_snapshot", CAPITAL_CORRECTIVE_REPLAY_HZ_KEY, "capital_checkpoint"):
|
|
|
|
|
|
try:
|
|
|
|
|
|
_maybe_add_blob(self.state_map.blocking().get(key), f"HZ {key}")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
continue
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
try:
|
|
|
|
|
|
if self.pnl_map is not None:
|
|
|
|
|
|
day_key = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
|
|
|
|
|
_maybe_add_blob(self.pnl_map.blocking().get(day_key), f"HZ pnl[{day_key}]")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
try:
|
|
|
|
|
|
if CAPITAL_DISK_CHECKPOINT.exists():
|
|
|
|
|
|
raw = CAPITAL_DISK_CHECKPOINT.read_text()
|
|
|
|
|
|
data = json.loads(raw) if raw else {}
|
|
|
|
|
|
ts = self._extract_state_timestamp(data if isinstance(data, dict) else {})
|
|
|
|
|
|
if ts is not None:
|
|
|
|
|
|
candidates.append(ts)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
try:
|
|
|
|
|
|
if CAPITAL_UPDATE_LEDGER.exists():
|
|
|
|
|
|
raw = CAPITAL_UPDATE_LEDGER.read_text()
|
|
|
|
|
|
rows = json.loads(raw) if raw else []
|
|
|
|
|
|
if isinstance(rows, list) and rows:
|
|
|
|
|
|
last = rows[-1] if isinstance(rows[-1], dict) else None
|
|
|
|
|
|
if isinstance(last, dict):
|
|
|
|
|
|
ts = self._extract_state_timestamp(last)
|
|
|
|
|
|
if ts is not None:
|
|
|
|
|
|
candidates.append(ts)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return max(candidates) if candidates else None
|
|
|
|
|
|
|
|
|
|
|
|
def _resolved_capital_state_value(self, fallback: float | None = None) -> tuple[float | None, str, float | None]:
|
|
|
|
|
|
"""Return the freshest authoritative BLUE capital value available locally."""
|
|
|
|
|
|
candidates: list[tuple[float, int, float, str, float | None]] = []
|
|
|
|
|
|
source_rank = {
|
|
|
|
|
|
"capital_update_ledger": 65,
|
|
|
|
|
|
"latest_nautilus": 40,
|
|
|
|
|
|
"engine_snapshot": 30,
|
|
|
|
|
|
"pnl_day": 25,
|
|
|
|
|
|
"correction_replay_local": 20,
|
|
|
|
|
|
"correction_replay_hz": 10,
|
|
|
|
|
|
"capital_checkpoint": 5,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def _maybe_add_blob(raw, source: str, rank_key: str) -> None:
|
|
|
|
|
|
parsed = self._parse_capital_blob(raw, source)
|
|
|
|
|
|
if parsed is None:
|
|
|
|
|
|
return
|
|
|
|
|
|
capital, blob = parsed
|
|
|
|
|
|
ts = self._extract_state_timestamp(blob)
|
|
|
|
|
|
candidates.append(
|
|
|
|
|
|
(
|
|
|
|
|
|
ts if ts is not None else float("-inf"),
|
|
|
|
|
|
source_rank.get(rank_key, 0),
|
|
|
|
|
|
capital,
|
|
|
|
|
|
source,
|
|
|
|
|
|
ts,
|
|
|
|
|
|
)
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
if CAPITAL_CORRECTIVE_REPLAY.exists():
|
|
|
|
|
|
try:
|
|
|
|
|
|
replay_blob = json.loads(CAPITAL_CORRECTIVE_REPLAY.read_text())
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
replay_blob = None
|
|
|
|
|
|
if isinstance(replay_blob, dict):
|
|
|
|
|
|
replay_capital = _safe_float(replay_blob.get("capital", 0.0), 0.0)
|
|
|
|
|
|
if replay_capital >= 1.0 and math.isfinite(replay_capital):
|
|
|
|
|
|
replay_ts = self._extract_state_timestamp(replay_blob)
|
|
|
|
|
|
candidates.append(
|
|
|
|
|
|
(
|
|
|
|
|
|
replay_ts if replay_ts is not None else float("-inf"),
|
|
|
|
|
|
source_rank["correction_replay_local"],
|
|
|
|
|
|
replay_capital,
|
|
|
|
|
|
"local corrective replay",
|
|
|
|
|
|
replay_ts,
|
|
|
|
|
|
)
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
if self.state_map is not None:
|
|
|
|
|
|
raw_ledger = self.state_map.blocking().get("capital_update_ledger")
|
|
|
|
|
|
ledger_rows = json.loads(raw_ledger) if isinstance(raw_ledger, str) and raw_ledger else list(raw_ledger or [])
|
|
|
|
|
|
if isinstance(ledger_rows, list) and ledger_rows:
|
|
|
|
|
|
last = ledger_rows[-1] if isinstance(ledger_rows[-1], dict) else None
|
|
|
|
|
|
if isinstance(last, dict):
|
|
|
|
|
|
capital_after = _safe_float(last.get("capital_after", last.get("capital", 0.0)), 0.0)
|
|
|
|
|
|
if capital_after >= 1.0 and math.isfinite(capital_after):
|
|
|
|
|
|
ledger_ts = self._extract_state_timestamp(last)
|
|
|
|
|
|
candidates.append(
|
|
|
|
|
|
(
|
|
|
|
|
|
ledger_ts if ledger_ts is not None else float("-inf"),
|
|
|
|
|
|
source_rank["capital_update_ledger"],
|
|
|
|
|
|
capital_after,
|
|
|
|
|
|
"capital_update_ledger",
|
|
|
|
|
|
ledger_ts,
|
|
|
|
|
|
)
|
|
|
|
|
|
)
|
|
|
|
|
|
for key, label, rank_key in (
|
|
|
|
|
|
("latest_nautilus", "HZ latest_nautilus", "latest_nautilus"),
|
|
|
|
|
|
("engine_snapshot", "HZ engine_snapshot", "engine_snapshot"),
|
|
|
|
|
|
(CAPITAL_CORRECTIVE_REPLAY_HZ_KEY, "HZ corrective replay", "correction_replay_hz"),
|
|
|
|
|
|
("capital_checkpoint", "HZ capital_checkpoint", "capital_checkpoint"),
|
|
|
|
|
|
):
|
|
|
|
|
|
try:
|
|
|
|
|
|
raw = self.state_map.blocking().get(key)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
raw = None
|
|
|
|
|
|
_maybe_add_blob(raw, label, rank_key)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
if self.pnl_map is not None:
|
|
|
|
|
|
day_key = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
|
|
|
|
|
raw = self.pnl_map.blocking().get(day_key)
|
|
|
|
|
|
_maybe_add_blob(raw, f"HZ pnl[{day_key}]", "pnl_day")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
if CAPITAL_DISK_CHECKPOINT.exists():
|
|
|
|
|
|
raw = CAPITAL_DISK_CHECKPOINT.read_text()
|
|
|
|
|
|
data = json.loads(raw) if raw else {}
|
|
|
|
|
|
if isinstance(data, dict):
|
|
|
|
|
|
capital = _safe_float(data.get("capital", 0.0), 0.0)
|
|
|
|
|
|
ts = self._extract_state_timestamp(data)
|
|
|
|
|
|
if capital >= 1.0 and math.isfinite(capital):
|
|
|
|
|
|
candidates.append(
|
|
|
|
|
|
(
|
|
|
|
|
|
ts if ts is not None else float("-inf"),
|
|
|
|
|
|
source_rank["capital_checkpoint"],
|
|
|
|
|
|
capital,
|
|
|
|
|
|
"disk capital_checkpoint",
|
|
|
|
|
|
ts,
|
|
|
|
|
|
)
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
if candidates:
|
|
|
|
|
|
candidates.sort(key=lambda item: (item[0], item[1]), reverse=True)
|
|
|
|
|
|
_, _, capital, source, ts = candidates[0]
|
|
|
|
|
|
return capital, source, ts
|
|
|
|
|
|
|
|
|
|
|
|
if fallback is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
fallback_f = float(fallback)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
fallback_f = None
|
|
|
|
|
|
if fallback_f is not None and math.isfinite(fallback_f) and fallback_f >= 1.0:
|
|
|
|
|
|
return fallback_f, "engine_fallback", None
|
|
|
|
|
|
return None, "unresolved", None
|
|
|
|
|
|
|
|
|
|
|
|
def _resolved_realized_trade_pnl(
    self,
    pending: Mapping[str, Any],
    outcome: Mapping[str, Any],
    *,
    exit_price: float | None = None,
) -> tuple[float, str]:
    """Resolve realized PnL from the most trustworthy available close payload fields.

    Tries, in order, and returns ``(pnl, method)`` for the first that
    applies:

    1. ``"net_pnl"`` — the outcome's ``net_pnl`` (or ``pnl``) when finite
       and at least 1e-9 in magnitude;
    2. ``"pnl_pct_notional"`` — ``pnl_pct`` times the pending notional
       when both are finite, ``pnl_pct`` is non-zero and notional > 0;
    3. ``"entry_exit_qty"`` — signed ``(exit - entry) * quantity`` when
       all three are finite and positive (side defaults to SHORT);
    4. ``"raw_net"`` — the finite (near-zero) net value from step 1;
    5. ``"unresolved"`` — 0.0.
    """
    nan = float("nan")
    raw_net = _safe_float(outcome.get("net_pnl", outcome.get("pnl", 0.0)), nan)
    net_is_finite = math.isfinite(raw_net)
    if net_is_finite and abs(raw_net) >= 1e-9:
        return raw_net, "net_pnl"

    pnl_pct = _safe_float(outcome.get("pnl_pct", 0.0), nan)
    notional = _safe_float(pending.get("notional_entry", pending.get("notional", 0.0)), nan)
    if math.isfinite(pnl_pct) and math.isfinite(notional) and abs(pnl_pct) > 0.0 and notional > 0.0:
        return pnl_pct * notional, "pnl_pct_notional"

    entry_price = _safe_float(pending.get("entry_price", 0.0), nan)
    qty = _safe_float(pending.get("quantity", 0.0), nan)
    if exit_price is not None:
        resolved_exit = exit_price
    else:
        resolved_exit = _safe_float(outcome.get("exit_price", 0.0), nan)
    legs = (entry_price, qty, resolved_exit)
    if all(map(math.isfinite, legs)) and all(leg > 0.0 for leg in legs):
        side = str(pending.get("side", "SHORT") or "SHORT").upper()
        # A short profits when the exit is below the entry.
        direction = -1.0 if side == "SHORT" else 1.0
        return direction * ((resolved_exit - entry_price) * qty), "entry_exit_qty"

    if net_is_finite:
        return raw_net, "raw_net"
    return 0.0, "unresolved"
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _truthy_flag(value: Any) -> bool:
|
|
|
|
|
|
"""Interpret loose flag values from runtime payloads."""
|
|
|
|
|
|
if isinstance(value, bool):
|
|
|
|
|
|
return value
|
|
|
|
|
|
if isinstance(value, (int, float)):
|
|
|
|
|
|
return bool(value)
|
|
|
|
|
|
if isinstance(value, str):
|
|
|
|
|
|
return value.strip().lower() in {"1", "true", "yes", "on", "y", "t"}
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
def _resolved_capital_apply_pnl(self, outcome: Mapping[str, Any], realized_pnl: float) -> tuple[float, str]:
|
|
|
|
|
|
"""Resolve capital delta for close handling, suppressing known already-realized exits."""
|
|
|
|
|
|
if self._truthy_flag(outcome.get("capital_already_realized", False)):
|
|
|
|
|
|
return 0.0, "already_realized"
|
|
|
|
|
|
return float(realized_pnl or 0.0), "direct"
|
|
|
|
|
|
|
|
|
|
|
|
def _commit_capital_state(
|
|
|
|
|
|
self,
|
|
|
|
|
|
capital: float,
|
|
|
|
|
|
*,
|
|
|
|
|
|
reason: str = "",
|
|
|
|
|
|
source: str = "",
|
|
|
|
|
|
trade_id: str = "",
|
|
|
|
|
|
asset: str = "",
|
|
|
|
|
|
replay_blob: Mapping[str, Any] | None = None,
|
|
|
|
|
|
update_replay_key: bool = False,
|
|
|
|
|
|
mirror_control_plane: bool = True,
|
|
|
|
|
|
) -> dict | None:
|
|
|
|
|
|
"""Write capital to all canonical BLUE bookkeeping surfaces."""
|
|
|
|
|
|
capital = float(capital)
|
|
|
|
|
|
if capital < 1.0 or not math.isfinite(capital):
|
|
|
|
|
|
return None
|
|
|
|
|
|
payload = self._capital_state_payload(
|
|
|
|
|
|
capital,
|
|
|
|
|
|
reason=reason,
|
|
|
|
|
|
source=source,
|
|
|
|
|
|
trade_id=trade_id,
|
|
|
|
|
|
asset=asset,
|
|
|
|
|
|
replay_blob=replay_blob,
|
|
|
|
|
|
)
|
|
|
|
|
|
runtime_snapshot = getattr(self, "_last_engine_snapshot_payload", None)
|
|
|
|
|
|
if isinstance(runtime_snapshot, Mapping):
|
|
|
|
|
|
merged_payload = dict(runtime_snapshot)
|
|
|
|
|
|
merged_payload.update(payload)
|
|
|
|
|
|
payload = merged_payload
|
|
|
|
|
|
checkpoint_payload = json.dumps({"capital": capital, "ts": payload["ts"]})
|
|
|
|
|
|
state_payload = json.dumps(payload)
|
|
|
|
|
|
try:
|
|
|
|
|
|
if self.state_map is not None:
|
|
|
|
|
|
self.state_map.blocking().put("capital_checkpoint", checkpoint_payload)
|
|
|
|
|
|
self.state_map.blocking().put("latest_nautilus", state_payload)
|
|
|
|
|
|
self.state_map.blocking().put("engine_snapshot", state_payload)
|
|
|
|
|
|
if update_replay_key:
|
|
|
|
|
|
self.state_map.blocking().put(CAPITAL_CORRECTIVE_REPLAY_HZ_KEY, state_payload)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" capital state HZ save failed: {e}")
|
|
|
|
|
|
if update_replay_key:
|
|
|
|
|
|
try:
|
|
|
|
|
|
CAPITAL_CORRECTIVE_REPLAY.write_text(state_payload)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" capital corrective replay save failed: {e}")
|
|
|
|
|
|
try:
|
|
|
|
|
|
if self.pnl_map is not None:
|
|
|
|
|
|
day_key = datetime.now(timezone.utc).strftime('%Y-%m-%d')
|
|
|
|
|
|
self.pnl_map.blocking().put(day_key, state_payload)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" capital pnl state save failed: {e}")
|
|
|
|
|
|
try:
|
|
|
|
|
|
CAPITAL_DISK_CHECKPOINT.write_text(checkpoint_payload)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" capital disk save failed: {e}")
|
|
|
|
|
|
if mirror_control_plane and self.control_map is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self.control_map.blocking().put("blue_capital_update_latest", state_payload)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" capital control plane mirror failed: {e}")
|
|
|
|
|
|
self.eng.capital = capital
|
|
|
|
|
|
return payload
|
|
|
|
|
|
|
|
|
|
|
|
def _apply_trade_capital_update(
    self,
    realized_pnl: float,
    *,
    reason: str,
    source: str,
    trade_id: str,
    asset: str,
    mirror_control_plane: bool = True,
) -> tuple[float, float]:
    """Book a realized trade PnL against live capital and persist it.

    The starting capital is resolved via ``_resolved_capital_state_value``
    with the engine's own capital as fallback. A resolved base that is
    non-finite or below 1.0 is replaced by 0.0; a usable base is written
    back to ``self.eng.capital`` before the delta is applied. When the
    commit succeeds, a ``terminal_update`` ledger event is recorded.

    Returns ``(capital_before, capital_after)`` whether or not the commit
    was accepted.
    """
    engine_capital = float(getattr(self.eng, "capital", 0.0) or 0.0)
    resolved, capital_source, capital_ts = self._resolved_capital_state_value(
        fallback=engine_capital
    )
    capital_before = float(resolved or 0.0)
    if math.isfinite(capital_before) and capital_before >= 1.0:
        self.eng.capital = capital_before
    else:
        capital_before = 0.0

    capital_after = capital_before + float(realized_pnl or 0.0)
    payload = self._commit_capital_state(
        capital_after,
        reason=reason,
        source=source,
        trade_id=trade_id,
        asset=asset,
        mirror_control_plane=mirror_control_plane,
    )
    if payload is not None:
        # The ledger event is stamped with the timestamp of the payload just committed.
        ledger_entry = self._capital_ledger_event_payload(
            capital_before=capital_before,
            capital_after=capital_after,
            reason=reason,
            source=source,
            trade_id=trade_id,
            asset=asset,
            event_ts=self._parse_timestamp_seconds(payload.get("ts")),
            applies_before_ts=self._parse_timestamp_seconds(payload.get("ts")),
            mode="terminal_update",
        )
        self._record_capital_ledger_event(ledger_entry)

    if capital_source != "engine_fallback":
        ts_part = f" ts={capital_ts:.3f}" if capital_ts is not None else ""
        log(
            f" capital update base resolved from {capital_source}{ts_part}"
            f": before={capital_before:.2f} after={capital_after:.2f}"
        )
    return capital_before, capital_after
|
|
|
|
|
|
|
|
|
|
|
|
def _apply_internal_capital_update(self, cmd: dict) -> tuple[dict | None, str]:
    """Apply an in-band capital update command to the live BLUE engine.

    Returns ``(result, status)`` with one of three statuses:

    * ``BAD_CAPITAL`` - the requested capital is non-finite or below 1.0,
      or the commit refused it; ``result`` is ``None``.
    * ``RECORDED_HISTORICAL`` - the command carries a replay blob whose
      timestamp is more than one second older than the current capital
      state; only a ledger event is recorded and live capital is untouched.
    * ``APPLIED`` - capital was committed and a ledger event recorded;
      ``result`` is the committed payload.
    """
    capital = _safe_float(cmd.get("capital", None), float("nan"))
    if not math.isfinite(capital) or capital < 1.0:
        return None, "BAD_CAPITAL"

    blob_candidate = cmd.get("replay_blob")
    replay_blob = blob_candidate if isinstance(blob_candidate, Mapping) else None
    capital_before = float(getattr(self.eng, "capital", capital) or capital)

    # First truthy timestamp wins: the command's own, then the replay blob's.
    raw_event_ts = cmd.get("event_ts") or cmd.get("ts")
    if not raw_event_ts:
        if replay_blob:
            raw_event_ts = replay_blob.get("updated_at") or replay_blob.get("ts")
        else:
            raw_event_ts = None
    event_ts = self._parse_timestamp_seconds(raw_event_ts)
    applies_before_ts = self._parse_timestamp_seconds(cmd.get("applies_before_ts"))

    # Descriptive fields shared by the commit and both ledger shapes.
    meta = {
        "reason": str(cmd.get("reason", "CAPITAL_UPDATE") or "CAPITAL_UPDATE"),
        "source": str(cmd.get("source", "control_plane") or "control_plane"),
        "trade_id": str(cmd.get("trade_id", "") or ""),
        "asset": str(cmd.get("asset", "") or ""),
    }

    if replay_blob is not None:
        replay_ts = self._extract_state_timestamp(replay_blob)
        current_ts = self._current_capital_state_timestamp()
        is_stale = (
            replay_ts is not None
            and current_ts is not None
            and replay_ts + 1.0 < current_ts
        )
        if is_stale:
            ledger_entry = self._capital_ledger_event_payload(
                capital_before=capital_before,
                capital_after=capital_before,
                event_ts=event_ts,
                applies_before_ts=current_ts,
                mode="historical_replay_only",
                replay_blob=replay_blob,
                **meta,
            )
            self._record_capital_ledger_event(ledger_entry)
            log(
                " capital update recorded as historical replay "
                f"ts={replay_ts:.3f} current_ts={current_ts:.3f}"
            )
            return ledger_entry, "RECORDED_HISTORICAL"

    payload = self._commit_capital_state(
        capital,
        replay_blob=replay_blob,
        update_replay_key=bool(replay_blob),
        mirror_control_plane=True,
        **meta,
    )
    if payload is None:
        return None, "BAD_CAPITAL"

    # The historical branch returns early, so this path is always a terminal update.
    ledger_entry = self._capital_ledger_event_payload(
        capital_before=capital_before,
        capital_after=capital,
        event_ts=event_ts,
        applies_before_ts=applies_before_ts,
        mode="terminal_update",
        replay_blob=replay_blob,
        **meta,
    )
    self._record_capital_ledger_event(ledger_entry)
    return payload, "APPLIED"
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _sc_trim_fraction(current_mult: float, target_mult: float) -> float:
|
|
|
|
|
|
"""Translate a desired remaining multiplier into a retract fraction."""
|
|
|
|
|
|
cur = float(current_mult or 0.0)
|
|
|
|
|
|
tgt = float(target_mult or 0.0)
|
|
|
|
|
|
if not math.isfinite(cur) or not math.isfinite(tgt):
|
|
|
|
|
|
return 0.0
|
|
|
|
|
|
cur = max(0.0, cur)
|
|
|
|
|
|
tgt = max(0.0, tgt)
|
|
|
|
|
|
if cur <= 0.0 or tgt >= cur:
|
|
|
|
|
|
return 0.0
|
|
|
|
|
|
return max(0.0, min(1.0, 1.0 - (tgt / cur)))
|
|
|
|
|
|
|
|
|
|
|
|
def _record_sc_haircut(self, *, trade_id: str, pending: dict, source: str) -> dict | None:
|
|
|
|
|
|
"""Record SC haircut guidance as sizing metadata only.
|
|
|
|
|
|
|
|
|
|
|
|
SC is not an actuation surface. It records a haircut target that later
|
|
|
|
|
|
sizing logic can use, but it does not enqueue a live retract command.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not trade_id:
|
|
|
|
|
|
return None
|
|
|
|
|
|
pos = getattr(self.eng, "position", None)
|
|
|
|
|
|
if pos is None:
|
|
|
|
|
|
return None
|
|
|
|
|
|
pos_tid = str(getattr(pos, "trade_id", "") or "")
|
|
|
|
|
|
if pos_tid and pos_tid != str(trade_id):
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
recs: list[float] = []
|
|
|
|
|
|
sc_rec = pending.get("sc_threshold_advisor")
|
|
|
|
|
|
if isinstance(sc_rec, dict):
|
|
|
|
|
|
recs.append(float(sc_rec.get("recommended_mult", 1.0) or 1.0))
|
|
|
|
|
|
gauge_rec = pending.get("sc_bucket_gauge")
|
|
|
|
|
|
if isinstance(gauge_rec, dict):
|
|
|
|
|
|
recs.append(float(gauge_rec.get("recommended_size_mult", 1.0) or 1.0))
|
|
|
|
|
|
if not recs:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
target_mult = max(0.0, min(recs))
|
|
|
|
|
|
current_notional = float(getattr(pos, "notional", pending.get("notional", 0.0)) or 0.0)
|
|
|
|
|
|
entry_notional = float(
|
|
|
|
|
|
pending.get("notional_entry", pending.get("notional", current_notional)) or current_notional
|
|
|
|
|
|
)
|
|
|
|
|
|
if current_notional <= 0.0 or entry_notional <= 0.0:
|
|
|
|
|
|
return None
|
|
|
|
|
|
current_mult = current_notional / entry_notional
|
|
|
|
|
|
last_target = float(pending.get("sc_haircut_last_target_mult", 1.0) or 1.0)
|
|
|
|
|
|
if target_mult >= current_mult - 1e-6 or target_mult >= last_target - 1e-6:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
frac = self._sc_trim_fraction(current_mult=current_mult, target_mult=target_mult)
|
|
|
|
|
|
if frac <= 0.0:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
pending["sc_haircut_target_mult"] = target_mult
|
|
|
|
|
|
pending["sc_haircut_fraction"] = frac
|
|
|
|
|
|
pending["sc_haircut_source"] = str(source or "sc")
|
|
|
|
|
|
pending["sc_haircut_last_updated_ts"] = float(time.time())
|
|
|
|
|
|
pending["sc_haircut_last_target_mult"] = target_mult
|
|
|
|
|
|
self._pending_entries[trade_id] = pending
|
|
|
|
|
|
log(
|
|
|
|
|
|
f" SC haircut record: {trade_id} target={target_mult:.2f} "
|
|
|
|
|
|
f"cur={current_mult:.2f} frac={frac:.3f} source={source}"
|
|
|
|
|
|
)
|
|
|
|
|
|
return {
|
|
|
|
|
|
"trade_id": trade_id,
|
|
|
|
|
|
"target_mult": target_mult,
|
|
|
|
|
|
"current_mult": current_mult,
|
|
|
|
|
|
"fraction": frac,
|
|
|
|
|
|
"source": str(source or "sc"),
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def _apply_sc_entry_size_multiplier(self, trade_id: str, entry: dict, pending: dict) -> float:
|
|
|
|
|
|
"""Apply the live EsoF/SC size gate to an entry before persistence.
|
|
|
|
|
|
|
|
|
|
|
|
This is the actual sizing actuation surface for the deterministic SC gate.
|
|
|
|
|
|
It keeps the haircut size-only: no retract/close commands are enqueued.
|
|
|
|
|
|
"""
|
|
|
|
|
|
mult = float(self._last_esof_size_mult or 1.0)
|
|
|
|
|
|
if not math.isfinite(mult):
|
|
|
|
|
|
mult = 1.0
|
|
|
|
|
|
mult = max(0.0, min(1.0, mult))
|
|
|
|
|
|
pending["sc_exec_mult"] = mult
|
|
|
|
|
|
if mult >= 0.999:
|
|
|
|
|
|
return mult
|
|
|
|
|
|
|
|
|
|
|
|
entry_price = float(entry.get("entry_price", pending.get("entry_price", 0.0)) or 0.0)
|
|
|
|
|
|
base_notional = float(entry.get("notional", pending.get("notional", 0.0)) or 0.0)
|
|
|
|
|
|
if base_notional <= 0.0 and entry_price > 0.0:
|
|
|
|
|
|
quantity = float(entry.get("quantity", pending.get("quantity", 0.0)) or 0.0)
|
|
|
|
|
|
base_notional = quantity * entry_price
|
|
|
|
|
|
if base_notional <= 0.0:
|
|
|
|
|
|
return mult
|
|
|
|
|
|
|
|
|
|
|
|
effective_notional = round(base_notional * mult, 12)
|
|
|
|
|
|
if effective_notional <= 0.0:
|
|
|
|
|
|
return mult
|
|
|
|
|
|
|
|
|
|
|
|
base_quantity = float(entry.get("quantity", pending.get("quantity", 0.0)) or 0.0)
|
|
|
|
|
|
if base_quantity <= 0.0 and entry_price > 0.0:
|
|
|
|
|
|
base_quantity = base_notional / entry_price
|
|
|
|
|
|
effective_quantity = round(effective_notional / max(entry_price, 1e-12), 6) if entry_price > 0.0 else base_quantity * mult
|
|
|
|
|
|
base_leverage = float(entry.get("leverage", pending.get("leverage", 0.0)) or 0.0)
|
|
|
|
|
|
effective_leverage = round(base_leverage * mult, 6) if base_leverage > 0.0 else base_leverage
|
|
|
|
|
|
|
|
|
|
|
|
entry.setdefault("notional_entry", base_notional)
|
|
|
|
|
|
entry["notional"] = effective_notional
|
|
|
|
|
|
entry["quantity"] = effective_quantity
|
|
|
|
|
|
if effective_leverage > 0.0:
|
|
|
|
|
|
entry["leverage"] = effective_leverage
|
|
|
|
|
|
entry["sc_exec_mult"] = mult
|
|
|
|
|
|
entry["sc_exec_notional"] = effective_notional
|
|
|
|
|
|
entry["sc_exec_quantity"] = effective_quantity
|
|
|
|
|
|
|
|
|
|
|
|
pending.setdefault("notional_entry", base_notional)
|
|
|
|
|
|
pending["notional"] = effective_notional
|
|
|
|
|
|
pending["quantity"] = effective_quantity
|
|
|
|
|
|
if effective_leverage > 0.0:
|
|
|
|
|
|
pending["leverage"] = effective_leverage
|
|
|
|
|
|
pending["sc_exec_notional"] = effective_notional
|
|
|
|
|
|
pending["sc_exec_quantity"] = effective_quantity
|
|
|
|
|
|
pending["sc_exec_leverage"] = effective_leverage if effective_leverage > 0.0 else base_leverage
|
|
|
|
|
|
|
|
|
|
|
|
pos = getattr(self.eng, "position", None)
|
|
|
|
|
|
if pos is not None and str(getattr(pos, "trade_id", "") or "") in ("", str(trade_id)):
|
|
|
|
|
|
try:
|
|
|
|
|
|
pos.notional = effective_notional
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
try:
|
|
|
|
|
|
pos.quantity = effective_quantity
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
if effective_leverage > 0.0:
|
|
|
|
|
|
try:
|
|
|
|
|
|
pos.leverage = effective_leverage
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
log(
|
|
|
|
|
|
f" SC haircut execute: {trade_id} mult={mult:.3f} "
|
|
|
|
|
|
f"notional={base_notional:.6f}->{effective_notional:.6f} "
|
|
|
|
|
|
f"qty={base_quantity:.6f}->{effective_quantity:.6f}"
|
|
|
|
|
|
)
|
|
|
|
|
|
return mult
|
|
|
|
|
|
|
|
|
|
|
|
def _build_retract_exit(self, *, trade_id: str, reason: str, bars_held: int, pnl_pct: float, net_pnl: float) -> dict:
|
|
|
|
|
|
return {
|
|
|
|
|
|
"trade_id": trade_id,
|
|
|
|
|
|
"reason": reason,
|
|
|
|
|
|
"bars_held": int(max(0, bars_held)),
|
|
|
|
|
|
"pnl_pct": float(pnl_pct),
|
|
|
|
|
|
"net_pnl": float(net_pnl),
|
|
|
|
|
|
# Full retract legs already realize pnl incrementally; close-path capital
|
|
|
|
|
|
# application must be a no-op to avoid double-booking.
|
|
|
|
|
|
"capital_already_realized": True,
|
|
|
|
|
|
# Preserve explicit economic fields for observability/reporting.
|
|
|
|
|
|
"economic_pnl": float(net_pnl),
|
|
|
|
|
|
"economic_pnl_pct": float(pnl_pct),
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def _build_trade_execution_quality_summary(
    self,
    *,
    trade_id: str,
    pending: dict,
    exit_payload: dict,
    capital_before: float,
    capital_after: float,
    realized_pnl: float,
    exit_price: float,
    source: str,
) -> dict:
    """Delegate to the module-level execution-quality summary builder.

    All arguments are forwarded unchanged and the record is stamped with
    ``_ch_ts_us()``.

    Raises:
        RuntimeError: when the helper ``build_trade_execution_quality_summary``
            is ``None``.
    """
    builder = build_trade_execution_quality_summary
    if builder is None:
        raise RuntimeError("execution quality summary helper unavailable")
    summary_kwargs = {
        "trade_id": trade_id,
        "pending": pending,
        "exit_payload": exit_payload,
        "capital_before": capital_before,
        "capital_after": capital_after,
        "realized_pnl": realized_pnl,
        "exit_price": exit_price,
        "source": source,
        "ts": _ch_ts_us(),
    }
    return builder(**summary_kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
def _persist_trade_execution_quality(self, record: dict) -> None:
    """Fan an execution-quality record out to ClickHouse and both HZ maps.

    Each of the three writes is attempted independently; a failure is logged
    and never raised, so one failing sink does not block the others.
    """
    try:
        ch_put("trade_execution_quality", record)
    except Exception as exc:
        log(f" trade_execution_quality CH write failed: {exc}")

    hz_sinks = (
        ("state_map", "last_trade_execution_quality",
         " trade execution quality HZ state write failed"),
        ("control_map", "blue_last_trade_execution_quality",
         " trade execution quality control plane write failed"),
    )
    for map_attr, hz_key, failure_label in hz_sinks:
        try:
            # Attribute lookup stays inside the try, so a missing map is logged too.
            hz_map = getattr(self, map_attr)
            if hz_map is not None:
                hz_map.blocking().put(hz_key, record)
        except Exception as exc:
            log(f"{failure_label}: {exc}")
|
|
|
|
|
|
|
|
|
|
|
|
def _chain_state_for_pending(
    self,
    trade_id: str,
    pending: dict,
    *,
    chain_mode: str = "LIVE",
    chain_head_leg_id: str | None = None,
    chain_prev_leg_id: str | None = None,
    chain_seq: int | None = None,
) -> dict:
    """Build the canonical linked-list state for the current open trade head.

    Numeric fields are read from ``pending`` with falsy values collapsing to
    zero. Explicit keyword overrides take precedence over what ``pending``
    carries: ``chain_seq`` / ``chain_prev_leg_id`` when not ``None``, and
    ``chain_head_leg_id`` when truthy. The result is whatever
    ``_build_chain_state`` returns for those inputs.
    """
    if chain_seq is not None:
        seq = int(chain_seq)
    else:
        # Without an explicit sequence, the number of retraction legs is the sequence.
        seq = int(pending.get("retraction_legs", 0) or 0)

    qty = float(pending.get("quantity", 0.0) or 0.0)
    price = float(pending.get("entry_price", 0.0) or 0.0)
    size = float(pending.get("notional", pending.get("notional_entry", 0.0)) or 0.0)
    bar_at_entry = int(pending.get("entry_bar", 0) or 0)
    ts_at_entry = int(pending.get("entry_ts", 0) or 0)
    realized_total = float(pending.get("realized_pnl_legs_total", 0.0) or 0.0)

    root_id = str(pending.get("chain_root_trade_id", trade_id) or trade_id)
    head_id = chain_head_leg_id or pending.get("chain_head_leg_id")
    if chain_prev_leg_id is not None:
        prev_id = chain_prev_leg_id
    else:
        prev_id = str(pending.get("chain_prev_leg_id", "") or "")

    return _build_chain_state(
        trade_id=str(trade_id or ""),
        asset=str(pending.get("asset", "") or ""),
        side=str(pending.get("side", "") or "SHORT"),
        entry_price=price,
        quantity=qty,
        notional=size,
        entry_bar=bar_at_entry,
        entry_ts=ts_at_entry,
        retraction_legs=seq,
        realized_pnl_legs_total=realized_total,
        chain_root_trade_id=root_id,
        chain_head_leg_id=head_id,
        chain_prev_leg_id=prev_id,
        chain_mode=chain_mode,
    )
|
|
|
|
|
|
|
|
|
|
|
|
def _load_chain_ledger_state(self, trade_id: str) -> dict | None:
    """Load the latest reconstruction payload for a trade, if ClickHouse is reachable.

    Queries ``dolphin.trade_reconstruction`` over the ClickHouse HTTP
    interface for the newest row of ``trade_id`` and returns its decoded
    ``payload_json`` with ``event_type`` / ``event_id`` added.

    ``CHAIN_TOKEN_MISMATCH`` rows are skipped: they are forensic journal
    events written into the same table and carry no chain fields, so
    returning one as "latest" would make the restore rebuild the chain from
    sequence 0 / the ``:open`` head.

    Returns ``None`` when there is no matching row, the payload is not a JSON
    object, or the query fails for any reason (the failure is logged).
    """
    try:
        import base64 as _b64
        # ClickHouse string literals treat backslash as an escape character,
        # so it must be escaped before the quote doubling; otherwise a
        # trailing "\" (or "\'") in the id would break out of the literal.
        escaped_tid = str(trade_id or "").replace("\\", "\\\\").replace("'", "''")
        sql = (
            "SELECT event_type, event_id, payload_json "
            "FROM dolphin.trade_reconstruction "
            f"WHERE trade_id = '{escaped_tid}' "
            "AND event_type != 'CHAIN_TOKEN_MISMATCH' "
            "ORDER BY ts DESC LIMIT 1 FORMAT JSONEachRow"
        )
        # NOTE(review): endpoint and credentials are embedded in source;
        # consider sourcing them from configuration.
        req = urllib.request.Request(
            "http://localhost:8123/?database=dolphin",
            data=sql.encode(),
            headers={"Authorization": "Basic " +
                     _b64.b64encode(b"dolphin:dolphin_ch_2026").decode()},
        )
        with urllib.request.urlopen(req, timeout=5) as r:
            raw = r.read().decode().strip()
        if not raw:
            return None
        row = json.loads(raw.splitlines()[0])
        payload = json.loads(row.get("payload_json", "{}") or "{}")
        if not isinstance(payload, dict):
            # A non-object payload cannot carry chain hints.
            return None
        payload["event_type"] = row.get("event_type", "")
        payload["event_id"] = row.get("event_id", "")
        return payload
    except Exception as e:
        # Still best-effort (callers treat None as "no ledger state"), but
        # no longer silent: an unreachable ledger must be visible in logs.
        log(f" chain ledger state load failed: trade={trade_id} err={e}")
        return None
|
|
|
|
|
|
|
|
|
|
|
|
def _chain_state_from_reconstruction(self, trade_id: str, pending: dict, recon: dict | None) -> dict:
    """Rebuild the chain state for a trade from reconstruction hints plus live state.

    With no reconstruction payload the chain is derived in ``LEGACY`` mode
    from sequence 0 and the ``<trade_id>:open`` head. With one, its top-level
    keys (overlaid by its nested ``chain`` dict, if any) supply sequence,
    previous/head leg ids and mode; a payload without ``chain_token`` is
    marked ``LEGACY_REBUILT``.

    If the payload has a token that differs from the derived one, restore is
    NOT halted: the derived chain is kept, marked
    ``LEGACY_REBUILT_MISMATCH``, logged, and a ``CHAIN_TOKEN_MISMATCH``
    journal event is emitted best-effort.
    """
    hints: dict = {}
    seq = 0
    prev_leg_id = ""
    head_leg_id = f"{trade_id}:open"
    chain_mode = "LEGACY"

    if recon:
        hints.update(recon)
        nested_chain = recon.get("chain")
        if isinstance(nested_chain, dict):
            hints.update(nested_chain)
        seq = int(hints.get("chain_seq", hints.get("retraction_legs", 0)) or 0)
        prev_leg_id = str(hints.get("chain_prev_leg_id", "") or "")
        head_leg_id = str(hints.get("chain_head_leg_id", "") or head_leg_id)
        if "chain_token" in hints:
            chain_mode = str(hints.get("chain_mode", "LIVE") or "LIVE")
        else:
            chain_mode = "LEGACY_REBUILT"

    chain = self._chain_state_for_pending(
        trade_id,
        pending,
        chain_mode=chain_mode,
        chain_head_leg_id=head_leg_id,
        chain_prev_leg_id=prev_leg_id,
        chain_seq=seq,
    )

    stored_token = hints.get("chain_token")
    if not stored_token:
        return chain
    expected = str(stored_token or "")
    if expected == chain.get("chain_token"):
        return chain

    # Do not hard-halt restore on legacy/stale token drift.
    # Keep trading continuity with a rebuilt chain and surface the
    # mismatch loudly for follow-up reconciliation.
    derived = str(chain.get("chain_token", "") or "")
    log(
        " chain token mismatch on restore: "
        f"trade={trade_id} stored={expected[:12]} derived={derived[:12]} "
        "— continuing with derived token"
    )
    chain["chain_mode"] = "LEGACY_REBUILT_MISMATCH"
    # A log line is not forensics — emit a first-class journal
    # event so the mismatch is queryable (the XTZ 863c21da
    # incident took a day to reconstruct from grep).
    try:
        pending_excerpt = {
            k: pending.get(k)
            for k in ("asset", "side", "entry_price", "quantity", "notional", "entry_bar")
            if k in pending
        }
        mismatch_details = json.dumps(
            {
                "stored_token": expected,
                "derived_token": derived,
                "chain_mode": "LEGACY_REBUILT_MISMATCH",
                "pending": pending_excerpt,
            },
            default=str,
        )
        ch_put("trade_reconstruction", {
            "ts": _ch_ts_us(),
            "trade_id": trade_id,
            "event_type": "CHAIN_TOKEN_MISMATCH",
            "event_id": f"{trade_id}:chain_mismatch",
            "payload_json": mismatch_details,
            "market_state_bundle_json": "",
            "tp_base_pct": 0.0,
            "tp_effective_pct": 0.0,
            "our_leverage": 0.0,
        })
    except Exception:
        pass
    return chain
|
|
|
|
|
|
|
|
|
|
|
|
def _apply_internal_retract(self, cmd: dict, prices_dict: dict) -> tuple[dict | None, str]:
|
|
|
|
|
|
"""Apply partial retraction on in-memory BLUE position; returns (forced_exit, status)."""
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
pos = getattr(self.eng, "position", None)
|
|
|
|
|
|
if pos is None:
|
|
|
|
|
|
return None, "NO_POSITION"
|
|
|
|
|
|
tid = str(getattr(pos, "trade_id", "") or "")
|
|
|
|
|
|
if not tid:
|
|
|
|
|
|
return None, "NO_TRADE_ID"
|
|
|
|
|
|
req_tid = str(cmd.get("trade_id", "") or "").strip()
|
|
|
|
|
|
if req_tid and req_tid != tid:
|
|
|
|
|
|
return None, f"TRADE_MISMATCH open={tid} cmd={req_tid}"
|
|
|
|
|
|
pending = self._pending_entries.get(tid) or {}
|
|
|
|
|
|
side = str(pending.get("side", "SHORT") or "SHORT").upper()
|
|
|
|
|
|
entry_price = float(pending.get("entry_price", getattr(pos, "entry_price", 0.0)) or 0.0)
|
|
|
|
|
|
if entry_price <= 0:
|
|
|
|
|
|
return None, "BAD_ENTRY_PRICE"
|
|
|
|
|
|
open_notional = float(getattr(pos, "notional", 0.0) or 0.0)
|
|
|
|
|
|
if open_notional <= 0:
|
|
|
|
|
|
return None, "ZERO_NOTIONAL"
|
|
|
|
|
|
frac = float(cmd.get("fraction", 0.0) or 0.0)
|
|
|
|
|
|
if not (0.0 < frac <= 1.0):
|
|
|
|
|
|
return None, "BAD_FRACTION"
|
|
|
|
|
|
expected_chain = self._chain_state_for_pending(tid, pending)
|
|
|
|
|
|
cmd_chain_token = str(cmd.get("chain_token", "") or "").strip()
|
|
|
|
|
|
cmd_chain_head = str(cmd.get("chain_head_leg_id", "") or "").strip()
|
|
|
|
|
|
cmd_chain_root = str(cmd.get("chain_root_trade_id", "") or "").strip()
|
|
|
|
|
|
cmd_chain_seq = int(cmd.get("chain_seq", expected_chain["chain_seq"]) or expected_chain["chain_seq"])
|
|
|
|
|
|
if not cmd_chain_token or not cmd_chain_head or not cmd_chain_root:
|
|
|
|
|
|
return None, "NO_CHAIN_LINK"
|
|
|
|
|
|
if cmd_chain_root != expected_chain["chain_root_trade_id"]:
|
|
|
|
|
|
return None, f"CHAIN_ROOT_MISMATCH expected={expected_chain['chain_root_trade_id']} cmd={cmd_chain_root}"
|
|
|
|
|
|
if cmd_chain_head != expected_chain["chain_head_leg_id"] or cmd_chain_token != expected_chain["chain_token"]:
|
|
|
|
|
|
return None, (
|
|
|
|
|
|
f"CHAIN_MISMATCH head={expected_chain['chain_head_leg_id']} "
|
|
|
|
|
|
f"seq={expected_chain['chain_seq']} token={expected_chain['chain_token'][:12]}"
|
|
|
|
|
|
)
|
|
|
|
|
|
if cmd_chain_seq != expected_chain["chain_seq"]:
|
|
|
|
|
|
return None, (
|
|
|
|
|
|
f"CHAIN_SEQ_MISMATCH expected={expected_chain['chain_seq']} cmd={cmd_chain_seq}"
|
|
|
|
|
|
)
|
|
|
|
|
|
reduce_notional = min(open_notional, open_notional * frac)
|
|
|
|
|
|
if reduce_notional <= 0.0:
|
|
|
|
|
|
return None, "ZERO_REDUCE_NOTIONAL"
|
|
|
|
|
|
current_price = float(prices_dict.get(pos.asset, getattr(pos, "current_price", entry_price)) or entry_price)
|
|
|
|
|
|
if current_price <= 0:
|
|
|
|
|
|
current_price = entry_price
|
|
|
|
|
|
direction = -1.0 if side == "SHORT" else 1.0
|
|
|
|
|
|
pnl_pct_now = direction * ((current_price - entry_price) / entry_price)
|
|
|
|
|
|
net_pnl_leg = pnl_pct_now * reduce_notional
|
|
|
|
|
|
bars_held = max(0, int(self.bar_idx - int(pending.get("entry_bar", max(0, self.bar_idx - 1)) or max(0, self.bar_idx - 1))))
|
|
|
|
|
|
capital_before, capital_after = self._apply_trade_capital_update(
|
|
|
|
|
|
net_pnl_leg,
|
|
|
|
|
|
reason=str(cmd.get("reason", "RETRACT")),
|
|
|
|
|
|
source=str(cmd.get("source", "internal")),
|
|
|
|
|
|
trade_id=tid,
|
|
|
|
|
|
asset=str(getattr(pos, "asset", pending.get("asset", ""))),
|
|
|
|
|
|
mirror_control_plane=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
remaining_notional = max(0.0, open_notional - reduce_notional)
|
|
|
|
|
|
remaining_qty = round((remaining_notional / entry_price), 6) if entry_price > 0 else 0.0
|
|
|
|
|
|
pos.notional = remaining_notional
|
|
|
|
|
|
pos.current_price = current_price
|
|
|
|
|
|
try:
|
|
|
|
|
|
pos.pnl_pct = pnl_pct_now
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
pending.setdefault("notional_entry", float(pending.get("notional", open_notional) or open_notional))
|
|
|
|
|
|
pending["notional"] = remaining_notional
|
|
|
|
|
|
pending["quantity"] = remaining_qty
|
|
|
|
|
|
pending["retraction_legs"] = int(pending.get("retraction_legs", 0) or 0) + 1
|
|
|
|
|
|
pending["realized_pnl_legs_total"] = float(pending.get("realized_pnl_legs_total", 0.0) or 0.0) + net_pnl_leg
|
|
|
|
|
|
leg_seq = int(pending["retraction_legs"])
|
|
|
|
|
|
leg_id = f"{tid}:x{leg_seq:03d}"
|
|
|
|
|
|
chain_state = self._chain_state_for_pending(
|
|
|
|
|
|
tid,
|
|
|
|
|
|
{
|
|
|
|
|
|
**pending,
|
|
|
|
|
|
"chain_root_trade_id": expected_chain["chain_root_trade_id"],
|
|
|
|
|
|
"chain_prev_leg_id": expected_chain["chain_head_leg_id"],
|
|
|
|
|
|
"chain_head_leg_id": leg_id,
|
|
|
|
|
|
"chain_mode": "LIVE",
|
|
|
|
|
|
},
|
|
|
|
|
|
chain_mode="LIVE",
|
|
|
|
|
|
chain_head_leg_id=leg_id,
|
|
|
|
|
|
chain_prev_leg_id=expected_chain["chain_head_leg_id"],
|
|
|
|
|
|
chain_seq=leg_seq,
|
|
|
|
|
|
)
|
|
|
|
|
|
self._pending_entries[tid] = pending
|
|
|
|
|
|
pending.update(chain_state)
|
|
|
|
|
|
current_bars_held = bars_held
|
|
|
|
|
|
entry_bar = int(pending.get("entry_bar", max(0, self.bar_idx - current_bars_held)) or max(0, self.bar_idx - current_bars_held))
|
|
|
|
|
|
# Full close when the remainder is economic dust — threshold is
|
|
|
|
|
|
# POSITION_DUST_NOTIONAL_USD, deliberately ALIGNED with the
|
|
|
|
|
|
# _ps_write_open lifecycle gate so no remainder can exist that is
|
|
|
|
|
|
# "open" in memory but rounds to a zero-size row on disk
|
|
|
|
|
|
# (the malformed-OPEN class, MALFORMED_OPEN_RESTORE_BUG.md).
|
|
|
|
|
|
fully_closed = remaining_notional <= POSITION_DUST_NOTIONAL_USD or remaining_qty <= 0.0
|
|
|
|
|
|
# The leg ledger rows (trade_exit_legs + trade_reconstruction) are
|
|
|
|
|
|
# written for EVERY leg including the terminal one. The previous
|
|
|
|
|
|
# full-close early-return skipped them, losing the final leg from
|
|
|
|
|
|
# the §38.9 replay surface.
|
|
|
|
|
|
ch_put("trade_exit_legs", {
|
|
|
|
|
|
"ts": _ch_ts_us(),
|
|
|
|
|
|
"date": str(pending.get("entry_date", self.current_day or "")),
|
|
|
|
|
|
"strategy": "blue",
|
|
|
|
|
|
"trade_id": tid,
|
|
|
|
|
|
"chain_root_trade_id": str(chain_state.get("chain_root_trade_id", tid) or tid),
|
|
|
|
|
|
"chain_head_leg_id": str(chain_state.get("chain_head_leg_id", leg_id) or leg_id),
|
|
|
|
|
|
"chain_prev_leg_id": str(chain_state.get("chain_prev_leg_id", "") or ""),
|
|
|
|
|
|
"chain_seq": int(chain_state.get("chain_seq", leg_seq) or leg_seq),
|
|
|
|
|
|
"chain_token": str(chain_state.get("chain_token", "") or ""),
|
|
|
|
|
|
"chain_mode": str(chain_state.get("chain_mode", "LIVE") or "LIVE"),
|
|
|
|
|
|
"exit_leg_id": leg_id,
|
|
|
|
|
|
"exit_seq": leg_seq,
|
|
|
|
|
|
"command_id": str(cmd.get("command_id", "")),
|
|
|
|
|
|
"source": str(cmd.get("source", "internal")),
|
|
|
|
|
|
"reason": str(cmd.get("reason", "RETRACT")),
|
|
|
|
|
|
"asset": str(getattr(pos, "asset", pending.get("asset", ""))),
|
|
|
|
|
|
"side": side,
|
|
|
|
|
|
"entry_price": entry_price,
|
|
|
|
|
|
"exit_price": current_price,
|
|
|
|
|
|
"fraction": frac,
|
|
|
|
|
|
"capital_before": capital_before,
|
|
|
|
|
|
"capital_after": capital_after,
|
|
|
|
|
|
"exit_notional": reduce_notional,
|
|
|
|
|
|
"remaining_notional": remaining_notional,
|
|
|
|
|
|
"remaining_qty": remaining_qty,
|
|
|
|
|
|
"pnl_pct_leg": pnl_pct_now,
|
|
|
|
|
|
"pnl_leg": net_pnl_leg,
|
|
|
|
|
|
"pnl_realized_total": float(pending.get("realized_pnl_legs_total", 0.0) or 0.0),
|
|
|
|
|
|
"bars_held": bars_held,
|
|
|
|
|
|
})
|
|
|
|
|
|
ch_put("trade_reconstruction", {
|
|
|
|
|
|
"ts": _ch_ts_us(),
|
|
|
|
|
|
"trade_id": tid,
|
|
|
|
|
|
"event_type": "FULL_RETRACT_EXIT" if fully_closed else "PARTIAL_EXIT",
|
|
|
|
|
|
"event_id": leg_id,
|
|
|
|
|
|
"payload_json": json.dumps({
|
|
|
|
|
|
"command": cmd,
|
|
|
|
|
|
"entry_price": entry_price,
|
|
|
|
|
|
"exit_price": current_price,
|
|
|
|
|
|
"exit_notional": reduce_notional,
|
|
|
|
|
|
"remaining_notional": remaining_notional,
|
|
|
|
|
|
"pnl_pct_leg": pnl_pct_now,
|
|
|
|
|
|
"pnl_leg": net_pnl_leg,
|
|
|
|
|
|
"pnl_realized_total": float(pending.get("realized_pnl_legs_total", 0.0) or 0.0),
|
|
|
|
|
|
"bar_idx": int(self.bar_idx),
|
|
|
|
|
|
"chain": chain_state,
|
|
|
|
|
|
}),
|
|
|
|
|
|
"market_state_bundle_json": str(pending.get("market_state_bundle_json", "") or ""),
|
|
|
|
|
|
"tp_base_pct": float(pending.get("tp_base_pct", 0.0) or 0.0),
|
|
|
|
|
|
"tp_effective_pct": float(pending.get("tp_effective_pct", 0.0) or 0.0),
|
|
|
|
|
|
"our_leverage": float(pending.get("our_leverage", 0.0) or 0.0),
|
|
|
|
|
|
})
|
|
|
|
|
|
if fully_closed:
|
|
|
|
|
|
self.eng.position = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
self.eng.exit_manager._positions.pop(tid, None)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
total_realized = float(pending.get("realized_pnl_legs_total", 0.0) or 0.0)
|
|
|
|
|
|
denom = max(float(pending.get("notional_entry", open_notional) or open_notional), 1e-12)
|
|
|
|
|
|
forced = self._build_retract_exit(
|
|
|
|
|
|
trade_id=tid,
|
|
|
|
|
|
reason=str(cmd.get("reason", "RETRACT_FULL")),
|
|
|
|
|
|
bars_held=bars_held,
|
|
|
|
|
|
pnl_pct=total_realized / denom,
|
|
|
|
|
|
net_pnl=total_realized,
|
|
|
|
|
|
)
|
|
|
|
|
|
return forced, "FULL_CLOSE"
|
|
|
|
|
|
# Partial remainder: persist through the canonical OPEN write gate
|
|
|
|
|
|
# (lifecycle invariant enforced there) instead of a raw ch_put —
|
|
|
|
|
|
# the bypass was the causal origin of zero-size OPEN snapshots.
|
|
|
|
|
|
wrote = self._ps_write_open(
|
|
|
|
|
|
tid,
|
|
|
|
|
|
{
|
|
|
|
|
|
**pending,
|
|
|
|
|
|
"asset": str(getattr(pos, "asset", pending.get("asset", ""))),
|
|
|
|
|
|
"side": side,
|
|
|
|
|
|
"entry_price": entry_price,
|
|
|
|
|
|
"quantity": pending["quantity"],
|
|
|
|
|
|
"leverage": pending.get("leverage", getattr(pos, "leverage", 0.0)),
|
|
|
|
|
|
},
|
|
|
|
|
|
ts=_ch_ts_us(),
|
|
|
|
|
|
entry_bar=entry_bar,
|
|
|
|
|
|
bars_held=current_bars_held,
|
|
|
|
|
|
pnl=float(pending.get("realized_pnl_legs_total", 0.0) or 0.0),
|
|
|
|
|
|
)
|
|
|
|
|
|
if not wrote:
|
|
|
|
|
|
# Gate refused (dust slipped past fully_closed somehow) —
|
|
|
|
|
|
# surface loudly; the invariant says this must not happen.
|
|
|
|
|
|
log(
|
|
|
|
|
|
f"RETRACT WARNING: remainder for {tid} refused by OPEN gate "
|
|
|
|
|
|
f"(qty={pending['quantity']} notional={remaining_notional:.6f}) — "
|
|
|
|
|
|
"treat as accounting anomaly"
|
|
|
|
|
|
)
|
|
|
|
|
|
return None, "PARTIAL_OK"
|
|
|
|
|
|
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
def _process_runtime_commands(
|
|
|
|
|
|
self,
|
|
|
|
|
|
prices_dict: dict,
|
|
|
|
|
|
*,
|
|
|
|
|
|
allow_retract: bool = True,
|
|
|
|
|
|
) -> dict | None:
|
2026-06-12 14:59:49 +02:00
|
|
|
|
"""Drain BLUE runtime commands from control plane and apply retractions."""
|
|
|
|
|
|
if self.control_map is None:
|
|
|
|
|
|
return None
|
|
|
|
|
|
key = "blue_runtime_commands"
|
|
|
|
|
|
try:
|
|
|
|
|
|
raw = self.control_map.blocking().get(key)
|
|
|
|
|
|
if not raw:
|
|
|
|
|
|
return None
|
|
|
|
|
|
queue = json.loads(raw) if isinstance(raw, str) else list(raw)
|
|
|
|
|
|
if not isinstance(queue, list) or not queue:
|
|
|
|
|
|
return None
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
if allow_retract:
|
|
|
|
|
|
self.control_map.blocking().put(key, json.dumps([]))
|
|
|
|
|
|
else:
|
|
|
|
|
|
deferred = [
|
|
|
|
|
|
cmd for cmd in queue
|
|
|
|
|
|
if isinstance(cmd, dict)
|
|
|
|
|
|
and str(cmd.get("action", "") or "").upper() == "RETRACT"
|
|
|
|
|
|
]
|
|
|
|
|
|
queue = [
|
|
|
|
|
|
cmd for cmd in queue
|
|
|
|
|
|
if not (
|
|
|
|
|
|
isinstance(cmd, dict)
|
|
|
|
|
|
and str(cmd.get("action", "") or "").upper() == "RETRACT"
|
|
|
|
|
|
)
|
|
|
|
|
|
]
|
|
|
|
|
|
self.control_map.blocking().put(key, json.dumps(deferred))
|
2026-06-12 14:59:49 +02:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"RUNTIME_CMD read failed: {e}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
forced_exit = None
|
|
|
|
|
|
for cmd in queue:
|
|
|
|
|
|
if not isinstance(cmd, dict):
|
|
|
|
|
|
continue
|
|
|
|
|
|
cid = str(cmd.get("command_id", "") or "")
|
|
|
|
|
|
if cid and cid in self._processed_retract_set:
|
|
|
|
|
|
hotkey = str(cmd.get("action", "") or "").upper() or "RUNTIME"
|
|
|
|
|
|
ch_put("hotkey_audit", {
|
|
|
|
|
|
"ts": int(time.time() * 1000),
|
|
|
|
|
|
"hotkey": f"{hotkey}_REPLAY",
|
|
|
|
|
|
"request_json": json.dumps(cmd, default=str),
|
|
|
|
|
|
"result": "IDEMPOTENT_REPLAY",
|
|
|
|
|
|
"effect_json": json.dumps({}, default=str),
|
|
|
|
|
|
})
|
|
|
|
|
|
continue
|
|
|
|
|
|
action = str(cmd.get("action", "") or "").upper()
|
|
|
|
|
|
if action == "RETRACT":
|
|
|
|
|
|
fx, status = self._apply_internal_retract(cmd, prices_dict)
|
|
|
|
|
|
self._mark_runtime_command_seen(cid)
|
|
|
|
|
|
ch_put("hotkey_audit", {
|
|
|
|
|
|
"ts": int(time.time() * 1000),
|
|
|
|
|
|
"hotkey": "RETRACT",
|
|
|
|
|
|
"request_json": json.dumps(cmd, default=str),
|
|
|
|
|
|
"result": status,
|
|
|
|
|
|
"effect_json": json.dumps({"forced_exit": bool(fx)}, default=str),
|
|
|
|
|
|
})
|
|
|
|
|
|
if fx is not None:
|
|
|
|
|
|
forced_exit = fx
|
|
|
|
|
|
continue
|
|
|
|
|
|
if action in ("SET_CAPITAL", "CAPITAL_UPDATE"):
|
|
|
|
|
|
effect, status = self._apply_internal_capital_update(cmd)
|
|
|
|
|
|
self._mark_runtime_command_seen(cid)
|
|
|
|
|
|
ch_put("hotkey_audit", {
|
|
|
|
|
|
"ts": int(time.time() * 1000),
|
|
|
|
|
|
"hotkey": "CAPITAL_UPDATE",
|
|
|
|
|
|
"request_json": json.dumps(cmd, default=str),
|
|
|
|
|
|
"result": status,
|
|
|
|
|
|
"effect_json": json.dumps(effect or {}, default=str),
|
|
|
|
|
|
})
|
|
|
|
|
|
continue
|
|
|
|
|
|
return forced_exit
|
|
|
|
|
|
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
def _drain_runtime_commands(
|
|
|
|
|
|
self,
|
|
|
|
|
|
prices_dict: dict | None = None,
|
|
|
|
|
|
*,
|
|
|
|
|
|
allow_retract: bool = True,
|
|
|
|
|
|
) -> dict | None:
|
2026-06-12 14:59:49 +02:00
|
|
|
|
"""Serialize queue draining so the scan and heartbeat paths do not race."""
|
|
|
|
|
|
lock = getattr(self, "_runtime_command_lock", None)
|
|
|
|
|
|
if lock is None:
|
|
|
|
|
|
lock = threading.Lock()
|
|
|
|
|
|
self._runtime_command_lock = lock
|
|
|
|
|
|
with lock:
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
return self._process_runtime_commands(
|
|
|
|
|
|
dict(prices_dict or self._last_prices_dict or {}),
|
|
|
|
|
|
allow_retract=allow_retract,
|
|
|
|
|
|
)
|
2026-06-12 14:59:49 +02:00
|
|
|
|
|
|
|
|
|
|
def _compute_vol_ok(self, scan):
    """Return True when the BTC volatility gate is open for this scan.

    The BTCUSDT price from the scan is appended to ``self.btc_prices``.
    Once at least ``BTC_VOL_WINDOW`` prices are held, the standard deviation
    of their simple returns is compared against ``self.vol_p60_threshold``.

    Returns True (gate open) when the scan carries no assets/prices, has no
    BTCUSDT entry, or fewer than ``BTC_VOL_WINDOW`` prices are held so far.

    A BTC price that is non-numeric, non-finite or not positive is kept out
    of the history: appending it would turn the std into NaN (or divide by
    zero), which keeps the gate closed until that tick leaves the history.
    The gate is then evaluated on the prices already held.
    """
    assets = scan.get('assets', [])
    prices = scan.get('asset_prices', [])
    if not assets or not prices:
        return True
    btc_price = dict(zip(assets, prices)).get('BTCUSDT')
    if btc_price is None:
        return True
    try:
        btc_value = float(btc_price)
    except (TypeError, ValueError):
        btc_value = float('nan')
    if math.isfinite(btc_value) and btc_value > 0.0:
        self.btc_prices.append(btc_value)
    if len(self.btc_prices) < BTC_VOL_WINDOW:
        return True
    import numpy as np
    arr = np.array(self.btc_prices)
    # Simple returns between consecutive held prices.
    dvol = float(np.std(np.diff(arr) / arr[:-1]))
    return dvol > float(self.vol_p60_threshold)
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
def _normalize_ng7(scan: dict) -> dict:
    """Convert an NG7-format scan into the canonical flat dict BLUE consumes.

    Thin wrapper: the conversion itself is done by ``normalize_ng7_scan``.
    """
    canonical = normalize_ng7_scan(scan)
    return canonical
|
|
|
|
|
|
|
|
|
|
|
|
def on_scan(self, event):
    """Scan listener entry point: hand the event to the scan executor.

    Nothing is processed here.  The event is ignored when a restore failure
    has been flagged or the event carries no value; otherwise the receive
    time is recorded in ``_last_scan_event_ts`` and ``_process_scan`` is
    submitted to ``_scan_executor`` with the event and that timestamp.
    """
    if self._restore_failed:
        return
    if not event.value:
        return
    received_at = time.time()
    self._last_scan_event_ts = received_at
    self._scan_executor.submit(self._process_scan, event, received_at)
|
|
|
|
|
|
|
|
|
|
|
|
def _process_scan(self, event, listener_time):
|
|
|
|
|
|
try:
|
|
|
|
|
|
if self._restore_failed or not event.value:
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
scan = json.loads(event.value) if isinstance(event.value, str) else event.value
|
|
|
|
|
|
|
|
|
|
|
|
# Normalise NG7 format → NG5-compatible flat dict before any field access
|
|
|
|
|
|
if scan.get('version') == 'NG7':
|
|
|
|
|
|
scan = self._normalize_ng7(scan)
|
|
|
|
|
|
|
|
|
|
|
|
scan_number = int(scan.get('scan_number') or 0)
|
|
|
|
|
|
|
|
|
|
|
|
# Dedup: scan_number is authoritative (monotonically increasing).
|
|
|
|
|
|
# file_mtime / timestamp are unreliable across NG7 restart probes.
|
|
|
|
|
|
# Exception: the scanner resets numbering to 0 on restart — a large
|
|
|
|
|
|
# backwards jump must re-anchor the ratchet, or BLUE drops every
|
|
|
|
|
|
# scan until manually restarted (near-miss on 2026-06-09/10).
|
|
|
|
|
|
with self._dedup_lock:
|
|
|
|
|
|
if scan_number > 0 and scan_number <= self.last_scan_number:
|
|
|
|
|
|
if scan_number < self.last_scan_number - SCAN_NUMBER_RESET_GAP:
|
|
|
|
|
|
log(f"WARN scanner restart detected: scan_number {self.last_scan_number} → "
|
|
|
|
|
|
f"{scan_number} — re-anchoring dedup ratchet")
|
|
|
|
|
|
else:
|
|
|
|
|
|
self._dupe_drops_total += 1
|
|
|
|
|
|
return
|
|
|
|
|
|
self.last_scan_number = scan_number
|
|
|
|
|
|
self._last_scan_accept_ts = time.time()
|
|
|
|
|
|
self.scans_processed += 1
|
|
|
|
|
|
|
|
|
|
|
|
self._rollover_day()
|
|
|
|
|
|
|
|
|
|
|
|
assets = scan.get('assets') or []
|
|
|
|
|
|
if assets and not self.ob_assets:
|
|
|
|
|
|
self._wire_obf(assets)
|
|
|
|
|
|
|
|
|
|
|
|
prices = scan.get('asset_prices') or []
|
|
|
|
|
|
if assets and prices and len(assets) != len(prices):
|
|
|
|
|
|
log(f"WARN scan #{scan_number}: assets/prices mismatch "
|
|
|
|
|
|
f"({len(assets)}≠{len(prices)}) — dropped")
|
|
|
|
|
|
return
|
|
|
|
|
|
prices_dict = dict(zip(assets, prices)) if assets and prices else {}
|
|
|
|
|
|
self._last_prices_dict = dict(prices_dict)
|
|
|
|
|
|
# Remove stablecoins — they should never be selected as a trade asset
|
|
|
|
|
|
for sym in _STABLECOIN_SYMBOLS:
|
|
|
|
|
|
prices_dict.pop(sym, None)
|
|
|
|
|
|
|
|
|
|
|
|
self._record_bounce_prices(prices_dict)
|
|
|
|
|
|
|
|
|
|
|
|
vol_ok = self._compute_vol_ok(scan)
|
|
|
|
|
|
|
|
|
|
|
|
vel_div = float(scan.get('vel_div') or 0.0)
|
|
|
|
|
|
if not math.isfinite(vel_div):
|
|
|
|
|
|
log(f"WARN scan #{scan_number}: non-finite vel_div={vel_div} — clamped to 0.0")
|
|
|
|
|
|
vel_div = 0.0
|
|
|
|
|
|
|
|
|
|
|
|
v50_vel = float(scan.get('w50_velocity') or 0.0)
|
|
|
|
|
|
v750_vel = float(scan.get('w750_velocity') or 0.0)
|
|
|
|
|
|
if not math.isfinite(v50_vel): v50_vel = 0.0
|
|
|
|
|
|
if not math.isfinite(v750_vel): v750_vel = 0.0
|
|
|
|
|
|
self.last_w750_vel = v750_vel
|
|
|
|
|
|
|
|
|
|
|
|
# Feed live OB data into OBF engine for this bar (AGENT_SPEC_OBF_LIVE_SWITCHOVER)
|
|
|
|
|
|
if self.ob_eng is not None and self.ob_assets:
|
|
|
|
|
|
self.ob_eng.step_live(self.ob_assets, self.bar_idx)
|
|
|
|
|
|
|
|
|
|
|
|
# Live posture sync — update engine posture + regime_dd_halt together
|
|
|
|
|
|
posture_now = self._read_posture()
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
prev_posture = getattr(self.eng, '_day_posture', 'APEX')
|
|
|
|
|
|
if posture_now != prev_posture:
|
|
|
|
|
|
if posture_now in ('TURTLE', 'HIBERNATE'):
|
|
|
|
|
|
self.eng.regime_dd_halt = True # always block new entries
|
|
|
|
|
|
if posture_now == 'HIBERNATE' and self.eng.position is not None:
|
|
|
|
|
|
open_tid = str(getattr(self.eng.position, "trade_id", "") or "")
|
|
|
|
|
|
if not open_tid:
|
|
|
|
|
|
self._mark_restore_failure("HIBERNATE posture with open position missing trade_id")
|
|
|
|
|
|
return
|
|
|
|
|
|
if open_tid not in self._pending_entries:
|
|
|
|
|
|
self._mark_restore_failure(
|
|
|
|
|
|
f"HIBERNATE posture with open position missing pending entry: {open_tid}"
|
|
|
|
|
|
)
|
|
|
|
|
|
return
|
|
|
|
|
|
if (posture_now == 'HIBERNATE'
|
|
|
|
|
|
and self.eng.position is not None
|
|
|
|
|
|
and not self._hibernate_protect_active):
|
|
|
|
|
|
# Position in flight: arm TP+SL instead of letting
|
|
|
|
|
|
# _manage_position() fire HIBERNATE_HALT next bar.
|
|
|
|
|
|
# _day_posture stays at prev value — no HALT fires.
|
|
|
|
|
|
self._hibernate_protect_position()
|
|
|
|
|
|
else:
|
|
|
|
|
|
self.eng._day_posture = posture_now
|
|
|
|
|
|
log(f"POSTURE_SYNC: {posture_now} — halt set")
|
|
|
|
|
|
else:
|
|
|
|
|
|
self.eng._day_posture = posture_now
|
|
|
|
|
|
self.eng.regime_dd_halt = False
|
|
|
|
|
|
if self._hibernate_protect_active:
|
|
|
|
|
|
log(f"POSTURE_SYNC: {posture_now} — posture recovered, clearing protect mode")
|
|
|
|
|
|
self._hibernate_protect_active = None
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"POSTURE_SYNC: {posture_now} — halt lifted")
|
|
|
|
|
|
|
|
|
|
|
|
# EsoF value gate — exposure only, no alpha or selection changes.
|
|
|
|
|
|
self._sync_esof_size_gate()
|
|
|
|
|
|
self._sync_tp_threshold()
|
|
|
|
|
|
self._sync_sc_threshold_advisor(scan_number=scan_number, vel_div=vel_div)
|
|
|
|
|
|
self._sync_sc_gauge_advisor(scan_number=scan_number, vel_div=vel_div)
|
|
|
|
|
|
self._apply_runtime_direction()
|
|
|
|
|
|
if self._market_state_runtime is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._market_state_runtime.update_scan_state(
|
|
|
|
|
|
scan_payload=scan,
|
|
|
|
|
|
prices_dict=prices_dict,
|
|
|
|
|
|
scan_number=scan_number,
|
|
|
|
|
|
vel_div=vel_div,
|
|
|
|
|
|
v50_vel=v50_vel,
|
|
|
|
|
|
v750_vel=v750_vel,
|
|
|
|
|
|
vol_ok=vol_ok,
|
|
|
|
|
|
posture=posture_now,
|
|
|
|
|
|
exf_snapshot=getattr(self, "_last_exf", {}) or {},
|
|
|
|
|
|
esof_payload=self._read_esof_payload(),
|
|
|
|
|
|
top_k_assets=5,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" MarketStateRuntime scan update failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
if self.eng.position is not None and prices_dict:
|
|
|
|
|
|
prices_dict = self._inject_obf_midprice(prices_dict)
|
|
|
|
|
|
|
|
|
|
|
|
step_start = time.time()
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
self._apply_catastrophic_floor_to_open_position()
|
|
|
|
|
|
result = self.eng.step_bar(
|
|
|
|
|
|
bar_idx=self.bar_idx, vel_div=vel_div, prices=prices_dict,
|
|
|
|
|
|
vol_regime_ok=vol_ok, v50_vel=v50_vel, v750_vel=v750_vel
|
|
|
|
|
|
)
|
|
|
|
|
|
self.bar_idx += 1
|
|
|
|
|
|
scan_to_fill_ms = (time.time() - listener_time) * 1000
|
|
|
|
|
|
step_bar_ms = (time.time() - step_start) * 1000
|
|
|
|
|
|
log(f"LATENCY scan #{scan_number}: scan→fill={scan_to_fill_ms:.1f}ms step_bar={step_bar_ms:.1f}ms vel_div={vel_div:.5f}")
|
|
|
|
|
|
|
|
|
|
|
|
ch_put("eigen_scans", {
|
|
|
|
|
|
"ts": _ch_ts_us(),
|
|
|
|
|
|
"scan_number": scan_number,
|
|
|
|
|
|
"scan_uuid": str(scan.get("scan_uuid") or ""),
|
|
|
|
|
|
"vel_div": vel_div,
|
|
|
|
|
|
"w50_velocity": v50_vel,
|
|
|
|
|
|
"w750_velocity": v750_vel,
|
|
|
|
|
|
"instability_50": float(scan.get("instability_50") or 0.0),
|
|
|
|
|
|
"scan_to_fill_ms": scan_to_fill_ms,
|
|
|
|
|
|
"step_bar_ms": step_bar_ms,
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
if result.get('entry'):
|
|
|
|
|
|
self.trades_executed += 1
|
|
|
|
|
|
e = result['entry']
|
|
|
|
|
|
log(f"ENTRY: {e} [{ALGO_VERSION}]")
|
|
|
|
|
|
# Cache entry fields for CH trade_events on exit
|
|
|
|
|
|
tid = self._resolve_trade_id(e.get('trade_id'), create_if_missing=True)
|
|
|
|
|
|
e['trade_id'] = tid
|
|
|
|
|
|
if tid:
|
|
|
|
|
|
efsm_decision = None
|
|
|
|
|
|
overlay_flip = False
|
|
|
|
|
|
if self._efsm is not None and int(e.get('direction', -1)) == 1 and int(self.trade_direction) == -1:
|
|
|
|
|
|
efsm_decision = self._efsm.tag_next_entry(
|
|
|
|
|
|
asset=str(e.get('asset', '') or ''),
|
|
|
|
|
|
entry_ts=datetime.now(timezone.utc),
|
|
|
|
|
|
metadata={"trade_id": tid},
|
|
|
|
|
|
)
|
|
|
|
|
|
overlay_flip = bool(efsm_decision and efsm_decision.action == "TAG" and efsm_decision.side == "LONG")
|
|
|
|
|
|
self._pending_entries[tid] = {
|
|
|
|
|
|
'trade_id': tid,
|
|
|
|
|
|
'asset': e.get('asset', ''),
|
|
|
|
|
|
'side': 'SHORT' if e.get('direction', -1) == -1 else 'LONG',
|
|
|
|
|
|
'entry_price': float(e.get('entry_price', 0) or 0),
|
|
|
|
|
|
'quantity': round(float(e.get('notional', 0) or 0) / float(e.get('entry_price', 1) or 1), 6),
|
|
|
|
|
|
'notional': float(e.get('notional', 0) or 0),
|
|
|
|
|
|
'notional_entry': float(e.get('notional', 0) or 0),
|
|
|
|
|
|
'leverage': float(e.get('leverage', 0) or 0),
|
|
|
|
|
|
'vel_div_entry': float(e.get('vel_div', 0) or 0),
|
|
|
|
|
|
'boost_at_entry': float(getattr(getattr(self, 'eng', None), 'acb_boost', 1.0) or 1.0),
|
|
|
|
|
|
'beta_at_entry': float(getattr(getattr(self, 'eng', None), 'acb_beta', 1.0) or 1.0),
|
|
|
|
|
|
'posture': posture_now,
|
|
|
|
|
|
'entry_ts': _ch_ts_us(),
|
|
|
|
|
|
'entry_date': (self.current_day or ''),
|
|
|
|
|
|
'entry_bar': self.bar_idx,
|
|
|
|
|
|
'overlay_flip': overlay_flip,
|
|
|
|
|
|
'overlay_reason': getattr(efsm_decision, "reason", "") if efsm_decision else "",
|
|
|
|
|
|
'overlay_slot': int(getattr(efsm_decision, "consumed_slot", 0) or 0) if efsm_decision else 0,
|
|
|
|
|
|
'retraction_legs': 0,
|
|
|
|
|
|
'realized_pnl_legs_total': 0.0,
|
|
|
|
|
|
}
|
|
|
|
|
|
_tp_ctx = self._tp_curve_context(notional=float(self._pending_entries[tid]["notional"] or 0.0))
|
|
|
|
|
|
self._pending_entries[tid].update(_tp_ctx)
|
|
|
|
|
|
self._apply_sc_entry_size_multiplier(tid, e, self._pending_entries[tid])
|
|
|
|
|
|
self._pending_entries[tid].update(self._chain_state_for_pending(
|
|
|
|
|
|
tid,
|
|
|
|
|
|
self._pending_entries[tid],
|
|
|
|
|
|
chain_mode="LIVE",
|
|
|
|
|
|
chain_head_leg_id=f"{tid}:open",
|
|
|
|
|
|
chain_prev_leg_id="",
|
|
|
|
|
|
chain_seq=0,
|
|
|
|
|
|
))
|
|
|
|
|
|
if overlay_flip:
|
|
|
|
|
|
log(
|
|
|
|
|
|
f"EFSM TAG: trade_id={tid} asset={e.get('asset','')} "
|
|
|
|
|
|
f"slot={self._pending_entries[tid]['overlay_slot']} "
|
|
|
|
|
|
f"reason={self._pending_entries[tid]['overlay_reason']}"
|
|
|
|
|
|
)
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
self._apply_catastrophic_floor_to_open_position()
|
|
|
|
|
|
# Persist position to CH so restarts can recover it
|
|
|
|
|
|
self._ps_write_open(tid, self._pending_entries[tid])
|
|
|
|
|
|
ch_put("trade_reconstruction", {
|
|
|
|
|
|
"ts": _ch_ts_us(),
|
|
|
|
|
|
"trade_id": tid,
|
|
|
|
|
|
"event_type": "OPEN",
|
|
|
|
|
|
"event_id": f"{tid}:open",
|
|
|
|
|
|
"payload_json": json.dumps(self._pending_entries[tid], default=str),
|
|
|
|
|
|
"market_state_bundle_json": str(self._pending_entries[tid].get("market_state_bundle_json", "") or ""),
|
|
|
|
|
|
"tp_base_pct": float(self._pending_entries[tid].get("tp_base_pct", 0.0) or 0.0),
|
|
|
|
|
|
"tp_effective_pct": float(self._pending_entries[tid].get("tp_effective_pct", 0.0) or 0.0),
|
|
|
|
|
|
"our_leverage": float(self._pending_entries[tid].get("our_leverage", 0.0) or 0.0),
|
|
|
|
|
|
})
|
|
|
|
|
|
self._announce_position_event(
|
|
|
|
|
|
kind="trade_entry",
|
|
|
|
|
|
severity="info",
|
|
|
|
|
|
title=f"[BLUE] ENTRY {e.get('asset', '')} {self._pending_entries[tid]['side']}",
|
|
|
|
|
|
message=(
|
|
|
|
|
|
f"entry={float(e.get('entry_price', 0) or 0):.6f} "
|
|
|
|
|
|
f"qty={self._pending_entries[tid]['quantity']:.6f} "
|
|
|
|
|
|
f"lev={self._pending_entries[tid]['leverage']:.2f}x"
|
|
|
|
|
|
),
|
|
|
|
|
|
metadata={
|
|
|
|
|
|
"trade_id": tid,
|
|
|
|
|
|
"asset": self._pending_entries[tid]["asset"],
|
|
|
|
|
|
"side": self._pending_entries[tid]["side"],
|
|
|
|
|
|
"entry_price": self._pending_entries[tid]["entry_price"],
|
|
|
|
|
|
"quantity": self._pending_entries[tid]["quantity"],
|
|
|
|
|
|
"leverage": self._pending_entries[tid]["leverage"],
|
|
|
|
|
|
"vel_div_entry": self._pending_entries[tid]["vel_div_entry"],
|
|
|
|
|
|
"boost_at_entry": self._pending_entries[tid]["boost_at_entry"],
|
|
|
|
|
|
"beta_at_entry": self._pending_entries[tid]["beta_at_entry"],
|
|
|
|
|
|
"posture": self._pending_entries[tid]["posture"],
|
|
|
|
|
|
"entry_ts": self._pending_entries[tid]["entry_ts"],
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
if self._v7_exit_engine is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
side = 1 if e.get('direction', -1) == -1 else 0
|
|
|
|
|
|
ctx = self._v7_exit_engine.make_context(
|
|
|
|
|
|
entry_price=float(e.get('entry_price', 0) or 0),
|
|
|
|
|
|
entry_bar=max(0, self.bar_idx - 1),
|
|
|
|
|
|
side=side,
|
|
|
|
|
|
)
|
|
|
|
|
|
if self._last_exf:
|
|
|
|
|
|
ctx.set_exf(
|
|
|
|
|
|
funding=float(self._last_exf.get('funding', 0.0) or 0.0),
|
|
|
|
|
|
dvol=float(self._last_exf.get('dvol', 0.0) or 0.0),
|
|
|
|
|
|
fear_greed=float(self._last_exf.get('fear_greed', 0.0) or 0.0),
|
|
|
|
|
|
taker=float(self._last_exf.get('taker', 0.0) or 0.0),
|
|
|
|
|
|
)
|
|
|
|
|
|
self._v7_contexts[tid] = ctx
|
|
|
|
|
|
self._v7_decisions.pop(tid, None)
|
|
|
|
|
|
self._v7_decision_seq[tid] = 0
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" V7 live context init failed for {tid}: {e}")
|
|
|
|
|
|
# Shadow AE: notify of entry (vel_div at entry bar is in scope)
|
|
|
|
|
|
if self._ae is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._ae.on_entry(
|
|
|
|
|
|
trade_id=tid,
|
|
|
|
|
|
asset=e.get('asset', ''),
|
|
|
|
|
|
direction=int(e.get('direction', -1)),
|
|
|
|
|
|
entry_price=float(e.get('entry_price', 0) or 0),
|
|
|
|
|
|
vel_div_entry=vel_div,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
if self._sc_advisor is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
payload = self._read_esof_payload()
|
|
|
|
|
|
rec = self._sc_advisor.evaluate(
|
|
|
|
|
|
trade_id=tid,
|
|
|
|
|
|
asset=e.get('asset', ''),
|
|
|
|
|
|
sc=_safe_float(payload.get('advisory_score', payload.get('score', 0.0)) if payload else None),
|
|
|
|
|
|
vel_div=vel_div,
|
|
|
|
|
|
exf_snapshot=getattr(self, "_last_exf", {}) or {},
|
|
|
|
|
|
trade_history=getattr(self.eng, 'trade_history', []),
|
|
|
|
|
|
current_mult=float(self._last_esof_size_mult or 1.0),
|
|
|
|
|
|
esof_payload=payload,
|
|
|
|
|
|
scan_number=scan_number,
|
|
|
|
|
|
bar_idx=self.bar_idx,
|
|
|
|
|
|
strategy="blue",
|
|
|
|
|
|
log_shadow=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
self._pending_entries[tid]['sc_threshold_advisor'] = rec
|
|
|
|
|
|
self._pending_entries[tid]['sc_exec_mult'] = float(self._last_esof_size_mult or 1.0)
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._record_sc_haircut(
|
|
|
|
|
|
trade_id=tid,
|
|
|
|
|
|
pending=self._pending_entries[tid],
|
|
|
|
|
|
source="sc_threshold_entry",
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"SC haircut record failed for {tid}: {e}")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
if self._sc_gauge is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
payload = self._read_esof_payload()
|
|
|
|
|
|
rec = self._sc_gauge.evaluate(
|
|
|
|
|
|
trade_id=tid,
|
|
|
|
|
|
asset=e.get('asset', ''),
|
|
|
|
|
|
sc=_safe_float(payload.get('advisory_score', payload.get('score', 0.0)) if payload else None),
|
|
|
|
|
|
vel_div=vel_div,
|
|
|
|
|
|
exf_snapshot=getattr(self, "_last_exf", {}) or {},
|
|
|
|
|
|
obf_snapshot=self._current_obf_snapshot(e.get('asset', ''), self.bar_idx),
|
|
|
|
|
|
trade_history=getattr(self.eng, 'trade_history', []),
|
|
|
|
|
|
current_mult=float(self._last_esof_size_mult or 1.0),
|
|
|
|
|
|
esof_payload=payload,
|
|
|
|
|
|
scan_number=scan_number,
|
|
|
|
|
|
bar_idx=self.bar_idx,
|
|
|
|
|
|
strategy="blue",
|
|
|
|
|
|
log_shadow=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
self._pending_entries[tid]['sc_bucket_gauge'] = rec
|
|
|
|
|
|
self._pending_entries[tid]['sc_bucket_gauge_exec_mult'] = float(self._last_esof_size_mult or 1.0)
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._record_sc_haircut(
|
|
|
|
|
|
trade_id=tid,
|
|
|
|
|
|
pending=self._pending_entries[tid],
|
|
|
|
|
|
source="sc_bucket_gauge",
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"SC haircut record failed for {tid}: {e}")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
if self._bounce_advisor is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
entry_ts_val = float(self._pending_entries[tid].get('entry_ts', 0) or 0)
|
|
|
|
|
|
entry_ts_dt = datetime.fromtimestamp(entry_ts_val / 1_000_000, tz=timezone.utc) if entry_ts_val else None
|
|
|
|
|
|
bounce_rec = self._bounce_eval(
|
|
|
|
|
|
trade_id=tid,
|
|
|
|
|
|
asset=str(e.get('asset', '')),
|
|
|
|
|
|
side=self._pending_entries[tid]['side'],
|
|
|
|
|
|
source="entry",
|
|
|
|
|
|
scan_number=scan_number,
|
|
|
|
|
|
entry_ts=entry_ts_dt,
|
|
|
|
|
|
current_price=float(prices_dict.get(e.get('asset', ''), e.get('entry_price', 0)) or e.get('entry_price', 0) or 0),
|
|
|
|
|
|
entry_price=float(e.get('entry_price', 0) or 0),
|
|
|
|
|
|
quantity=float(self._pending_entries[tid].get('quantity', 0) or 0),
|
|
|
|
|
|
notional=float(e.get('notional', 0) or 0),
|
|
|
|
|
|
leverage=float(e.get('leverage', 0) or 0),
|
|
|
|
|
|
vel_div=vel_div,
|
|
|
|
|
|
current_mult=float(self._last_esof_size_mult or 1.0),
|
|
|
|
|
|
bars_held=0,
|
|
|
|
|
|
log_shadow=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
if bounce_rec:
|
|
|
|
|
|
self._pending_entries[tid]['bounce_advisor_entry'] = bounce_rec
|
|
|
|
|
|
self._pending_entries[tid]['bounce_advisor_latest'] = bounce_rec
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" BounceAdvisor entry eval failed for {tid}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
# V7 remains the authoritative live exit brain, but the explicit
|
|
|
|
|
|
# retract bridge must stay active even when the engine callback is
|
|
|
|
|
|
# wired. Otherwise RETRACT decisions stay observational only.
|
|
|
|
|
|
if (self._v7_exit_engine is not None
|
|
|
|
|
|
and self.eng is not None
|
|
|
|
|
|
and getattr(self.eng, 'position', None) is not None):
|
|
|
|
|
|
pos = self.eng.position
|
|
|
|
|
|
tid_v7 = getattr(pos, 'trade_id', '')
|
|
|
|
|
|
pending_v7 = self._pending_entries.get(tid_v7, {})
|
|
|
|
|
|
ctx_v7 = self._v7_contexts.get(tid_v7)
|
|
|
|
|
|
if ctx_v7 is None and pending_v7:
|
|
|
|
|
|
try:
|
|
|
|
|
|
ctx_v7 = self._v7_exit_engine.make_context(
|
|
|
|
|
|
entry_price=float(pending_v7.get('entry_price', pos.entry_price) or pos.entry_price or 0.0),
|
|
|
|
|
|
entry_bar=int(pending_v7.get('entry_bar', max(0, self.bar_idx - 1)) or max(0, self.bar_idx - 1)),
|
|
|
|
|
|
side=1 if pending_v7.get('side', 'SHORT') == 'SHORT' else 0,
|
|
|
|
|
|
)
|
|
|
|
|
|
if self._last_exf:
|
|
|
|
|
|
ctx_v7.set_exf(
|
|
|
|
|
|
funding=float(self._last_exf.get('funding', 0.0) or 0.0),
|
|
|
|
|
|
dvol=float(self._last_exf.get('dvol', 0.0) or 0.0),
|
|
|
|
|
|
fear_greed=float(self._last_exf.get('fear_greed', 0.0) or 0.0),
|
|
|
|
|
|
taker=float(self._last_exf.get('taker', 0.0) or 0.0),
|
|
|
|
|
|
)
|
|
|
|
|
|
self._v7_contexts[tid_v7] = ctx_v7
|
|
|
|
|
|
self._v7_decision_seq.setdefault(tid_v7, 0)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" V7 live context restore failed for {tid_v7}: {e}")
|
|
|
|
|
|
ctx_v7 = None
|
|
|
|
|
|
if ctx_v7 is not None and pending_v7:
|
|
|
|
|
|
try:
|
|
|
|
|
|
if self.ob_eng is not None:
|
|
|
|
|
|
ob_sig = self.ob_eng.get_signal(pos.asset, float(max(0, self.bar_idx - 1)))
|
|
|
|
|
|
ob_imb = float(getattr(ob_sig, 'imbalance_ma5', 0.0) or 0.0)
|
|
|
|
|
|
else:
|
|
|
|
|
|
ob_imb = 0.0
|
|
|
|
|
|
cur_px = float(prices_dict.get(pos.asset, pos.current_price) or pos.current_price or 0.0)
|
|
|
|
|
|
if cur_px > 0.0:
|
|
|
|
|
|
v7dec = self._v7_exit_engine.evaluate(
|
|
|
|
|
|
ctx_v7,
|
|
|
|
|
|
cur_px,
|
|
|
|
|
|
max(0, self.bar_idx - 1),
|
|
|
|
|
|
ob_imb,
|
|
|
|
|
|
asset=pos.asset,
|
|
|
|
|
|
)
|
|
|
|
|
|
prev_v7dec = self._v7_decisions.get(tid_v7)
|
|
|
|
|
|
prev_v7_action = str(
|
|
|
|
|
|
prev_v7dec.get("action", "")
|
|
|
|
|
|
if isinstance(prev_v7dec, dict)
|
|
|
|
|
|
else getattr(prev_v7dec, "action", "")
|
|
|
|
|
|
).upper()
|
|
|
|
|
|
self._v7_decisions[tid_v7] = v7dec
|
|
|
|
|
|
self._record_v7_decision(
|
|
|
|
|
|
trade_id=tid_v7,
|
|
|
|
|
|
asset=pos.asset,
|
|
|
|
|
|
side=pending_v7.get('side', 'SHORT'),
|
|
|
|
|
|
decision=v7dec,
|
|
|
|
|
|
current_price=cur_px,
|
|
|
|
|
|
ob_imbalance=ob_imb,
|
|
|
|
|
|
vel_div_now=vel_div,
|
|
|
|
|
|
v50_vel=v50_vel,
|
|
|
|
|
|
v750_vel=v750_vel,
|
|
|
|
|
|
bar_idx=max(0, self.bar_idx - 1),
|
|
|
|
|
|
)
|
|
|
|
|
|
v7_action = str(v7dec.get("action", "") if isinstance(v7dec, dict) else getattr(v7dec, "action", "")).upper()
|
|
|
|
|
|
if v7_action == "RETRACT" and prev_v7_action != "RETRACT":
|
|
|
|
|
|
try:
|
|
|
|
|
|
cmd = {
|
|
|
|
|
|
"command_id": f"v7-retract-{uuid.uuid4().hex[:16]}",
|
|
|
|
|
|
"trade_id": tid_v7,
|
|
|
|
|
|
"action": "RETRACT",
|
|
|
|
|
|
"fraction": 0.50,
|
|
|
|
|
|
"reason": "V7_RETRACT",
|
|
|
|
|
|
"source": "v7",
|
|
|
|
|
|
"ts": float(time.time()),
|
|
|
|
|
|
"asset": pos.asset,
|
|
|
|
|
|
"chain_root_trade_id": str(pending_v7.get("chain_root_trade_id", tid_v7) or tid_v7),
|
|
|
|
|
|
"chain_head_leg_id": str(pending_v7.get("chain_head_leg_id", f"{tid_v7}:open") or f"{tid_v7}:open"),
|
|
|
|
|
|
"chain_prev_leg_id": str(pending_v7.get("chain_prev_leg_id", "") or ""),
|
|
|
|
|
|
"chain_seq": int(pending_v7.get("chain_seq", pending_v7.get("retraction_legs", 0)) or 0),
|
|
|
|
|
|
"chain_token": str(pending_v7.get("chain_token", "") or ""),
|
|
|
|
|
|
}
|
|
|
|
|
|
raw_q = self.control_map.blocking().get("blue_runtime_commands") if self.control_map else None
|
|
|
|
|
|
q = json.loads(raw_q) if isinstance(raw_q, str) and raw_q else []
|
|
|
|
|
|
if not isinstance(q, list):
|
|
|
|
|
|
q = []
|
|
|
|
|
|
q.append(cmd)
|
|
|
|
|
|
q = q[-200:]
|
|
|
|
|
|
if self.control_map is not None:
|
|
|
|
|
|
self.control_map.blocking().put("blue_runtime_commands", json.dumps(q))
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" V7 retract enqueue failed for {tid_v7}: {e}")
|
|
|
|
|
|
if self._bounce_advisor is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
entry_ts_val = float(pending_v7.get('entry_ts', 0) or 0)
|
|
|
|
|
|
entry_ts_dt = datetime.fromtimestamp(entry_ts_val / 1_000_000, tz=timezone.utc) if entry_ts_val else None
|
|
|
|
|
|
bounce_rec = self._bounce_eval(
|
|
|
|
|
|
trade_id=tid_v7,
|
|
|
|
|
|
asset=pos.asset,
|
|
|
|
|
|
side=pending_v7.get('side', 'SHORT'),
|
|
|
|
|
|
source="open_scan",
|
|
|
|
|
|
scan_number=scan_number,
|
|
|
|
|
|
entry_ts=entry_ts_dt,
|
|
|
|
|
|
current_price=cur_px,
|
|
|
|
|
|
entry_price=float(pending_v7.get('entry_price', pos.entry_price) or pos.entry_price or 0.0),
|
|
|
|
|
|
quantity=float(pending_v7.get('quantity', getattr(pos, 'quantity', 0.0)) or getattr(pos, 'quantity', 0.0) or 0.0),
|
|
|
|
|
|
notional=float(pending_v7.get('notional', getattr(pos, 'notional', 0.0)) or getattr(pos, 'notional', 0.0) or 0.0),
|
|
|
|
|
|
leverage=float(pending_v7.get('leverage', getattr(pos, 'leverage', 0.0)) or getattr(pos, 'leverage', 0.0) or 0.0),
|
|
|
|
|
|
vel_div=vel_div,
|
|
|
|
|
|
current_mult=float(self._last_esof_size_mult or 1.0),
|
|
|
|
|
|
bars_held=max(0, int(self.bar_idx - int(pending_v7.get('entry_bar', max(0, self.bar_idx - 1)) or max(0, self.bar_idx - 1)))),
|
|
|
|
|
|
log_shadow=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
if bounce_rec:
|
|
|
|
|
|
pending_v7['bounce_advisor_latest'] = bounce_rec
|
|
|
|
|
|
self._pending_entries[tid_v7] = pending_v7
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" BounceAdvisor open-scan eval failed for {tid_v7}: {e}")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" V7 live evaluate failed for {tid_v7}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
_forced_exit = self._drain_runtime_commands(prices_dict)
|
|
|
|
|
|
if _forced_exit is not None and not result.get('exit'):
|
|
|
|
|
|
result['exit'] = _forced_exit
|
|
|
|
|
|
|
|
|
|
|
|
if result.get('exit'):
|
|
|
|
|
|
x = result['exit']
|
|
|
|
|
|
tid = x.get('trade_id')
|
|
|
|
|
|
# Hibernate-protected exits: re-label reason, finalize posture
|
|
|
|
|
|
if tid and self._hibernate_protect_active == tid:
|
|
|
|
|
|
_orig = x.get('reason', '')
|
|
|
|
|
|
_map = {'FIXED_TP': 'HIBERNATE_TP', 'STOP_LOSS': 'HIBERNATE_SL',
|
|
|
|
|
|
'MAX_HOLD': 'HIBERNATE_MAXHOLD'}
|
|
|
|
|
|
x['reason'] = _map.get(_orig, f'HIBERNATE_{_orig}')
|
|
|
|
|
|
self._hibernate_protect_active = None
|
|
|
|
|
|
# Position closed — now safe to commit posture to HIBERNATE
|
|
|
|
|
|
_cur_posture = self._read_posture()
|
|
|
|
|
|
if _cur_posture == 'HIBERNATE':
|
|
|
|
|
|
self.eng._day_posture = 'HIBERNATE'
|
|
|
|
|
|
log(f"HIBERNATE_PROTECT: closed via {x['reason']} — posture finalized HIBERNATE")
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"HIBERNATE_PROTECT: closed via {x['reason']} — posture recovered to {_cur_posture}")
|
|
|
|
|
|
x['reason'] = _normalize_v7_exit_reason(x.get('reason', ''))
|
|
|
|
|
|
log(f"EXIT: {x} [{ALGO_VERSION}]")
|
|
|
|
|
|
_exit_reason_raw = str(x.get('reason', ''))
|
|
|
|
|
|
if _exit_reason_raw in ('FIXED_TP', 'HIBERNATE_TP', 'TP_FLOOR', 'HIBERNATE_TP_FLOOR'):
|
|
|
|
|
|
_tp_used = self.eng.exit_manager.fixed_tp_pct
|
|
|
|
|
|
_pos = self.eng.position
|
|
|
|
|
|
_bars = int(x.get('bars_held', 0) or 0)
|
|
|
|
|
|
# Effective (OB-modulated) gate: _execute_exit() rebuilds
|
|
|
|
|
|
# the exit dict and drops evaluate()'s diag keys, so read
|
|
|
|
|
|
# the manager's last_eval (same source the v7 journal uses).
|
|
|
|
|
|
_le = dict(getattr(self.eng.exit_manager, 'last_eval', {}) or {})
|
|
|
|
|
|
_dyn = float(x.get('dynamic_tp_pct', _le.get('dynamic_tp_pct', 0.0)) or 0.0)
|
|
|
|
|
|
_mod = float(x.get('tp_mod_factor', _le.get('tp_mod_factor', 0.0)) or 0.0)
|
|
|
|
|
|
_casc = int(x.get('cascade_count', _le.get('cascade_count', 0)) or 0)
|
|
|
|
|
|
log(f" TP_EXIT: tp_pct={_tp_used*100:.2f}% dyn_tp={_dyn*100:.2f}% "
|
|
|
|
|
|
f"mod={_mod:.2f}x cascade={_casc} "
|
|
|
|
|
|
f"bars_held={_bars} pnl_pct={float(x.get('pnl_pct',0) or 0):+.4f}")
|
|
|
|
|
|
tid = self._resolve_trade_id(x.get('trade_id'), create_if_missing=True)
|
|
|
|
|
|
x['trade_id'] = tid
|
|
|
|
|
|
pending = self._pending_entries.pop(tid, {}) if tid else {}
|
|
|
|
|
|
if tid:
|
|
|
|
|
|
self._v7_contexts.pop(tid, None)
|
|
|
|
|
|
self._v7_decisions.pop(tid, None)
|
|
|
|
|
|
self._v7_decision_seq.pop(tid, None)
|
|
|
|
|
|
if tid and not pending:
|
|
|
|
|
|
fallback_pending = self._fallback_pending_for_close(tid, x)
|
|
|
|
|
|
self._ps_write_closed(tid, fallback_pending, x)
|
|
|
|
|
|
log(
|
|
|
|
|
|
" EXIT pending metadata missing; wrote fallback CLOSED tombstone "
|
|
|
|
|
|
f"for trade={tid} asset={fallback_pending.get('asset', '')}"
|
|
|
|
|
|
)
|
|
|
|
|
|
if pending:
|
|
|
|
|
|
# exact bar price the engine exited against — prices_dict is still in scope
|
|
|
|
|
|
exit_price = float(prices_dict.get(pending['asset'], 0) or 0)
|
|
|
|
|
|
if self._sc_advisor is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
_rec = pending.get('sc_threshold_advisor')
|
|
|
|
|
|
if _rec:
|
|
|
|
|
|
self._sc_advisor.observe_outcome(
|
|
|
|
|
|
_rec,
|
|
|
|
|
|
executed_mult=float(pending.get('sc_exec_mult', self._last_esof_size_mult) or 1.0),
|
|
|
|
|
|
pnl_pct=float(x.get('pnl_pct', 0) or 0),
|
|
|
|
|
|
exit_reason=str(x.get('reason', 'UNKNOWN')),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
if self._sc_gauge is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
_rec = pending.get('sc_bucket_gauge')
|
|
|
|
|
|
if _rec:
|
|
|
|
|
|
self._sc_gauge.observe_outcome(
|
|
|
|
|
|
_rec,
|
|
|
|
|
|
executed_mult=float(pending.get('sc_bucket_gauge_exec_mult', self._last_esof_size_mult) or 1.0),
|
|
|
|
|
|
pnl_pct=float(x.get('pnl_pct', 0) or 0),
|
|
|
|
|
|
exit_reason=str(x.get('reason', 'UNKNOWN')),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
if self._bounce_advisor is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
_bounce_rec = pending.get('bounce_advisor_entry')
|
|
|
|
|
|
if _bounce_rec:
|
|
|
|
|
|
self._bounce_advisor.observe_outcome(
|
|
|
|
|
|
_bounce_rec,
|
|
|
|
|
|
pnl_pct=float(x.get('pnl_pct', 0) or 0),
|
|
|
|
|
|
exit_reason=str(x.get('reason', 'UNKNOWN')),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" BounceAdvisor outcome update failed for {tid}: {e}")
|
|
|
|
|
|
if self._market_state_runtime is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._market_state_runtime.online_update_from_trade(
|
|
|
|
|
|
asset=str(pending.get("asset", "")),
|
|
|
|
|
|
entry_price=float(pending.get("entry_price", 0) or 0),
|
|
|
|
|
|
exit_price=float(exit_price),
|
|
|
|
|
|
direction=-1 if str(pending.get("side", "SHORT")).upper() == "SHORT" else 1,
|
|
|
|
|
|
pnl_pct=float(x.get("pnl_pct", 0) or 0),
|
|
|
|
|
|
bars_held=int(x.get("bars_held", 0) or 0),
|
|
|
|
|
|
exit_reason=str(x.get("reason", "UNKNOWN")),
|
|
|
|
|
|
trade_id=str(tid or ""),
|
|
|
|
|
|
leverage=float(pending.get("leverage", 1.0) or 1.0),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" MarketStateRuntime outcome update failed for {tid}: {e}")
|
|
|
|
|
|
if self._efsm is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
_efsm_out = self._efsm.observe_closed_trade(
|
|
|
|
|
|
trade_id=str(tid or ""),
|
|
|
|
|
|
asset=str(pending.get("asset", "") or ""),
|
|
|
|
|
|
side=str(pending.get("side", "SHORT") or "SHORT"),
|
|
|
|
|
|
pnl=float(x.get("net_pnl", 0) or 0),
|
|
|
|
|
|
pnl_pct=float(x.get("pnl_pct", 0) or 0),
|
|
|
|
|
|
leverage=float(pending.get("leverage", 0) or 0),
|
|
|
|
|
|
closed_ts=datetime.now(timezone.utc),
|
|
|
|
|
|
was_overlay_flip=bool(pending.get("overlay_flip", False)),
|
|
|
|
|
|
metadata={"exit_reason": str(x.get("reason", "UNKNOWN"))},
|
|
|
|
|
|
)
|
|
|
|
|
|
if _efsm_out.action in {"ARMED", "TAG", "RESET"}:
|
|
|
|
|
|
log(f"EFSM { _efsm_out.action }: { _efsm_out.to_dict() }")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" EFSM observe_closed_trade failed for {tid}: {e}")
|
|
|
|
|
|
realized_pnl, realized_pnl_source = self._resolved_realized_trade_pnl(
|
|
|
|
|
|
pending,
|
|
|
|
|
|
x,
|
|
|
|
|
|
exit_price=exit_price,
|
|
|
|
|
|
)
|
|
|
|
|
|
if realized_pnl_source != "net_pnl":
|
|
|
|
|
|
log(
|
|
|
|
|
|
" realized pnl resolved from "
|
|
|
|
|
|
f"{realized_pnl_source}: raw_net={float(x.get('net_pnl', 0) or 0):+.6f} "
|
|
|
|
|
|
f"resolved={realized_pnl:+.6f}"
|
|
|
|
|
|
)
|
|
|
|
|
|
capital_apply_pnl, capital_apply_source = self._resolved_capital_apply_pnl(x, realized_pnl)
|
|
|
|
|
|
if capital_apply_source != "direct":
|
|
|
|
|
|
log(
|
|
|
|
|
|
" close capital delta suppressed: "
|
|
|
|
|
|
f"source={capital_apply_source} trade={tid} "
|
|
|
|
|
|
f"economic_pnl={realized_pnl:+.6f}"
|
|
|
|
|
|
)
|
|
|
|
|
|
capital_before, capital_after = self._apply_trade_capital_update(
|
|
|
|
|
|
capital_apply_pnl,
|
|
|
|
|
|
reason=str(x.get("reason", "UNKNOWN")),
|
|
|
|
|
|
source="trade_close",
|
|
|
|
|
|
trade_id=str(tid or ""),
|
|
|
|
|
|
asset=str(pending.get("asset", "")),
|
|
|
|
|
|
mirror_control_plane=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
execution_quality = self._build_trade_execution_quality_summary(
|
|
|
|
|
|
trade_id=str(tid or ""),
|
|
|
|
|
|
pending=pending,
|
|
|
|
|
|
exit_payload=x,
|
|
|
|
|
|
capital_before=capital_before,
|
|
|
|
|
|
capital_after=capital_after,
|
|
|
|
|
|
realized_pnl=realized_pnl,
|
|
|
|
|
|
exit_price=exit_price,
|
|
|
|
|
|
source="trade_close",
|
|
|
|
|
|
)
|
|
|
|
|
|
self._persist_trade_execution_quality(execution_quality)
|
|
|
|
|
|
pending.update(self._tp_curve_context(notional=float(pending.get("notional", 0) or 0)))
|
|
|
|
|
|
ch_put("trade_events", {
|
|
|
|
|
|
"ts": _ch_ts_us(),
|
|
|
|
|
|
"date": pending['entry_date'],
|
|
|
|
|
|
"strategy": "blue",
|
|
|
|
|
|
"trade_id": tid,
|
|
|
|
|
|
"asset": pending['asset'],
|
|
|
|
|
|
"side": pending['side'],
|
|
|
|
|
|
"entry_price": pending['entry_price'],
|
|
|
|
|
|
"exit_price": exit_price,
|
|
|
|
|
|
"quantity": pending['quantity'],
|
|
|
|
|
|
"capital_before": capital_before,
|
|
|
|
|
|
"capital_after": capital_after,
|
|
|
|
|
|
"pnl": realized_pnl,
|
|
|
|
|
|
"pnl_pct": float(x.get('pnl_pct', 0) or 0),
|
|
|
|
|
|
"exit_reason": str(x.get('reason', 'UNKNOWN')),
|
|
|
|
|
|
"vel_div_entry": pending['vel_div_entry'],
|
|
|
|
|
|
"boost_at_entry": pending['boost_at_entry'],
|
|
|
|
|
|
"beta_at_entry": pending['beta_at_entry'],
|
|
|
|
|
|
"posture": pending['posture'],
|
|
|
|
|
|
"leverage": pending['leverage'],
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
# CH column is UInt16 — a negative value poisons the spool
|
|
|
|
|
|
# (head-of-line jam, incident 2026-06-12: bars_held=-106)
|
|
|
|
|
|
"bars_held": max(0, int(x.get('bars_held', 0) or 0)),
|
2026-06-12 14:59:49 +02:00
|
|
|
|
"regime_signal": 0,
|
|
|
|
|
|
"tp_threshold": float(self.eng.exit_manager.fixed_tp_pct),
|
|
|
|
|
|
"execution_quality_json": json.dumps(execution_quality, default=str),
|
|
|
|
|
|
"market_state_bundle_json": str(pending.get("market_state_bundle_json", "") or ""),
|
|
|
|
|
|
"tp_base_pct": float(pending.get("tp_base_pct", 0.0) or 0.0),
|
|
|
|
|
|
"tp_effective_pct": float(pending.get("tp_effective_pct", 0.0) or 0.0),
|
|
|
|
|
|
"our_leverage": float(pending.get("our_leverage", 0.0) or 0.0),
|
|
|
|
|
|
})
|
|
|
|
|
|
ch_put("trade_reconstruction", {
|
|
|
|
|
|
"ts": _ch_ts_us(),
|
|
|
|
|
|
"trade_id": str(tid or ""),
|
|
|
|
|
|
"event_type": "CLOSE",
|
|
|
|
|
|
"event_id": f"{tid}:close",
|
|
|
|
|
|
"payload_json": json.dumps({
|
|
|
|
|
|
"exit": x,
|
|
|
|
|
|
"pending": pending,
|
|
|
|
|
|
"exit_price": exit_price,
|
|
|
|
|
|
"retraction_legs": int(pending.get("retraction_legs", 0) or 0),
|
|
|
|
|
|
"retraction_realized_total": float(pending.get("realized_pnl_legs_total", 0.0) or 0.0),
|
|
|
|
|
|
"chain": {
|
|
|
|
|
|
"trade_id": tid,
|
|
|
|
|
|
"chain_root_trade_id": pending.get("chain_root_trade_id", tid),
|
|
|
|
|
|
"chain_head_leg_id": pending.get("chain_head_leg_id", f"{tid}:open"),
|
|
|
|
|
|
"chain_prev_leg_id": pending.get("chain_prev_leg_id", ""),
|
|
|
|
|
|
"chain_seq": int(pending.get("retraction_legs", 0) or 0),
|
|
|
|
|
|
"chain_token": pending.get("chain_token", ""),
|
|
|
|
|
|
"chain_mode": pending.get("chain_mode", "LIVE"),
|
|
|
|
|
|
},
|
|
|
|
|
|
"execution_quality": execution_quality,
|
|
|
|
|
|
}, default=str),
|
|
|
|
|
|
"market_state_bundle_json": str(pending.get("market_state_bundle_json", "") or ""),
|
|
|
|
|
|
"tp_base_pct": float(pending.get("tp_base_pct", 0.0) or 0.0),
|
|
|
|
|
|
"tp_effective_pct": float(pending.get("tp_effective_pct", 0.0) or 0.0),
|
|
|
|
|
|
"our_leverage": float(pending.get("our_leverage", 0.0) or 0.0),
|
|
|
|
|
|
})
|
|
|
|
|
|
# Mark position closed in CH (supersedes OPEN row via ReplacingMergeTree)
|
|
|
|
|
|
self._ps_write_closed(tid, pending, x)
|
|
|
|
|
|
self._announce_position_event(
|
|
|
|
|
|
kind="trade_exit",
|
|
|
|
|
|
severity="info" if float(x.get("pnl_pct", 0) or 0) >= 0 else "warning",
|
|
|
|
|
|
title=f"[BLUE] EXIT {pending.get('asset', '')} {pending.get('side', '')}",
|
|
|
|
|
|
message=(
|
|
|
|
|
|
f"reason={x.get('reason', 'UNKNOWN')} "
|
|
|
|
|
|
f"pnl={float(x.get('net_pnl', 0) or 0):+.2f} "
|
|
|
|
|
|
f"pnl_pct={float(x.get('pnl_pct', 0) or 0):+.3%}"
|
|
|
|
|
|
),
|
|
|
|
|
|
metadata={
|
|
|
|
|
|
"trade_id": tid,
|
|
|
|
|
|
"asset": pending.get("asset", ""),
|
|
|
|
|
|
"side": pending.get("side", ""),
|
|
|
|
|
|
"entry_price": pending.get("entry_price", 0),
|
|
|
|
|
|
"exit_price": exit_price,
|
|
|
|
|
|
"quantity": pending.get("quantity", 0),
|
|
|
|
|
|
"pnl": realized_pnl,
|
|
|
|
|
|
"pnl_pct": float(x.get("pnl_pct", 0) or 0),
|
|
|
|
|
|
"exit_reason": str(x.get("reason", "UNKNOWN")),
|
|
|
|
|
|
"bars_held": int(x.get("bars_held", 0) or 0),
|
|
|
|
|
|
"posture": pending.get("posture", ""),
|
|
|
|
|
|
"overlay_flip": bool(pending.get("overlay_flip", False)),
|
|
|
|
|
|
"overlay_reason": str(pending.get("overlay_reason", "")),
|
|
|
|
|
|
"overlay_slot": int(pending.get("overlay_slot", 0) or 0),
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
# Shadow AE: record outcome for online update
|
|
|
|
|
|
if self._ae is not None and tid:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._ae.on_exit(
|
|
|
|
|
|
trade_id=tid,
|
|
|
|
|
|
actual_exit_reason=str(x.get('reason', 'UNKNOWN')),
|
|
|
|
|
|
pnl_pct=float(x.get('pnl_pct', 0) or 0),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
# Shadow AE: per-bar evaluate for all open trades — daemon thread, zero hot-path impact
|
|
|
|
|
|
if self._ae is not None and self._pending_entries:
|
|
|
|
|
|
_ae_ref = self._ae
|
|
|
|
|
|
_pending_snap = dict(self._pending_entries) # shallow copy under GIL
|
|
|
|
|
|
_prices_snap = dict(prices_dict)
|
|
|
|
|
|
_vel_now = vel_div
|
|
|
|
|
|
_bar = self.bar_idx
|
|
|
|
|
|
def _ae_eval():
    """Shadow-evaluate every snapshotted open trade for the current bar.

    Reads only the values captured just before this closure was defined
    (``_pending_snap``, ``_prices_snap``, ``_vel_now``, ``_bar``, ``_ae_ref``)
    plus a handful of attributes on ``self``.  For each trade that has a
    non-zero price it:

    1. evaluates the shadow AE and logs the result together with the
       short-side PnL fraction;
    2. if ``self._advanced_sl`` is set, evaluates and logs that as well;
    3. if AdvancedSL asks for an exit (and live exits are enabled), or the
       overlay check asks for one, appends a full-size RETRACT command to the
       ``blue_runtime_commands`` JSON queue held in ``self.control_map``.

    Takes no arguments and returns ``None``.  Failures are isolated per
    trade: they are logged and the loop moves on, so one bad entry can
    neither stop the remaining trades nor fail silently.
    """

    def _v7_as_dict(raw):
        """Return a plain-dict copy of a stored V7 decision (``{}`` if absent)."""
        try:
            return dict(raw or {})
        except (TypeError, ValueError):
            # Not a mapping.  The V7 bridge reads such objects through
            # getattr(..., "action", ""), so take an attribute snapshot
            # instead of aborting the whole AdvancedSL evaluation.
            return dict(getattr(raw, "__dict__", None) or {"action": getattr(raw, "action", "")})

    for _tid, _p in _pending_snap.items():
        # ---- 1. shadow AE ------------------------------------------------
        try:
            _asset = _p['asset']
            _cur = _prices_snap.get(_asset, 0) or 0
            if not _cur:
                continue  # no usable price for this asset on this bar
            _entry_px = float(_p.get('entry_price', 0) or 0)
            # A key that is present but None must not raise; 0 stays a valid bar.
            _entry_bar = _p.get('entry_bar')
            if _entry_bar is None:
                _entry_bar = _bar
            _bars_held = max(0, int(_bar - int(_entry_bar)))
            # NOTE(review): this PnL formula and direction=-1 below both assume a
            # SHORT position even though entries carry a 'side' — confirm that
            # longs never reach this path.
            _shadow_pnl_pct = ((_entry_px - _cur) / _entry_px) if _entry_px > 0 else 0.0
            _recent_prices = self._bounce_price_path(_asset)
            _shadow = _ae_ref.evaluate(
                trade_id=_tid,
                asset=_asset,
                direction=-1,
                entry_price=_entry_px,
                current_price=_cur,
                bars_held=_bars_held,
                vel_div_now=_vel_now,
            )
            _ae_ref.log_shadow(_shadow, pnl_pct=_shadow_pnl_pct)
        except Exception as e:
            log(f" Shadow AE eval failed for {_tid}: {e}")
            continue

        # ---- 2. AdvancedSL shadow evaluation -------------------------------
        _adv_engine = self._advanced_sl
        if _adv_engine is None:
            continue
        try:
            _msr = self._market_state_runtime
            _ms_state = dict(_msr.latest_state) if _msr and getattr(_msr, "latest_state", None) else {}
            _ms_bundle = dict(_msr.latest_bundle_dict) if _msr and getattr(_msr, "latest_bundle_dict", None) else {}
            _v7 = _v7_as_dict(self._v7_decisions.get(_tid))
            _maras_ctx = self._latest_maras_context()

            # Optional meta-performance input; an empty dict is the fallback.
            _adv_meta = {}
            if self._efsm is not None and hasattr(self._efsm, "exit_policy_meta"):
                try:
                    _adv_meta = self._efsm.exit_policy_meta(_maras_ctx)
                except Exception:
                    _adv_meta = {}

            _adv = _adv_engine.evaluate(
                trade_id=_tid,
                asset=_asset,
                side=str(_p.get("side", "SHORT") or "SHORT"),
                entry_price=_entry_px,
                current_price=_cur,
                bars_held=_bars_held,
                recent_prices=_recent_prices,
                ae_shadow=_shadow,
                v7_decision=_v7,
                market_state=_ms_state,
                market_bundle=_ms_bundle,
                exf_snapshot=dict(self._last_exf or {}),
                meta_performance=_adv_meta,
            )
            _adv_engine.log_shadow(_adv, pnl_pct=_shadow_pnl_pct)

            # The overlay check is best-effort: any failure means "no exit".
            try:
                _overlay_exit, _overlay_exit_detail = self._overlay_advsl_should_exit(
                    _tid,
                    _p,
                    _v7,
                    _bars_held,
                    _cur,
                )
            except Exception:
                _overlay_exit = False
                _overlay_exit_detail = ""

            # ---- 3. live exit enqueue ---------------------------------------
            _adv_exit = self._advanced_sl_live_exit_enabled and bool(getattr(_adv, "would_exit", False))
            if not (_adv_exit or _overlay_exit):
                continue
            _ctl = self.control_map
            if _ctl is None:
                # Nowhere to publish, so do not log an enqueue that never happened.
                continue
            try:
                _reason = (
                    f"OVERLAY_ADVSL_{_overlay_exit_detail}"
                    if _overlay_exit
                    else f"ADVSL_{_adv.reason}"
                )
                # NOTE(review): get -> append -> put is not atomic, and the scan
                # thread runs the same sequence for V7 retracts while this closure
                # runs on its own thread; concurrent writers could drop a command.
                # Confirm whether a map lock / atomic update is needed.
                raw_q = _ctl.blocking().get("blue_runtime_commands")
                q = json.loads(raw_q) if isinstance(raw_q, str) and raw_q else []
                if not isinstance(q, list):
                    q = []
                q.append({
                    "command_id": f"advsl-exit-{uuid.uuid4().hex[:16]}",
                    "trade_id": _tid,
                    "action": "RETRACT",
                    "fraction": 1.0,
                    "reason": _reason,
                    "source": "advanced_sl",
                    "ts": float(time.time()),
                    "asset": _asset,
                    "chain_root_trade_id": str(_p.get("chain_root_trade_id", _tid) or _tid),
                    "chain_head_leg_id": str(_p.get("chain_head_leg_id", f"{_tid}:open") or f"{_tid}:open"),
                    "chain_prev_leg_id": str(_p.get("chain_prev_leg_id", "") or ""),
                    "chain_seq": int(_p.get("chain_seq", _p.get("retraction_legs", 0)) or 0),
                    "chain_token": str(_p.get("chain_token", "") or ""),
                })
                # Keep only the newest 200 commands so the queue cannot grow unbounded.
                _ctl.blocking().put("blue_runtime_commands", json.dumps(q[-200:]))
                log(
                    " AdvancedSL live exit enqueue: "
                    f"{_tid} {_asset} reason={_reason} "
                    f"score={float(getattr(_adv, 'score', 0.0) or 0.0):+.3f} "
                    f"pnl_pct={_shadow_pnl_pct:+.3f}"
                )
            except Exception as e:
                log(f" AdvancedSL live exit enqueue failed for {_tid}: {e}")
        except Exception as e:
            log(f" AdvancedSL shadow eval failed for {_tid}: {e}")
|
|
|
|
|
|
threading.Thread(target=_ae_eval, daemon=True).start()
|
|
|
|
|
|
|
|
|
|
|
|
self._push_state(scan_number, vel_div, vol_ok, self._read_posture())
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"ERROR in _process_scan: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def on_exf_update(self, event):
|
|
|
|
|
|
if not event.value: return
|
|
|
|
|
|
snapshot = json.loads(event.value) if isinstance(event.value, str) else event.value
|
|
|
|
|
|
if not self.current_day or not self.acb: return
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._last_exf = {
|
|
|
|
|
|
'funding': float(snapshot.get('funding_btc', 0.0)),
|
|
|
|
|
|
'dvol': float(snapshot.get('dvol_btc', 50.0)),
|
|
|
|
|
|
'fear_greed': float(snapshot.get('fng', 50.0)),
|
|
|
|
|
|
'taker': float(snapshot.get('taker', 0.5)),
|
|
|
|
|
|
}
|
|
|
|
|
|
w750_vel = getattr(self, 'last_w750_vel', None)
|
|
|
|
|
|
acb_info = self.acb.get_dynamic_boost_from_hz(
|
|
|
|
|
|
date_str=self.current_day,
|
|
|
|
|
|
exf_snapshot=snapshot,
|
|
|
|
|
|
w750_velocity=float(w750_vel) if w750_vel else None,
|
|
|
|
|
|
direction=self.trade_direction,
|
|
|
|
|
|
)
|
|
|
|
|
|
with self.eng_lock:
|
|
|
|
|
|
if hasattr(self.eng, 'update_acb_boost'):
|
|
|
|
|
|
subday_exit = self.eng.update_acb_boost(
|
|
|
|
|
|
boost=acb_info['boost'],
|
|
|
|
|
|
beta=acb_info['beta']
|
|
|
|
|
|
)
|
|
|
|
|
|
if subday_exit is not None:
|
|
|
|
|
|
log(f"SUBDAY_EXIT: {subday_exit} [{ALGO_VERSION}]")
|
|
|
|
|
|
tid = self._resolve_trade_id(subday_exit.get('trade_id'), create_if_missing=True)
|
|
|
|
|
|
subday_exit['trade_id'] = tid
|
|
|
|
|
|
pending = {}
|
|
|
|
|
|
if tid:
|
|
|
|
|
|
pending = self._pending_entries.pop(tid, {})
|
|
|
|
|
|
if pending and self._sc_advisor is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
_rec = pending.get('sc_threshold_advisor')
|
|
|
|
|
|
if _rec:
|
|
|
|
|
|
self._sc_advisor.observe_outcome(
|
|
|
|
|
|
_rec,
|
|
|
|
|
|
executed_mult=float(pending.get('sc_exec_mult', self._last_esof_size_mult) or 1.0),
|
|
|
|
|
|
pnl_pct=float(subday_exit.get('pnl_pct', 0) or 0),
|
|
|
|
|
|
exit_reason=str(subday_exit.get('reason', 'SUBDAY_ACB_NORMALIZATION')),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
if pending and self._sc_gauge is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
_rec_g = pending.get('sc_bucket_gauge')
|
|
|
|
|
|
if _rec_g:
|
|
|
|
|
|
self._sc_gauge.observe_outcome(
|
|
|
|
|
|
_rec_g,
|
|
|
|
|
|
executed_mult=float(pending.get('sc_bucket_gauge_exec_mult', self._last_esof_size_mult) or 1.0),
|
|
|
|
|
|
pnl_pct=float(subday_exit.get('pnl_pct', 0) or 0),
|
|
|
|
|
|
exit_reason=str(subday_exit.get('reason', 'SUBDAY_ACB_NORMALIZATION')),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
if pending and self._bounce_advisor is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
_bounce_rec = pending.get('bounce_advisor_entry')
|
|
|
|
|
|
if _bounce_rec:
|
|
|
|
|
|
self._bounce_advisor.observe_outcome(
|
|
|
|
|
|
_bounce_rec,
|
|
|
|
|
|
pnl_pct=float(subday_exit.get('pnl_pct', 0) or 0),
|
|
|
|
|
|
exit_reason=str(subday_exit.get('reason', 'SUBDAY_ACB_NORMALIZATION')),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" BounceAdvisor outcome update failed for {tid}: {e}")
|
|
|
|
|
|
if self._market_state_runtime is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._market_state_runtime.online_update_from_trade(
|
|
|
|
|
|
asset=str(pending.get("asset", "")),
|
|
|
|
|
|
entry_price=float(pending.get("entry_price", 0) or 0),
|
|
|
|
|
|
exit_price=float(subday_exit.get("exit_price", 0) or 0),
|
|
|
|
|
|
direction=-1 if str(pending.get("side", "SHORT")).upper() == "SHORT" else 1,
|
|
|
|
|
|
pnl_pct=float(subday_exit.get("pnl_pct", 0) or 0),
|
|
|
|
|
|
bars_held=int(subday_exit.get("bars_held", 0) or 0),
|
|
|
|
|
|
exit_reason=str(subday_exit.get("reason", "SUBDAY_ACB_NORMALIZATION")),
|
|
|
|
|
|
trade_id=str(tid or ""),
|
|
|
|
|
|
leverage=float(pending.get("leverage", 1.0) or 1.0),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" MarketStateRuntime outcome update failed for {tid}: {e}")
|
|
|
|
|
|
if self._efsm is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
_efsm_sub = self._efsm.observe_closed_trade(
|
|
|
|
|
|
trade_id=str(tid or ""),
|
|
|
|
|
|
asset=str(pending.get("asset", "") or ""),
|
|
|
|
|
|
side=str(pending.get("side", "SHORT") or "SHORT"),
|
|
|
|
|
|
pnl=float(subday_exit.get("net_pnl", 0) or 0),
|
|
|
|
|
|
pnl_pct=float(subday_exit.get("pnl_pct", 0) or 0),
|
|
|
|
|
|
leverage=float(pending.get("leverage", 0) or 0),
|
|
|
|
|
|
closed_ts=datetime.now(timezone.utc),
|
|
|
|
|
|
was_overlay_flip=bool(pending.get("overlay_flip", False)),
|
|
|
|
|
|
metadata={"exit_reason": str(subday_exit.get("reason", "SUBDAY_ACB_NORMALIZATION"))},
|
|
|
|
|
|
)
|
|
|
|
|
|
if _efsm_sub.action in {"ARMED", "TAG", "RESET"}:
|
|
|
|
|
|
log(f"EFSM { _efsm_sub.action }: { _efsm_sub.to_dict() }")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" EFSM observe_closed_trade failed for {tid}: {e}")
|
|
|
|
|
|
realized_pnl, realized_pnl_source = self._resolved_realized_trade_pnl(
|
|
|
|
|
|
pending,
|
|
|
|
|
|
subday_exit,
|
|
|
|
|
|
exit_price=float(subday_exit.get("exit_price", 0) or 0),
|
|
|
|
|
|
)
|
|
|
|
|
|
if realized_pnl_source != "net_pnl":
|
|
|
|
|
|
log(
|
|
|
|
|
|
" realized pnl resolved from "
|
|
|
|
|
|
f"{realized_pnl_source}: raw_net={float(subday_exit.get('net_pnl', 0) or 0):+.6f} "
|
|
|
|
|
|
f"resolved={realized_pnl:+.6f}"
|
|
|
|
|
|
)
|
|
|
|
|
|
capital_apply_pnl, capital_apply_source = self._resolved_capital_apply_pnl(subday_exit, realized_pnl)
|
|
|
|
|
|
if capital_apply_source != "direct":
|
|
|
|
|
|
log(
|
|
|
|
|
|
" close capital delta suppressed: "
|
|
|
|
|
|
f"source={capital_apply_source} trade={tid} "
|
|
|
|
|
|
f"economic_pnl={realized_pnl:+.6f}"
|
|
|
|
|
|
)
|
|
|
|
|
|
capital_before, capital_after = self._apply_trade_capital_update(
|
|
|
|
|
|
capital_apply_pnl,
|
|
|
|
|
|
reason=str(subday_exit.get("reason", "SUBDAY_ACB_NORMALIZATION")),
|
|
|
|
|
|
source="trade_close",
|
|
|
|
|
|
trade_id=str(tid or ""),
|
|
|
|
|
|
asset=str(pending.get("asset", subday_exit.get("asset", ""))),
|
|
|
|
|
|
mirror_control_plane=True,
|
|
|
|
|
|
)
|
|
|
|
|
|
execution_quality = self._build_trade_execution_quality_summary(
|
|
|
|
|
|
trade_id=str(tid or ""),
|
|
|
|
|
|
pending=pending,
|
|
|
|
|
|
exit_payload=subday_exit,
|
|
|
|
|
|
capital_before=capital_before,
|
|
|
|
|
|
capital_after=capital_after,
|
|
|
|
|
|
realized_pnl=realized_pnl,
|
|
|
|
|
|
exit_price=float(subday_exit.get("exit_price", 0) or 0),
|
|
|
|
|
|
source="trade_close",
|
|
|
|
|
|
)
|
|
|
|
|
|
self._persist_trade_execution_quality(execution_quality)
|
|
|
|
|
|
pending.update(self._tp_curve_context(notional=float(pending.get("notional", 0) or 0)))
|
|
|
|
|
|
ch_put("trade_events", {
|
|
|
|
|
|
"ts": _ch_ts_us(),
|
|
|
|
|
|
"date": self.current_day or '',
|
|
|
|
|
|
"strategy": "blue",
|
|
|
|
|
|
"trade_id": tid,
|
|
|
|
|
|
"asset": pending.get('asset', subday_exit.get('asset', '')),
|
|
|
|
|
|
"side": pending.get('side', 'SHORT'),
|
|
|
|
|
|
"entry_price": pending.get('entry_price', 0),
|
|
|
|
|
|
"exit_price": float(subday_exit.get('exit_price', 0) or 0),
|
|
|
|
|
|
"quantity": round(float(pending.get('notional', 0) or 0) / max(float(pending.get('entry_price', 1) or 1), 1e-12), 6),
|
|
|
|
|
|
"capital_before": capital_before,
|
|
|
|
|
|
"capital_after": capital_after,
|
|
|
|
|
|
"pnl": realized_pnl,
|
|
|
|
|
|
"pnl_pct": float(subday_exit.get('pnl_pct', 0) or 0),
|
|
|
|
|
|
"exit_reason": str(subday_exit.get('reason', 'SUBDAY_ACB_NORMALIZATION')),
|
|
|
|
|
|
"vel_div_entry": float(pending.get('vel_div_entry', 0) or 0),
|
|
|
|
|
|
"boost_at_entry": float(pending.get('boost_at_entry', 0) or 0),
|
|
|
|
|
|
"beta_at_entry": float(pending.get('beta_at_entry', 0) or 0),
|
|
|
|
|
|
"posture": pending.get('posture', ''),
|
|
|
|
|
|
"leverage": float(pending.get('leverage', 0) or 0),
|
BLUE hardening: spool-poison guards, dead-session clock fix, HZ black-box, RETRACT race-safety
Seven uncommitted production fixes to BLUE's main runner that the LIVE
process has already been running since the 2026-06-15 17:23 restart (file
mtime 17:17, pid started 17:23). Each fix answers a documented incident;
committing now so they survive in history and a stray checkout can't
silently revert running-config code on the next restart.
1. bars_held = max(0, int(...)) at BOTH journal sites (terminal + sub-day).
CH column is UInt16 — a negative value poisons the spool with a
head-of-line jam (incident 2026-06-12: bars_held=-106).
2. entry_bar = int(restored_entry_bar) at BOTH reconstruction sites; NEVER
from chain_meta. trade_reconstruction payloads carry the DEAD session's
bar counter, so the old override reinstated the stale clock frame the
re-anchor exists to fix → negative bars_held → same UInt16 spool poison
(zombie-trade resurrections, incident 2026-06-12). restored_entry_bar
already encodes hold continuity via stored_bars in THIS session's frame.
3. capital parse handles list/ledger-style payloads: when the restore blob
is a list of update rows, take the latest dict row instead of falling
through to {} and losing the capital anchor.
4. _connect_hz routes the `hazelcast` logger to stderr at INFO. The
silent-HZ-death investigation found ZERO client log lines because
nothing routed them; without this the reactor's health is invisible.
5. _dump_blackbox(reason): forensic thread dump before a watchdog restart —
lifecycle.is_running, active_connections, every thread's stack, and a
flag when any hazelcast/reactor-named thread is MISSING (= reactor died,
the prime suspect for the silent 40min–8h client deaths). print()-only,
CIFS-safe. _watchdog_restart calls it first.
6. _drain_runtime_commands / _process_runtime_commands gain
`*, allow_retract=True`; the heartbeat path drains with
allow_retract=False and re-queues any RETRACT commands. A RETRACT can
force a terminal close that must run through the scan-thread close
finalizer, so the heartbeat must not race it.
7. +import traceback (for the black-box stack dumps).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:03:20 +02:00
|
|
|
|
# CH column is UInt16 — negative poisons the spool
|
|
|
|
|
|
"bars_held": max(0, int(subday_exit.get('bars_held', 0) or 0)),
|
2026-06-12 14:59:49 +02:00
|
|
|
|
"regime_signal": 0,
|
|
|
|
|
|
"execution_quality_json": json.dumps(execution_quality, default=str),
|
|
|
|
|
|
"market_state_bundle_json": str(pending.get("market_state_bundle_json", "") or ""),
|
|
|
|
|
|
"tp_base_pct": float(pending.get("tp_base_pct", 0.0) or 0.0),
|
|
|
|
|
|
"tp_effective_pct": float(pending.get("tp_effective_pct", 0.0) or 0.0),
|
|
|
|
|
|
"our_leverage": float(pending.get("our_leverage", 0.0) or 0.0),
|
|
|
|
|
|
})
|
|
|
|
|
|
self._announce_position_event(
|
|
|
|
|
|
kind="trade_exit",
|
|
|
|
|
|
severity="info" if float(subday_exit.get("pnl_pct", 0) or 0) >= 0 else "warning",
|
|
|
|
|
|
title=f"[BLUE] EXIT {pending.get('asset', '')} {pending.get('side', '')}",
|
|
|
|
|
|
message=(
|
|
|
|
|
|
f"reason={subday_exit.get('reason', 'SUBDAY_ACB_NORMALIZATION')} "
|
|
|
|
|
|
f"pnl={float(subday_exit.get('net_pnl', 0) or 0):+.2f} "
|
|
|
|
|
|
f"pnl_pct={float(subday_exit.get('pnl_pct', 0) or 0):+.3%}"
|
|
|
|
|
|
),
|
|
|
|
|
|
metadata={
|
|
|
|
|
|
"trade_id": tid,
|
|
|
|
|
|
"asset": pending.get("asset", subday_exit.get("asset", "")),
|
|
|
|
|
|
"side": pending.get("side", "SHORT"),
|
|
|
|
|
|
"entry_price": pending.get("entry_price", 0),
|
|
|
|
|
|
"exit_price": float(subday_exit.get("exit_price", 0) or 0),
|
|
|
|
|
|
"quantity": round(float(pending.get("notional", 0) or 0) / max(float(pending.get("entry_price", 1) or 1), 1e-12), 6),
|
|
|
|
|
|
"pnl": realized_pnl,
|
|
|
|
|
|
"pnl_pct": float(subday_exit.get("pnl_pct", 0) or 0),
|
|
|
|
|
|
"exit_reason": str(subday_exit.get("reason", "SUBDAY_ACB_NORMALIZATION")),
|
|
|
|
|
|
"bars_held": int(subday_exit.get("bars_held", 0) or 0),
|
|
|
|
|
|
"posture": pending.get("posture", ""),
|
|
|
|
|
|
"overlay_flip": bool(pending.get("overlay_flip", False)),
|
|
|
|
|
|
"overlay_reason": str(pending.get("overlay_reason", "")),
|
|
|
|
|
|
"overlay_slot": int(pending.get("overlay_slot", 0) or 0),
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
close_pending = pending if pending else self._fallback_pending_for_close(str(tid or ""), subday_exit)
|
|
|
|
|
|
self._ps_write_closed(str(tid or ""), close_pending, subday_exit)
|
|
|
|
|
|
if tid and not pending:
|
|
|
|
|
|
log(
|
|
|
|
|
|
" SUBDAY_EXIT pending metadata missing; wrote fallback CLOSED tombstone "
|
|
|
|
|
|
f"for trade={tid} asset={close_pending.get('asset', '')}"
|
|
|
|
|
|
)
|
|
|
|
|
|
now = time.time()
|
|
|
|
|
|
if now - self._exf_log_time >= 300:
|
|
|
|
|
|
self._exf_log_time = now
|
|
|
|
|
|
log(f"ACB subday: boost={acb_info['boost']:.4f} beta={acb_info['beta']:.4f} "
|
|
|
|
|
|
f"signals={acb_info['signals']:.1f} src={acb_info.get('source','?')}")
|
|
|
|
|
|
# ACB_EXIT disabled: update_acb_boost() called to keep boost/beta current
|
|
|
|
|
|
# (ACBv6 intact), but SUBDAY_ACB_NORMALIZATION exits are suppressed.
|
|
|
|
|
|
except ValueError as e:
|
|
|
|
|
|
log(f"ACB Stale Data Fallback: {e}")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"on_exf_update Error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def _wire_obf(self, assets):
    """Attach a live order-book feature engine to the trading engine, once.

    Does nothing when ``assets`` is empty or when ``self.ob_assets`` is
    already set, so repeated calls after the first successful wiring are
    no-ops.

    Args:
        assets: Asset identifiers handed to ``HZOBProvider`` and stored on
            ``self.ob_assets``.
    """
    if not assets:
        return
    if self.ob_assets:
        return

    self.ob_assets = assets
    # Imported lazily, only when order-book wiring actually happens.
    from nautilus_dolphin.nautilus.hz_ob_provider import HZOBProvider

    provider = HZOBProvider(hz_cluster=HZ_CLUSTER, hz_host=HZ_HOST, assets=assets)
    feature_engine = OBFeatureEngine(provider)
    self.ob_eng = feature_engine
    # No preload_date() call — live mode uses step_live() per scan
    self.eng.set_ob_engine(self.ob_eng)
    log(f" OBF wired: HZOBProvider, {len(assets)} assets (LIVE mode)")
|
|
|
|
|
|
|
|
|
|
|
|
def _save_capital(self):
    """Commit the engine's current capital as an ``ENGINE_SAVE`` snapshot.

    Reads ``self.eng.capital`` and forwards it to
    ``self._commit_capital_state``. Nothing is committed when the value is
    missing, non-finite, or below 1.0.

    NOTE(review): the original docstring says this persists to HZ (primary)
    and disk (fallback); that happens inside ``_commit_capital_state``,
    which is not visible from here — confirm there.
    """
    engine_capital = getattr(self.eng, 'capital', None)
    if engine_capital is None:
        return
    worth_saving = math.isfinite(engine_capital) and engine_capital >= 1.0
    if not worth_saving:
        return
    self._commit_capital_state(
        float(engine_capital),
        reason="ENGINE_SAVE",
        source="engine_snapshot",
        mirror_control_plane=False,
    )
|
|
|
|
|
|
|
|
|
|
|
|
def _publish_corrective_replay(self, replay_blob: Mapping[str, Any]) -> None:
    """Commit the capital carried by a corrective replay seed.

    The blob's ``capital`` is parsed with ``_safe_float``; a value that is
    non-finite or below 1.0 is ignored. Otherwise the capital, together
    with the blob's ``reason`` / ``trade_id`` / ``asset`` and the blob
    itself, is passed to ``self._commit_capital_state`` with
    ``update_replay_key=True`` and ``mirror_control_plane=True``.

    Args:
        replay_blob: Mapping describing the replay seed.

    Any exception is caught and logged; nothing is raised to the caller.
    """
    try:
        seed_capital = _safe_float(replay_blob.get("capital", 0.0), 0.0)
        if math.isfinite(seed_capital) and seed_capital >= 1.0:
            # Empty/missing reason falls back to the generic label.
            replay_reason = str(replay_blob.get("reason", "") or "CORRECTIVE_REPLAY")
            replay_trade_id = str(replay_blob.get("trade_id", "") or "")
            replay_asset = str(replay_blob.get("asset", "") or "")
            self._commit_capital_state(
                seed_capital,
                reason=replay_reason,
                source="corrective_replay",
                trade_id=replay_trade_id,
                asset=replay_asset,
                replay_blob=replay_blob,
                update_replay_key=True,
                mirror_control_plane=True,
            )
    except Exception as e:
        log(f" corrective replay publish failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def request_capital_update(
|
|
|
|
|
|
self,
|
|
|
|
|
|
capital: float,
|
|
|
|
|
|
*,
|
|
|
|
|
|
reason: str = "CAPITAL_UPDATE",
|
|
|
|
|
|
source: str = "control_plane",
|
|
|
|
|
|
trade_id: str = "",
|
|
|
|
|
|
asset: str = "",
|
|
|
|
|
|
event_ts: float | None = None,
|
|
|
|
|
|
applies_before_ts: float | None = None,
|
|
|
|
|
|
replay_blob: Mapping[str, Any] | None = None,
|
|
|
|
|
|
) -> dict:
|
|
|
|
|
|
"""Queue a capital update onto the BLUE runtime command channel."""
|
|
|
|
|
|
cmd = {
|
|
|
|
|
|
"command_id": f"cap-update-{uuid.uuid4().hex[:16]}",
|
|
|
|
|
|
"action": "SET_CAPITAL",
|
|
|
|
|
|
"capital": float(capital),
|
|
|
|
|
|
"reason": str(reason or "CAPITAL_UPDATE"),
|
|
|
|
|
|
"source": str(source or "control_plane"),
|
|
|
|
|
|
"ts": float(time.time()),
|
|
|
|
|
|
}
|
|
|
|
|
|
if event_ts is not None:
|
|
|
|
|
|
cmd["event_ts"] = float(event_ts)
|
|
|
|
|
|
if applies_before_ts is not None:
|
|
|
|
|
|
cmd["applies_before_ts"] = float(applies_before_ts)
|
|
|
|
|
|
if trade_id:
|
|
|
|
|
|
cmd["trade_id"] = str(trade_id)
|
|
|
|
|
|
if asset:
|
|
|
|
|
|
cmd["asset"] = str(asset)
|
|
|
|
|
|
if replay_blob is not None:
|
|
|
|
|
|
cmd["replay_blob"] = dict(replay_blob)
|
|
|
|
|
|
if self._enqueue_blue_runtime_command(cmd):
|
|
|
|
|
|
return cmd
|
|
|
|
|
|
raise RuntimeError("BLUE runtime command queue unavailable")
|
|
|
|
|
|
|
|
|
|
|
|
def _restore_capital(self):
    """Restore capital from live HZ state or ledger-backed snapshots.

    Resets the restore bookkeeping (``_restore_failed``,
    ``_restore_failure_reason``, ``_restore_source``) and then delegates to
    ``self._restore_capital_from_state``. A falsy result is logged.

    The raw scalar checkpoint is legacy-only and requires the explicit
    DOLPHIN_ALLOW_LEGACY_CAPITAL_CHECKPOINT=1 escape hatch.

    NOTE(review): this method never sets ``_restore_failed`` itself even
    though the log line says the restore is halted; presumably
    ``_restore_capital_from_state`` sets the flag — confirm.
    """
    # Start from a clean slate so a stale failure cannot leak through.
    self._restore_failed = False
    self._restore_failure_reason = ""
    self._restore_source = ""

    restored = self._restore_capital_from_state()
    if not restored:
        log(" Capital: no sane state source found — restore halted")
|
|
|
|
|
|
|
|
|
|
|
|
def _push_state(self, scan_number, vel_div, vol_ok, posture):
    """Publish the engine snapshot, write the runner heartbeat and save capital.

    Builds a JSON snapshot of capital, the open position (if any), counters
    and launch metadata, puts it under ``'engine_snapshot'`` in
    ``self.state_map``, writes a heartbeat when ``self.heartbeat_map`` is
    set, and finally calls ``self._save_capital()`` when capital is finite
    and at least 1.0.

    Args:
        scan_number: Stored as ``last_scan_number`` in the snapshot.
        vel_div: Stored as ``last_vel_div`` in the snapshot.
        vol_ok: Stored as ``vol_ok`` in the snapshot.
        posture: Stored as ``posture`` in the snapshot.

    Every exception is caught and logged; nothing propagates to the caller.
    """
    try:
        # NOTE(review): the extent of this lock is inferred from the blank
        # line that precedes the snapshot construction — confirm that only
        # the engine reads below are meant to run under eng_lock.
        with self.eng_lock:
            capital = getattr(self.eng, 'capital', 25000.0)
            # Engine uses a single NDPosition object, not a list
            pos = getattr(self.eng, 'position', None)
            if pos is not None:
                pending = self._pending_entries.get(getattr(pos, "trade_id", ""), {})
                open_notional = float(getattr(pos, 'notional', 0) or 0)
                open_positions_list = [{
                    'trade_id': getattr(pos, 'trade_id', ''),
                    'asset': pos.asset,
                    'side': 'SHORT' if pos.direction == -1 else 'LONG',
                    'entry_price': pos.entry_price,
                    'quantity': round(open_notional / pos.entry_price, 6) if pos.entry_price else 0,
                    'notional': open_notional,
                    'retraction_legs': int(pending.get('retraction_legs', 0) or 0),
                    'realized_pnl_legs_total': float(pending.get('realized_pnl_legs_total', 0.0) or 0.0),
                    'chain_root_trade_id': str(pending.get('chain_root_trade_id', getattr(pos, 'trade_id', '')) or getattr(pos, 'trade_id', '')),
                    'chain_head_leg_id': str(pending.get('chain_head_leg_id', f"{getattr(pos, 'trade_id', '')}:open") or f"{getattr(pos, 'trade_id', '')}:open"),
                    'chain_prev_leg_id': str(pending.get('chain_prev_leg_id', '') or ''),
                    'chain_seq': int(pending.get('chain_seq', pending.get('retraction_legs', 0)) or 0),
                    'chain_token': str(pending.get('chain_token', '') or ''),
                    'leverage': float(getattr(pos, 'leverage', 0) or 0),
                    'unrealized_pnl': round(pos.pnl_pct * open_notional, 2),
                }]
            else:
                open_notional = 0.0
                open_positions_list = []
            cur_leverage = (open_notional / capital) if capital and capital > 0 and math.isfinite(capital) else 0.0

        # The engine attribute may exist but hold None. math.isfinite(None)
        # raises TypeError, which would abort the whole push (heartbeat
        # included), so the None check is done once and reused below.
        capital_is_finite = capital is not None and math.isfinite(capital)

        snapshot = {
            'capital': capital if capital_is_finite else None,
            'open_positions': open_positions_list,
            'algo_version': ALGO_VERSION,
            'last_scan_number': scan_number, 'last_vel_div': vel_div,
            'vol_ok': vol_ok, 'posture': posture,
            'vol_gate_threshold': float(self.vol_p60_threshold),
            'scans_processed': self.scans_processed,
            'trades_executed': self.trades_executed,
            'bar_idx': self.bar_idx,
            'timestamp': datetime.now(timezone.utc).isoformat(),
            # Leverage envelope — for TUI slider
            'leverage_soft_cap': getattr(self.eng, 'base_max_leverage', 8.0),
            'leverage_abs_cap': getattr(self.eng, 'abs_max_leverage', 9.0),
            'open_notional': round(open_notional, 2),
            'current_leverage': round(cur_leverage, 4),
            'trade_direction_base': int(self.trade_direction),
            'trade_direction_runtime': int(self._runtime_direction),
            # Launch metadata for observability only; no trading behavior.
            'bingx_environment': str(os.environ.get("DOLPHIN_BINGX_ENV", "ENGINE") or "ENGINE").strip().upper(),
            'bingx_sizing_mode': str(os.environ.get("DOLPHIN_BINGX_SIZING_MODE", "engine") or "engine").strip().lower(),
            'bingx_allow_mainnet': bool(_env_bool("DOLPHIN_BINGX_ALLOW_MAINNET", False)),
            'bingx_default_leverage': _safe_float(os.environ.get("DOLPHIN_BINGX_DEFAULT_LEVERAGE"), 1.0),
            'bingx_exchange_leverage_cap': int(
                _safe_float(
                    os.environ.get(
                        "DOLPHIN_BINGX_EXCHANGE_LEVERAGE_CAP",
                        getattr(self.eng, 'abs_max_leverage', 3.0),
                    ),
                    3.0,
                )
            ),
            'efsm': self._efsm.snapshot() if self._efsm is not None else None,
            'advanced_sl': self._advanced_sl.snapshot_dict() if self._advanced_sl is not None else None,
        }
        # Keep a shallow copy of the last payload on the instance.
        self._last_engine_snapshot_payload = dict(snapshot)
        future = self.state_map.put('engine_snapshot', json.dumps(snapshot))
        future.add_done_callback(lambda f: None)
        # Heartbeat — MHS checks age < 30s; force blocking put to avoid
        # silent async drop/stall under client backpressure.
        if self.heartbeat_map is not None:
            hb = build_runner_heartbeat_payload(
                flow="nautilus_event_trader",
                phase="trading",
                run_date=self.current_day,
                runner="blue",
            )
            try:
                write_runner_heartbeat(self.heartbeat_map, hb)
            except Exception as hb_err:
                log(f" Heartbeat put failed: {hb_err}")
        # Persist capital so next restart resumes from here
        if capital_is_finite and capital >= 1.0:
            self._save_capital()
    except Exception as e:
        log(f" Failed to push state: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def run(self):
    """Start the trader and block until the module-level ``running`` flag clears.

    Sequence: build the engine, connect to Hazelcast, start the heartbeat
    and scan-watchdog daemon threads, restore capital and position state
    (shutting down and returning early if either restore flags a failure),
    seed the first state push, register the scan and EXF entry listeners on
    ``self.features_map``, then sleep in one-second steps while ``running``
    is true. ``self.shutdown()`` is always called when the loop exits.
    """
    global running

    def restore_halted():
        # Shared by both restore checkpoints: log, shut down, signal abort.
        if not self._restore_failed:
            return False
        log(f"RESTORE HALT: {self._restore_failure_reason}")
        self.shutdown()
        return True

    def scan_listener(event):
        self.on_scan(event)

    def exf_listener(event):
        self.on_exf_update(event)

    banner = "=" * 70
    log(banner)
    log("🐬 DOLPHIN Nautilus Event-Driven Trader Starting")
    log(banner)

    self._build_engine()
    self._connect_hz()
    heartbeat_thread = threading.Thread(target=self._heartbeat_loop, daemon=True)
    heartbeat_thread.start()
    watchdog_thread = threading.Thread(
        target=self._scan_watchdog_loop, daemon=True, name="scan_watchdog"
    )
    watchdog_thread.start()

    self._restore_capital()
    if restore_halted():
        return
    self._rollover_day()
    self._restore_position_state()
    if restore_halted():
        return

    # Seed the live snapshot immediately so engine_snapshot and
    # capital_checkpoint reflect the restored capital before scan traffic.
    try:
        self._push_state(self.bar_idx, 0.0, True, self._read_posture())
    except Exception as e:
        log(f" Startup seed push failed: {e}")

    # Each key gets the same handler for both "added" and "updated" events.
    subscriptions = (
        ('latest_eigen_scan', scan_listener),
        ('exf_latest', exf_listener),
    )
    for map_key, handler in subscriptions:
        self.features_map.add_entry_listener(
            key=map_key, include_value=True,
            updated_func=handler, added_func=handler
        )

    log("✅ Hz listener registered")
    log(f"🏷️ ALGO_VERSION: {ALGO_VERSION}")
    log("⏳ Waiting for scans...")

    # A stop request latched during startup is discarded here so the main
    # loop actually runs.
    if not running:
        log(" Startup SIGTERM latch cleared before main scan loop")
        running = True

    try:
        while running:
            time.sleep(1)
    except KeyboardInterrupt:
        log("Interrupted")
    finally:
        self.shutdown()
|
|
|
|
|
|
|
|
|
|
|
|
def shutdown(self):
    """Stop background work, close out the day and disconnect from Hazelcast.

    Steps, in order: signal the watchdog to stop, shut down the scan
    executor without waiting, call ``self.eng.end_day()`` under
    ``self.eng_lock`` when an engine and a current day exist, save the
    market-state runtime if present, and shut down the Hazelcast client.

    The end-of-day, save and disconnect steps are best-effort: a failure in
    one is logged and the remaining steps still run. Only ``Exception``
    subclasses are absorbed, so ``KeyboardInterrupt`` / ``SystemExit``
    raised during cleanup are no longer silently swallowed.
    """
    log("Shutting down...")
    self._watchdog_stop.set()
    self._scan_executor.shutdown(wait=False)

    if self.eng and self.current_day:
        try:
            with self.eng_lock:
                summary = self.eng.end_day()
            log(f"end_day: {summary}")
        except Exception as e:
            log(f"end_day failed: {e}")

    if self._market_state_runtime is not None:
        try:
            self._market_state_runtime.save()
        except Exception as e:
            # Previously ignored silently; surface it so a lost save is visible.
            log(f"market state save failed: {e}")

    if self.hz_client:
        try:
            self.hz_client.shutdown()
            log("Hz disconnected")
        except Exception as e:
            # Was a bare ``except:``, which also trapped interpreter-exit signals.
            log(f"Hz disconnect failed: {e}")

    log(f"🛑 Stopped. Scans: {self.scans_processed}, Trades: {self.trades_executed}")
|
|
|
|
|
|
|
|
|
|
|
|
def signal_handler(signum, frame):
    """Handle a process signal by clearing the module-level ``running`` flag.

    A SIGTERM that arrives less than ``_SIGTERM_STARTUP_GRACE_S`` seconds
    after ``_PROCESS_BOOT_TS`` is logged and ignored; every other signal is
    logged and sets ``running`` to ``False``.

    Args:
        signum: Signal number delivered to the process.
        frame: Current stack frame (unused).
    """
    global running
    age_s = time.time() - _PROCESS_BOOT_TS
    in_startup_grace = signum == signal.SIGTERM and age_s < _SIGTERM_STARTUP_GRACE_S
    if not in_startup_grace:
        log(f"Signal {signum} received")
        running = False
        return
    log(f"Signal {signum} received during startup grace ({age_s:.1f}s) — ignored")
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Install the SIGTERM/SIGINT handlers and run a ``DolphinLiveTrader``."""
    for handled_signal in (signal.SIGTERM, signal.SIGINT):
        signal.signal(handled_signal, signal_handler)
    DolphinLiveTrader().run()


if __name__ == '__main__':
    main()
|