Files
DOLPHIN/prod/backtest_gold_verify.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

385 lines
16 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
backtest_gold_verify.py — Gold parity verification via direct engine codepath.
Runs all 56 backtest dates through the same engine codepath used in production:
same step_bar loop, same OB preload, same vol_ok, same hazard multiplier,
same ACB, same MC forewarner.
Avoids DolphinActor/Nautilus Strategy overhead (Strategy.log is Rust-backed
read-only; Strategy requires a kernel context to initialise). Instead this
harness directly instantiates and wires the same sub-components that
DolphinActor.on_start() wires, then replicates _run_replay_day() inline.
Gold targets (post-fix D_LIQ):
T=2155 (exact) ROI≈+181% (no ACB, Linux) ROI≈+189% (full ACB on Windows)
Usage:
/usr/bin/python3 prod/backtest_gold_verify.py
/usr/bin/python3 prod/backtest_gold_verify.py --summary # quick summary only
"""
import sys, time, argparse, yaml
from pathlib import Path
from datetime import datetime, timezone
import numpy as np
import pandas as pd
# ── Paths & gold targets ────────────────────────────────────────────────────
# This file lives in prod/, so parent.parent is the repo root.
HCM_DIR = Path(__file__).parent.parent
# Make nautilus_dolphin/ and the repo root importable (engine + mc packages).
sys.path.insert(0, str(HCM_DIR / 'nautilus_dolphin'))
sys.path.insert(0, str(HCM_DIR))
PARQUET_DIR = HCM_DIR / 'vbt_cache'  # one <date>.parquet per backtest day
MC_MODELS_DIR = str(HCM_DIR / 'nautilus_dolphin' / 'mc_results' / 'models')
CONFIG_PATH = Path(__file__).parent / 'configs' / 'blue.yml'
INITIAL_CAPITAL = 25_000.0
# Gold-parity acceptance gates: exact trade count, ROI within [LO, HI].
GOLD_T = 2155
GOLD_ROI_LO = 175.0 # lower bound (no ACB, no w750)
GOLD_ROI_HI = 195.0 # upper bound (full ACB)
# Columns in the parquet files that are metadata, not asset prices; every
# other column is treated as a tradeable asset price series.
_META_COLS_SET = {
    'timestamp', 'scan_number', 'v50_lambda_max_velocity',
    'v150_lambda_max_velocity', 'v300_lambda_max_velocity',
    'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150',
}
# Baseline trial config handed to engine.set_mc_forewarner() in
# _build_engine() — gold reference parameters.
_MC_BASE_CFG = {
    'trial_id': 0, 'vel_div_threshold': -0.020, 'vel_div_extreme': -0.050,
    'use_direction_confirm': True, 'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.00, 'dc_leverage_reduce': 0.50,
    'vd_trend_lookback': 10, 'min_leverage': 0.50, 'max_leverage': 5.00,
    'leverage_convexity': 3.00, 'fraction': 0.20,
    'use_alpha_layers': True, 'use_dynamic_leverage': True,
    'fixed_tp_pct': 0.0095, 'stop_pct': 1.00, 'max_hold_bars': 120,
    'use_sp_fees': True, 'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.50,
    'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': 0.40,
    'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.00,
    'use_asset_selection': True, 'min_irp_alignment': 0.45, 'lookback': 100,
    'acb_beta_high': 0.80, 'acb_beta_low': 0.20, 'acb_w750_threshold_pct': 60,
}
def _load_config() -> dict:
    """Parse and return the BLUE harness YAML config at CONFIG_PATH."""
    with CONFIG_PATH.open() as fh:
        return yaml.safe_load(fh)
def _build_engine(cfg: dict, initial_capital: float):
    """Mirror DolphinActor.on_start() engine + subsystem wiring.

    Creates the boost engine from the ``engine`` section of *cfg* (every knob
    falls back to the gold-run default), then wires the same subsystems the
    production actor wires: MC forewarner, ACB with preloaded w750 data, and
    a MockOBProvider-backed OB feature engine with the gold biases.  Each
    subsystem injection is best-effort — failures print a [WARN] line and the
    engine is still returned.
    """
    from nautilus_dolphin.nautilus.proxy_boost_engine import create_boost_engine, DEFAULT_BOOST_MODE
    from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
    from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
    from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
    eng_cfg = cfg.get('engine', {})
    boost_mode = eng_cfg.get('boost_mode', DEFAULT_BOOST_MODE)
    engine = create_boost_engine(
        mode=boost_mode,
        initial_capital=initial_capital,
        vel_div_threshold=eng_cfg.get('vel_div_threshold', -0.02),
        vel_div_extreme=eng_cfg.get('vel_div_extreme', -0.05),
        min_leverage=eng_cfg.get('min_leverage', 0.5),
        max_leverage=eng_cfg.get('max_leverage', 5.0),
        abs_max_leverage=eng_cfg.get('abs_max_leverage', 6.0),
        leverage_convexity=eng_cfg.get('leverage_convexity', 3.0),
        fraction=eng_cfg.get('fraction', 0.20),
        fixed_tp_pct=eng_cfg.get('fixed_tp_pct', 0.0095),
        stop_pct=eng_cfg.get('stop_pct', 1.0),
        max_hold_bars=eng_cfg.get('max_hold_bars', 120),
        use_direction_confirm=eng_cfg.get('use_direction_confirm', True),
        dc_lookback_bars=eng_cfg.get('dc_lookback_bars', 7),
        dc_min_magnitude_bps=eng_cfg.get('dc_min_magnitude_bps', 0.75),
        dc_skip_contradicts=eng_cfg.get('dc_skip_contradicts', True),
        dc_leverage_boost=eng_cfg.get('dc_leverage_boost', 1.0),
        dc_leverage_reduce=eng_cfg.get('dc_leverage_reduce', 0.5),
        use_asset_selection=eng_cfg.get('use_asset_selection', True),
        min_irp_alignment=eng_cfg.get('min_irp_alignment', 0.45),
        use_sp_fees=eng_cfg.get('use_sp_fees', True),
        use_sp_slippage=eng_cfg.get('use_sp_slippage', True),
        sp_maker_entry_rate=eng_cfg.get('sp_maker_entry_rate', 0.62),
        sp_maker_exit_rate=eng_cfg.get('sp_maker_exit_rate', 0.50),
        use_ob_edge=eng_cfg.get('use_ob_edge', True),
        ob_edge_bps=eng_cfg.get('ob_edge_bps', 5.0),
        ob_confirm_rate=eng_cfg.get('ob_confirm_rate', 0.40),
        lookback=eng_cfg.get('lookback', 100),
        use_alpha_layers=eng_cfg.get('use_alpha_layers', True),
        use_dynamic_leverage=eng_cfg.get('use_dynamic_leverage', True),
        seed=eng_cfg.get('seed', 42),
    )
    engine.set_esoteric_hazard_multiplier(0.0) # gold spec: hazard=0 → base_max_leverage=8.0
    print(f"[INIT] Engine created: {type(engine).__name__}, base_max_leverage={getattr(engine,'base_max_leverage','?')}", flush=True)
    # MC Forewarner — only wired when the trained models directory exists.
    mc_models_dir = MC_MODELS_DIR
    if Path(mc_models_dir).exists():
        try:
            from mc.mc_ml import DolphinForewarner
            fw = DolphinForewarner(models_dir=mc_models_dir)
            engine.set_mc_forewarner(fw, _MC_BASE_CFG)
            print(f"[INIT] DolphinForewarner wired from {mc_models_dir}", flush=True)
        except Exception as e:
            print(f"[WARN] MC Forewarner init failed: {e}", flush=True)
    else:
        print(f"[WARN] MC models dir not found: {mc_models_dir}", flush=True)
    # Discover asset columns from first 5 parquet files (union of non-meta
    # columns) — used to seed the mock OB provider below.
    _all_bt_assets: list = []
    try:
        _seen: set = set()
        for _pf in sorted(PARQUET_DIR.glob('*.parquet'))[:5]:
            _df_h = pd.read_parquet(_pf)
            _seen.update(c for c in _df_h.columns if c not in _META_COLS_SET)
        _all_bt_assets = sorted(_seen)
        print(f"[INIT] Discovered {len(_all_bt_assets)} asset columns: {_all_bt_assets}", flush=True)
    except Exception as e:
        print(f"[WARN] Could not scan parquet assets: {e}", flush=True)
    # ACB injection (matches gold_repro)
    try:
        acb = AdaptiveCircuitBreaker()
        # Candidate eigenvalue mounts on Linux hosts; first existing one wins.
        _linux_eigen_paths = [
            Path('/mnt/ng6_data/eigenvalues'),
            Path('/mnt/dolphin_training/data/eigenvalues'),
            Path('/mnt/dolphinng6_data/eigenvalues'),
        ]
        for _ep in _linux_eigen_paths:
            if _ep.exists():
                acb.config.EIGENVALUES_PATH = _ep
                print(f"[INIT] ACB eigenvalues path → {_ep}", flush=True)
                break
        # Preload w750 data for every cached backtest date (parquet stems).
        files = sorted(PARQUET_DIR.glob('*.parquet'))
        preload_dates = [pf.stem for pf in files]
        acb.preload_w750(preload_dates)
        engine.set_acb(acb)
        print(f"[INIT] ACB injected ({len(preload_dates)} dates preloaded)", flush=True)
    except Exception as e:
        print(f"[WARN] ACB injection failed: {e}", flush=True)
    # MockOBProvider injection (Gold Biases)
    # Preload ONCE with "mock" — matches exp_shared.py gold reference exactly.
    # MockOBProvider produces identical synthetic data on every call so a single
    # preload populates the full snap-index cache used for all 56 replay days.
    try:
        # Per-asset imbalance biases used by the gold reference run.
        gold_biases = {
            'BTCUSDT': -0.086, 'ETHUSDT': -0.092, 'BNBUSDT': +0.05, 'SOLUSDT': +0.05,
        }
        mock_ob = MockOBProvider(
            imbalance_bias=-0.09, depth_scale=1.0,
            assets=_all_bt_assets,
            imbalance_biases=gold_biases,
        )
        ob_eng = OBFeatureEngine(mock_ob)
        ob_eng.preload_date("mock", _all_bt_assets) # gold method: single global preload
        engine.set_ob_engine(ob_eng)
        print(f"[INIT] MockOBProvider injected + preloaded (Gold Biases, {len(_all_bt_assets)} assets)", flush=True)
    except Exception as e:
        print(f"[WARN] OB injection failed: {e}", flush=True)
    return engine
def _compute_vol_ok(df: pd.DataFrame, vol_p60: float) -> np.ndarray:
"""Gold vol_ok method — matches exp_shared.load_data() / process_day() exactly.
Uses segment-based dvol: std(diff(seg) / seg[:-1]) for 50-bar sliding window,
starting at bar 50. Rows without finite dvol or below threshold → False.
"""
vol_ok = np.zeros(len(df), dtype=bool)
if 'BTCUSDT' not in df.columns or vol_p60 <= 0:
return vol_ok
bp = df['BTCUSDT'].values
dv = np.full(len(bp), np.nan)
for i in range(50, len(bp)):
seg = bp[max(0, i - 50):i]
if len(seg) < 10:
continue
with np.errstate(invalid='ignore', divide='ignore'):
rets = np.diff(seg) / seg[:-1]
fin = rets[np.isfinite(rets)]
if len(fin) >= 5:
dv[i] = float(np.std(fin))
vol_ok = np.where(np.isfinite(dv), dv > vol_p60, False)
return vol_ok
def _compute_mae_for_day(trades_today: list, df: pd.DataFrame) -> list:
"""Compute per-trade Maximum Adverse Excursion (MAE) for trades closed today.
For SHORT trades, adverse excursion = price moving UP from entry.
MAE_pct = max(price[entry_bar:exit_bar+1] / entry_price - 1) * 100 (positive = adverse)
Uses close prices only (1-min bars don't have OHLC), so MAE is a lower-bound
estimate — true intra-bar MAE could be higher.
Returns list of (trade_record, mae_pct) pairs.
"""
results = []
for t in trades_today:
asset = getattr(t, 'asset', None)
entry_bar = getattr(t, 'entry_bar', None)
exit_bar = getattr(t, 'exit_bar', None)
entry_price = getattr(t, 'entry_price', None)
direction = getattr(t, 'direction', -1)
if asset is None or entry_bar is None or exit_bar is None or not entry_price:
results.append((t, float('nan')))
continue
if asset not in df.columns:
results.append((t, float('nan')))
continue
lo = max(0, int(entry_bar))
hi = min(len(df) - 1, int(exit_bar))
prices = df[asset].iloc[lo:hi + 1].values.astype(float)
prices = prices[np.isfinite(prices) & (prices > 0)]
if len(prices) == 0:
results.append((t, float('nan')))
continue
if direction == -1: # SHORT: adverse = price going up
mae_pct = float(np.max(prices) / entry_price - 1.0) * 100.0
else: # LONG: adverse = price going down
mae_pct = float(1.0 - np.min(prices) / entry_price) * 100.0
mae_pct = max(0.0, mae_pct) # clamp: negative means favorable the whole time
results.append((t, mae_pct))
return results
def _run_day(engine, cfg: dict, date_str: str, posture: str = 'APEX') -> tuple:
    """Run a single replay day via engine.process_day() — identical to gold reference path.

    Delegates to process_day() directly (same as test_dliq_fix_verify.py /
    exp_shared.py) so NaN-vel_div skipping, bar_idx assignment, and proxy_B
    updates are bit-for-bit identical.  OB preload happens once globally in
    _build_engine(), not per-day.

    Returns (n_bars, df) where df is the loaded parquet (used for MAE
    computation); (0, empty DataFrame) when no parquet exists for the date.
    """
    trade_direction = 1 if cfg.get('direction', 'short_only') in ['long', 'long_only'] else -1
    pq_file = PARQUET_DIR / f"{date_str}.parquet"
    if not pq_file.exists():
        print(f"[WARN] No parquet for {date_str} — skipping", flush=True)
        return 0, pd.DataFrame()
    df = pd.read_parquet(pq_file)
    tradeable = [col for col in df.columns if col not in _META_COLS_SET]
    # Threshold resolution: paper_trade.vol_p60, then top-level vol_p60, then
    # the gold default.
    vol_p60 = float(
        cfg.get('paper_trade', {}).get('vol_p60')
        or cfg.get('vol_p60')
        or 0.00009868
    )
    engine.process_day(
        date_str,
        df,
        tradeable,
        vol_regime_ok=_compute_vol_ok(df, vol_p60),
        direction=trade_direction,
        posture=posture,
    )
    return len(df), df
def run_verify(summary_only: bool = False):
    """Replay every cached date through one engine and check gold parity.

    Runs all parquet dates sequentially through _run_day(), tracking cumulative
    trades, close-of-day drawdown, and per-trade MAE, then prints a summary
    and pass/fail against GOLD_T and the [GOLD_ROI_LO, GOLD_ROI_HI] ROI range.
    Returns True on parity, False on mismatch.  *summary_only* suppresses the
    per-day progress lines.
    """
    cfg = _load_config()
    files = sorted(PARQUET_DIR.glob('*.parquet'))
    if not files:
        print(f"[ERROR] No parquet files in {PARQUET_DIR}", flush=True)
        sys.exit(1)
    all_dates = [pf.stem for pf in files]
    # NOTE(review): a range-separator character may have been lost between the
    # two dates in this f-string during extraction — verify against original.
    print(f"[VERIFY] {len(files)} dates: {all_dates[0]}{all_dates[-1]}", flush=True)
    engine = _build_engine(cfg, INITIAL_CAPITAL)
    total_T = 0
    peak_cap = INITIAL_CAPITAL
    max_dd = 0.0
    all_mae: list = [] # (mae_pct, trade) — collected across all days
    t0 = time.time()
    for pf in files:
        date_str = pf.stem
        # Trades closed on previous days — today's are the slice after this.
        t_before = len(engine.trade_history)
        _, day_df = _run_day(engine, cfg, date_str)
        cap_after = engine.capital
        trades_today = engine.trade_history[t_before:]
        day_trades = len(trades_today)
        total_T = len(engine.trade_history)
        # Drawdown tracked at close-of-day granularity against running peak.
        peak_cap = max(peak_cap, cap_after)
        dd = (peak_cap - cap_after) / peak_cap * 100.0
        max_dd = max(max_dd, dd)
        # MAE per trade (uses same parquet df, no extra I/O)
        if not day_df.empty and trades_today:
            for t, mae in _compute_mae_for_day(trades_today, day_df):
                all_mae.append((mae, t))
        if not summary_only:
            print(
                f"{date_str}: T+{day_trades:3d} (cum={total_T:4d}) "
                f"${cap_after:9,.0f} dd={dd:.2f}%",
                flush=True,
            )
    elapsed = time.time() - t0
    roi = (engine.capital / INITIAL_CAPITAL - 1.0) * 100.0
    # ── MAE summary ─────────────────────────────────────────────────────────────
    valid_mae = [(m, t) for m, t in all_mae if not (m != m)] # exclude NaN
    mae_arr = np.array([m for m, _ in valid_mae]) if valid_mae else np.array([])
    print(flush=True)
    print(f"{'='*60}", flush=True)
    print(f"RESULT: T={total_T} ROI={roi:+.2f}% DD={max_dd:.2f}% ({elapsed:.0f}s)", flush=True)
    print(f"TARGET: T={GOLD_T} ROI={GOLD_ROI_LO:.0f}{GOLD_ROI_HI:.0f}% (gold range)", flush=True)
    print(flush=True)
    if len(mae_arr) > 0:
        worst_mae = float(np.max(mae_arr))
        p90_mae = float(np.percentile(mae_arr, 90))
        p50_mae = float(np.percentile(mae_arr, 50))
        worst_idx = int(np.argmax(mae_arr))
        worst_t = valid_mae[worst_idx][1]
        # Worst single-trade MAE expressed as a fraction of the run's max DD.
        mae_as_dd_pct = (worst_mae / max_dd * 100.0) if max_dd > 0 else float('nan')
        print(f"MAE (close-price lower bound, SHORT=adverse-up):", flush=True)
        print(f" worst single trade : {worst_mae:.4f}% "
              f"({worst_t.asset if hasattr(worst_t,'asset') else '?'} "
              f"bars {getattr(worst_t,'entry_bar','?')}{getattr(worst_t,'exit_bar','?')} "
              f"exit={getattr(worst_t,'exit_reason','?')})", flush=True)
        print(f" worst / max_DD : {mae_as_dd_pct:.1f}% ({worst_mae:.4f}% vs DD={max_dd:.2f}%)", flush=True)
        print(f" p90 / p50 / mean : {p90_mae:.4f}% / {p50_mae:.4f}% / {np.mean(mae_arr):.4f}%", flush=True)
        print(flush=True)
    # Parity gates: exact trade count; ROI within the gold range inclusive.
    t_ok = (total_T == GOLD_T)
    roi_ok = (GOLD_ROI_LO <= roi <= GOLD_ROI_HI)
    print(f"T={total_T} {'✓ PASS' if t_ok else '✗ FAIL (expected 2155)':30s}", flush=True)
    print(f"ROI={roi:+.2f}% {'✓ PASS' if roi_ok else f'✗ FAIL (expected {GOLD_ROI_LO:.0f}{GOLD_ROI_HI:.0f}%)':30s}", flush=True)
    print(f"{'='*60}", flush=True)
    if t_ok and roi_ok:
        print("\n=== GOLD PARITY CONFIRMED ===\n", flush=True)
        return True
    else:
        print("\n!!! GOLD PARITY MISMATCH — investigate !!!\n", flush=True)
        return False
if __name__ == '__main__':
    # CLI entry point: exit code 0 on gold parity, 1 on mismatch.
    ap = argparse.ArgumentParser()
    ap.add_argument('--summary', action='store_true', help='Print summary only (no per-day output)')
    args = ap.parse_args()
    ok = run_verify(summary_only=args.summary)
    sys.exit(0 if ok else 1)