""" backtest_gold_verify.py — Gold parity verification via direct engine codepath. Runs all 56 backtest dates through the same engine codepath used in production: same step_bar loop, same OB preload, same vol_ok, same hazard multiplier, same ACB, same MC forewarner. Avoids DolphinActor/Nautilus Strategy overhead (Strategy.log is Rust-backed read-only; Strategy requires a kernel context to initialise). Instead this harness directly instantiates and wires the same sub-components that DolphinActor.on_start() wires, then replicates _run_replay_day() inline. Gold targets (post-fix D_LIQ): T=2155 (exact) ROI≈+181% (no ACB, Linux) ROI≈+189% (full ACB on Windows) Usage: /usr/bin/python3 prod/backtest_gold_verify.py /usr/bin/python3 prod/backtest_gold_verify.py --summary # quick summary only """ import sys, time, argparse, yaml from pathlib import Path from datetime import datetime, timezone import numpy as np import pandas as pd HCM_DIR = Path(__file__).parent.parent sys.path.insert(0, str(HCM_DIR / 'nautilus_dolphin')) sys.path.insert(0, str(HCM_DIR)) PARQUET_DIR = HCM_DIR / 'vbt_cache' MC_MODELS_DIR = str(HCM_DIR / 'nautilus_dolphin' / 'mc_results' / 'models') CONFIG_PATH = Path(__file__).parent / 'configs' / 'blue.yml' INITIAL_CAPITAL = 25_000.0 GOLD_T = 2155 GOLD_ROI_LO = 175.0 # lower bound (no ACB, no w750) GOLD_ROI_HI = 195.0 # upper bound (full ACB) _META_COLS_SET = { 'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity', 'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150', } _MC_BASE_CFG = { 'trial_id': 0, 'vel_div_threshold': -0.020, 'vel_div_extreme': -0.050, 'use_direction_confirm': True, 'dc_lookback_bars': 7, 'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True, 'dc_leverage_boost': 1.00, 'dc_leverage_reduce': 0.50, 'vd_trend_lookback': 10, 'min_leverage': 0.50, 'max_leverage': 5.00, 'leverage_convexity': 3.00, 'fraction': 0.20, 'use_alpha_layers': True, 'use_dynamic_leverage': True, 'fixed_tp_pct': 0.0095, 'stop_pct': 1.00, 'max_hold_bars': 120, 'use_sp_fees': True, 'use_sp_slippage': True, 'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.50, 'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': 0.40, 'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.00, 'use_asset_selection': True, 'min_irp_alignment': 0.45, 'lookback': 100, 'acb_beta_high': 0.80, 'acb_beta_low': 0.20, 'acb_w750_threshold_pct': 60, } def _load_config() -> dict: with open(CONFIG_PATH) as f: return yaml.safe_load(f) def _build_engine(cfg: dict, initial_capital: float): """Mirror DolphinActor.on_start() engine + subsystem wiring.""" from nautilus_dolphin.nautilus.proxy_boost_engine import create_boost_engine, DEFAULT_BOOST_MODE from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker from nautilus_dolphin.nautilus.ob_provider import MockOBProvider from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine eng_cfg = cfg.get('engine', {}) boost_mode = eng_cfg.get('boost_mode', DEFAULT_BOOST_MODE) engine = create_boost_engine( mode=boost_mode, initial_capital=initial_capital, vel_div_threshold=eng_cfg.get('vel_div_threshold', -0.02), vel_div_extreme=eng_cfg.get('vel_div_extreme', -0.05), min_leverage=eng_cfg.get('min_leverage', 0.5), max_leverage=eng_cfg.get('max_leverage', 5.0), abs_max_leverage=eng_cfg.get('abs_max_leverage', 6.0), leverage_convexity=eng_cfg.get('leverage_convexity', 3.0), fraction=eng_cfg.get('fraction', 0.20), fixed_tp_pct=eng_cfg.get('fixed_tp_pct', 0.0095), 
        stop_pct=eng_cfg.get('stop_pct', 1.0),
        max_hold_bars=eng_cfg.get('max_hold_bars', 120),
        use_direction_confirm=eng_cfg.get('use_direction_confirm', True),
        dc_lookback_bars=eng_cfg.get('dc_lookback_bars', 7),
        dc_min_magnitude_bps=eng_cfg.get('dc_min_magnitude_bps', 0.75),
        dc_skip_contradicts=eng_cfg.get('dc_skip_contradicts', True),
        dc_leverage_boost=eng_cfg.get('dc_leverage_boost', 1.0),
        dc_leverage_reduce=eng_cfg.get('dc_leverage_reduce', 0.5),
        use_asset_selection=eng_cfg.get('use_asset_selection', True),
        min_irp_alignment=eng_cfg.get('min_irp_alignment', 0.45),
        use_sp_fees=eng_cfg.get('use_sp_fees', True),
        use_sp_slippage=eng_cfg.get('use_sp_slippage', True),
        sp_maker_entry_rate=eng_cfg.get('sp_maker_entry_rate', 0.62),
        sp_maker_exit_rate=eng_cfg.get('sp_maker_exit_rate', 0.50),
        use_ob_edge=eng_cfg.get('use_ob_edge', True),
        ob_edge_bps=eng_cfg.get('ob_edge_bps', 5.0),
        ob_confirm_rate=eng_cfg.get('ob_confirm_rate', 0.40),
        lookback=eng_cfg.get('lookback', 100),
        use_alpha_layers=eng_cfg.get('use_alpha_layers', True),
        use_dynamic_leverage=eng_cfg.get('use_dynamic_leverage', True),
        seed=eng_cfg.get('seed', 42),
    )
    engine.set_esoteric_hazard_multiplier(0.0)  # gold spec: hazard=0 → base_max_leverage=8.0
    print(f"[INIT] Engine created: {type(engine).__name__}, "
          f"base_max_leverage={getattr(engine, 'base_max_leverage', '?')}", flush=True)

    # MC Forewarner
    mc_models_dir = MC_MODELS_DIR
    if Path(mc_models_dir).exists():
        try:
            from mc.mc_ml import DolphinForewarner
            fw = DolphinForewarner(models_dir=mc_models_dir)
            engine.set_mc_forewarner(fw, _MC_BASE_CFG)
            print(f"[INIT] DolphinForewarner wired from {mc_models_dir}", flush=True)
        except Exception as e:
            print(f"[WARN] MC Forewarner init failed: {e}", flush=True)
    else:
        print(f"[WARN] MC models dir not found: {mc_models_dir}", flush=True)

    # Discover asset columns from first 5 parquet files
    _all_bt_assets: list = []
    try:
        _seen: set = set()
        for _pf in sorted(PARQUET_DIR.glob('*.parquet'))[:5]:
            _df_h = pd.read_parquet(_pf)
            _seen.update(c for c in _df_h.columns if c not in _META_COLS_SET)
        _all_bt_assets = sorted(_seen)
        print(f"[INIT] Discovered {len(_all_bt_assets)} asset columns: {_all_bt_assets}", flush=True)
    except Exception as e:
        print(f"[WARN] Could not scan parquet assets: {e}", flush=True)

    # ACB injection (matches gold_repro)
    try:
        acb = AdaptiveCircuitBreaker()
        _linux_eigen_paths = [
            Path('/mnt/ng6_data/eigenvalues'),
            Path('/mnt/dolphin_training/data/eigenvalues'),
            Path('/mnt/dolphinng6_data/eigenvalues'),
        ]
        for _ep in _linux_eigen_paths:
            if _ep.exists():
                acb.config.EIGENVALUES_PATH = _ep
                print(f"[INIT] ACB eigenvalues path → {_ep}", flush=True)
                break
        files = sorted(PARQUET_DIR.glob('*.parquet'))
        preload_dates = [pf.stem for pf in files]
        acb.preload_w750(preload_dates)
        engine.set_acb(acb)
        print(f"[INIT] ACB injected ({len(preload_dates)} dates preloaded)", flush=True)
    except Exception as e:
        print(f"[WARN] ACB injection failed: {e}", flush=True)

    # MockOBProvider injection (Gold Biases)
    # Preload ONCE with "mock" — matches exp_shared.py gold reference exactly.
    # MockOBProvider produces identical synthetic data on every call, so a single
    # preload populates the full snap-index cache used for all 56 replay days.
    try:
        gold_biases = {
            'BTCUSDT': -0.086,
            'ETHUSDT': -0.092,
            'BNBUSDT': +0.05,
            'SOLUSDT': +0.05,
        }
        mock_ob = MockOBProvider(
            imbalance_bias=-0.09,
            depth_scale=1.0,
            assets=_all_bt_assets,
            imbalance_biases=gold_biases,
        )
        ob_eng = OBFeatureEngine(mock_ob)
        ob_eng.preload_date("mock", _all_bt_assets)  # gold method: single global preload
        engine.set_ob_engine(ob_eng)
        print(f"[INIT] MockOBProvider injected + preloaded (Gold Biases, {len(_all_bt_assets)} assets)", flush=True)
    except Exception as e:
        print(f"[WARN] OB injection failed: {e}", flush=True)

    return engine


def _compute_vol_ok(df: pd.DataFrame, vol_p60: float) -> np.ndarray:
    """Gold vol_ok method — matches exp_shared.load_data() / process_day() exactly.

    Uses segment-based dvol: std(diff(seg) / seg[:-1]) over a 50-bar sliding
    window, starting at bar 50. Rows without a finite dvol, or below the
    threshold, are False.
    """
    vol_ok = np.zeros(len(df), dtype=bool)
    if 'BTCUSDT' not in df.columns or vol_p60 <= 0:
        return vol_ok
    bp = df['BTCUSDT'].values
    dv = np.full(len(bp), np.nan)
    for i in range(50, len(bp)):
        seg = bp[max(0, i - 50):i]
        if len(seg) < 10:
            continue
        with np.errstate(invalid='ignore', divide='ignore'):
            rets = np.diff(seg) / seg[:-1]
        fin = rets[np.isfinite(rets)]
        if len(fin) >= 5:
            dv[i] = float(np.std(fin))
    vol_ok = np.where(np.isfinite(dv), dv > vol_p60, False)
    return vol_ok


def _compute_mae_for_day(trades_today: list, df: pd.DataFrame) -> list:
    """Compute per-trade Maximum Adverse Excursion (MAE) for trades closed today.

    For SHORT trades, adverse excursion = price moving UP from entry:
        MAE_pct = max(price[entry_bar:exit_bar+1] / entry_price - 1) * 100   (positive = adverse)

    Uses close prices only (1-min bars don't have OHLC), so MAE is a lower-bound
    estimate — true intra-bar MAE could be higher.

    Returns list of (trade_record, mae_pct) pairs.
    """
    results = []
    for t in trades_today:
        asset = getattr(t, 'asset', None)
        entry_bar = getattr(t, 'entry_bar', None)
        exit_bar = getattr(t, 'exit_bar', None)
        entry_price = getattr(t, 'entry_price', None)
        direction = getattr(t, 'direction', -1)
        if asset is None or entry_bar is None or exit_bar is None or not entry_price:
            results.append((t, float('nan')))
            continue
        if asset not in df.columns:
            results.append((t, float('nan')))
            continue
        lo = max(0, int(entry_bar))
        hi = min(len(df) - 1, int(exit_bar))
        prices = df[asset].iloc[lo:hi + 1].values.astype(float)
        prices = prices[np.isfinite(prices) & (prices > 0)]
        if len(prices) == 0:
            results.append((t, float('nan')))
            continue
        if direction == -1:
            # SHORT: adverse = price going up
            mae_pct = float(np.max(prices) / entry_price - 1.0) * 100.0
        else:
            # LONG: adverse = price going down
            mae_pct = float(1.0 - np.min(prices) / entry_price) * 100.0
        mae_pct = max(0.0, mae_pct)  # clamp: negative means favorable the whole time
        results.append((t, mae_pct))
    return results


def _run_day(engine, cfg: dict, date_str: str, posture: str = 'APEX') -> tuple:
    """Run a single replay day via engine.process_day() — identical to the gold reference path.

    Uses process_day() directly (same as test_dliq_fix_verify.py / exp_shared.py) so
    NaN-vel_div skipping, bar_idx assignment, and proxy_B updates are bit-for-bit
    identical. OB preload is done once globally in _build_engine(), not per-day.

    Returns (n_bars, df) where df is the loaded parquet (used for MAE computation).
""" dir_str = cfg.get('direction', 'short_only') direction_val = 1 if dir_str in ['long', 'long_only'] else -1 pq_file = PARQUET_DIR / f"{date_str}.parquet" if not pq_file.exists(): print(f"[WARN] No parquet for {date_str} — skipping", flush=True) return 0, pd.DataFrame() df = pd.read_parquet(pq_file) asset_columns = [c for c in df.columns if c not in _META_COLS_SET] vol_p60 = float( cfg.get('paper_trade', {}).get('vol_p60') or cfg.get('vol_p60') or 0.00009868 ) vol_ok = _compute_vol_ok(df, vol_p60) engine.process_day( date_str, df, asset_columns, vol_regime_ok=vol_ok, direction=direction_val, posture=posture, ) return len(df), df def run_verify(summary_only: bool = False): cfg = _load_config() files = sorted(PARQUET_DIR.glob('*.parquet')) if not files: print(f"[ERROR] No parquet files in {PARQUET_DIR}", flush=True) sys.exit(1) all_dates = [pf.stem for pf in files] print(f"[VERIFY] {len(files)} dates: {all_dates[0]} → {all_dates[-1]}", flush=True) engine = _build_engine(cfg, INITIAL_CAPITAL) total_T = 0 peak_cap = INITIAL_CAPITAL max_dd = 0.0 all_mae: list = [] # (mae_pct, trade) — collected across all days t0 = time.time() for pf in files: date_str = pf.stem t_before = len(engine.trade_history) _, day_df = _run_day(engine, cfg, date_str) cap_after = engine.capital trades_today = engine.trade_history[t_before:] day_trades = len(trades_today) total_T = len(engine.trade_history) peak_cap = max(peak_cap, cap_after) dd = (peak_cap - cap_after) / peak_cap * 100.0 max_dd = max(max_dd, dd) # MAE per trade (uses same parquet df, no extra I/O) if not day_df.empty and trades_today: for t, mae in _compute_mae_for_day(trades_today, day_df): all_mae.append((mae, t)) if not summary_only: print( f"{date_str}: T+{day_trades:3d} (cum={total_T:4d}) " f"${cap_after:9,.0f} dd={dd:.2f}%", flush=True, ) elapsed = time.time() - t0 roi = (engine.capital / INITIAL_CAPITAL - 1.0) * 100.0 # ── MAE summary ───────────────────────────────────────────────────────────── valid_mae = [(m, t) for m, t in all_mae if not (m != m)] # exclude NaN mae_arr = np.array([m for m, _ in valid_mae]) if valid_mae else np.array([]) print(flush=True) print(f"{'='*60}", flush=True) print(f"RESULT: T={total_T} ROI={roi:+.2f}% DD={max_dd:.2f}% ({elapsed:.0f}s)", flush=True) print(f"TARGET: T={GOLD_T} ROI={GOLD_ROI_LO:.0f}–{GOLD_ROI_HI:.0f}% (gold range)", flush=True) print(flush=True) if len(mae_arr) > 0: worst_mae = float(np.max(mae_arr)) p90_mae = float(np.percentile(mae_arr, 90)) p50_mae = float(np.percentile(mae_arr, 50)) worst_idx = int(np.argmax(mae_arr)) worst_t = valid_mae[worst_idx][1] mae_as_dd_pct = (worst_mae / max_dd * 100.0) if max_dd > 0 else float('nan') print(f"MAE (close-price lower bound, SHORT=adverse-up):", flush=True) print(f" worst single trade : {worst_mae:.4f}% " f"({worst_t.asset if hasattr(worst_t,'asset') else '?'} " f"bars {getattr(worst_t,'entry_bar','?')}→{getattr(worst_t,'exit_bar','?')} " f"exit={getattr(worst_t,'exit_reason','?')})", flush=True) print(f" worst / max_DD : {mae_as_dd_pct:.1f}% ({worst_mae:.4f}% vs DD={max_dd:.2f}%)", flush=True) print(f" p90 / p50 / mean : {p90_mae:.4f}% / {p50_mae:.4f}% / {np.mean(mae_arr):.4f}%", flush=True) print(flush=True) t_ok = (total_T == GOLD_T) roi_ok = (GOLD_ROI_LO <= roi <= GOLD_ROI_HI) print(f"T={total_T} {'✓ PASS' if t_ok else '✗ FAIL (expected 2155)':30s}", flush=True) print(f"ROI={roi:+.2f}% {'✓ PASS' if roi_ok else f'✗ FAIL (expected {GOLD_ROI_LO:.0f}–{GOLD_ROI_HI:.0f}%)':30s}", flush=True) print(f"{'='*60}", flush=True) if t_ok and roi_ok: print("\n=== GOLD 
        return True
    else:
        print("\n!!! GOLD PARITY MISMATCH — investigate !!!\n", flush=True)
        return False


if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    ap.add_argument('--summary', action='store_true',
                    help='Print summary only (no per-day output)')
    args = ap.parse_args()
    ok = run_verify(summary_only=args.summary)
    sys.exit(0 if ok else 1)