"""
Task 5: Backtest with proxy_B Flint Gate vs Gold Standard.

Runs the engine with and without the proxy_B gate over every parquet file in
VBT_DIR and prints a comparison table against a hard-coded gold reference row.

Gold reference:
  The table prints the values stored in the GOLD dict below
  (ROI 88.55%, PF 1.215, DD 15.05%, Sharpe 4.38, Trades 2155).
  NOTE(review): this header previously quoted
  Gold: ROI~+44.89%, PF~1.123, DD~14.95%, Sharpe~2.50, Trades~2128
  (same 55-day NG3 5s dataset, same engine stack). The two sets disagree;
  confirm which one is current.

Gate variants:
  A. No gate (baseline reproduction — sanity check)
  B. Fixed threshold=0.0  (allow when proxy_B > 0)
  C. Adaptive p50         (allow when proxy_B > rolling median)
  D. Adaptive p75         (allow when proxy_B > rolling p75)
  Only A and B are present in `configs` and therefore actually run;
  C and D are listed for reference only.

Measures: ROI, PF, DD, WR, Sharpe, Trades, gate suppression rate.
DOES NOT MODIFY ANY PRODUCTION CODE.
"""
import sys, time, math
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd

_HERE = Path(__file__).resolve().parent
_ND_ROOT = _HERE.parent  # nautilus_dolphin/ outer dir — contains nautilus_dolphin pkg + mc/
# Insert ND_ROOT at index 0 so it takes priority over any stub nautilus_dolphin at project root
sys.path.insert(0, str(_ND_ROOT))

print("Compiling numba kernels...")
t0c = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
    OBFeatureEngine, compute_imbalance_nb, compute_depth_1pct_nb,
    compute_depth_quality_nb, compute_fill_probability_nb, compute_spread_proxy_nb,
    compute_depth_asymmetry_nb, compute_imbalance_persistence_nb,
    compute_withdrawal_velocity_nb, compute_market_agreement_nb, compute_cascade_signal_nb,
)
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
# NOTE: an earlier comment here said DolphinForewarner was skipped because the
# pickle hangs on an sklearn 1.8 vs 1.7.1 mismatch (with 0 interventions anyway).
# The code does import it and attempts to load it further down; only an
# exception at load time falls back to running without it.
from mc.mc_ml import DolphinForewarner
from alpha_signal_generator_flint_gate import FlintGatedEngine

# Call each numba kernel once with tiny inputs so JIT compilation happens
# up front and is reported separately from the backtest timings.
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
                  np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
                  np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
check_dc_nb(_p, 3, 1, 0.75)
_b = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64)
_a = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64)
compute_imbalance_nb(_b, _a); compute_depth_1pct_nb(_b, _a)
compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)
print(f" JIT: {time.time() - t0c:.1f}s")

VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
# Columns of each parquet frame that are NOT asset price columns.
META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
             'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div',
             'instability_50', 'instability_150'}
ENGINE_KWARGS = dict(
    initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
    min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
    fraction=0.20, fixed_tp_pct=0.0095, stop_pct=1.0, max_hold_bars=120,
    use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
    use_asset_selection=True, min_irp_alignment=0.45,
    use_sp_fees=True, use_sp_slippage=True,
    sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
    use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
    lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
)
MC_MODELS_DIR = str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))
MC_BASE_CFG = {
    'trial_id': 0, 'vel_div_threshold': -0.020, 'vel_div_extreme': -0.050,
    'use_direction_confirm': True, 'dc_lookback_bars': 7, 'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True, 'dc_leverage_boost': 1.00, 'dc_leverage_reduce': 0.50,
    'vd_trend_lookback': 10, 'min_leverage': 0.50, 'max_leverage': 5.00,
    'leverage_convexity': 3.00, 'fraction': 0.20, 'use_alpha_layers': True,
    'use_dynamic_leverage': True, 'fixed_tp_pct': 0.0095, 'stop_pct': 1.00,
    'max_hold_bars': 120, 'use_sp_fees': True, 'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.50,
    'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': 0.40,
    'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.00,
    'use_asset_selection': True, 'min_irp_alignment': 0.45, 'lookback': 100,
    'acb_beta_high': 0.80, 'acb_beta_low': 0.20, 'acb_w750_threshold_pct': 60,
}

print("\nLoading MC-Forewarner...")
try:
    forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
    print(" MC-Forewarner ready")
except Exception as e:
    # Best-effort: the backtest still runs, just without the forewarner attached.
    print(f" [WARN] MC-Forewarner failed to load: {e} — running without it")
    forewarner = None

# ── Load data ─────────────────────────────────────────────────────
parquet_files = sorted(VBT_DIR.glob("*.parquet"))
parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]

acb_master = AdaptiveCircuitBreaker()
date_strings = [pf.stem for pf in parquet_files]
acb_master.preload_w750(date_strings)

# Volatility threshold: 60th percentile of the trailing BTCUSDT return std,
# sampled from the first two files only.
all_vols = []
for pf in parquet_files[:2]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns:
        continue
    pr = df['BTCUSDT'].values
    for i in range(60, len(pr)):
        seg = pr[max(0, i - 50):i]
        if len(seg) < 10:
            continue
        v = float(np.std(np.diff(seg) / seg[:-1]))
        if v > 0:
            all_vols.append(v)
vol_p60 = float(np.percentile(all_vols, 60))

# Per-day cache: file stem -> (frame, asset columns, trailing BTCUSDT vol array).
pq_data = {}
all_assets = set()
for pf in parquet_files:
    df = pd.read_parquet(pf)
    ac = [c for c in df.columns if c not in META_COLS]
    all_assets.update(ac)
    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    dv = np.full(len(df), np.nan)  # stays NaN for the first 50 bars or when BTCUSDT is absent
    if bp is not None:
        for i in range(50, len(bp)):
            seg = bp[max(0, i - 50):i]
            if len(seg) < 10:
                continue
            dv[i] = float(np.std(np.diff(seg) / seg[:-1]))
    pq_data[pf.stem] = (df, ac, dv)

OB_ASSETS = sorted(list(all_assets))
_mock_ob = MockOBProvider(
    imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
    imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092,
                      "BNBUSDT": +0.05, "SOLUSDT": +0.05},
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)


# ── Helpers ───────────────────────────────────────────────────────
def run_backtest(engine_cls, engine_kwargs, name, gate_kwargs=None):
    """Run the backtest over every loaded parquet day and return a metrics dict.

    Metrics use the SAME methodology as test_pf_dynamic_beta_validate.py:
    - PF: pnl_absolute (dollar-weighted, matches gold script)
    - DD: day-end capital snapshots (not trade-level curve)
    - Sharpe: daily P&L, annualized with sqrt(365)

    Args:
        engine_cls: NDAlphaEngine for the ungated baseline; any other value
            builds a FlintGatedEngine.
        engine_kwargs: keyword arguments for the engine constructor. Its
            'initial_capital' entry (default 25000.0 when absent) is the base
            for ROI, drawdown peak and daily-return percentages.
        name: label copied into the result under 'name'.
        gate_kwargs: extra keyword arguments passed only to FlintGatedEngine.

    Returns:
        dict with keys 'name', 'roi', 'pf', 'dd', 'wr', 'sharpe', 'trades',
        'suppressed', 'suppression_rate'. All numeric fields are 0 when the
        engine recorded no trades.
    """
    gate_kwargs = gate_kwargs or {}
    # Previously hard-coded as 25000.0 throughout; read it from the engine
    # config so the metrics stay correct if the starting capital is changed.
    initial_capital = float(engine_kwargs.get('initial_capital', 25000.0))

    acb = AdaptiveCircuitBreaker()
    acb.preload_w750(date_strings)

    if engine_cls is NDAlphaEngine:
        eng = NDAlphaEngine(**engine_kwargs)
    else:
        eng = FlintGatedEngine(**engine_kwargs, **gate_kwargs)

    eng.set_ob_engine(ob_eng)
    eng.set_acb(acb)
    if forewarner is not None:
        eng.set_mc_forewarner(forewarner, MC_BASE_CFG)
    eng.set_esoteric_hazard_multiplier(0.0)

    daily_caps = []
    daily_pnls = []
    for pf in parquet_files:
        ds = pf.stem
        df, acols, dvol = pq_data[ds]
        cap_before = eng.capital
        # Bars with no finite vol estimate are treated as "regime not ok".
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)
        eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
        daily_caps.append(eng.capital)
        daily_pnls.append(eng.capital - cap_before)

    tr = eng.trade_history
    n = len(tr)
    if n == 0:
        return {'name': name, 'roi': 0, 'pf': 0, 'dd': 0, 'wr': 0, 'sharpe': 0,
                'trades': 0, 'suppressed': 0, 'suppression_rate': 0}

    roi = (eng.capital - initial_capital) / initial_capital * 100.0

    # PF: dollar-weighted (matches gold script which uses pnl_absolute)
    def _abs(t):
        # Fallback scale 250.0 — presumably dollars per 1% on the $25k base; TODO confirm.
        return t.pnl_absolute if hasattr(t, 'pnl_absolute') else t.pnl_pct * 250.0

    wins = [t for t in tr if _abs(t) > 0]
    losses = [t for t in tr if _abs(t) <= 0]  # break-even trades count as losses
    wr = len(wins) / n * 100.0
    gross_profit = sum(_abs(t) for t in wins)
    gross_loss = abs(sum(_abs(t) for t in losses))
    pf = gross_profit / max(gross_loss, 1e-9)  # floor avoids division by zero

    # DD: day-end capital snapshots (matches gold script)
    peak_cap = initial_capital
    max_dd = 0.0
    for cap in daily_caps:
        peak_cap = max(peak_cap, cap)
        dd = (peak_cap - cap) / peak_cap * 100.0
        max_dd = max(max_dd, dd)

    # Sharpe: daily P&L annualized (matches gold script)
    dr = np.array([p / initial_capital * 100.0 for p in daily_pnls])
    sharpe = float(dr.mean() / (dr.std() + 1e-9) * math.sqrt(365)) if len(dr) > 1 else 0.0

    # The baseline engine has no gate counters; getattr defaults them to 0.
    suppressed = getattr(eng, 'gate_suppressed', 0)
    allowed = getattr(eng, 'gate_allowed', 0)
    sup_rate = suppressed / max(1, suppressed + allowed) * 100.0

    return {
        'name': name,
        'roi': roi,
        'pf': pf,
        'dd': max_dd,
        'wr': wr,
        'sharpe': sharpe,
        'trades': n,
        'suppressed': suppressed,
        'suppression_rate': sup_rate,
    }


# ── Run all configs ───────────────────────────────────────────────
GOLD = {'name': 'GOLD REFERENCE', 'roi': 88.55, 'pf': 1.215, 'dd': 15.05,
        'wr': 50.5, 'sharpe': 4.38, 'trades': 2155}

# (engine class, unused placeholder, display name, gate kwargs)
configs = [
    (NDAlphaEngine, {}, 'A. Baseline (no gate)', {}),
    (FlintGatedEngine, {}, 'B. Gate: proxy_B > 0.00 (fixed)', {'proxy_b_threshold': 0.00}),
]

results = []
for engine_cls, _, name, gate_kw in configs:
    print(f"\n{'='*55}")
    print(f"Running: {name}")
    t0 = time.time()
    r = run_backtest(engine_cls, ENGINE_KWARGS.copy(), name, gate_kw)
    r['elapsed'] = time.time() - t0
    results.append(r)
    print(f" Done in {r['elapsed']:.1f}s Trades={r['trades']} ROI={r['roi']:.2f}% PF={r['pf']:.4f}")

# ── Final comparison ──────────────────────────────────────────────
print("\n" + "="*75)
print("FLINT GATE vs GOLD STANDARD — FINAL COMPARISON")
print("="*75)
hdr = f"{'Config':<35} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'WR%':>6} {'Sharpe':>7} {'Trades':>7} {'Supp%':>7}"
print(hdr)
print("-"*75)

g = GOLD
print(f"{'*** GOLD REFERENCE ***':<35} {g['roi']:>7.2f} {g['pf']:>6.4f} {g['dd']:>6.2f} {'N/A':>6} {g['sharpe']:>7.2f} {g['trades']:>7d} {'N/A':>7}")
print("-"*75)

for r in results:
    print(f"{r['name']:<35} {r['roi']:>7.2f} {r['pf']:>6.4f} {r['dd']:>6.2f} "
          f"{r['wr']:>6.2f} {r['sharpe']:>7.3f} {r['trades']:>7d} {r['suppression_rate']:>7.1f}%")

print("="*75)
print("\nLEGEND:")
print(" ROI: Return on Initial Capital ($25k)")
print(" PF : Profit Factor (gross profit / gross loss)")
print(" DD : Max Drawdown from equity peak")
print(" WR : Win Rate")
# Fixed: the legend used to say "trade-based Sharpe", but run_backtest computes
# Sharpe from daily P&L and annualizes with sqrt(365).
print(" Sharpe: daily P&L Sharpe, annualized with sqrt(365)")
print(" Supp%: % of entry attempts suppressed by gate")

print("\n VERDICT:")
base = results[0] if results else None
# Among the gated runs (everything after the baseline), pick the highest PF.
best_gated = max(results[1:], key=lambda r: r['pf']) if len(results) > 1 else None
if base and best_gated:
    pf_delta = best_gated['pf'] - base['pf']
    roi_delta = best_gated['roi'] - base['roi']
    dd_delta = best_gated['dd'] - base['dd']
    print(f" Best gate ({best_gated['name']}):")
    print(f" PF: {base['pf']:.4f} → {best_gated['pf']:.4f} ({pf_delta:+.4f})")
    print(f" ROI: {base['roi']:.2f}% → {best_gated['roi']:.2f}% ({roi_delta:+.2f}pp)")
    print(f" DD: {base['dd']:.2f}% → {best_gated['dd']:.2f}% ({dd_delta:+.2f}pp)")
    print(f" Trades: {base['trades']} → {best_gated['trades']} ({best_gated['suppression_rate']:.1f}% suppressed)")
    if best_gated['pf'] > base['pf'] * 1.01:
        print(" → GATE IS BENEFICIAL: PF improved >1%")
    elif best_gated['pf'] > base['pf']:
        print(" → GATE SHOWS MARGINAL IMPROVEMENT")
    else:
        print(" → GATE IS NEUTRAL/NEGATIVE: no improvement over baseline")