""" Exp 9c — Overfitting Validation for D_LIQ_GOLD Battery of tests designed to expose any period-specific bias in the D_LIQ_GOLD result (8x/9x + liquidation guard, exp9b: ROI=181.81%, DD=17.65%, Calmar=10.30). Three test families: 1. TEMPORAL SPLIT (H1/H2) Same split as exp8 adaptive_beta validation (days 0-27 vs days 28-55). Each half: fresh engine, fresh capital=$25k, cold start. Pass criterion: Calmar(d_liq) > Calmar(adaptive_beta) in BOTH halves. If d_liq only wins in one half → period-specific, do NOT flip default. 2. QUARTERLY SPLIT (Q1/Q2/Q3/Q4) Four independent ~14-day windows. Finer-grained: reveals if any single quarter is carrying the full result. Pass criterion: d_liq Calmar consistently above adaptive_beta across quarters. 3. MARGIN BUFFER SENSITIVITY Test margin_buffer = 0.80, 0.90, 0.95 (gold), 1.00 on the full period. Confirms the specific 10.6% floor is not cherry-picked. Pass criterion: ROI/DD metrics stable across ±0.15 variation in buffer. Reference benchmarks: D_LIQ_GOLD (full period): ROI=181.81%, DD=17.65%, Calmar=10.30 adaptive_beta (full): ROI= 96.55%, DD=14.32%, Calmar= 6.74 Results → exp9c_overfitting_results.json """ import sys, time, json, math sys.stdout.reconfigure(encoding='utf-8', errors='replace') from pathlib import Path import numpy as np _HERE = Path(__file__).resolve().parent sys.path.insert(0, str(_HERE.parent)) from exp_shared import ( ensure_jit, ENGINE_KWARGS, MC_BASE_CFG, load_data, load_forewarner, log_results, ) from nautilus_dolphin.nautilus.proxy_boost_engine import ( AdaptiveBoostEngine, LiquidationGuardEngine, DEFAULT_THRESHOLD, DEFAULT_ALPHA, D_LIQ_SOFT_CAP, D_LIQ_ABS_CAP, D_LIQ_MC_REF, ) from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker _D_LIQ_FULL = dict(roi=181.81, dd=17.65, calmar=10.30, trades=2155) _ABETA_FULL = dict(roi= 96.55, dd=14.32, calmar= 6.74, trades=2155) _PROXY = dict(threshold=DEFAULT_THRESHOLD, alpha=DEFAULT_ALPHA, adaptive_beta=True, adaptive_alpha=False, adaptive_thr=False) # ── Engine factories ────────────────────────────────────────────────────────── def _make_dliq(kw, margin_buffer=0.95): return LiquidationGuardEngine( extended_soft_cap=D_LIQ_SOFT_CAP, extended_abs_cap=D_LIQ_ABS_CAP, mc_leverage_ref=D_LIQ_MC_REF, margin_buffer=margin_buffer, **_PROXY, **kw, ) def _make_abeta(kw): return AdaptiveBoostEngine(**_PROXY, **kw) # ── Run harness (window-aware) ──────────────────────────────────────────────── def _run_window(engine_factory, name, d, fw, day_indices): """Run a sub-period backtest over the given day index slice.""" kw = ENGINE_KWARGS.copy() acb = AdaptiveCircuitBreaker() # Preload full date list for proper w750 context even in sub-period runs acb.preload_w750(d['date_strings']) eng = engine_factory(kw) eng.set_ob_engine(d['ob_eng']) eng.set_acb(acb) if fw is not None: eng.set_mc_forewarner(fw, MC_BASE_CFG) eng.set_esoteric_hazard_multiplier(0.0) daily_caps, daily_pnls = [], [] pf_list = d['parquet_files'] for idx in day_indices: pf = pf_list[idx] ds = pf.stem df, acols, dvol = d['pq_data'][ds] cap_before = eng.capital vol_ok = np.where(np.isfinite(dvol), dvol > d['vol_p60'], False) eng.process_day(ds, df, acols, vol_regime_ok=vol_ok) daily_caps.append(eng.capital) daily_pnls.append(eng.capital - cap_before) tr = eng.trade_history n = len(tr) roi = (eng.capital - 25000.0) / 25000.0 * 100.0 liq_stops = getattr(eng, 'liquidation_stops', 0) mc_mon = getattr(eng, 'mc_monitor', {}) if n == 0: return dict(name=name, roi=roi, dd=0.0, calmar=0.0, trades=0, liq_stops=liq_stops, days=len(day_indices)) def _abs(t): return t.pnl_absolute if hasattr(t, 'pnl_absolute') else t.pnl_pct * 250.0 wins = [t for t in tr if _abs(t) > 0] losses = [t for t in tr if _abs(t) <= 0] peak_cap, max_dd = 25000.0, 0.0 for cap in daily_caps: peak_cap = max(peak_cap, cap) max_dd = max(max_dd, (peak_cap - cap) / peak_cap * 100.0) calmar = roi / max_dd if max_dd > 0 else 0.0 return dict( name=name, roi=roi, dd=max_dd, calmar=calmar, trades=n, liq_stops=liq_stops, days=len(day_indices), mc_red=mc_mon.get('red', 0), mc_halted=mc_mon.get('halted', 0), ) def _compare(dliq_r, abeta_r, window_label): """Print head-to-head for one window.""" d_roi = dliq_r['roi'] - abeta_r['roi'] d_dd = dliq_r['dd'] - abeta_r['dd'] d_cal = dliq_r['calmar'] - abeta_r['calmar'] liq = dliq_r.get('liq_stops', 0) verdict = 'PASS' if dliq_r['calmar'] > abeta_r['calmar'] else 'FAIL' print(f" {window_label:<18} d_liq {dliq_r['roi']:>7.2f}% / {dliq_r['dd']:>5.2f}% " f"cal={dliq_r['calmar']:.2f} | abeta {abeta_r['roi']:>7.2f}% / {abeta_r['dd']:>5.2f}% " f"cal={abeta_r['calmar']:.2f} | ΔROI={d_roi:+.2f} ΔDD={d_dd:+.2f} ΔCal={d_cal:+.2f} " f"liq={liq} [{verdict}]") return verdict == 'PASS' # ── Main ───────────────────────────────────────────────────────────────────── def main(): t_start = time.time() print("=" * 80) print("Exp 9c — D_LIQ_GOLD Overfitting Validation") print("=" * 80) ensure_jit() d = load_data() fw = load_forewarner() n_days = len(d['parquet_files']) print(f" Dataset: {n_days} trading days") # Day index windows all_idx = list(range(n_days)) mid = n_days // 2 h1_idx = all_idx[:mid] h2_idx = all_idx[mid:] q_size = n_days // 4 q_idx = [all_idx[i*q_size : (i+1)*q_size] for i in range(4)] # Last quarter gets any remainder q_idx[3] = all_idx[3*q_size:] print(f" H1: days 0–{mid-1} ({len(h1_idx)}d) " f"H2: days {mid}–{n_days-1} ({len(h2_idx)}d)") print(f" Q1:{len(q_idx[0])}d Q2:{len(q_idx[1])}d " f"Q3:{len(q_idx[2])}d Q4:{len(q_idx[3])}d") results_all = [] pass_counts = {'split': 0, 'split_total': 0, 'quarter': 0, 'quarter_total': 0} # ── FAMILY 1: Temporal split H1/H2 ─────────────────────────────────────── print(f"\n{'='*80}") print("FAMILY 1 — Temporal Split H1/H2") print(f"{'='*80}") for label, idx in [('H1 (days 0-27)', h1_idx), ('H2 (days 28-55)', h2_idx)]: t0 = time.time() print(f"\n {label}:") dliq_r = _run_window(lambda kw: _make_dliq(kw), f'd_liq_{label}', d, fw, idx) abeta_r = _run_window(lambda kw: _make_abeta(kw), f'abeta_{label}', d, fw, idx) elapsed = time.time() - t0 passed = _compare(dliq_r, abeta_r, label) print(f" trades: d_liq={dliq_r['trades']} abeta={abeta_r['trades']} ({elapsed:.0f}s)") results_all += [dliq_r, abeta_r] pass_counts['split'] += int(passed) pass_counts['split_total'] += 1 split_verdict = ('PASS ✓' if pass_counts['split'] == pass_counts['split_total'] else f"PARTIAL ({pass_counts['split']}/{pass_counts['split_total']})") print(f"\n H1/H2 SPLIT VERDICT: {split_verdict}") # ── FAMILY 2: Quarterly split ───────────────────────────────────────────── print(f"\n{'='*80}") print("FAMILY 2 — Quarterly Split (Q1/Q2/Q3/Q4)") print(f"{'='*80}") for qi, idx in enumerate(q_idx, 1): label = f'Q{qi} (days {idx[0]}-{idx[-1]})' t0 = time.time() print(f"\n {label}:") dliq_r = _run_window(lambda kw: _make_dliq(kw), f'd_liq_Q{qi}', d, fw, idx) abeta_r = _run_window(lambda kw: _make_abeta(kw), f'abeta_Q{qi}', d, fw, idx) elapsed = time.time() - t0 passed = _compare(dliq_r, abeta_r, label) print(f" trades: d_liq={dliq_r['trades']} abeta={abeta_r['trades']} ({elapsed:.0f}s)") results_all += [dliq_r, abeta_r] pass_counts['quarter'] += int(passed) pass_counts['quarter_total'] += 1 quarter_verdict = ('PASS ✓' if pass_counts['quarter'] == pass_counts['quarter_total'] else f"PARTIAL ({pass_counts['quarter']}/{pass_counts['quarter_total']})") print(f"\n QUARTERLY VERDICT: {quarter_verdict}") # ── FAMILY 3: Margin buffer sensitivity (full period) ───────────────────── print(f"\n{'='*80}") print("FAMILY 3 — Margin Buffer Sensitivity (full period, d_liq only)") print(f"{'='*80}") print(f" Floor = (1/abs_cap) * buffer | abs_cap=9.0") print(f" {'Buffer':>8} {'Floor%':>7} {'ROI%':>8} {'DD%':>6} {'Calmar':>7} " f"{'liq_stops':>10} {'ΔROI vs gold':>13}") buf_results = [] for buf in [0.80, 0.90, 0.95, 1.00]: t0 = time.time() floor_pct = (1.0 / D_LIQ_ABS_CAP) * buf * 100 r = _run_window(lambda kw, b=buf: _make_dliq(kw, margin_buffer=b), f'd_liq_buf{buf:.2f}', d, fw, all_idx) elapsed = time.time() - t0 d_roi = r['roi'] - _D_LIQ_FULL['roi'] marker = ' ← GOLD' if abs(buf - 0.95) < 0.001 else '' print(f" {buf:>8.2f} {floor_pct:>6.1f}% {r['roi']:>8.2f} {r['dd']:>6.2f} " f"{r['calmar']:>7.2f} {r['liq_stops']:>10} {d_roi:>+13.2f}pp ({elapsed:.0f}s){marker}") r['margin_buffer'] = buf buf_results.append(r) results_all.append(r) # Stability check: ROI range across buffers buf_rois = [r['roi'] for r in buf_results] roi_range = max(buf_rois) - min(buf_rois) buf_dds = [r['dd'] for r in buf_results] dd_range = max(buf_dds) - min(buf_dds) buf_stable = roi_range < 10.0 and dd_range < 2.0 print(f"\n ROI range across buffers: {roi_range:.2f}pp " f"DD range: {dd_range:.2f}pp " f"['STABLE ✓' if buf_stable else 'UNSTABLE ✗']") # ── SUMMARY ─────────────────────────────────────────────────────────────── total_passes = pass_counts['split'] + pass_counts['quarter'] total_tests = pass_counts['split_total'] + pass_counts['quarter_total'] print(f"\n{'='*80}") print("OVERFITTING VALIDATION SUMMARY") print(f"{'='*80}") print(f" Temporal split (H1/H2): {pass_counts['split']}/{pass_counts['split_total']} {split_verdict}") print(f" Quarterly split (Q1-Q4): {pass_counts['quarter']}/{pass_counts['quarter_total']} {quarter_verdict}") print(f" Margin buffer stability: {'STABLE ✓' if buf_stable else 'UNSTABLE ✗'} " f"(ROI range={roi_range:.1f}pp, DD range={dd_range:.1f}pp)") print() all_pass = (total_passes == total_tests and buf_stable) if all_pass: print(" VERDICT: ALL TESTS PASS ✓") print(" D_LIQ_GOLD is robust. Calmar advantage holds across all time windows.") print(" Margin buffer choice is not critical. Safe to set as DEFAULT.") else: print(" VERDICT: SOME TESTS FAIL ✗") print(f" {total_passes}/{total_tests} split windows passed, " f"buffer stable={buf_stable}.") print(" Do NOT flip default until failures are investigated.") outfile = _HERE / "exp9c_overfitting_results.json" log_results(results_all, outfile, meta={ "exp": "exp9c", "question": "Is D_LIQ_GOLD robust across time windows and parameter perturbations?", "split_passes": f"{pass_counts['split']}/{pass_counts['split_total']}", "quarter_passes": f"{pass_counts['quarter']}/{pass_counts['quarter_total']}", "buf_roi_range_pp": round(roi_range, 3), "buf_dd_range_pp": round(dd_range, 3), "all_pass": all_pass, "total_elapsed_s": round(time.time() - t_start, 1), }) print(f"\nTotal elapsed: {(time.time()-t_start)/60:.1f} min") print("Done.") if __name__ == "__main__": main()