Files
DOLPHIN/nautilus_dolphin/dvae/exp9c_overfitting_validation.py

297 lines
13 KiB
Python
Raw Normal View History

"""
Exp 9c Overfitting Validation for D_LIQ_GOLD
Battery of tests designed to expose any period-specific bias in the D_LIQ_GOLD result
(8x/9x + liquidation guard, exp9b: ROI=181.81%, DD=17.65%, Calmar=10.30).
Three test families:
1. TEMPORAL SPLIT (H1/H2)
Same split as exp8 adaptive_beta validation (days 0-27 vs days 28-55).
Each half: fresh engine, fresh capital=$25k, cold start.
Pass criterion: Calmar(d_liq) > Calmar(adaptive_beta) in BOTH halves.
If d_liq only wins in one half, the result is period-specific: do NOT flip default.
2. QUARTERLY SPLIT (Q1/Q2/Q3/Q4)
Four independent ~14-day windows.
Finer-grained: reveals if any single quarter is carrying the full result.
Pass criterion: d_liq Calmar consistently above adaptive_beta across quarters.
3. MARGIN BUFFER SENSITIVITY
Test margin_buffer = 0.80, 0.90, 0.95 (gold), 1.00 on the full period.
Confirms the specific 10.6% floor is not cherry-picked.
Pass criterion: ROI/DD metrics stable across ±0.15 variation in buffer.
Reference benchmarks:
D_LIQ_GOLD (full period): ROI=181.81%, DD=17.65%, Calmar=10.30
adaptive_beta (full): ROI= 96.55%, DD=14.32%, Calmar= 6.74
Results are written to exp9c_overfitting_results.json
"""
import sys, time, json, math
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
_HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(_HERE.parent))
from exp_shared import (
ensure_jit, ENGINE_KWARGS, MC_BASE_CFG,
load_data, load_forewarner, log_results,
)
from nautilus_dolphin.nautilus.proxy_boost_engine import (
AdaptiveBoostEngine, LiquidationGuardEngine,
DEFAULT_THRESHOLD, DEFAULT_ALPHA,
D_LIQ_SOFT_CAP, D_LIQ_ABS_CAP, D_LIQ_MC_REF,
)
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
# Full-period reference benchmarks quoted in the module docstring.
# 'roi' and 'dd' are percentages; 'calmar' is roi / dd.
_D_LIQ_FULL = {'roi': 181.81, 'dd': 17.65, 'calmar': 10.30, 'trades': 2155}
_ABETA_FULL = {'roi': 96.55, 'dd': 14.32, 'calmar': 6.74, 'trades': 2155}

# Proxy-boost keyword arguments passed to both engine factories, so the
# d_liq and adaptive_beta engines are built with the same proxy settings.
_PROXY = {
    'threshold': DEFAULT_THRESHOLD,
    'alpha': DEFAULT_ALPHA,
    'adaptive_beta': True,
    'adaptive_alpha': False,
    'adaptive_thr': False,
}
# ── Engine factories ──────────────────────────────────────────────────────────
def _make_dliq(kw, margin_buffer=0.95):
    """Build the D_LIQ engine (LiquidationGuardEngine with the D_LIQ caps).

    Args:
        kw: Mapping of base engine keyword arguments, expanded into the
            constructor call alongside ``_PROXY``.
        margin_buffer: Forwarded unchanged as the engine's ``margin_buffer``.

    Returns:
        A new ``LiquidationGuardEngine`` instance.
    """
    guard_kwargs = dict(
        extended_soft_cap=D_LIQ_SOFT_CAP,
        extended_abs_cap=D_LIQ_ABS_CAP,
        mc_leverage_ref=D_LIQ_MC_REF,
        margin_buffer=margin_buffer,
    )
    # Three separate ** expansions: a key present in more than one mapping
    # still raises TypeError rather than being silently overridden.
    return LiquidationGuardEngine(**guard_kwargs, **_PROXY, **kw)
def _make_abeta(kw):
    """Build the adaptive_beta baseline engine.

    Args:
        kw: Mapping of base engine keyword arguments, expanded into the
            constructor call alongside ``_PROXY``.

    Returns:
        A new ``AdaptiveBoostEngine`` instance.
    """
    baseline = AdaptiveBoostEngine(**_PROXY, **kw)
    return baseline
# ── Run harness (window-aware) ────────────────────────────────────────────────
def _run_window(engine_factory, name, d, fw, day_indices, initial_capital=25000.0):
    """Run a sub-period backtest over the given day indices and summarise it.

    A new engine and a new circuit breaker are created on every call, so
    each window is evaluated independently of the others.

    Args:
        engine_factory: Callable that receives a copy of ``ENGINE_KWARGS``
            and returns the engine to run.
        name: Label stored under ``'name'`` in the returned dict.
        d: Dataset mapping. This function reads the keys ``'date_strings'``,
            ``'ob_eng'``, ``'parquet_files'``, ``'pq_data'`` and ``'vol_p60'``.
        fw: Forewarner handed to ``set_mc_forewarner``; ``None`` skips that
            call.
        day_indices: Indices into ``d['parquet_files']``, processed in the
            order given.
        initial_capital: Baseline for ROI and the starting drawdown peak.
            NOTE(review): must equal the capital the engine actually starts
            with (configured outside this function) - confirm when changing.

    Returns:
        dict with keys ``name``, ``roi`` (percent), ``dd`` (max drawdown in
        percent, measured on end-of-day capital), ``calmar`` (roi / dd, or
        0.0 when dd is 0), ``trades``, ``liq_stops``, ``days``, ``mc_red``
        and ``mc_halted``. With zero trades, ``dd`` and ``calmar`` are 0.0.
    """
    kw = ENGINE_KWARGS.copy()
    acb = AdaptiveCircuitBreaker()
    # Preload full date list for proper w750 context even in sub-period runs
    acb.preload_w750(d['date_strings'])

    eng = engine_factory(kw)
    eng.set_ob_engine(d['ob_eng'])
    eng.set_acb(acb)
    if fw is not None:
        eng.set_mc_forewarner(fw, MC_BASE_CFG)
    eng.set_esoteric_hazard_multiplier(0.0)

    daily_caps = []
    pf_list = d['parquet_files']
    for idx in day_indices:
        ds = pf_list[idx].stem
        df, acols, dvol = d['pq_data'][ds]
        # Non-finite volatility readings never qualify as an OK regime.
        vol_ok = np.where(np.isfinite(dvol), dvol > d['vol_p60'], False)
        eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
        daily_caps.append(eng.capital)

    n = len(eng.trade_history)
    roi = (eng.capital - initial_capital) / initial_capital * 100.0
    # Only some engines expose these attributes; fall back to neutral values.
    liq_stops = getattr(eng, 'liquidation_stops', 0)
    mc_mon = getattr(eng, 'mc_monitor', None) or {}

    max_dd, calmar = 0.0, 0.0
    if n > 0:
        peak_cap = initial_capital
        for cap in daily_caps:
            peak_cap = max(peak_cap, cap)
            max_dd = max(max_dd, (peak_cap - cap) / peak_cap * 100.0)
        calmar = roi / max_dd if max_dd > 0 else 0.0

    # Same key set on every path so downstream consumers see one schema.
    return dict(
        name=name, roi=roi, dd=max_dd, calmar=calmar, trades=n,
        liq_stops=liq_stops, days=len(day_indices),
        mc_red=mc_mon.get('red', 0), mc_halted=mc_mon.get('halted', 0),
    )
def _compare(dliq_r, abeta_r, window_label):
    """Print one head-to-head row for a window and report whether d_liq won.

    Args:
        dliq_r: Result dict for d_liq; must contain ``roi``, ``dd`` and
            ``calmar``. ``liq_stops`` is optional and defaults to 0.
        abeta_r: Result dict for adaptive_beta with ``roi``, ``dd``, ``calmar``.
        window_label: Text shown in the first column of the row.

    Returns:
        True when d_liq's Calmar is strictly greater than adaptive_beta's
        (a tie counts as FAIL), otherwise False.
    """
    # d_liq minus adaptive_beta for each metric.
    gap = {key: dliq_r[key] - abeta_r[key] for key in ('roi', 'dd', 'calmar')}
    stops = dliq_r.get('liq_stops', 0)
    outcome = 'PASS' if dliq_r['calmar'] > abeta_r['calmar'] else 'FAIL'

    def _side(tag, res):
        # One engine's "ROI% / DD% cal=" cell.
        return f"{tag} {res['roi']:>7.2f}% / {res['dd']:>5.2f}% cal={res['calmar']:.2f}"

    cells = [
        f" {window_label:<18} {_side('d_liq', dliq_r)}",
        _side('abeta', abeta_r),
        f"ΔROI={gap['roi']:+.2f} ΔDD={gap['dd']:+.2f} ΔCal={gap['calmar']:+.2f} "
        f"liq={stops} [{outcome}]",
    ]
    print(" | ".join(cells))
    return outcome == 'PASS'
# ── Main ─────────────────────────────────────────────────────────────────────
def _run_split_family(windows, d, fw, results_all):
    """Run d_liq against adaptive_beta on each window and tally the passes.

    Args:
        windows: Iterable of ``(label, tag, idx)``. ``label`` is printed,
            ``tag`` is the suffix of the stored result names, and ``idx`` is
            the list of day indices for that window.
        d: Dataset mapping forwarded to ``_run_window``.
        fw: Forewarner forwarded to ``_run_window``.
        results_all: List extended in place with both results per window.

    Returns:
        ``(passes, total)``: windows where ``_compare`` returned True, and
        the number of windows run.
    """
    passes = total = 0
    for label, tag, idx in windows:
        t0 = time.time()
        print(f"\n {label}:")
        dliq_r = _run_window(_make_dliq, f'd_liq_{tag}', d, fw, idx)
        abeta_r = _run_window(_make_abeta, f'abeta_{tag}', d, fw, idx)
        elapsed = time.time() - t0
        passed = _compare(dliq_r, abeta_r, label)
        print(f" trades: d_liq={dliq_r['trades']} abeta={abeta_r['trades']} ({elapsed:.0f}s)")
        results_all += [dliq_r, abeta_r]
        passes += int(passed)
        total += 1
    return passes, total


def main():
    """Run the three overfitting-validation families and log the results.

    Family 1 compares d_liq with adaptive_beta on each half of the dataset,
    family 2 on each quarter, and family 3 re-runs d_liq on the full period
    for several margin buffers. A summary is printed and every result is
    passed to ``log_results`` with ``exp9c_overfitting_results.json`` (next
    to this script) as the output path.

    Raises:
        ValueError: If the dataset has fewer than 4 days, because the
            quarterly split would then contain an empty window.
    """
    t_start = time.time()
    print("=" * 80)
    print("Exp 9c — D_LIQ_GOLD Overfitting Validation")
    print("=" * 80)
    ensure_jit()
    d = load_data()
    fw = load_forewarner()
    n_days = len(d['parquet_files'])
    print(f" Dataset: {n_days} trading days")
    if n_days < 4:
        raise ValueError(
            f"need at least 4 trading days for the quarterly split, got {n_days}")

    # Day index windows
    all_idx = list(range(n_days))
    mid = n_days // 2
    h1_idx, h2_idx = all_idx[:mid], all_idx[mid:]
    q_size = n_days // 4
    q_idx = [all_idx[i * q_size:(i + 1) * q_size] for i in range(3)]
    # Last quarter gets any remainder
    q_idx.append(all_idx[3 * q_size:])
    print(f" H1: days 0-{mid-1} ({len(h1_idx)}d) "
          f"H2: days {mid}-{n_days-1} ({len(h2_idx)}d)")
    print(f" Q1:{len(q_idx[0])}d Q2:{len(q_idx[1])}d "
          f"Q3:{len(q_idx[2])}d Q4:{len(q_idx[3])}d")
    results_all = []
    pass_counts = {'split': 0, 'split_total': 0,
                   'quarter': 0, 'quarter_total': 0}

    # ── FAMILY 1: Temporal split H1/H2 ───────────────────────────────────────
    print(f"\n{'='*80}")
    print("FAMILY 1 — Temporal Split H1/H2")
    print(f"{'='*80}")
    # Labels are derived from the actual indices so they stay correct when
    # the dataset length changes; the label doubles as the result-name tag.
    half_windows = []
    for half, idx in (('H1', h1_idx), ('H2', h2_idx)):
        label = f'{half} (days {idx[0]}-{idx[-1]})'
        half_windows.append((label, label, idx))
    pass_counts['split'], pass_counts['split_total'] = _run_split_family(
        half_windows, d, fw, results_all)
    split_verdict = ('PASS ✓' if pass_counts['split'] == pass_counts['split_total']
                     else f"PARTIAL ({pass_counts['split']}/{pass_counts['split_total']})")
    print(f"\n H1/H2 SPLIT VERDICT: {split_verdict}")

    # ── FAMILY 2: Quarterly split ─────────────────────────────────────────────
    print(f"\n{'='*80}")
    print("FAMILY 2 — Quarterly Split (Q1/Q2/Q3/Q4)")
    print(f"{'='*80}")
    quarter_windows = [
        (f'Q{qi} (days {idx[0]}-{idx[-1]})', f'Q{qi}', idx)
        for qi, idx in enumerate(q_idx, 1)
    ]
    pass_counts['quarter'], pass_counts['quarter_total'] = _run_split_family(
        quarter_windows, d, fw, results_all)
    quarter_verdict = ('PASS ✓' if pass_counts['quarter'] == pass_counts['quarter_total']
                       else f"PARTIAL ({pass_counts['quarter']}/{pass_counts['quarter_total']})")
    print(f"\n QUARTERLY VERDICT: {quarter_verdict}")

    # ── FAMILY 3: Margin buffer sensitivity (full period) ─────────────────────
    print(f"\n{'='*80}")
    print("FAMILY 3 — Margin Buffer Sensitivity (full period, d_liq only)")
    print(f"{'='*80}")
    print(f" Floor = (1/abs_cap) * buffer | abs_cap={D_LIQ_ABS_CAP:.1f}")
    print(f" {'Buffer':>8} {'Floor%':>7} {'ROI%':>8} {'DD%':>6} {'Calmar':>7} "
          f"{'liq_stops':>10} {'ΔROI vs gold':>13}")
    buf_results = []
    for buf in [0.80, 0.90, 0.95, 1.00]:
        t0 = time.time()
        floor_pct = (1.0 / D_LIQ_ABS_CAP) * buf * 100
        # b=buf binds the current buffer now; a plain closure would be
        # late-binding.
        r = _run_window(lambda kw, b=buf: _make_dliq(kw, margin_buffer=b),
                        f'd_liq_buf{buf:.2f}', d, fw, all_idx)
        elapsed = time.time() - t0
        d_roi = r['roi'] - _D_LIQ_FULL['roi']
        marker = ' ← GOLD' if abs(buf - 0.95) < 0.001 else ''
        print(f" {buf:>8.2f} {floor_pct:>6.1f}% {r['roi']:>8.2f} {r['dd']:>6.2f} "
              f"{r['calmar']:>7.2f} {r['liq_stops']:>10} {d_roi:>+13.2f}pp ({elapsed:.0f}s){marker}")
        r['margin_buffer'] = buf
        buf_results.append(r)
        results_all.append(r)

    # Stability check: ROI and DD spread across buffers
    buf_rois = [r['roi'] for r in buf_results]
    roi_range = max(buf_rois) - min(buf_rois)
    buf_dds = [r['dd'] for r in buf_results]
    dd_range = max(buf_dds) - min(buf_dds)
    # bool() keeps the flag a plain Python bool even if the metrics are
    # NumPy scalars, since it is later written into the results metadata.
    buf_stable = bool(roi_range < 10.0 and dd_range < 2.0)
    stability = 'STABLE ✓' if buf_stable else 'UNSTABLE ✗'
    print(f"\n ROI range across buffers: {roi_range:.2f}pp "
          f"DD range: {dd_range:.2f}pp "
          f"[{stability}]")

    # ── SUMMARY ───────────────────────────────────────────────────────────────
    total_passes = pass_counts['split'] + pass_counts['quarter']
    total_tests = pass_counts['split_total'] + pass_counts['quarter_total']
    print(f"\n{'='*80}")
    print("OVERFITTING VALIDATION SUMMARY")
    print(f"{'='*80}")
    print(f" Temporal split (H1/H2): {pass_counts['split']}/{pass_counts['split_total']} {split_verdict}")
    print(f" Quarterly split (Q1-Q4): {pass_counts['quarter']}/{pass_counts['quarter_total']} {quarter_verdict}")
    print(f" Margin buffer stability: {stability} "
          f"(ROI range={roi_range:.1f}pp, DD range={dd_range:.1f}pp)")
    print()
    all_pass = bool(total_passes == total_tests and buf_stable)
    if all_pass:
        print(" VERDICT: ALL TESTS PASS ✓")
        print(" D_LIQ_GOLD is robust. Calmar advantage holds across all time windows.")
        print(" Margin buffer choice is not critical. Safe to set as DEFAULT.")
    else:
        print(" VERDICT: SOME TESTS FAIL ✗")
        print(f" {total_passes}/{total_tests} split windows passed, "
              f"buffer stable={buf_stable}.")
        print(" Do NOT flip default until failures are investigated.")

    outfile = _HERE / "exp9c_overfitting_results.json"
    log_results(results_all, outfile, meta={
        "exp": "exp9c",
        "question": "Is D_LIQ_GOLD robust across time windows and parameter perturbations?",
        "split_passes": f"{pass_counts['split']}/{pass_counts['split_total']}",
        "quarter_passes": f"{pass_counts['quarter']}/{pass_counts['quarter_total']}",
        "buf_roi_range_pp": round(roi_range, 3),
        "buf_dd_range_pp": round(dd_range, 3),
        "all_pass": all_pass,
        "total_elapsed_s": round(time.time() - t_start, 1),
    })
    print(f"\nTotal elapsed: {(time.time()-t_start)/60:.1f} min")
    print("Done.")
# Run the validation battery only when executed as a script, not on import.
if __name__ == "__main__":
    main()