Files
DOLPHIN/nautilus_dolphin/dvae/exp9c_overfitting_validation.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

297 lines
13 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Exp 9c — Overfitting Validation for D_LIQ_GOLD
Battery of tests designed to expose any period-specific bias in the D_LIQ_GOLD result
(8x/9x + liquidation guard, exp9b: ROI=181.81%, DD=17.65%, Calmar=10.30).
Three test families:
1. TEMPORAL SPLIT (H1/H2)
Same split as exp8 adaptive_beta validation (days 0-27 vs days 28-55).
Each half: fresh engine, fresh capital=$25k, cold start.
Pass criterion: Calmar(d_liq) > Calmar(adaptive_beta) in BOTH halves.
If d_liq only wins in one half → period-specific, do NOT flip default.
2. QUARTERLY SPLIT (Q1/Q2/Q3/Q4)
Four independent ~14-day windows.
Finer-grained: reveals if any single quarter is carrying the full result.
Pass criterion: d_liq Calmar consistently above adaptive_beta across quarters.
3. MARGIN BUFFER SENSITIVITY
Test margin_buffer = 0.80, 0.90, 0.95 (gold), 1.00 on the full period.
Confirms the specific 10.6% floor is not cherry-picked.
Pass criterion: ROI range < 10pp and DD range < 2pp across the tested buffers (0.80-1.00).
Reference benchmarks:
D_LIQ_GOLD (full period): ROI=181.81%, DD=17.65%, Calmar=10.30
adaptive_beta (full): ROI= 96.55%, DD=14.32%, Calmar= 6.74
Results → exp9c_overfitting_results.json
"""
import sys, time, json, math
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
_HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(_HERE.parent))
from exp_shared import (
ensure_jit, ENGINE_KWARGS, MC_BASE_CFG,
load_data, load_forewarner, log_results,
)
from nautilus_dolphin.nautilus.proxy_boost_engine import (
AdaptiveBoostEngine, LiquidationGuardEngine,
DEFAULT_THRESHOLD, DEFAULT_ALPHA,
D_LIQ_SOFT_CAP, D_LIQ_ABS_CAP, D_LIQ_MC_REF,
)
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
# Full-period reference results; the ROI / DD / Calmar figures are the ones
# quoted in the module docstring. main() uses _D_LIQ_FULL['roi'] as the
# baseline for the "ΔROI vs gold" column of the margin-buffer table.
_D_LIQ_FULL = dict(roi=181.81, dd=17.65, calmar=10.30, trades=2155)
_ABETA_FULL = dict(roi= 96.55, dd=14.32, calmar= 6.74, trades=2155)
# Keyword arguments passed to BOTH engine constructors (_make_dliq and
# _make_abeta), so the two engines differ only in the liquidation-guard extras.
_PROXY = dict(threshold=DEFAULT_THRESHOLD, alpha=DEFAULT_ALPHA,
adaptive_beta=True, adaptive_alpha=False, adaptive_thr=False)
# ── Engine factories ──────────────────────────────────────────────────────────
def _make_dliq(kw, margin_buffer=0.95):
    """Build a LiquidationGuardEngine configured with the D_LIQ caps.

    Args:
        kw: Extra engine keyword arguments, forwarded unchanged.
        margin_buffer: Value forwarded as the engine's ``margin_buffer``.

    Returns:
        A new LiquidationGuardEngine instance.
    """
    guard_cfg = {
        'extended_soft_cap': D_LIQ_SOFT_CAP,
        'extended_abs_cap': D_LIQ_ABS_CAP,
        'mc_leverage_ref': D_LIQ_MC_REF,
        'margin_buffer': margin_buffer,
    }
    # Three separate ** expansions (not a merged dict) so that a key supplied
    # twice still raises TypeError instead of being silently overridden.
    return LiquidationGuardEngine(**guard_cfg, **_PROXY, **kw)
def _make_abeta(kw):
    """Build the AdaptiveBoostEngine baseline from the shared proxy settings plus ``kw``."""
    baseline_engine = AdaptiveBoostEngine(**_PROXY, **kw)
    return baseline_engine
# ── Run harness (window-aware) ────────────────────────────────────────────────
def _run_window(engine_factory, name, d, fw, day_indices):
    """Run a sub-period backtest over the given day index slice.

    A fresh engine and a fresh AdaptiveCircuitBreaker are created per call,
    so every window starts cold.

    Args:
        engine_factory: Callable that receives a copy of ENGINE_KWARGS and
            returns a new engine.
        name: Label stored in the result record.
        d: Dataset mapping. This function reads the keys 'date_strings',
            'ob_eng', 'parquet_files', 'pq_data' and 'vol_p60'.
        fw: MC forewarner to attach, or None to run without one.
        day_indices: Indices into d['parquet_files'], processed in order.

    Returns:
        dict with keys name, roi (percent of starting capital), dd (max
        drawdown in percent, measured on end-of-day capital), calmar
        (roi / dd, or 0.0 when dd is 0), trades, liq_stops, days, mc_red
        and mc_halted. The same keys are present when no trade was taken.
    """
    kw = ENGINE_KWARGS.copy()
    acb = AdaptiveCircuitBreaker()
    # Preload full date list for proper w750 context even in sub-period runs
    acb.preload_w750(d['date_strings'])
    eng = engine_factory(kw)
    eng.set_ob_engine(d['ob_eng'])
    eng.set_acb(acb)
    if fw is not None:
        eng.set_mc_forewarner(fw, MC_BASE_CFG)
    eng.set_esoteric_hazard_multiplier(0.0)

    # Measure ROI and drawdown against the capital this engine really starts
    # with instead of a hard-coded 25000.0, so the metrics stay correct if
    # ENGINE_KWARGS ever changes the starting capital.
    start_capital = eng.capital

    daily_caps = []
    pf_list = d['parquet_files']
    for idx in day_indices:
        ds = pf_list[idx].stem
        df, acols, dvol = d['pq_data'][ds]
        # Non-finite volatility readings never qualify as "regime ok".
        vol_ok = np.where(np.isfinite(dvol), dvol > d['vol_p60'], False)
        eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
        daily_caps.append(eng.capital)

    n_trades = len(eng.trade_history)
    roi = (eng.capital - start_capital) / start_capital * 100.0
    liq_stops = getattr(eng, 'liquidation_stops', 0)
    # Engines without an MC monitor (attribute missing or None) report zeros.
    mc_mon = getattr(eng, 'mc_monitor', None) or {}

    # Max drawdown on end-of-day capital; the peak starts at the initial
    # capital. A window without trades keeps dd = calmar = 0.0.
    max_dd = 0.0
    if n_trades:
        peak_cap = start_capital
        for cap in daily_caps:
            peak_cap = max(peak_cap, cap)
            max_dd = max(max_dd, (peak_cap - cap) / peak_cap * 100.0)
    calmar = roi / max_dd if max_dd > 0 else 0.0

    return dict(
        name=name, roi=roi, dd=max_dd, calmar=calmar, trades=n_trades,
        liq_stops=liq_stops, days=len(day_indices),
        mc_red=mc_mon.get('red', 0), mc_halted=mc_mon.get('halted', 0),
    )
def _compare(dliq_r, abeta_r, window_label):
    """Print a one-line d_liq vs adaptive_beta comparison for one window.

    Args:
        dliq_r: Result dict of the d_liq run ('roi', 'dd', 'calmar' and
            optionally 'liq_stops').
        abeta_r: Result dict of the adaptive_beta run ('roi', 'dd', 'calmar').
        window_label: Text shown in the first column.

    Returns:
        True when d_liq's Calmar is strictly greater than adaptive_beta's.
    """
    roi_gap = dliq_r['roi'] - abeta_r['roi']
    dd_gap = dliq_r['dd'] - abeta_r['dd']
    calmar_gap = dliq_r['calmar'] - abeta_r['calmar']
    liq_count = dliq_r.get('liq_stops', 0)

    # A tie counts as FAIL: d_liq has to beat the baseline outright.
    if dliq_r['calmar'] > abeta_r['calmar']:
        verdict = 'PASS'
    else:
        verdict = 'FAIL'

    dliq_part = (f"d_liq {dliq_r['roi']:>7.2f}% / {dliq_r['dd']:>5.2f}% "
                 f"cal={dliq_r['calmar']:.2f}")
    abeta_part = (f"abeta {abeta_r['roi']:>7.2f}% / {abeta_r['dd']:>5.2f}% "
                  f"cal={abeta_r['calmar']:.2f}")
    delta_part = f"ΔROI={roi_gap:+.2f} ΔDD={dd_gap:+.2f} ΔCal={calmar_gap:+.2f}"

    print(f" {window_label:<18} {dliq_part} | {abeta_part} | {delta_part} "
          f"liq={liq_count} [{verdict}]")
    return verdict == 'PASS'
# ── Main ─────────────────────────────────────────────────────────────────────
def _head_to_head(label, tag, d, fw, idx):
    """Run d_liq and adaptive_beta on one window, print the comparison, return both results."""
    t0 = time.time()
    print(f"\n {label}:")
    dliq_r = _run_window(_make_dliq, f'd_liq_{tag}', d, fw, idx)
    abeta_r = _run_window(_make_abeta, f'abeta_{tag}', d, fw, idx)
    elapsed = time.time() - t0
    passed = _compare(dliq_r, abeta_r, label)
    print(f" trades: d_liq={dliq_r['trades']} abeta={abeta_r['trades']} ({elapsed:.0f}s)")
    return dliq_r, abeta_r, passed


def _split_verdict(passes, total):
    """Return 'PASS ✓' when every window passed, otherwise 'PARTIAL (passes/total)'."""
    return 'PASS ✓' if passes == total else f"PARTIAL ({passes}/{total})"


def main():
    """Run the three overfitting test families and log the results.

    1. H1/H2 temporal split, d_liq vs adaptive_beta.
    2. Q1-Q4 quarterly split, d_liq vs adaptive_beta.
    3. Margin-buffer sweep on the full period (d_liq only).

    All per-run result dicts plus a summary are written to
    exp9c_overfitting_results.json next to this script via log_results().

    Raises:
        ValueError: If the dataset has fewer than 4 trading days, in which
            case the quarterly windows would be empty.
    """
    t_start = time.time()
    print("=" * 80)
    print("Exp 9c — D_LIQ_GOLD Overfitting Validation")
    print("=" * 80)
    ensure_jit()
    d = load_data()
    fw = load_forewarner()
    n_days = len(d['parquet_files'])
    print(f" Dataset: {n_days} trading days")
    # Fail fast: with fewer than 4 days at least one quarter is empty and its
    # label (idx[0] / idx[-1]) cannot be built.
    if n_days < 4:
        raise ValueError(
            f"need at least 4 trading days for the quarterly split, got {n_days}")

    # Day index windows
    all_idx = list(range(n_days))
    mid = n_days // 2
    h1_idx = all_idx[:mid]
    h2_idx = all_idx[mid:]
    q_size = n_days // 4
    # Last quarter gets any remainder
    q_idx = [all_idx[i * q_size:(i + 1) * q_size] for i in range(3)]
    q_idx.append(all_idx[3 * q_size:])
    print(f" H1: days 0-{mid-1} ({len(h1_idx)}d) "
          f"H2: days {mid}-{n_days-1} ({len(h2_idx)}d)")
    print(f" Q1:{len(q_idx[0])}d Q2:{len(q_idx[1])}d "
          f"Q3:{len(q_idx[2])}d Q4:{len(q_idx[3])}d")

    results_all = []
    pass_counts = {'split': 0, 'split_total': 0,
                   'quarter': 0, 'quarter_total': 0}

    # ── FAMILY 1: Temporal split H1/H2 ───────────────────────────────────────
    print(f"\n{'='*80}")
    print("FAMILY 1 — Temporal Split H1/H2")
    print(f"{'='*80}")
    # Labels come from the real split point; for the 56-day dataset they read
    # 'H1 (days 0-27)' / 'H2 (days 28-55)'.
    halves = [(f'H1 (days 0-{mid-1})', h1_idx),
              (f'H2 (days {mid}-{n_days-1})', h2_idx)]
    for label, idx in halves:
        dliq_r, abeta_r, passed = _head_to_head(label, label, d, fw, idx)
        results_all += [dliq_r, abeta_r]
        pass_counts['split'] += int(passed)
        pass_counts['split_total'] += 1
    split_verdict = _split_verdict(pass_counts['split'], pass_counts['split_total'])
    print(f"\n H1/H2 SPLIT VERDICT: {split_verdict}")

    # ── FAMILY 2: Quarterly split ─────────────────────────────────────────────
    print(f"\n{'='*80}")
    print("FAMILY 2 — Quarterly Split (Q1/Q2/Q3/Q4)")
    print(f"{'='*80}")
    for qi, idx in enumerate(q_idx, 1):
        label = f'Q{qi} (days {idx[0]}-{idx[-1]})'
        dliq_r, abeta_r, passed = _head_to_head(label, f'Q{qi}', d, fw, idx)
        results_all += [dliq_r, abeta_r]
        pass_counts['quarter'] += int(passed)
        pass_counts['quarter_total'] += 1
    quarter_verdict = _split_verdict(pass_counts['quarter'], pass_counts['quarter_total'])
    print(f"\n QUARTERLY VERDICT: {quarter_verdict}")

    # ── FAMILY 3: Margin buffer sensitivity (full period) ─────────────────────
    print(f"\n{'='*80}")
    print("FAMILY 3 — Margin Buffer Sensitivity (full period, d_liq only)")
    print(f"{'='*80}")
    print(f" Floor = (1/abs_cap) * buffer | abs_cap={float(D_LIQ_ABS_CAP):.1f}")
    print(f" {'Buffer':>8} {'Floor%':>7} {'ROI%':>8} {'DD%':>6} {'Calmar':>7} "
          f"{'liq_stops':>10} {'ΔROI vs gold':>13}")
    buf_results = []
    for buf in [0.80, 0.90, 0.95, 1.00]:
        t0 = time.time()
        floor_pct = (1.0 / D_LIQ_ABS_CAP) * buf * 100
        # b=buf binds the current buffer value at lambda definition time.
        r = _run_window(lambda kw, b=buf: _make_dliq(kw, margin_buffer=b),
                        f'd_liq_buf{buf:.2f}', d, fw, all_idx)
        elapsed = time.time() - t0
        d_roi = r['roi'] - _D_LIQ_FULL['roi']
        marker = ' ← GOLD' if abs(buf - 0.95) < 0.001 else ''
        print(f" {buf:>8.2f} {floor_pct:>6.1f}% {r['roi']:>8.2f} {r['dd']:>6.2f} "
              f"{r['calmar']:>7.2f} {r['liq_stops']:>10} {d_roi:>+13.2f}pp ({elapsed:.0f}s){marker}")
        r['margin_buffer'] = buf
        buf_results.append(r)
        results_all.append(r)

    # Stability check: ROI / DD range across buffers
    buf_rois = [r['roi'] for r in buf_results]
    roi_range = max(buf_rois) - min(buf_rois)
    buf_dds = [r['dd'] for r in buf_results]
    dd_range = max(buf_dds) - min(buf_dds)
    buf_stable = roi_range < 10.0 and dd_range < 2.0
    stability_tag = 'STABLE ✓' if buf_stable else 'UNSTABLE ✗'
    # The tag is interpolated through a replacement field; previously the
    # conditional expression itself was printed as literal text.
    print(f"\n ROI range across buffers: {roi_range:.2f}pp "
          f"DD range: {dd_range:.2f}pp "
          f"[{stability_tag}]")

    # ── SUMMARY ───────────────────────────────────────────────────────────────
    total_passes = pass_counts['split'] + pass_counts['quarter']
    total_tests = pass_counts['split_total'] + pass_counts['quarter_total']
    print(f"\n{'='*80}")
    print("OVERFITTING VALIDATION SUMMARY")
    print(f"{'='*80}")
    print(f" Temporal split (H1/H2): {pass_counts['split']}/{pass_counts['split_total']} {split_verdict}")
    print(f" Quarterly split (Q1-Q4): {pass_counts['quarter']}/{pass_counts['quarter_total']} {quarter_verdict}")
    print(f" Margin buffer stability: {stability_tag} "
          f"(ROI range={roi_range:.1f}pp, DD range={dd_range:.1f}pp)")
    print()
    all_pass = (total_passes == total_tests and buf_stable)
    if all_pass:
        print(" VERDICT: ALL TESTS PASS ✓")
        print(" D_LIQ_GOLD is robust. Calmar advantage holds across all time windows.")
        print(" Margin buffer choice is not critical. Safe to set as DEFAULT.")
    else:
        print(" VERDICT: SOME TESTS FAIL ✗")
        print(f" {total_passes}/{total_tests} split windows passed, "
              f"buffer stable={buf_stable}.")
        print(" Do NOT flip default until failures are investigated.")

    outfile = _HERE / "exp9c_overfitting_results.json"
    log_results(results_all, outfile, meta={
        "exp": "exp9c",
        "question": "Is D_LIQ_GOLD robust across time windows and parameter perturbations?",
        "split_passes": f"{pass_counts['split']}/{pass_counts['split_total']}",
        "quarter_passes": f"{pass_counts['quarter']}/{pass_counts['quarter_total']}",
        "buf_roi_range_pp": round(roi_range, 3),
        "buf_dd_range_pp": round(dd_range, 3),
        "all_pass": all_pass,
        "total_elapsed_s": round(time.time() - t_start, 1),
    })
    print(f"\nTotal elapsed: {(time.time()-t_start)/60:.1f} min")
    print("Done.")
# Script entry point: run the validation battery only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()