297 lines
13 KiB
Python
297 lines
13 KiB
Python
|
|
"""
|
|||
|
|
Exp 9c — Overfitting Validation for D_LIQ_GOLD
|
|||
|
|
|
|||
|
|
Battery of tests designed to expose any period-specific bias in the D_LIQ_GOLD result
|
|||
|
|
(8x/9x + liquidation guard, exp9b: ROI=181.81%, DD=17.65%, Calmar=10.30).
|
|||
|
|
|
|||
|
|
Three test families:
|
|||
|
|
|
|||
|
|
1. TEMPORAL SPLIT (H1/H2)
|
|||
|
|
Same split as exp8 adaptive_beta validation (days 0-27 vs days 28-55).
|
|||
|
|
Each half: fresh engine, fresh capital=$25k, cold start.
|
|||
|
|
Pass criterion: Calmar(d_liq) > Calmar(adaptive_beta) in BOTH halves.
|
|||
|
|
If d_liq only wins in one half → period-specific, do NOT flip default.
|
|||
|
|
|
|||
|
|
2. QUARTERLY SPLIT (Q1/Q2/Q3/Q4)
|
|||
|
|
Four independent ~14-day windows.
|
|||
|
|
Finer-grained: reveals if any single quarter is carrying the full result.
|
|||
|
|
Pass criterion: d_liq Calmar consistently above adaptive_beta across quarters.
|
|||
|
|
|
|||
|
|
3. MARGIN BUFFER SENSITIVITY
|
|||
|
|
Test margin_buffer = 0.80, 0.90, 0.95 (gold), 1.00 on the full period.
|
|||
|
|
Confirms the specific 10.6% floor is not cherry-picked.
|
|||
|
|
Pass criterion: ROI/DD metrics stable across ±0.15 variation in buffer.
|
|||
|
|
|
|||
|
|
Reference benchmarks:
|
|||
|
|
D_LIQ_GOLD (full period): ROI=181.81%, DD=17.65%, Calmar=10.30
|
|||
|
|
adaptive_beta (full): ROI= 96.55%, DD=14.32%, Calmar= 6.74
|
|||
|
|
|
|||
|
|
Results → exp9c_overfitting_results.json
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import sys, time, json, math
|
|||
|
|
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
|
|||
|
|
from pathlib import Path
|
|||
|
|
import numpy as np
|
|||
|
|
|
|||
|
|
_HERE = Path(__file__).resolve().parent
|
|||
|
|
sys.path.insert(0, str(_HERE.parent))
|
|||
|
|
|
|||
|
|
from exp_shared import (
|
|||
|
|
ensure_jit, ENGINE_KWARGS, MC_BASE_CFG,
|
|||
|
|
load_data, load_forewarner, log_results,
|
|||
|
|
)
|
|||
|
|
from nautilus_dolphin.nautilus.proxy_boost_engine import (
|
|||
|
|
AdaptiveBoostEngine, LiquidationGuardEngine,
|
|||
|
|
DEFAULT_THRESHOLD, DEFAULT_ALPHA,
|
|||
|
|
D_LIQ_SOFT_CAP, D_LIQ_ABS_CAP, D_LIQ_MC_REF,
|
|||
|
|
)
|
|||
|
|
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
|
|||
|
|
|
|||
|
|
# Full-period reference benchmarks (ROI %, max drawdown %, Calmar, trade count).
_D_LIQ_FULL = {'roi': 181.81, 'dd': 17.65, 'calmar': 10.30, 'trades': 2155}
_ABETA_FULL = {'roi': 96.55, 'dd': 14.32, 'calmar': 6.74, 'trades': 2155}
|
|||
|
|
|
|||
|
|
# Proxy-boost settings passed to both engine factories below: default
# threshold/alpha, with only the beta adaptation switched on.
_PROXY = {
    'threshold': DEFAULT_THRESHOLD,
    'alpha': DEFAULT_ALPHA,
    'adaptive_beta': True,
    'adaptive_alpha': False,
    'adaptive_thr': False,
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Engine factories ──────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
def _make_dliq(kw, margin_buffer=0.95):
    """Build a LiquidationGuardEngine for the D_LIQ configuration.

    ``kw`` supplies the base engine keyword arguments and is combined with the
    shared ``_PROXY`` settings and the D_LIQ_* caps; ``margin_buffer`` is
    forwarded to the engine unchanged (0.95 by default).
    """
    guard_engine = LiquidationGuardEngine(
        extended_soft_cap=D_LIQ_SOFT_CAP,
        extended_abs_cap=D_LIQ_ABS_CAP,
        mc_leverage_ref=D_LIQ_MC_REF,
        margin_buffer=margin_buffer,
        **_PROXY,
        **kw,
    )
    return guard_engine
|
|||
|
|
|
|||
|
|
def _make_abeta(kw):
    """Build the adaptive_beta baseline: an AdaptiveBoostEngine from ``_PROXY`` plus ``kw``."""
    baseline_engine = AdaptiveBoostEngine(**_PROXY, **kw)
    return baseline_engine
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Run harness (window-aware) ────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
def _run_window(engine_factory, name, d, fw, day_indices):
    """Run a sub-period backtest over the given day index slice.

    A new AdaptiveCircuitBreaker and a new engine (via ``engine_factory``) are
    created on every call.

    Args:
        engine_factory: Callable that receives a copy of ENGINE_KWARGS and
            returns the engine to drive.
        name: Label stored under ``'name'`` in the result.
        d: Dataset mapping; this function reads ``'date_strings'``,
            ``'ob_eng'``, ``'parquet_files'``, ``'pq_data'`` and ``'vol_p60'``.
        fw: MC forewarner, or None to run without one.
        day_indices: Indices into ``d['parquet_files']``, processed in order.

    Returns:
        dict with keys ``name``, ``roi`` (percent), ``dd`` (max drawdown in
        percent, measured on end-of-day capital), ``calmar`` (roi / dd, or 0.0
        when dd is 0), ``trades``, ``liq_stops``, ``days``, ``mc_red`` and
        ``mc_halted``. The same keys are present when the window produced no
        trades (``dd`` and ``calmar`` are then reported as 0.0).
    """
    # ROI and drawdown are measured against this fixed starting capital.
    # NOTE(review): assumes ENGINE_KWARGS starts the engine at $25k — confirm.
    start_capital = 25000.0

    acb = AdaptiveCircuitBreaker()
    # Preload full date list for proper w750 context even in sub-period runs
    acb.preload_w750(d['date_strings'])

    eng = engine_factory(ENGINE_KWARGS.copy())
    eng.set_ob_engine(d['ob_eng'])
    eng.set_acb(acb)
    if fw is not None:
        eng.set_mc_forewarner(fw, MC_BASE_CFG)
    eng.set_esoteric_hazard_multiplier(0.0)

    daily_caps = []
    pf_list = d['parquet_files']
    for idx in day_indices:
        ds = pf_list[idx].stem
        df, acols, dvol = d['pq_data'][ds]
        # Non-finite volatility readings never qualify as an acceptable regime.
        vol_ok = np.where(np.isfinite(dvol), dvol > d['vol_p60'], False)
        eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
        daily_caps.append(eng.capital)

    n = len(eng.trade_history)
    roi = (eng.capital - start_capital) / start_capital * 100.0

    # Both attributes are optional on the engine; tolerate a missing or None monitor.
    liq_stops = getattr(eng, 'liquidation_stops', 0)
    mc_mon = getattr(eng, 'mc_monitor', None) or {}

    result = dict(
        name=name, roi=roi, dd=0.0, calmar=0.0, trades=n,
        liq_stops=liq_stops, days=len(day_indices),
        mc_red=mc_mon.get('red', 0), mc_halted=mc_mon.get('halted', 0),
    )
    if n == 0:
        # No trades: keep dd/calmar at 0.0 but return the full key set so every
        # row written to the results file has the same schema.
        return result

    # Max drawdown from the running peak of end-of-day capital.
    peak_cap, max_dd = start_capital, 0.0
    for cap in daily_caps:
        peak_cap = max(peak_cap, cap)
        max_dd = max(max_dd, (peak_cap - cap) / peak_cap * 100.0)

    result['dd'] = max_dd
    result['calmar'] = roi / max_dd if max_dd > 0 else 0.0
    return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _compare(dliq_r, abeta_r, window_label):
    """Print a one-line d_liq vs adaptive_beta comparison for one window.

    Returns True when d_liq's Calmar is strictly greater than adaptive_beta's,
    False otherwise (including ties).
    """
    def _side(tag, res):
        # ROI% / DD% / Calmar summary for one engine.
        return f"{tag} {res['roi']:>7.2f}% / {res['dd']:>5.2f}% cal={res['calmar']:.2f}"

    # d_liq minus adaptive_beta, per metric.
    delta = {key: dliq_r[key] - abeta_r[key] for key in ('roi', 'dd', 'calmar')}
    outcome = 'PASS' if dliq_r['calmar'] > abeta_r['calmar'] else 'FAIL'
    liq_count = dliq_r.get('liq_stops', 0)

    line = (
        f" {window_label:<18} {_side('d_liq', dliq_r)} | {_side('abeta', abeta_r)} | "
        f"ΔROI={delta['roi']:+.2f} ΔDD={delta['dd']:+.2f} ΔCal={delta['calmar']:+.2f} "
        f"liq={liq_count} [{outcome}]"
    )
    print(line)
    return outcome == 'PASS'
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ── Main ─────────────────────────────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
def _run_pair(label, tag, d, fw, idx):
    """Run d_liq and adaptive_beta on one window and print the head-to-head.

    ``label`` is the human-readable window name used in the printed lines;
    ``tag`` is the suffix appended to ``d_liq_`` / ``abeta_`` in the result names.
    Returns ``(dliq_result, abeta_result, passed)``.
    """
    t0 = time.time()
    print(f"\n {label}:")
    dliq_r = _run_window(_make_dliq, f'd_liq_{tag}', d, fw, idx)
    abeta_r = _run_window(_make_abeta, f'abeta_{tag}', d, fw, idx)
    elapsed = time.time() - t0
    passed = _compare(dliq_r, abeta_r, label)
    print(f" trades: d_liq={dliq_r['trades']} abeta={abeta_r['trades']} ({elapsed:.0f}s)")
    return dliq_r, abeta_r, passed


def main():
    """Run the three overfitting-validation families and log the results.

    Family 1 compares d_liq against adaptive_beta on each half of the dataset,
    Family 2 on each quarter, and Family 3 re-runs d_liq on the full period
    for several margin buffers. A summary verdict is printed and every
    per-window result is passed to ``log_results`` together with the pass
    counts, targeting ``exp9c_overfitting_results.json`` next to this file.

    Raises:
        ValueError: if the dataset has fewer than 4 days, since the quarterly
            split would then contain empty windows.
    """
    t_start = time.time()
    rule = "=" * 80
    print(rule)
    print("Exp 9c — D_LIQ_GOLD Overfitting Validation")
    print(rule)

    ensure_jit()
    d = load_data()
    fw = load_forewarner()

    n_days = len(d['parquet_files'])
    print(f" Dataset: {n_days} trading days")
    if n_days < 4:
        # Fail before the expensive runs instead of hitting an IndexError on
        # an empty quarter later on.
        raise ValueError(f"need at least 4 trading days for the quarterly split, got {n_days}")

    # Day index windows
    all_idx = list(range(n_days))
    mid = n_days // 2
    h1_idx = all_idx[:mid]
    h2_idx = all_idx[mid:]
    q_size = n_days // 4
    # Last quarter gets any remainder
    q_idx = [all_idx[i * q_size:(i + 1) * q_size] for i in range(3)] + [all_idx[3 * q_size:]]

    print(f" H1: days 0–{mid-1} ({len(h1_idx)}d) "
          f"H2: days {mid}–{n_days-1} ({len(h2_idx)}d)")
    print(f" Q1:{len(q_idx[0])}d Q2:{len(q_idx[1])}d "
          f"Q3:{len(q_idx[2])}d Q4:{len(q_idx[3])}d")

    results_all = []
    pass_counts = {'split': 0, 'split_total': 0,
                   'quarter': 0, 'quarter_total': 0}

    # ── FAMILY 1: Temporal split H1/H2 ───────────────────────────────────────
    print(f"\n{rule}")
    print("FAMILY 1 — Temporal Split H1/H2")
    print(rule)

    # Labels are derived from the actual split so they stay truthful for any
    # dataset length (56 days yields the usual "0-27" / "28-55").
    halves = [(f'H1 (days 0-{mid-1})', h1_idx),
              (f'H2 (days {mid}-{n_days-1})', h2_idx)]
    for label, idx in halves:
        dliq_r, abeta_r, passed = _run_pair(label, label, d, fw, idx)
        results_all += [dliq_r, abeta_r]
        pass_counts['split'] += int(passed)
        pass_counts['split_total'] += 1

    split_verdict = ('PASS ✓' if pass_counts['split'] == pass_counts['split_total']
                     else f"PARTIAL ({pass_counts['split']}/{pass_counts['split_total']})")
    print(f"\n H1/H2 SPLIT VERDICT: {split_verdict}")

    # ── FAMILY 2: Quarterly split ─────────────────────────────────────────────
    print(f"\n{rule}")
    print("FAMILY 2 — Quarterly Split (Q1/Q2/Q3/Q4)")
    print(rule)

    for qi, idx in enumerate(q_idx, 1):
        label = f'Q{qi} (days {idx[0]}-{idx[-1]})'
        dliq_r, abeta_r, passed = _run_pair(label, f'Q{qi}', d, fw, idx)
        results_all += [dliq_r, abeta_r]
        pass_counts['quarter'] += int(passed)
        pass_counts['quarter_total'] += 1

    quarter_verdict = ('PASS ✓' if pass_counts['quarter'] == pass_counts['quarter_total']
                       else f"PARTIAL ({pass_counts['quarter']}/{pass_counts['quarter_total']})")
    print(f"\n QUARTERLY VERDICT: {quarter_verdict}")

    # ── FAMILY 3: Margin buffer sensitivity (full period) ─────────────────────
    print(f"\n{rule}")
    print("FAMILY 3 — Margin Buffer Sensitivity (full period, d_liq only)")
    print(rule)
    # Report the cap actually in use rather than a hard-coded figure.
    print(f" Floor = (1/abs_cap) * buffer | abs_cap={float(D_LIQ_ABS_CAP):.1f}")
    print(f" {'Buffer':>8} {'Floor%':>7} {'ROI%':>8} {'DD%':>6} {'Calmar':>7} "
          f"{'liq_stops':>10} {'ΔROI vs gold':>13}")

    buf_results = []
    for buf in [0.80, 0.90, 0.95, 1.00]:
        t0 = time.time()
        floor_pct = (1.0 / D_LIQ_ABS_CAP) * buf * 100
        # Bind buf as a default so the factory keeps this iteration's value.
        r = _run_window(lambda kw, b=buf: _make_dliq(kw, margin_buffer=b),
                        f'd_liq_buf{buf:.2f}', d, fw, all_idx)
        elapsed = time.time() - t0
        d_roi = r['roi'] - _D_LIQ_FULL['roi']
        marker = ' ← GOLD' if abs(buf - 0.95) < 0.001 else ''
        print(f" {buf:>8.2f} {floor_pct:>6.1f}% {r['roi']:>8.2f} {r['dd']:>6.2f} "
              f"{r['calmar']:>7.2f} {r['liq_stops']:>10} {d_roi:>+13.2f}pp ({elapsed:.0f}s){marker}")
        r['margin_buffer'] = buf
        buf_results.append(r)
        results_all.append(r)

    # Stability check: ROI range across buffers
    buf_rois = [r['roi'] for r in buf_results]
    roi_range = max(buf_rois) - min(buf_rois)
    buf_dds = [r['dd'] for r in buf_results]
    dd_range = max(buf_dds) - min(buf_dds)
    buf_stable = roi_range < 10.0 and dd_range < 2.0
    stability = 'STABLE ✓' if buf_stable else 'UNSTABLE ✗'
    # The verdict must sit inside a replacement field; previously the raw
    # conditional expression was printed as literal text.
    print(f"\n ROI range across buffers: {roi_range:.2f}pp "
          f"DD range: {dd_range:.2f}pp "
          f"[{stability}]")

    # ── SUMMARY ───────────────────────────────────────────────────────────────
    total_passes = pass_counts['split'] + pass_counts['quarter']
    total_tests = pass_counts['split_total'] + pass_counts['quarter_total']

    print(f"\n{rule}")
    print("OVERFITTING VALIDATION SUMMARY")
    print(rule)
    print(f" Temporal split (H1/H2): {pass_counts['split']}/{pass_counts['split_total']} {split_verdict}")
    print(f" Quarterly split (Q1-Q4): {pass_counts['quarter']}/{pass_counts['quarter_total']} {quarter_verdict}")
    print(f" Margin buffer stability: {stability} "
          f"(ROI range={roi_range:.1f}pp, DD range={dd_range:.1f}pp)")
    print()

    all_pass = (total_passes == total_tests and buf_stable)
    if all_pass:
        print(" VERDICT: ALL TESTS PASS ✓")
        print(" D_LIQ_GOLD is robust. Calmar advantage holds across all time windows.")
        print(" Margin buffer choice is not critical. Safe to set as DEFAULT.")
    else:
        print(" VERDICT: SOME TESTS FAIL ✗")
        print(f" {total_passes}/{total_tests} split windows passed, "
              f"buffer stable={buf_stable}.")
        print(" Do NOT flip default until failures are investigated.")

    outfile = _HERE / "exp9c_overfitting_results.json"
    log_results(results_all, outfile, meta={
        "exp": "exp9c",
        "question": "Is D_LIQ_GOLD robust across time windows and parameter perturbations?",
        "split_passes": f"{pass_counts['split']}/{pass_counts['split_total']}",
        "quarter_passes": f"{pass_counts['quarter']}/{pass_counts['quarter_total']}",
        "buf_roi_range_pp": round(roi_range, 3),
        "buf_dd_range_pp": round(dd_range, 3),
        "all_pass": all_pass,
        "total_elapsed_s": round(time.time() - t_start, 1),
    })

    print(f"\nTotal elapsed: {(time.time()-t_start)/60:.1f} min")
    print("Done.")
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Script entry point: run the validation only when executed directly.
if __name__ == "__main__":
    main()
|