# DOLPHIN/nautilus_dolphin/dvae/exp8_boost_robustness.py
"""
Exp 8 — scale_boost Robustness & Adaptive Parameterization
Two questions about the exp7 scale_boost winner (thr=0.35, a=1.0):
Q1. Is it overfitting? (+5pp ROI AND -0.54pp DD, measured on the same 55 days on which it was found)
Test: temporal split, first half (days 1–27) vs second half (days 28–55).
If improvement holds in BOTH halves independently, it's structurally real.
If only one half drives it, the result is temporally fragile.
Q2. Are threshold and alpha regime-dependent?
Hypothesis: proxy_B is more discriminating in high-eigenvalue-regime days
(high ACB beta). On those days, "calm" entries should receive stronger boost,
and the threshold for "what qualifies as calm" should be tighter.
Adaptive formulas (using ACB state available in _try_entry as self._day_base_boost
and self._day_beta):
alpha_eff = alpha * day_base_boost (more boost on stressed days)
thr_eff = threshold / day_base_boost (tighter gate on stressed days)
Both together: combine both adjustments
Also test dvol-proxy adaptation: use day_beta directly as a continuous scaler.
Configs:
0. Baseline
1. Fixed: thr=0.35, a=1.0 (the exp7 winner; must reproduce exp7 results)
2. Adaptive-alpha: alpha_eff = 1.0 * day_base_boost, thr fixed at 0.35
3. Adaptive-threshold: thr_eff = 0.35 / day_base_boost, alpha fixed at 1.0
4. Adaptive-both: both formulas combined
5. Beta-scaled alpha: alpha_eff = 1.0 * (1 + day_beta), thr fixed at 0.35
(day_beta is the ACB eigenvalue signal; more direct than base_boost)
Results include:
- Full 55-day metrics (standard)
- First-half (days 1–27) and second-half (days 28–55) metrics split out
to test temporal stability of the DD reduction
- Per-day scale distribution analysis
Results logged to exp8_boost_robustness_results.json
"""
import sys, time, json, math
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
_HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(_HERE.parent))
from exp_shared import (
    ensure_jit, ENGINE_KWARGS, GOLD, MC_BASE_CFG,
    load_data, load_forewarner, log_results,
)
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
# ── Re-use ProxyBaseEngine from exp7 (copy-minimal) ──────────────────────────
class ProxyBaseEngine(NDAlphaEngine):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._current_proxy_b: float = 0.0
        self._proxy_b_history: list = []

    def _update_proxy(self, inst: float, v750: float) -> float:
        pb = inst - v750
        self._current_proxy_b = pb
        self._proxy_b_history.append(pb)
        if len(self._proxy_b_history) > 500:
            self._proxy_b_history = self._proxy_b_history[-500:]
        return pb

    def _proxy_prank(self) -> float:
        if not self._proxy_b_history:
            return 0.5
        n = len(self._proxy_b_history)
        return sum(v < self._current_proxy_b for v in self._proxy_b_history) / n
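    # Illustrative (made-up values): with _proxy_b_history = [-0.2, 0.0, 0.1, 0.3]
    # and _current_proxy_b = 0.1 (the value just appended), two of the four entries
    # are strictly below it, so _proxy_prank() returns 2 / 4 = 0.5, i.e. the current
    # proxy_B sits at the 50th percentile of its trailing (max 500-bar) window.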
def process_day(self, date_str, df, asset_columns,
vol_regime_ok=None, direction=None, posture='APEX'):
self.begin_day(date_str, posture=posture, direction=direction)
bid = 0
for ri in range(len(df)):
row = df.iloc[ri]
vd = row.get('vel_div')
if vd is None or not np.isfinite(float(vd)):
self._global_bar_idx += 1; bid += 1; continue
def gf(col):
v = row.get(col)
if v is None: return 0.0
try: return float(v) if np.isfinite(float(v)) else 0.0
except: return 0.0
v50 = gf('v50_lambda_max_velocity')
v750 = gf('v750_lambda_max_velocity')
inst = gf('instability_50')
self._update_proxy(inst, v750)
prices = {}
for ac in asset_columns:
p = row.get(ac)
if p is not None and p > 0 and np.isfinite(float(p)):
prices[ac] = float(p)
if not prices:
self._global_bar_idx += 1; bid += 1; continue
vrok = bool(vol_regime_ok[ri]) if vol_regime_ok is not None else (bid >= 100)
self.step_bar(bar_idx=ri, vel_div=float(vd), prices=prices,
vol_regime_ok=vrok, v50_vel=v50, v750_vel=v750)
bid += 1
return self.end_day()
# ── Adaptive scale_boost engine ───────────────────────────────────────────────
class AdaptiveBoostEngine(ProxyBaseEngine):
"""
scale_boost with optionally regime-adaptive threshold and alpha.
Fixed mode (adaptive_alpha=False, adaptive_thr=False, adaptive_beta=False):
scale = 1 + alpha * max(0, threshold - prank)
Identical to exp7 ProxyScaleEngine(mode='boost').
Adaptive modes use ACB state (self._day_base_boost, self._day_beta)
which is set by begin_day() before any _try_entry calls in that day:
adaptive_alpha: alpha_eff = alpha * day_base_boost
High-boost day (stressed eigenspace regime) stronger boost on calm entries
Low-boost day modest boost
adaptive_thr: thr_eff = threshold / day_base_boost
High-boost day lower threshold more selective (only very calm entries qualify)
Low-boost day higher threshold more entries qualify
adaptive_beta: alpha_eff = alpha * (1 + day_beta)
day_beta is the ACB's direct eigenvalue signal (0 when inactive)
More discriminating on days where eigenvalue regime is active
Parameters can be combined freely.
"""
    def __init__(self, *args,
                 threshold: float = 0.35,
                 alpha: float = 1.0,
                 adaptive_alpha: bool = False,
                 adaptive_thr: bool = False,
                 adaptive_beta: bool = False,
                 **kwargs):
        super().__init__(*args, **kwargs)
        self.threshold = threshold
        self.alpha = alpha
        self.adaptive_alpha = adaptive_alpha
        self.adaptive_thr = adaptive_thr
        self.adaptive_beta = adaptive_beta
        self._scale_history: list = []
        self._alpha_eff_history: list = []
        self._thr_eff_history: list = []

    @property
    def sizing_scale_mean(self) -> float:
        return float(np.mean(self._scale_history)) if self._scale_history else 1.0

    def _try_entry(self, bar_idx, vel_div, prices, price_histories,
                   v50_vel=0.0, v750_vel=0.0):
        result = super()._try_entry(bar_idx, vel_div, prices, price_histories,
                                    v50_vel, v750_vel)
        if result and self.position:
            boost = max(1.0, getattr(self, '_day_base_boost', 1.0))
            beta = max(0.0, getattr(self, '_day_beta', 0.0))
            # Effective parameters
            alpha_eff = self.alpha
            if self.adaptive_alpha:
                alpha_eff *= boost          # more boost on stressed-regime days
            if self.adaptive_beta:
                alpha_eff *= (1.0 + beta)   # beta signal scales aggression
            thr_eff = self.threshold
            if self.adaptive_thr:
                # High boost → lower threshold → be more selective about "calm"
                thr_eff = self.threshold / max(1.0, boost)
            prank = self._proxy_prank()
            scale = 1.0 + alpha_eff * max(0.0, thr_eff - prank)
            self.position.notional *= scale
            self._scale_history.append(scale)
            self._alpha_eff_history.append(alpha_eff)
            self._thr_eff_history.append(thr_eff)
        return result

    def reset(self):
        super().reset()
        self._scale_history = []
        self._alpha_eff_history = []
        self._thr_eff_history = []
# ── Run harness with half-split ───────────────────────────────────────────────
def _run(engine_factory, name, d, fw):
"""Full run + temporal split (first vs second half of days)."""
kw = ENGINE_KWARGS.copy()
acb = AdaptiveCircuitBreaker()
acb.preload_w750(d['date_strings'])
eng = engine_factory(kw)
eng.set_ob_engine(d['ob_eng'])
eng.set_acb(acb)
if fw is not None:
eng.set_mc_forewarner(fw, MC_BASE_CFG)
eng.set_esoteric_hazard_multiplier(0.0)
pf_list = d['parquet_files']
n_days = len(pf_list)
half = n_days // 2 # split point
daily_caps, daily_pnls = [], []
half_caps = [[], []] # [first_half, second_half]
half_pnls = [[], []]
half_trades_n = [0, 0]
for i, pf in enumerate(pf_list):
ds = pf.stem
df, acols, dvol = d['pq_data'][ds]
cap_before = eng.capital
vol_ok = np.where(np.isfinite(dvol), dvol > d['vol_p60'], False)
eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
cap_after = eng.capital
daily_caps.append(cap_after)
daily_pnls.append(cap_after - cap_before)
h = 0 if i < half else 1
half_caps[h].append(cap_after)
half_pnls[h].append(cap_after - cap_before)
tr = eng.trade_history
n = len(tr)
roi = (eng.capital - 25000.0) / 25000.0 * 100.0
def _metrics(caps, pnls, start_cap=25000.0):
"""Compute metrics for a sub-period given daily capitals and a starting capital."""
if not caps:
return dict(roi=0.0, dd=0.0, sharpe=0.0)
peak = start_cap
max_dd = 0.0
for c in caps:
peak = max(peak, c)
max_dd = max(max_dd, (peak - c) / peak * 100.0)
total_pnl = sum(pnls)
roi_sub = total_pnl / start_cap * 100.0
dr = np.array([p / start_cap * 100.0 for p in pnls])
sharpe = float(dr.mean() / (dr.std() + 1e-9) * math.sqrt(365)) if len(dr) > 1 else 0.0
return dict(roi=roi_sub, dd=max_dd, sharpe=sharpe, n_days=len(caps))
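    # Illustrative drawdown arithmetic (made-up capitals): caps = [26000, 24000, 25500]
    # with start_cap = 25000 -> the peak reaches 26000, so the worst drawdown is
    # (26000 - 24000) / 26000 * 100 ≈ 7.69%. roi_sub sums the daily pnls over
    # start_cap (no compounding), matching the full-period roi formula above.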
    if n == 0:
        return dict(name=name, roi=roi, pf=0.0, dd=0.0, wr=0.0, sharpe=0.0,
                    trades=0, sizing_scale_mean=1.0)

    def _abs(t): return t.pnl_absolute if hasattr(t, 'pnl_absolute') else t.pnl_pct * 250.0
    wins = [t for t in tr if _abs(t) > 0]
    losses = [t for t in tr if _abs(t) <= 0]
    wr = len(wins) / n * 100.0
    pf_val = sum(_abs(t) for t in wins) / max(abs(sum(_abs(t) for t in losses)), 1e-9)
    peak_cap, max_dd = 25000.0, 0.0
    for cap in daily_caps:
        peak_cap = max(peak_cap, cap)
        max_dd = max(max_dd, (peak_cap - cap) / peak_cap * 100.0)
    dr = np.array([p / 25000.0 * 100.0 for p in daily_pnls])
    sharpe = float(dr.mean() / (dr.std() + 1e-9) * math.sqrt(365)) if len(dr) > 1 else 0.0
    # First/second half split — using capital at end of first half as the baseline for second half
    cap_at_halftime = half_caps[0][-1] if half_caps[0] else 25000.0
    h1 = _metrics(half_caps[0], half_pnls[0], start_cap=25000.0)
    h2 = _metrics(half_caps[1], half_pnls[1], start_cap=cap_at_halftime)
    sizing_scale_mean = getattr(eng, 'sizing_scale_mean', 1.0)
    # Alpha/threshold eff distributions for adaptive engines
    alpha_mean = 1.0
    thr_mean = 0.35
    eng_ae = eng if isinstance(eng, AdaptiveBoostEngine) else None
    if eng_ae:
        if eng_ae._alpha_eff_history:
            alpha_mean = float(np.mean(eng_ae._alpha_eff_history))
        if eng_ae._thr_eff_history:
            thr_mean = float(np.mean(eng_ae._thr_eff_history))
    return dict(
        name=name,
        roi=roi, pf=pf_val, dd=max_dd, wr=wr, sharpe=sharpe, trades=n,
        sizing_scale_mean=sizing_scale_mean,
        alpha_eff_mean=alpha_mean,
        thr_eff_mean=thr_mean,
        # Temporal split
        h1_roi=h1['roi'], h1_dd=h1['dd'], h1_sharpe=h1['sharpe'],
        h2_roi=h2['roi'], h2_dd=h2['dd'], h2_sharpe=h2['sharpe'],
        split_days=(half, n_days - half),
    )
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
    t_start = time.time()
    print("=" * 74)
    print("Exp 8 — scale_boost Robustness & Adaptive Parameterization")
    print("=" * 74)
    ensure_jit()
    d = load_data()
    fw = load_forewarner()
    configs = [
        ("0_baseline",
         lambda kw: NDAlphaEngine(**kw)),
        ("1_fixed_thr035_a1.0",
         lambda kw: AdaptiveBoostEngine(threshold=0.35, alpha=1.0, **kw)),
        ("2_adaptive_alpha__thr035_a1.0xboost",
         lambda kw: AdaptiveBoostEngine(threshold=0.35, alpha=1.0,
                                        adaptive_alpha=True, **kw)),
        ("3_adaptive_thr__thr035/boost_a1.0",
         lambda kw: AdaptiveBoostEngine(threshold=0.35, alpha=1.0,
                                        adaptive_thr=True, **kw)),
        ("4_adaptive_both__thr/boost_axboost",
         lambda kw: AdaptiveBoostEngine(threshold=0.35, alpha=1.0,
                                        adaptive_alpha=True, adaptive_thr=True, **kw)),
        ("5_adaptive_beta__thr035_ax(1+beta)",
         lambda kw: AdaptiveBoostEngine(threshold=0.35, alpha=1.0,
                                        adaptive_beta=True, **kw)),
    ]
    results = []
    for i, (name, factory) in enumerate(configs):
        t0 = time.time()
        print(f"\n[{i+1}/{len(configs)}] {name} ...")
        res = _run(factory, name, d, fw)
        elapsed = time.time() - t0
        print(f" ROI={res['roi']:.2f}% PF={res['pf']:.4f} DD={res['dd']:.2f}%"
              f" WR={res['wr']:.2f}% Sharpe={res['sharpe']:.3f} Trades={res['trades']}"
              f" scale={res['sizing_scale_mean']:.4f} alpha_eff={res['alpha_eff_mean']:.4f}"
              f" ({elapsed:.0f}s)")
        print(f" H1(days 1-{res['split_days'][0]}): ROI={res['h1_roi']:.2f}%"
              f" DD={res['h1_dd']:.2f}% Sharpe={res['h1_sharpe']:.3f}")
        print(f" H2(days {res['split_days'][0]+1}-{sum(res['split_days'])}): ROI={res['h2_roi']:.2f}%"
              f" DD={res['h2_dd']:.2f}% Sharpe={res['h2_sharpe']:.3f}")
        results.append(res)
    # Baseline verification
    b = results[0]
    fixed = results[1]
    gold_match = (abs(b['roi'] - GOLD['roi']) < 0.5 and abs(b['dd'] - GOLD['dd']) < 0.5
                  and abs(b['trades'] - GOLD['trades']) < 10)
    fixed_match = (abs(fixed['roi'] - 93.61) < 0.5 and abs(fixed['dd'] - 14.51) < 0.5)
    print(f"\n{'='*74}")
    print(f"VERIFICATION:")
    print(f" Baseline vs gold: {'PASS ✓' if gold_match else 'FAIL ✗'} "
          f"(ROI={b['roi']:.2f}% DD={b['dd']:.2f}%)")
    print(f" Fixed vs exp7 winner: {'PASS ✓' if fixed_match else 'FAIL ✗'} "
          f"(ROI={fixed['roi']:.2f}% DD={fixed['dd']:.2f}%)")
    print(f"\n{'='*74}")
    print(f"FULL-PERIOD RESULTS (target: DD<15.05% AND ROI>=84.1%)")
    hdr = f"{'Config':<46} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'ΔDD':>6} {'ΔROI':>6} {'scale':>7} {'alpha':>7} {'OK':>4}"
    print(hdr); print('-' * 98)
    base_roi = b['roi']; base_dd = b['dd']
    for r in results:
        dROI = r['roi'] - base_roi; dDD = r['dd'] - base_dd
        ok = 'Y' if (r['dd'] < GOLD['dd'] and r['roi'] >= GOLD['roi'] * 0.95) else 'N'
        print(f"{r['name']:<46} {r['roi']:>7.2f} {r['pf']:>6.4f} {r['dd']:>6.2f} "
              f"{dDD:>+6.2f} {dROI:>+6.2f} {r['sizing_scale_mean']:>7.4f} "
              f"{r['alpha_eff_mean']:>7.4f} {ok:>4}")
    print(f"\n{'='*74}")
    print("TEMPORAL SPLIT — Overfitting check (does improvement hold in both halves?)")
    h_days = results[0]['split_days']
    print(f"Split: H1=days 1–{h_days[0]}, H2=days {h_days[0]+1}–{sum(h_days)}")
    print(f"{'Config':<46} {'H1 ROI':>8} {'H1 DD':>7} {'H2 ROI':>8} {'H2 DD':>7} "
          f"{'ΔH1DD':>7} {'ΔH2DD':>7}")
    print('-' * 98)
    b_h1dd = b['h1_dd']; b_h2dd = b['h2_dd']
    for r in results:
        dH1 = r['h1_dd'] - b_h1dd; dH2 = r['h2_dd'] - b_h2dd
        print(f"{r['name']:<46} {r['h1_roi']:>8.2f} {r['h1_dd']:>7.2f} "
              f"{r['h2_roi']:>8.2f} {r['h2_dd']:>7.2f} {dH1:>+7.2f} {dH2:>+7.2f}")
print(f"\n{'='*74}")
print("OVERFITTING VERDICT:")
for r in results[1:]:
h1_better = r['h1_dd'] < b_h1dd
h2_better = r['h2_dd'] < b_h2dd
both = h1_better and h2_better
neither = (not h1_better) and (not h2_better)
verdict = "BOTH halves improve DD ✓" if both else \
"NEITHER half improves DD ✗" if neither else \
f"Mixed: H1={'' if h1_better else ''} H2={'' if h2_better else ''}"
print(f" {r['name']:<46}: {verdict}")
# Adaptive summary
print(f"\n{'='*74}")
print("ADAPTIVE PARAMETERIZATION — alpha_eff and thr_eff distributions:")
for r in results[2:]:
print(f" {r['name']:<46}: alpha_eff_mean={r['alpha_eff_mean']:.4f}"
f" thr_eff_mean={r['thr_eff_mean']:.4f}")
    outfile = _HERE / "exp8_boost_robustness_results.json"
    log_results(results, outfile, gold=GOLD, meta={
        "exp": "exp8",
        "question": "Is scale_boost overfitting? Are threshold/alpha regime-dependent?",
        "total_elapsed_s": round(time.time() - t_start, 1),
        "gold_match": gold_match,
        "fixed_match": fixed_match,
    })
    total = time.time() - t_start
    print(f"\nTotal elapsed: {total/60:.1f} min")
    print("Done.")


if __name__ == "__main__":
    main()