# DOLPHIN/nautilus_dolphin/dvae/exp8_boost_robustness.py
"""
Exp 8 — scale_boost Robustness & Adaptive Parameterization
Two questions about the exp7 scale_boost winner (thr=0.35, a=1.0):
Q1. Is it overfitting? (+5pp ROI AND -0.54pp DD, measured on the same 55 days on which it was found)
Test: temporal split, first half (days 1–27) vs second half (days 28–55).
If improvement holds in BOTH halves independently, it's structurally real.
If only one half drives it, the result is temporally fragile.
Q2. Are threshold and alpha regime-dependent?
Hypothesis: proxy_B is more discriminating in high-eigenvalue-regime days
(high ACB beta). On those days, "calm" entries should receive stronger boost,
and the threshold for "what qualifies as calm" should be tighter.
Adaptive formulas (using ACB state available in _try_entry as self._day_base_boost
and self._day_beta):
alpha_eff = alpha * day_base_boost (more boost on stressed days)
thr_eff = threshold / day_base_boost (tighter gate on stressed days)
Both together: combine both adjustments
Also test dvol-proxy adaptation: use day_beta directly as a continuous scaler.
Configs:
0. Baseline
1. Fixed: thr=0.35, a=1.0 (the exp7 winner; must reproduce exp7 results)
2. Adaptive-alpha: alpha_eff = 1.0 * day_base_boost, thr fixed at 0.35
3. Adaptive-threshold: thr_eff = 0.35 / day_base_boost, alpha fixed at 1.0
4. Adaptive-both: both formulas combined
5. Beta-scaled alpha: alpha_eff = 1.0 * (1 + day_beta), thr fixed at 0.35
(day_beta is the ACB eigenvalue signal; more direct than base_boost)
Results include:
- Full 55-day metrics (standard)
- First-half (days 1–27) and second-half (days 28–55) metrics split out
to test temporal stability of the DD reduction
- Per-day scale distribution analysis
Results logged to exp8_boost_robustness_results.json
"""
import sys, time, json, math
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
_HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(_HERE.parent))
from exp_shared import (
    ensure_jit, ENGINE_KWARGS, GOLD, MC_BASE_CFG,
    load_data, load_forewarner, log_results,
)
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
# ── Re-use ProxyBaseEngine from exp7 (copy-minimal) ──────────────────────────
class ProxyBaseEngine(NDAlphaEngine):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._current_proxy_b: float = 0.0
        self._proxy_b_history: list = []

    def _update_proxy(self, inst: float, v750: float) -> float:
        pb = inst - v750
        self._current_proxy_b = pb
        self._proxy_b_history.append(pb)
        if len(self._proxy_b_history) > 500:
            self._proxy_b_history = self._proxy_b_history[-500:]
        return pb

    def _proxy_prank(self) -> float:
        if not self._proxy_b_history:
            return 0.5
        n = len(self._proxy_b_history)
        return sum(v < self._current_proxy_b for v in self._proxy_b_history) / n
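    # Illustrative (made-up values): with _proxy_b_history = [-0.2, 0.0, 0.1, 0.3]
    # and _current_proxy_b = 0.1 (the value just appended), two of the four entries
    # are strictly below it, so _proxy_prank() returns 2 / 4 = 0.5, i.e. the current
    # proxy_B sits at the 50th percentile of its trailing (max 500-bar) window.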
def process_day(self, date_str, df, asset_columns,
vol_regime_ok=None, direction=None, posture='APEX'):
self.begin_day(date_str, posture=posture, direction=direction)
bid = 0
for ri in range(len(df)):
row = df.iloc[ri]
vd = row.get('vel_div')
if vd is None or not np.isfinite(float(vd)):
self._global_bar_idx += 1; bid += 1; continue
def gf(col):
v = row.get(col)
if v is None: return 0.0
try: return float(v) if np.isfinite(float(v)) else 0.0
except: return 0.0
v50 = gf('v50_lambda_max_velocity')
v750 = gf('v750_lambda_max_velocity')
inst = gf('instability_50')
self._update_proxy(inst, v750)
prices = {}
for ac in asset_columns:
p = row.get(ac)
if p is not None and p > 0 and np.isfinite(float(p)):
prices[ac] = float(p)
if not prices:
self._global_bar_idx += 1; bid += 1; continue
vrok = bool(vol_regime_ok[ri]) if vol_regime_ok is not None else (bid >= 100)
self.step_bar(bar_idx=ri, vel_div=float(vd), prices=prices,
vol_regime_ok=vrok, v50_vel=v50, v750_vel=v750)
bid += 1
return self.end_day()
# ── Adaptive scale_boost engine ───────────────────────────────────────────────
class AdaptiveBoostEngine(ProxyBaseEngine):
"""
scale_boost with optionally regime-adaptive threshold and alpha.
Fixed mode (adaptive_alpha=False, adaptive_thr=False, adaptive_beta=False):
scale = 1 + alpha * max(0, threshold - prank)
Identical to exp7 ProxyScaleEngine(mode='boost').
Adaptive modes use ACB state (self._day_base_boost, self._day_beta)
which is set by begin_day() before any _try_entry calls in that day:
adaptive_alpha: alpha_eff = alpha * day_base_boost
High-boost day (stressed eigenspace regime) stronger boost on calm entries
Low-boost day modest boost
adaptive_thr: thr_eff = threshold / day_base_boost
High-boost day lower threshold more selective (only very calm entries qualify)
Low-boost day higher threshold more entries qualify
adaptive_beta: alpha_eff = alpha * (1 + day_beta)
day_beta is the ACB's direct eigenvalue signal (0 when inactive)
More discriminating on days where eigenvalue regime is active
Parameters can be combined freely.
"""
    def __init__(self, *args,
                 threshold: float = 0.35,
                 alpha: float = 1.0,
                 adaptive_alpha: bool = False,
                 adaptive_thr: bool = False,
                 adaptive_beta: bool = False,
                 **kwargs):
        super().__init__(*args, **kwargs)
        self.threshold = threshold
        self.alpha = alpha
        self.adaptive_alpha = adaptive_alpha
        self.adaptive_thr = adaptive_thr
        self.adaptive_beta = adaptive_beta
        self._scale_history: list = []
        self._alpha_eff_history: list = []
        self._thr_eff_history: list = []

    @property
    def sizing_scale_mean(self) -> float:
        return float(np.mean(self._scale_history)) if self._scale_history else 1.0

    def _try_entry(self, bar_idx, vel_div, prices, price_histories,
                   v50_vel=0.0, v750_vel=0.0):
        result = super()._try_entry(bar_idx, vel_div, prices, price_histories,
                                    v50_vel, v750_vel)
        if result and self.position:
            boost = max(1.0, getattr(self, '_day_base_boost', 1.0))
            beta = max(0.0, getattr(self, '_day_beta', 0.0))
            # Effective parameters
            alpha_eff = self.alpha
            if self.adaptive_alpha:
                alpha_eff *= boost          # more boost on stressed-regime days
            if self.adaptive_beta:
                alpha_eff *= (1.0 + beta)   # beta signal scales aggression
            thr_eff = self.threshold
            if self.adaptive_thr:
                # High boost → lower threshold → be more selective about "calm"
                thr_eff = self.threshold / max(1.0, boost)
            prank = self._proxy_prank()
            scale = 1.0 + alpha_eff * max(0.0, thr_eff - prank)
            self.position.notional *= scale
            self._scale_history.append(scale)
            self._alpha_eff_history.append(alpha_eff)
            self._thr_eff_history.append(thr_eff)
        return result

    def reset(self):
        super().reset()
        self._scale_history = []
        self._alpha_eff_history = []
        self._thr_eff_history = []
# ── Run harness with half-split ───────────────────────────────────────────────
def _run(engine_factory, name, d, fw):
"""Full run + temporal split (first vs second half of days)."""
kw = ENGINE_KWARGS.copy()
acb = AdaptiveCircuitBreaker()
acb.preload_w750(d['date_strings'])
eng = engine_factory(kw)
eng.set_ob_engine(d['ob_eng'])
eng.set_acb(acb)
if fw is not None:
eng.set_mc_forewarner(fw, MC_BASE_CFG)
eng.set_esoteric_hazard_multiplier(0.0)
pf_list = d['parquet_files']
n_days = len(pf_list)
half = n_days // 2 # split point
daily_caps, daily_pnls = [], []
half_caps = [[], []] # [first_half, second_half]
half_pnls = [[], []]
half_trades_n = [0, 0]
for i, pf in enumerate(pf_list):
ds = pf.stem
df, acols, dvol = d['pq_data'][ds]
cap_before = eng.capital
vol_ok = np.where(np.isfinite(dvol), dvol > d['vol_p60'], False)
eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
cap_after = eng.capital
daily_caps.append(cap_after)
daily_pnls.append(cap_after - cap_before)
h = 0 if i < half else 1
half_caps[h].append(cap_after)
half_pnls[h].append(cap_after - cap_before)
tr = eng.trade_history
n = len(tr)
roi = (eng.capital - 25000.0) / 25000.0 * 100.0
def _metrics(caps, pnls, start_cap=25000.0):
"""Compute metrics for a sub-period given daily capitals and a starting capital."""
if not caps:
return dict(roi=0.0, dd=0.0, sharpe=0.0)
peak = start_cap
max_dd = 0.0
for c in caps:
peak = max(peak, c)
max_dd = max(max_dd, (peak - c) / peak * 100.0)
total_pnl = sum(pnls)
roi_sub = total_pnl / start_cap * 100.0
dr = np.array([p / start_cap * 100.0 for p in pnls])
sharpe = float(dr.mean() / (dr.std() + 1e-9) * math.sqrt(365)) if len(dr) > 1 else 0.0
return dict(roi=roi_sub, dd=max_dd, sharpe=sharpe, n_days=len(caps))
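    # Illustrative drawdown arithmetic (made-up capitals): caps = [26000, 24000, 25500]
    # with start_cap = 25000 -> the peak reaches 26000, so the worst drawdown is
    # (26000 - 24000) / 26000 * 100 ≈ 7.69%. roi_sub sums the daily pnls over
    # start_cap (no compounding), matching the full-period roi formula above.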
    if n == 0:
        return dict(name=name, roi=roi, pf=0.0, dd=0.0, wr=0.0, sharpe=0.0,
                    trades=0, sizing_scale_mean=1.0)

    def _abs(t): return t.pnl_absolute if hasattr(t, 'pnl_absolute') else t.pnl_pct * 250.0
    wins = [t for t in tr if _abs(t) > 0]
    losses = [t for t in tr if _abs(t) <= 0]
    wr = len(wins) / n * 100.0
    pf_val = sum(_abs(t) for t in wins) / max(abs(sum(_abs(t) for t in losses)), 1e-9)
    peak_cap, max_dd = 25000.0, 0.0
    for cap in daily_caps:
        peak_cap = max(peak_cap, cap)
        max_dd = max(max_dd, (peak_cap - cap) / peak_cap * 100.0)
    dr = np.array([p / 25000.0 * 100.0 for p in daily_pnls])
    sharpe = float(dr.mean() / (dr.std() + 1e-9) * math.sqrt(365)) if len(dr) > 1 else 0.0
    # First/second half split — using capital at end of first half as the baseline for second half
    cap_at_halftime = half_caps[0][-1] if half_caps[0] else 25000.0
    h1 = _metrics(half_caps[0], half_pnls[0], start_cap=25000.0)
    h2 = _metrics(half_caps[1], half_pnls[1], start_cap=cap_at_halftime)
    sizing_scale_mean = getattr(eng, 'sizing_scale_mean', 1.0)
    # Alpha/threshold eff distributions for adaptive engines
    alpha_mean = 1.0
    thr_mean = 0.35
    eng_ae = eng if isinstance(eng, AdaptiveBoostEngine) else None
    if eng_ae:
        if eng_ae._alpha_eff_history:
            alpha_mean = float(np.mean(eng_ae._alpha_eff_history))
        if eng_ae._thr_eff_history:
            thr_mean = float(np.mean(eng_ae._thr_eff_history))
    return dict(
        name=name,
        roi=roi, pf=pf_val, dd=max_dd, wr=wr, sharpe=sharpe, trades=n,
        sizing_scale_mean=sizing_scale_mean,
        alpha_eff_mean=alpha_mean,
        thr_eff_mean=thr_mean,
        # Temporal split
        h1_roi=h1['roi'], h1_dd=h1['dd'], h1_sharpe=h1['sharpe'],
        h2_roi=h2['roi'], h2_dd=h2['dd'], h2_sharpe=h2['sharpe'],
        split_days=(half, n_days - half),
    )
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
    t_start = time.time()
    print("=" * 74)
    print("Exp 8 — scale_boost Robustness & Adaptive Parameterization")
    print("=" * 74)
    ensure_jit()
    d = load_data()
    fw = load_forewarner()
    configs = [
        ("0_baseline",
         lambda kw: NDAlphaEngine(**kw)),
        ("1_fixed_thr035_a1.0",
         lambda kw: AdaptiveBoostEngine(threshold=0.35, alpha=1.0, **kw)),
        ("2_adaptive_alpha__thr035_a1.0xboost",
         lambda kw: AdaptiveBoostEngine(threshold=0.35, alpha=1.0,
                                        adaptive_alpha=True, **kw)),
        ("3_adaptive_thr__thr035/boost_a1.0",
         lambda kw: AdaptiveBoostEngine(threshold=0.35, alpha=1.0,
                                        adaptive_thr=True, **kw)),
        ("4_adaptive_both__thr/boost_axboost",
         lambda kw: AdaptiveBoostEngine(threshold=0.35, alpha=1.0,
                                        adaptive_alpha=True, adaptive_thr=True, **kw)),
        ("5_adaptive_beta__thr035_ax(1+beta)",
         lambda kw: AdaptiveBoostEngine(threshold=0.35, alpha=1.0,
                                        adaptive_beta=True, **kw)),
    ]
    results = []
    for i, (name, factory) in enumerate(configs):
        t0 = time.time()
        print(f"\n[{i+1}/{len(configs)}] {name} ...")
        res = _run(factory, name, d, fw)
        elapsed = time.time() - t0
        print(f" ROI={res['roi']:.2f}% PF={res['pf']:.4f} DD={res['dd']:.2f}%"
              f" WR={res['wr']:.2f}% Sharpe={res['sharpe']:.3f} Trades={res['trades']}"
              f" scale={res['sizing_scale_mean']:.4f} alpha_eff={res['alpha_eff_mean']:.4f}"
              f" ({elapsed:.0f}s)")
        print(f" H1(days 1-{res['split_days'][0]}): ROI={res['h1_roi']:.2f}%"
              f" DD={res['h1_dd']:.2f}% Sharpe={res['h1_sharpe']:.3f}")
        print(f" H2(days {res['split_days'][0]+1}-{sum(res['split_days'])}): ROI={res['h2_roi']:.2f}%"
              f" DD={res['h2_dd']:.2f}% Sharpe={res['h2_sharpe']:.3f}")
        results.append(res)
    # Baseline verification
    b = results[0]
    fixed = results[1]
    gold_match = (abs(b['roi'] - GOLD['roi']) < 0.5 and abs(b['dd'] - GOLD['dd']) < 0.5
                  and abs(b['trades'] - GOLD['trades']) < 10)
    fixed_match = (abs(fixed['roi'] - 93.61) < 0.5 and abs(fixed['dd'] - 14.51) < 0.5)
    print(f"\n{'='*74}")
    print(f"VERIFICATION:")
    print(f" Baseline vs gold: {'PASS ✓' if gold_match else 'FAIL ✗'} "
          f"(ROI={b['roi']:.2f}% DD={b['dd']:.2f}%)")
    print(f" Fixed vs exp7 winner: {'PASS ✓' if fixed_match else 'FAIL ✗'} "
          f"(ROI={fixed['roi']:.2f}% DD={fixed['dd']:.2f}%)")
    print(f"\n{'='*74}")
    print(f"FULL-PERIOD RESULTS (target: DD<15.05% AND ROI>=84.1%)")
    hdr = f"{'Config':<46} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'ΔDD':>6} {'ΔROI':>6} {'scale':>7} {'alpha':>7} {'OK':>4}"
    print(hdr); print('-' * 98)
    base_roi = b['roi']; base_dd = b['dd']
    for r in results:
        dROI = r['roi'] - base_roi; dDD = r['dd'] - base_dd
        ok = 'Y' if (r['dd'] < GOLD['dd'] and r['roi'] >= GOLD['roi'] * 0.95) else 'N'
        print(f"{r['name']:<46} {r['roi']:>7.2f} {r['pf']:>6.4f} {r['dd']:>6.2f} "
              f"{dDD:>+6.2f} {dROI:>+6.2f} {r['sizing_scale_mean']:>7.4f} "
              f"{r['alpha_eff_mean']:>7.4f} {ok:>4}")
    print(f"\n{'='*74}")
    print("TEMPORAL SPLIT — Overfitting check (does improvement hold in both halves?)")
    h_days = results[0]['split_days']
    print(f"Split: H1=days 1–{h_days[0]}, H2=days {h_days[0]+1}–{sum(h_days)}")
    print(f"{'Config':<46} {'H1 ROI':>8} {'H1 DD':>7} {'H2 ROI':>8} {'H2 DD':>7} "
          f"{'ΔH1DD':>7} {'ΔH2DD':>7}")
    print('-' * 98)
    b_h1dd = b['h1_dd']; b_h2dd = b['h2_dd']
    for r in results:
        dH1 = r['h1_dd'] - b_h1dd; dH2 = r['h2_dd'] - b_h2dd
        print(f"{r['name']:<46} {r['h1_roi']:>8.2f} {r['h1_dd']:>7.2f} "
              f"{r['h2_roi']:>8.2f} {r['h2_dd']:>7.2f} {dH1:>+7.2f} {dH2:>+7.2f}")
print(f"\n{'='*74}")
print("OVERFITTING VERDICT:")
for r in results[1:]:
h1_better = r['h1_dd'] < b_h1dd
h2_better = r['h2_dd'] < b_h2dd
both = h1_better and h2_better
neither = (not h1_better) and (not h2_better)
verdict = "BOTH halves improve DD ✓" if both else \
"NEITHER half improves DD ✗" if neither else \
f"Mixed: H1={'' if h1_better else ''} H2={'' if h2_better else ''}"
print(f" {r['name']:<46}: {verdict}")
# Adaptive summary
print(f"\n{'='*74}")
print("ADAPTIVE PARAMETERIZATION — alpha_eff and thr_eff distributions:")
for r in results[2:]:
print(f" {r['name']:<46}: alpha_eff_mean={r['alpha_eff_mean']:.4f}"
f" thr_eff_mean={r['thr_eff_mean']:.4f}")
    outfile = _HERE / "exp8_boost_robustness_results.json"
    log_results(results, outfile, gold=GOLD, meta={
        "exp": "exp8",
        "question": "Is scale_boost overfitting? Are threshold/alpha regime-dependent?",
        "total_elapsed_s": round(time.time() - t_start, 1),
        "gold_match": gold_match,
        "fixed_match": fixed_match,
    })
    total = time.time() - t_start
    print(f"\nTotal elapsed: {total/60:.1f} min")
    print("Done.")


if __name__ == "__main__":
    main()