initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
314
nautilus_dolphin/dvae/exp2_proxy_exit.py
Executable file
314
nautilus_dolphin/dvae/exp2_proxy_exit.py
Executable file
@@ -0,0 +1,314 @@
|
||||
"""
|
||||
Exp 2 — proxy_B as premature exit signal, with shadow trades.
|
||||
|
||||
Post-hoc "what-if" analysis on the baseline trade set.
|
||||
1. Run baseline engine; log per-day proxy_B and per-asset prices keyed by
|
||||
(date_str, bar_idx) — the composite key that matches trade.entry_bar.
|
||||
2. For each trade: find which day it was on (tracked by engine override),
|
||||
then check if proxy_B dropped below threshold during the hold.
|
||||
3. Compute early-exit PnL at the trigger bar using the CORRECT asset price.
|
||||
4. Compare vs actual PnL.
|
||||
|
||||
Shadow insight: avg_pnl_delta = early_exit_pnl - actual_pnl
|
||||
Positive → early exit would have been better
|
||||
Negative → holding to natural exit was better (proxy_B is NOT a useful exit signal)
|
||||
|
||||
Thresholds tested (rolling percentile of proxy_B, window=500):
|
||||
T1: exit if proxy_B < p10 (rare trigger)
|
||||
T2: exit if proxy_B < p25 (moderate)
|
||||
T3: exit if proxy_B < p50 (aggressive)
|
||||
|
||||
Logged to exp2_proxy_exit_results.json.
|
||||
"""
|
||||
import sys, time, json
# Replace undecodable characters so the em-dash/arrow-heavy report output
# survives non-UTF-8 consoles (e.g. Windows cp1252).
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np

_HERE = Path(__file__).resolve().parent
# Make the parent directory importable so `exp_shared` and the
# `nautilus_dolphin` package resolve when this file is run as a script.
sys.path.insert(0, str(_HERE.parent))

from exp_shared import (
    ensure_jit, ENGINE_KWARGS, GOLD, load_data, load_forewarner, log_results
)
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
|
||||
|
||||
|
||||
# ── Engine that logs per-day proxy_B + asset prices + trade dates ─────────────
|
||||
|
||||
class ShadowLoggingEngine(NDAlphaEngine):
    """
    NDAlphaEngine that, while running the normal baseline, captures:

      - day_proxy[date_str][ri]         = proxy_B value at bar ri
      - day_prices[date_str][ri][asset] = price of each asset at bar ri
      - trade_dates[i]                  = entry date_str of trade_history[i]

    These logs let shadow_analysis() replay "what if we had exited early"
    scenarios post-hoc using the correct per-asset price at the trigger bar.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.day_proxy = {}        # date_str -> {ri: proxy_b}
        self.day_prices = {}       # date_str -> {ri: {asset: price}}
        self._cur_date = None
        self._n_trades_before = 0
        # Parallel list to trade_history: entry date per trade.
        self.trade_dates = []

    @staticmethod
    def _safe_float(row, col):
        """Fetch row[col] as a finite float, defaulting to 0.0 on any bad value."""
        v = row.get(col)
        if v is None:
            return 0.0
        try:
            f = float(v)
        except (TypeError, ValueError, OverflowError):
            # Narrow catch: only conversion failures — never mask Ctrl-C etc.
            return 0.0
        return f if np.isfinite(f) else 0.0

    def process_day(self, date_str, df, asset_columns,
                    vol_regime_ok=None, direction=None, posture='APEX'):
        """
        Run one trading day through the engine while logging proxy_B and
        per-asset prices keyed by bar index, then tag trades opened this day
        with date_str.  Signature matches NDAlphaEngine.process_day.
        """
        self._cur_date = date_str
        self.day_proxy[date_str] = {}
        self.day_prices[date_str] = {}
        self._n_trades_before = len(self.trade_history)

        self.begin_day(date_str, posture=posture, direction=direction)
        bid = 0  # count of bars consumed today (valid or skipped)
        for ri in range(len(df)):
            row = df.iloc[ri]
            vd = row.get('vel_div')
            if vd is None or not np.isfinite(float(vd)):
                # Invalid bar: still advance global/day counters so indexing
                # stays aligned with the baseline run.
                self._global_bar_idx += 1; bid += 1; continue

            v50 = self._safe_float(row, 'v50_lambda_max_velocity')
            v750 = self._safe_float(row, 'v750_lambda_max_velocity')
            inst = self._safe_float(row, 'instability_50')
            # proxy_B: short-window instability minus long-window velocity.
            pb = inst - v750
            self.day_proxy[date_str][ri] = pb

            prices = {}
            for ac in asset_columns:
                p = row.get(ac)
                if p is not None and p > 0 and np.isfinite(p):
                    prices[ac] = float(p)
            self.day_prices[date_str][ri] = dict(prices)

            if not prices:
                self._global_bar_idx += 1; bid += 1; continue

            # Warm-up fallback: with no explicit vol-regime mask, allow
            # trading only after 100 bars of the day have been seen.
            vrok = bool(vol_regime_ok[ri]) if vol_regime_ok is not None else (bid >= 100)
            self.step_bar(bar_idx=ri, vel_div=float(vd), prices=prices,
                          vol_regime_ok=vrok, v50_vel=v50, v750_vel=v750)
            bid += 1

        result = self.end_day()

        # Tag every trade opened today with this date (keeps trade_dates
        # parallel to trade_history).
        n_new = len(self.trade_history) - self._n_trades_before
        self.trade_dates.extend([date_str] * n_new)

        return result
|
||||
|
||||
|
||||
# ── Shadow analysis ───────────────────────────────────────────────────────────
|
||||
|
||||
def shadow_analysis(eng, threshold_pct, window=500, position_fraction=0.20):
    """
    Post-hoc shadow-exit analysis over eng.trade_history.

    For each trade, compute a rolling-percentile threshold of proxy_B over
    the ``window`` bars preceding entry; if proxy_B drops below it during
    the hold (same-day bars in (entry_bar, exit_bar]), price an early exit
    at the trigger bar using the trade's OWN asset price and compare it to
    the trade's actual PnL.

    Parameters
    ----------
    eng : ShadowLoggingEngine
        Engine after a full run; must expose trade_history, trade_dates,
        day_proxy and day_prices.
    threshold_pct : float
        Percentile in [0, 1] (e.g. 0.10 -> p10) used as the exit threshold.
    window : int
        Rolling window length (bars) for the percentile.  Default 500.
    position_fraction : float
        Capital fraction per trade used for the ROI-impact estimate.
        Default 0.20 — assumed to match baseline sizing; TODO confirm.

    Returns
    -------
    dict
        Trigger counts, avg actual / early-exit PnL (%), avg delta (%),
        early-better rate (%), avg bars saved, estimated ROI impact (pp).
    """
    tr = eng.trade_history
    dates = eng.trade_dates
    if len(dates) < len(tr):
        # Pad if any trades weren't tagged (shouldn't happen).
        dates = dates + [None] * (len(tr) - len(dates))

    # Global chronological proxy_B sequence, used for rolling percentiles.
    all_proxy_seq = []
    for day in sorted(eng.day_proxy.keys()):
        day_d = eng.day_proxy[day]
        for ri in sorted(day_d.keys()):
            all_proxy_seq.append((day, ri, day_d[ri]))

    results = []

    # Per-day sorted bar indices for hold-bar lookup.
    day_bars = {d: sorted(eng.day_proxy[d].keys()) for d in eng.day_proxy}
    # (date, ri) -> position in all_proxy_seq (end of the rolling history).
    seq_idx = {(s, r): i for i, (s, r, _) in enumerate(all_proxy_seq)}

    for t, date in zip(tr, dates):
        actual_pnl = float(getattr(t, 'pnl_pct', 0.0))
        if date is None:
            results.append(dict(triggered=False, actual_pnl=actual_pnl))
            continue

        entry_bar = int(getattr(t, 'entry_bar', 0))
        exit_bar = int(getattr(t, 'exit_bar', entry_bar))
        entry_price = float(getattr(t, 'entry_price', 0.0) or 0.0)
        direction = int(getattr(t, 'direction', -1))
        asset = getattr(t, 'asset', 'BTCUSDT')

        # Rolling threshold: bars strictly BEFORE entry, up to `window` long.
        eidx = seq_idx.get((date, entry_bar), 0)
        hist_window = [pb for (_, _, pb) in all_proxy_seq[max(0, eidx - window):eidx]]
        if len(hist_window) < 20:  # too little history for a stable percentile
            results.append(dict(triggered=False, actual_pnl=actual_pnl))
            continue
        threshold = float(np.percentile(hist_window, threshold_pct * 100))

        # Hold bars on the same day: after entry, up to and incl. exit.
        if date not in day_bars:
            results.append(dict(triggered=False, actual_pnl=actual_pnl))
            continue
        hold_bars = [ri for ri in day_bars[date] if entry_bar < ri <= exit_bar]

        triggered_bar = None
        for ri in hold_bars:
            # Missing bars default to 999 (never below threshold).
            if eng.day_proxy[date].get(ri, 999) < threshold:
                triggered_bar = ri
                break

        if triggered_bar is None:
            results.append(dict(triggered=False, actual_pnl=actual_pnl))
            continue

        # Correct early-exit price: same asset, trigger bar, same day.
        early_price = eng.day_prices[date].get(triggered_bar, {}).get(asset, 0.0)
        if entry_price <= 0 or early_price <= 0:
            results.append(dict(triggered=False, actual_pnl=actual_pnl))
            continue
        early_pnl = direction * (early_price - entry_price) / entry_price

        results.append(dict(
            triggered=True,
            date=date, entry_bar=entry_bar, exit_bar=exit_bar,
            triggered_bar=triggered_bar, bars_saved=exit_bar - triggered_bar,
            asset=asset, direction=direction,
            entry_price=entry_price, early_price=early_price,
            actual_pnl=actual_pnl,
            early_exit_pnl=early_pnl,
            pnl_delta=early_pnl - actual_pnl,
        ))

    triggered = [r for r in results if r['triggered']]
    if not triggered:
        # Unified key set matching the triggered branch, plus the legacy
        # short names kept for backward compatibility.
        return dict(n_triggered=0, n_total=len(results), pct_triggered=0,
                    avg_actual_pnl_pct=0, avg_early_exit_pnl_pct=0,
                    avg_pnl_delta_pct=0, early_better_rate=0,
                    avg_bars_saved=0, roi_impact_estimate_pp=0,
                    avg_early_pnl_pct=0, avg_delta_pct=0, roi_impact_pp=0)

    avg_actual = float(np.mean([r['actual_pnl'] for r in triggered]))
    avg_early = float(np.mean([r['early_exit_pnl'] for r in triggered]))
    avg_delta = float(np.mean([r['pnl_delta'] for r in triggered]))
    early_better = float(np.mean([r['pnl_delta'] > 0 for r in triggered]))
    avg_bars_saved = float(np.mean([r['bars_saved'] for r in triggered]))

    # Estimated ROI impact: sum of PnL deltas x per-trade capital fraction,
    # expressed in percentage points.
    roi_impact = float(sum(r['pnl_delta'] for r in triggered)
                       * position_fraction * 100)

    return dict(
        n_triggered=len(triggered),
        n_total=len(results),
        pct_triggered=len(triggered) / max(1, len(results)) * 100,
        avg_actual_pnl_pct=avg_actual * 100,
        avg_early_exit_pnl_pct=avg_early * 100,
        avg_pnl_delta_pct=avg_delta * 100,
        early_better_rate=early_better * 100,
        avg_bars_saved=avg_bars_saved,
        roi_impact_estimate_pp=roi_impact,
    )
|
||||
|
||||
|
||||
def main():
    """
    Run the baseline with shadow logging, evaluate three proxy_B early-exit
    thresholds (p10 / p25 / p50), print a summary table and verdict, and log
    results to exp2_proxy_exit_results.json.
    """
    ensure_jit()
    print("\nLoading data & forewarner...")
    d = load_data()
    fw = load_forewarner()

    # MC_BASE_CFG isn't in the module-level import list; fetch it here.
    from exp_shared import MC_BASE_CFG

    print("\nRunning baseline with shadow logging...")
    t0 = time.time()
    kw = ENGINE_KWARGS.copy()
    acb = AdaptiveCircuitBreaker()
    acb.preload_w750(d['date_strings'])
    eng = ShadowLoggingEngine(**kw)
    eng.set_ob_engine(d['ob_eng'])
    eng.set_acb(acb)
    if fw:
        eng.set_mc_forewarner(fw, MC_BASE_CFG)
    # Pure baseline: esoteric hazard disabled.
    eng.set_esoteric_hazard_multiplier(0.0)

    # Renamed from `pf` to avoid shadowing the profit-factor below.
    for pq in d['parquet_files']:
        ds = pq.stem
        df, acols, dvol = d['pq_data'][ds]
        vol_ok = np.where(np.isfinite(dvol), dvol > d['vol_p60'], False)
        eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)

    tr = eng.trade_history
    print(f" Done in {time.time()-t0:.0f}s Trades={len(tr)} "
          f"Tagged={len(eng.trade_dates)}")

    # Confirm baseline metrics match the gold run.
    def _abs(t):
        # Absolute PnL; fall back to pct x nominal $250 position if absent.
        return t.pnl_absolute if hasattr(t, 'pnl_absolute') else t.pnl_pct * 250.

    gross_win = sum(_abs(t) for t in tr if _abs(t) > 0)
    gross_loss = abs(sum(_abs(t) for t in tr if _abs(t) <= 0))
    pf = gross_win / max(gross_loss, 1e-9)
    roi = (eng.capital - 25000) / 25000 * 100
    print(f" Baseline: ROI={roi:.2f}% PF={pf:.4f} (gold: 88.55% / 1.215)")

    THRESHOLDS = [
        ('T1: exit if proxy_B < p10', 0.10),
        ('T2: exit if proxy_B < p25', 0.25),
        ('T3: exit if proxy_B < p50', 0.50),
    ]

    all_results = []
    for tname, tpct in THRESHOLDS:
        print(f"\n{tname}")
        res = shadow_analysis(eng, threshold_pct=tpct, window=500)
        res['name'] = tname
        all_results.append(res)
        print(f" Triggered: {res['n_triggered']}/{res['n_total']} "
              f"({res['pct_triggered']:.1f}%)")
        if res['n_triggered'] > 0:
            print(f" Avg actual PnL: {res['avg_actual_pnl_pct']:+.4f}%")
            print(f" Avg early-exit PnL: {res['avg_early_exit_pnl_pct']:+.4f}%")
            print(f" Avg delta: {res['avg_pnl_delta_pct']:+.4f}% "
                  f"(+ = early exit BETTER)")
            print(f" Early exit better: {res['early_better_rate']:.1f}% of triggered")
            print(f" Avg bars saved: {res['avg_bars_saved']:.1f}")
            print(f" Est. ROI impact: {res['roi_impact_estimate_pp']:+.2f}pp")

    print("\n" + "="*75)
    print("EXP 2 — SHADOW EXIT SUMMARY")
    print("="*75)
    print(f"{'Threshold':<35} {'Trig%':>6} {'AvgDelta%':>11} "
          f"{'EarlyBetter%':>13} {'ROI_pp':>8}")
    print('-'*75)
    for r in all_results:
        if r['n_triggered'] > 0:
            print(f" {r['name']:<33} {r['pct_triggered']:>6.1f}% "
                  f"{r['avg_pnl_delta_pct']:>10.4f}% "
                  f"{r['early_better_rate']:>12.1f}% "
                  f"{r['roi_impact_estimate_pp']:>8.2f}pp")
        else:
            print(f" {r['name']:<33} (no triggers)")

    # Verdict is read from the most conservative threshold (T1 / p10).
    verdict = all_results[0] if all_results else {}
    if verdict.get('avg_pnl_delta_pct', -1) > 0:
        print("\n → VERDICT: Early exit is BENEFICIAL (delta > 0)")
    else:
        print("\n → VERDICT: Holding to natural exit is BETTER (early exit hurts)")

    log_results(all_results, _HERE / 'exp2_proxy_exit_results.json',
                meta={'experiment': 'proxy_B exit shadow (corrected)',
                      'proxy': 'instability_50 - v750_lambda_max_velocity',
                      'n_trades': len(tr),
                      'baseline_roi': roi, 'baseline_pf': pf})


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user