initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions
--- a/nautilus_dolphin/run_structural_attribution.py
+++ b/nautilus_dolphin/run_structural_attribution.py
@@ -0,0 +1,304 @@
+import sys, time
+from pathlib import Path
+import numpy as np
+import pandas as pd
+from scipy.stats import skew, kurtosis
+import json
+
+sys.path.insert(0, str(Path(__file__).parent))
+
+from nautilus_dolphin.nautilus.alpha_orchestrator import NDAlphaEngine
+from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
+from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
+from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
+
+VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
+META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
+             'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div',
+             'instability_50', 'instability_150'}
+
+parquet_files = sorted(VBT_DIR.glob("*.parquet"))
+parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]
+
+print("Loading data...")
+all_vols = []
+for pf in parquet_files[:2]:
+    df = pd.read_parquet(pf)
+    if 'BTCUSDT' not in df.columns: continue
+    pr = df['BTCUSDT'].values
+    for i in range(60, len(pr)):
+        seg = pr[max(0,i-50):i]
+        if len(seg)<10: continue
+        v = float(np.std(np.diff(seg)/seg[:-1]))
+        if v > 0: all_vols.append(v)
+vol_p60 = float(np.percentile(all_vols, 60))
+
+pq_data = {}
+total_bars = 0
+for pf in parquet_files:
+    df = pd.read_parquet(pf)
+    total_bars += len(df)
+    ac = [c for c in df.columns if c not in META_COLS]
+    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
+    dv = np.full(len(df), np.nan)
+    if bp is not None:
+        for i in range(50, len(bp)):
+            seg = bp[max(0,i-50):i]
+            if len(seg)<10: continue
+            dv[i] = float(np.std(np.diff(seg)/seg[:-1]))
+    pq_data[pf.stem] = (df, ac, dv)
+
+# Initialize systems
+acb = AdaptiveCircuitBreaker()
+acb.preload_w750([pf.stem for pf in parquet_files])
+
+mock = MockOBProvider(imbalance_bias=-0.09, depth_scale=1.0, 
+                      assets=["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"],
+                      imbalance_biases={"BNBUSDT": 0.20, "SOLUSDT": 0.20})
+ob_engine_inst = OBFeatureEngine(mock)
+ob_engine_inst.preload_date("mock", mock.get_assets())
+
+from mc.mc_ml import DolphinForewarner
+from mc.mc_sampler import MCTrialConfig
+forewarner = DolphinForewarner(models_dir=str(Path(__file__).parent / "mc_results" / "models"))
+config = MCTrialConfig(
+    trial_id="LIVE",
+    vel_div_threshold=-0.02, vel_div_extreme=-0.05,
+    min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
+    fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
+    use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
+    dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
+    vd_trend_lookback=20, use_sp_fees=True, use_sp_slippage=True,
+    sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.5,
+    use_asset_selection=True, min_irp_alignment=0.45,
+    use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
+    ob_imbalance_bias=0.0, ob_depth_scale=1.0,
+    lookback=100, use_alpha_layers=True, use_dynamic_leverage=True,
+    acb_beta_high=1.5, acb_beta_low=0.2, acb_w750_threshold_pct=60.0
+)
+report = forewarner.assess(config)
+is_green = (report.envelope_score > 0.5 and report.champion_probability > 0.6)
+
+def run_trajectory(name, lev_multiplier, use_ob_engine, dynamic_ceiling=False):
+    ENGINE_KWARGS = dict(
+        initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
+        min_leverage=0.5, max_leverage=5.0 * lev_multiplier, leverage_convexity=3.0,
+        fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
+        use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
+        dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
+        use_asset_selection=True, min_irp_alignment=0.45,
+        use_sp_fees=True, use_sp_slippage=True,
+        use_ob_edge=use_ob_engine, ob_edge_bps=5.0, ob_confirm_rate=0.40,
+        lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
+    )
+    import gc; gc.collect()
+    engine = NDAlphaEngine(**ENGINE_KWARGS)
+    
+    if dynamic_ceiling:
+        engine.set_mc_forewarner_status(is_green)
+        
+    ob_ref = ob_engine_inst if use_ob_engine else None
+    if ob_ref:
+        engine.set_ob_engine(ob_ref)
+        
+    bar_idx = 0; peak_cap = engine.capital; max_dd = 0.0
+    daily_returns = []
+    daily_capitals = [engine.capital]
+    exposure_bars = 0
+    
+    for pf in parquet_files:
+        ds = pf.stem
+        cs = engine.capital
+        
+        acb_info = acb.get_dynamic_boost_for_date(ds, ob_engine=ob_ref)
+        base_boost = acb_info['boost']
+        beta = acb_info['beta']
+        
+        df, acols, dvol = pq_data[ds]
+        ph = {}
+        for ri in range(len(df)):
+            row = df.iloc[ri]; vd = row.get("vel_div")
+            if vd is None or not np.isfinite(vd): bar_idx+=1; continue
+            prices = {}
+            for ac in acols:
+                p = row[ac]
+                if p and p > 0 and np.isfinite(p):
+                    prices[ac] = float(p)
+                    if ac not in ph: ph[ac] = []
+                    ph[ac].append(float(p))
+                    if len(ph[ac]) > 500: ph[ac] = ph[ac][-200:]
+            if not prices: bar_idx+=1; continue
+            
+            vrok = False if ri < 100 else (np.isfinite(dvol[ri]) and dvol[ri] > vol_p60)
+            if beta > 0:
+                ss = 0.0
+                if vd < -0.02:
+                    raw = (-0.02 - float(vd)) / (-0.02 - -0.05)
+                    ss = min(1.0, max(0.0, raw)) ** 3.0
+                engine.regime_size_mult = base_boost * (1.0 + beta * ss)
+            else:
+                engine.regime_size_mult = base_boost
+                
+            engine.process_bar(bar_idx=bar_idx, vel_div=float(vd), prices=prices, vol_regime_ok=vrok, price_histories=ph)
+            bar_idx += 1
+            if engine.position is not None:
+                exposure_bars += 1
+            
+        peak_cap = max(peak_cap, engine.capital)
+        dd = (peak_cap - engine.capital) / peak_cap
+        max_dd = max(max_dd, dd)
+        
+        daily_returns.append((engine.capital - cs) / cs if cs > 0 else 0)
+        daily_capitals.append(engine.capital)
+        
+    trades = engine.trade_history
+    R = np.array(daily_returns)
+    
+    return {
+        'name': name,
+        'daily_returns': R,
+        'daily_capitals': daily_capitals,
+        'trades': len(trades),
+        'exposure_bars': exposure_bars,
+        'max_dd': max_dd
+    }
+
+def analyze_structural(res):
+    R = res['daily_returns']
+    mu = np.mean(R)
+    var = np.var(R, ddof=1)
+    sk = skew(R)
+    kur = kurtosis(R)
+    p95_neg = np.percentile(R[R < 0], 5) if len(R[R<0])>0 else 0
+    exposure_pct = res['exposure_bars'] / total_bars * 100 if total_bars > 0 else 0
+    
+    # Loss clustering
+    max_loss_streak = 0
+    curr_streak = 0
+    for r in R:
+        if r < 0:
+            curr_streak += 1
+            max_loss_streak = max(max_loss_streak, curr_streak)
+        else:
+            curr_streak = 0
+            
+    # Autocorrelation lag 1
+    if len(R) > 1:
+        autocorr = np.corrcoef(R[:-1], R[1:])[0, 1]
+    else:
+        autocorr = 0
+        
+    # Geometric growth rate
+    ggr_daily = np.mean(np.log1p(R))
+    
+    return {
+        'mu': mu, 'var': var, 'skew': sk, 'kurt': kur, 
+        'p95_neg': p95_neg, 'max_loss_streak': max_loss_streak,
+        'trades': res['trades'], 'exposure_pct': exposure_pct,
+        'autocorr': autocorr, 'ggr_daily': ggr_daily
+    }
+
+print("\n--- STEP 1 & 2: Compare Before OB vs After OB (5.0x) ---")
+res_no_ob = run_trajectory("No OB 5x", 1.0, False)
+res_ob = run_trajectory("OB 5x", 1.0, True)
+
+st_no = analyze_structural(res_no_ob)
+st_ob = analyze_structural(res_ob)
+
+print(f"{'Metric':<25} | {'Before OB':<15} | {'After OB':<15} | {'Delta'}")
+print("-" * 75)
+print(f"{'Arithmetic Mean (u)':<25} | {st_no['mu']:<15.6f} | {st_ob['mu']:<15.6f} | {st_ob['mu'] - st_no['mu']:+.6f}")
+print(f"{'Daily Variance (s2)':<25} | {st_no['var']:<15.6f} | {st_ob['var']:<15.6f} | {st_ob['var'] - st_no['var']:+.6f}")
+print(f"{'Skew':<25} | {st_no['skew']:<15.3f} | {st_ob['skew']:<15.3f} | {st_ob['skew'] - st_no['skew']:+.3f}")
+print(f"{'Kurtosis':<25} | {st_no['kurt']:<15.3f} | {st_ob['kurt']:<15.3f} | {st_ob['kurt'] - st_no['kurt']:+.3f}")
+print(f"{'95th %ile Neg Return':<25} | {st_no['p95_neg']:<15.4f} | {st_ob['p95_neg']:<15.4f} | {st_ob['p95_neg'] - st_no['p95_neg']:+.4f}")
+print(f"{'Max Loss Streak (Days)':<25} | {st_no['max_loss_streak']:<15} | {st_ob['max_loss_streak']:<15} | {st_ob['max_loss_streak'] - st_no['max_loss_streak']:+}")
+print(f"{'Trade Count':<25} | {st_no['trades']:<15} | {st_ob['trades']:<15} | {st_ob['trades'] - st_no['trades']:+}")
+print(f"{'Exposure Time %':<25} | {st_no['exposure_pct']:<14.2f}% | {st_ob['exposure_pct']:<14.2f}% | {st_ob['exposure_pct'] - st_no['exposure_pct']:+.2f}%")
+print(f"{'Autocorrelation (Lag 1)':<25} | {st_no['autocorr']:<15.3f} | {st_ob['autocorr']:<15.3f} | {st_ob['autocorr'] - st_no['autocorr']:+.3f}")
+print(f"{'Geometric Growth Rate':<25} | {st_no['ggr_daily']:<15.6f} | {st_ob['ggr_daily']:<15.6f} | {st_ob['ggr_daily'] - st_no['ggr_daily']:+.6f}")
+
+print("\n--- STEP 3: Geometric Attribution ---")
+delta_ggr = st_ob['ggr_daily'] - st_no['ggr_daily']
+delta_mu = st_ob['mu'] - st_no['mu']
+delta_var = -(st_ob['var'] - st_no['var']) / 2.0
+# The rest is tail (skew/kurt terms and higher moments)
+delta_tail = delta_ggr - (delta_mu + delta_var)
+print(f"Total Daily GGR Improvement: {delta_ggr:+.6f}")
+print(f"  -> Contrib from Mean (u):       {delta_mu:+.6f} ({delta_mu/delta_ggr*100:.1f}%)")
+print(f"  -> Contrib from Variance (s2):  {delta_var:+.6f} ({delta_var/delta_ggr*100:.1f}%)")
+print(f"  -> Contrib from Tail Shape:     {delta_tail:+.6f} ({delta_tail/delta_ggr*100:.1f}%)")
+
+
+print("\n--- STEP 4: Fit Variance vs Leverage Curve ---")
+levs = [5.0, 5.5, 6.0, 6.5, 7.0]
+curve_results = []
+for l in levs:
+    res = run_trajectory(f"OB {l}x", l/5.0, True)
+    st = analyze_structural(res)
+    curve_results.append((l, st['mu'], st['var'], st['ggr_daily']))
+
+print(f"{'Leverage':<10} | {'Mu':<10} | {'Var (s2)':<10} | {'Marginal u':<12} | {'Marginal s2/2':<15} | {'GGR (Daily)'}")
+print("-" * 80)
+prev_mu, prev_var = None, None
+for l, mu, var, ggr in curve_results:
+    marg_mu = mu - prev_mu if prev_mu else 0
+    marg_var_2 = (var - prev_var)/2 if prev_var else 0
+    print(f"{l:<10.1f} | {mu:<10.6f} | {var:<10.6f} | {marg_mu:<12.6f} | {marg_var_2:<15.6f} | {ggr:.6f}")
+    prev_mu, prev_var = mu, var
+
+print("\n=> Geometric Growth caps out roughly where Marginal u < Marginal s2/2")
+
+
+print("\n--- STEP 5: Monte Carlo Simulation (Static 5x vs Static 6x vs Dynamic 5->6x) ---")
+def run_mc_sim(baseline_returns, periods=365, n_simulations=1000):
+    np.random.seed(42)
+    daily_returns = baseline_returns
+    
+    simulated_returns = np.random.choice(daily_returns, size=(n_simulations, periods), replace=True)
+    equity_curves = np.cumprod(1.0 + simulated_returns, axis=1)
+    cagrs = (equity_curves[:, -1] - 1.0) * 100
+    
+    median_cagr = np.median(cagrs)
+    p05_cagr = np.percentile(cagrs, 5)
+    
+    max_dds = np.zeros(n_simulations)
+    recovery_times = np.zeros(n_simulations)
+    
+    for i in range(n_simulations):
+        curve = equity_curves[i]
+        peaks = np.maximum.accumulate(curve)
+        drawdowns = (peaks - curve) / peaks
+        max_dd_idx = np.argmax(drawdowns)
+        max_dds[i] = drawdowns[max_dd_idx]
+        
+        if drawdowns[max_dd_idx] > 0:
+            peak_val = peaks[max_dd_idx]
+            recovery_idx = -1
+            for j in range(max_dd_idx, periods):
+                if curve[j] >= peak_val:
+                    recovery_idx = j
+                    break
+            
+            if recovery_idx != -1:
+                recovery_times[i] = recovery_idx - max_dd_idx
+            else:
+                recovery_times[i] = periods - max_dd_idx
+                
+    prob_40dd = np.mean(max_dds >= 0.40) * 100
+    median_rec = np.median(recovery_times[recovery_times > 0]) if np.any(recovery_times > 0) else -1
+    return median_cagr, p05_cagr, prob_40dd, median_rec
+
+res_5x = run_trajectory("Static 5x", 1.0, True, False)
+res_6x = run_trajectory("Static 6x", 1.2, True, False)
+res_dyn = run_trajectory("Dynamic 5->6x", 1.0, True, True)
+
+mc_5x = run_mc_sim(res_5x['daily_returns'])
+mc_6x = run_mc_sim(res_6x['daily_returns'])
+mc_dyn = run_mc_sim(res_dyn['daily_returns'])
+
+print(f"{'Strategy':<15} | {'Med CAGR':<15} | {'5% CAGR':<15} | {'P(>40% DD)':<15} | {'Median Recovery'}")
+print("-" * 80)
+print(f"{'Static 5x':<15} | {mc_5x[0]:<14.2f}% | {mc_5x[1]:<14.2f}% | {mc_5x[2]:<14.2f}% | {mc_5x[3]:.1f} days")
+print(f"{'Static 6x':<15} | {mc_6x[0]:<14.2f}% | {mc_6x[1]:<14.2f}% | {mc_6x[2]:<14.2f}% | {mc_6x[3]:.1f} days")
+print(f"{'Dyn 5->6x':<15} | {mc_dyn[0]:<14.2f}% | {mc_dyn[1]:<14.2f}% | {mc_dyn[2]:<14.2f}% | {mc_dyn[3]:.1f} days")