initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions
--- a/nautilus_dolphin/test_1m_calibration_sweep.py
+++ b/nautilus_dolphin/test_1m_calibration_sweep.py
@@ -0,0 +1,307 @@
+"""1m Klines System Calibration — max_hold_bars × abs_max_lev sweep.
+
+Problem: 1m system has DD=31.69% (elevated). Cause: 2-hour max hold (120 bars at 1min)
+× avg leverage 2.57x — long holds amplify adverse periods.
+
+Axis 1: max_hold_bars in [30, 45, 60, 90, 120]
+Axis 2: abs_max_lev in [3.0, 4.0, 5.0, 6.0]
+Every combination is run (full grid, not two sequential sweeps): 5 × 4 = 20 runs on 795-day klines window.
+
+Full engine stack identical to klines_2y_experiment (ACBv6 + OB + MC-Forewarner + EsoF neutral).
+Thresholds: vel_div_threshold=-0.50, vel_div_extreme=-1.25 (klines-adapted).
+Saves: run_logs/1m_calib_{TS}.csv + .json
+"""
+import sys, time, json, csv
+sys.stdout.reconfigure(encoding='utf-8', errors='replace')
+from pathlib import Path
+from datetime import datetime
+import numpy as np
+import pandas as pd
+
+sys.path.insert(0, str(Path(__file__).parent))
+
+print("Compiling numba kernels...")
+t0c = time.time()
+from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
+from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
+from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
+from nautilus_dolphin.nautilus.ob_features import (
+    OBFeatureEngine, compute_imbalance_nb, compute_depth_1pct_nb,
+    compute_depth_quality_nb, compute_fill_probability_nb, compute_spread_proxy_nb,
+    compute_depth_asymmetry_nb, compute_imbalance_persistence_nb,
+    compute_withdrawal_velocity_nb, compute_market_agreement_nb, compute_cascade_signal_nb,
+)
+from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
+_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
+compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01)
+rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
+compute_sizing_nb(-0.55, -0.50, -1.25, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
+                  np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
+                  np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
+check_dc_nb(_p, 3, 1, 0.75)
+_b = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64)
+_a = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64)
+compute_imbalance_nb(_b, _a); compute_depth_1pct_nb(_b, _a)
+compute_depth_quality_nb(210.0, 200.0); compute_fill_probability_nb(1.0)
+compute_spread_proxy_nb(_b, _a); compute_depth_asymmetry_nb(_b, _a)
+compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2)
+compute_withdrawal_velocity_nb(np.array([100.0, 110.0], dtype=np.float64), 1)
+compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
+compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)
+print(f"  JIT: {time.time()-t0c:.1f}s")
+
+from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
+from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
+from mc.mc_ml import DolphinForewarner
+
+VBT_DIR    = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
+DATE_START = '2024-01-01'
+DATE_END   = '2026-03-05'
+META_COLS  = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
+              'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div',
+              'instability_50', 'instability_150'}
+MC_MODELS_DIR = str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))
+MC_BASE_CFG = {
+    'trial_id': 0, 'vel_div_threshold': -0.02, 'vel_div_extreme': -0.05,
+    'use_direction_confirm': True, 'dc_lookback_bars': 7,
+    'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True,
+    'dc_leverage_boost': 1.00, 'dc_leverage_reduce': 0.50,
+    'vd_trend_lookback': 10, 'min_leverage': 0.50, 'max_leverage': 5.00,
+    'leverage_convexity': 3.00, 'fraction': 0.20, 'use_alpha_layers': True,
+    'use_dynamic_leverage': True, 'fixed_tp_pct': 0.0099, 'stop_pct': 1.00,
+    'max_hold_bars': 120, 'use_sp_fees': True, 'use_sp_slippage': True,
+    'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.50,
+    'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': 0.40,
+    'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.00,
+    'use_asset_selection': True, 'min_irp_alignment': 0.45, 'lookback': 100,
+    'acb_beta_high': 0.80, 'acb_beta_low': 0.20, 'acb_w750_threshold_pct': 60,
+}
+BASE_ENGINE_KWARGS = dict(
+    initial_capital=25000.0,
+    vel_div_threshold=-0.50, vel_div_extreme=-1.25,   # klines-adapted
+    min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
+    fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0,
+    use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
+    dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
+    use_asset_selection=True, min_irp_alignment=0.45,
+    use_sp_fees=True, use_sp_slippage=True,
+    sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
+    use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
+    lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
+)
+OB_ASSETS = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"]
+
+# ── Shared state ───────────────────────────────────────────────────────────────
+print("\nLoading MC-Forewarner...")
+forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
+
+parquet_files = sorted(
+    p for p in VBT_DIR.glob("*.parquet")
+    if 'catalog' not in str(p) and DATE_START <= p.stem <= DATE_END
+)
+date_strings = [pf.stem for pf in parquet_files]
+print(f"Dates: {len(parquet_files)} ({date_strings[0]} to {date_strings[-1]})")
+
+# Vol calibration (first 5 dates for klines): p60 of the trailing 50-bar BTCUSDT return std.
+all_vols = []
+for pf in parquet_files[:5]:
+    df = pd.read_parquet(pf)
+    if 'BTCUSDT' not in df.columns: continue
+    pr = df['BTCUSDT'].values
+    # NOTE(review): starts at bar 60 while the per-day series (dv) starts at bar 50 — confirm intended.
+    for i in range(60, len(pr)):
+        seg = pr[i-50:i]   # i >= 60, so this is always a full 50-bar window (no clamp or length guard needed)
+        v = float(np.std(np.diff(seg)/seg[:-1]))
+        if v > 0: all_vols.append(v)   # also drops NaN, since NaN > 0 is False
+vol_p60 = float(np.percentile(all_vols, 60)) if all_vols else 1e-4
+print(f"Vol p60 (klines): {vol_p60:.6f}")
+
+print(f"Pre-loading {len(parquet_files)} parquets...")
+t_load = time.time()
+pq_data = {}   # date stem -> (DataFrame, non-META_COLS column names, per-bar BTCUSDT vol array)
+for i, pf in enumerate(parquet_files):
+    df = pd.read_parquet(pf)
+    ac = [c for c in df.columns if c not in META_COLS]
+    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
+    dv = np.full(len(df), np.nan)   # stays NaN for the first 50 bars, or everywhere if BTCUSDT is absent
+    if bp is not None:
+        # dv[j] = std of the 49 simple returns inside the 50 bars preceding bar j
+        for j in range(50, len(bp)):
+            seg = bp[j-50:j]   # j >= 50, so the window is always full (no clamp or length guard needed)
+            dv[j] = float(np.std(np.diff(seg)/seg[:-1]))
+    pq_data[pf.stem] = (df, ac, dv)
+    if (i+1) % 200 == 0:
+        print(f"  {i+1}/{len(parquet_files)} loaded...")
+print(f"  Done in {time.time()-t_load:.1f}s")
+
+# ACB w750 from klines parquet: fill the breaker's per-date cache with each day's median v750 velocity
+acb_master = AdaptiveCircuitBreaker()
+acb_master.preload_w750(date_strings)   # returns all-zero for klines (no NPZ files)
+for ds, (df, _, _) in pq_data.items():
+    if 'v750_lambda_max_velocity' in df.columns:
+        v750 = df['v750_lambda_max_velocity'].dropna()
+        if len(v750) > 0:
+            acb_master._w750_vel_cache[ds] = float(v750.median())   # writes the breaker's private cache directly
+_w750 = [v for v in acb_master._w750_vel_cache.values() if v != 0.0]   # exact zeros are left out of the percentile
+if _w750:
+    acb_master._w750_threshold = float(np.percentile(_w750, acb_master.config.W750_THRESHOLD_PCT))
+print(f"ACB w750 p60 (klines): {acb_master._w750_threshold:.6f}")   # NOTE(review): label hard-codes "p60"; the percentile used is config.W750_THRESHOLD_PCT
+
+_mock_ob = MockOBProvider(
+    imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
+    imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092,
+                      "BNBUSDT": +0.05, "SOLUSDT": +0.05},
+)
+ob_eng = OBFeatureEngine(_mock_ob)
+ob_eng.preload_date("mock", OB_ASSETS)
+
+# ── Sweep grid ─────────────────────────────────────────────────────────────────
+MAX_HOLD_SWEEP = [30, 45, 60, 90, 120]   # bars (=minutes at 1m timescale)
+ABS_MAX_LEV_SWEEP = [3.0, 4.0, 5.0, 6.0]
+
+print(f"\n{'='*75}")
+print(f"  1m CALIBRATION SWEEP: max_hold × abs_max_lev")
+print(f"  max_hold_bars: {MAX_HOLD_SWEEP}")
+print(f"  abs_max_lev:   {ABS_MAX_LEV_SWEEP}")
+print(f"  Total runs: {len(MAX_HOLD_SWEEP) * len(ABS_MAX_LEV_SWEEP)}")
+print(f"  Baseline: max_hold=120  abs_max_lev=5.0  (795-day: ROI=+172.34% DD=31.69%)")
+print(f"{'='*75}")
+
+def run_klines(max_hold, abs_max_lev):
+    """Run one full-window backtest for (max_hold, abs_max_lev); return a flat metrics dict.
+
+    A fresh NDAlphaEngine is built per call from BASE_ENGINE_KWARGS; ob_eng, acb_master,
+    forewarner, pq_data, date_strings and vol_p60 are shared module-level state.
+    """
+    kw = dict(BASE_ENGINE_KWARGS, max_hold_bars=max_hold, abs_max_leverage=abs_max_lev)
+    cap0 = kw['initial_capital']   # one baseline for ROI, drawdown peak and half-window ROI
+    engine = NDAlphaEngine(**kw)
+    engine.set_ob_engine(ob_eng)
+    engine.set_acb(acb_master)
+    engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
+    engine.set_esoteric_hazard_multiplier(0.0)
+
+    all_daily = []
+    for ds in date_strings:
+        df, acols, dvol = pq_data[ds]
+        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)   # NaN vol bars never pass the gate
+        r = engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)
+        all_daily.append({'pnl': r.get('pnl', 0.0), 'capital': r.get('capital', cap0),
+                          'trades': r.get('trades', 0)})
+
+    tr = engine.trade_history
+    gw = sum(t.pnl_absolute for t in tr if t.pnl_absolute > 0)          # gross wins
+    gl = abs(sum(t.pnl_absolute for t in tr if t.pnl_absolute <= 0))    # gross losses (break-even counts as a loss)
+    n_wins = sum(1 for t in tr if t.pnl_absolute > 0)
+    pnls = np.array([s['pnl'] for s in all_daily])
+    pnl_sd = float(pnls.std()) if len(pnls) else 0.0   # computed once; skips the empty-array warning
+    # NOTE(review): annualised with sqrt(252) although the window is described as 795 calendar days — confirm.
+    sharpe = float(pnls.mean() / pnl_sd * np.sqrt(252)) if pnl_sd > 0 else 0.0
+    peak = cap0; max_dd = 0.0
+    for c in (s['capital'] for s in all_daily):   # drawdown over the per-day 'capital' values, in percent
+        peak = max(peak, c)
+        max_dd = max(max_dd, (peak - c) / peak * 100)
+    mid = len(all_daily) // 2   # H1/H2 = first vs second half of the days by position, not by calendar year
+    h1_roi = sum(s['pnl'] for s in all_daily[:mid]) / cap0 * 100
+    h2_roi = sum(s['pnl'] for s in all_daily[mid:]) / cap0 * 100
+
+    return {
+        'max_hold_bars': max_hold, 'abs_max_lev': abs_max_lev,
+        'max_hold_min': max_hold,  # 1min bars → minutes
+        'roi': (engine.capital - cap0) / cap0 * 100,
+        'pf': gw / gl if gl > 0 else 999.0,   # 999.0 is the sentinel for "no losing trades"
+        'dd': max_dd, 'sharpe': sharpe, 'wr': n_wins / len(tr) * 100 if tr else 0.0,
+        'n_trades': len(tr), 'tp_rate_pct': engine.tp_exits / len(tr) * 100 if tr else 0.0,
+        'avg_lev': float(np.mean([t.leverage for t in tr])) if tr else 0.0,
+        'avg_bars_held': float(np.mean([t.bars_held for t in tr])) if tr else 0.0,
+        'h1_roi': h1_roi, 'h2_roi': h2_roi,
+        'h2h1': h2_roi / h1_roi if h1_roi != 0 else float('nan'),   # NaN when H1 ROI is exactly zero
+    }
+
+results = []
+t_sweep_start = time.time()
+run_n = 0
+
+for max_hold in MAX_HOLD_SWEEP:
+    for abs_max_lev in ABS_MAX_LEV_SWEEP:
+        run_n += 1
+        t0r = time.time()
+        baseline_mark = " ← BASELINE" if (max_hold == 120 and abs_max_lev == 5.0) else ""
+        print(f"\n[{run_n}/{len(MAX_HOLD_SWEEP)*len(ABS_MAX_LEV_SWEEP)}] "
+              f"max_hold={max_hold}min  abs_max_lev={abs_max_lev}x{baseline_mark}")
+
+        row = run_klines(max_hold, abs_max_lev)
+        results.append(row)
+        elapsed_r = time.time() - t0r
+
+        print(f"  ROI={row['roi']:+.2f}%  PF={row['pf']:.4f}  DD={row['dd']:.2f}%  "
+              f"Sh={row['sharpe']:.3f}  WR={row['wr']:.1f}%  T={row['n_trades']}  "
+              f"TP%={row['tp_rate_pct']:.1f}%  AvgLev={row['avg_lev']:.2f}x  "
+              f"AvgBars={row['avg_bars_held']:.1f}  [{elapsed_r:.0f}s]")
+
+total_elapsed = time.time() - t_sweep_start
+
+# ── Analysis ────────────────────────────────────────────────────────────────────
+baseline = next(r for r in results if r['max_hold_bars'] == 120 and r['abs_max_lev'] == 5.0)
+best_roi    = max(results, key=lambda r: r['roi'])
+best_sharpe = max(results, key=lambda r: r['sharpe'])
+best_dd     = min(results, key=lambda r: r['dd'])
+# Best risk-adjusted: highest ROI with DD ≤ 25% (falls back to the best-ROI run if none qualifies)
+viable = [r for r in results if r['dd'] <= 25.0]
+best_viable = max(viable, key=lambda r: r['roi']) if viable else best_roi
+
+print(f"\n{'='*75}")
+print(f"  1m CALIBRATION SWEEP COMPLETE  ({total_elapsed/60:.1f} min)")
+print(f"{'='*75}")
+print(f"  Baseline (hold=120m lev=5x): ROI={baseline['roi']:+.2f}%  PF={baseline['pf']:.4f}  "
+      f"DD={baseline['dd']:.2f}%  Sh={baseline['sharpe']:.3f}")
+print(f"  Best ROI:     hold={best_roi['max_hold_bars']}m lev={best_roi['abs_max_lev']}x "
+      f"→ ROI={best_roi['roi']:+.2f}%  DD={best_roi['dd']:.2f}%")
+print(f"  Best Sharpe:  hold={best_sharpe['max_hold_bars']}m lev={best_sharpe['abs_max_lev']}x "
+      f"→ Sh={best_sharpe['sharpe']:.3f}  DD={best_sharpe['dd']:.2f}%")
+print(f"  Min DD:       hold={best_dd['max_hold_bars']}m lev={best_dd['abs_max_lev']}x "
+      f"→ DD={best_dd['dd']:.2f}%  ROI={best_dd['roi']:+.2f}%")
+print(f"  Best viable (DD≤25%): hold={best_viable['max_hold_bars']}m "
+      f"lev={best_viable['abs_max_lev']}x → ROI={best_viable['roi']:+.2f}%  "
+      f"DD={best_viable['dd']:.2f}%  Sh={best_viable['sharpe']:.3f}")
+
+print(f"\n  Grid summary (ROI | DD):")
+print(f"  {'':>12}", end='')
+for lev in ABS_MAX_LEV_SWEEP:
+    print(f"  lev={lev:.0f}x         ", end='')
+print()
+for mh in MAX_HOLD_SWEEP:
+    print(f"  hold={mh:3d}min  ", end='')
+    for lev in ABS_MAX_LEV_SWEEP:
+        row = next(r for r in results if r['max_hold_bars'] == mh and r['abs_max_lev'] == lev)
+        mk = '*' if (row == best_viable) else (' ' if (mh != 120 or lev != 5.0) else 'B')
+        print(f"  {row['roi']:+6.1f}%/{row['dd']:4.1f}%{mk}  ", end='')
+    print()
+
+# ── Save ────────────────────────────────────────────────────────────────────────
+ts = datetime.now().strftime('%Y%m%d_%H%M%S')   # local-time stamp shared by the CSV and JSON file names
+run_dir = Path(__file__).parent / 'run_logs'
+run_dir.mkdir(exist_ok=True)
+
+with open(run_dir / f'1m_calib_{ts}.csv', 'w', newline='') as f:   # newline='' as the csv module expects
+    w = csv.DictWriter(f, fieldnames=list(results[0].keys()))   # column order = key order of the first result row
+    w.writeheader(); w.writerows(results)
+
+summary = {
+    'experiment': '1m_klines_calibration_sweep',
+    'date_range': f'{DATE_START}_to_{DATE_END}',
+    'max_hold_sweep': MAX_HOLD_SWEEP,
+    'abs_max_lev_sweep': ABS_MAX_LEV_SWEEP,
+    'baseline': baseline,
+    'best_roi': best_roi,
+    'best_sharpe': best_sharpe,
+    'best_dd_reduction': best_dd,   # the run with the lowest max drawdown
+    'best_viable_dd25': best_viable,
+    'elapsed_s': total_elapsed,
+    'run_ts': ts,
+    'all_results': results,
+}
+with open(run_dir / f'1m_calib_{ts}.json', 'w') as f:
+    json.dump(summary, f, indent=2)   # NOTE: a NaN 'h2h1' is emitted as the non-standard JSON token NaN
+
+print(f"\nSaved: run_logs/1m_calib_{ts}.csv + .json")