#!/usr/bin/env python3
"""
test_exf_acb_marginal.py
========================
Marginal benefit test: add each significant ExF indicator to ACBv6 in
isolation and test whether it improves daily PnL vs champion baseline.

Method:
  Baseline:      55-day engine, ACBv6 (funding/dvol/fng/taker from NPZ, lag=0)
  Per-indicator: ACBv6 + one new ExF signal (+0.5 to signal count when fires)
  Statistics:    Wilcoxon signed-rank (N=55 pairs) + 5000-trial permutation test
  Firing condition: lagged indicator value above/below median (direction from r-sign)

Indicators (p<0.05 from exf_correlation sweep):
  claims        lag=1 r=+0.40 p=0.0035  HIGH fires
  ycurve        lag=2 r=+0.35 p=0.0105  HIGH fires
  us10y         lag=1 r=+0.32 p=0.0195  HIGH fires
  funding_eth   lag=0 r=-0.32 p=0.0183  LOW fires
  vol24         lag=0 r=+0.32 p=0.0178  HIGH fires
  stables       lag=2 r=-0.36 p=0.0074  LOW fires
  m2            lag=7 r=-0.36 p=0.0195  LOW fires
  hashrate      lag=7 r=-0.29 p=0.0479  LOW fires
  usdc          lag=5 r=-0.29 p=0.0412  LOW fires
  fund_dbt_eth  lag=7 r=+0.47 p=0.0278  HIGH fires (n=22, treat cautiously)
  dvol_btc      lag=7 r=-0.33 p=0.0619  LOW fires (borderline)
  dvol_eth      lag=7 r=-0.31 p=0.0777  LOW fires (borderline)
"""
import sys, time, math, json, csv
from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd
from scipy import stats

sys.path.insert(0, str(Path(__file__).parent))

# ── JIT warmup ────────────────────────────────────────────────────────────────
print("Compiling numba kernels...")
t0c = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import (
    compute_irp_nb, compute_ars_nb, rank_assets_irp_nb)
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
    OBFeatureEngine, compute_imbalance_nb, compute_depth_1pct_nb,
    compute_depth_quality_nb, compute_fill_probability_nb,
    compute_spread_proxy_nb, compute_depth_asymmetry_nb,
    compute_imbalance_persistence_nb, compute_withdrawal_velocity_nb,
    compute_market_agreement_nb, compute_cascade_signal_nb,
)
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider

# Call every numba kernel once with tiny inputs so JIT compilation cost is
# paid up front, not inside the timed engine runs.
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1)
compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
                  np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
                  np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
check_dc_nb(_p, 3, 1, 0.75)
_b = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64)
_a = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64)
compute_imbalance_nb(_b, _a)
compute_depth_1pct_nb(_b, _a)
compute_depth_quality_nb(210.0, 200.0)
compute_fill_probability_nb(1.0)
compute_spread_proxy_nb(_b, _a)
compute_depth_asymmetry_nb(_b, _a)
compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2)
compute_withdrawal_velocity_nb(np.array([100.0, 110.0], dtype=np.float64), 1)
compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)
print(f" JIT: {time.time() - t0c:.1f}s")

from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
from mc.mc_ml import DolphinForewarner

# ── Constants ─────────────────────────────────────────────────────────────────
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
RUN_LOGS = Path(__file__).parent / "run_logs"
RUN_LOGS.mkdir(exist_ok=True)
MC_DIR = str(Path(__file__).parent / "mc_results" / "models")

# Parquet columns that are engine metadata, not tradable asset price series.
META_COLS = {
    'timestamp', 'scan_number',
    'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
    'v300_lambda_max_velocity', 'v750_lambda_max_velocity',
    'vel_div', 'instability_50', 'instability_150',
}

# Champion ACBv6 engine configuration — identical for baseline and all
# per-indicator runs; only the ACB instance differs.
ENGINE_KWARGS = dict(
    initial_capital=25000.0,
    vel_div_threshold=-0.02, vel_div_extreme=-0.05,
    min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
    fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
    use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
    use_asset_selection=True, min_irp_alignment=0.45,
    use_sp_fees=True, use_sp_slippage=True,
    sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
    use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
    lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
)

# MC-Forewarner trial config mirroring ENGINE_KWARGS (plus forewarner-only keys).
MC_BASE_CFG = dict(
    trial_id=0,
    vel_div_threshold=-0.020, vel_div_extreme=-0.050,
    use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True, dc_leverage_boost=1.00, dc_leverage_reduce=0.50,
    vd_trend_lookback=10,
    min_leverage=0.50, max_leverage=5.00, leverage_convexity=3.00,
    fraction=0.20, use_alpha_layers=True, use_dynamic_leverage=True,
    fixed_tp_pct=0.0099, stop_pct=1.00, max_hold_bars=120,
    use_sp_fees=True, use_sp_slippage=True,
    sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
    use_ob_edge=True, ob_edge_bps=5.00, ob_confirm_rate=0.40,
    ob_imbalance_bias=-0.09, ob_depth_scale=1.00,
    use_asset_selection=True, min_irp_alignment=0.45,
    lookback=100,
    acb_beta_high=0.80, acb_beta_low=0.20, acb_w750_threshold_pct=60,
)

OB_ASSETS = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"]


# ── ACBWithExF: wraps ACBv6, injects one ExF signal ───────────────────────────
class ACBWithExF(AdaptiveCircuitBreaker):
    """ACBv6 + one additional ExF indicator (+0.5 signal when fires).

    Inherits all NPZ-loaded signals; adds one ExF layer on top.
    Clones w750 cache + cut cache from base ACB to avoid re-reading NPZ files.
    """

    def __init__(self, indicator: str, threshold: float, lag: int,
                 direction: int, trading_days: list, exf_series: dict):
        super().__init__()
        self._setup_exf(indicator, threshold, lag, direction,
                        trading_days, exf_series)

    @classmethod
    def clone_from(cls, base: 'AdaptiveCircuitBreaker', indicator: str,
                   threshold: float, lag: int, direction: int,
                   trading_days: list, exf_series: dict) -> 'ACBWithExF':
        """Create ACBWithExF pre-loaded from a base ACB's caches."""
        obj = cls.__new__(cls)
        AdaptiveCircuitBreaker.__init__(obj)
        # Copy caches from base to avoid NPZ re-reads
        obj._w750_vel_cache = dict(base._w750_vel_cache)
        obj._w750_threshold = base._w750_threshold
        obj._cache = dict(base._cache)
        obj._setup_exf(indicator, threshold, lag, direction,
                       trading_days, exf_series)
        return obj

    def _setup_exf(self, indicator: str, threshold: float, lag: int,
                   direction: int, trading_days: list, exf_series: dict) -> None:
        """Install ExF fields + lagged lookup (shared by __init__/clone_from).

        Builds {trading_day -> indicator value lag days earlier}; days with
        no lagged source map to NaN and never fire.
        """
        self._exf_ind = indicator
        self._exf_thr = threshold
        self._exf_lag = lag
        self._exf_dir = direction  # +1: high fires; -1: low fires
        self._lagged: dict = {}
        for i, ds in enumerate(trading_days):
            j = i - lag
            if j >= 0:
                self._lagged[ds] = exf_series.get(trading_days[j], float('nan'))
            else:
                self._lagged[ds] = float('nan')
        n_fires = sum(1 for ds in trading_days if self._fires_on(ds))
        n_valid = sum(1 for v in self._lagged.values() if not math.isnan(v))
        print(f" [{indicator:14s} lag={lag} dir={'HIGH' if direction>0 else 'LOW ':4s} "
              f"thr={threshold:+.4g}] fires={n_fires}/{len(trading_days)} valid={n_valid}")

    def _fires_on(self, date_str: str) -> bool:
        """True when the lagged value crosses the median in the r-sign direction."""
        v = self._lagged.get(date_str, float('nan'))
        if math.isnan(v):
            return False
        return (self._exf_dir > 0 and v > self._exf_thr) or \
               (self._exf_dir < 0 and v < self._exf_thr)

    def get_dynamic_boost_for_date(self, date_str: str, ob_engine=None) -> dict:
        """ACBv6 boost, plus +0.5 signal (and recomputed boost) when ExF fires."""
        info = super().get_dynamic_boost_for_date(date_str, ob_engine)
        if self._fires_on(date_str):
            base_sig = info['signals']
            new_sig = base_sig + 0.5
            # Same log-shaped boost formula ACBv6 uses, re-evaluated at the
            # bumped signal count.
            new_boost = (1.0 + 0.5 * math.log1p(new_sig)) if new_sig >= 1.0 else 1.0
            info['boost'] = new_boost
            info['signals'] = new_sig
            info['exf_signal_add'] = 0.5
        else:
            info['exf_signal_add'] = 0.0
        return info


# ── Engine runner ─────────────────────────────────────────────────────────────
def run_engine(acb, forewarner, pq_data, parquet_files, vol_p60, ob_eng,
               label="") -> dict:
    """Run the 55-day engine with the given ACB; return summary metrics.

    Returns a dict with roi/pf/wr/sharpe/dd/trades/capital plus the per-day
    PnL map used by the paired statistical tests.
    """
    engine = NDAlphaEngine(**ENGINE_KWARGS)
    engine.set_ob_engine(ob_eng)
    engine.set_acb(acb)
    engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
    engine.set_esoteric_hazard_multiplier(0.0)
    dstats = []
    for pf in parquet_files:
        ds = pf.stem
        df, acols, dvol = pq_data[ds]
        # Vol gate: only bars whose rolling vol is finite AND above p60 trade.
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)
        s = engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)
        dstats.append(s)
    tr = engine.trade_history
    w = [t for t in tr if t.pnl_absolute > 0]
    l = [t for t in tr if t.pnl_absolute <= 0]
    gw = sum(t.pnl_absolute for t in w) if w else 0
    gl = abs(sum(t.pnl_absolute for t in l)) if l else 0
    roi = (engine.capital - 25000) / 25000 * 100
    pf_ = gw / gl if gl > 0 else 999.0  # 999 = "no losing trades" sentinel
    wr = len(w) / len(tr) * 100 if tr else 0.0
    # Daily equity curve via cumulative sum (was an O(n^2) prefix re-summation).
    cap_series = np.concatenate(
        ([25000.0], 25000.0 + np.cumsum([s['pnl'] for s in dstats])))
    dr = np.diff(cap_series) / 25000 * 100
    sh = float(np.mean(dr) / np.std(dr) * np.sqrt(365)) if np.std(dr) > 0 else 0.0
    peak = np.maximum.accumulate(cap_series)
    dd = float(np.max((peak - cap_series) / np.where(peak > 0, peak, 1) * 100))
    daily_pnl = {s['date']: s['pnl'] for s in dstats}
    return {
        'label': label, 'roi': roi, 'pf': pf_, 'wr': wr, 'sharpe': sh,
        'dd': dd, 'trades': len(tr), 'capital': engine.capital,
        'daily_pnl': daily_pnl,
    }


# ── Statistical tests ─────────────────────────────────────────────────────────
def marginal_test(base_pnl: dict, test_pnl: dict, n_perm: int = 5000) -> dict:
    """Paired daily-PnL comparison over the dates both runs share.

    Returns day counts, mean PnL delta, a two-sided Wilcoxon signed-rank p,
    and a one-sided sign-flip permutation p (H1: test > baseline). Consumes
    the global np.random stream (seeded by the caller).
    """
    dates = sorted(set(base_pnl) & set(test_pnl))
    diffs = np.array([test_pnl[d] - base_pnl[d] for d in dates])
    n = len(diffs)
    if n == 0:
        # No overlapping days — nothing to compare.
        return {'n_days': 0, 'mean_delta_pnl': 0.0, 'pos_days': 0,
                'neg_days': 0, 'wilcoxon_p': 1.0, 'perm_p': 1.0}
    mean_delta = float(np.mean(diffs))
    pos_days = int(np.sum(diffs > 0))
    neg_days = int(np.sum(diffs < 0))
    # Wilcoxon signed-rank (two-sided; p < 0.10 noteworthy, p < 0.05 significant).
    # wilcoxon raises on all-zero diffs, hence the guard.
    if np.all(diffs == 0):
        w_p = 1.0
    else:
        try:
            _, w_p = stats.wilcoxon(diffs)
        except Exception:
            w_p = 1.0
    # One-sided permutation test (H1: test > baseline): randomly flip the
    # sign of each day's |delta| and compare the permuted means to the
    # observed mean. |diffs| hoisted out of the trial loop.
    abs_diffs = np.abs(diffs)
    perm_means = np.zeros(n_perm)
    for i in range(n_perm):
        signs = np.random.choice([-1, 1], size=n)
        perm_means[i] = float(np.mean(abs_diffs * signs))
    perm_p = float(np.mean(perm_means >= mean_delta))
    return {
        'n_days': n, 'mean_delta_pnl': mean_delta,
        'pos_days': pos_days, 'neg_days': neg_days,
        'wilcoxon_p': float(w_p), 'perm_p': perm_p,
    }


# ── Load & cache data ─────────────────────────────────────────────────────────
parquet_files = sorted(p for p in VBT_DIR.glob("*.parquet") if 'catalog' not in str(p))
date_strings = [pf.stem for pf in parquet_files]
print(f"\n[DATA] {len(parquet_files)} parquet files: {date_strings[0]} → {date_strings[-1]}")

# Vol gate (p60 from first 2 days)
print("[DATA] Computing vol p60 gate...")
all_vols = []
for pf in parquet_files[:2]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns:
        continue
    pr = df['BTCUSDT'].values
    for i in range(60, len(pr)):
        seg = pr[max(0, i - 50):i]
        if len(seg) < 10:
            continue
        # Rolling 50-bar stdev of simple returns.
        v = float(np.std(np.diff(seg) / seg[:-1]))
        if v > 0:
            all_vols.append(v)
vol_p60 = float(np.percentile(all_vols, 60))
print(f" vol_p60 = {vol_p60:.6f}")

print("[DATA] Loading parquet files...")
pq_data = {}
for pf in parquet_files:
    df = pd.read_parquet(pf)
    ac = [c for c in df.columns if c not in META_COLS]
    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    dv = np.full(len(df), np.nan)
    if bp is not None:
        for i in range(50, len(bp)):
            seg = bp[max(0, i - 50):i]
            if len(seg) < 10:
                continue
            dv[i] = float(np.std(np.diff(seg) / seg[:-1]))
    pq_data[pf.stem] = (df, ac, dv)

# OB engine (shared across all runs)
_mock_ob = MockOBProvider(
    imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
    imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092,
                      "BNBUSDT": +0.05, "SOLUSDT": +0.05},
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)

# MC-Forewarner (shared)
print("[DATA] Loading MC-Forewarner...")
forewarner = DolphinForewarner(models_dir=MC_DIR)

# Base ACB: load w750 + warm cut cache for all 55 days
print("[DATA] Initializing base ACB, preloading w750 + cut cache...")
base_acb = AdaptiveCircuitBreaker()
base_acb.preload_w750(date_strings)
for ds in date_strings:
    base_acb.get_dynamic_boost_for_date(ds)  # warm _cache
print(f" w750_threshold = {base_acb._w750_threshold:.6f}")

# ── Load ExF daily data ───────────────────────────────────────────────────────
exf_csvs = sorted(RUN_LOGS.glob("exf_daily_*.csv"), reverse=True)
if not exf_csvs:
    raise RuntimeError("No exf_daily_*.csv found. Run test_exf_correlation.py first.")
exf_df = pd.read_csv(exf_csvs[0])
exf_df["date"] = exf_df["date"].astype(str)
exf_df = exf_df.set_index("date")
print(f"[DATA] ExF daily loaded: {exf_csvs[0].name} ({len(exf_df)} rows)")


def exf_series(col: str) -> dict:
    """Extract {date_str: float} for one ExF column, drop NaN."""
    if col not in exf_df.columns:
        return {}
    s = exf_df[col].dropna()
    return {str(d): float(v) for d, v in s.items()}


def median_threshold(col: str) -> float:
    """Median of the column's non-NaN values (0.0 if column absent/empty)."""
    s = exf_series(col)
    if not s:
        return 0.0
    return float(np.median(list(s.values())))


# ── Define indicator tests ────────────────────────────────────────────────────
INDICATORS = [
    # (name, col, lag, direction, note)
    ("claims",       "claims",       1, +1, "p=0.0035 **"),
    ("ycurve",       "ycurve",       2, +1, "p=0.0105 *"),
    ("stables",      "stables",      2, -1, "p=0.0074 ** (Spearman weak)"),
    ("us10y",        "us10y",        1, +1, "p=0.0195 *"),
    ("funding_eth",  "funding_eth",  0, -1, "p=0.0183 * (lag=0)"),
    ("vol24",        "vol24",        0, +1, "p=0.0178 * (Spearman weak, lag=0)"),
    ("m2",           "m2",           7, -1, "p=0.0195 * (lag=7)"),
    ("usdc",         "usdc",         5, -1, "p=0.0412 * (lag=5)"),
    ("hashrate",     "hashrate",     7, -1, "p=0.0479 * (lag=7)"),
    ("fund_dbt_eth", "fund_dbt_eth", 7, +1, "p=0.0278 * (n=22 only!)"),
    ("dvol_btc",     "dvol_btc",     7, -1, "p=0.062 borderline"),
    ("dvol_eth",     "dvol_eth",     7, -1, "p=0.078 borderline"),
]

# ── Baseline run ──────────────────────────────────────────────────────────────
print(f"\n{'='*70}")
print(" RUN 0/12: BASELINE (ACBv6, no ExF additions)")
print(f"{'='*70}")
t0 = time.time()
baseline = run_engine(base_acb, forewarner, pq_data, parquet_files, vol_p60,
                      ob_eng, "BASELINE")
print(f" {time.time()-t0:.1f}s | ROI={baseline['roi']:+.2f}% PF={baseline['pf']:.3f} "
      f"WR={baseline['wr']:.1f}% Sharpe={baseline['sharpe']:.2f} DD={baseline['dd']:.2f}%")

# ── Per-indicator marginal runs ───────────────────────────────────────────────
results = []
np.random.seed(0)  # deterministic permutation tests
for run_i, (name, col, lag, direction, note) in enumerate(INDICATORS, 1):
    print(f"\n{'='*70}")
    print(f" RUN {run_i}/{len(INDICATORS)}: {name} [{note}]")
    print(f"{'='*70}")
    series = exf_series(col)
    if not series:
        print(f" [SKIP] Column '{col}' not found or all-NaN in exf_daily CSV")
        results.append({'indicator': name, 'col': col, 'lag': lag, 'note': note,
                        'status': 'SKIP', 'roi_delta': 0, 'sharpe_delta': 0,
                        'wilcoxon_p': 1.0, 'perm_p': 1.0, 'verdict': 'SKIP'})
        continue
    thr = median_threshold(col)
    test_acb = ACBWithExF.clone_from(
        base_acb, name, thr, lag, direction, date_strings, series)
    t0 = time.time()
    run = run_engine(test_acb, forewarner, pq_data, parquet_files, vol_p60,
                     ob_eng, name)
    elapsed = time.time() - t0
    stat = marginal_test(baseline['daily_pnl'], run['daily_pnl'])
    roi_delta = run['roi'] - baseline['roi']
    sharpe_delta = run['sharpe'] - baseline['sharpe']
    pf_delta = run['pf'] - baseline['pf']
    dd_delta = run['dd'] - baseline['dd']
    wr_delta = run['wr'] - baseline['wr']
    # Verdict: significant = Wilcoxon p<0.05 AND positive roi_delta AND perm_p<0.10
    if stat['wilcoxon_p'] < 0.05 and roi_delta > 0 and stat['perm_p'] < 0.10:
        verdict = "SIGNIFICANT ✓"
    elif stat['wilcoxon_p'] < 0.10 and roi_delta > 0:
        verdict = "MARGINAL"
    elif roi_delta > 0:
        verdict = "POSITIVE (not sig)"
    else:
        verdict = "NO BENEFIT"
    print(f" {elapsed:.1f}s")
    print(f" ROI: {baseline['roi']:+.2f}% → {run['roi']:+.2f}% Δ={roi_delta:+.2f}%")
    print(f" Sharpe: {baseline['sharpe']:.3f} → {run['sharpe']:.3f} Δ={sharpe_delta:+.3f}")
    print(f" PF: {baseline['pf']:.4f} → {run['pf']:.4f} Δ={pf_delta:+.4f}")
    print(f" DD: {baseline['dd']:.2f}% → {run['dd']:.2f}% Δ={dd_delta:+.2f}%")
    print(f" WR: {baseline['wr']:.1f}% → {run['wr']:.1f}% Δ={wr_delta:+.1f}%")
    print(f" Trades: {baseline['trades']} → {run['trades']}")
    print(f" PnL delta: mean={stat['mean_delta_pnl']:+.1f}$/day "
          f"pos={stat['pos_days']} neg={stat['neg_days']} days")
    print(f" Wilcoxon p={stat['wilcoxon_p']:.4f} perm_p={stat['perm_p']:.4f}")
    print(f" VERDICT: {verdict}")
    results.append({
        'indicator': name, 'col': col, 'lag': lag,
        'direction': '+' if direction > 0 else '-',
        'note': note, 'threshold': thr, 'status': 'OK',
        'baseline_roi': baseline['roi'], 'test_roi': run['roi'], 'roi_delta': roi_delta,
        'baseline_pf': baseline['pf'], 'test_pf': run['pf'], 'pf_delta': pf_delta,
        'baseline_sh': baseline['sharpe'], 'test_sh': run['sharpe'],
        'sharpe_delta': sharpe_delta,
        'baseline_dd': baseline['dd'], 'test_dd': run['dd'], 'dd_delta': dd_delta,
        'baseline_wr': baseline['wr'], 'test_wr': run['wr'], 'wr_delta': wr_delta,
        'mean_delta_pnl': stat['mean_delta_pnl'],
        'pos_days': stat['pos_days'], 'neg_days': stat['neg_days'],
        'wilcoxon_p': stat['wilcoxon_p'], 'perm_p': stat['perm_p'],
        'verdict': verdict,
    })

# ── Summary table ─────────────────────────────────────────────────────────────
print(f"\n{'='*110}")
print(" MARGINAL BENEFIT SUMMARY")
print(f"{'='*110}")
print(f" {'indicator':14s} {'lag':3s} {'dir':3s} {'ΔROI':>7s} {'ΔSharpe':>8s} "
      f"{'ΔPF':>7s} {'ΔDD':>6s} {'Wilcox_p':>9s} {'perm_p':>7s} verdict")
print(f" {'-'*104}")
ok_results = [r for r in results if r['status'] == 'OK']
ok_results.sort(key=lambda r: r['wilcoxon_p'])
for r in ok_results:
    sig_marker = "**" if r['wilcoxon_p'] < 0.05 else (" *" if r['wilcoxon_p'] < 0.10 else " ")
    print(f" {r['indicator']:14s} {r['lag']:3d} {r['direction']:3s} "
          f"{r['roi_delta']:+7.2f}% {r['sharpe_delta']:+8.3f} "
          f"{r['pf_delta']:+7.4f} {r['dd_delta']:+6.2f}% "
          f"{r['wilcoxon_p']:9.4f}{sig_marker} {r['perm_p']:7.4f} {r['verdict']}")

# ── Save results ──────────────────────────────────────────────────────────────
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
out_csv = RUN_LOGS / f"exf_acb_marginal_{ts}.csv"
if ok_results:
    keys = list(ok_results[0].keys())
    with open(out_csv, "w", newline="") as f:
        w = csv.DictWriter(f, fieldnames=keys)
        w.writeheader()
        w.writerows(ok_results)
    print(f"\n[SAVED] {out_csv}")

# Identify winners
winners = [r for r in ok_results
           if r['verdict'] in ("SIGNIFICANT ✓", "MARGINAL") and r['roi_delta'] > 0]
print(f"\n[RESULT] {len(winners)} indicators pass marginal threshold:")
for r in winners:
    print(f" {r['indicator']:14s} lag={r['lag']} dir={r['direction']} "
          f"ΔROI={r['roi_delta']:+.2f}% Wilcoxon_p={r['wilcoxon_p']:.4f} verdict={r['verdict']}")
if winners:
    print("\n[ACTION] These indicators qualify for ACBv7. Run fork step next.")
else:
    print("\n[ACTION] No indicator shows statistically significant marginal benefit.")
    print(" Champion baseline is robust — ACBv6 remains optimal.")

print("\n[DONE]")