# NOTE: web-viewer export residue removed from the top of this file
# (file listing header: DOLPHIN/nautilus_dolphin/test_pf_dynamic_beta.py,
#  417 lines, 17 KiB, "Raw Normal View History"). It was not valid Python.
"""Dynamic beta: find external factors that predict optimal beta per date.
Phase 1: Load ALL external factors (85 API + 45 scan_global) per date
Phase 2: Run meta-boost at fixed beta=0.5, record per-date P&L
Phase 3: Rank-correlate each factor with daily P&L -> candidate beta governors
Phase 4: Test dynamic beta governed by top factors vs fixed beta
Phase 5: Half-split overfitting validation
"""
import sys, time, math
from pathlib import Path
import numpy as np
import pandas as pd
from scipy import stats as sp_stats
# Make the package importable when this file is run directly as a script.
sys.path.insert(0, str(Path(__file__).parent))
print("Compiling numba kernels...")
t0c = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
# Warm-up calls with dummy arguments so numba JIT compilation happens up front
# and is not charged to the timed backtest phases below.
# NOTE(review): the dummy argument values/dtypes presumably match the kernels'
# production signatures — confirm against the kernel definitions if they change.
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
                  np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
                  np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
check_dc_nb(_p, 3, 1, 0.75)
print(f" JIT: {time.time() - t0c:.1f}s")
from nautilus_dolphin.nautilus.alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
# Local data roots (Windows paths): cached per-date parquet bars and the
# per-date eigenvalue/indicator NPZ directories.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
EIGEN_DIR = Path(r"C:\Users\Lenovo\Documents\- Dolphin NG HD (NG3)\correlation_arb512\eigenvalues")
# Parquet columns that are metadata (timestamps, eigen velocities, instability
# metrics), not tradable asset price columns.
META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
             'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div',
             'instability_50', 'instability_150'}
# Engine configuration shared by every backtest run in this experiment, so all
# strategies differ ONLY in how beta is chosen per date.
ENGINE_KWARGS = dict(
    initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
    min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
    fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
    use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
    use_asset_selection=True, min_irp_alignment=0.45,
    use_sp_fees=True, use_sp_slippage=True,
    sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
    use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
    lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
)
# Module-level mirrors of the engine's vel_div thresholds and convexity,
# consumed by strength_cubic(); keep in sync with ENGINE_KWARGS above.
VD_THRESH = -0.02; VD_EXTREME = -0.05; CONVEXITY = 3.0
# ─── PHASE 1: Load ALL external factors per date ───
print("\n=== PHASE 1: Loading external factors ===")
acb = AdaptiveCircuitBreaker()
parquet_files = sorted(VBT_DIR.glob("*.parquet"))
# Per-date circuit-breaker signal count (parquet stem is the date string).
acb_signals = {pf.stem: acb.get_cut_for_date(pf.stem)['signals'] for pf in parquet_files}
# Load full factor matrix from NPZ files
date_factors = {}  # date -> {factor_name: value}
api_name_set = set()     # all API indicator names ever seen
global_name_set = set()  # all scan-global metric names ever seen
for pf in parquet_files:
    ds = pf.stem
    date_path = EIGEN_DIR / ds
    if not date_path.exists():
        continue
    # Up to 10 scans per date; presumably sequential intra-day scans — the
    # median across them smooths scan-to-scan noise. TODO confirm ordering.
    files = sorted(date_path.glob('scan_*__Indicators.npz'))[:10]
    if not files:
        continue
    # Collect from multiple scans, take median
    api_vals = {}   # name -> [values]
    glob_vals = {}  # name -> [values]
    for f in files:
        try:
            data = np.load(f, allow_pickle=True)
            # API indicators
            if 'api_names' in data and 'api_indicators' in data:
                names = list(data['api_names'])
                vals = data['api_indicators']
                # 'api_success' flags failed fetches; default to all-success
                # when the array is absent.
                succ = data['api_success'] if 'api_success' in data else np.ones(len(names), dtype=bool)
                for i, nm in enumerate(names):
                    # Guard against ragged arrays, failed fetches, and NaNs.
                    if i < len(vals) and (i >= len(succ) or succ[i]) and np.isfinite(vals[i]):
                        api_vals.setdefault(nm, []).append(float(vals[i]))
                        api_name_set.add(nm)
            # Scan global metrics
            if 'scan_global_names' in data and 'scan_global' in data:
                gnames = list(data['scan_global_names'])
                gvals = data['scan_global']
                for i, nm in enumerate(gnames):
                    if i < len(gvals) and np.isfinite(gvals[i]):
                        glob_vals.setdefault(nm, []).append(float(gvals[i]))
                        global_name_set.add(nm)
        except Exception:
            # Best-effort load: a corrupt/partial NPZ just skips that scan.
            continue
    # Collapse each factor to its per-date median across scans; prefixes keep
    # the two factor families distinct in the shared namespace.
    factors = {}
    for nm, vs in api_vals.items():
        factors[f"api_{nm}"] = float(np.median(vs))
    for nm, vs in glob_vals.items():
        factors[f"glob_{nm}"] = float(np.median(vs))
    factors['acb_signals'] = acb_signals[ds]
    date_factors[ds] = factors
print(f" Loaded factors for {len(date_factors)}/{len(parquet_files)} dates")
print(f" API indicators: {len(api_name_set)}, Global metrics: {len(global_name_set)}")
# ─── PHASE 2: Run fixed beta=0.5, record per-date P&L ───
print("\n=== PHASE 2: Running meta-boost beta=0.5 baseline ===")
# Pre-load parquet data
# Calibrate the volatility regime threshold from the FIRST TWO dates only
# (presumably a deliberate cheap sample — confirm this is intended, since the
# percentile then governs all dates).
all_vols = []
for pf in parquet_files[:2]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns: continue
    pr = df['BTCUSDT'].values
    # NOTE(review): warm-up starts at bar 60 here but at bar 50 in the dv loop
    # below — looks inconsistent; confirm which is intended.
    for i in range(60, len(pr)):
        seg = pr[max(0,i-50):i]  # trailing 50-bar window
        if len(seg)<10: continue
        # Std-dev of simple returns over the window = rolling BTC volatility.
        v = float(np.std(np.diff(seg)/seg[:-1]))
        if v > 0: all_vols.append(v)
# 60th-percentile volatility: bars above it count as "high-vol regime".
# NOTE(review): raises if all_vols is empty (no BTCUSDT in the sample dates).
vol_p60 = float(np.percentile(all_vols, 60))
# Cache per date: (dataframe, asset columns, rolling BTC vol per bar).
pq_data = {}
for pf in parquet_files:
    df = pd.read_parquet(pf)
    ac = [c for c in df.columns if c not in META_COLS]
    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    dv = np.full(len(df), np.nan)  # NaN until enough history accumulates
    if bp is not None:
        for i in range(50, len(bp)):
            seg = bp[max(0,i-50):i]
            if len(seg)<10: continue
            dv[i] = float(np.std(np.diff(seg)/seg[:-1]))
    pq_data[pf.stem] = (df, ac, dv)
def log05(s):
    """Logarithmic size boost: 1 + 0.5*ln(1+s) once s reaches 1, else neutral 1.0."""
    if s < 1.0:
        return 1.0
    return 1.0 + 0.5 * math.log1p(s)
def strength_cubic(vel_div, vd_thresh=None, vd_extreme=None, convexity=None):
    """Convex signal strength in [0, 1] from velocity divergence.

    Returns 0.0 while vel_div is at or above the trigger threshold. Below it,
    the depth toward the extreme level is normalized linearly, clipped to
    [0, 1], and raised to `convexity` so only deep divergences earn a large
    boost (cubic shaping with the module defaults).

    Args:
        vel_div: current velocity-divergence value (more negative = stronger).
        vd_thresh: trigger threshold; defaults to module-level VD_THRESH.
        vd_extreme: saturation level; defaults to module-level VD_EXTREME.
        convexity: shaping exponent; defaults to module-level CONVEXITY.

    Generalized from hard-coded module globals to optional parameters;
    calling with one argument is unchanged.
    """
    if vd_thresh is None:
        vd_thresh = VD_THRESH
    if vd_extreme is None:
        vd_extreme = VD_EXTREME
    if convexity is None:
        convexity = CONVEXITY
    if vel_div >= vd_thresh:
        return 0.0
    raw = (vd_thresh - vel_div) / (vd_thresh - vd_extreme)
    return min(1.0, max(0.0, raw)) ** convexity
def run_with_beta_fn(beta_fn):
    """Run engine where beta_fn(date_str, factors) -> beta for that date.

    Replays every cached parquet date through a fresh NDAlphaEngine; beta only
    modulates the regime size multiplier on top of the log05 boost.

    Returns:
        (summary dict with roi/pf/dd/sharpe/trades/cap, per-date stats list).
    """
    engine = NDAlphaEngine(**ENGINE_KWARGS)
    # bar_idx is global across all dates; ph accumulates price histories
    # across dates too (never reset); dstats holds one record per date.
    bar_idx = 0; ph = {}; dstats = []
    for pf in parquet_files:
        ds = pf.stem; cs = engine.capital  # cs = capital at start of day
        signals = acb_signals[ds]
        base_boost = log05(signals)
        # Forced short-side regime for every date; halt flag cleared daily.
        engine.regime_direction = -1
        engine.regime_dd_halt = False
        # Get beta for this date
        factors = date_factors.get(ds, {})
        beta = beta_fn(ds, factors)
        df, acols, dvol = pq_data[ds]
        bid = 0  # per-day bar counter (used for the 100-bar vol warm-up)
        for ri in range(len(df)):
            row = df.iloc[ri]; vd = row.get("vel_div")
            if vd is None or not np.isfinite(vd): bar_idx+=1; bid+=1; continue
            # Snapshot prices for this bar and extend per-asset histories.
            prices = {}
            for ac in acols:
                p = row[ac]
                if p and p > 0 and np.isfinite(p):
                    prices[ac] = float(p)
                    if ac not in ph: ph[ac] = []
                    ph[ac].append(float(p))
            if not prices: bar_idx+=1; bid+=1; continue
            # Vol-regime gate: needs 100 bars of warm-up and BTC vol above the
            # calibrated 60th percentile.
            vrok = False if bid < 100 else (np.isfinite(dvol[ri]) and dvol[ri] > vol_p60)
            # Meta-boost: only scale by beta when the base boost is active.
            if beta > 0 and base_boost > 1.0:
                ss = strength_cubic(float(vd))
                engine.regime_size_mult = base_boost * (1.0 + beta * ss)
            else:
                engine.regime_size_mult = base_boost
            engine.process_bar(bar_idx=bar_idx, vel_div=float(vd), prices=prices,
                               vol_regime_ok=vrok, price_histories=ph)
            bar_idx+=1; bid+=1
        dstats.append({'date': ds, 'pnl': engine.capital - cs, 'cap': engine.capital, 'beta': beta})
    # Summary statistics over the whole run.
    tr = engine.trade_history
    w = [t for t in tr if t.pnl_absolute > 0]; l = [t for t in tr if t.pnl_absolute <= 0]
    gw = sum(t.pnl_absolute for t in w) if w else 0   # gross wins
    gl = abs(sum(t.pnl_absolute for t in l)) if l else 0  # gross losses
    # Daily returns in % of INITIAL capital (not compounded).
    dr = [s['pnl']/25000*100 for s in dstats]
    # Max drawdown from end-of-day equity only (intraday dips not captured).
    peak_cap = 25000.0; max_dd = 0.0
    for s in dstats:
        peak_cap = max(peak_cap, s['cap'])
        dd = (peak_cap - s['cap']) / peak_cap * 100
        max_dd = max(max_dd, dd)
    return {
        'roi': (engine.capital - 25000) / 25000 * 100,
        'pf': gw / gl if gl > 0 else 999,  # sentinel when there are no losses
        'dd': max_dd,
        # Annualized with 365 (crypto trades every day).
        'sharpe': np.mean(dr) / np.std(dr) * np.sqrt(365) if np.std(dr) > 0 else 0,
        'trades': len(tr),
        'cap': engine.capital,
    }, dstats
t0 = time.time()
# Baseline run: constant beta=0.5; ds_fixed holds the per-date stats that
# phase 3 correlates against.
r_fixed, ds_fixed = run_with_beta_fn(lambda ds, f: 0.5)
print(f" Fixed beta=0.5: ROI={r_fixed['roi']:+.2f}%, PF={r_fixed['pf']:.3f}, "
      f"DD={r_fixed['dd']:.2f}%, Sharpe={r_fixed['sharpe']:.2f} [{time.time()-t0:.0f}s]")
# ─── PHASE 3: Rank-correlate factors with per-date P&L ───
print(f"\n=== PHASE 3: Factor -> P&L correlation scan ===")
# Build aligned arrays: factor[date] vs pnl[date]
pnl_by_date = {s['date']: s['pnl'] for s in ds_fixed}
dates_with_factors = [ds for ds in pnl_by_date if ds in date_factors]
pnl_arr = np.array([pnl_by_date[ds] for ds in dates_with_factors])
# Already-used factors (exclude from "new" candidates)
USED_FACTORS = {'api_funding_btc', 'api_dvol_btc', 'api_fng', 'api_taker'}
# Collect all factor names seen on any date.
all_factor_names = set()
for f in date_factors.values():
    all_factor_names.update(f.keys())
all_factor_names -= {'acb_signals'}  # meta, not a raw factor
correlations = []
for fname in sorted(all_factor_names):
    vals = []
    valid = True
    # Require the factor to be finite on EVERY date so vals stays aligned
    # 1:1 with pnl_arr; otherwise skip the factor entirely.
    for ds in dates_with_factors:
        v = date_factors[ds].get(fname, np.nan)
        if not np.isfinite(v):
            valid = False; break
        vals.append(v)
    # Need at least 3 distinct values for a meaningful rank correlation.
    if not valid or len(set(vals)) < 3:
        continue
    vals = np.array(vals)
    # Spearman rank correlation (robust to non-linearity)
    rho, pval = sp_stats.spearmanr(vals, pnl_arr)
    if np.isfinite(rho):
        correlations.append({
            'factor': fname, 'rho': rho, 'pval': pval,
            'abs_rho': abs(rho), 'is_new': fname not in USED_FACTORS,
        })
# Strongest |rho| first.
correlations.sort(key=lambda x: -x['abs_rho'])
print(f"\n{'FACTOR':<35} {'RHO':>7} {'P-VAL':>8} {'NEW?':>5}")
print(f"{'-'*60}")
for c in correlations[:30]:
    marker = " ***" if c['is_new'] and c['abs_rho'] > 0.25 else " *" if c['is_new'] else ""
    print(f" {c['factor']:<33} {c['rho']:>+7.3f} {c['pval']:>8.4f} {'YES' if c['is_new'] else 'no':>5}{marker}")
# Filter: new factors with |rho| > 0.2 and p < 0.15
candidates = [c for c in correlations if c['is_new'] and c['abs_rho'] > 0.20 and c['pval'] < 0.15]
print(f"\n Candidate beta governors (new, |rho|>0.20, p<0.15): {len(candidates)}")
for c in candidates[:10]:
    print(f" {c['factor']:<33} rho={c['rho']:+.3f} p={c['pval']:.4f}")
# ─── PHASE 4: Test dynamic beta strategies ───
print(f"\n=== PHASE 4: Dynamic beta tests ===")
# Strategy: scale beta by factor percentile
# High-rho factor -> when factor is elevated, beta should be higher (or lower, depending on sign)
def make_percentile_beta_fn(factor_name, rho_sign, base_beta=0.5, min_beta=0.1, max_beta=1.0):
    """Scale beta based on percentile of factor across dates.

    Builds the factor's reference distribution over all dates, then returns a
    beta_fn(date_str, factors) closure: beta interpolates linearly between
    min_beta and max_beta with the factor's percentile rank (rising when
    rho_sign > 0, falling when rho_sign < 0). Dates with a missing or
    non-finite factor value fall back to base_beta.
    """
    # Collect factor values across all dates
    fvals = []
    for ds in dates_with_factors:
        v = date_factors[ds].get(factor_name, np.nan)
        if np.isfinite(v):
            fvals.append(v)
    if not fvals:
        # No data at all for this factor: constant fallback beta.
        return lambda ds, f: base_beta
    # (Removed dead code: p25/p50/p75 were computed here but never used.)
    def beta_fn(ds, factors):
        v = factors.get(factor_name, np.nan)
        if not np.isfinite(v):
            return base_beta
        # Normalize to [0, 1] via percentile rank within the reference dist.
        rank = sp_stats.percentileofscore(fvals, v) / 100.0
        if rho_sign > 0:
            # Higher factor -> higher beta
            beta = min_beta + (max_beta - min_beta) * rank
        else:
            # Higher factor -> lower beta
            beta = min_beta + (max_beta - min_beta) * (1.0 - rank)
        return beta
    return beta_fn
# Also test: composite of top-N factors
def make_composite_beta_fn(factor_list, base_beta=0.5, min_beta=0.1, max_beta=1.0):
    """Average percentile rank of multiple factors -> beta.

    factor_list is a sequence of (factor_name, rho_sign) pairs; each factor's
    percentile rank (flipped when rho_sign <= 0) is averaged, and the mean
    rank interpolates beta between min_beta and max_beta. Falls back to
    base_beta when no listed factor has a finite value for the date.
    """
    # Pre-compute each factor's finite reference distribution across dates.
    factor_dists = {}
    for fname, rho_sign in factor_list:
        finite = [v for v in (date_factors[ds].get(fname, np.nan)
                              for ds in dates_with_factors) if np.isfinite(v)]
        if finite:
            factor_dists[fname] = (finite, rho_sign)
    def beta_fn(ds, factors):
        ranks = []
        for fname, (dist, sign) in factor_dists.items():
            val = factors.get(fname, np.nan)
            if not np.isfinite(val):
                continue
            pr = sp_stats.percentileofscore(dist, val) / 100.0
            ranks.append(pr if sign > 0 else 1.0 - pr)
        if not ranks:
            return base_beta
        return min_beta + (max_beta - min_beta) * np.mean(ranks)
    return beta_fn
# Also test: VIX-like regime switch (simple threshold)
def make_threshold_beta_fn(factor_name, rho_sign, threshold_pct=60,
                           beta_low=0.3, beta_high=0.7):
    """Binary: factor above/below threshold -> two beta levels.

    The threshold is the factor's threshold_pct percentile across all dates.
    With rho_sign > 0 an elevated factor selects beta_high, otherwise
    beta_low; the mapping flips for negative rho_sign. A missing/non-finite
    factor value returns the midpoint of the two levels.
    """
    samples = [date_factors[ds].get(factor_name, np.nan) for ds in dates_with_factors]
    samples = [v for v in samples if np.isfinite(v)]
    thresh = np.percentile(samples, threshold_pct) if samples else 0
    midpoint = (beta_low + beta_high) / 2
    def beta_fn(ds, factors):
        val = factors.get(factor_name, np.nan)
        if not np.isfinite(val):
            return midpoint
        above = val >= thresh
        if rho_sign > 0:
            return beta_high if above else beta_low
        return beta_low if above else beta_high
    return beta_fn
print(f"\n{'STRATEGY':<40} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'SHARPE':>7} {'TRADES':>7}")
print(f"{'='*75}")
# Fixed baselines
for fb in [0.0, 0.3, 0.5, 0.7]:
    label = f"fixed_beta={fb}"
    t1 = time.time()
    # b=fb default binds the current loop value (avoids late-binding closure bug).
    # NOTE: `ds` here receives the per-date stats list, shadowing the date-string
    # use of the same name elsewhere in this script.
    r, ds = run_with_beta_fn(lambda ds, f, b=fb: b)
    print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} {r['trades']:>7} [{time.time()-t1:.0f}s]")
# Test top candidate factors
tested_strategies = {}  # label -> (summary dict, per-date stats)
for c in candidates[:5]:
    fname = c['factor']
    rho_sign = 1 if c['rho'] > 0 else -1
    short_name = fname.replace('api_', '').replace('glob_', 'g:')
    # Percentile scaling
    label = f"pctl_{short_name}"
    t1 = time.time()
    beta_fn = make_percentile_beta_fn(fname, rho_sign, min_beta=0.1, max_beta=0.9)
    r, ds = run_with_beta_fn(beta_fn)
    tested_strategies[label] = (r, ds)
    print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} {r['trades']:>7} [{time.time()-t1:.0f}s]")
    # Threshold switch
    label = f"thresh_{short_name}"
    t1 = time.time()
    beta_fn = make_threshold_beta_fn(fname, rho_sign, beta_low=0.2, beta_high=0.8)
    r, ds = run_with_beta_fn(beta_fn)
    tested_strategies[label] = (r, ds)
    print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} {r['trades']:>7} [{time.time()-t1:.0f}s]")
# Composite: top 3 new factors
if len(candidates) >= 3:
    top3 = [(c['factor'], 1 if c['rho'] > 0 else -1) for c in candidates[:3]]
    label = "composite_top3"
    t1 = time.time()
    beta_fn = make_composite_beta_fn(top3, min_beta=0.1, max_beta=0.9)
    r, ds = run_with_beta_fn(beta_fn)
    tested_strategies[label] = (r, ds)
    print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} {r['trades']:>7} [{time.time()-t1:.0f}s]")
# ─── PHASE 5: Overfitting check ───
print(f"\n=== PHASE 5: Overfitting validation (H1 vs H2) ===")
# Split the date range in half: a strategy whose P&L lives only in one half
# is likely overfit to that half's regime.
mid = len(parquet_files) // 2
print(f" H1: {parquet_files[0].stem} to {parquet_files[mid-1].stem}")
print(f" H2: {parquet_files[mid].stem} to {parquet_files[-1].stem}")
# Fixed beta baselines (re-run here; phase 4 did not keep their per-date stats).
for fb in [0.0, 0.3, 0.5]:
    label = f"fixed_beta={fb}"
    # b=fb default binds the current loop value (avoids late-binding closure bug).
    _, ds = run_with_beta_fn(lambda ds, f, b=fb: b)
    h1 = sum(s['pnl'] for s in ds[:mid])
    h2 = sum(s['pnl'] for s in ds[mid:])
    ratio = h2/h1 if h1 != 0 else 0
    print(f" {label:<38} H1=${h1:>+9.2f} H2=${h2:>+9.2f} H2/H1={ratio:.2f}")
# Dynamic strategies (per-date stats were recorded during phase 4).
for label, (r, ds) in tested_strategies.items():
    h1 = sum(s['pnl'] for s in ds[:mid])
    h2 = sum(s['pnl'] for s in ds[mid:])
    ratio = h2/h1 if h1 != 0 else 0
    stable = "YES" if 0.3 < ratio < 3.0 else "OVERFIT"
    print(f" {label:<38} H1=${h1:>+9.2f} H2=${h2:>+9.2f} H2/H1={ratio:.2f} {stable}")
# Per-date beta values for best dynamic strategy.
# FIX: guard against an empty tested_strategies (no candidates passed the
# phase-3 filter) — the unguarded max() raised ValueError in that case.
if tested_strategies:
    best_dyn = max(tested_strategies.items(), key=lambda x: x[1][0]['roi'])
    print(f"\n--- BEST DYNAMIC: {best_dyn[0]} ---")
    print(f" ROI={best_dyn[1][0]['roi']:+.2f}%, PF={best_dyn[1][0]['pf']:.3f}, "
          f"Sharpe={best_dyn[1][0]['sharpe']:.2f}, DD={best_dyn[1][0]['dd']:.2f}%")
    print(f" Per-date beta values:")
    for s in best_dyn[1][1]:
        marker = " $$$" if s['pnl'] > 100 else " ---" if s['pnl'] < -100 else ""
        print(f" {s['date']} beta={s['beta']:.2f} pnl=${s['pnl']:>+8.2f}{marker}")
else:
    print("\n No dynamic strategies tested; skipping best-strategy report.")
# NOTE: t0 was set at the start of phase 2, so this excludes JIT/factor loading.
print(f"\nTotal time: {time.time()-t0:.0f}s")