417 lines
17 KiB
Python
417 lines
17 KiB
Python
|
|
"""Dynamic beta: find external factors that predict optimal beta per date.
|
||
|
|
|
||
|
|
Phase 1: Load ALL external factors (85 API + 45 scan_global) per date
|
||
|
|
Phase 2: Run meta-boost at fixed beta=0.5, record per-date P&L
|
||
|
|
Phase 3: Rank-correlate each factor with daily P&L -> candidate beta governors
|
||
|
|
Phase 4: Test dynamic beta governed by top factors vs fixed beta
|
||
|
|
Phase 5: Half-split overfitting validation
|
||
|
|
"""
|
||
|
|
import sys, time, math
|
||
|
|
from pathlib import Path
|
||
|
|
import numpy as np
|
||
|
|
import pandas as pd
|
||
|
|
from scipy import stats as sp_stats
|
||
|
|
|
||
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
||
|
|
|
||
|
|
print("Compiling numba kernels...")
|
||
|
|
t0c = time.time()
|
||
|
|
from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
|
||
|
|
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
|
||
|
|
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
|
||
|
|
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
|
||
|
|
compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01)
|
||
|
|
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
|
||
|
|
compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
|
||
|
|
np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
|
||
|
|
np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
|
||
|
|
check_dc_nb(_p, 3, 1, 0.75)
|
||
|
|
print(f" JIT: {time.time() - t0c:.1f}s")
|
||
|
|
|
||
|
|
from nautilus_dolphin.nautilus.alpha_orchestrator import NDAlphaEngine
|
||
|
|
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
|
||
|
|
|
||
|
|
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
|
||
|
|
EIGEN_DIR = Path(r"C:\Users\Lenovo\Documents\- Dolphin NG HD (NG3)\correlation_arb512\eigenvalues")
|
||
|
|
META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
|
||
|
|
'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div',
|
||
|
|
'instability_50', 'instability_150'}
|
||
|
|
ENGINE_KWARGS = dict(
|
||
|
|
initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
|
||
|
|
min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
|
||
|
|
fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
|
||
|
|
use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
|
||
|
|
dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
|
||
|
|
use_asset_selection=True, min_irp_alignment=0.45,
|
||
|
|
use_sp_fees=True, use_sp_slippage=True,
|
||
|
|
sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
|
||
|
|
use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
|
||
|
|
lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
|
||
|
|
)
|
||
|
|
VD_THRESH = -0.02; VD_EXTREME = -0.05; CONVEXITY = 3.0
|
||
|
|
|
||
|
|
# ─── PHASE 1: Load ALL external factors per date ───
print("\n=== PHASE 1: Loading external factors ===")
acb = AdaptiveCircuitBreaker()
parquet_files = sorted(VBT_DIR.glob("*.parquet"))
# Circuit-breaker signal count per date (parquet file stem == date string).
acb_signals = {pf.stem: acb.get_cut_for_date(pf.stem)['signals'] for pf in parquet_files}

# Load full factor matrix from NPZ files
date_factors = {}  # date -> {factor_name: value}
api_name_set = set()     # distinct API indicator names seen across all dates
global_name_set = set()  # distinct scan_global metric names seen across all dates

for pf in parquet_files:
    ds = pf.stem
    date_path = EIGEN_DIR / ds
    if not date_path.exists():
        continue
    # First 10 scans of the day only — enough samples for a robust median.
    files = sorted(date_path.glob('scan_*__Indicators.npz'))[:10]
    if not files:
        continue

    # Collect from multiple scans, take median
    api_vals = {}   # name -> [values]
    glob_vals = {}  # name -> [values]

    for f in files:
        try:
            # Fix: np.load on an .npz returns an NpzFile that keeps the file
            # open; the context manager closes the handle instead of leaking
            # one per scan file until GC.
            with np.load(f, allow_pickle=True) as data:
                # API indicators
                if 'api_names' in data and 'api_indicators' in data:
                    names = list(data['api_names'])
                    vals = data['api_indicators']
                    # Older scans may lack the success mask -> treat all as OK.
                    succ = data['api_success'] if 'api_success' in data else np.ones(len(names), dtype=bool)
                    for i, nm in enumerate(names):
                        if i < len(vals) and (i >= len(succ) or succ[i]) and np.isfinite(vals[i]):
                            api_vals.setdefault(nm, []).append(float(vals[i]))
                            api_name_set.add(nm)
                # Scan global metrics
                if 'scan_global_names' in data and 'scan_global' in data:
                    gnames = list(data['scan_global_names'])
                    gvals = data['scan_global']
                    for i, nm in enumerate(gnames):
                        if i < len(gvals) and np.isfinite(gvals[i]):
                            glob_vals.setdefault(nm, []).append(float(gvals[i]))
                            global_name_set.add(nm)
        except Exception:
            # Best-effort load: a corrupt/partial NPZ just skips that scan.
            continue

    # Median across scans -> one value per factor per date; names are
    # prefixed so API and scan_global factors cannot collide.
    factors = {}
    for nm, vs in api_vals.items():
        factors[f"api_{nm}"] = float(np.median(vs))
    for nm, vs in glob_vals.items():
        factors[f"glob_{nm}"] = float(np.median(vs))
    factors['acb_signals'] = acb_signals[ds]
    date_factors[ds] = factors

print(f" Loaded factors for {len(date_factors)}/{len(parquet_files)} dates")
print(f" API indicators: {len(api_name_set)}, Global metrics: {len(global_name_set)}")
|
||
|
|
|
||
|
|
# ─── PHASE 2: Run fixed beta=0.5, record per-date P&L ───
print("\n=== PHASE 2: Running meta-boost beta=0.5 baseline ===")


def _trailing_vol(prices, i, window=50, min_len=10):
    """Std-dev of 1-bar simple returns over the `window` prices before index i.

    Returns NaN when fewer than `min_len` prices are available; callers treat
    NaN as "no reading". (Extracted: this was duplicated in both loops below.)
    """
    seg = prices[max(0, i - window):i]
    if len(seg) < min_len:
        return float('nan')
    return float(np.std(np.diff(seg) / seg[:-1]))


# Pre-load parquet data
# Volatility threshold: 60th percentile of BTC trailing vol, sampled from the
# first two dates only (speed over coverage, kept from the original design).
all_vols = []
for pf in parquet_files[:2]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns: continue
    pr = df['BTCUSDT'].values
    for i in range(60, len(pr)):
        v = _trailing_vol(pr, i)
        if np.isfinite(v) and v > 0:
            all_vols.append(v)
if not all_vols:
    # Fail with a clear message instead of an opaque IndexError from np.percentile.
    raise RuntimeError("No BTCUSDT volatility samples in the first two parquet files")
vol_p60 = float(np.percentile(all_vols, 60))

# date -> (dataframe, asset columns, per-bar BTC trailing vol array)
pq_data = {}
for pf in parquet_files:
    df = pd.read_parquet(pf)
    ac = [c for c in df.columns if c not in META_COLS]
    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    dv = np.full(len(df), np.nan)
    if bp is not None:
        for i in range(50, len(bp)):
            dv[i] = _trailing_vol(bp, i)
    pq_data[pf.stem] = (df, ac, dv)
|
||
|
|
def log05(s):
    """Logarithmic size boost: 1 + 0.5*ln(1+s) once s reaches 1, else neutral 1.0."""
    if s < 1.0:
        return 1.0
    return 1.0 + 0.5 * math.log1p(s)
|
||
|
|
|
||
|
|
def strength_cubic(vel_div):
    """Map vel_div below VD_THRESH to a cubic strength in [0, 1].

    0.0 at/above VD_THRESH, rising to 1.0 at VD_EXTREME; the CONVEXITY
    exponent keeps the response small until vel_div nears the extreme.
    """
    if vel_div >= VD_THRESH:
        return 0.0
    frac = (VD_THRESH - vel_div) / (VD_THRESH - VD_EXTREME)
    clipped = max(0.0, min(1.0, frac))
    return clipped ** CONVEXITY
|
||
|
|
|
||
|
|
def run_with_beta_fn(beta_fn):
    """Run engine where beta_fn(date_str, factors) -> beta for that date.

    Replays every parquet date through a fresh NDAlphaEngine, setting
    engine.regime_size_mult per bar from that date's beta, and returns
    (summary, dstats):
      summary: roi (%), pf (profit factor, 999 sentinel when no losses),
               dd (max end-of-date drawdown %), sharpe (annualized with
               sqrt(365) from daily returns), trades, cap (final capital).
      dstats:  one dict per date with 'date', 'pnl', 'cap', 'beta'.
    """
    engine = NDAlphaEngine(**ENGINE_KWARGS)
    # bar_idx: global bar counter across all dates; ph: cumulative per-asset
    # price histories (never reset between dates); dstats: per-date records.
    bar_idx = 0; ph = {}; dstats = []

    for pf in parquet_files:
        ds = pf.stem; cs = engine.capital  # capital at start of this date
        signals = acb_signals[ds]
        base_boost = log05(signals)  # log-scaled boost from ACB signal count
        # Fixed regime for the whole study: direction -1, no DD halt.
        # NOTE(review): -1 presumably means short bias — confirm in engine.
        engine.regime_direction = -1
        engine.regime_dd_halt = False

        # Get beta for this date
        factors = date_factors.get(ds, {})
        beta = beta_fn(ds, factors)

        df, acols, dvol = pq_data[ds]
        bid = 0  # within-date bar counter (drives the 100-bar warm-up gate)
        for ri in range(len(df)):
            row = df.iloc[ri]; vd = row.get("vel_div")
            if vd is None or not np.isfinite(vd): bar_idx+=1; bid+=1; continue
            # Collect valid positive prices and extend per-asset histories.
            prices = {}
            for ac in acols:
                p = row[ac]
                if p and p > 0 and np.isfinite(p):
                    prices[ac] = float(p)
                    if ac not in ph: ph[ac] = []
                    ph[ac].append(float(p))
            if not prices: bar_idx+=1; bid+=1; continue
            # Vol regime OK only after 100 in-date bars and when BTC trailing
            # vol exceeds the precomputed 60th-percentile threshold.
            vrok = False if bid < 100 else (np.isfinite(dvol[ri]) and dvol[ri] > vol_p60)

            # Meta-boost: amplify base_boost by beta * cubic vel_div strength,
            # but only when a boost is active at all (base_boost > 1).
            if beta > 0 and base_boost > 1.0:
                ss = strength_cubic(float(vd))
                engine.regime_size_mult = base_boost * (1.0 + beta * ss)
            else:
                engine.regime_size_mult = base_boost

            engine.process_bar(bar_idx=bar_idx, vel_div=float(vd), prices=prices,
                               vol_regime_ok=vrok, price_histories=ph)
            bar_idx+=1; bid+=1

        dstats.append({'date': ds, 'pnl': engine.capital - cs, 'cap': engine.capital, 'beta': beta})

    # Summary statistics over the full multi-date run.
    tr = engine.trade_history
    w = [t for t in tr if t.pnl_absolute > 0]; l = [t for t in tr if t.pnl_absolute <= 0]
    gw = sum(t.pnl_absolute for t in w) if w else 0       # gross wins
    gl = abs(sum(t.pnl_absolute for t in l)) if l else 0  # gross losses
    dr = [s['pnl']/25000*100 for s in dstats]  # daily returns, % of initial capital
    # Max drawdown measured on end-of-date capital marks only.
    peak_cap = 25000.0; max_dd = 0.0
    for s in dstats:
        peak_cap = max(peak_cap, s['cap'])
        dd = (peak_cap - s['cap']) / peak_cap * 100
        max_dd = max(max_dd, dd)
    return {
        'roi': (engine.capital - 25000) / 25000 * 100,
        'pf': gw / gl if gl > 0 else 999,
        'dd': max_dd,
        'sharpe': np.mean(dr) / np.std(dr) * np.sqrt(365) if np.std(dr) > 0 else 0,
        'trades': len(tr),
        'cap': engine.capital,
    }, dstats
|
||
|
|
|
||
|
|
# Baseline: fixed beta=0.5 on every date. Its per-date P&L (ds_fixed) feeds
# the factor-correlation scan in Phase 3. t0 is also the start of the total
# runtime clock printed at the very end of the script.
t0 = time.time()
r_fixed, ds_fixed = run_with_beta_fn(lambda ds, f: 0.5)
print(f" Fixed beta=0.5: ROI={r_fixed['roi']:+.2f}%, PF={r_fixed['pf']:.3f}, "
      f"DD={r_fixed['dd']:.2f}%, Sharpe={r_fixed['sharpe']:.2f} [{time.time()-t0:.0f}s]")
|
||
|
|
|
||
|
|
# ─── PHASE 3: Rank-correlate factors with per-date P&L ───
print(f"\n=== PHASE 3: Factor -> P&L correlation scan ===")

# Build aligned arrays: factor[date] vs pnl[date]
pnl_by_date = {s['date']: s['pnl'] for s in ds_fixed}
# Dates with both a P&L entry and a loaded factor dict. Dict insertion order
# preserves the Phase 2 run order, keeping pnl_arr aligned with factor values.
dates_with_factors = [ds for ds in pnl_by_date if ds in date_factors]
pnl_arr = np.array([pnl_by_date[ds] for ds in dates_with_factors])

# Already-used factors (exclude from "new" candidates)
USED_FACTORS = {'api_funding_btc', 'api_dvol_btc', 'api_fng', 'api_taker'}

# Collect all factor names
all_factor_names = set()
for f in date_factors.values():
    all_factor_names.update(f.keys())
all_factor_names -= {'acb_signals'}  # meta, not a raw factor

correlations = []
for fname in sorted(all_factor_names):
    vals = []
    valid = True
    # Require the factor to be present and finite on EVERY aligned date.
    for ds in dates_with_factors:
        v = date_factors[ds].get(fname, np.nan)
        if not np.isfinite(v):
            valid = False; break
        vals.append(v)
    # Skip gappy or near-constant factors (<3 distinct values -> rho meaningless).
    if not valid or len(set(vals)) < 3:
        continue
    vals = np.array(vals)
    # Spearman rank correlation (robust to non-linearity)
    rho, pval = sp_stats.spearmanr(vals, pnl_arr)
    if np.isfinite(rho):
        correlations.append({
            'factor': fname, 'rho': rho, 'pval': pval,
            'abs_rho': abs(rho), 'is_new': fname not in USED_FACTORS,
        })

# Strongest correlations (by magnitude) first.
correlations.sort(key=lambda x: -x['abs_rho'])

print(f"\n{'FACTOR':<35} {'RHO':>7} {'P-VAL':>8} {'NEW?':>5}")
print(f"{'-'*60}")
for c in correlations[:30]:
    # *** = new and strong (|rho|>0.25); * = new but weaker.
    marker = " ***" if c['is_new'] and c['abs_rho'] > 0.25 else " *" if c['is_new'] else ""
    print(f" {c['factor']:<33} {c['rho']:>+7.3f} {c['pval']:>8.4f} {'YES' if c['is_new'] else 'no':>5}{marker}")

# Filter: new factors with |rho| > 0.2 and p < 0.15
candidates = [c for c in correlations if c['is_new'] and c['abs_rho'] > 0.20 and c['pval'] < 0.15]
print(f"\n Candidate beta governors (new, |rho|>0.20, p<0.15): {len(candidates)}")
for c in candidates[:10]:
    print(f" {c['factor']:<33} rho={c['rho']:+.3f} p={c['pval']:.4f}")
|
||
|
|
|
||
|
|
# ─── PHASE 4: Test dynamic beta tests strategies ───
# Three factory styles are compared below against fixed-beta baselines:
# continuous percentile scaling, a binary threshold switch, and a
# composite of the top-3 candidate factors.
print(f"\n=== PHASE 4: Dynamic beta tests ===")

# Strategy: scale beta by factor percentile
# High-rho factor -> when factor is elevated, beta should be higher (or lower, depending on sign)
|
||
|
|
def make_percentile_beta_fn(factor_name, rho_sign, base_beta=0.5, min_beta=0.1, max_beta=1.0):
    """Scale beta based on percentile of factor across dates.

    rho_sign > 0: higher factor value -> higher beta (min_beta..max_beta);
    rho_sign <= 0: the relationship is inverted. Dates where the factor is
    missing/non-finite — or a factor with no history at all — fall back to
    the constant base_beta.

    Fix vs original: removed the dead p25/p50/p75 percentile computation,
    whose results were never used.
    """
    # Collect the factor's finite values across all dates; this is the
    # reference distribution for percentile ranking.
    fvals = []
    for ds in dates_with_factors:
        v = date_factors[ds].get(factor_name, np.nan)
        if np.isfinite(v):
            fvals.append(v)
    if not fvals:
        return lambda ds, f: base_beta

    def beta_fn(ds, factors):
        v = factors.get(factor_name, np.nan)
        if not np.isfinite(v):
            return base_beta
        # Normalize to [0, 1] via percentile rank
        rank = sp_stats.percentileofscore(fvals, v) / 100.0
        if rho_sign > 0:
            # Higher factor -> higher beta
            beta = min_beta + (max_beta - min_beta) * rank
        else:
            # Higher factor -> lower beta
            beta = min_beta + (max_beta - min_beta) * (1.0 - rank)
        return beta
    return beta_fn
|
||
|
|
|
||
|
|
# Also test: composite of top-N factors
def make_composite_beta_fn(factor_list, base_beta=0.5, min_beta=0.1, max_beta=1.0):
    """Average percentile rank of multiple factors -> beta.

    factor_list is a sequence of (factor_name, rho_sign) pairs; each factor
    contributes its percentile rank (inverted when rho_sign <= 0), and beta
    interpolates min_beta..max_beta by the mean rank. base_beta is returned
    when no listed factor has a finite value for the date.
    """
    # Pre-compute each factor's reference distribution across all dates,
    # dropping factors with no finite history at all.
    factor_dists = {}
    for fname, rho_sign in factor_list:
        raw = [date_factors[ds].get(fname, np.nan) for ds in dates_with_factors]
        finite = [v for v in raw if np.isfinite(v)]
        if finite:
            factor_dists[fname] = (finite, rho_sign)

    span = max_beta - min_beta

    def beta_fn(ds, factors):
        ranks = []
        for fname, (dist, sign) in factor_dists.items():
            v = factors.get(fname, np.nan)
            if not np.isfinite(v):
                continue
            r = sp_stats.percentileofscore(dist, v) / 100.0
            ranks.append(r if sign > 0 else 1.0 - r)
        if not ranks:
            return base_beta
        return min_beta + span * np.mean(ranks)
    return beta_fn
|
||
|
|
|
||
|
|
# Also test: VIX-like regime switch (simple threshold)
def make_threshold_beta_fn(factor_name, rho_sign, threshold_pct=60,
                           beta_low=0.3, beta_high=0.7):
    """Binary: factor above/below threshold -> two beta levels.

    The threshold is the factor's `threshold_pct` percentile across all
    dates. rho_sign > 0 maps above-threshold to beta_high; rho_sign <= 0
    inverts that. Non-finite per-date values fall back to the midpoint.

    Fix vs original: with no factor history the original set thresh = 0 and
    compared live values against that meaningless threshold; now it returns
    a constant midpoint beta, consistent with the other factories' fallback.
    """
    fvals = [date_factors[ds].get(factor_name, np.nan) for ds in dates_with_factors]
    fvals = [v for v in fvals if np.isfinite(v)]
    if not fvals:
        return lambda ds, f: (beta_low + beta_high) / 2
    thresh = np.percentile(fvals, threshold_pct)

    def beta_fn(ds, factors):
        v = factors.get(factor_name, np.nan)
        if not np.isfinite(v):
            return (beta_low + beta_high) / 2
        if rho_sign > 0:
            return beta_high if v >= thresh else beta_low
        else:
            return beta_low if v >= thresh else beta_high
    return beta_fn
|
||
|
|
|
||
|
|
print(f"\n{'STRATEGY':<40} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'SHARPE':>7} {'TRADES':>7}")
|
||
|
|
print(f"{'='*75}")
|
||
|
|
|
||
|
|
# Fixed baselines
|
||
|
|
for fb in [0.0, 0.3, 0.5, 0.7]:
|
||
|
|
label = f"fixed_beta={fb}"
|
||
|
|
t1 = time.time()
|
||
|
|
r, ds = run_with_beta_fn(lambda ds, f, b=fb: b)
|
||
|
|
print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} {r['trades']:>7} [{time.time()-t1:.0f}s]")
|
||
|
|
|
||
|
|
# Test top candidate factors
tested_strategies = {}  # label -> (summary dict, per-date stats); reused in Phase 5
for c in candidates[:5]:
    fname = c['factor']
    rho_sign = 1 if c['rho'] > 0 else -1
    # Compact label: strip the api_/glob_ prefixes for table readability.
    short_name = fname.replace('api_', '').replace('glob_', 'g:')

    # Percentile scaling
    label = f"pctl_{short_name}"
    t1 = time.time()
    beta_fn = make_percentile_beta_fn(fname, rho_sign, min_beta=0.1, max_beta=0.9)
    # NOTE(review): `ds` is rebound here to the per-date stats list returned
    # by the run — easy to misread against the date-string `ds` used elsewhere.
    r, ds = run_with_beta_fn(beta_fn)
    tested_strategies[label] = (r, ds)
    print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} {r['trades']:>7} [{time.time()-t1:.0f}s]")

    # Threshold switch
    label = f"thresh_{short_name}"
    t1 = time.time()
    beta_fn = make_threshold_beta_fn(fname, rho_sign, beta_low=0.2, beta_high=0.8)
    r, ds = run_with_beta_fn(beta_fn)
    tested_strategies[label] = (r, ds)
    print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} {r['trades']:>7} [{time.time()-t1:.0f}s]")

# Composite: top 3 new factors
if len(candidates) >= 3:
    top3 = [(c['factor'], 1 if c['rho'] > 0 else -1) for c in candidates[:3]]
    label = "composite_top3"
    t1 = time.time()
    beta_fn = make_composite_beta_fn(top3, min_beta=0.1, max_beta=0.9)
    r, ds = run_with_beta_fn(beta_fn)
    tested_strategies[label] = (r, ds)
    print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} {r['trades']:>7} [{time.time()-t1:.0f}s]")
|
||
|
|
|
||
|
|
# ─── PHASE 5: Overfitting check ───
# Split the date range in half and compare P&L per half; a strategy whose
# edge lives almost entirely in one half is flagged as overfit.
print(f"\n=== PHASE 5: Overfitting validation (H1 vs H2) ===")
mid = len(parquet_files) // 2
print(f" H1: {parquet_files[0].stem} to {parquet_files[mid-1].stem}")
print(f" H2: {parquet_files[mid].stem} to {parquet_files[-1].stem}")

# Fixed beta baselines
for fb in [0.0, 0.3, 0.5]:
    label = f"fixed_beta={fb}"
    # Re-run from scratch; dstats has one entry per parquet file, so the
    # mid index splits it in the same place as the file list.
    _, ds = run_with_beta_fn(lambda ds, f, b=fb: b)
    h1 = sum(s['pnl'] for s in ds[:mid])
    h2 = sum(s['pnl'] for s in ds[mid:])
    ratio = h2/h1 if h1 != 0 else 0
    print(f" {label:<38} H1=${h1:>+9.2f} H2=${h2:>+9.2f} H2/H1={ratio:.2f}")

# Dynamic strategies
for label, (r, ds) in tested_strategies.items():
    h1 = sum(s['pnl'] for s in ds[:mid])
    h2 = sum(s['pnl'] for s in ds[mid:])
    ratio = h2/h1 if h1 != 0 else 0
    # "Stable" when neither half dominates by more than ~3x.
    stable = "YES" if 0.3 < ratio < 3.0 else "OVERFIT"
    print(f" {label:<38} H1=${h1:>+9.2f} H2=${h2:>+9.2f} H2/H1={ratio:.2f} {stable}")
|
||
|
|
|
||
|
|
# Per-date beta values for best dynamic strategy
# Guard: max() over an empty dict raises ValueError when Phase 3 produced no
# candidate factors (and hence Phase 4 tested no dynamic strategies).
if tested_strategies:
    # Best = highest ROI among the dynamic strategies run in Phase 4.
    best_dyn = max(tested_strategies.items(), key=lambda x: x[1][0]['roi'])
    print(f"\n--- BEST DYNAMIC: {best_dyn[0]} ---")
    print(f" ROI={best_dyn[1][0]['roi']:+.2f}%, PF={best_dyn[1][0]['pf']:.3f}, "
          f"Sharpe={best_dyn[1][0]['sharpe']:.2f}, DD={best_dyn[1][0]['dd']:.2f}%")
    print(f" Per-date beta values:")
    for s in best_dyn[1][1]:
        # $$$ = strong winning date, --- = strong losing date (±$100).
        marker = " $$$" if s['pnl'] > 100 else " ---" if s['pnl'] < -100 else ""
        print(f" {s['date']} beta={s['beta']:.2f} pnl=${s['pnl']:>+8.2f}{marker}")
else:
    print("\n No dynamic strategies tested (no candidate factors found).")

print(f"\nTotal time: {time.time()-t0:.0f}s")
|