initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
467
nautilus_dolphin/test_pf_leverage_factor_govern.py
Executable file
467
nautilus_dolphin/test_pf_leverage_factor_govern.py
Executable file
@@ -0,0 +1,467 @@
|
||||
"""Find external factors to govern max_leverage dynamically.
|
||||
|
||||
Phase 1: Load all factors (85 API + 45 scan_global)
|
||||
Phase 2: Compute per-date "leverage benefit" (pnl_15x - pnl_5x)
|
||||
Phase 3: Correlate factors with benefit; EXCLUDE w750-correlated factors
|
||||
Phase 4: Test dynamic max_leverage strategies
|
||||
Phase 5: Risk analysis + overfitting validation
|
||||
"""
|
||||
import sys, time, math
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy import stats as sp_stats
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
# Warm up the numba-compiled kernels once at startup so the timing printed
# in later phases measures strategy logic, not JIT compilation.
print("Compiling numba kernels...")
t0c = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
# Dummy invocations with minimal, correctly-typed arguments: each call only
# exists to trigger compilation of the corresponding kernel signature.
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
                  np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
                  np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
check_dc_nb(_p, 3, 1, 0.75)
print(f" JIT: {time.time() - t0c:.1f}s")
|
||||
|
||||
from nautilus_dolphin.nautilus.alpha_orchestrator import NDAlphaEngine
|
||||
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
|
||||
|
||||
# --- Data locations (machine-specific Windows paths) ---
# VBT_DIR holds one parquet per trading date (stem = date string);
# EIGEN_DIR holds per-date scan_*__Indicators.npz factor dumps.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
EIGEN_DIR = Path(r"C:\Users\Lenovo\Documents\- Dolphin NG HD (NG3)\correlation_arb512\eigenvalues")
# Parquet columns that are regime/velocity metadata rather than asset prices;
# everything else in a date's frame is treated as a tradable symbol column.
META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
             'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div',
             'instability_50', 'instability_150'}
# Engine configuration shared by every backtest run in this script.
# Note: max_leverage here is only the initial value — run_with_max_lev_fn
# overrides engine.bet_sizer.max_leverage per date.
BASE_ENGINE_KWARGS = dict(
    initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
    min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
    fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
    use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
    use_asset_selection=True, min_irp_alignment=0.45,
    use_sp_fees=True, use_sp_slippage=True,
    sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
    use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
    lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
)
# Local copies of the vel_div thresholds used by strength_cubic; these mirror
# vel_div_threshold / vel_div_extreme / leverage_convexity above — keep in sync.
VD_THRESH = -0.02; VD_EXTREME = -0.05; CONVEXITY = 3.0
# Maintenance-margin rate used for the liquidation-distance risk metric
# (presumably Binance USDT-M lowest tier — confirm against exchange docs).
BINANCE_MAINT_MARGIN = 0.004
|
||||
|
||||
# --- PHASE 1: Load ALL external factors ---
print("\n=== PHASE 1: Loading external factors ===")
parquet_files = sorted(VBT_DIR.glob("*.parquet"))
date_strings = [pf.stem for pf in parquet_files]

# Circuit-breaker boosts and cached w750 velocity per date. Accessing
# acb._w750_vel_cache reaches into a private attribute of
# AdaptiveCircuitBreaker — NOTE(review): brittle if that class changes.
acb = AdaptiveCircuitBreaker()
acb.preload_w750(date_strings)
acb_info_by_date = {ds: acb.get_dynamic_boost_for_date(ds) for ds in date_strings}
w750_by_date = {ds: acb._w750_vel_cache.get(ds, 0.0) for ds in date_strings}

# Build {date -> {factor_name -> value}} from the per-date NPZ dumps.
# Each date's value is the median over (up to) its first 10 scan files;
# API factors are prefixed "api_", scan-global factors "glob_".
date_factors = {}
api_name_set = set(); global_name_set = set()
for ds in date_strings:
    date_path = EIGEN_DIR / ds
    if not date_path.exists(): continue
    files = sorted(date_path.glob('scan_*__Indicators.npz'))[:10]
    if not files: continue
    api_vals = {}; glob_vals = {}
    for f in files:
        try:
            data = np.load(f, allow_pickle=True)
            if 'api_names' in data and 'api_indicators' in data:
                names = list(data['api_names'])
                vals = data['api_indicators']
                # Older dumps may lack api_success; assume all succeeded then.
                succ = data['api_success'] if 'api_success' in data else np.ones(len(names), dtype=bool)
                for i, nm in enumerate(names):
                    # Keep only successful, finite readings; tolerate
                    # length mismatches between names/vals/succ arrays.
                    if i < len(vals) and (i >= len(succ) or succ[i]) and np.isfinite(vals[i]):
                        api_vals.setdefault(nm, []).append(float(vals[i]))
                        api_name_set.add(nm)
            if 'scan_global_names' in data and 'scan_global' in data:
                gnames = list(data['scan_global_names'])
                gvals = data['scan_global']
                for i, nm in enumerate(gnames):
                    if i < len(gvals) and np.isfinite(gvals[i]):
                        glob_vals.setdefault(nm, []).append(float(gvals[i]))
                        global_name_set.add(nm)
        except Exception:
            # Best-effort loading: a corrupt scan file is skipped silently.
            continue
    factors = {}
    # Median across scans is robust to a single bad scan reading.
    for nm, vs in api_vals.items(): factors[f"api_{nm}"] = float(np.median(vs))
    for nm, vs in glob_vals.items(): factors[f"glob_{nm}"] = float(np.median(vs))
    date_factors[ds] = factors

print(f" Factors for {len(date_factors)}/{len(date_strings)} dates")
print(f" API: {len(api_name_set)}, Global: {len(global_name_set)}")
|
||||
|
||||
# Pre-load parquet data
# Calibrate the 60th-percentile BTC rolling volatility from the first two
# dates only; it serves as the vol-regime gate threshold in the backtest.
all_vols = []
for pf in parquet_files[:2]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns: continue
    pr = df['BTCUSDT'].values
    # NOTE(review): starts at 60 while the per-date loop below starts at 50
    # with the same 50-bar window — looks inconsistent; confirm intent.
    for i in range(60, len(pr)):
        seg = pr[max(0,i-50):i]
        if len(seg)<10: continue
        # Std-dev of simple returns over the trailing 50-bar window.
        v = float(np.std(np.diff(seg)/seg[:-1]))
        if v > 0: all_vols.append(v)
vol_p60 = float(np.percentile(all_vols, 60))

# Cache every date's frame once: (DataFrame, asset columns, per-bar BTC vol).
# dv[i] is NaN for warm-up bars or when BTCUSDT is absent.
pq_data = {}
for pf in parquet_files:
    df = pd.read_parquet(pf)
    ac = [c for c in df.columns if c not in META_COLS]
    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    dv = np.full(len(df), np.nan)
    if bp is not None:
        for i in range(50, len(bp)):
            seg = bp[max(0,i-50):i]
            if len(seg)<10: continue
            dv[i] = float(np.std(np.diff(seg)/seg[:-1]))
    pq_data[pf.stem] = (df, ac, dv)
|
||||
|
||||
def strength_cubic(vel_div, vd_thresh=None, vd_extreme=None, convexity=None):
    """Map a velocity-divergence reading onto a [0, 1] signal strength.

    Readings at or above the activation threshold yield 0.0. Between the
    threshold and the extreme, strength ramps linearly and is then raised
    to ``convexity`` (cubic by default), so only readings near the extreme
    approach full strength. Readings beyond the extreme are clamped to 1.0.

    Args:
        vel_div: Current velocity-divergence value (more negative = stronger).
        vd_thresh: Activation threshold; defaults to module-level VD_THRESH.
        vd_extreme: Full-strength level; defaults to module-level VD_EXTREME.
        convexity: Exponent applied to the clamped ramp; defaults to
            module-level CONVEXITY.

    Returns:
        float strength in [0.0, 1.0].
    """
    # Resolve defaults lazily so existing single-argument callers keep the
    # exact original behavior driven by the module constants.
    vd_thresh = VD_THRESH if vd_thresh is None else vd_thresh
    vd_extreme = VD_EXTREME if vd_extreme is None else vd_extreme
    convexity = CONVEXITY if convexity is None else convexity
    if vel_div >= vd_thresh:
        return 0.0
    # Linear position between threshold (-> 0) and extreme (-> 1), clamped.
    raw = (vd_thresh - vel_div) / (vd_thresh - vd_extreme)
    return min(1.0, max(0.0, raw)) ** convexity
|
||||
|
||||
|
||||
def run_with_max_lev_fn(max_lev_fn):
    """Run engine where max_lev_fn(date_str, factors) -> max_leverage per date.

    Replays every cached date through a fresh NDAlphaEngine, overriding the
    bet sizer's max_leverage at the start of each date via ``max_lev_fn``.

    Args:
        max_lev_fn: callable (date_string, factors_dict) -> float leverage cap.

    Returns:
        (summary_dict, dstats): summary_dict holds ROI/PF/DD/Sharpe, trade
        count, risk metrics (max effective leverage, minimum liquidation
        distance, worst trade), a bootstrap ruin estimate, and half-period
        PnLs; dstats is a per-date list of {'date','pnl','cap','max_lev'}.
    """
    engine = NDAlphaEngine(**BASE_ENGINE_KWARGS)
    # bar_idx is global across all dates; ph accumulates per-asset price
    # histories across the whole run (never reset between dates).
    bar_idx = 0; ph = {}; dstats = []
    risk_log = []
    max_eff_lev = 0.0; min_liq_dist = float('inf')
    worst_trade_abs = 0.0
    capital_series = [engine.capital]

    for pf in parquet_files:
        ds = pf.stem; cs = engine.capital  # cs = capital at start of date
        # Force short-regime direction and clear any drawdown halt each date.
        engine.regime_direction = -1
        engine.regime_dd_halt = False
        info = acb_info_by_date[ds]
        base_boost = info['boost']
        beta = info['beta']

        # Dynamic max_leverage for this date
        factors = date_factors.get(ds, {})
        max_lev = max_lev_fn(ds, factors)
        engine.bet_sizer.max_leverage = max_lev

        df, acols, dvol = pq_data[ds]
        bid = 0  # bar index within this date (for the 100-bar warm-up gate)
        for ri in range(len(df)):
            row = df.iloc[ri]; vd = row.get("vel_div")
            if vd is None or not np.isfinite(vd): bar_idx+=1; bid+=1; continue
            # Collect valid positive prices; grow per-asset histories.
            prices = {}
            for ac in acols:
                p = row[ac]
                if p and p > 0 and np.isfinite(p):
                    prices[ac] = float(p)
                    if ac not in ph: ph[ac] = []
                    ph[ac].append(float(p))
            if not prices: bar_idx+=1; bid+=1; continue
            # Vol-regime OK only after 100 bars and when BTC vol > calibration p60.
            vrok = False if bid < 100 else (np.isfinite(dvol[ri]) and dvol[ri] > vol_p60)

            # Scale position sizing by circuit-breaker boost, amplified by
            # vel_div strength when beta is active and a boost is in effect.
            if beta > 0 and base_boost > 1.0:
                ss = strength_cubic(float(vd))
                engine.regime_size_mult = base_boost * (1.0 + beta * ss)
            else:
                engine.regime_size_mult = base_boost

            had_pos = engine.position is not None
            old_trades = len(engine.trade_history)
            engine.process_bar(bar_idx=bar_idx, vel_div=float(vd), prices=prices,
                               vol_regime_ok=vrok, price_histories=ph)

            # Risk metrics sampled at position entry (transition none -> open).
            if engine.position is not None and not had_pos:
                pos = engine.position
                eff_lev = pos.notional / engine.capital if engine.capital > 0 else 999
                # Approximate % distance to liquidation from margin ratio.
                liq_dist = (engine.capital / pos.notional - BINANCE_MAINT_MARGIN) * 100 if pos.notional > 0 else 999
                max_eff_lev = max(max_eff_lev, eff_lev)
                min_liq_dist = min(min_liq_dist, liq_dist)
                risk_log.append({'date': ds, 'eff_lev': eff_lev, 'liq_dist': liq_dist,
                                 'max_lev': max_lev, 'notional': pos.notional})

            # Track worst single-trade absolute PnL as each trade closes.
            if len(engine.trade_history) > old_trades:
                t = engine.trade_history[-1]
                if t.pnl_absolute < worst_trade_abs:
                    worst_trade_abs = t.pnl_absolute

            capital_series.append(engine.capital)
            bar_idx+=1; bid+=1

        dstats.append({'date': ds, 'pnl': engine.capital - cs, 'cap': engine.capital, 'max_lev': max_lev})

    # Aggregate statistics: profit factor, daily returns, max drawdown.
    tr = engine.trade_history
    w = [t for t in tr if t.pnl_absolute > 0]; l = [t for t in tr if t.pnl_absolute <= 0]
    gw = sum(t.pnl_absolute for t in w) if w else 0
    gl = abs(sum(t.pnl_absolute for t in l)) if l else 0
    dr = [s['pnl']/25000*100 for s in dstats]  # daily returns vs fixed initial capital
    cap_arr = np.array(capital_series)
    peak_arr = np.maximum.accumulate(cap_arr)
    dd_arr = (peak_arr - cap_arr) / peak_arr * 100
    max_dd = float(np.max(dd_arr))

    # Monte Carlo ruin: bootstrap-resample trade PnLs 5000 times and count
    # paths where capital ever falls below 50% of the $25k start.
    ruin_pct = 0.0
    if tr:
        pnl_dist = np.array([t.pnl_absolute for t in tr])
        ruin_count = 0
        for _ in range(5000):
            cap = 25000.0
            sim = np.random.choice(pnl_dist, size=len(tr), replace=True)
            for pnl in sim:
                cap += pnl
                if cap < 12500: ruin_count += 1; break
        ruin_pct = ruin_count / 5000 * 100

    # Split-half PnL for a quick overfitting check (first vs second half of dates).
    mid = len(parquet_files) // 2
    h1 = sum(s['pnl'] for s in dstats[:mid])
    h2 = sum(s['pnl'] for s in dstats[mid:])

    return {
        'roi': (engine.capital - 25000) / 25000 * 100,
        'pf': gw / gl if gl > 0 else 999,  # 999 sentinel when no losses
        'dd': max_dd,
        'sharpe': np.mean(dr) / np.std(dr) * np.sqrt(365) if np.std(dr) > 0 else 0,
        'trades': len(tr), 'cap': engine.capital,
        'max_eff_lev': max_eff_lev, 'min_liq_dist': min_liq_dist,
        'worst_abs': worst_trade_abs, 'ruin_50dd': ruin_pct,
        'h1': h1, 'h2': h2, 'h2_h1': h2/h1 if h1 != 0 else 0,
    }, dstats
|
||||
|
||||
|
||||
# --- PHASE 2: Compute leverage benefit signal ---
print("\n=== PHASE 2: Computing leverage benefit signal ===")
t0 = time.time()

# Two fixed-cap reference runs. The per-date PnL gap between the 15x and 5x
# caps defines the "leverage benefit" target that factors are screened against.
r5, ds5 = run_with_max_lev_fn(lambda ds, f: 5.0)
print(f" 5x baseline: ROI={r5['roi']:+.2f}%, Sharpe={r5['sharpe']:.2f} [{time.time()-t0:.0f}s]")

t1 = time.time()
r15, ds15 = run_with_max_lev_fn(lambda ds, f: 15.0)
print(f" 15x target: ROI={r15['roi']:+.2f}%, Sharpe={r15['sharpe']:.2f} [{time.time()-t1:.0f}s]")

# Per-date leverage benefit
# Positive value => the higher cap made more money on that date.
benefit_by_date = {}
for s5, s15 in zip(ds5, ds15):
    benefit_by_date[s5['date']] = s15['pnl'] - s5['pnl']

pos_days = sum(1 for v in benefit_by_date.values() if v > 0)
neg_days = sum(1 for v in benefit_by_date.values() if v < 0)
print(f" Leverage benefit: {pos_days} positive days, {neg_days} negative days")
|
||||
|
||||
# --- PHASE 3: Factor correlation scan with anti-compounding filter ---
print(f"\n=== PHASE 3: Factor -> Leverage Benefit correlation (w750-filtered) ===")

# Align benefit and w750 arrays to the dates that actually have factor data.
dates_with_factors = [ds for ds in benefit_by_date if ds in date_factors]
benefit_arr = np.array([benefit_by_date[ds] for ds in dates_with_factors])
w750_arr = np.array([w750_by_date[ds] for ds in dates_with_factors])

# Already-used factors (excluded from "new" candidates to avoid re-using
# signals the system already conditions on elsewhere).
USED_FACTORS = {'api_funding_btc', 'api_dvol_btc', 'api_fng', 'api_taker'}

all_factor_names = set()
for f in date_factors.values(): all_factor_names.update(f.keys())

correlations = []
for fname in sorted(all_factor_names):
    # Require a finite value on EVERY factor date; skip near-constant series.
    vals = []
    valid = True
    for ds in dates_with_factors:
        v = date_factors[ds].get(fname, np.nan)
        if not np.isfinite(v): valid = False; break
        vals.append(v)
    if not valid or len(set(vals)) < 3: continue
    vals = np.array(vals)

    # Correlation with leverage benefit
    rho, pval = sp_stats.spearmanr(vals, benefit_arr)
    if not np.isfinite(rho): continue

    # Correlation with w750 (anti-compounding check): factors that proxy the
    # circuit-breaker's w750 velocity would double-count an existing signal.
    rho_w750, _ = sp_stats.spearmanr(vals, w750_arr)
    rho_w750 = rho_w750 if np.isfinite(rho_w750) else 0.0

    is_new = fname not in USED_FACTORS
    is_w750_safe = abs(rho_w750) < 0.3

    correlations.append({
        'factor': fname, 'rho': rho, 'pval': pval, 'abs_rho': abs(rho),
        'rho_w750': rho_w750, 'is_new': is_new, 'is_w750_safe': is_w750_safe,
    })

# Strongest |rho| first.
correlations.sort(key=lambda x: -x['abs_rho'])

print(f"\n{'FACTOR':<35} {'RHO':>7} {'P-VAL':>8} {'W750_CORR':>10} {'SAFE':>5}")
print(f"{'-'*70}")
for c in correlations[:30]:
    # Marker legend: *** strong new safe factor, * moderate, [w750] proxies
    # w750, [used] already consumed elsewhere.
    marker = ""
    if c['is_new'] and c['is_w750_safe'] and c['abs_rho'] > 0.25:
        marker = " ***"
    elif c['is_new'] and c['is_w750_safe'] and c['abs_rho'] > 0.15:
        marker = " *"
    elif not c['is_w750_safe']:
        marker = " [w750]"
    elif not c['is_new']:
        marker = " [used]"
    print(f" {c['factor']:<33} {c['rho']:>+7.3f} {c['pval']:>8.4f} {c['rho_w750']:>+9.3f} "
          f"{'YES' if c['is_w750_safe'] else 'no':>5}{marker}")

# Candidates: new, w750-safe, |rho| > 0.20, p < 0.15
candidates = [c for c in correlations
              if c['is_new'] and c['is_w750_safe'] and c['abs_rho'] > 0.20 and c['pval'] < 0.15]
print(f"\n Candidates (new, w750-safe, |rho|>0.20, p<0.15): {len(candidates)}")
for c in candidates[:10]:
    print(f" {c['factor']:<33} rho={c['rho']:+.3f} p={c['pval']:.4f} w750_corr={c['rho_w750']:+.3f}")

# --- PHASE 4: Test dynamic max_leverage strategies ---
print(f"\n=== PHASE 4: Dynamic max_leverage tests ===")
|
||||
|
||||
def make_pctl_lev_fn(factor_name, rho_sign, min_lev=5, max_lev=15):
    """Build a per-date leverage rule from a factor's percentile rank.

    The returned callable maps the factor's rank within its full history
    linearly onto [min_lev, max_lev]; a negative ``rho_sign`` inverts the
    mapping. Dates with no finite factor value — or a factor with no
    history at all — fall back to the midpoint of the leverage range.
    """
    midpoint = (min_lev + max_lev) / 2
    history = [
        v
        for v in (date_factors[d].get(factor_name, np.nan) for d in dates_with_factors)
        if np.isfinite(v)
    ]
    if not history:
        return lambda ds, f: midpoint

    span = max_lev - min_lev

    def fn(ds, factors):
        val = factors.get(factor_name, np.nan)
        if not np.isfinite(val):
            return midpoint
        rank = sp_stats.percentileofscore(history, val) / 100.0
        scaled = rank if rho_sign > 0 else 1.0 - rank
        return min_lev + scaled * span

    return fn
|
||||
|
||||
def make_thresh_lev_fn(factor_name, rho_sign, thresh_pct=60, lev_low=5, lev_high=15):
    """Build a two-level leverage rule keyed on a factor percentile cutoff.

    The cutoff is the ``thresh_pct`` percentile of the factor's history.
    With ``rho_sign > 0``, values at or above the cutoff select
    ``lev_high`` and values below select ``lev_low``; a negative sign
    swaps the two. Missing values — or an empty history — fall back to
    the midpoint of the two levels.
    """
    fallback = (lev_low + lev_high) / 2
    history = [
        v
        for v in (date_factors[d].get(factor_name, np.nan) for d in dates_with_factors)
        if np.isfinite(v)
    ]
    if not history:
        return lambda ds, f: fallback
    cutoff = np.percentile(history, thresh_pct)

    def fn(ds, factors):
        val = factors.get(factor_name, np.nan)
        if not np.isfinite(val):
            return fallback
        above = val >= cutoff
        if rho_sign > 0:
            return lev_high if above else lev_low
        return lev_low if above else lev_high

    return fn
|
||||
|
||||
def make_composite_lev_fn(factor_list, min_lev=5, max_lev=15):
    """Build a leverage rule from the mean sign-adjusted rank of several factors.

    ``factor_list`` is a sequence of (factor_name, rho_sign) pairs. Each
    factor contributes its percentile rank (inverted when its sign is
    negative); the mean rank is mapped linearly onto [min_lev, max_lev].
    Factors with no finite history are dropped up front, and a date where
    none of the surviving factors has a finite value falls back to the
    midpoint of the range.
    """
    midpoint = (min_lev + max_lev) / 2
    span = max_lev - min_lev

    factor_dists = {}
    for fname, rho_sign in factor_list:
        history = [
            v
            for v in (date_factors[d].get(fname, np.nan) for d in dates_with_factors)
            if np.isfinite(v)
        ]
        if history:
            factor_dists[fname] = (history, rho_sign)

    def fn(ds, factors):
        ranks = []
        for fname, (history, rho_sign) in factor_dists.items():
            val = factors.get(fname, np.nan)
            if not np.isfinite(val):
                continue
            rank = sp_stats.percentileofscore(history, val) / 100.0
            ranks.append(rank if rho_sign > 0 else 1.0 - rank)
        if not ranks:
            return midpoint
        return min_lev + np.mean(ranks) * span

    return fn
|
||||
|
||||
# Results table header for all Phase 4 strategy runs.
print(f"\n{'STRATEGY':<40} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'SHARPE':>7} "
      f"{'MAX_EFF':>8} {'MIN_LIQ':>8} {'RUIN%':>6}")
print(f"{'='*90}")

# Fixed baselines
# tested maps strategy label -> (summary_dict, per-date stats).
tested = {}
for fl in [5, 10, 15]:
    label = f"fixed_{fl}x"
    t1 = time.time()
    # l=fl default binds the loop variable at definition time (avoids the
    # late-binding closure pitfall).
    r, ds = run_with_max_lev_fn(lambda ds, f, l=fl: float(l))
    tested[label] = (r, ds)
    # "!" flags a run that ever came within 10% of liquidation.
    danger = " !" if r['min_liq_dist'] < 10 else ""
    print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} "
          f"{r['max_eff_lev']:>7.2f}x {r['min_liq_dist']:>7.1f}% {r['ruin_50dd']:>5.1f}%{danger} [{time.time()-t1:.0f}s]")

# Factor-governed strategies
# For each of the top 5 candidate factors, test both a percentile-scaled
# rule and a binary threshold rule.
for c in candidates[:5]:
    fname = c['factor']
    rho_sign = 1 if c['rho'] > 0 else -1
    short = fname.replace('api_', '').replace('glob_', 'g:')

    label = f"pctl_{short}"
    t1 = time.time()
    fn = make_pctl_lev_fn(fname, rho_sign, min_lev=5, max_lev=15)
    r, ds = run_with_max_lev_fn(fn)
    tested[label] = (r, ds)
    danger = " !" if r['min_liq_dist'] < 10 else ""
    print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} "
          f"{r['max_eff_lev']:>7.2f}x {r['min_liq_dist']:>7.1f}% {r['ruin_50dd']:>5.1f}%{danger} [{time.time()-t1:.0f}s]")

    label = f"thresh_{short}"
    t1 = time.time()
    fn = make_thresh_lev_fn(fname, rho_sign, lev_low=5, lev_high=15)
    r, ds = run_with_max_lev_fn(fn)
    tested[label] = (r, ds)
    danger = " !" if r['min_liq_dist'] < 10 else ""
    print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} "
          f"{r['max_eff_lev']:>7.2f}x {r['min_liq_dist']:>7.1f}% {r['ruin_50dd']:>5.1f}%{danger} [{time.time()-t1:.0f}s]")

# Composite top 3
# Blend the three strongest candidates by mean sign-adjusted rank.
if len(candidates) >= 3:
    top3 = [(c['factor'], 1 if c['rho'] > 0 else -1) for c in candidates[:3]]
    label = "composite_top3"
    t1 = time.time()
    fn = make_composite_lev_fn(top3, min_lev=5, max_lev=15)
    r, ds = run_with_max_lev_fn(fn)
    tested[label] = (r, ds)
    danger = " !" if r['min_liq_dist'] < 10 else ""
    print(f" {label:<38} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['dd']:>6.2f} {r['sharpe']:>7.2f} "
          f"{r['max_eff_lev']:>7.2f}x {r['min_liq_dist']:>7.1f}% {r['ruin_50dd']:>5.1f}%{danger} [{time.time()-t1:.0f}s]")
|
||||
|
||||
# --- PHASE 5: Overfitting + multicollinearity ---
print(f"\n=== PHASE 5: Overfitting validation ===")
# Split the date range in half and compare PnL between halves per strategy.
mid = len(parquet_files) // 2
print(f" H1: {date_strings[0]} to {date_strings[mid-1]}")
print(f" H2: {date_strings[mid]} to {date_strings[-1]}")

print(f"\n{'STRATEGY':<40} {'H1_PNL':>10} {'H2_PNL':>10} {'H2/H1':>6} {'STABLE':>7}")
print(f"{'-'*75}")
for label, (r, ds) in tested.items():
    h1 = sum(s['pnl'] for s in ds[:mid])
    h2 = sum(s['pnl'] for s in ds[mid:])
    ratio = h2/h1 if h1 != 0 else 0
    # Heuristic stability band: second half within 0.3x-3x of the first.
    stable = "YES" if 0.3 < ratio < 3.0 else "OVERFIT"
    print(f" {label:<38} {h1:>+10.2f} {h2:>+10.2f} {ratio:>6.2f} {stable:>7}")

# Multicollinearity check: pairwise correlation of top candidate factors
if len(candidates) >= 2:
    print(f"\n--- MULTICOLLINEARITY: pairwise Spearman among top candidates ---")
    top_names = [c['factor'] for c in candidates[:5]]
    for i in range(len(top_names)):
        for j in range(i+1, len(top_names)):
            f1, f2 = top_names[i], top_names[j]
            v1 = [date_factors[ds].get(f1, np.nan) for ds in dates_with_factors]
            v2 = [date_factors[ds].get(f2, np.nan) for ds in dates_with_factors]
            # Keep only dates where both factors are finite; require >= 5 pairs.
            mask = [np.isfinite(a) and np.isfinite(b) for a, b in zip(v1, v2)]
            if sum(mask) < 5: continue
            a = np.array([v for v, m in zip(v1, mask) if m])
            b = np.array([v for v, m in zip(v2, mask) if m])
            rho, _ = sp_stats.spearmanr(a, b)
            warn = " CORRELATED" if abs(rho) > 0.5 else ""
            s1 = f1.replace('api_','').replace('glob_','g:')
            s2 = f2.replace('api_','').replace('glob_','g:')
            print(f" {s1:<25} vs {s2:<25} rho={rho:+.3f}{warn}")
|
||||
|
||||
# Best dynamic strategy
|
||||
best = max(tested.items(), key=lambda x: x[1][0]['sharpe']
|
||||
if x[1][0]['ruin_50dd'] < 15 and x[1][0]['min_liq_dist'] > 5 else -999)
|
||||
print(f"\n=== BEST SAFE STRATEGY: {best[0]} ===")
|
||||
br = best[1][0]
|
||||
print(f" ROI={br['roi']:+.2f}%, PF={br['pf']:.3f}, Sharpe={br['sharpe']:.2f}, DD={br['dd']:.2f}%")
|
||||
print(f" Max eff lev={br['max_eff_lev']:.2f}x, Min liq={br['min_liq_dist']:.1f}%, Ruin={br['ruin_50dd']:.1f}%")
|
||||
print(f" H2/H1={br['h2_h1']:.2f}")
|
||||
|
||||
# Per-date leverage assignments for best
|
||||
print(f"\n Per-date leverage for {best[0]}:")
|
||||
for s in best[1][1]:
|
||||
marker = " $$$" if s['pnl'] > 200 else " ---" if s['pnl'] < -200 else ""
|
||||
print(f" {s['date']} max_lev={s['max_lev']:.1f}x pnl=${s['pnl']:>+9.2f}{marker}")
|
||||
|
||||
print(f"\nTotal time: {time.time()-t0:.0f}s")
|
||||
Reference in New Issue
Block a user