# Source: DOLPHIN/nautilus_dolphin/run_structural_attribution.py
# (305 lines, 13 KiB, Python — structural attribution analysis script)

import sys, time
from pathlib import Path
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis
import json
sys.path.insert(0, str(Path(__file__).parent))
from nautilus_dolphin.nautilus.alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
# Directory holding the cached per-day parquet scan files.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")

# Metadata columns present in each parquet file; every other column is
# treated as an asset price series by the loading code below.
META_COLS = {
    'timestamp',
    'scan_number',
    'v50_lambda_max_velocity',
    'v150_lambda_max_velocity',
    'v300_lambda_max_velocity',
    'v750_lambda_max_velocity',
    'vel_div',
    'instability_50',
    'instability_150',
}
# --- Volatility calibration pass ---------------------------------------
# Build a distribution of rolling 50-bar BTCUSDT return volatilities from
# the first two parquet files; its 60th percentile (vol_p60) later gates
# the vol_regime_ok flag passed to engine.process_bar().
parquet_files = sorted(VBT_DIR.glob("*.parquet"))
parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]  # drop catalog artifacts
print("Loading data...")
all_vols = []
for pf in parquet_files[:2]:  # calibrate on the first two files only
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns:
        continue
    pr = df['BTCUSDT'].values
    for i in range(60, len(pr)):
        seg = pr[max(0, i - 50):i]  # trailing 50-bar price window
        if len(seg) < 10:
            continue
        # Std-dev of simple one-bar returns over the window.
        v = float(np.std(np.diff(seg) / seg[:-1]))
        if v > 0:
            all_vols.append(v)
if not all_vols:
    # Fail loudly: np.percentile on an empty list raises a cryptic error.
    raise RuntimeError(f"No BTCUSDT volatility samples found under {VBT_DIR}")
vol_p60 = float(np.percentile(all_vols, 60))
# --- Preload every parquet file ----------------------------------------
# pq_data maps file-stem (date) -> (DataFrame, asset columns, per-bar
# rolling BTC volatility). total_bars feeds the exposure-time percentage.
pq_data = {}
total_bars = 0
for pf in parquet_files:
    df = pd.read_parquet(pf)
    total_bars += len(df)
    ac = [c for c in df.columns if c not in META_COLS]  # asset price columns only
    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    dv = np.full(len(df), np.nan)  # NaN until enough history accumulates
    if bp is not None:
        for i in range(50, len(bp)):
            seg = bp[max(0, i - 50):i]
            if len(seg) < 10:
                continue
            # Same rolling return-volatility definition as the calibration pass.
            dv[i] = float(np.std(np.diff(seg) / seg[:-1]))
    pq_data[pf.stem] = (df, ac, dv)
# Initialize systems
# Adaptive circuit breaker: supplies a per-date sizing boost/beta (used
# later via get_dynamic_boost_for_date to set engine.regime_size_mult).
acb = AdaptiveCircuitBreaker()
# NOTE(review): presumably precomputes w750 instability stats per date —
# confirm against AdaptiveCircuitBreaker.preload_w750.
acb.preload_w750([pf.stem for pf in parquet_files])
# Mock order-book provider: global imbalance bias -0.09, with per-asset
# overrides for BNBUSDT/SOLUSDT at +0.20.
mock = MockOBProvider(imbalance_bias=-0.09, depth_scale=1.0,
assets=["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"],
imbalance_biases={"BNBUSDT": 0.20, "SOLUSDT": 0.20})
# Order-book feature engine backed by the mock provider; attached to the
# trading engine in run_trajectory() when use_ob_engine is True.
ob_engine_inst = OBFeatureEngine(mock)
ob_engine_inst.preload_date("mock", mock.get_assets())
# Forewarner assessment: models are loaded from mc_results/models next to
# this script and used to score the live configuration.
from mc.mc_ml import DolphinForewarner
from mc.mc_sampler import MCTrialConfig
forewarner = DolphinForewarner(models_dir=str(Path(__file__).parent / "mc_results" / "models"))
# Trial configuration — largely mirrors ENGINE_KWARGS in run_trajectory()
# (same thresholds/leverage/fee settings) plus OB/ACB-specific parameters,
# so the assessment matches the engine actually being run.
config = MCTrialConfig(
trial_id="LIVE",
vel_div_threshold=-0.02, vel_div_extreme=-0.05,
min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
vd_trend_lookback=20, use_sp_fees=True, use_sp_slippage=True,
sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.5,
use_asset_selection=True, min_irp_alignment=0.45,
use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
ob_imbalance_bias=0.0, ob_depth_scale=1.0,
lookback=100, use_alpha_layers=True, use_dynamic_leverage=True,
acb_beta_high=1.5, acb_beta_low=0.2, acb_w750_threshold_pct=60.0
)
report = forewarner.assess(config)
# Green light for the dynamic leverage ceiling: both the envelope score and
# the champion probability must clear their thresholds.
is_green = (report.envelope_score > 0.5 and report.champion_probability > 0.6)
def run_trajectory(name, lev_multiplier, use_ob_engine, dynamic_ceiling=False):
    """Replay every preloaded parquet day through a fresh NDAlphaEngine.

    Args:
        name: Label stored in the returned result dict.
        lev_multiplier: Scales the 5.0x max-leverage ceiling (e.g. 1.2 -> 6x).
        use_ob_engine: Attach the order-book feature engine and enable the
            OB edge filter.
        dynamic_ceiling: Forward the MC forewarner's green/red status to the
            engine (enables its dynamic leverage ceiling).

    Returns:
        dict with per-day returns and capitals, trade count, bars in
        position, and the maximum peak-to-trough drawdown observed.
    """
    ENGINE_KWARGS = dict(
        initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
        min_leverage=0.5, max_leverage=5.0 * lev_multiplier, leverage_convexity=3.0,
        fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
        use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
        dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
        use_asset_selection=True, min_irp_alignment=0.45,
        use_sp_fees=True, use_sp_slippage=True,
        use_ob_edge=use_ob_engine, ob_edge_bps=5.0, ob_confirm_rate=0.40,
        lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
    )
    import gc; gc.collect()  # encourage collection of any previous run's engine
    engine = NDAlphaEngine(**ENGINE_KWARGS)
    if dynamic_ceiling:
        engine.set_mc_forewarner_status(is_green)
    ob_ref = ob_engine_inst if use_ob_engine else None
    if ob_ref:
        engine.set_ob_engine(ob_ref)
    bar_idx = 0; peak_cap = engine.capital; max_dd = 0.0
    daily_returns = []
    daily_capitals = [engine.capital]
    exposure_bars = 0
    for pf in parquet_files:
        ds = pf.stem
        cs = engine.capital  # capital at the start of this day
        # Per-day sizing boost/beta from the adaptive circuit breaker.
        acb_info = acb.get_dynamic_boost_for_date(ds, ob_engine=ob_ref)
        base_boost = acb_info['boost']
        beta = acb_info['beta']
        df, acols, dvol = pq_data[ds]
        ph = {}  # rolling per-asset price histories fed to the engine
        for ri in range(len(df)):
            row = df.iloc[ri]; vd = row.get("vel_div")
            if vd is None or not np.isfinite(vd): bar_idx += 1; continue
            prices = {}
            for ac in acols:
                p = row[ac]
                # `p and` guards None before the numeric comparisons below.
                if p and p > 0 and np.isfinite(p):
                    prices[ac] = float(p)
                    if ac not in ph: ph[ac] = []
                    ph[ac].append(float(p))
                    # Cap history growth; trim back to the last 200 points.
                    if len(ph[ac]) > 500: ph[ac] = ph[ac][-200:]
            if not prices: bar_idx += 1; continue
            # Vol regime OK only after 100 bars of the day AND rolling BTC
            # vol above the calibrated 60th percentile.
            vrok = False if ri < 100 else (np.isfinite(dvol[ri]) and dvol[ri] > vol_p60)
            if beta > 0:
                # Convex ramp of the size multiplier as vel_div moves from the
                # threshold (-0.02) toward the extreme (-0.05).
                ss = 0.0
                if vd < -0.02:
                    raw = (-0.02 - float(vd)) / (-0.02 - -0.05)
                    ss = min(1.0, max(0.0, raw)) ** 3.0
                engine.regime_size_mult = base_boost * (1.0 + beta * ss)
            else:
                engine.regime_size_mult = base_boost
            engine.process_bar(bar_idx=bar_idx, vel_div=float(vd), prices=prices, vol_regime_ok=vrok, price_histories=ph)
            bar_idx += 1
            if engine.position is not None:
                exposure_bars += 1
            # Track running peak capital and worst drawdown bar-by-bar.
            peak_cap = max(peak_cap, engine.capital)
            dd = (peak_cap - engine.capital) / peak_cap
            max_dd = max(max_dd, dd)
        daily_returns.append((engine.capital - cs) / cs if cs > 0 else 0)
        daily_capitals.append(engine.capital)
    trades = engine.trade_history
    R = np.array(daily_returns)
    return {
        'name': name,
        'daily_returns': R,
        'daily_capitals': daily_capitals,
        'trades': len(trades),
        'exposure_bars': exposure_bars,
        'max_dd': max_dd
    }
def analyze_structural(res, total=None):
    """Compute structural/distributional statistics of a trajectory result.

    Args:
        res: Result dict from run_trajectory() — reads 'daily_returns'
            (1-D numpy array), 'exposure_bars' and 'trades'.
        total: Bar count used as the denominator of the exposure percentage.
            Defaults to the module-level total_bars accumulated during the
            parquet preload (backward-compatible with the original call sites).

    Returns:
        dict of summary statistics: mean, sample variance, skew, kurtosis,
        5th percentile of negative days, longest loss streak, trade count,
        exposure percentage, lag-1 autocorrelation and geometric growth rate.
    """
    if total is None:
        total = total_bars  # module-level count from the preload pass
    R = res['daily_returns']
    mu = np.mean(R)
    var = np.var(R, ddof=1)  # sample variance
    sk = skew(R)
    kur = kurtosis(R)
    # 5th percentile of the negative days only (near-worst daily loss).
    p95_neg = np.percentile(R[R < 0], 5) if len(R[R < 0]) > 0 else 0
    exposure_pct = res['exposure_bars'] / total * 100 if total > 0 else 0
    # Loss clustering: longest run of consecutive negative days.
    max_loss_streak = 0
    curr_streak = 0
    for r in R:
        if r < 0:
            curr_streak += 1
            max_loss_streak = max(max_loss_streak, curr_streak)
        else:
            curr_streak = 0
    # Autocorrelation at lag 1 (needs at least two observations).
    if len(R) > 1:
        autocorr = np.corrcoef(R[:-1], R[1:])[0, 1]
    else:
        autocorr = 0
    # Geometric growth rate: mean of log(1 + r) per day.
    ggr_daily = np.mean(np.log1p(R))
    return {
        'mu': mu, 'var': var, 'skew': sk, 'kurt': kur,
        'p95_neg': p95_neg, 'max_loss_streak': max_loss_streak,
        'trades': res['trades'], 'exposure_pct': exposure_pct,
        'autocorr': autocorr, 'ggr_daily': ggr_daily
    }
print("\n--- STEP 1 & 2: Compare Before OB vs After OB (5.0x) ---")
# Run the full backtest twice at the 5x ceiling — without and with the
# order-book feature engine — then tabulate the structural statistics
# side by side, with deltas computed as (After OB - Before OB).
res_no_ob = run_trajectory("No OB 5x", 1.0, False)
res_ob = run_trajectory("OB 5x", 1.0, True)
st_no = analyze_structural(res_no_ob)
st_ob = analyze_structural(res_ob)
print(f"{'Metric':<25} | {'Before OB':<15} | {'After OB':<15} | {'Delta'}")
print("-" * 75)
print(f"{'Arithmetic Mean (u)':<25} | {st_no['mu']:<15.6f} | {st_ob['mu']:<15.6f} | {st_ob['mu'] - st_no['mu']:+.6f}")
print(f"{'Daily Variance (s2)':<25} | {st_no['var']:<15.6f} | {st_ob['var']:<15.6f} | {st_ob['var'] - st_no['var']:+.6f}")
print(f"{'Skew':<25} | {st_no['skew']:<15.3f} | {st_ob['skew']:<15.3f} | {st_ob['skew'] - st_no['skew']:+.3f}")
print(f"{'Kurtosis':<25} | {st_no['kurt']:<15.3f} | {st_ob['kurt']:<15.3f} | {st_ob['kurt'] - st_no['kurt']:+.3f}")
print(f"{'95th %ile Neg Return':<25} | {st_no['p95_neg']:<15.4f} | {st_ob['p95_neg']:<15.4f} | {st_ob['p95_neg'] - st_no['p95_neg']:+.4f}")
print(f"{'Max Loss Streak (Days)':<25} | {st_no['max_loss_streak']:<15} | {st_ob['max_loss_streak']:<15} | {st_ob['max_loss_streak'] - st_no['max_loss_streak']:+}")
print(f"{'Trade Count':<25} | {st_no['trades']:<15} | {st_ob['trades']:<15} | {st_ob['trades'] - st_no['trades']:+}")
print(f"{'Exposure Time %':<25} | {st_no['exposure_pct']:<14.2f}% | {st_ob['exposure_pct']:<14.2f}% | {st_ob['exposure_pct'] - st_no['exposure_pct']:+.2f}%")
print(f"{'Autocorrelation (Lag 1)':<25} | {st_no['autocorr']:<15.3f} | {st_ob['autocorr']:<15.3f} | {st_ob['autocorr'] - st_no['autocorr']:+.3f}")
print(f"{'Geometric Growth Rate':<25} | {st_no['ggr_daily']:<15.6f} | {st_ob['ggr_daily']:<15.6f} | {st_ob['ggr_daily'] - st_no['ggr_daily']:+.6f}")
print("\n--- STEP 3: Geometric Attribution ---")
# Decompose Delta(GGR) using the approximation log-growth ~ mu - var/2:
# the mean contributes +Delta(mu), variance contributes -Delta(var)/2, and
# the residual is attributed to higher moments (tail shape).
delta_ggr = st_ob['ggr_daily'] - st_no['ggr_daily']
delta_mu = st_ob['mu'] - st_no['mu']
delta_var = -(st_ob['var'] - st_no['var']) / 2.0
# The rest is tail (skew/kurt terms and higher moments)
delta_tail = delta_ggr - (delta_mu + delta_var)
print(f"Total Daily GGR Improvement: {delta_ggr:+.6f}")
# NOTE(review): the percentage shares divide by delta_ggr and become
# inf/nan if the two runs produce identical growth rates — acceptable for
# a diagnostic script, but worth confirming.
print(f" -> Contrib from Mean (u): {delta_mu:+.6f} ({delta_mu/delta_ggr*100:.1f}%)")
print(f" -> Contrib from Variance (s2): {delta_var:+.6f} ({delta_var/delta_ggr*100:.1f}%)")
print(f" -> Contrib from Tail Shape: {delta_tail:+.6f} ({delta_tail/delta_ggr*100:.1f}%)")
print("\n--- STEP 4: Fit Variance vs Leverage Curve ---")
# Sweep the leverage ceiling from 5x to 7x (lev/5.0 is the multiplier over
# the 5x base) and record mean, variance and geometric growth per level.
levs = [5.0, 5.5, 6.0, 6.5, 7.0]
curve_results = []
for lev in levs:
    res = run_trajectory(f"OB {lev}x", lev / 5.0, True)
    st = analyze_structural(res)
    curve_results.append((lev, st['mu'], st['var'], st['ggr_daily']))
print(f"{'Leverage':<10} | {'Mu':<10} | {'Var (s2)':<10} | {'Marginal u':<12} | {'Marginal s2/2':<15} | {'GGR (Daily)'}")
print("-" * 80)
prev_mu, prev_var = None, None
for lev, mu, var, ggr in curve_results:
    # Marginal change vs the previous leverage level. `is not None` (not a
    # bare truthiness test) so a level with exactly-zero mean or variance
    # does not wrongly report a zero marginal.
    marg_mu = mu - prev_mu if prev_mu is not None else 0
    marg_var_2 = (var - prev_var) / 2 if prev_var is not None else 0
    print(f"{lev:<10.1f} | {mu:<10.6f} | {var:<10.6f} | {marg_mu:<12.6f} | {marg_var_2:<15.6f} | {ggr:.6f}")
    prev_mu, prev_var = mu, var
print("\n=> Geometric Growth caps out roughly where Marginal u < Marginal s2/2")
print("\n--- STEP 5: Monte Carlo Simulation (Static 5x vs Static 6x vs Dynamic 5->6x) ---")
def run_mc_sim(baseline_returns, periods=365, n_simulations=1000):
    """Bootstrap Monte Carlo resampling of a daily-return series.

    Draws n_simulations paths of `periods` days each by sampling the given
    daily returns with replacement (fixed seed for reproducibility across
    strategies), then summarizes the simulated equity curves.

    Args:
        baseline_returns: 1-D array-like of daily fractional returns.
        periods: Number of days per simulated path.
        n_simulations: Number of bootstrap paths.

    Returns:
        Tuple (median total return %, 5th-percentile total return %,
        probability of a >=40% drawdown in %, median recovery time in days;
        -1 when no path ever draws down).
    """
    np.random.seed(42)  # same resample sequence for every strategy compared
    daily_returns = np.asarray(baseline_returns)
    simulated_returns = np.random.choice(daily_returns, size=(n_simulations, periods), replace=True)
    equity_curves = np.cumprod(1.0 + simulated_returns, axis=1)
    # Total-period return in percent (labelled CAGR for the 365-day default).
    cagrs = (equity_curves[:, -1] - 1.0) * 100
    median_cagr = np.median(cagrs)
    p05_cagr = np.percentile(cagrs, 5)
    max_dds = np.zeros(n_simulations)
    recovery_times = np.zeros(n_simulations)
    for i in range(n_simulations):
        curve = equity_curves[i]
        peaks = np.maximum.accumulate(curve)
        drawdowns = (peaks - curve) / peaks
        max_dd_idx = int(np.argmax(drawdowns))  # trough of the worst drawdown
        max_dds[i] = drawdowns[max_dd_idx]
        if drawdowns[max_dd_idx] > 0:
            peak_val = peaks[max_dd_idx]
            # First bar at/after the trough that regains the prior peak
            # (vectorized equivalent of a linear scan over the tail).
            recovered = np.nonzero(curve[max_dd_idx:] >= peak_val)[0]
            if recovered.size:
                recovery_times[i] = recovered[0]
            else:
                # Censored: never recovered within the horizon.
                recovery_times[i] = periods - max_dd_idx
    prob_40dd = np.mean(max_dds >= 0.40) * 100
    # Median over the paths that actually drew down at some point.
    median_rec = np.median(recovery_times[recovery_times > 0]) if np.any(recovery_times > 0) else -1
    return median_cagr, p05_cagr, prob_40dd, median_rec
# Three final trajectories: static ceilings at 5x and 6x (lev_multiplier
# 1.0 and 1.2), plus the dynamic 5->6x ceiling gated by the forewarner's
# green status (dynamic_ceiling=True). All three use the OB engine.
res_5x = run_trajectory("Static 5x", 1.0, True, False)
res_6x = run_trajectory("Static 6x", 1.2, True, False)
res_dyn = run_trajectory("Dynamic 5->6x", 1.0, True, True)
# Bootstrap each daily-return series and compare tail risk / recovery.
mc_5x = run_mc_sim(res_5x['daily_returns'])
mc_6x = run_mc_sim(res_6x['daily_returns'])
mc_dyn = run_mc_sim(res_dyn['daily_returns'])
print(f"{'Strategy':<15} | {'Med CAGR':<15} | {'5% CAGR':<15} | {'P(>40% DD)':<15} | {'Median Recovery'}")
print("-" * 80)
print(f"{'Static 5x':<15} | {mc_5x[0]:<14.2f}% | {mc_5x[1]:<14.2f}% | {mc_5x[2]:<14.2f}% | {mc_5x[3]:.1f} days")
print(f"{'Static 6x':<15} | {mc_6x[0]:<14.2f}% | {mc_6x[1]:<14.2f}% | {mc_6x[2]:<14.2f}% | {mc_6x[3]:.1f} days")
print(f"{'Dyn 5->6x':<15} | {mc_dyn[0]:<14.2f}% | {mc_dyn[1]:<14.2f}% | {mc_dyn[2]:<14.2f}% | {mc_dyn[3]:.1f} days")