# Listing metadata (from the code viewer): 239 lines, 9.6 KiB, Python
|
|
import sys, time
|
||
|
|
from pathlib import Path
|
||
|
|
import numpy as np
|
||
|
|
import pandas as pd
|
||
|
|
from scipy.stats import pearsonr, ttest_ind
|
||
|
|
|
||
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
||
|
|
|
||
|
|
from nautilus_dolphin.nautilus.alpha_orchestrator import NDAlphaEngine
|
||
|
|
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
|
||
|
|
from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
|
||
|
|
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
|
||
|
|
from mc.mc_ml import DolphinForewarner
|
||
|
|
from mc.mc_sampler import MCTrialConfig
|
||
|
|
|
||
|
|
# Directory holding the cached parquet files (machine-specific absolute path).
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")

# Every *.parquet file directly inside VBT_DIR, in sorted path order. Each
# file's stem is used further down as that day's date key, so the sort order
# is also the chronological order the analysis relies on.
# The 'catalog' exclusion is tested against the file name only, so a parent
# directory whose path happens to contain "catalog" cannot empty the list.
parquet_files = sorted(
    p for p in VBT_DIR.glob("*.parquet") if 'catalog' not in p.name
)
|
||
|
|
|
||
|
|
print("Loading data & extracting daily precursor metrics...")

# Assets whose pairwise return correlation is tracked as a precursor.
CORR_ASSETS = ['BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'SOLUSDT']


def _column_max(df, column):
    """Return the maximum of `column` in `df`, or 0 when the column is absent."""
    return df[column].max() if column in df.columns else 0


def _daily_precursor_metrics(df, ds):
    """Build the precursor-metric record for one day's frame.

    Args:
        df: DataFrame read from one parquet file; must contain 'vel_div'.
        ds: Date key for the record (the parquet file stem).

    Returns:
        dict with keys 'Date', 'vol_accel_max', 'cross_corr_mean',
        'cross_corr_max', 'entropy_max', 'v750_max', 'v50_max' and
        'vol_p60_proxy'.
    """
    # 1. Bar-to-bar change of vel_div (first difference; NaNs treated as 0).
    #    df['vel_div'] is the instability proxy.
    vd = df['vel_div'].fillna(0).values
    if len(vd) > 0:
        # prepend=vd[0] keeps the length and makes the first change exactly 0.
        vol_accel = np.diff(vd, prepend=vd[0])
        vol_accel_max = np.max(np.abs(vol_accel))
        vol_p60_proxy = np.percentile(np.abs(vd), 60)
    else:
        # An empty frame has no bars to difference; fall back to the same
        # neutral 0 used for the percentile proxy.
        vol_accel_max = 0
        vol_p60_proxy = 0

    # 2. Cross-asset correlation spike
    valid_assets = [a for a in CORR_ASSETS if a in df.columns]
    if len(valid_assets) > 1:
        rets = df[valid_assets].pct_change().fillna(0)
        corr_matrix = rets.corr().values
        # Strict upper triangle: each asset pair once, diagonal excluded.
        cross_corr = corr_matrix[np.triu_indices_from(corr_matrix, k=1)]
        mean_cross_corr = np.nanmean(cross_corr)
        max_cross_corr = np.nanmax(cross_corr)
    else:
        mean_cross_corr = 0
        max_cross_corr = 0

    # 3. Regime entropy spike (taken from 'instability_50' when present)
    entropy_max = _column_max(df, 'instability_50')

    # 4. Eigenvalue dynamics (v750, v50)
    v750_max = _column_max(df, 'v750_lambda_max_velocity')
    v50_max = _column_max(df, 'v50_lambda_max_velocity')

    return {
        'Date': ds,
        'vol_accel_max': vol_accel_max,
        'cross_corr_mean': mean_cross_corr,
        'cross_corr_max': max_cross_corr,
        'entropy_max': entropy_max,
        'v750_max': v750_max,
        'v50_max': v50_max,
        'vol_p60_proxy': vol_p60_proxy,
    }


daily_metrics = []
all_vols = []  # NOTE(review): never populated or read in the visible script

# Pre-parse metrics to build precursor sets
for pf in parquet_files:
    daily_metrics.append(_daily_precursor_metrics(pd.read_parquet(pf), pf.stem))

if not daily_metrics:
    # Without this, set_index('Date') below fails with an opaque KeyError.
    raise SystemExit(f"No parquet files were loaded from {VBT_DIR}; cannot build precursor metrics.")

metrics_df = pd.DataFrame(daily_metrics).set_index('Date')

# Shift metrics by 1 day so we are testing PRECURSORS (T-1) predicting T's return.
# The shift is positional (one row), so "T-1" means the previous file in sorted
# order; dropna() then removes the first day and any day with a NaN metric.
precursor_df = metrics_df.shift(1).dropna()
|
||
|
|
|
||
|
|
|
||
|
|
# Now, run the actual engine to extract the daily returns
print("Running fast 6.0x trajectory to isolate daily PnL...")

# Bookkeeping and eigenvalue/instability columns; every other column of a
# parquet frame is fed to the engine as an asset price series.
NON_PRICE_COLUMNS = {
    'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
    'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div',
    'instability_50', 'instability_150',
}

# Cache per day: (frame, price column names, vel_div array).
pq_data = {}
for pf in parquet_files:
    df = pd.read_parquet(pf)
    ac = [c for c in df.columns if c not in NON_PRICE_COLUMNS]
    dv = df['vel_div'].values if 'vel_div' in df.columns else np.zeros(len(df))
    pq_data[pf.stem] = (df, ac, dv)

acb = AdaptiveCircuitBreaker()
acb.preload_w750([pf.stem for pf in parquet_files])

mock = MockOBProvider(imbalance_bias=-0.09, depth_scale=1.0,
                      assets=["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"],
                      imbalance_biases={"BNBUSDT": 0.20, "SOLUSDT": 0.20})
ob_engine_inst = OBFeatureEngine(mock)
ob_engine_inst.preload_date("mock", mock.get_assets())

ENGINE_KWARGS = dict(
    initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
    min_leverage=0.5, max_leverage=6.0, leverage_convexity=3.0,
    fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
    use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
    use_asset_selection=True, min_irp_alignment=0.45,
    use_sp_fees=True, use_sp_slippage=True,
    use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
    lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
)

engine = NDAlphaEngine(**ENGINE_KWARGS)
engine.set_ob_engine(ob_engine_inst)

# The per-bar sizing below uses the same vel_div thresholds the engine was
# configured with, so read them from ENGINE_KWARGS instead of repeating literals.
VEL_DIV_THRESHOLD = ENGINE_KWARGS['vel_div_threshold']
VEL_DIV_EXTREME = ENGINE_KWARGS['vel_div_extreme']
# NOTE(review): same value as leverage_convexity - confirm whether the two
# are meant to stay tied before linking them.
SIGNAL_STRENGTH_EXPONENT = 3.0

daily_returns = {}
bar_idx = 0            # global bar counter; advances on skipped bars too
all_vols_engine = []   # every 50-bar BTC volatility reading so far, across days

for pf in parquet_files:
    ds = pf.stem
    cs = engine.capital  # capital at the start of this day

    acb_info = acb.get_dynamic_boost_for_date(ds, ob_engine=ob_engine_inst)
    base_boost = acb_info['boost']
    beta = acb_info['beta']

    df, acols, dvol_raw = pq_data[ds]
    ph = {}  # per-asset price history, reset at the start of every day

    for ri, vd in enumerate(dvol_raw):
        if not np.isfinite(vd):
            bar_idx += 1
            continue

        row = df.iloc[ri]
        prices = {}
        for ac in acols:
            p = row[ac]
            # `p and ...` short-circuits on None/0 before the numeric checks.
            if p and p > 0 and np.isfinite(p):
                prices[ac] = float(p)
                hist = ph.setdefault(ac, [])
                hist.append(float(p))
                # Bound memory: past 500 points keep only the latest 200.
                if len(hist) > 500:
                    ph[ac] = hist[-200:]
        if not prices:
            bar_idx += 1
            continue

        # Volatility gate: std of the last 50 BTC simple returns must exceed
        # the 60th percentile of all readings so far (after >100 readings).
        btc_hist = ph.get("BTCUSDT", [])
        engine_vrok = False
        if len(btc_hist) >= 50:
            seg = btc_hist[-50:]
            vd_eng = float(np.std(np.diff(seg) / np.array(seg[:-1])))
            all_vols_engine.append(vd_eng)
            if len(all_vols_engine) > 100:
                engine_vrok = vd_eng > np.percentile(all_vols_engine, 60)

        if beta > 0:
            # Signal strength: how far vel_div sits below the threshold,
            # normalised to the threshold->extreme span, clamped to [0, 1]
            # and raised to the exponent.
            ss = 0.0
            if vd < VEL_DIV_THRESHOLD:
                raw = (VEL_DIV_THRESHOLD - float(vd)) / (VEL_DIV_THRESHOLD - VEL_DIV_EXTREME)
                ss = min(1.0, max(0.0, raw)) ** SIGNAL_STRENGTH_EXPONENT
            engine.regime_size_mult = base_boost * (1.0 + beta * ss)
        else:
            engine.regime_size_mult = base_boost

        engine.process_bar(bar_idx=bar_idx, vel_div=float(vd), prices=prices,
                           vol_regime_ok=engine_vrok, price_histories=ph)
        bar_idx += 1

    # Day return = relative change in engine capital over the day's bars.
    daily_returns[ds] = (engine.capital - cs) / cs if cs > 0 else 0
|
||
|
|
|
||
|
|
# Merge returns and precursors (inner join: only days that have both a
# shifted precursor row and an engine return).
returns_df = pd.DataFrame.from_dict(daily_returns, orient='index', columns=['Return'])
merged = precursor_df.join(returns_df, how='inner')

if merged.empty:
    # With no rows every statistic below is NaN and the verdict would be a
    # NaN-driven "NO"; stop with an explicit message instead.
    raise SystemExit("No days with both precursor metrics and returns; nothing to analyse.")

# Identify the extreme left tail (bottom 10% of days)
threshold_pnl = merged['Return'].quantile(0.10)
merged['Is_Extreme'] = merged['Return'] <= threshold_pnl

print(f"\nIdentified threshold for Extreme Left-Tail days: <= {threshold_pnl:.2%} daily return")

# .copy() because a column is added to extreme_days below; writing into a
# slice of `merged` is pandas chained assignment.
extreme_days = merged[merged['Is_Extreme']].copy()
normal_days = merged[~merged['Is_Extreme']]

print("\n==========================================================================================")
print(f" PRECURSOR SEPARATION ANALYSIS: Extreme Tail (N={len(extreme_days)}) vs Normal (N={len(normal_days)})")
print("==========================================================================================")
print(f"{'Feature':<20} | {'Normal Mean':<18} | {'Tail Mean':<18} | {'Significant? (p<0.05)'}")
print("-" * 88)

features = ['vol_accel_max', 'cross_corr_mean', 'cross_corr_max', 'entropy_max', 'v750_max', 'v50_max']
precursor_hit_rates = {}
p_values = {}      # Welch t-test p-value per feature, computed once
spike_flags = {}   # per feature: which extreme days exceed the normal 75th percentile

for feat in features:
    norm_val = normal_days[feat].mean()
    tail_val = extreme_days[feat].mean()
    # equal_var=False -> Welch's t-test (no equal-variance assumption).
    _, p = ttest_ind(normal_days[feat], extreme_days[feat], equal_var=False)
    p_values[feat] = p

    sig = f"YES (p={p:.4f})" if p < 0.05 else "NO"
    print(f"{feat:<20} | {norm_val:<18.6f} | {tail_val:<18.6f} | {sig}")

    # A "spike" is one-sided: the tail-day value lies above the 75th
    # percentile of the same feature on normal days.
    norm_75 = normal_days[feat].quantile(0.75)
    spike_flags[feat] = extreme_days[feat] > norm_75
    precursor_hit_rates[feat] = spike_flags[feat].mean()

print("\n==========================================================================================")
print(" PRECURSOR OVERLAP (Do >80% of extreme days share these precursors?)")
print("==========================================================================================")

# Count how many extreme days have AT LEAST ONE precursor above the normal 75th percentile
# Using the most significant features (p < 0.05); a NaN p-value counts as not significant.
sig_features = [feat for feat in features if p_values[feat] < 0.05]

if not sig_features:
    print("WARNING: None of the tested precursors are strongly statistically significant.")
    sig_features = features  # Fallback to all

extreme_days['Precursors_Active'] = 0
for feat in sig_features:
    extreme_days['Precursors_Active'] += spike_flags[feat].astype(int)

pct_shared = (extreme_days['Precursors_Active'] >= 1).mean() * 100
avg_active = extreme_days['Precursors_Active'].mean()

print(f"Features used for overlap: {sig_features}")
for feat in sig_features:
    print(f"  - {feat}: {precursor_hit_rates[feat]:.1%} of extreme days had spikes")

print("\nFinal Verdict:")
print(f"  Do >80% of extreme negative days share AT LEAST ONE precursor? {pct_shared:.1f}%")

if pct_shared >= 80.0:
    print("\nCONCLUSION: YES. You have a surgical tail-dodger. The extremes are preceded by structural market decay.")
else:
    print("\nCONCLUSION: NO. <80% overlap.")
    print("You are dealing with true stochastic tails (Black Swans), and a rigid leverage ceiling is the only absolute control.")
|
||
|
|
|