# Source: DOLPHIN/nautilus_dolphin/dvae/exp_shared.py (278 lines, 12 KiB, Python)

"""
Shared infrastructure for proxy-B experiments (exp1exp3, fast sweep).
Provides: data loading, run_backtest() with gold-matching metrics, log_results().
Gold baseline (2026-03-14 confirmed):
ROI=+88.55%, PF=1.215, DD=15.05%, Sharpe=4.38, WR=50.5%, Trades=2155
"""
import sys, time, math, json
# Force UTF-8 stdout with replacement: this module prints non-ASCII glyphs
# ('\u2192' in log_results) which would raise on a cp1252 Windows console.
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
import pandas as pd
# Put the parent of this dvae/ directory on sys.path so sibling packages resolve.
# NOTE(review): imports below use the `nautilus_dolphin.*` prefix, which needs the
# *grandparent* directory on sys.path — presumably added elsewhere; verify.
_HERE = Path(__file__).resolve().parent
_ND_ROOT = _HERE.parent
sys.path.insert(0, str(_ND_ROOT))
# ── Lazy JIT warmup (done once per process) ──────────────────────────────────
_jit_done = False

def ensure_jit():
    """Compile every numba kernel used by the backtest, once per process.

    Calls each ``*_nb`` function with tiny dummy inputs so the JIT cost is
    paid up-front instead of inside the first backtest day. Idempotent:
    subsequent calls return immediately via the module-level ``_jit_done`` flag.
    """
    global _jit_done
    if _jit_done:
        return
    print("JIT warmup...")
    started = time.time()
    from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
    from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
    from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
    from nautilus_dolphin.nautilus.ob_features import (
        compute_imbalance_nb, compute_depth_1pct_nb, compute_market_agreement_nb,
        compute_cascade_signal_nb,
    )
    # Tiny dummy arrays — values are irrelevant, only dtypes/shapes matter for JIT.
    probe = np.array([1.0, 2.0, 3.0], dtype=np.float64)
    compute_irp_nb(probe, -1)
    compute_ars_nb(1.0, 0.5, 0.01)
    rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
    compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
                     np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
                     np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
    check_dc_nb(probe, 3, 1, 0.75)
    bids = np.array([100., 200., 300., 400., 500.], dtype=np.float64)
    asks = np.array([110., 190., 310., 390., 510.], dtype=np.float64)
    compute_imbalance_nb(bids, asks)
    compute_depth_1pct_nb(bids, asks)
    compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
    compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)
    print(f" JIT: {time.time()-started:.1f}s")
    _jit_done = True
# ── Paths ─────────────────────────────────────────────────────────────────────
# Machine-specific absolute paths: per-day parquet price matrices (vbt_cache)
# and the trained MC model bundle. NOTE(review): hard-coded to one workstation.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
MC_MODELS_DIR= str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))
# Non-asset columns in the parquet files; every other column is a tradable symbol.
META_COLS = {'timestamp','scan_number','v50_lambda_max_velocity','v150_lambda_max_velocity',
'v300_lambda_max_velocity','v750_lambda_max_velocity','vel_div',
'instability_50','instability_150'}
# Keyword arguments passed to engine_factory in run_backtest(); these mirror the
# gold-baseline run (overridable per experiment via extra_kwargs).
ENGINE_KWARGS = dict(
initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
fraction=0.20, fixed_tp_pct=0.0095, stop_pct=1.0, max_hold_bars=120,
use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
use_asset_selection=True, min_irp_alignment=0.45,
use_sp_fees=True, use_sp_slippage=True,
sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
)
# Flat config dict handed to eng.set_mc_forewarner() alongside the forewarner;
# largely parallels ENGINE_KWARGS plus the acb_* / ob_* MC-specific knobs.
MC_BASE_CFG = {
'trial_id':0, 'vel_div_threshold':-0.020, 'vel_div_extreme':-0.050,
'use_direction_confirm':True, 'dc_lookback_bars':7, 'dc_min_magnitude_bps':0.75,
'dc_skip_contradicts':True, 'dc_leverage_boost':1.00, 'dc_leverage_reduce':0.50,
'vd_trend_lookback':10, 'min_leverage':0.50, 'max_leverage':5.00,
'leverage_convexity':3.00, 'fraction':0.20, 'use_alpha_layers':True,
'use_dynamic_leverage':True, 'fixed_tp_pct':0.0095, 'stop_pct':1.00,
'max_hold_bars':120, 'use_sp_fees':True, 'use_sp_slippage':True,
'sp_maker_entry_rate':0.62, 'sp_maker_exit_rate':0.50, 'use_ob_edge':True,
'ob_edge_bps':5.00, 'ob_confirm_rate':0.40, 'ob_imbalance_bias':-0.09,
'ob_depth_scale':1.00, 'use_asset_selection':True, 'min_irp_alignment':0.45,
'lookback':100, 'acb_beta_high':0.80, 'acb_beta_low':0.20, 'acb_w750_threshold_pct':60,
}
# Confirmed gold-baseline metrics (see module docstring) used by print_table/log_results.
GOLD = dict(roi=88.55, pf=1.215, dd=15.05, sharpe=4.38, wr=50.5, trades=2155)
# ── Data loading (cached per process) ────────────────────────────────────────
_data_cache = {}

def load_data():
    """Load gold-standard data: float64 pq_data, correct vol_p60 (2-file, offset-60), 48 OB assets.

    Returns a dict with keys: parquet_files, date_strings, vol_p60, ob_eng,
    OB_ASSETS, pq_data (per-date tuple of (DataFrame, asset_cols, dvol array)).

    FIX: the section header promises per-process caching and ``_data_cache``
    existed but was never used, so every run_backtest() call repeated the
    expensive vol_p60 calibration and the full-history parquet load. The
    result is now memoized in ``_data_cache`` and returned on repeat calls.
    """
    if 'data' in _data_cache:
        return _data_cache['data']
    from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
    from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
    parquet_files = sorted(VBT_DIR.glob("*.parquet"))
    parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]
    date_strings = [p.stem for p in parquet_files]
    # GOLD vol_p60: 2 files, range(60), seg-based, v>0 filter
    # (std of 50-bar simple returns, sampled from bar 60 onward, zero vols dropped)
    all_vols = []
    for pf in parquet_files[:2]:
        df = pd.read_parquet(pf)
        if 'BTCUSDT' in df.columns:
            pr = df['BTCUSDT'].values
            for i in range(60, len(pr)):
                seg = pr[max(0, i-50):i]
                if len(seg) < 10:
                    continue
                v = float(np.std(np.diff(seg) / seg[:-1]))
                if v > 0:
                    all_vols.append(v)
        del df
    # Fallback constant used only if no calibration samples were collected.
    vol_p60 = float(np.percentile(all_vols, 60)) if all_vols else 0.0002
    print(f" Calibrated vol_p60 (gold method): {vol_p60:.8f}")
    # GOLD pq_data: float64, all assets, dvol per bar
    pq_data = {}
    all_assets = set()
    for pf in parquet_files:
        df = pd.read_parquet(pf)
        ac = [c for c in df.columns if c not in META_COLS]
        all_assets.update(ac)
        bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
        dv = np.full(len(df), np.nan)  # NaN where no 50-bar window is available
        if bp is not None:
            for i in range(50, len(bp)):
                seg = bp[max(0, i-50):i]
                if len(seg) < 10:
                    continue
                dv[i] = float(np.std(np.diff(seg) / seg[:-1]))
        pq_data[pf.stem] = (df, ac, dv)
    OB_ASSETS = sorted(list(all_assets))
    # Mock order-book provider with per-asset imbalance biases for the majors.
    _mock_ob = MockOBProvider(
        imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
        imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092,
                          "BNBUSDT": +0.05, "SOLUSDT": +0.05},
    )
    ob_eng = OBFeatureEngine(_mock_ob)
    ob_eng.preload_date("mock", OB_ASSETS)
    print(f" OB_ASSETS={len(OB_ASSETS)}, vol_p60={vol_p60:.8f}, days={len(parquet_files)}")
    result = dict(
        parquet_files=parquet_files, date_strings=date_strings,
        vol_p60=vol_p60, ob_eng=ob_eng, OB_ASSETS=OB_ASSETS,
        pq_data=pq_data,
    )
    _data_cache['data'] = result
    return result
def load_forewarner():
    """Best-effort loader for the MC DolphinForewarner model bundle.

    Returns the forewarner instance on success, or None when the ``mc``
    package or its model files cannot be loaded — callers treat None as
    "run without the MC forewarner".
    """
    try:
        from mc.mc_ml import DolphinForewarner
        model = DolphinForewarner(models_dir=MC_MODELS_DIR)
    except Exception as err:
        print(f" MC-Forewarner unavailable: {err}")
        return None
    print(" MC-Forewarner loaded (5 models)")
    return model
def run_backtest(engine_factory, name, forewarner=None, extra_kwargs=None):
    """
    Run full 55-day backtest with gold-matching metrics (Lazy loading).

    Parameters:
        engine_factory: callable(dict) -> engine; built from ENGINE_KWARGS
            merged with extra_kwargs.
        name: label copied into the returned result dict.
        forewarner: optional MC forewarner (see load_forewarner); wired into
            the engine with MC_BASE_CFG when given.
        extra_kwargs: dict of per-experiment overrides for ENGINE_KWARGS.

    Returns a dict: name, roi, pf, dd, wr, sharpe, trades, plus any of the
    engine attributes gate_suppressed / gate_allowed / early_exits /
    sizing_scale_mean that the engine exposes.
    """
    import gc
    from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
    d = load_data()
    kw = ENGINE_KWARGS.copy()
    if extra_kwargs: kw.update(extra_kwargs)
    acb = AdaptiveCircuitBreaker()
    acb.preload_w750(d['date_strings'])
    eng = engine_factory(kw)
    eng.set_ob_engine(d['ob_eng'])
    eng.set_acb(acb)
    if forewarner is not None:
        eng.set_mc_forewarner(forewarner, MC_BASE_CFG)
        eng.set_esoteric_hazard_multiplier(0.0)
    daily_caps, daily_pnls = [], []
    all_vols = []  # pool of 50-bar return vols; grows across days (see vp60 below)
    for i, pf in enumerate(d['parquet_files']):
        ds = pf.stem
        # Lazy Load and cast to float32 to save RAM
        df = pd.read_parquet(pf)
        for c in df.columns:
            if df[c].dtype == 'float64':
                df[c] = df[c].astype('float32')
        acols = [c for c in df.columns if c not in META_COLS]
        # Per-day OB Preloading (Crucial for 230MB RAM)
        if eng.ob_engine is not None:
            eng.ob_engine.preload_date(ds, d['OB_ASSETS'])
        # Optimized 5s dvol approximation
        bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
        dvol = np.zeros(len(df), dtype=np.float32)
        if bp is not None:
            # up-cast to float64 so the return math is not degraded by the
            # float32 storage cast above; +1e-9 guards a zero price
            rets = np.diff(bp.astype('float64')) / (bp[:-1].astype('float64') + 1e-9)
            for j in range(50, len(rets)):
                v = np.std(rets[j-50:j])
                dvol[j+1] = v  # vol of returns through bar j labels bar j+1
                if v > 0: all_vols.append(v)
        cap_before = eng.capital
        # Calibrated gold vol_p60 is used until >1000 in-run samples accumulate
        vp60 = np.percentile(all_vols, 60) if len(all_vols) > 1000 else d['vol_p60']
        # bars with no dvol (first 51 bars, or no BTCUSDT column) are never "ok"
        vol_ok = np.where(dvol > 0, dvol > vp60, False)
        eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
        daily_caps.append(eng.capital)
        daily_pnls.append(eng.capital - cap_before)
        # CLEAR OB CACHE FOR DAY
        if eng.ob_engine is not None:
            eng.ob_engine._preloaded_placement.clear()
            eng.ob_engine._preloaded_signal.clear()
            eng.ob_engine._preloaded_market.clear()
            eng.ob_engine._ts_to_idx.clear()
        del df
        gc.collect()
    tr = eng.trade_history
    n = len(tr)
    roi = (eng.capital - 25000.0) / 25000.0 * 100.0
    if n == 0:
        return dict(name=name, roi=roi, pf=0, dd=0, wr=0, sharpe=0, trades=0)
    # Absolute pnl when the trade object provides it; otherwise pnl_pct*250.
    # NOTE(review): 250 is presumably an average-notional proxy — confirm.
    def _abs(t): return t.pnl_absolute if hasattr(t,'pnl_absolute') else t.pnl_pct*250.
    wins = [t for t in tr if _abs(t) > 0]
    losses = [t for t in tr if _abs(t) <= 0]
    wr = len(wins) / n * 100.0
    pf = sum(_abs(t) for t in wins) / max(abs(sum(_abs(t) for t in losses)), 1e-9)
    # Max drawdown over END-OF-DAY capital only; intraday dips are not captured.
    peak_cap, max_dd = 25000.0, 0.0
    for cap in daily_caps:
        peak_cap = max(peak_cap, cap)
        max_dd = max(max_dd, (peak_cap - cap) / peak_cap * 100.0)
    # Daily pnl as % of INITIAL capital, annualized with sqrt(365) (crypto: 365d).
    dr = np.array([p/25000.*100. for p in daily_pnls])
    sharpe = float(dr.mean()/(dr.std()+1e-9)*math.sqrt(365)) if len(dr)>1 else 0.
    # Gather any engine-specific extra stats
    extra = {}
    for attr in ('gate_suppressed','gate_allowed','early_exits','sizing_scale_mean'):
        v = getattr(eng, attr, None)
        if v is not None: extra[attr] = v
    return dict(name=name, roi=roi, pf=pf, dd=max_dd, wr=wr, sharpe=sharpe,
                trades=n, **extra)
def print_table(results, gold=None):
    """Print result dicts as an aligned comparison table.

    Each row needs keys name/roi/pf/dd/wr/sharpe/trades; optional keys
    suppression_rate / early_exits / sizing_scale_mean are appended as tags.
    NOTE(review): run_backtest emits gate_suppressed, not suppression_rate —
    presumably another producer supplies that key; verify.
    When `gold` is truthy, its metrics are printed as a reference row first.
    """
    sep = '-' * 83
    print(f"{'Config':<42} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'WR%':>6} {'Sharpe':>7} {'Trades':>7}")
    print(sep)
    if gold:
        print(f"{'*** GOLD ***':<42} {gold['roi']:>7.2f} {gold['pf']:>6.4f} {gold['dd']:>6.2f} "
              f"{gold['wr']:>6.2f} {gold['sharpe']:>7.3f} {gold['trades']:>7d}")
        print(sep)
    for row in results:
        tags = []
        if 'suppression_rate' in row:
            tags.append(f" gate_supp={row['suppression_rate']:.1f}%")
        if 'early_exits' in row:
            tags.append(f" early_exits={row['early_exits']}")
        if 'sizing_scale_mean' in row:
            tags.append(f" scale_mean={row['sizing_scale_mean']:.3f}")
        print(f"{row['name']:<42} {row['roi']:>7.2f} {row['pf']:>6.4f} {row['dd']:>6.2f} "
              f"{row['wr']:>6.2f} {row['sharpe']:>7.3f} {row['trades']:>7d}{''.join(tags)}")
def log_results(results, outfile, gold=None, meta=None):
    """Write results (plus gold baseline and optional meta) to a JSON file.

    Falls back to the module-level GOLD baseline when `gold` is not given;
    parent directories are created as needed.
    """
    body = {'gold': gold or GOLD, 'results': results}
    if meta:
        body['meta'] = meta
    target = Path(outfile)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(body, indent=2), encoding='utf-8')
    print(f"\n Logged → {target}")