Files
DOLPHIN/nautilus_dolphin/dvae/exp_shared.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

278 lines
12 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Shared infrastructure for proxy-B experiments (exp1exp3, fast sweep).
Provides: data loading, run_backtest() with gold-matching metrics, log_results().
Gold baseline (2026-03-14 confirmed):
ROI=+88.55%, PF=1.215, DD=15.05%, Sharpe=4.38, WR=50.5%, Trades=2155
"""
import sys, time, math, json
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
import pandas as pd
_HERE = Path(__file__).resolve().parent
_ND_ROOT = _HERE.parent
sys.path.insert(0, str(_ND_ROOT))
# ── Lazy JIT warmup (done once per process) ──────────────────────────────────
_jit_done = False
def ensure_jit():
global _jit_done
if _jit_done: return
print("JIT warmup...")
t0 = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
compute_imbalance_nb, compute_depth_1pct_nb, compute_market_agreement_nb,
compute_cascade_signal_nb,
)
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(-0.03,-0.02,-0.05,3.0,0.5,5.0,0.20,True,True,0.0,
np.zeros(4,dtype=np.int64),np.zeros(4,dtype=np.int64),
np.zeros(5,dtype=np.float64),0,-1,0.01,0.04)
check_dc_nb(_p, 3, 1, 0.75)
_b = np.array([100.,200.,300.,400.,500.], dtype=np.float64)
_a = np.array([110.,190.,310.,390.,510.], dtype=np.float64)
compute_imbalance_nb(_b,_a); compute_depth_1pct_nb(_b,_a)
compute_market_agreement_nb(np.array([0.1,-0.05],dtype=np.float64),2)
compute_cascade_signal_nb(np.array([-0.05,-0.15],dtype=np.float64),2,-0.10)
print(f" JIT: {time.time()-t0:.1f}s")
_jit_done = True
# ── Paths ─────────────────────────────────────────────────────────────────────
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
MC_MODELS_DIR= str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))
META_COLS = {'timestamp','scan_number','v50_lambda_max_velocity','v150_lambda_max_velocity',
'v300_lambda_max_velocity','v750_lambda_max_velocity','vel_div',
'instability_50','instability_150'}
ENGINE_KWARGS = dict(
initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05,
min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
fraction=0.20, fixed_tp_pct=0.0095, stop_pct=1.0, max_hold_bars=120,
use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
use_asset_selection=True, min_irp_alignment=0.45,
use_sp_fees=True, use_sp_slippage=True,
sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
)
MC_BASE_CFG = {
'trial_id':0, 'vel_div_threshold':-0.020, 'vel_div_extreme':-0.050,
'use_direction_confirm':True, 'dc_lookback_bars':7, 'dc_min_magnitude_bps':0.75,
'dc_skip_contradicts':True, 'dc_leverage_boost':1.00, 'dc_leverage_reduce':0.50,
'vd_trend_lookback':10, 'min_leverage':0.50, 'max_leverage':5.00,
'leverage_convexity':3.00, 'fraction':0.20, 'use_alpha_layers':True,
'use_dynamic_leverage':True, 'fixed_tp_pct':0.0095, 'stop_pct':1.00,
'max_hold_bars':120, 'use_sp_fees':True, 'use_sp_slippage':True,
'sp_maker_entry_rate':0.62, 'sp_maker_exit_rate':0.50, 'use_ob_edge':True,
'ob_edge_bps':5.00, 'ob_confirm_rate':0.40, 'ob_imbalance_bias':-0.09,
'ob_depth_scale':1.00, 'use_asset_selection':True, 'min_irp_alignment':0.45,
'lookback':100, 'acb_beta_high':0.80, 'acb_beta_low':0.20, 'acb_w750_threshold_pct':60,
}
GOLD = dict(roi=88.55, pf=1.215, dd=15.05, sharpe=4.38, wr=50.5, trades=2155)
# ── Data loading (cached per process) ────────────────────────────────────────
_data_cache = {}
def load_data():
"""Load gold-standard data: float64 pq_data, correct vol_p60 (2-file, offset-60), 48 OB assets."""
from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
parquet_files = sorted(VBT_DIR.glob("*.parquet"))
parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]
date_strings = [p.stem for p in parquet_files]
# GOLD vol_p60: 2 files, range(60), seg-based, v>0 filter
all_vols = []
for pf in parquet_files[:2]:
df = pd.read_parquet(pf)
if 'BTCUSDT' in df.columns:
pr = df['BTCUSDT'].values
for i in range(60, len(pr)):
seg = pr[max(0, i-50):i]
if len(seg) < 10: continue
v = float(np.std(np.diff(seg) / seg[:-1]))
if v > 0: all_vols.append(v)
del df
vol_p60 = float(np.percentile(all_vols, 60)) if all_vols else 0.0002
print(f" Calibrated vol_p60 (gold method): {vol_p60:.8f}")
# GOLD pq_data: float64, all assets, dvol per bar
pq_data = {}
all_assets = set()
for pf in parquet_files:
df = pd.read_parquet(pf)
ac = [c for c in df.columns if c not in META_COLS]
all_assets.update(ac)
bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
dv = np.full(len(df), np.nan)
if bp is not None:
for i in range(50, len(bp)):
seg = bp[max(0, i-50):i]
if len(seg) < 10: continue
dv[i] = float(np.std(np.diff(seg) / seg[:-1]))
pq_data[pf.stem] = (df, ac, dv)
OB_ASSETS = sorted(list(all_assets))
_mock_ob = MockOBProvider(
imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092,
"BNBUSDT": +0.05, "SOLUSDT": +0.05},
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)
print(f" OB_ASSETS={len(OB_ASSETS)}, vol_p60={vol_p60:.8f}, days={len(parquet_files)}")
return dict(
parquet_files=parquet_files, date_strings=date_strings,
vol_p60=vol_p60, ob_eng=ob_eng, OB_ASSETS=OB_ASSETS,
pq_data=pq_data,
)
def load_forewarner():
try:
from mc.mc_ml import DolphinForewarner
fw = DolphinForewarner(models_dir=MC_MODELS_DIR)
print(" MC-Forewarner loaded (5 models)")
return fw
except Exception as e:
print(f" MC-Forewarner unavailable: {e}")
return None
def run_backtest(engine_factory, name, forewarner=None, extra_kwargs=None):
"""
Run full 55-day backtest with gold-matching metrics (Lazy loading).
"""
import gc
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
d = load_data()
kw = ENGINE_KWARGS.copy()
if extra_kwargs: kw.update(extra_kwargs)
acb = AdaptiveCircuitBreaker()
acb.preload_w750(d['date_strings'])
eng = engine_factory(kw)
eng.set_ob_engine(d['ob_eng'])
eng.set_acb(acb)
if forewarner is not None:
eng.set_mc_forewarner(forewarner, MC_BASE_CFG)
eng.set_esoteric_hazard_multiplier(0.0)
daily_caps, daily_pnls = [], []
all_vols = []
for i, pf in enumerate(d['parquet_files']):
ds = pf.stem
# Lazy Load and cast to float32 to save RAM
df = pd.read_parquet(pf)
for c in df.columns:
if df[c].dtype == 'float64':
df[c] = df[c].astype('float32')
acols = [c for c in df.columns if c not in META_COLS]
# Per-day OB Preloading (Crucial for 230MB RAM)
if eng.ob_engine is not None:
eng.ob_engine.preload_date(ds, d['OB_ASSETS'])
# Optimized 5s dvol approximation
bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
dvol = np.zeros(len(df), dtype=np.float32)
if bp is not None:
rets = np.diff(bp.astype('float64')) / (bp[:-1].astype('float64') + 1e-9)
for j in range(50, len(rets)):
v = np.std(rets[j-50:j])
dvol[j+1] = v
if v > 0: all_vols.append(v)
cap_before = eng.capital
vp60 = np.percentile(all_vols, 60) if len(all_vols) > 1000 else d['vol_p60']
vol_ok = np.where(dvol > 0, dvol > vp60, False)
eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
daily_caps.append(eng.capital)
daily_pnls.append(eng.capital - cap_before)
# CLEAR OB CACHE FOR DAY
if eng.ob_engine is not None:
eng.ob_engine._preloaded_placement.clear()
eng.ob_engine._preloaded_signal.clear()
eng.ob_engine._preloaded_market.clear()
eng.ob_engine._ts_to_idx.clear()
del df
gc.collect()
tr = eng.trade_history
n = len(tr)
roi = (eng.capital - 25000.0) / 25000.0 * 100.0
if n == 0:
return dict(name=name, roi=roi, pf=0, dd=0, wr=0, sharpe=0, trades=0)
def _abs(t): return t.pnl_absolute if hasattr(t,'pnl_absolute') else t.pnl_pct*250.
wins = [t for t in tr if _abs(t) > 0]
losses = [t for t in tr if _abs(t) <= 0]
wr = len(wins) / n * 100.0
pf = sum(_abs(t) for t in wins) / max(abs(sum(_abs(t) for t in losses)), 1e-9)
peak_cap, max_dd = 25000.0, 0.0
for cap in daily_caps:
peak_cap = max(peak_cap, cap)
max_dd = max(max_dd, (peak_cap - cap) / peak_cap * 100.0)
dr = np.array([p/25000.*100. for p in daily_pnls])
sharpe = float(dr.mean()/(dr.std()+1e-9)*math.sqrt(365)) if len(dr)>1 else 0.
# Gather any engine-specific extra stats
extra = {}
for attr in ('gate_suppressed','gate_allowed','early_exits','sizing_scale_mean'):
v = getattr(eng, attr, None)
if v is not None: extra[attr] = v
return dict(name=name, roi=roi, pf=pf, dd=max_dd, wr=wr, sharpe=sharpe,
trades=n, **extra)
def print_table(results, gold=None):
hdr = f"{'Config':<42} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'WR%':>6} {'Sharpe':>7} {'Trades':>7}"
print(hdr); print('-'*83)
if gold:
g = gold
print(f"{'*** GOLD ***':<42} {g['roi']:>7.2f} {g['pf']:>6.4f} {g['dd']:>6.2f} "
f"{g['wr']:>6.2f} {g['sharpe']:>7.3f} {g['trades']:>7d}")
print('-'*83)
for r in results:
extra = ''
if 'suppression_rate' in r: extra += f" gate_supp={r['suppression_rate']:.1f}%"
if 'early_exits' in r: extra += f" early_exits={r['early_exits']}"
if 'sizing_scale_mean' in r: extra += f" scale_mean={r['sizing_scale_mean']:.3f}"
print(f"{r['name']:<42} {r['roi']:>7.2f} {r['pf']:>6.4f} {r['dd']:>6.2f} "
f"{r['wr']:>6.2f} {r['sharpe']:>7.3f} {r['trades']:>7d}{extra}")
def log_results(results, outfile, gold=None, meta=None):
payload = {'gold': gold or GOLD, 'results': results}
if meta: payload['meta'] = meta
outfile = Path(outfile)
outfile.parent.mkdir(parents=True, exist_ok=True)
with open(outfile, 'w', encoding='utf-8') as f:
json.dump(payload, f, indent=2)
print(f"\n Logged → {outfile}")