# Source: DOLPHIN/prod/vbt_nautilus_56day_backtest.py
# (repository viewer metadata: 292 lines, 12 KiB, Python)

"""
56-Day VBT-Vector Nautilus Backtest
=====================================
Linux-native port of replicate_181_gold.py
Uses local vbt_cache_klines/, D_LIQ engine, static vol_p60 (gold path).
Gold reference (D_LIQ_GOLD, Windows/full stack):
ROI=+181.81%, Trades=2155, DD=17.65%
Current champion (SYSTEM_BIBLE, ACBv6 refactor, this parquet state):
ROI=+54.67%, Trades=2145, DD=15.80%
(gold regression ~111% expected from ACB/orchestrator refactor not agent-caused)
What this tests:
- NDAlphaEngine + D_LIQ_GOLD config (8x soft / 9x hard)
- MockOBProvider OB approximation (asset-specific biases)
- ACBv6 with NG6 eigenvalues (if mounted)
- MC forewarner gate (if models present)
- Stochastic fill sim: sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50
- GOLD vol_p60: static, calibrated from first 2 parquets (correct gold path)
- NO set_esoteric_hazard_multiplier call (gold path invariant)
- Lazy loading per day (RAM-safe)
"""
import sys
import time
import gc
import json
from pathlib import Path
from datetime import datetime, timezone
import numpy as np
import pandas as pd
# ── Path setup ────────────────────────────────────────────────────────────────
# Every location is derived from this file's own path, so the script does not
# depend on the current working directory.
_PROD_DIR = Path(__file__).resolve().parent
_HCM_DIR = _PROD_DIR.parent
_ND_DIR = _HCM_DIR / 'nautilus_dolphin'
# Both directories go to the FRONT of sys.path (the second insert ends up first).
# The function-level imports further down (`nautilus_dolphin.nautilus.*`,
# `mc.mc_ml`) presumably resolve through these entries — verify before reordering.
sys.path.insert(0, str(_HCM_DIR))
sys.path.insert(0, str(_ND_DIR))
VBT_KLINES_DIR = _HCM_DIR / 'vbt_cache_klines'
# Kept as a plain string: it is wrapped in Path() for the existence check and
# passed as `models_dir=` to the forewarner.
MC_MODELS_DIR = str(_ND_DIR / 'mc_results' / 'models')
RUN_LOGS_DIR = _ND_DIR / 'run_logs'
# NOTE: import-time side effect — the run-log directory is created on import.
RUN_LOGS_DIR.mkdir(parents=True, exist_ok=True)
# ── Gold 56-day window ────────────────────────────────────────────────────────
# Bounds are compared as strings against parquet file stems, so they must stay
# in the same ISO `YYYY-MM-DD` form as the file names.
WINDOW_START = '2025-12-31'
WINDOW_END = '2026-02-25' # inclusive; gold spec: 56-day Dec31→Feb25
# ── Champion ENGINE_KWARGS (frozen — mirrors blue.yml + gold spec) ────────────
# Keyword arguments handed unchanged to create_d_liq_engine(**kw).
ENGINE_KWARGS = {
    # capital
    'initial_capital': 25000.0,
    # velocity-divergence signal thresholds
    'vel_div_threshold': -0.02,
    'vel_div_extreme': -0.05,
    # leverage / sizing
    'min_leverage': 0.5,
    'max_leverage': 5.0,
    'leverage_convexity': 3.0,
    'fraction': 0.20,
    # exits
    'fixed_tp_pct': 0.0095,
    'stop_pct': 1.0,
    'max_hold_bars': 120,
    # direction confirmation
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.0,
    'dc_leverage_reduce': 0.5,
    # asset selection
    'use_asset_selection': True,
    'min_irp_alignment': 0.45,
    # stochastic fill simulation (fees / slippage / maker rates)
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    # order-book edge
    'use_ob_edge': True,
    'ob_edge_bps': 5.0,
    'ob_confirm_rate': 0.40,
    # misc
    'lookback': 100,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    'seed': 42,
}
# Baseline configuration passed to eng.set_mc_forewarner() alongside the
# forewarner instance.
MC_BASE_CFG = dict(
    trial_id=0,
    # velocity-divergence signal thresholds
    vel_div_threshold=-0.020,
    vel_div_extreme=-0.050,
    # direction confirmation
    use_direction_confirm=True,
    dc_lookback_bars=7,
    dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True,
    dc_leverage_boost=1.00,
    dc_leverage_reduce=0.50,
    vd_trend_lookback=10,
    # leverage / sizing
    min_leverage=0.50,
    max_leverage=5.00,
    leverage_convexity=3.00,
    fraction=0.20,
    use_alpha_layers=True,
    use_dynamic_leverage=True,
    # exits
    fixed_tp_pct=0.0095,
    stop_pct=1.00,
    max_hold_bars=120,
    # stochastic fill simulation
    use_sp_fees=True,
    use_sp_slippage=True,
    sp_maker_entry_rate=0.62,
    sp_maker_exit_rate=0.50,
    # order book
    use_ob_edge=True,
    ob_edge_bps=5.00,
    ob_confirm_rate=0.40,
    ob_imbalance_bias=-0.09,
    ob_depth_scale=1.00,
    # asset selection
    use_asset_selection=True,
    min_irp_alignment=0.45,
    lookback=100,
    # adaptive circuit breaker
    acb_beta_high=0.80,
    acb_beta_low=0.20,
    acb_w750_threshold_pct=60,
)
# Column names that are NOT assets: every other column of a daily parquet frame
# is treated as an asset column by the code below.
META_COLS = {
    'timestamp',
    'scan_number',
    'vel_div',
    *(f'v{_w}_lambda_max_velocity' for _w in (50, 150, 300, 750)),
    *(f'instability_{_w}' for _w in (50, 150)),
}
def get_parquet_files(klines_dir=None, window_start=None, window_end=None):
    """Return the sorted daily parquet files that fall inside the window.

    Args:
        klines_dir: Directory to scan (non-recursively) for ``*.parquet``.
            Defaults to the module-level ``VBT_KLINES_DIR``.
        window_start: Inclusive lower bound, compared as a string against each
            file stem. Defaults to ``WINDOW_START``.
        window_end: Inclusive upper bound, compared the same way. Defaults to
            ``WINDOW_END``.

    Returns:
        list[Path]: Matching files in ascending path order.
    """
    klines_dir = VBT_KLINES_DIR if klines_dir is None else Path(klines_dir)
    window_start = WINDOW_START if window_start is None else window_start
    window_end = WINDOW_END if window_end is None else window_end
    return [
        p for p in sorted(klines_dir.glob('*.parquet'))
        # Test the file NAME only: matching against the full path would drop
        # every file whenever a parent directory happens to contain 'catalog'.
        if 'catalog' not in p.name
        # Lexical comparison; correct as long as stems are ISO YYYY-MM-DD dates.
        and window_start <= p.stem <= window_end
    ]
def calibrate_vol_p60(parquet_files):
    """Compute the static (gold-path) 60th-percentile BTC volatility threshold.

    Only the first two entries of ``parquet_files`` are read. For each file that
    has a ``BTCUSDT`` column, every bar ``i >= 60`` contributes the standard
    deviation of simple returns over the 50 prices preceding it; only strictly
    positive values are kept (NaN results fail the ``> 0`` test and are dropped).

    Args:
        parquet_files: Sequence of parquet paths, in window order.

    Returns:
        float: The 60th percentile of the collected volatilities, or ``0.0002``
        when nothing could be collected (no files / no BTCUSDT / too few bars).
    """
    all_vols = []
    for pf in parquet_files[:2]:
        df = pd.read_parquet(pf)
        if 'BTCUSDT' not in df.columns:
            continue
        prices = df['BTCUSDT'].values
        del df  # only the price array is needed from here on
        # Start at bar 60 with a 50-price window. The per-window arithmetic is
        # kept exactly as-is so the threshold stays comparable with the
        # reference run. Since i >= 60 the slice always holds 50 prices.
        for i in range(60, len(prices)):
            seg = prices[i - 50:i]
            v = float(np.std(np.diff(seg) / seg[:-1]))
            if v > 0:
                all_vols.append(v)
    vp60 = float(np.percentile(all_vols, 60)) if all_vols else 0.0002
    print(f" Static vol_p60 (gold method, 2-file calibration): {vp60:.8f}")
    return vp60
def build_ob_engine(parquet_files):
    """Build an OBFeatureEngine backed by a MockOBProvider with gold-spec biases.

    The asset universe is the union, over all given files, of every column that
    is not listed in ``META_COLS``.

    Args:
        parquet_files: Parquet paths whose columns define the asset universe.

    Returns:
        tuple: ``(ob_eng, ob_assets)`` — the feature engine (already preloaded
        for the ``'mock'`` date) and the sorted list of asset names.
    """
    from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
    from nautilus_dolphin.nautilus.ob_provider import MockOBProvider

    # Scan all files to collect the asset universe.
    # NOTE(review): this loads each file in full just to read its column names;
    # a schema-only read would be cheaper if the parquet engine is known.
    all_assets = set()
    for pf in parquet_files:
        all_assets.update(
            c for c in pd.read_parquet(pf).columns if c not in META_COLS
        )
    ob_assets = sorted(all_assets)

    mock_ob = MockOBProvider(
        imbalance_bias=-0.09, depth_scale=1.0, assets=ob_assets,
        # Per-asset overrides of the default imbalance bias.
        imbalance_biases={
            'BTCUSDT': -0.086, 'ETHUSDT': -0.092,
            'BNBUSDT': +0.05, 'SOLUSDT': +0.05,
        },
    )
    ob_eng = OBFeatureEngine(mock_ob)
    ob_eng.preload_date('mock', ob_assets)
    print(f" OB_ASSETS={len(ob_assets)}")
    return ob_eng, ob_assets
def _btc_dvol(df, window=50):
    """Return per-bar rolling std of BTCUSDT simple returns (NaN where undefined).

    The first ``window`` bars — and every bar when ``df`` has no ``BTCUSDT``
    column — are left as NaN.
    """
    dvol = np.full(len(df), np.nan)
    if 'BTCUSDT' not in df.columns:
        return dvol
    bp = df['BTCUSDT'].values
    diffs = np.zeros(len(bp), dtype=np.float64)
    diffs[1:] = np.diff(bp) / bp[:-1]
    # Kept as an explicit per-bar loop (not vectorised) so the floating-point
    # results stay bit-identical to the reference implementation.
    for j in range(window, len(bp)):
        dvol[j] = np.std(diffs[j - window:j])
    return dvol


def run_backtest():
    """Run the 56-day D_LIQ_GOLD backtest and write a JSON summary.

    Steps: collect the window's parquet files, calibrate the static ``vol_p60``
    threshold, build the mock order-book engine, create the engine from
    ``ENGINE_KWARGS``, optionally attach ACBv6 and the MC forewarner (both are
    best-effort: failures are printed and the run continues), then process one
    parquet file per day and compute summary metrics.

    Returns:
        dict | None: ``None`` when no parquet files are found for the window.
        A reduced dict (roi/trades/pf/dd/wr/sharpe) when no trades occurred —
        in that case nothing is written to disk. Otherwise the full result
        dict, which is also saved under ``RUN_LOGS_DIR``.
    """
    print('=' * 70)
    print('56-DAY VBT-VECTOR NAUTILUS BACKTEST — D_LIQ_GOLD CONFIG')
    print(f'Window: {WINDOW_START} → {WINDOW_END}')
    print('=' * 70)

    # ── Imports ────────────────────────────────────────────────────────────
    # Deferred until call time; they depend on the sys.path set up at module level.
    from nautilus_dolphin.nautilus.proxy_boost_engine import create_d_liq_engine
    from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker

    t_start = time.time()

    # ── Data setup ─────────────────────────────────────────────────────────
    parquet_files = get_parquet_files()
    if not parquet_files:
        print(f'ERROR: No parquet files found in {VBT_KLINES_DIR} for window {WINDOW_START} → {WINDOW_END}')
        return None
    print(f' Parquet files: {len(parquet_files)} ({parquet_files[0].stem} → {parquet_files[-1].stem})')
    vol_p60 = calibrate_vol_p60(parquet_files)
    ob_eng, _ = build_ob_engine(parquet_files)

    # ── Engine creation ─────────────────────────────────────────────────────
    kw = ENGINE_KWARGS.copy()
    # Single source of truth for the ROI / Sharpe denominators below.
    initial_capital = kw['initial_capital']
    eng = create_d_liq_engine(**kw)
    eng.set_ob_engine(ob_eng)
    print(f' Engine: {type(eng).__name__} | leverage: soft={eng.base_max_leverage}x abs={eng.abs_max_leverage}x')

    # ── ACBv6 setup ─────────────────────────────────────────────────────────
    date_strings = [p.stem for p in parquet_files]
    acb = AdaptiveCircuitBreaker()
    try:
        acb.preload_w750(date_strings)
        eng.set_acb(acb)
        print(' ACBv6: loaded with NG6 eigenvalues')
    except Exception as e:
        # Deliberate best-effort: the engine simply runs without the ACB.
        print(f' ACBv6: preload failed ({e}) — running without external factors')

    # ── MC Forewarner ───────────────────────────────────────────────────────
    if Path(MC_MODELS_DIR).exists():
        try:
            from mc.mc_ml import DolphinForewarner
            forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
            eng.set_mc_forewarner(forewarner, MC_BASE_CFG)
            print(f' MC Forewarner: wired ({MC_MODELS_DIR})')
        except Exception as e:
            # Deliberate best-effort: the gate is optional.
            print(f' MC Forewarner: init failed ({e}) — disabled')
    else:
        print(' MC Forewarner: models dir not found — disabled')

    # ── NOTE: NO set_esoteric_hazard_multiplier call (gold path invariant) ─
    print(f' Hazard call: NOT called (gold path — base_max_leverage stays at {eng.base_max_leverage}x)')
    print(f'\n Starting 56-day loop...\n {"Day":<6} {"Date":<12} {"Capital":>12} {"Trades":>7} {"DayPnL":>10}')
    print(f' {"-"*6} {"-"*12} {"-"*12} {"-"*7} {"-"*10}')

    daily_caps = []
    daily_pnls = []
    total_days = len(parquet_files)
    for i, pf in enumerate(parquet_files):
        ds = pf.stem
        # Lazy load + float64 (gold path: no float32 cast to preserve precision)
        df = pd.read_parquet(pf)
        acols = [c for c in df.columns if c not in META_COLS]
        # dvol approximation (identical to replicate_181_gold.py)
        dvol = _btc_dvol(df)
        # Static vol_p60 (gold path — NOT rolling); NaN bars count as "not ok".
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)

        cap_before = eng.capital
        eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
        day_pnl = eng.capital - cap_before
        daily_caps.append(eng.capital)
        daily_pnls.append(day_pnl)

        # Progress line: first day, last day, every 10th day, and any large move.
        if i == 0 or i == total_days - 1 or (i + 1) % 10 == 0 or abs(day_pnl) > 500:
            elapsed = time.time() - t_start
            print(f' {i+1:<6} {ds:<12} ${eng.capital:>11,.2f} {len(eng.trade_history):>7} {day_pnl:>+10.2f} [{elapsed:.0f}s]')

        # Release the day's frame before loading the next one (RAM-safe).
        del df
        gc.collect()

    # ── Metrics ────────────────────────────────────────────────────────────
    elapsed_total = time.time() - t_start
    tr = eng.trade_history
    n = len(tr)
    roi = (eng.capital - initial_capital) / initial_capital * 100.0
    print(f'\n {"="*60}')
    print(f' RESULT: ROI={roi:+.2f}% | Trades={n} | Capital=${eng.capital:,.2f}')
    if n == 0:
        print(' No trades — check signal thresholds or data')
        return dict(roi=roi, trades=0, pf=0, dd=0, wr=0, sharpe=0)

    def _abs(t):
        # Absolute PnL of a trade; falls back to pnl_pct * 250.0 for trade
        # records without `pnl_absolute` (NOTE(review): origin of 250.0 is not
        # visible here — confirm).
        return t.pnl_absolute if hasattr(t, 'pnl_absolute') else t.pnl_pct * 250.0

    wins = [t for t in tr if _abs(t) > 0]
    losses = [t for t in tr if _abs(t) <= 0]
    wr = len(wins) / n * 100.0
    # Profit factor; the floor avoids division by zero when there are no losses.
    pf = sum(_abs(t) for t in wins) / max(abs(sum(_abs(t) for t in losses)), 1e-9)

    # Drawdown from daily equity curve.
    # NOTE(review): the curve starts at end-of-day-1 capital, so a loss on the
    # first day relative to the initial capital is not counted. Left unchanged
    # to stay comparable with the reference numbers — confirm this is intended.
    caps_arr = np.array(daily_caps)
    roll_max = np.maximum.accumulate(caps_arr)
    dd_arr = (roll_max - caps_arr) / roll_max * 100.0
    max_dd = float(np.max(dd_arr))

    # Sharpe (daily PnL relative to the initial capital, annualised with 252).
    daily_rets = np.array(daily_pnls) / initial_capital
    sharpe = float(np.mean(daily_rets) / (np.std(daily_rets) + 1e-12) * np.sqrt(252))

    print(f' PF={pf:.3f} | WR={wr:.1f}% | MaxDD={max_dd:.2f}% | Sharpe={sharpe:.2f}')
    print(f' Elapsed: {elapsed_total:.1f}s')
    print('\n GOLD REFERENCE (D_LIQ_GOLD, Windows): ROI=+181.81%, T=2155, DD=17.65%')
    print(' EXPECTED (post-ACB-refactor regression): ROI~+111%, T~1959')
    print(f' {"="*60}')

    result = dict(
        roi=round(roi, 2), trades=n, pf=round(pf, 3),
        dd=round(max_dd, 2), wr=round(wr, 1), sharpe=round(sharpe, 2),
        capital=round(eng.capital, 2),
        window=f'{WINDOW_START}:{WINDOW_END}',
        days=total_days,
        elapsed_s=round(elapsed_total, 1),
        engine='D_LIQ_GOLD (8x/9x)',
        run_ts=datetime.now(timezone.utc).isoformat(),
    )

    # Save result (file name uses local wall-clock time; `run_ts` inside is UTC).
    out_path = RUN_LOGS_DIR / f'vbt_56day_nautilus_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json'
    out_path.write_text(json.dumps(result, indent=2))
    print(f' Saved: {out_path}')
    return result
if __name__ == '__main__':
    # run_backtest() returns None only when no parquet files were found for the
    # window; report that to the calling shell as a non-zero exit status.
    sys.exit(0 if run_backtest() is not None else 1)