# Source: DOLPHIN/prod/backtest_gold_verify.py (385 lines, 16 KiB, Python)
"""
backtest_gold_verify.py Gold parity verification via direct engine codepath.
Runs all 56 backtest dates through the same engine codepath used in production:
same step_bar loop, same OB preload, same vol_ok, same hazard multiplier,
same ACB, same MC forewarner.
Avoids DolphinActor/Nautilus Strategy overhead (Strategy.log is Rust-backed
read-only; Strategy requires a kernel context to initialise). Instead this
harness directly instantiates and wires the same sub-components that
DolphinActor.on_start() wires, then replicates _run_replay_day() inline.
Gold targets (post-fix D_LIQ):
T=2155 (exact) ROI+181% (no ACB, Linux) ROI+189% (full ACB on Windows)
Usage:
/usr/bin/python3 prod/backtest_gold_verify.py
/usr/bin/python3 prod/backtest_gold_verify.py --summary # quick summary only
"""
import sys, time, argparse, yaml
from pathlib import Path
from datetime import datetime, timezone
import numpy as np
import pandas as pd

# Repo root = two levels up from this file; prepend it (and the
# nautilus_dolphin package dir) so project-local imports resolve.
HCM_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(HCM_DIR / 'nautilus_dolphin'))
sys.path.insert(0, str(HCM_DIR))

PARQUET_DIR = HCM_DIR / 'vbt_cache'  # one parquet file per backtest date
MC_MODELS_DIR = str(HCM_DIR / 'nautilus_dolphin' / 'mc_results' / 'models')  # MC forewarner model bundle
CONFIG_PATH = Path(__file__).parent / 'configs' / 'blue.yml'  # run configuration

INITIAL_CAPITAL = 25_000.0  # starting capital for the full replay
GOLD_T = 2155               # exact gold trade count the run must reproduce
GOLD_ROI_LO = 175.0 # lower bound (no ACB, no w750)
GOLD_ROI_HI = 195.0 # upper bound (full ACB)

# Parquet columns that are replay metadata, not tradable asset price series.
_META_COLS_SET = {
    'timestamp', 'scan_number', 'v50_lambda_max_velocity',
    'v150_lambda_max_velocity', 'v300_lambda_max_velocity',
    'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150',
}

# Base trial config handed to engine.set_mc_forewarner() — mirrors the gold trial.
_MC_BASE_CFG = {
    'trial_id': 0, 'vel_div_threshold': -0.020, 'vel_div_extreme': -0.050,
    'use_direction_confirm': True, 'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.00, 'dc_leverage_reduce': 0.50,
    'vd_trend_lookback': 10, 'min_leverage': 0.50, 'max_leverage': 5.00,
    'leverage_convexity': 3.00, 'fraction': 0.20,
    'use_alpha_layers': True, 'use_dynamic_leverage': True,
    'fixed_tp_pct': 0.0095, 'stop_pct': 1.00, 'max_hold_bars': 120,
    'use_sp_fees': True, 'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.50,
    'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': 0.40,
    'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.00,
    'use_asset_selection': True, 'min_irp_alignment': 0.45, 'lookback': 100,
    'acb_beta_high': 0.80, 'acb_beta_low': 0.20, 'acb_w750_threshold_pct': 60,
}
def _load_config() -> dict:
    """Parse the blue.yml run configuration and return it as a dict."""
    with CONFIG_PATH.open() as fh:
        return yaml.safe_load(fh)
def _build_engine(cfg: dict, initial_capital: float):
    """Mirror DolphinActor.on_start() engine + subsystem wiring.

    Creates the boost engine from the `engine:` section of *cfg*, then wires
    the same subsystems production wires: the MC forewarner, the
    AdaptiveCircuitBreaker (w750 preloaded for every cached date), and a
    MockOBProvider-backed OBFeatureEngine. Each optional subsystem is wired
    best-effort: a failure prints a [WARN] and the replay continues without it.

    Args:
        cfg: parsed blue.yml configuration dict.
        initial_capital: starting capital handed to create_boost_engine().

    Returns:
        The wired engine instance returned by create_boost_engine().
    """
    # Local imports: resolution of the nautilus_dolphin package fails here,
    # at build time, rather than at module import time.
    from nautilus_dolphin.nautilus.proxy_boost_engine import create_boost_engine, DEFAULT_BOOST_MODE
    from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
    from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
    from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
    eng_cfg = cfg.get('engine', {})
    boost_mode = eng_cfg.get('boost_mode', DEFAULT_BOOST_MODE)
    # Every kwarg default below matches the gold trial values (_MC_BASE_CFG),
    # so a sparse `engine:` config section still reproduces the gold run.
    engine = create_boost_engine(
        mode=boost_mode,
        initial_capital=initial_capital,
        vel_div_threshold=eng_cfg.get('vel_div_threshold', -0.02),
        vel_div_extreme=eng_cfg.get('vel_div_extreme', -0.05),
        min_leverage=eng_cfg.get('min_leverage', 0.5),
        max_leverage=eng_cfg.get('max_leverage', 5.0),
        abs_max_leverage=eng_cfg.get('abs_max_leverage', 6.0),
        leverage_convexity=eng_cfg.get('leverage_convexity', 3.0),
        fraction=eng_cfg.get('fraction', 0.20),
        fixed_tp_pct=eng_cfg.get('fixed_tp_pct', 0.0095),
        stop_pct=eng_cfg.get('stop_pct', 1.0),
        max_hold_bars=eng_cfg.get('max_hold_bars', 120),
        use_direction_confirm=eng_cfg.get('use_direction_confirm', True),
        dc_lookback_bars=eng_cfg.get('dc_lookback_bars', 7),
        dc_min_magnitude_bps=eng_cfg.get('dc_min_magnitude_bps', 0.75),
        dc_skip_contradicts=eng_cfg.get('dc_skip_contradicts', True),
        dc_leverage_boost=eng_cfg.get('dc_leverage_boost', 1.0),
        dc_leverage_reduce=eng_cfg.get('dc_leverage_reduce', 0.5),
        use_asset_selection=eng_cfg.get('use_asset_selection', True),
        min_irp_alignment=eng_cfg.get('min_irp_alignment', 0.45),
        use_sp_fees=eng_cfg.get('use_sp_fees', True),
        use_sp_slippage=eng_cfg.get('use_sp_slippage', True),
        sp_maker_entry_rate=eng_cfg.get('sp_maker_entry_rate', 0.62),
        sp_maker_exit_rate=eng_cfg.get('sp_maker_exit_rate', 0.50),
        use_ob_edge=eng_cfg.get('use_ob_edge', True),
        ob_edge_bps=eng_cfg.get('ob_edge_bps', 5.0),
        ob_confirm_rate=eng_cfg.get('ob_confirm_rate', 0.40),
        lookback=eng_cfg.get('lookback', 100),
        use_alpha_layers=eng_cfg.get('use_alpha_layers', True),
        use_dynamic_leverage=eng_cfg.get('use_dynamic_leverage', True),
        seed=eng_cfg.get('seed', 42),
    )
    engine.set_esoteric_hazard_multiplier(0.0)  # gold spec: hazard=0 → base_max_leverage=8.0
    print(f"[INIT] Engine created: {type(engine).__name__}, base_max_leverage={getattr(engine,'base_max_leverage','?')}", flush=True)
    # MC Forewarner — optional; only wired when the model bundle dir exists.
    mc_models_dir = MC_MODELS_DIR
    if Path(mc_models_dir).exists():
        try:
            from mc.mc_ml import DolphinForewarner
            fw = DolphinForewarner(models_dir=mc_models_dir)
            engine.set_mc_forewarner(fw, _MC_BASE_CFG)
            print(f"[INIT] DolphinForewarner wired from {mc_models_dir}", flush=True)
        except Exception as e:
            print(f"[WARN] MC Forewarner init failed: {e}", flush=True)
    else:
        print(f"[WARN] MC models dir not found: {mc_models_dir}", flush=True)
    # Discover asset columns from first 5 parquet files (union of non-meta
    # columns; assumes the first 5 dates are representative of all —
    # NOTE(review): confirm every date shares the same column set).
    _all_bt_assets: list = []
    try:
        _seen: set = set()
        for _pf in sorted(PARQUET_DIR.glob('*.parquet'))[:5]:
            _df_h = pd.read_parquet(_pf)
            _seen.update(c for c in _df_h.columns if c not in _META_COLS_SET)
        _all_bt_assets = sorted(_seen)
        print(f"[INIT] Discovered {len(_all_bt_assets)} asset columns: {_all_bt_assets}", flush=True)
    except Exception as e:
        print(f"[WARN] Could not scan parquet assets: {e}", flush=True)
    # ACB injection (matches gold_repro)
    try:
        acb = AdaptiveCircuitBreaker()
        # Redirect the eigenvalues path to the first Linux mount that exists;
        # on other hosts the ACB's own default path is kept.
        _linux_eigen_paths = [
            Path('/mnt/ng6_data/eigenvalues'),
            Path('/mnt/dolphin_training/data/eigenvalues'),
            Path('/mnt/dolphinng6_data/eigenvalues'),
        ]
        for _ep in _linux_eigen_paths:
            if _ep.exists():
                acb.config.EIGENVALUES_PATH = _ep
                print(f"[INIT] ACB eigenvalues path → {_ep}", flush=True)
                break
        files = sorted(PARQUET_DIR.glob('*.parquet'))
        preload_dates = [pf.stem for pf in files]  # parquet stems are date strings
        acb.preload_w750(preload_dates)
        engine.set_acb(acb)
        print(f"[INIT] ACB injected ({len(preload_dates)} dates preloaded)", flush=True)
    except Exception as e:
        print(f"[WARN] ACB injection failed: {e}", flush=True)
    # MockOBProvider injection (Gold Biases)
    # Preload ONCE with "mock" — matches exp_shared.py gold reference exactly.
    # MockOBProvider produces identical synthetic data on every call so a single
    # preload populates the full snap-index cache used for all 56 replay days.
    try:
        gold_biases = {
            'BTCUSDT': -0.086, 'ETHUSDT': -0.092, 'BNBUSDT': +0.05, 'SOLUSDT': +0.05,
        }
        mock_ob = MockOBProvider(
            imbalance_bias=-0.09, depth_scale=1.0,
            assets=_all_bt_assets,
            imbalance_biases=gold_biases,
        )
        ob_eng = OBFeatureEngine(mock_ob)
        ob_eng.preload_date("mock", _all_bt_assets)  # gold method: single global preload
        engine.set_ob_engine(ob_eng)
        print(f"[INIT] MockOBProvider injected + preloaded (Gold Biases, {len(_all_bt_assets)} assets)", flush=True)
    except Exception as e:
        print(f"[WARN] OB injection failed: {e}", flush=True)
    return engine
def _compute_vol_ok(df: pd.DataFrame, vol_p60: float) -> np.ndarray:
"""Gold vol_ok method — matches exp_shared.load_data() / process_day() exactly.
Uses segment-based dvol: std(diff(seg) / seg[:-1]) for 50-bar sliding window,
starting at bar 50. Rows without finite dvol or below threshold False.
"""
vol_ok = np.zeros(len(df), dtype=bool)
if 'BTCUSDT' not in df.columns or vol_p60 <= 0:
return vol_ok
bp = df['BTCUSDT'].values
dv = np.full(len(bp), np.nan)
for i in range(50, len(bp)):
seg = bp[max(0, i - 50):i]
if len(seg) < 10:
continue
with np.errstate(invalid='ignore', divide='ignore'):
rets = np.diff(seg) / seg[:-1]
fin = rets[np.isfinite(rets)]
if len(fin) >= 5:
dv[i] = float(np.std(fin))
vol_ok = np.where(np.isfinite(dv), dv > vol_p60, False)
return vol_ok
def _compute_mae_for_day(trades_today: list, df: pd.DataFrame) -> list:
"""Compute per-trade Maximum Adverse Excursion (MAE) for trades closed today.
For SHORT trades, adverse excursion = price moving UP from entry.
MAE_pct = max(price[entry_bar:exit_bar+1] / entry_price - 1) * 100 (positive = adverse)
Uses close prices only (1-min bars don't have OHLC), so MAE is a lower-bound
estimate true intra-bar MAE could be higher.
Returns list of (trade_record, mae_pct) pairs.
"""
results = []
for t in trades_today:
asset = getattr(t, 'asset', None)
entry_bar = getattr(t, 'entry_bar', None)
exit_bar = getattr(t, 'exit_bar', None)
entry_price = getattr(t, 'entry_price', None)
direction = getattr(t, 'direction', -1)
if asset is None or entry_bar is None or exit_bar is None or not entry_price:
results.append((t, float('nan')))
continue
if asset not in df.columns:
results.append((t, float('nan')))
continue
lo = max(0, int(entry_bar))
hi = min(len(df) - 1, int(exit_bar))
prices = df[asset].iloc[lo:hi + 1].values.astype(float)
prices = prices[np.isfinite(prices) & (prices > 0)]
if len(prices) == 0:
results.append((t, float('nan')))
continue
if direction == -1: # SHORT: adverse = price going up
mae_pct = float(np.max(prices) / entry_price - 1.0) * 100.0
else: # LONG: adverse = price going down
mae_pct = float(1.0 - np.min(prices) / entry_price) * 100.0
mae_pct = max(0.0, mae_pct) # clamp: negative means favorable the whole time
results.append((t, mae_pct))
return results
def _run_day(engine, cfg: dict, date_str: str, posture: str = 'APEX') -> tuple:
    """Run a single replay day via engine.process_day() — identical to gold reference path.

    Uses process_day() directly (same as test_dliq_fix_verify.py / exp_shared.py) so
    NaN-vel_div skipping, bar_idx assignment, and proxy_B updates are bit-for-bit
    identical. OB preload is done once globally in _build_engine(), not per-day.

    Args:
        engine: wired boost engine (mutated in place: capital, trade_history).
        cfg: parsed blue.yml config dict.
        date_str: date stem of the parquet file to replay.
        posture: posture label forwarded to process_day().

    Returns:
        (n_bars, df) where df is the loaded parquet (used for MAE computation),
        or (0, empty DataFrame) when the parquet for date_str is missing.
    """
    dir_str = cfg.get('direction', 'short_only')
    direction_val = 1 if dir_str in ['long', 'long_only'] else -1
    pq_file = PARQUET_DIR / f"{date_str}.parquet"
    if not pq_file.exists():
        print(f"[WARN] No parquet for {date_str} — skipping", flush=True)
        return 0, pd.DataFrame()
    df = pd.read_parquet(pq_file)
    asset_columns = [c for c in df.columns if c not in _META_COLS_SET]
    # Fix: a YAML `paper_trade:` key with an empty body parses to None, which
    # would crash `.get()` — coalesce to {} first. Falsy values (None / 0 /
    # missing) fall through to the gold default threshold.
    vol_p60 = float(
        (cfg.get('paper_trade') or {}).get('vol_p60')
        or cfg.get('vol_p60')
        or 0.00009868
    )
    vol_ok = _compute_vol_ok(df, vol_p60)
    engine.process_day(
        date_str,
        df,
        asset_columns,
        vol_regime_ok=vol_ok,
        direction=direction_val,
        posture=posture,
    )
    return len(df), df
def run_verify(summary_only: bool = False):
    """Replay every cached parquet date through one engine instance and
    compare the cumulative outcome against the gold targets.

    Args:
        summary_only: suppress the per-day progress lines; print only the
            final summary block.

    Returns:
        True when both trade count (== GOLD_T) and ROI (within
        [GOLD_ROI_LO, GOLD_ROI_HI]) match gold; False otherwise.
        Calls sys.exit(1) if no parquet files exist.
    """
    cfg = _load_config()
    files = sorted(PARQUET_DIR.glob('*.parquet'))
    if not files:
        print(f"[ERROR] No parquet files in {PARQUET_DIR}", flush=True)
        sys.exit(1)
    all_dates = [pf.stem for pf in files]
    print(f"[VERIFY] {len(files)} dates: {all_dates[0]}{all_dates[-1]}", flush=True)
    engine = _build_engine(cfg, INITIAL_CAPITAL)
    total_T = 0                 # cumulative closed-trade count
    peak_cap = INITIAL_CAPITAL  # capital high-water mark for drawdown
    max_dd = 0.0                # worst end-of-day drawdown seen (%)
    all_mae: list = []  # (mae_pct, trade) — collected across all days
    t0 = time.time()
    for pf in files:
        date_str = pf.stem
        # Snapshot history length so today's trades can be sliced out after.
        t_before = len(engine.trade_history)
        _, day_df = _run_day(engine, cfg, date_str)
        cap_after = engine.capital
        trades_today = engine.trade_history[t_before:]
        day_trades = len(trades_today)
        total_T = len(engine.trade_history)
        # Drawdown uses end-of-day capital only (no intraday marks).
        peak_cap = max(peak_cap, cap_after)
        dd = (peak_cap - cap_after) / peak_cap * 100.0
        max_dd = max(max_dd, dd)
        # MAE per trade (uses same parquet df, no extra I/O)
        if not day_df.empty and trades_today:
            for t, mae in _compute_mae_for_day(trades_today, day_df):
                all_mae.append((mae, t))
        if not summary_only:
            print(
                f"{date_str}: T+{day_trades:3d} (cum={total_T:4d}) "
                f"${cap_after:9,.0f} dd={dd:.2f}%",
                flush=True,
            )
    elapsed = time.time() - t0
    roi = (engine.capital / INITIAL_CAPITAL - 1.0) * 100.0
    # ── MAE summary ─────────────────────────────────────────────────────────────
    valid_mae = [(m, t) for m, t in all_mae if not (m != m)]  # exclude NaN (NaN != NaN)
    mae_arr = np.array([m for m, _ in valid_mae]) if valid_mae else np.array([])
    print(flush=True)
    print(f"{'='*60}", flush=True)
    print(f"RESULT: T={total_T} ROI={roi:+.2f}% DD={max_dd:.2f}% ({elapsed:.0f}s)", flush=True)
    print(f"TARGET: T={GOLD_T} ROI={GOLD_ROI_LO:.0f}{GOLD_ROI_HI:.0f}% (gold range)", flush=True)
    print(flush=True)
    if len(mae_arr) > 0:
        worst_mae = float(np.max(mae_arr))
        p90_mae = float(np.percentile(mae_arr, 90))
        p50_mae = float(np.percentile(mae_arr, 50))
        worst_idx = int(np.argmax(mae_arr))
        worst_t = valid_mae[worst_idx][1]
        # Worst single-trade MAE expressed as a fraction of the worst drawdown.
        mae_as_dd_pct = (worst_mae / max_dd * 100.0) if max_dd > 0 else float('nan')
        print(f"MAE (close-price lower bound, SHORT=adverse-up):", flush=True)
        print(f" worst single trade : {worst_mae:.4f}% "
              f"({worst_t.asset if hasattr(worst_t,'asset') else '?'} "
              f"bars {getattr(worst_t,'entry_bar','?')}{getattr(worst_t,'exit_bar','?')} "
              f"exit={getattr(worst_t,'exit_reason','?')})", flush=True)
        print(f" worst / max_DD : {mae_as_dd_pct:.1f}% ({worst_mae:.4f}% vs DD={max_dd:.2f}%)", flush=True)
        print(f" p90 / p50 / mean : {p90_mae:.4f}% / {p50_mae:.4f}% / {np.mean(mae_arr):.4f}%", flush=True)
        print(flush=True)
    t_ok = (total_T == GOLD_T)
    roi_ok = (GOLD_ROI_LO <= roi <= GOLD_ROI_HI)
    print(f"T={total_T} {'✓ PASS' if t_ok else '✗ FAIL (expected 2155)':30s}", flush=True)
    print(f"ROI={roi:+.2f}% {'✓ PASS' if roi_ok else f'✗ FAIL (expected {GOLD_ROI_LO:.0f}{GOLD_ROI_HI:.0f}%)':30s}", flush=True)
    print(f"{'='*60}", flush=True)
    if t_ok and roi_ok:
        print("\n=== GOLD PARITY CONFIRMED ===\n", flush=True)
        return True
    else:
        print("\n!!! GOLD PARITY MISMATCH — investigate !!!\n", flush=True)
        return False
if __name__ == '__main__':
    # CLI entry point: exit code 0 on gold parity, 1 on mismatch.
    ap = argparse.ArgumentParser()
    ap.add_argument('--summary', action='store_true', help='Print summary only (no per-day output)')
    args = ap.parse_args()
    ok = run_verify(summary_only=args.summary)
    sys.exit(0 if ok else 1)