Files
DOLPHIN/nautilus_dolphin/dvae/run_gold_with_flint_gate.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

288 lines
13 KiB
Python
Executable File

"""
Task 5: Backtest with proxy_B Flint Gate vs Gold Standard.
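Tests three gate configs against the gold baseline:
Gold: ROI~+44.89%, PF~1.123, DD~14.95%, Sharpe~2.50, Trades~2128
(same 55-day NG3 5s dataset, same engine stack)
NOTE: the GOLD dict used for the final comparison table lists different figures
(ROI 88.55, PF 1.215, DD 15.05, Sharpe 4.38, Trades 2155); confirm which set is current.
Gate variants:
A. No gate (baseline reproduction — sanity check)
B. Fixed threshold=0.0 (allow when proxy_B > 0)
C. Adaptive p50 (allow when proxy_B > rolling median)
D. Adaptive p75 (allow when proxy_B > rolling p75)
NOTE: only variants A and B are present in the `configs` list of this script; C and D are not run.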
Measures: ROI, PF, DD, WR, Sharpe, Trades, gate suppression rate.
DOES NOT MODIFY ANY PRODUCTION CODE.
"""
# Standard library and numeric stack.
import sys, time, math
# Force UTF-8 on stdout (replacing unencodable characters): the messages printed by this
# script contain non-ASCII characters such as "—" and "→".
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
_HERE = Path(__file__).resolve().parent
_ND_ROOT = _HERE.parent # nautilus_dolphin/ outer dir — contains nautilus_dolphin pkg + mc/
# Insert ND_ROOT at index 0 so it takes priority over any stub nautilus_dolphin at project root
sys.path.insert(0, str(_ND_ROOT))
# The timer starts before the project imports, so the "JIT" duration printed after the
# warm-up calls also includes the import time of the modules below.
print("Compiling numba kernels...")
t0c = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
OBFeatureEngine, compute_imbalance_nb, compute_depth_1pct_nb,
compute_depth_quality_nb, compute_fill_probability_nb, compute_spread_proxy_nb,
compute_depth_asymmetry_nb, compute_imbalance_persistence_nb,
compute_withdrawal_velocity_nb, compute_market_agreement_nb, compute_cascade_signal_nb,
)
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
# NOTE(review): an earlier note here said DolphinForewarner was skipped because its pickle
# hangs on an sklearn 1.8 vs 1.7.1 mismatch (with 0 interventions anyway). The import is
# nevertheless active and the forewarner is instantiated further down inside a try/except —
# confirm which behaviour is intended.
from mc.mc_ml import DolphinForewarner
# Not prefixed with the nautilus_dolphin package — presumably resolved from this script's own
# directory; verify when running from another working directory.
from alpha_signal_generator_flint_gate import FlintGatedEngine
# Warm-up: invoke each imported kernel once on tiny dummy inputs. Presumably this
# triggers numba compilation up front so it is not paid inside the timed backtests.
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1)
compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(
    np.ones((10, 2), dtype=np.float64),
    8, -1, 5, 500.0, 20, 0.20,
)
compute_sizing_nb(
    -0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
    np.zeros(4, dtype=np.int64),
    np.zeros(4, dtype=np.int64),
    np.zeros(5, dtype=np.float64),
    0, -1, 0.01, 0.04,
)
check_dc_nb(_p, 3, 1, 0.75)

# Dummy bid / ask style arrays for the order-book kernels.
_b = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64)
_a = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64)
compute_imbalance_nb(_b, _a)
compute_depth_1pct_nb(_b, _a)
compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)

# Elapsed time since t0c (set before the project imports).
print(f" JIT: {time.time() - t0c:.1f}s")
# Directory scanned for the per-day parquet files (machine-specific absolute path).
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")

# Columns of each parquet frame that are NOT asset price columns.
META_COLS = {
    'timestamp',
    'scan_number',
    'v50_lambda_max_velocity',
    'v150_lambda_max_velocity',
    'v300_lambda_max_velocity',
    'v750_lambda_max_velocity',
    'vel_div',
    'instability_50',
    'instability_150',
}

# Keyword arguments passed to the engine constructor for every backtest run.
ENGINE_KWARGS = {
    'initial_capital': 25000.0,
    'vel_div_threshold': -0.02,
    'vel_div_extreme': -0.05,
    'min_leverage': 0.5,
    'max_leverage': 5.0,
    'leverage_convexity': 3.0,
    'fraction': 0.20,
    'fixed_tp_pct': 0.0095,
    'stop_pct': 1.0,
    'max_hold_bars': 120,
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.0,
    'dc_leverage_reduce': 0.5,
    'use_asset_selection': True,
    'min_irp_alignment': 0.45,
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    'use_ob_edge': True,
    'ob_edge_bps': 5.0,
    'ob_confirm_rate': 0.40,
    'lookback': 100,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    'seed': 42,
}

# Location of the MC-Forewarner model files (machine-specific absolute path).
MC_MODELS_DIR = str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))

# Base configuration handed to the engine together with the MC forewarner.
# The entries it shares with ENGINE_KWARGS carry the same values.
MC_BASE_CFG = dict(
    trial_id=0,
    vel_div_threshold=-0.020,
    vel_div_extreme=-0.050,
    use_direction_confirm=True,
    dc_lookback_bars=7,
    dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True,
    dc_leverage_boost=1.00,
    dc_leverage_reduce=0.50,
    vd_trend_lookback=10,
    min_leverage=0.50,
    max_leverage=5.00,
    leverage_convexity=3.00,
    fraction=0.20,
    use_alpha_layers=True,
    use_dynamic_leverage=True,
    fixed_tp_pct=0.0095,
    stop_pct=1.00,
    max_hold_bars=120,
    use_sp_fees=True,
    use_sp_slippage=True,
    sp_maker_entry_rate=0.62,
    sp_maker_exit_rate=0.50,
    use_ob_edge=True,
    ob_edge_bps=5.00,
    ob_confirm_rate=0.40,
    ob_imbalance_bias=-0.09,
    ob_depth_scale=1.00,
    use_asset_selection=True,
    min_irp_alignment=0.45,
    lookback=100,
    acb_beta_high=0.80,
    acb_beta_low=0.20,
    acb_w750_threshold_pct=60,
)
# Best-effort load of the MC forewarner: any exception raised while constructing it is
# reported and the script continues with `forewarner = None` (checked before use later).
print("\nLoading MC-Forewarner...")
try:
    forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
    print(" MC-Forewarner ready")
except Exception as load_error:
    print(f" [WARN] MC-Forewarner failed to load: {load_error} — running without it")
    forewarner = None
# ── Load data ─────────────────────────────────────────────────────
def _rolling_return_vol(prices, first_idx, window=50):
    """Return the trailing volatility of simple returns for each bar.

    For every index ``i >= first_idx`` the value is the standard deviation of
    ``diff(seg) / seg[:-1]`` over ``seg = prices[i - window:i]`` (the bar at ``i``
    itself is excluded). Positions before ``first_idx``, and positions whose
    segment holds fewer than 10 prices, stay NaN.
    """
    vol = np.full(len(prices), np.nan)
    for i in range(first_idx, len(prices)):
        seg = prices[max(0, i - window):i]
        if len(seg) < 10:
            continue
        vol[i] = float(np.std(np.diff(seg) / seg[:-1]))
    return vol


# One parquet file per day; the file stem is used as the date string. Files whose
# path contains 'catalog' are skipped.
parquet_files = [p for p in sorted(VBT_DIR.glob("*.parquet")) if 'catalog' not in str(p)]
acb_master = AdaptiveCircuitBreaker()
date_strings = [pf.stem for pf in parquet_files]
acb_master.preload_w750(date_strings)

# Read every file once and cache (frame, asset columns, BTC rolling volatility).
pq_data = {}
all_assets = set()
for pf in parquet_files:
    df = pd.read_parquet(pf)
    ac = [c for c in df.columns if c not in META_COLS]
    all_assets.update(ac)
    if 'BTCUSDT' in df.columns:
        dv = _rolling_return_vol(df['BTCUSDT'].values, first_idx=50)
    else:
        # No BTC series: volatility stays NaN for the whole day.
        dv = np.full(len(df), np.nan)
    pq_data[pf.stem] = (df, ac, dv)

# Volatility threshold: 60th percentile of the strictly positive BTC volatilities of the
# first two files, using bars from index 60 onwards. The cached arrays are reused instead
# of re-reading and re-computing those two files (NaN entries fail the `> 0` test).
all_vols = []
for pf in parquet_files[:2]:
    day_vol = pq_data[pf.stem][2]
    all_vols.extend(float(v) for v in day_vol[60:] if v > 0)
if not all_vols:
    # Without a sample the percentile is undefined and every bar would fail the
    # volatility filter; fail loudly instead of producing an empty backtest.
    raise RuntimeError(
        f"No BTCUSDT volatility samples found in the first two parquet files of {VBT_DIR}"
    )
vol_p60 = float(np.percentile(all_vols, 60))

# Mock order-book provider covering every asset column seen in the data.
OB_ASSETS = sorted(all_assets)
_mock_ob = MockOBProvider(
    imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
    imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092,
                      "BNBUSDT": +0.05, "SOLUSDT": +0.05},
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)
# ── Helpers ───────────────────────────────────────────────────────
def run_backtest(engine_cls, engine_kwargs, name, gate_kwargs=None):
    """Run the backtest over every loaded parquet day and return a metrics dict.

    Metrics use the SAME methodology as test_pf_dynamic_beta_validate.py:
    - PF: pnl_absolute (dollar-weighted, matches gold script)
    - DD: day-end capital snapshots (not trade-level curve)
    - Sharpe: daily P&L, annualized with sqrt(365)

    Args:
        engine_cls: Engine class to instantiate. ``NDAlphaEngine`` is built from
            ``engine_kwargs`` only; any other class also receives ``gate_kwargs``.
        engine_kwargs: Constructor keyword arguments. ``initial_capital`` (default
            25000.0) is also the reference for ROI, drawdown and daily returns.
        name: Label stored under ``'name'`` in the result.
        gate_kwargs: Extra constructor keyword arguments for gated engines.

    Returns:
        dict with keys ``name``, ``roi``, ``pf``, ``dd``, ``wr``, ``sharpe``,
        ``trades``, ``suppressed`` and ``suppression_rate``. With zero trades the
        performance figures are 0 but the gate counters are still reported.
    """
    gate_kwargs = gate_kwargs or {}
    # Use the capital the engine is configured with rather than a repeated literal.
    initial_capital = float(engine_kwargs.get('initial_capital', 25000.0))

    # Each run gets a fresh circuit breaker so runs do not share its state.
    acb = AdaptiveCircuitBreaker()
    acb.preload_w750(date_strings)

    if engine_cls is NDAlphaEngine:
        eng = NDAlphaEngine(**engine_kwargs)
    else:
        # Honour the requested class instead of always building FlintGatedEngine.
        eng = engine_cls(**engine_kwargs, **gate_kwargs)
    eng.set_ob_engine(ob_eng)
    eng.set_acb(acb)
    if forewarner is not None:
        eng.set_mc_forewarner(forewarner, MC_BASE_CFG)
    eng.set_esoteric_hazard_multiplier(0.0)

    daily_caps = []
    daily_pnls = []
    for pf in parquet_files:
        ds = pf.stem
        df, acols, dvol = pq_data[ds]
        cap_before = eng.capital
        # A bar qualifies only when its volatility is finite and above the threshold.
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)
        eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
        daily_caps.append(eng.capital)
        daily_pnls.append(eng.capital - cap_before)

    # Gate counters are read before the zero-trade early return: a gate that blocks
    # every entry must still report its suppression rate. Engines without these
    # attributes report 0.
    suppressed = getattr(eng, 'gate_suppressed', 0)
    allowed = getattr(eng, 'gate_allowed', 0)
    sup_rate = suppressed / max(1, suppressed + allowed) * 100.0

    tr = eng.trade_history
    n = len(tr)
    if n == 0:
        return {'name': name, 'roi': 0, 'pf': 0, 'dd': 0, 'wr': 0, 'sharpe': 0,
                'trades': 0, 'suppressed': suppressed, 'suppression_rate': sup_rate}

    roi = (eng.capital - initial_capital) / initial_capital * 100.0

    # PF: dollar-weighted (matches gold script which uses pnl_absolute)
    def _abs(t):
        # NOTE(review): the 250.0 fallback factor presumably converts a percent P&L
        # into dollars for a fixed notional — confirm before relying on it.
        return t.pnl_absolute if hasattr(t, 'pnl_absolute') else t.pnl_pct * 250.0

    pnls = [_abs(t) for t in tr]
    wins = [p for p in pnls if p > 0]
    losses = [p for p in pnls if p <= 0]
    wr = len(wins) / n * 100.0
    gross_profit = sum(wins)
    gross_loss = abs(sum(losses))
    pf = gross_profit / max(gross_loss, 1e-9)  # floor avoids division by zero

    # DD: day-end capital snapshots (matches gold script)
    peak_cap = initial_capital
    max_dd = 0.0
    for cap in daily_caps:
        peak_cap = max(peak_cap, cap)
        dd = (peak_cap - cap) / peak_cap * 100.0
        max_dd = max(max_dd, dd)

    # Sharpe: daily P&L as a percentage of initial capital, annualized (matches gold script)
    dr = np.array([p / initial_capital * 100.0 for p in daily_pnls])
    sharpe = float(dr.mean() / (dr.std() + 1e-9) * math.sqrt(365)) if len(dr) > 1 else 0.0

    return {
        'name': name,
        'roi': roi, 'pf': pf, 'dd': max_dd, 'wr': wr, 'sharpe': sharpe,
        'trades': n, 'suppressed': suppressed, 'suppression_rate': sup_rate,
    }
# ── Run all configs ───────────────────────────────────────────────
# Reference row for the comparison table.
# NOTE(review): these figures differ from the gold numbers quoted in the module
# docstring — confirm which set is current.
GOLD = {
    'name': 'GOLD REFERENCE',
    'roi': 88.55,
    'pf': 1.215,
    'dd': 15.05,
    'wr': 50.5,
    'sharpe': 4.38,
    'trades': 2155,
}

# Each entry: (engine class, unused placeholder dict, display name, gate kwargs).
configs = [
    (NDAlphaEngine, {}, 'A. Baseline (no gate)', {}),
    (FlintGatedEngine, {}, 'B. Gate: proxy_B > 0.00 (fixed)', {'proxy_b_threshold': 0.00}),
]

results = []
_banner = '=' * 55
for engine_cls, _unused_kw, name, gate_kw in configs:
    print(f"\n{_banner}")
    print(f"Running: {name}")
    started = time.time()
    # Every run receives its own copy of the shared engine kwargs.
    outcome = run_backtest(engine_cls, ENGINE_KWARGS.copy(), name, gate_kw)
    outcome['elapsed'] = time.time() - started
    results.append(outcome)
    print(f" Done in {outcome['elapsed']:.1f}s Trades={outcome['trades']} ROI={outcome['roi']:.2f}% PF={outcome['pf']:.4f}")
# ── Final comparison ──────────────────────────────────────────────
# Print the gold reference row followed by one row per backtest result.
print("\n" + "="*75)
print("FLINT GATE vs GOLD STANDARD — FINAL COMPARISON")
print("="*75)
hdr = f"{'Config':<35} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'WR%':>6} {'Sharpe':>7} {'Trades':>7} {'Supp%':>7}"
print(hdr)
print("-"*75)
g = GOLD
# The gold row shows N/A for the win-rate and suppression columns.
print(f"{'*** GOLD REFERENCE ***':<35} {g['roi']:>7.2f} {g['pf']:>6.4f} {g['dd']:>6.2f} {'N/A':>6} {g['sharpe']:>7.2f} {g['trades']:>7d} {'N/A':>7}")
print("-"*75)
for r in results:
    print(f"{r['name']:<35} {r['roi']:>7.2f} {r['pf']:>6.4f} {r['dd']:>6.2f} "
          f"{r['wr']:>6.2f} {r['sharpe']:>7.3f} {r['trades']:>7d} {r['suppression_rate']:>7.1f}%")
print("="*75)
print("\nLEGEND:")
print(" ROI: Return on Initial Capital ($25k)")
print(" PF : Profit Factor (gross profit / gross loss)")
print(" DD : Max Drawdown from equity peak")
print(" WR : Win Rate")
# The Sharpe figure comes from daily P&L annualized with sqrt(365), not from per-trade
# returns, so the legend says so.
print(" Sharpe: daily-P&L Sharpe, annualized with sqrt(365)")
print(" Supp%: % of entry attempts suppressed by gate")
# Verdict: compare the gated run with the highest profit factor against the baseline,
# which is the first entry of `results`.
print("\n VERDICT:")
base = results[0] if results else None
best_gated = max(results[1:], key=lambda r: r['pf']) if len(results) > 1 else None
if base and best_gated:
    pf_delta = best_gated['pf'] - base['pf']
    roi_delta = best_gated['roi'] - base['roi']
    dd_delta = best_gated['dd'] - base['dd']
    print(f" Best gate ({best_gated['name']}):")
    # Baseline and gated values are separated by " → " on every line; the PF and Trades
    # lines previously printed the two numbers run together.
    print(f" PF: {base['pf']:.4f} → {best_gated['pf']:.4f} ({pf_delta:+.4f})")
    print(f" ROI: {base['roi']:.2f}% → {best_gated['roi']:.2f}% ({roi_delta:+.2f}pp)")
    print(f" DD: {base['dd']:.2f}% → {best_gated['dd']:.2f}% ({dd_delta:+.2f}pp)")
    print(f" Trades: {base['trades']} → {best_gated['trades']} ({best_gated['suppression_rate']:.1f}% suppressed)")
    # A gate counts as beneficial only when PF improves by more than 1% relative to baseline.
    if best_gated['pf'] > base['pf'] * 1.01:
        print(" → GATE IS BENEFICIAL: PF improved >1%")
    elif best_gated['pf'] > base['pf']:
        print(" → GATE SHOWS MARGINAL IMPROVEMENT")
    else:
        print(" → GATE IS NEUTRAL/NEGATIVE: no improvement over baseline")