"""DOLPHIN Backtest Prefect Flow — NDAlphaEngine (Gold Standard Path). Wraps test_pf_dynamic_beta_validate.py as a Prefect flow. Results stored in Hazelcast + run_logs/REGISTRY.md + date-partitioned dir. GOLD STANDARD (55-day Dec31-Feb25, CURRENT parquet state, seed=42, TP=95bps): ROI=+66.26%, PF=1.175, DD=14.94%, Sharpe=3.62*, WR=49.6%, Trades=2143 File: nautilus_dolphin/run_logs/summary_20260307_222506.json *Sharpe=3.62 is N=55 artifact (95% CI [-2.2, +8.1]). Economic Sharpe ~2.5-3.0. SUPERSEDED: summary_20260307_163401.json (+54.67%, T=2145) — different Feb25 parquet state. IMPORTANT: Uses NDAlphaEngine directly — NOT dolphin_vbt_real.py. dolphin_vbt_real.py is a parallel implementation and cannot match the gold standard byte-for-byte. Only this NDAlphaEngine path achieves 1e-6 parity. """ import os import sys import json import csv import time from pathlib import Path from datetime import datetime, timezone from typing import Optional import numpy as np import pandas as pd from prefect import flow, task, get_run_logger from prefect.artifacts import create_markdown_artifact os.environ.setdefault('PREFECT_API_URL', 'http://localhost:4200/api') # ── Paths ──────────────────────────────────────────────────────────────────── HCM_DIR = Path(__file__).parent.parent NAUTILUS_DIR = HCM_DIR / 'nautilus_dolphin' VBT_DIR = HCM_DIR / 'vbt_cache' REGISTRY_PATH = HCM_DIR / 'run_logs' / 'REGISTRY.md' MC_MODELS_DIR = str(NAUTILUS_DIR / 'mc_results' / 'models') HZ_HOST = 'localhost:5701' HZ_CLUSTER = 'dolphin' sys.path.insert(0, str(NAUTILUS_DIR)) # ── Canonical champion ENGINE_KWARGS — matches test_pf_dynamic_beta_validate.py ─ # NOTE: abs_max_leverage is NOT passed here — the engine default (6.0) applies, # which is correct for the current gold standard (summary_20260307_222506.json, # ROI=+66.26%, T=2143). The old +54.67% reference used a different Feb25 parquet # state and is superseded. Do not add abs_max_leverage=6.0 explicitly — it's already # the engine default and explicitly passing it is redundant. ENGINE_KWARGS = dict( initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05, min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0, fraction=0.20, fixed_tp_pct=0.0095, stop_pct=1.0, max_hold_bars=120, use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75, dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5, use_asset_selection=True, min_irp_alignment=0.45, use_sp_fees=True, use_sp_slippage=True, sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50, use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40, lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42, ) # ── MC-Forewarner config (frozen champion) ─────────────────────────────────── MC_BASE_CFG = { 'trial_id': 0, 'vel_div_threshold': -0.020, 'vel_div_extreme': -0.050, 'use_direction_confirm': True, 'dc_lookback_bars': 7, 'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True, 'dc_leverage_boost': 1.00, 'dc_leverage_reduce': 0.50, 'vd_trend_lookback': 10, 'min_leverage': 0.50, 'max_leverage': 5.00, 'leverage_convexity': 3.00, 'fraction': 0.20, 'use_alpha_layers': True, 'use_dynamic_leverage': True, 'fixed_tp_pct': 0.0095, 'stop_pct': 1.00, 'max_hold_bars': 120, 'use_sp_fees': True, 'use_sp_slippage': True, 'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.50, 'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': 0.40, 'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.00, 'use_asset_selection': True, 'min_irp_alignment': 0.45, 'lookback': 100, 'acb_beta_high': 0.80, 'acb_beta_low': 0.20, 'acb_w750_threshold_pct': 60, } # ── OB calibration — real Binance observation 2025-01-15 ───────────────────── OB_ASSETS = ['BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'SOLUSDT'] OB_IMBALANCE_BIASES = { 'BTCUSDT': -0.086, # sell pressure, confirms SHORT 'ETHUSDT': -0.092, # sell pressure, confirms SHORT 'BNBUSDT': +0.05, # mild buy, mild contradict 'SOLUSDT': +0.05, # mild buy, mild contradict } META_COLS = { 'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity', 'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150', } GOLD = dict(roi=66.26, pf=1.175, dd=14.94, sharpe=3.62, wr=49.6, trades=2143) # ── Tasks ──────────────────────────────────────────────────────────────────── @task(name='load-parquet-data', retries=1) def load_parquet_data(date_from: Optional[str], date_to: Optional[str]) -> dict: """Load vbt_cache parquets. CRITICAL: ACB preload and vol_p60 use ALL available parquets (no date filter), matching test_pf_dynamic_beta_validate.py exactly. The date filter applies only to the backtest loop. This is what the gold standard run did. """ log = get_run_logger() # ALL parquets — for ACB calibration and vol_p60 (no date filter) all_parquet_files = sorted(VBT_DIR.glob('*.parquet')) all_parquet_files = [p for p in all_parquet_files if 'catalog' not in p.name] # Date-filtered subset — for actual backtest loop loop_files = all_parquet_files[:] if date_from: loop_files = [p for p in loop_files if p.stem >= date_from] if date_to: loop_files = [p for p in loop_files if p.stem <= date_to] if not loop_files: raise ValueError(f'No parquets in {VBT_DIR} for {date_from} to {date_to}') log.info(f'All parquets in vbt_cache: {len(all_parquet_files)} ' f'({all_parquet_files[0].stem} to {all_parquet_files[-1].stem})') log.info(f'Backtest window: {len(loop_files)} dates ' f'({loop_files[0].stem} to {loop_files[-1].stem})') # vol_p60: from first 2 of ALL parquets — matches test_pf_dynamic_beta_validate.py all_vols = [] for pf in all_parquet_files[:2]: df = pd.read_parquet(pf) if 'BTCUSDT' not in df.columns: continue pr = df['BTCUSDT'].values for i in range(60, len(pr)): seg = pr[max(0, i - 50):i] if len(seg) < 10: continue v = float(np.std(np.diff(seg) / seg[:-1])) if v > 0: all_vols.append(v) vol_p60 = float(np.percentile(all_vols, 60)) if all_vols else 0.0 log.info(f'vol_p60={vol_p60:.6f}') # Load loop-window parquets into memory + per-bar vol regime signal pq_data = {} for pf in loop_files: df = pd.read_parquet(pf) ac = [c for c in df.columns if c not in META_COLS] bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None dv = np.full(len(df), np.nan) if bp is not None: for i in range(50, len(bp)): seg = bp[max(0, i - 50):i] if len(seg) < 10: continue dv[i] = float(np.std(np.diff(seg) / seg[:-1])) pq_data[pf.stem] = (df, ac, dv) log.info(f'Data loaded: {len(pq_data)} dates in memory') return { 'pq_data': pq_data, 'parquet_stems': [p.stem for p in loop_files], 'all_stems': [p.stem for p in all_parquet_files], # for ACB preload 'vol_p60': vol_p60, } @task(name='run-nd-backtest', timeout_seconds=900) def run_nd_backtest(data: dict) -> dict: """Run NDAlphaEngine — exact match to test_pf_dynamic_beta_validate.py.""" log = get_run_logger() from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine from nautilus_dolphin.nautilus.ob_provider import MockOBProvider from mc.mc_ml import DolphinForewarner pq_data = data['pq_data'] parquet_stems = data['parquet_stems'] vol_p60 = data['vol_p60'] all_stems = data['all_stems'] # ALL parquets — for ACB calibration log.info('Initialising ACB v6...') acb = AdaptiveCircuitBreaker() acb.preload_w750(all_stems) # ALL stems — matches test_pf_dynamic_beta_validate.py log.info(f' w750 threshold (p60): {acb._w750_threshold:.6f}') log.info('Loading MC-Forewarner...') forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR) log.info('Building OB engine (4D MockOBProvider, real-calibrated)...') mock_ob = MockOBProvider( imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS, imbalance_biases=OB_IMBALANCE_BIASES, ) ob_eng = OBFeatureEngine(mock_ob) ob_eng.preload_date('mock', OB_ASSETS) log.info('Assembling stack: ACBv6 + OB 4D + MC-Forewarner + EsoF(neutral)...') engine = NDAlphaEngine(**ENGINE_KWARGS) engine.set_ob_engine(ob_eng) engine.set_acb(acb) engine.set_mc_forewarner(forewarner, MC_BASE_CFG) engine.set_esoteric_hazard_multiplier(0.0) engine._bar_log_enabled = False # off for flow performance t0 = time.time() dstats = [] for stem in parquet_stems: df, acols, dvol = pq_data[stem] vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False) stats = engine.process_day(stem, df, acols, vol_regime_ok=vol_ok) dstats.append({**stats, 'cap': engine.capital}) elapsed = time.time() - t0 # ── Metrics — identical computation to test_pf_dynamic_beta_validate.py ── tr = engine.trade_history wins = [t for t in tr if t.pnl_absolute > 0] loses = [t for t in tr if t.pnl_absolute <= 0] gw = sum(t.pnl_absolute for t in wins) if wins else 0.0 gl = abs(sum(t.pnl_absolute for t in loses)) if loses else 0.0 roi = (engine.capital - 25000.0) / 25000.0 * 100.0 pf = gw / gl if gl > 0 else 999.0 wr = len(wins) / len(tr) * 100.0 if tr else 0.0 dr_all = np.array([s['pnl'] / 25000.0 * 100.0 for s in dstats]) sharpe = (float(np.mean(dr_all) / np.std(dr_all) * np.sqrt(365)) if np.std(dr_all) > 0 else 0.0) peak_cap = 25000.0 max_dd = 0.0 for s in dstats: peak_cap = max(peak_cap, s['cap']) max_dd = max(max_dd, (peak_cap - s['cap']) / peak_cap * 100.0) exit_ctr = {} for t in tr: exit_ctr[t.exit_reason] = exit_ctr.get(t.exit_reason, 0) + 1 trade_records = [] for t in tr: trade_records.append({ 'date': getattr(t, 'date_str', ''), 'entry_bar': t.entry_bar, 'exit_bar': t.exit_bar, 'bars_held': t.bars_held, 'direction': t.direction, 'leverage': round(t.leverage, 6), 'pnl_pct': round(t.pnl_pct, 8), 'pnl_abs': round(t.pnl_absolute, 4), 'exit_reason': t.exit_reason, }) metrics = { 'roi_pct': round(roi, 4), 'pf': round(pf, 4), 'max_dd_pct': round(max_dd, 4), 'sharpe': round(sharpe, 4), 'wr_pct': round(wr, 4), 'trades': len(tr), 'wins': len(wins), 'losses': len(loses), 'capital_final': round(engine.capital, 2), 'exit_breakdown': exit_ctr, 'mc_red_days': sum(1 for s in dstats if s.get('mc_status') == 'RED'), 'mc_orange_days': sum(1 for s in dstats if s.get('mc_status') == 'ORANGE'), 'elapsed_sec': round(elapsed, 1), 'n_dates': len(parquet_stems), 'date_from': parquet_stems[0] if parquet_stems else '', 'date_to': parquet_stems[-1] if parquet_stems else '', 'delta_roi': round(roi - GOLD['roi'], 4), 'delta_pf': round(pf - GOLD['pf'], 4), 'delta_trades': len(tr) - GOLD['trades'], } log.info(f'ROI={roi:+.2f}% PF={pf:.3f} DD={max_dd:.2f}% Sh={sharpe:.2f} ' f'WR={wr:.1f}% T={len(tr)} ({elapsed:.0f}s)') log.info(f'vs Gold: ΔROI={metrics["delta_roi"]:+.2f}pp ' f'ΔPF={metrics["delta_pf"]:+.3f} ΔT={metrics["delta_trades"]:+d}') return {'metrics': metrics, 'trade_records': trade_records} @task(name='report-results', retries=2) def report_results(result: dict, date_from: str, date_to: str) -> str: """Write to date-partitioned run_logs/, Hazelcast, REGISTRY.md.""" log = get_run_logger() m = result['metrics'] tr = result['trade_records'] run_ts = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S') run_hm = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M') run_day = datetime.now(timezone.utc).strftime('%Y-%m-%d') validated = abs(m['delta_roi']) < 5.0 and abs(m['delta_pf']) < 0.05 status = 'VALIDATED' if validated else 'DIVERGED' # 1. Date-partitioned output output_dir = HCM_DIR / 'run_logs' / run_day / f'nd_backtest_{run_ts}' output_dir.mkdir(parents=True, exist_ok=True) summary = { 'script': 'vbt_backtest_flow (NDAlphaEngine)', 'timestamp': run_ts, 'engine_kwargs': {k: v for k, v in ENGINE_KWARGS.items()}, 'results': m, 'gold_ref': GOLD, 'status': status, 'output_dir': str(output_dir), } (output_dir / 'summary.json').write_text(json.dumps(summary, indent=2)) if tr: with open(output_dir / 'trades.csv', 'w', newline='') as f: w = csv.DictWriter(f, fieldnames=list(tr[0].keys())) w.writeheader() w.writerows(tr) log.info(f'Output: {output_dir}') # 2. Hazelcast try: import hazelcast client = hazelcast.HazelcastClient( cluster_name=HZ_CLUSTER, cluster_members=[HZ_HOST]) imap = client.get_map('vbt_metrics_history').blocking() imap.put(f'nd_{run_ts}', json.dumps({**m, 'output_dir': str(output_dir)})) client.shutdown() log.info(f'HZ OK → vbt_metrics_history[nd_{run_ts}]') except Exception as e: log.warning(f'HZ reporting failed (non-fatal): {e}') # 3. REGISTRY.md try: key_params = f'seed=42 tp=95bps abs_max_lev=6.0 {date_from}→{date_to}' res_str = (f'ROI={m["roi_pct"]:+.2f}% PF={m["pf"]:.3f} ' f'T={m["trades"]} DD={m["max_dd_pct"]:.1f}% Sh={m["sharpe"]:.2f}') row = [run_hm, 'nd_backtest_flow', key_params, res_str, status, str(output_dir)] with open(REGISTRY_PATH, 'a', encoding='utf-8') as f: f.write(f'| {" | ".join(row)} |\n') log.info('REGISTRY.md updated') except Exception as e: log.error(f'REGISTRY update failed: {e}') return str(output_dir) # ── Flow ───────────────────────────────────────────────────────────────────── @flow( name='dolphin-nd-backtest', description=( 'NDAlphaEngine backtest — gold-standard code path. ' '1e-6 parity with test_pf_dynamic_beta_validate.py. ' 'Default: 55-day champion window Dec31→Feb25.' ), log_prints=True, ) def vbt_backtest_flow( date_from: str = '2025-12-31', date_to: str = '2026-02-25', ): log = get_run_logger() log.info(f'=== DOLPHIN NDAlphaEngine Backtest {date_from} → {date_to} ===') log.info(f'Gold: ROI=+{GOLD["roi"]}% PF={GOLD["pf"]} T={GOLD["trades"]}') data = load_parquet_data(date_from=date_from, date_to=date_to) result = run_nd_backtest(data) output = report_results(result, date_from=date_from, date_to=date_to) m = result['metrics'] validated = abs(m['delta_roi']) < 5.0 and abs(m['delta_pf']) < 0.05 create_markdown_artifact( key='nd-backtest-result', markdown=f""" ## NDAlphaEngine Backtest: {date_from} → {date_to} | Metric | Result | Gold Standard | Delta | |--------|--------|--------------|-------| | ROI | {m['roi_pct']:+.2f}% | +{GOLD['roi']}% | {m['delta_roi']:+.2f}pp | | PF | {m['pf']:.3f} | {GOLD['pf']} | {m['delta_pf']:+.3f} | | DD | {m['max_dd_pct']:.2f}% | {GOLD['dd']}% | — | | Sharpe | {m['sharpe']:.2f} | {GOLD['sharpe']} | — | | WR | {m['wr_pct']:.1f}% | {GOLD['wr']}% | — | | Trades | {m['trades']} | {GOLD['trades']} | {m['delta_trades']:+d} | **Status**: {'VALIDATED' if validated else 'DIVERGED'} **Output**: `{output}` **MC**: {m['mc_red_days']} RED / {m['mc_orange_days']} ORANGE **Exits**: {m['exit_breakdown']} """, description=f'NDAlphaEngine {date_from}→{date_to}', ) return m # ── Entry point ─────────────────────────────────────────────────────────────── if __name__ == '__main__': import argparse p = argparse.ArgumentParser() p.add_argument('--date-from', default='2025-12-31') p.add_argument('--date-to', default='2026-02-25') p.add_argument('--register', action='store_true') args = p.parse_args() if args.register: dep = vbt_backtest_flow.to_deployment( name='dolphin-nd-champion', tags=['backtest', 'ndengine', 'champion'], work_pool_name='dolphin', ) dep.apply() print('Registered: dolphin-nd-champion') else: vbt_backtest_flow(date_from=args.date_from, date_to=args.date_to)