Files
DOLPHIN/prod/vbt_backtest_flow.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

429 lines
17 KiB
Python
Executable File

"""DOLPHIN Backtest Prefect Flow — NDAlphaEngine (Gold Standard Path).
Wraps test_pf_dynamic_beta_validate.py as a Prefect flow.
Results stored in Hazelcast + run_logs/REGISTRY.md + date-partitioned dir.
GOLD STANDARD (55-day Dec31-Feb25, CURRENT parquet state, seed=42, TP=95bps):
ROI=+66.26%, PF=1.175, DD=14.94%, Sharpe=3.62*, WR=49.6%, Trades=2143
File: nautilus_dolphin/run_logs/summary_20260307_222506.json
*Sharpe=3.62 is N=55 artifact (95% CI [-2.2, +8.1]). Economic Sharpe ~2.5-3.0.
SUPERSEDED: summary_20260307_163401.json (+54.67%, T=2145) — different Feb25 parquet state.
IMPORTANT: Uses NDAlphaEngine directly — NOT dolphin_vbt_real.py.
dolphin_vbt_real.py is a parallel implementation and cannot match the gold
standard byte-for-byte. Only this NDAlphaEngine path achieves 1e-6 parity.
"""
import os
import sys
import json
import csv
import time
from pathlib import Path
from datetime import datetime, timezone
from typing import Optional
import numpy as np
import pandas as pd
from prefect import flow, task, get_run_logger
from prefect.artifacts import create_markdown_artifact
# Ensure a Prefect API endpoint exists before any @flow/@task machinery runs;
# setdefault keeps an explicitly exported PREFECT_API_URL intact.
os.environ.setdefault('PREFECT_API_URL', 'http://localhost:4200/api')
# ── Paths ────────────────────────────────────────────────────────────────────
HCM_DIR = Path(__file__).parent.parent              # DOLPHIN/ tree root (this file lives in prod/)
NAUTILUS_DIR = HCM_DIR / 'nautilus_dolphin'         # GREEN Nautilus-native implementation
VBT_DIR = HCM_DIR / 'vbt_cache'                     # date-stamped *.parquet price snapshots
REGISTRY_PATH = HCM_DIR / 'run_logs' / 'REGISTRY.md'  # append-only run registry table
MC_MODELS_DIR = str(NAUTILUS_DIR / 'mc_results' / 'models')  # MC-Forewarner model files
HZ_HOST = 'localhost:5701'   # Hazelcast member address used by report_results
HZ_CLUSTER = 'dolphin'       # Hazelcast cluster name
# Make `mc.*` importable inside run_nd_backtest; `nautilus_dolphin.*` presumably
# resolves via the working directory — confirm against the deployment setup.
sys.path.insert(0, str(NAUTILUS_DIR))
# ── Canonical champion ENGINE_KWARGS — matches test_pf_dynamic_beta_validate.py ─
# NOTE: abs_max_leverage is NOT passed here — the engine default (6.0) applies,
# which is correct for the current gold standard (summary_20260307_222506.json,
# ROI=+66.26%, T=2143). The old +54.67% reference used a different Feb25 parquet
# state and is superseded. Do not add abs_max_leverage=6.0 explicitly — it's already
# the engine default and explicitly passing it is redundant.
ENGINE_KWARGS = dict(
    initial_capital=25000.0,
    vel_div_threshold=-0.02, vel_div_extreme=-0.05,
    min_leverage=0.5, max_leverage=5.0,
    leverage_convexity=3.0, fraction=0.20,
    fixed_tp_pct=0.0095, stop_pct=1.0, max_hold_bars=120,
    use_direction_confirm=True, dc_lookback_bars=7,
    dc_min_magnitude_bps=0.75, dc_skip_contradicts=True,
    dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
    use_asset_selection=True, min_irp_alignment=0.45,
    use_sp_fees=True, use_sp_slippage=True,
    sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
    use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
    lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
)
# ── MC-Forewarner config (frozen champion) ───────────────────────────────────
# Mirrors ENGINE_KWARGS values where the keys overlap; passed to
# engine.set_mc_forewarner() as the base configuration dict.
MC_BASE_CFG = {
    'trial_id': 0,
    'vel_div_threshold': -0.020, 'vel_div_extreme': -0.050,
    'use_direction_confirm': True, 'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.00, 'dc_leverage_reduce': 0.50,
    'vd_trend_lookback': 10, 'min_leverage': 0.50,
    'max_leverage': 5.00, 'leverage_convexity': 3.00,
    'fraction': 0.20, 'use_alpha_layers': True,
    'use_dynamic_leverage': True, 'fixed_tp_pct': 0.0095,
    'stop_pct': 1.00, 'max_hold_bars': 120,
    'use_sp_fees': True, 'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.50,
    'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': 0.40,
    'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.00,
    'use_asset_selection': True, 'min_irp_alignment': 0.45, 'lookback': 100,
    'acb_beta_high': 0.80, 'acb_beta_low': 0.20, 'acb_w750_threshold_pct': 60,
}
# ── OB calibration — real Binance observation 2025-01-15 ─────────────────────
# Per-asset order-book imbalance biases fed to MockOBProvider in run_nd_backtest.
OB_ASSETS = ['BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'SOLUSDT']
OB_IMBALANCE_BIASES = {
    'BTCUSDT': -0.086,  # sell pressure, confirms SHORT
    'ETHUSDT': -0.092,  # sell pressure, confirms SHORT
    'BNBUSDT': +0.05,   # mild buy, mild contradict
    'SOLUSDT': +0.05,   # mild buy, mild contradict
}
# Parquet columns that are signal/metadata, not tradable asset price series;
# excluded when deriving the per-day asset-column list in load_parquet_data.
META_COLS = {
    'timestamp', 'scan_number',
    'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
    'v300_lambda_max_velocity', 'v750_lambda_max_velocity',
    'vel_div', 'instability_50', 'instability_150',
}
# Gold-standard reference metrics (see module docstring) — used only for
# delta reporting and the VALIDATED/DIVERGED gate; never fed to the engine.
GOLD = dict(roi=66.26, pf=1.175, dd=14.94, sharpe=3.62, wr=49.6, trades=2143)
# ── Tasks ────────────────────────────────────────────────────────────────────
@task(name='load-parquet-data', retries=1)
def load_parquet_data(date_from: Optional[str], date_to: Optional[str]) -> dict:
    """Load vbt_cache parquets.

    CRITICAL: ACB preload and vol_p60 use ALL available parquets (no date filter),
    matching test_pf_dynamic_beta_validate.py exactly. The date filter applies only
    to the backtest loop. This is what the gold standard run did.

    Returns a dict with: 'pq_data' (stem -> (DataFrame, asset columns,
    per-bar vol array)), 'parquet_stems' (backtest window), 'all_stems'
    (every parquet, for ACB preload) and 'vol_p60' (volatility threshold).
    """
    log = get_run_logger()

    def _trailing_vol(prices, bar):
        # Std-dev of simple returns over the trailing 50-bar window ending at
        # `bar` (exclusive); NaN when fewer than 10 bars are available.
        window = prices[max(0, bar - 50):bar]
        if len(window) < 10:
            return float('nan')
        return float(np.std(np.diff(window) / window[:-1]))

    # ALL parquets — for ACB calibration and vol_p60 (no date filter)
    every_file = [p for p in sorted(VBT_DIR.glob('*.parquet'))
                  if 'catalog' not in p.name]
    # Date-filtered subset — for the actual backtest loop
    window_files = [p for p in every_file
                    if (not date_from or p.stem >= date_from)
                    and (not date_to or p.stem <= date_to)]
    if not window_files:
        raise ValueError(f'No parquets in {VBT_DIR} for {date_from} to {date_to}')
    log.info(f'All parquets in vbt_cache: {len(every_file)} '
             f'({every_file[0].stem} to {every_file[-1].stem})')
    log.info(f'Backtest window: {len(window_files)} dates '
             f'({window_files[0].stem} to {window_files[-1].stem})')

    # vol_p60: from first 2 of ALL parquets — matches test_pf_dynamic_beta_validate.py
    samples = []
    for path in every_file[:2]:
        frame = pd.read_parquet(path)
        if 'BTCUSDT' not in frame.columns:
            continue
        btc = frame['BTCUSDT'].values
        # NaN (short window) compares False against > 0, so it is skipped —
        # same effect as the reference script's `continue`.
        samples.extend(
            v for v in (_trailing_vol(btc, i) for i in range(60, len(btc)))
            if v > 0)
    vol_p60 = float(np.percentile(samples, 60)) if samples else 0.0
    log.info(f'vol_p60={vol_p60:.6f}')

    # Load loop-window parquets into memory + per-bar vol regime signal
    pq_data = {}
    for path in window_files:
        frame = pd.read_parquet(path)
        asset_cols = [c for c in frame.columns if c not in META_COLS]
        bar_vol = np.full(len(frame), np.nan)
        if 'BTCUSDT' in frame.columns:
            btc = frame['BTCUSDT'].values
            for i in range(50, len(btc)):
                bar_vol[i] = _trailing_vol(btc, i)
        pq_data[path.stem] = (frame, asset_cols, bar_vol)
    log.info(f'Data loaded: {len(pq_data)} dates in memory')
    return {
        'pq_data': pq_data,
        'parquet_stems': [p.stem for p in window_files],
        'all_stems': [p.stem for p in every_file],  # for ACB preload
        'vol_p60': vol_p60,
    }
@task(name='run-nd-backtest', timeout_seconds=900)
def run_nd_backtest(data: dict) -> dict:
    """Run NDAlphaEngine — exact match to test_pf_dynamic_beta_validate.py.

    Assembles the frozen champion stack (ACB v6 + 4D mock order-book
    features + MC-Forewarner + neutral EsoF hazard), replays every date in
    the backtest window through ``engine.process_day``, then computes the
    summary metrics with the same arithmetic as the reference script.

    NOTE(review): this body must stay byte-identical to the reference
    script's logic — the module docstring claims 1e-6 parity with the gold
    standard, and any reordering/restyle here could break that.

    Parameters: ``data`` — the dict returned by ``load_parquet_data``.
    Returns: dict with 'metrics' (summary numbers + deltas vs GOLD) and
    'trade_records' (one JSON-serializable row per closed trade).
    """
    log = get_run_logger()
    # Project-local imports, resolved via the sys.path entry added at module load.
    from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
    from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
    from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
    from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
    from mc.mc_ml import DolphinForewarner
    pq_data = data['pq_data']
    parquet_stems = data['parquet_stems']
    vol_p60 = data['vol_p60']
    all_stems = data['all_stems']  # ALL parquets — for ACB calibration
    log.info('Initialising ACB v6...')
    acb = AdaptiveCircuitBreaker()
    acb.preload_w750(all_stems)  # ALL stems — matches test_pf_dynamic_beta_validate.py
    log.info(f' w750 threshold (p60): {acb._w750_threshold:.6f}')
    log.info('Loading MC-Forewarner...')
    forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
    log.info('Building OB engine (4D MockOBProvider, real-calibrated)...')
    # Per-asset biases (OB_IMBALANCE_BIASES) override the scalar imbalance_bias.
    mock_ob = MockOBProvider(
        imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
        imbalance_biases=OB_IMBALANCE_BIASES,
    )
    ob_eng = OBFeatureEngine(mock_ob)
    ob_eng.preload_date('mock', OB_ASSETS)
    log.info('Assembling stack: ACBv6 + OB 4D + MC-Forewarner + EsoF(neutral)...')
    engine = NDAlphaEngine(**ENGINE_KWARGS)
    engine.set_ob_engine(ob_eng)
    engine.set_acb(acb)
    engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
    engine.set_esoteric_hazard_multiplier(0.0)  # EsoF neutral: hazard disabled
    engine._bar_log_enabled = False  # off for flow performance
    t0 = time.time()
    dstats = []  # per-day stats dicts, each augmented with end-of-day capital
    for stem in parquet_stems:
        df, acols, dvol = pq_data[stem]
        # Bar passes the vol regime only when its trailing vol is finite AND
        # above the global p60 threshold (NaN bars fail the gate).
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)
        stats = engine.process_day(stem, df, acols, vol_regime_ok=vol_ok)
        dstats.append({**stats, 'cap': engine.capital})
    elapsed = time.time() - t0
    # ── Metrics — identical computation to test_pf_dynamic_beta_validate.py ──
    tr = engine.trade_history
    wins = [t for t in tr if t.pnl_absolute > 0]
    loses = [t for t in tr if t.pnl_absolute <= 0]  # zero-PnL counts as a loss
    gw = sum(t.pnl_absolute for t in wins) if wins else 0.0
    gl = abs(sum(t.pnl_absolute for t in loses)) if loses else 0.0
    roi = (engine.capital - 25000.0) / 25000.0 * 100.0
    pf = gw / gl if gl > 0 else 999.0  # 999.0 = sentinel for "no gross loss"
    wr = len(wins) / len(tr) * 100.0 if tr else 0.0
    # Daily returns on the fixed initial capital (not compounding equity).
    dr_all = np.array([s['pnl'] / 25000.0 * 100.0 for s in dstats])
    sharpe = (float(np.mean(dr_all) / np.std(dr_all) * np.sqrt(365))
              if np.std(dr_all) > 0 else 0.0)
    # Max drawdown from end-of-day capital (intraday excursions not captured).
    peak_cap = 25000.0
    max_dd = 0.0
    for s in dstats:
        peak_cap = max(peak_cap, s['cap'])
        max_dd = max(max_dd, (peak_cap - s['cap']) / peak_cap * 100.0)
    exit_ctr = {}  # exit_reason -> count
    for t in tr:
        exit_ctr[t.exit_reason] = exit_ctr.get(t.exit_reason, 0) + 1
    # Flatten trades into plain dicts so report_results can CSV/JSON them.
    trade_records = []
    for t in tr:
        trade_records.append({
            'date': getattr(t, 'date_str', ''),
            'entry_bar': t.entry_bar,
            'exit_bar': t.exit_bar,
            'bars_held': t.bars_held,
            'direction': t.direction,
            'leverage': round(t.leverage, 6),
            'pnl_pct': round(t.pnl_pct, 8),
            'pnl_abs': round(t.pnl_absolute, 4),
            'exit_reason': t.exit_reason,
        })
    metrics = {
        'roi_pct': round(roi, 4),
        'pf': round(pf, 4),
        'max_dd_pct': round(max_dd, 4),
        'sharpe': round(sharpe, 4),
        'wr_pct': round(wr, 4),
        'trades': len(tr),
        'wins': len(wins),
        'losses': len(loses),
        'capital_final': round(engine.capital, 2),
        'exit_breakdown': exit_ctr,
        'mc_red_days': sum(1 for s in dstats if s.get('mc_status') == 'RED'),
        'mc_orange_days': sum(1 for s in dstats if s.get('mc_status') == 'ORANGE'),
        'elapsed_sec': round(elapsed, 1),
        'n_dates': len(parquet_stems),
        'date_from': parquet_stems[0] if parquet_stems else '',
        'date_to': parquet_stems[-1] if parquet_stems else '',
        # Deltas vs the gold-standard reference (GOLD) for the validation gate.
        'delta_roi': round(roi - GOLD['roi'], 4),
        'delta_pf': round(pf - GOLD['pf'], 4),
        'delta_trades': len(tr) - GOLD['trades'],
    }
    log.info(f'ROI={roi:+.2f}% PF={pf:.3f} DD={max_dd:.2f}% Sh={sharpe:.2f} '
             f'WR={wr:.1f}% T={len(tr)} ({elapsed:.0f}s)')
    log.info(f'vs Gold: ΔROI={metrics["delta_roi"]:+.2f}pp '
             f'ΔPF={metrics["delta_pf"]:+.3f} ΔT={metrics["delta_trades"]:+d}')
    return {'metrics': metrics, 'trade_records': trade_records}
@task(name='report-results', retries=2)
def report_results(result: dict, date_from: str, date_to: str) -> str:
    """Write to date-partitioned run_logs/, Hazelcast, REGISTRY.md.

    Persists three things: (1) summary.json + trades.csv under
    run_logs/<day>/nd_backtest_<ts>/; (2) the metrics dict into the
    Hazelcast map 'vbt_metrics_history' (best-effort); (3) an appended row
    in REGISTRY.md. Returns the output directory path as a string.
    """
    log = get_run_logger()
    m = result['metrics']
    tr = result['trade_records']
    # Sample the clock ONCE so run_ts / run_hm / run_day can never disagree
    # across a second or midnight rollover (three separate now() calls could).
    now = datetime.now(timezone.utc)
    run_ts = now.strftime('%Y%m%d_%H%M%S')
    run_hm = now.strftime('%Y-%m-%d %H:%M')
    run_day = now.strftime('%Y-%m-%d')
    # Validation gate: within 5pp ROI and 0.05 PF of the gold standard.
    validated = abs(m['delta_roi']) < 5.0 and abs(m['delta_pf']) < 0.05
    status = 'VALIDATED' if validated else 'DIVERGED'
    # 1. Date-partitioned output
    output_dir = HCM_DIR / 'run_logs' / run_day / f'nd_backtest_{run_ts}'
    output_dir.mkdir(parents=True, exist_ok=True)
    summary = {
        'script': 'vbt_backtest_flow (NDAlphaEngine)',
        'timestamp': run_ts,
        'engine_kwargs': dict(ENGINE_KWARGS),  # shallow copy for serialization
        'results': m,
        'gold_ref': GOLD,
        'status': status,
        'output_dir': str(output_dir),
    }
    (output_dir / 'summary.json').write_text(json.dumps(summary, indent=2))
    if tr:
        with open(output_dir / 'trades.csv', 'w', newline='') as f:
            w = csv.DictWriter(f, fieldnames=list(tr[0].keys()))
            w.writeheader()
            w.writerows(tr)
    log.info(f'Output: {output_dir}')
    # 2. Hazelcast — best-effort: an unreachable cluster must not fail the flow.
    try:
        import hazelcast
        client = hazelcast.HazelcastClient(
            cluster_name=HZ_CLUSTER, cluster_members=[HZ_HOST])
        try:
            imap = client.get_map('vbt_metrics_history').blocking()
            imap.put(f'nd_{run_ts}', json.dumps({**m, 'output_dir': str(output_dir)}))
        finally:
            # Always release the client connection, even if put() raises.
            client.shutdown()
        log.info(f'HZ OK → vbt_metrics_history[nd_{run_ts}]')
    except Exception as e:
        log.warning(f'HZ reporting failed (non-fatal): {e}')
    # 3. REGISTRY.md — append one markdown table row.
    try:
        # '→' separator restored (was lost, fusing the two dates together).
        key_params = f'seed=42 tp=95bps abs_max_lev=6.0 {date_from}→{date_to}'
        res_str = (f'ROI={m["roi_pct"]:+.2f}% PF={m["pf"]:.3f} '
                   f'T={m["trades"]} DD={m["max_dd_pct"]:.1f}% Sh={m["sharpe"]:.2f}')
        row = [run_hm, 'nd_backtest_flow', key_params, res_str, status, str(output_dir)]
        with open(REGISTRY_PATH, 'a', encoding='utf-8') as f:
            f.write(f'| {" | ".join(row)} |\n')
        log.info('REGISTRY.md updated')
    except Exception as e:
        log.error(f'REGISTRY update failed: {e}')
    return str(output_dir)
# ── Flow ─────────────────────────────────────────────────────────────────────
@flow(
    name='dolphin-nd-backtest',
    description=(
        'NDAlphaEngine backtest — gold-standard code path. '
        '1e-6 parity with test_pf_dynamic_beta_validate.py. '
        'Default: 55-day champion window Dec31→Feb25.'
    ),
    log_prints=True,
)
def vbt_backtest_flow(
    date_from: str = '2025-12-31',
    date_to: str = '2026-02-25',
) -> dict:
    """Run the gold-standard NDAlphaEngine backtest over [date_from, date_to].

    Orchestrates load → backtest → report, then publishes a Prefect
    markdown artifact comparing the run against the GOLD reference.
    Returns the metrics dict from run_nd_backtest.
    """
    log = get_run_logger()
    # '→' separators restored in the f-strings below (they were lost, fusing
    # the two dates into one unreadable token, e.g. '2025-12-312026-02-25').
    log.info(f'=== DOLPHIN NDAlphaEngine Backtest {date_from}→{date_to} ===')
    log.info(f'Gold: ROI=+{GOLD["roi"]}% PF={GOLD["pf"]} T={GOLD["trades"]}')
    data = load_parquet_data(date_from=date_from, date_to=date_to)
    result = run_nd_backtest(data)
    output = report_results(result, date_from=date_from, date_to=date_to)
    m = result['metrics']
    # Same validation gate as report_results: within 5pp ROI / 0.05 PF of gold.
    validated = abs(m['delta_roi']) < 5.0 and abs(m['delta_pf']) < 0.05
    create_markdown_artifact(
        key='nd-backtest-result',
        markdown=f"""
## NDAlphaEngine Backtest: {date_from}→{date_to}
| Metric | Result | Gold Standard | Delta |
|--------|--------|--------------|-------|
| ROI | {m['roi_pct']:+.2f}% | +{GOLD['roi']}% | {m['delta_roi']:+.2f}pp |
| PF | {m['pf']:.3f} | {GOLD['pf']} | {m['delta_pf']:+.3f} |
| DD | {m['max_dd_pct']:.2f}% | {GOLD['dd']}% | — |
| Sharpe | {m['sharpe']:.2f} | {GOLD['sharpe']} | — |
| WR | {m['wr_pct']:.1f}% | {GOLD['wr']}% | — |
| Trades | {m['trades']} | {GOLD['trades']} | {m['delta_trades']:+d} |
**Status**: {'VALIDATED' if validated else 'DIVERGED'}
**Output**: `{output}`
**MC**: {m['mc_red_days']} RED / {m['mc_orange_days']} ORANGE
**Exits**: {m['exit_breakdown']}
""",
        description=f'NDAlphaEngine {date_from}→{date_to}',
    )
    return m
# ── Entry point ───────────────────────────────────────────────────────────────
if __name__ == '__main__':
    import argparse

    # CLI: run the flow directly, or register it as a Prefect deployment.
    parser = argparse.ArgumentParser()
    parser.add_argument('--date-from', default='2025-12-31')
    parser.add_argument('--date-to', default='2026-02-25')
    parser.add_argument('--register', action='store_true')
    cli = parser.parse_args()
    if not cli.register:
        # Default: execute the backtest locally over the requested window.
        vbt_backtest_flow(date_from=cli.date_from, date_to=cli.date_to)
    else:
        # --register: publish the flow as a deployment on the 'dolphin' pool.
        deployment = vbt_backtest_flow.to_deployment(
            name='dolphin-nd-champion',
            tags=['backtest', 'ndengine', 'champion'],
            work_pool_name='dolphin',
        )
        deployment.apply()
        print('Registered: dolphin-nd-champion')