Files
DOLPHIN/nautilus_dolphin/test_tp_sweep_klines.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

319 lines
15 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""TP Sweep — 795-Day Klines Dataset, 85121bps in 2bp steps.
Purpose: Validate that 95bps TP optimality (confirmed on 55-day NG3 5s bear window)
holds across broader regime sample (2 years: bull, bear, sideways, ranging).
Dataset: vbt_cache_klines/ 2024-01-01 to 2026-03-05 (~795 1-min parquets).
Thresholds: adapted for 1-min timescale (vel_div distribution ~23x wider than 5s NG3).
vel_div_threshold=-0.50 (champion NG3: -0.02, same ~7th pctile signal rate)
vel_div_extreme =-1.25 (champion NG3: -0.05, same 2.5x ratio)
Full engine stack: ACBv6 + OB 4D (MockOB) + MC-Forewarner + EsoF(neutral).
Seed=42 throughout. ACB w750 populated from klines parquet v750 column.
Saves:
run_logs/tp_sweep_klines_{TS}.csv (one row per TP: tp_bps, roi, pf, dd, sharpe, wr, n_trades, tp_hits, mh_exits, tp_rate_pct)
run_logs/tp_sweep_klines_{TS}.json (full summary + best config)
Expected runtime: ~4-6 hrs (795 dates × 19 TP steps × full engine stack).
"""
import sys, time, json, csv
# Force UTF-8 on stdout: the progress output printed by this script contains
# non-ASCII characters (—, →, Δ). errors='replace' substitutes anything that
# still cannot be encoded instead of raising.
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
# Put this script's own directory first on the import path so the package
# imports that follow are resolved against it before anything else.
sys.path.insert(0, str(Path(__file__).parent))
# ── JIT warm-up ─────────────────────────────────────────────────────────────────
# Every *_nb kernel is invoked once with tiny dummy arguments before the sweep
# starts (presumably so numba compilation is paid for up front — the module
# sources are not visible from here). The timer starts before the imports, so
# the reported figure includes import time as well.
print("Compiling numba kernels...")
t0c = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import (
    compute_irp_nb,
    compute_ars_nb,
    rank_assets_irp_nb,
)
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
    OBFeatureEngine,
    compute_imbalance_nb,
    compute_depth_1pct_nb,
    compute_depth_quality_nb,
    compute_fill_probability_nb,
    compute_spread_proxy_nb,
    compute_depth_asymmetry_nb,
    compute_imbalance_persistence_nb,
    compute_withdrawal_velocity_nb,
    compute_market_agreement_nb,
    compute_cascade_signal_nb,
)
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider

# Alpha-layer kernels.
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1)
compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(
    -0.55, -0.50, -1.25, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
    np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
    np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04,
)
check_dc_nb(_p, 3, 1, 0.75)

# Order-book feature kernels, fed with two small five-element dummy arrays.
_b = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64)
_a = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64)
compute_imbalance_nb(_b, _a)
compute_depth_1pct_nb(_b, _a)
compute_depth_quality_nb(210.0, 200.0)
compute_fill_probability_nb(1.0)
compute_spread_proxy_nb(_b, _a)
compute_depth_asymmetry_nb(_b, _a)
compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2)
compute_withdrawal_velocity_nb(np.array([100.0, 110.0], dtype=np.float64), 1)
compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)
print(f" JIT: {time.time()-t0c:.1f}s")
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
from mc.mc_ml import DolphinForewarner
# ── Config ──────────────────────────────────────────────────────────────────────
# Machine-specific absolute location of the klines parquet cache.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")

# Inclusive date window; compared as strings against each parquet file stem.
DATE_START = '2024-01-01'
DATE_END = '2026-03-05'

# Columns excluded when the per-day column list handed to the engine is built.
META_COLS = {
    'timestamp',
    'scan_number',
    'v50_lambda_max_velocity',
    'v150_lambda_max_velocity',
    'v300_lambda_max_velocity',
    'v750_lambda_max_velocity',
    'vel_div',
    'instability_50',
    'instability_150',
}

# Thresholds adapted for 1-min timescale (klines vel_div ~23x wider than NG3 5s)
VD_THRESHOLD = -0.50  # p~7 (champion NG3: -0.02)
VD_EXTREME = -1.25  # p~2 (champion NG3: -0.05)

MC_MODELS_DIR = str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))

# Configuration handed to the MC-Forewarner alongside the engine.
MC_BASE_CFG = dict(
    trial_id=0,
    # MC-Forewarner trained on champion thresholds — pass those for correct risk envelope
    vel_div_threshold=-0.02,
    vel_div_extreme=-0.05,
    use_direction_confirm=True,
    dc_lookback_bars=7,
    dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True,
    dc_leverage_boost=1.00,
    dc_leverage_reduce=0.50,
    vd_trend_lookback=10,
    min_leverage=0.50,
    max_leverage=5.00,
    leverage_convexity=3.00,
    fraction=0.20,
    use_alpha_layers=True,
    use_dynamic_leverage=True,
    fixed_tp_pct=0.0095,
    stop_pct=1.00,
    max_hold_bars=120,
    use_sp_fees=True,
    use_sp_slippage=True,
    sp_maker_entry_rate=0.62,
    sp_maker_exit_rate=0.50,
    use_ob_edge=True,
    ob_edge_bps=5.00,
    ob_confirm_rate=0.40,
    ob_imbalance_bias=-0.09,
    ob_depth_scale=1.00,
    use_asset_selection=True,
    min_irp_alignment=0.45,
    lookback=100,
    acb_beta_high=0.80,
    acb_beta_low=0.20,
    acb_w750_threshold_pct=60,
)

# Keyword arguments shared by every engine instance; the sweep adds
# fixed_tp_pct on top of these for each step.
BASE_ENGINE_KWARGS = {
    'initial_capital': 25000.0,
    'vel_div_threshold': VD_THRESHOLD,
    'vel_div_extreme': VD_EXTREME,
    'min_leverage': 0.5,
    'max_leverage': 5.0,
    'leverage_convexity': 3.0,
    'fraction': 0.20,
    'stop_pct': 1.0,
    'max_hold_bars': 120,
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.0,
    'dc_leverage_reduce': 0.5,
    'use_asset_selection': True,
    'min_irp_alignment': 0.45,
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    'use_ob_edge': True,
    'ob_edge_bps': 5.0,
    'ob_confirm_rate': 0.40,
    'lookback': 100,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    'seed': 42,
}

# Assets served by the mock order-book provider.
OB_ASSETS = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"]
# ── Load shared state ────────────────────────────────────────────────────────────
print("\nLoading MC-Forewarner trained models...")
forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
print(" MC-Forewarner ready")

# One parquet per day, named by date. The 'catalog' filter is applied to the
# file NAME only: testing the full path would silently drop every file if any
# parent directory happened to contain the word "catalog".
parquet_files = sorted(
    p for p in VBT_DIR.glob("*.parquet")
    if 'catalog' not in p.name and DATE_START <= p.stem <= DATE_END
)
if not parquet_files:
    # Fail with an actionable message instead of an IndexError further down.
    raise FileNotFoundError(
        f"No klines parquet files found in {VBT_DIR} for {DATE_START}..{DATE_END}"
    )
date_strings = [pf.stem for pf in parquet_files]
print(f"\nKlines parquet files: {len(parquet_files)} dates ({date_strings[0]} to {date_strings[-1]})")

# ACB init — w750 will be overridden from parquet v750 column below
print("\nInitializing ACB v6...")
acb_master = AdaptiveCircuitBreaker()
acb_master.preload_w750(date_strings)
# Vol calibration from first 5 dates
# For each bar from index 60 on, take the preceding 50 BTCUSDT prices, compute
# the standard deviation of their simple returns, and keep the strictly
# positive values. vol_p60 is the 60th percentile of that pool (1e-4 when the
# pool is empty).
print("\nCalibrating vol p60 from first 5 dates...")
all_vols = []
for calib_file in parquet_files[:5]:
    calib_df = pd.read_parquet(calib_file)
    if 'BTCUSDT' in calib_df.columns:
        btc_px = calib_df['BTCUSDT'].values
        for end in range(60, len(btc_px)):
            window = btc_px[max(0, end - 50):end]
            if len(window) >= 10:
                sigma = float(np.std(np.diff(window) / window[:-1]))
                if sigma > 0:
                    all_vols.append(sigma)
if all_vols:
    vol_p60 = float(np.percentile(all_vols, 60))
else:
    vol_p60 = 1e-4
print(f" Vol p60: {vol_p60:.6f}")
# Pre-load all parquets
# pq_data maps date stem -> (frame, non-meta column names, per-bar vol array).
# The vol array is NaN for the first 50 bars and whenever BTCUSDT is absent;
# elsewhere it holds the std of simple returns over the preceding 50 prices.
print(f"\nPre-loading {len(parquet_files)} parquet files (this takes a few minutes)...")
t_load = time.time()
pq_data = {}
n_files = len(parquet_files)
for loaded, day_path in enumerate(parquet_files, start=1):
    day_df = pd.read_parquet(day_path)
    asset_cols = [col for col in day_df.columns if col not in META_COLS]
    day_vol = np.full(len(day_df), np.nan)
    if 'BTCUSDT' in day_df.columns:
        btc = day_df['BTCUSDT'].values
        for bar in range(50, len(btc)):
            window = btc[max(0, bar - 50):bar]
            if len(window) < 10:
                continue
            day_vol[bar] = float(np.std(np.diff(window) / window[:-1]))
    pq_data[day_path.stem] = (day_df, asset_cols, day_vol)
    if loaded % 100 == 0:
        print(f" Loaded {loaded}/{n_files} dates...")
print(f" Done in {time.time()-t_load:.1f}s")
# Override ACB w750 cache from klines v750 column (NG3 NPZ not available for 2024-2025)
# Each date's cache entry becomes the median of that day's non-null
# v750_lambda_max_velocity values; the threshold is then recomputed as a
# percentile over all non-zero cache entries.
print("\nPopulating ACB w750 cache from klines v750_lambda_max_velocity...")
for day_key, day_tuple in pq_data.items():
    day_frame = day_tuple[0]
    if 'v750_lambda_max_velocity' not in day_frame.columns:
        continue
    v750_series = day_frame['v750_lambda_max_velocity'].dropna()
    if len(v750_series) == 0:
        continue
    acb_master._w750_vel_cache[day_key] = float(v750_series.median())

_w750_vals = [vel for vel in acb_master._w750_vel_cache.values() if vel != 0.0]
if not _w750_vals:
    print(" WARNING: no klines w750 data — ACB beta will be constant 0.2")
else:
    acb_master._w750_threshold = float(
        np.percentile(_w750_vals, acb_master.config.W750_THRESHOLD_PCT)
    )
    print(f" w750 klines p60 threshold: {acb_master._w750_threshold:.6f}")
    print(f" Dates with klines w750 data: {len(_w750_vals)}/{len(date_strings)}")
# OB engine (shared, reset per TP run)
# NOTE(review): no explicit reset call on ob_eng is visible in this script;
# presumably the engine handles it when the OB engine is attached — confirm.
_ob_asset_biases = {
    "BTCUSDT": -0.086,
    "ETHUSDT": -0.092,
    "BNBUSDT": 0.05,
    "SOLUSDT": 0.05,
}
_mock_ob = MockOBProvider(
    imbalance_bias=-0.09,
    depth_scale=1.0,
    assets=OB_ASSETS,
    imbalance_biases=_ob_asset_biases,
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)
# ── TP sweep values ──────────────────────────────────────────────────────────────
TP_VALUES_BPS = list(range(85, 122, 2))  # 85,87,...,121 → 19 values
BASELINE_BPS = 95  # current champion (was 99 pre-sweep)

# Banner. The range line needs an explicit separator between the two bounds;
# without it the values run together and print as "85121 bps".
print(f"\n{'='*70}")
print(f" TP SWEEP — 795-DAY KLINES")
print(f" Range: {TP_VALUES_BPS[0]}-{TP_VALUES_BPS[-1]} bps, {len(TP_VALUES_BPS)} steps")
print(f" Baseline reference: {BASELINE_BPS}bps (current 55-day champion)")
print(f" Dates: {date_strings[0]} to {date_strings[-1]} ({len(date_strings)} days)")
print(f"{'='*70}\n")
# ── TP sweep ─────────────────────────────────────────────────────────────────────
# Starting equity is read from the engine configuration so that ROI, drawdown
# and the engine itself cannot drift apart if initial_capital is ever changed.
INITIAL_CAPITAL = BASE_ENGINE_KWARGS['initial_capital']

results = []
t_sweep_start = time.time()
for step_i, tp_bps in enumerate(TP_VALUES_BPS):
    tp_pct = tp_bps / 10000.0  # basis points -> fraction

    # A fresh engine per TP value; the OB engine, ACB and forewarner objects
    # are shared across all steps.
    # NOTE(review): MC_BASE_CFG is passed unchanged on every step, so the
    # forewarner receives its own fixed_tp_pct rather than the swept value —
    # confirm this is intended.
    kw = dict(BASE_ENGINE_KWARGS, fixed_tp_pct=tp_pct)
    engine = NDAlphaEngine(**kw)
    engine.set_ob_engine(ob_eng)
    engine.set_acb(acb_master)
    engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
    engine.set_esoteric_hazard_multiplier(0.0)

    # Replay every day in order, collecting the per-day result fields.
    dstats = []
    for ds in date_strings:
        df, acols, dvol = pq_data[ds]
        # Bars with undefined vol (NaN) are marked False.
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)
        r = engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)
        dstats.append({
            'pnl': r.get('pnl', 0.0),
            'capital': r.get('capital', INITIAL_CAPITAL),
            'trades': r.get('trades', 0),
        })

    # Trade-level statistics. A zero-PnL trade counts as a loss.
    tr = engine.trade_history
    wins = [t for t in tr if t.pnl_absolute > 0]
    losses = [t for t in tr if t.pnl_absolute <= 0]
    gw = sum(t.pnl_absolute for t in wins)
    gl = abs(sum(t.pnl_absolute for t in losses))
    roi = (engine.capital - INITIAL_CAPITAL) / INITIAL_CAPITAL * 100.0
    pf = gw / gl if gl > 0 else 999.0  # 999.0 is the "no losses" sentinel
    wr = len(wins) / len(tr) * 100.0 if tr else 0.0

    # Sharpe from daily PnL, annualised with sqrt(252).
    # NOTE(review): the dataset appears to cover calendar days; confirm 252
    # (rather than 365) is the intended annualisation factor.
    pnls = np.array([s['pnl'] for s in dstats])
    pnl_std = pnls.std()
    sharpe = float(pnls.mean() / pnl_std * np.sqrt(252)) if pnl_std > 0 else 0.0

    # Max drawdown (%) over end-of-day capital, with the peak seeded at the
    # starting equity.
    peak = INITIAL_CAPITAL
    max_dd = 0.0
    for c in (s['capital'] for s in dstats):
        if c > peak:
            peak = c
        dd = (peak - c) / peak * 100.0
        if dd > max_dd:
            max_dd = dd

    tp_hits = engine.tp_exits
    mh_exits = engine.hold_exits
    tp_rate = tp_hits / len(tr) * 100.0 if tr else 0.0

    # Cumulative wall-clock time since the sweep started (not per step).
    elapsed_so_far = time.time() - t_sweep_start

    row = {'tp_bps': tp_bps, 'roi': roi, 'pf': pf, 'dd': max_dd, 'sharpe': sharpe,
           'wr': wr, 'n_trades': len(tr), 'tp_hits': tp_hits, 'mh_exits': mh_exits,
           'tp_rate_pct': tp_rate}
    results.append(row)

    marker = f" <- BASELINE ({BASELINE_BPS}bps)" if tp_bps == BASELINE_BPS else ""
    print(f" [{step_i+1:2d}/{len(TP_VALUES_BPS)}] TP={tp_bps:3d}bps "
          f"ROI={roi:+7.2f}% PF={pf:.4f} DD={max_dd:5.2f}% "
          f"Sh={sharpe:.3f} WR={wr:.1f}% T={len(tr):5d} TP%={tp_rate:.1f}%"
          f" ({elapsed_so_far/60:.0f}min){marker}")
    sys.stdout.flush()
elapsed_total = time.time() - t_sweep_start
# ── Analysis ─────────────────────────────────────────────────────────────────────
# Pick the best row per metric plus the baseline row (falling back to the first
# row when the baseline TP is not part of the sweep), then print a summary and
# the full table.
baseline = next((row for row in results if row['tp_bps'] == BASELINE_BPS), results[0])
best_roi = max(results, key=lambda row: row['roi'])
best_pf = max(results, key=lambda row: row['pf'])
best_sharpe = max(results, key=lambda row: row['sharpe'])

_rule = '=' * 70
print(f"\n{_rule}")
print(f" TP SWEEP KLINES COMPLETE ({elapsed_total/60:.1f} min, {len(date_strings)} days)")
print(f"{_rule}")
print(
    f" Baseline ({BASELINE_BPS}bps): ROI={baseline['roi']:+.2f}% PF={baseline['pf']:.4f} "
    f"Sh={baseline['sharpe']:.3f} DD={baseline['dd']:.2f}% T={baseline['n_trades']}"
)
print(
    f" Best ROI: {best_roi['tp_bps']}bps → ROI={best_roi['roi']:+.2f}% "
    f"ΔROI={best_roi['roi']-baseline['roi']:+.2f}%"
)
print(
    f" Best PF: {best_pf['tp_bps']}bps → PF={best_pf['pf']:.4f} "
    f"ΔPF={best_pf['pf']-baseline['pf']:+.4f}"
)
print(
    f" Best Sharpe: {best_sharpe['tp_bps']}bps → Sh={best_sharpe['sharpe']:.3f} "
    f"ΔSh={best_sharpe['sharpe']-baseline['sharpe']:+.3f}"
)
print()
print(f" {'TP':>6} {'ROI':>8} {'PF':>7} {'DD':>6} {'Sharpe':>7} {'WR':>5} {'TP%':>5} {'Trades':>7}")
for row in results:
    # The best-ROI tag takes precedence when that row is also the baseline.
    if row['tp_bps'] == best_roi['tp_bps']:
        mk = " *BEST_ROI"
    elif row['tp_bps'] == BASELINE_BPS:
        mk = " *BASELINE"
    else:
        mk = ""
    print(
        f" {row['tp_bps']:>4}bps {row['roi']:>+7.2f}% {row['pf']:>7.4f} {row['dd']:>5.2f}% "
        f"{row['sharpe']:>7.3f} {row['wr']:>4.1f}% {row['tp_rate_pct']:>4.1f}% {row['n_trades']:>7}{mk}"
    )
# ── Save ─────────────────────────────────────────────────────────────────────────
# Writes one CSV (a row per TP value) and one JSON summary into run_logs/ next
# to this script, both stamped with the local time, then prints the verdict.
ts = datetime.now().strftime('%Y%m%d_%H%M%S')
run_dir = Path(__file__).parent / 'run_logs'
run_dir.mkdir(exist_ok=True)
csv_path = run_dir / f'tp_sweep_klines_{ts}.csv'
json_path = run_dir / f'tp_sweep_klines_{ts}.json'

# Explicit UTF-8 so the output does not depend on the platform default codec.
with open(csv_path, 'w', newline='', encoding='utf-8') as f:
    w = csv.DictWriter(f, fieldnames=list(results[0].keys()))
    w.writeheader()
    w.writerows(results)

# Derive the step from the sweep definition instead of repeating the literal.
tp_step_bps = TP_VALUES_BPS[1] - TP_VALUES_BPS[0] if len(TP_VALUES_BPS) > 1 else 0

summary = {
    'experiment': 'tp_sweep_klines_795day',
    'date_range': f'{DATE_START}_to_{DATE_END}',
    'n_dates': len(date_strings),
    'tp_range_bps': [TP_VALUES_BPS[0], TP_VALUES_BPS[-1]],
    'tp_step_bps': tp_step_bps,
    'n_steps': len(TP_VALUES_BPS),
    'baseline_tp_bps': BASELINE_BPS,
    'vd_threshold': VD_THRESHOLD,
    'vd_extreme': VD_EXTREME,
    'vol_p60': vol_p60,
    'baseline': baseline,
    'best_roi': best_roi,
    'best_pf': best_pf,
    'best_sharpe': best_sharpe,
    'delta_roi_best_vs_baseline': best_roi['roi'] - baseline['roi'],
    'elapsed_s': elapsed_total,
    'run_ts': ts,
    'all_results': results,
}
with open(json_path, 'w', encoding='utf-8') as f:
    json.dump(summary, f, indent=2)

print(f"\nSaved:")
print(f" {csv_path}")
print(f" {json_path}")

# Compare against BASELINE_BPS rather than a literal 95 so the verdict stays
# correct if the baseline constant is ever changed.
if best_roi['tp_bps'] == BASELINE_BPS:
    verdict = f"{BASELINE_BPS}bps HOLDS on 795-day"
else:
    verdict = f"Optimal is {best_roi['tp_bps']}bps on 795-day — review blue.yml"
print(f"\nVerdict: {verdict}")