Files
DOLPHIN/nautilus_dolphin/test_pf_klines_2y_long.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

327 lines
14 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""LONG Validation — 2Y Klines, Mirror Hypothesis.
Hypothesis: vel_div = w50_vel - w150_vel is symmetric.
SHORT (champion): vel_div <= -0.02 → covariance structure breaking DOWN → SHORT
LONG (this run): vel_div >= +0.02 → covariance structure breaking UP → LONG
Same engine stack, same 795-day dataset, same sizing, same ACB/OB/MC.
Only change: regime_direction = +1 (LONG), threshold sign flipped.
If PF_long is comparable to PF_short (1.148), the mirror hypothesis holds and
a bidirectional system becomes the path to smooth cashflow.
"""
import sys, time, json, csv
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import numpy as np
import pandas as pd
sys.path.insert(0, str(Path(__file__).parent))
# ---------------------------------------------------------------------------
# Kernel warm-up.
# Every *_nb kernel is invoked once on tiny dummy inputs before the backtest
# starts, presumably so that numba compilation happens up front rather than
# inside the timed run. The timer starts before the kernel imports, so the
# reported figure covers import time as well.
# ---------------------------------------------------------------------------
print("Compiling numba kernels...")
_jit_started = time.time()

from nautilus_dolphin.nautilus.alpha_asset_selector import (
    compute_irp_nb,
    compute_ars_nb,
    rank_assets_irp_nb,
)
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
    OBFeatureEngine,
    compute_imbalance_nb,
    compute_depth_1pct_nb,
    compute_depth_quality_nb,
    compute_fill_probability_nb,
    compute_spread_proxy_nb,
    compute_depth_asymmetry_nb,
    compute_imbalance_persistence_nb,
    compute_withdrawal_velocity_nb,
    compute_market_agreement_nb,
    compute_cascade_signal_nb,
)
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider

# Asset-selection, sizing and direction-confirm kernels.
_warm_series = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_warm_series, -1)
compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(
    np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20,
)
compute_sizing_nb(
    +0.55, -0.50, -1.25, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
    np.zeros(4, dtype=np.int64),
    np.zeros(4, dtype=np.int64),
    np.zeros(5, dtype=np.float64),
    0, -1, 0.01, 0.04,
)
check_dc_nb(_warm_series, 3, 1, 0.75)

# Order-book feature kernels, fed two five-element float64 vectors.
_warm_b = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64)
_warm_a = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64)
compute_imbalance_nb(_warm_b, _warm_a)
compute_depth_1pct_nb(_warm_b, _warm_a)
compute_depth_quality_nb(210.0, 200.0)
compute_fill_probability_nb(1.0)
compute_spread_proxy_nb(_warm_b, _warm_a)
compute_depth_asymmetry_nb(_warm_b, _warm_a)
compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2)
compute_withdrawal_velocity_nb(np.array([100.0, 110.0], dtype=np.float64), 1)
compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)

print(f" JIT: {time.time()-_jit_started:.1f}s")
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
from mc.mc_ml import DolphinForewarner
# ---------------------------------------------------------------------------
# Run configuration.
# ---------------------------------------------------------------------------

# Folder scanned for the cached klines parquet files used by this run.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")

# Inclusive evaluation window; compared as strings against each parquet file stem.
DATE_START = '2024-01-01'
DATE_END = '2026-03-05'

# Non-asset columns of a daily frame. Any column not listed here is treated
# as an asset column when the frames are pre-loaded.
META_COLS = {
    'timestamp',
    'scan_number',
    'v50_lambda_max_velocity',
    'v150_lambda_max_velocity',
    'v300_lambda_max_velocity',
    'v750_lambda_max_velocity',
    'vel_div',
    'instability_50',
    'instability_150',
}

# LONG mirror thresholds — assigned onto engine.signal_gen after the engine is
# built, because long_threshold is a separate setting from the SHORT threshold.
# Mirror of SHORT: short=-0.02/-0.05 → long=+0.02/+0.05
VD_THRESHOLD_LONG = +0.02
VD_EXTREME_LONG = +0.05

# Keyword arguments passed verbatim to NDAlphaEngine(...).
ENGINE_KWARGS = {
    'initial_capital': 25000.0,
    # SHORT threshold (unused when direction=+1, but must not corrupt signal_gen)
    'vel_div_threshold': -0.02,
    'vel_div_extreme': -0.05,
    'min_leverage': 0.5,
    'max_leverage': 5.0,
    'leverage_convexity': 3.0,
    'fraction': 0.20,
    'fixed_tp_pct': 0.0099,
    'stop_pct': 1.0,
    'max_hold_bars': 120,
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.0,
    'dc_leverage_reduce': 0.5,
    'use_asset_selection': True,
    'min_irp_alignment': 0.45,
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    'use_ob_edge': True,
    'ob_edge_bps': 5.0,
    'ob_confirm_rate': 0.40,
    'lookback': 100,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    'seed': 42,
}

# Directory handed to DolphinForewarner(models_dir=...), kept as a plain string.
MC_MODELS_DIR = str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))

# Base configuration handed to engine.set_mc_forewarner(...).
# NOTE(review): the thresholds and ob_imbalance_bias here carry the SHORT-side
# signs even though this is the LONG run — presumably deliberate so the
# forewarner sees the baseline parameterisation; confirm with the MC owner.
MC_BASE_CFG = dict(
    trial_id=0,
    vel_div_threshold=-0.02,
    vel_div_extreme=-0.05,
    use_direction_confirm=True,
    dc_lookback_bars=7,
    dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True,
    dc_leverage_boost=1.00,
    dc_leverage_reduce=0.50,
    vd_trend_lookback=10,
    min_leverage=0.50,
    max_leverage=5.00,
    leverage_convexity=3.00,
    fraction=0.20,
    use_alpha_layers=True,
    use_dynamic_leverage=True,
    fixed_tp_pct=0.0099,
    stop_pct=1.00,
    max_hold_bars=120,
    use_sp_fees=True,
    use_sp_slippage=True,
    sp_maker_entry_rate=0.62,
    sp_maker_exit_rate=0.50,
    use_ob_edge=True,
    ob_edge_bps=5.00,
    ob_confirm_rate=0.40,
    ob_imbalance_bias=-0.09,
    ob_depth_scale=1.00,
    use_asset_selection=True,
    min_irp_alignment=0.45,
    lookback=100,
    acb_beta_high=0.80,
    acb_beta_low=0.20,
    acb_w750_threshold_pct=60,
)
# ---------------------------------------------------------------------------
# Load the MC-Forewarner, discover the daily parquet files inside the
# evaluation window, and prime the adaptive circuit breaker with those dates.
# ---------------------------------------------------------------------------
print("\nLoading MC-Forewarner...")
forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)

# One parquet per day; the file stem is compared as a string against the
# window bounds. Any path containing 'catalog' is skipped.
parquet_files = sorted(
    p for p in VBT_DIR.glob("*.parquet")
    if 'catalog' not in str(p) and DATE_START <= p.stem <= DATE_END
)

# Fail fast with an actionable message: without this guard an empty selection
# surfaces as a bare IndexError on date_strings[0] in the print below.
if not parquet_files:
    raise FileNotFoundError(
        f"No daily parquet files found in {VBT_DIR} for {DATE_START} to {DATE_END}"
    )

date_strings = [pf.stem for pf in parquet_files]
print(f"Dates: {len(parquet_files)} ({date_strings[0]} to {date_strings[-1]})")

# ACB
acb = AdaptiveCircuitBreaker()
acb.preload_w750(date_strings)
print(f"\nACB w750 p60: {acb._w750_threshold:.6f}")
# Vol p60
# Calibrate the volatility gate from the first five days only: for every bar
# from index 60 onward, take the standard deviation of simple returns over the
# preceding (up to) 50 BTCUSDT prices and keep the strictly positive values.
all_vols = []
for calib_path in parquet_files[:5]:
    calib_df = pd.read_parquet(calib_path)
    if 'BTCUSDT' in calib_df.columns:
        btc_prices = calib_df['BTCUSDT'].values
        for bar_end in range(60, len(btc_prices)):
            window = btc_prices[max(0, bar_end - 50):bar_end]
            if len(window) >= 10:
                sigma = float(np.std(np.diff(window) / window[:-1]))
                if sigma > 0:
                    all_vols.append(sigma)

# 60th percentile of the collected values; 1e-4 is the fallback when none exist.
if all_vols:
    vol_p60 = float(np.percentile(all_vols, 60))
else:
    vol_p60 = 1e-4
print(f"Vol p60: {vol_p60:.6f}")
# Pre-load
# Read every daily parquet once and keep, per date, the tuple
# (frame, asset column names, per-bar BTCUSDT volatility array).
print(f"\nPre-loading {len(parquet_files)} parquets...")
_load_started = time.time()
pq_data = {}
for n_loaded, day_path in enumerate(parquet_files, start=1):
    day_df = pd.read_parquet(day_path)
    asset_names = [col for col in day_df.columns if col not in META_COLS]

    # Volatility stays NaN for the first 50 bars, and for the whole day when
    # the frame has no BTCUSDT column.
    day_vol = np.full(len(day_df), np.nan)
    if 'BTCUSDT' in day_df.columns:
        btc_prices = day_df['BTCUSDT'].values
        for bar in range(50, len(btc_prices)):
            window = btc_prices[max(0, bar - 50):bar]
            if len(window) < 10:
                continue
            day_vol[bar] = float(np.std(np.diff(window) / window[:-1]))

    pq_data[day_path.stem] = (day_df, asset_names, day_vol)

    # Progress line every 100 files.
    if n_loaded % 100 == 0:
        print(f" {n_loaded}/{len(parquet_files)}...")
print(f" Done in {time.time()-_load_started:.1f}s")
# ACB w750 from klines parquet
# Re-derive the circuit breaker's w750 threshold from the pre-loaded klines
# frames instead of whatever acb.preload_w750() produced.
# NOTE(review): this writes the breaker's underscore-prefixed attributes
# (_w750_vel_cache, _w750_threshold) directly — confirm that is the intended
# way to override them.
# For each day whose frame has a 'v750_lambda_max_velocity' column with at
# least one non-null value, cache that day's median under its date string.
for ds, (df, _, _) in pq_data.items():
if 'v750_lambda_max_velocity' in df.columns:
v750 = df['v750_lambda_max_velocity'].dropna()
if len(v750) > 0:
acb._w750_vel_cache[ds] = float(v750.median())
# Exact zeros are left out of the percentile sample (presumably placeholder
# entries for days without data — TODO confirm).
_w750 = [v for v in acb._w750_vel_cache.values() if v != 0.0]
# The threshold is only replaced when at least one usable value exists; the
# percentile rank is read from acb.config.W750_THRESHOLD_PCT.
if _w750:
acb._w750_threshold = float(np.percentile(_w750, acb.config.W750_THRESHOLD_PCT))
# NOTE(review): leading indentation is missing in this copy of the file, so it
# cannot be determined here whether this print belongs inside the `if _w750:`
# guard or runs unconditionally — check against the repository version.
print(f"ACB w750 p60 (klines): {acb._w750_threshold:.6f}")
# OB
# Order-book features come from a mock provider rather than recorded books.
OB_ASSETS = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"]

# Per-asset imbalance biases handed to the mock provider.
_ob_asset_biases = {
    "BTCUSDT": +0.086,
    "ETHUSDT": +0.092,
    "BNBUSDT": -0.05,
    "SOLUSDT": -0.05,
}
_mock_ob = MockOBProvider(
    assets=OB_ASSETS,
    depth_scale=1.0,
    imbalance_bias=+0.09,  # LONG: positive imbalance bias
    imbalance_biases=_ob_asset_biases,
)

ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)
# ---------------------------------------------------------------------------
# Backtest run: build the engine, attach OB / ACB / MC-Forewarner, set the
# LONG thresholds, then process each pre-loaded day with direction=+1.
# ---------------------------------------------------------------------------
print(f"\n=== 2Y KLINES LONG VALIDATION: vel_div >= +{VD_THRESHOLD_LONG} ===")
print(f" Period: {DATE_START} to {DATE_END} ({len(parquet_files)} days)")
print(f" Threshold: +{VD_THRESHOLD_LONG} (mirror of SHORT -{VD_THRESHOLD_LONG})")
print(" SHORT baseline: ROI=+172.34% PF=1.1482 DD=31.69% Sh=0.982 WR=58.88%")

t0 = time.time()
engine = NDAlphaEngine(**ENGINE_KWARGS)
engine.set_ob_engine(ob_eng)
engine.set_acb(acb)
engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
engine.set_esoteric_hazard_multiplier(0.0)

# Wire LONG thresholds into signal generator (long_threshold is separate from vel_div_threshold)
engine.signal_gen.long_threshold = VD_THRESHOLD_LONG
engine.signal_gen.long_extreme = VD_EXTREME_LONG

# Fields copied from each day's result, with the value recorded when the
# engine's result does not provide that key.
_DAILY_DEFAULTS = (
    ('pnl', 0.0),
    ('capital', ENGINE_KWARGS['initial_capital']),
    ('trades', 0),
    ('boost', 1.0),
    ('beta', 0.0),
    ('mc_status', 'OK'),
)

all_daily = []
for date_str in date_strings:
    day_entry = pq_data.get(date_str)
    if day_entry is None:
        continue
    day_df, day_assets, day_vol = day_entry

    # A bar passes the volatility gate only when its volatility is finite and
    # above the calibrated p60 level.
    vol_gate = np.where(np.isfinite(day_vol), day_vol > vol_p60, False)
    result = engine.process_day(date_str, day_df, day_assets, vol_regime_ok=vol_gate, direction=+1)

    daily_row = {'date': date_str}
    daily_row.update((key, result.get(key, fallback)) for key, fallback in _DAILY_DEFAULTS)
    all_daily.append(daily_row)

    # Progress line every 50 processed days.
    days_done = len(all_daily)
    if days_done % 50 == 0:
        roi_so_far = (all_daily[-1]['capital'] - 25000) / 25000 * 100
        recent_trades = sum(r['trades'] for r in all_daily[-50:])
        print(f" [{date_str}] Day {days_done}/{len(date_strings)} | ROI={roi_so_far:+.1f}% | Last-50d T={recent_trades}")

t_elapsed = time.time() - t0
# ---------------------------------------------------------------------------
# Performance statistics derived from the daily rows and the engine's trades.
# ---------------------------------------------------------------------------

# Flatten the engine's trade records into plain dicts.
all_trades = []
for closed in engine.trade_history:
    all_trades.append({
        'pnl': closed.pnl_absolute,
        'pnl_pct': closed.pnl_pct*100,
        'asset': closed.asset,
        'bars_held': closed.bars_held,
        'exit_reason': closed.exit_reason,
        'leverage': closed.leverage,
    })
n_trades = len(all_trades)

# Equity curve and return on the 25000 starting capital.
capitals = [day['capital'] for day in all_daily]
pnls = [day['pnl'] for day in all_daily]
if capitals:
    final_cap = capitals[-1]
else:
    final_cap = 25000.0
roi = (final_cap - 25000) / 25000 * 100

# Maximum drawdown in percent, measured from the running peak of end-of-day capital.
peak = 25000.0
max_dd = 0.0
for cap in capitals:
    peak = max(peak, cap)
    dd = (peak - cap) / peak * 100
    max_dd = max(max_dd, dd)

# Sharpe from daily PnL, scaled by sqrt(252); 0.0 when the deviation is not positive.
pnl_arr = np.array(pnls)
_pnl_sigma = pnl_arr.std()
if _pnl_sigma > 0:
    sharpe = float(pnl_arr.mean() / _pnl_sigma * np.sqrt(252))
else:
    sharpe = 0.0

# Winners have strictly positive PnL; zero-PnL trades are counted as losses.
wins, losses = [], []
for trade in all_trades:
    if trade['pnl'] > 0:
        wins.append(trade)
    elif trade['pnl'] <= 0:
        losses.append(trade)
gw = sum(trade['pnl'] for trade in wins)
gl = abs(sum(trade['pnl'] for trade in losses))
if gl > 0:
    pf = gw / gl
else:
    pf = float('inf')
if all_trades:
    wr = len(wins) / n_trades * 100
else:
    wr = 0.0

# PnL before vs. from 2025-01-01, and their ratio (NaN when the first is zero).
h1_pnl = sum(day['pnl'] for day in all_daily if day['date'] < '2025-01-01')
h2_pnl = sum(day['pnl'] for day in all_daily if day['date'] >= '2025-01-01')
if h1_pnl != 0:
    h2h1 = h2_pnl / h1_pnl
else:
    h2h1 = float('nan')

if n_trades:
    tp_rate = engine.tp_exits / n_trades * 100
else:
    tp_rate = 0.0
if all_trades:
    avg_lev = float(np.mean([trade['leverage'] for trade in all_trades]))
else:
    avg_lev = 0.0

# Daily rows grouped by 'YYYY-MM' for the monthly breakdown.
monthly_buckets = defaultdict(list)
for day in all_daily:
    month_key = day['date'][:7]
    monthly_buckets[month_key].append(day)
# ---------------------------------------------------------------------------
# Console report: headline statistics, comparison with the SHORT baseline,
# the mirror-hypothesis verdict, and quarterly / monthly ROI breakdowns.
# ---------------------------------------------------------------------------

# Guarded so a run with no processed days reports 0.0 trades/day instead of
# raising ZeroDivisionError (the saved summary already guards the same ratio).
trades_per_day = n_trades / len(all_daily) if all_daily else 0.0

print(f"\n{'='*65}")
print(f" 2Y KLINES LONG VALIDATION ({DATE_START} to {DATE_END})")
print(f"{'='*65}")
print(f" ROI: {roi:+.2f}%")
print(f" PF: {pf:.4f}")
print(f" Max DD: {max_dd:.2f}%")
print(f" Sharpe: {sharpe:.3f}")
print(f" Win Rate: {wr:.1f}% (W={len(wins)} L={len(losses)})")
print(f" Trades: {n_trades:,} ({trades_per_day:.1f}/day avg)")
print(f" TP rate: {tp_rate:.1f}%")
print(f" Avg lev: {avg_lev:.2f}x")
print(f" H1 (2024): {h1_pnl/25000*100:+.2f}%")
print(f" H2 (2025+): {h2_pnl/25000*100:+.2f}%")
print(f" H2/H1: {h2h1:.3f}x")
print(f" Runtime: {t_elapsed/60:.1f} min")
print(f"{'='*65}")
print(f" SHORT baseline: ROI=+172.34% PF=1.1482 DD=31.69% Sh=0.982 WR=58.88% T=3042")
print(f" LONG result: ROI={roi:+.2f}% PF={pf:.4f} DD={max_dd:.2f}% Sh={sharpe:.3f} WR={wr:.1f}% T={n_trades}")

# Verdict on the mirror hypothesis, decided by profit factor.
# With zero trades PF is reported as infinity, which would otherwise be read
# as "CONFIRMED"; that case is called out explicitly instead.
if n_trades == 0:
    verdict = "INCONCLUSIVE — no LONG trades were taken, so PF is undefined. Mirror hypothesis untested."
elif pf > 1.05:
    verdict = "CONFIRMED — LONG vel_div signal has edge. Bidirectional system viable."
elif pf > 1.00:
    verdict = "MARGINAL — weak LONG edge. Further calibration needed before bidirectional."
else:
    # Report the threshold actually used by this run rather than a fixed number.
    verdict = f"REJECTED — LONG vel_div has no edge at +{VD_THRESHOLD_LONG} threshold. Mirror hypothesis fails."
print(f"\n Mirror hypothesis: {verdict}")

# Quarterly breakdown: the processed days are cut into four consecutive
# slices of equal length (the last one absorbs the remainder). Each slice's
# ROI is measured against the capital at the end of the preceding day.
print(f"\n Quarterly breakdown:")
q = len(all_daily) // 4
for qi, (qa, qb) in enumerate([(0,q),(q,2*q),(2*q,3*q),(3*q,len(all_daily))]):
    sl = all_daily[qa:qb]
    if not sl: continue
    ic = all_daily[qa-1]['capital'] if qa > 0 else 25000.0
    r_s = (sl[-1]['capital'] - ic) / ic * 100
    t_s = sum(s['trades'] for s in sl)
    print(f" Q{qi+1} ({sl[0]['date']} {sl[-1]['date']}): ROI={r_s:+.1f}% T={t_s}")

# Monthly breakdown: ROI of each calendar month relative to the capital at the
# end of the day before the month's first processed day.
print(f"\n Monthly breakdown:")
for mo in sorted(monthly_buckets):
    sl_m = monthly_buckets[mo]
    idx0 = next(i for i,r in enumerate(all_daily) if r['date'] == sl_m[0]['date'])
    ic_m = all_daily[idx0-1]['capital'] if idx0 > 0 else 25000.0
    r_m = (sl_m[-1]['capital'] - ic_m) / ic_m * 100
    t_m = sum(s['trades'] for s in sl_m)
    print(f" {mo}: ROI={r_m:+.1f}% T={t_m}")
# Save
# Persist the run: a JSON summary plus a per-day CSV, both timestamped and
# written to a 'run_logs' folder next to this script.
ts = datetime.now().strftime('%Y%m%d_%H%M%S')
run_dir = Path(__file__).parent.joinpath('run_logs')
run_dir.mkdir(exist_ok=True)

summary = {
    'experiment': '2y_klines_long_validation',
    'date_range': f'{DATE_START}_to_{DATE_END}',
    'direction': 'LONG',
    'vd_threshold': VD_THRESHOLD_LONG,
    'vd_extreme': VD_EXTREME_LONG,
    'roi_pct': roi,
    'pf': pf,
    'max_dd_pct': max_dd,
    'sharpe': sharpe,
    'win_rate_pct': wr,
    'n_trades': n_trades,
    'n_days': len(all_daily),
    'trades_per_day': n_trades/len(all_daily) if all_daily else 0,
    'h1_roi_pct': h1_pnl/25000*100,
    'h2_roi_pct': h2_pnl/25000*100,
    'h2h1': h2h1,
    'tp_rate_pct': tp_rate,
    'avg_leverage': avg_lev,
    'mirror_hypothesis_verdict': verdict,
    'short_baseline': {
        'roi': 172.34,
        'pf': 1.1482,
        'dd': 31.69,
        'sharpe': 0.982,
        'wr': 58.88,
        'n_trades': 3042,
    },
    'engine_kwargs': ENGINE_KWARGS,
    'runtime_s': t_elapsed,
    'run_ts': ts,
}

_summary_path = run_dir / f'klines_2y_long_{ts}.json'
with _summary_path.open('w') as summary_file:
    json.dump(summary, summary_file, indent=2)

# Daily CSV: floats are written with four decimals; trades and mc_status as-is.
_daily_path = run_dir / f'klines_2y_long_daily_{ts}.csv'
with _daily_path.open('w', newline='') as daily_file:
    daily_writer = csv.writer(daily_file)
    daily_writer.writerow(['date','pnl','capital','trades','boost','beta','mc_status'])
    daily_writer.writerows(
        [
            day['date'],
            f"{day['pnl']:.4f}",
            f"{day['capital']:.4f}",
            day['trades'],
            f"{day['boost']:.4f}",
            f"{day['beta']:.4f}",
            day['mc_status'],
        ]
        for day in all_daily
    )

print(f"\nSaved: run_logs/klines_2y_long_{ts}.json + _daily_{ts}.csv")