Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
327 lines
14 KiB
Python
Executable File
327 lines
14 KiB
Python
Executable File
"""LONG Validation — 2Y Klines, Mirror Hypothesis.
|
||
|
||
Hypothesis: vel_div = w50_vel - w150_vel is symmetric.
  SHORT (champion): vel_div <= -0.02  → covariance structure breaking DOWN → SHORT
  LONG  (this run): vel_div >= +0.02  → covariance structure breaking UP   → LONG
|
||
|
||
Same engine stack, same 795-day dataset, same sizing, same ACB/OB/MC.
|
||
Only change: regime_direction = +1 (LONG), threshold sign flipped.
|
||
|
||
If PF_long is comparable to PF_short (1.148), the mirror hypothesis holds and
|
||
a bidirectional system becomes the path to smooth cashflow.
|
||
"""
|
||
import sys, time, json, csv
|
||
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
|
||
from pathlib import Path
|
||
from datetime import datetime
|
||
from collections import defaultdict
|
||
import numpy as np
|
||
import pandas as pd
|
||
|
||
sys.path.insert(0, str(Path(__file__).parent))
|
||
|
||
# Warm-up: import the "_nb" kernels and call each one once on tiny dummy
# inputs so that compilation happens here (and is timed) instead of inside
# the backtest loop.  Return values are deliberately discarded.
print("Compiling numba kernels...")
t0c = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import (
    compute_irp_nb,
    compute_ars_nb,
    rank_assets_irp_nb,
)
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
    OBFeatureEngine,
    compute_imbalance_nb,
    compute_depth_1pct_nb,
    compute_depth_quality_nb,
    compute_fill_probability_nb,
    compute_spread_proxy_nb,
    compute_depth_asymmetry_nb,
    compute_imbalance_persistence_nb,
    compute_withdrawal_velocity_nb,
    compute_market_agreement_nb,
    compute_cascade_signal_nb,
)
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider

# Asset-selection, sizing and direction-confirmation kernels.
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1)
compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(
    +0.55, -0.50, -1.25, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
    np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
    np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04,
)
check_dc_nb(_p, 3, 1, 0.75)

# Order-book feature kernels, fed with two small dummy arrays.
_b = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64)
_a = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64)
compute_imbalance_nb(_b, _a)
compute_depth_1pct_nb(_b, _a)
compute_depth_quality_nb(210.0, 200.0)
compute_fill_probability_nb(1.0)
compute_spread_proxy_nb(_b, _a)
compute_depth_asymmetry_nb(_b, _a)
compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2)
compute_withdrawal_velocity_nb(np.array([100.0, 110.0], dtype=np.float64), 1)
compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)
print(f" JIT: {time.time()-t0c:.1f}s")
|
||
|
||
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
|
||
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
|
||
from mc.mc_ml import DolphinForewarner
|
||
|
||
# --- Run configuration -------------------------------------------------------

# Directory holding one parquet file per day; the file stem is the date string.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")

# Inclusive date window, compared as strings against the parquet file stems.
DATE_START = '2024-01-01'
DATE_END = '2026-03-05'

# Columns that are NOT per-asset price series; every other column is treated
# as an asset column when the parquets are loaded.
META_COLS = {
    'timestamp',
    'scan_number',
    'v50_lambda_max_velocity',
    'v150_lambda_max_velocity',
    'v300_lambda_max_velocity',
    'v750_lambda_max_velocity',
    'vel_div',
    'instability_50',
    'instability_150',
}

# LONG mirror thresholds — assigned onto engine.signal_gen after the engine is
# constructed (long_threshold is a separate attribute from the SHORT one).
# Mirror of SHORT: short=-0.02/-0.05 → long=+0.02/+0.05
VD_THRESHOLD_LONG = +0.02
VD_EXTREME_LONG = +0.05

# Keyword arguments for NDAlphaEngine.  Key order is preserved because this
# dict is also written verbatim into the JSON run summary.
ENGINE_KWARGS = {
    'initial_capital': 25000.0,
    # SHORT threshold (unused when direction=+1, but must not corrupt signal_gen)
    'vel_div_threshold': -0.02,
    'vel_div_extreme': -0.05,
    'min_leverage': 0.5,
    'max_leverage': 5.0,
    'leverage_convexity': 3.0,
    'fraction': 0.20,
    'fixed_tp_pct': 0.0099,
    'stop_pct': 1.0,
    'max_hold_bars': 120,
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.0,
    'dc_leverage_reduce': 0.5,
    'use_asset_selection': True,
    'min_irp_alignment': 0.45,
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    'use_ob_edge': True,
    'ob_edge_bps': 5.0,
    'ob_confirm_rate': 0.40,
    'lookback': 100,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    'seed': 42,
}

# Location of the MC-Forewarner models and the base configuration handed to
# engine.set_mc_forewarner() together with the forewarner instance.
MC_MODELS_DIR = str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))
MC_BASE_CFG = dict(
    trial_id=0,
    vel_div_threshold=-0.02,
    vel_div_extreme=-0.05,
    use_direction_confirm=True,
    dc_lookback_bars=7,
    dc_min_magnitude_bps=0.75,
    dc_skip_contradicts=True,
    dc_leverage_boost=1.00,
    dc_leverage_reduce=0.50,
    vd_trend_lookback=10,
    min_leverage=0.50,
    max_leverage=5.00,
    leverage_convexity=3.00,
    fraction=0.20,
    use_alpha_layers=True,
    use_dynamic_leverage=True,
    fixed_tp_pct=0.0099,
    stop_pct=1.00,
    max_hold_bars=120,
    use_sp_fees=True,
    use_sp_slippage=True,
    sp_maker_entry_rate=0.62,
    sp_maker_exit_rate=0.50,
    use_ob_edge=True,
    ob_edge_bps=5.00,
    ob_confirm_rate=0.40,
    ob_imbalance_bias=-0.09,
    ob_depth_scale=1.00,
    use_asset_selection=True,
    min_irp_alignment=0.45,
    lookback=100,
    acb_beta_high=0.80,
    acb_beta_low=0.20,
    acb_w750_threshold_pct=60,
)
|
||
|
||
# Load the MC-Forewarner models used by the engine.
print("\nLoading MC-Forewarner...")
forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)

# Discover the daily parquet files: the file stem is compared as a string
# against the inclusive DATE_START..DATE_END window, and any path containing
# 'catalog' is skipped.
parquet_files = sorted(
    p for p in VBT_DIR.glob("*.parquet")
    if 'catalog' not in str(p) and DATE_START <= p.stem <= DATE_END
)
if not parquet_files:
    # Fail with an actionable message instead of an IndexError on
    # date_strings[0] below (wrong directory, empty cache, bad date window).
    raise FileNotFoundError(
        f"No parquet files with stems between {DATE_START} and {DATE_END} "
        f"found in {VBT_DIR}"
    )
date_strings = [pf.stem for pf in parquet_files]
print(f"Dates: {len(parquet_files)} ({date_strings[0]} to {date_strings[-1]})")

# ACB: build the circuit breaker and let it preload its w750 data for the
# selected dates, then report the threshold it derived.
acb = AdaptiveCircuitBreaker()
acb.preload_w750(date_strings)
print(f"\nACB w750 p60: {acb._w750_threshold:.6f}")
|
||
|
||
# Vol p60: 60th percentile of the BTCUSDT rolling return volatility, sampled
# from the first five daily files only.  Each sample is the std of simple
# returns over the 50 rows preceding row i (i starts at 60); only strictly
# positive values are kept.
all_vols = []
for pf in parquet_files[:5]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' in df.columns:
        pr = df['BTCUSDT'].values
        for i in range(60, len(pr)):
            seg = pr[max(0, i - 50):i]
            if len(seg) >= 10:
                v = float(np.std(np.diff(seg) / seg[:-1]))
                if v > 0:
                    all_vols.append(v)

if all_vols:
    vol_p60 = float(np.percentile(all_vols, 60))
else:
    # No usable sample: fall back to a fixed small volatility level.
    vol_p60 = 1e-4
print(f"Vol p60: {vol_p60:.6f}")
|
||
|
||
# Pre-load every daily parquet into memory.  For each date we keep:
#   (dataframe, list of non-META columns, per-row BTCUSDT rolling volatility)
# The volatility array is NaN wherever it is not computed (first 50 rows, or
# the whole day when the file has no BTCUSDT column).
print(f"\nPre-loading {len(parquet_files)} parquets...")
t_load = time.time()
pq_data = {}
for i, pf in enumerate(parquet_files):
    df = pd.read_parquet(pf)
    ac = [col for col in df.columns if col not in META_COLS]
    dv = np.full(len(df), np.nan)
    if 'BTCUSDT' in df.columns:
        bp = df['BTCUSDT'].values
        for j in range(50, len(bp)):
            # Std of simple returns over the 50 rows preceding row j.
            seg = bp[max(0, j - 50):j]
            if len(seg) >= 10:
                dv[j] = float(np.std(np.diff(seg) / seg[:-1]))
    else:
        bp = None
    pq_data[pf.stem] = (df, ac, dv)
    if (i + 1) % 100 == 0:
        print(f" {i+1}/{len(parquet_files)}...")
print(f" Done in {time.time()-t_load:.1f}s")
|
||
|
||
# ACB w750 from klines parquet: overwrite the breaker's per-date cache with
# the daily median of v750_lambda_max_velocity taken from the loaded frames,
# then recompute its threshold as a percentile of the non-zero cached values.
for ds, (df, _, _) in pq_data.items():
    if 'v750_lambda_max_velocity' not in df.columns:
        continue
    v750 = df['v750_lambda_max_velocity'].dropna()
    if not v750.empty:
        acb._w750_vel_cache[ds] = float(v750.median())

_w750 = [v for v in acb._w750_vel_cache.values() if v != 0.0]
if _w750:
    acb._w750_threshold = float(
        np.percentile(_w750, acb.config.W750_THRESHOLD_PCT)
    )
print(f"ACB w750 p60 (klines): {acb._w750_threshold:.6f}")
|
||
|
||
# OB: order-book features come from a mock provider rather than recorded
# books.  The global bias is positive for this LONG run; per-asset overrides
# are listed explicitly below.
OB_ASSETS = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"]
_mock_ob = MockOBProvider(
    imbalance_bias=+0.09,  # LONG: positive imbalance bias
    depth_scale=1.0,
    assets=OB_ASSETS,
    imbalance_biases={
        "BTCUSDT": +0.086,
        "ETHUSDT": +0.092,
        "BNBUSDT": -0.05,
        "SOLUSDT": -0.05,
    },
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)
|
||
|
||
# Run banner, then start the wall-clock timer for the whole backtest.
for _banner_line in (
    f"\n=== 2Y KLINES LONG VALIDATION: vel_div >= +{VD_THRESHOLD_LONG} ===",
    f" Period: {DATE_START} to {DATE_END} ({len(parquet_files)} days)",
    f" Threshold: +{VD_THRESHOLD_LONG} (mirror of SHORT -{VD_THRESHOLD_LONG})",
    f" SHORT baseline: ROI=+172.34% PF=1.1482 DD=31.69% Sh=0.982 WR=58.88%",
):
    print(_banner_line)
t0 = time.time()

# Build the engine and attach the same collaborators as the SHORT run:
# order-book features, circuit breaker, MC forewarner; the esoteric hazard
# multiplier is set to 0.0.
engine = NDAlphaEngine(**ENGINE_KWARGS)
engine.set_ob_engine(ob_eng)
engine.set_acb(acb)
engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
engine.set_esoteric_hazard_multiplier(0.0)

# Wire LONG thresholds into the signal generator; long_threshold is a
# separate attribute from the vel_div_threshold passed via ENGINE_KWARGS.
_signal_gen = engine.signal_gen
_signal_gen.long_threshold = VD_THRESHOLD_LONG
_signal_gen.long_extreme = VD_EXTREME_LONG
|
||
|
||
# Main backtest loop: feed each pre-loaded day to the engine with
# direction=+1 (LONG) and record one summary row per processed day.
# The starting capital is read from ENGINE_KWARGS so that the progress ROI
# stays correct if initial_capital is ever changed.
initial_capital = ENGINE_KWARGS['initial_capital']
all_daily = []
for date_str in date_strings:
    if date_str not in pq_data:
        continue
    df, asset_cols, dvol_arr = pq_data[date_str]
    # Per-row volatility gate: True only where the rolling volatility is
    # finite and above the vol_p60 reference level.
    vol_ok = np.where(np.isfinite(dvol_arr), dvol_arr > vol_p60, False)
    result = engine.process_day(date_str, df, asset_cols, vol_regime_ok=vol_ok, direction=+1)
    all_daily.append({
        'date': date_str,
        'pnl': result.get('pnl', 0.0),
        'capital': result.get('capital', initial_capital),
        'trades': result.get('trades', 0),
        'boost': result.get('boost', 1.0),
        'beta': result.get('beta', 0.0),
        'mc_status': result.get('mc_status', 'OK'),
    })
    # Progress line every 50 processed days.
    if len(all_daily) % 50 == 0:
        roi = (all_daily[-1]['capital'] - initial_capital) / initial_capital * 100
        ntrades = sum(r['trades'] for r in all_daily[-50:])
        print(f" [{date_str}] Day {len(all_daily)}/{len(date_strings)} | ROI={roi:+.1f}% | Last-50d T={ntrades}")
|
||
|
||
# Wall-clock time of the backtest (t0 was taken just before engine setup).
t_elapsed = time.time() - t0

# Starting capital comes from the engine configuration rather than a
# repeated literal, so every ratio below follows ENGINE_KWARGS.
initial_capital = ENGINE_KWARGS['initial_capital']

# Flatten the engine's trade history into plain dicts.
all_trades = [
    {
        'pnl': t.pnl_absolute,
        'pnl_pct': t.pnl_pct * 100,
        'asset': t.asset,
        'bars_held': t.bars_held,
        'exit_reason': t.exit_reason,
        'leverage': t.leverage,
    }
    for t in engine.trade_history
]

capitals = [r['capital'] for r in all_daily]
pnls = [r['pnl'] for r in all_daily]
final_cap = capitals[-1] if capitals else initial_capital
roi = (final_cap - initial_capital) / initial_capital * 100

# Maximum drawdown (%) of the end-of-day capital curve, with the running
# peak seeded at the starting capital.
peak = initial_capital
max_dd = 0.0
for c in capitals:
    if c > peak:
        peak = c
    dd = (peak - c) / peak * 100
    if dd > max_dd:
        max_dd = dd

# Sharpe of daily PnL scaled by sqrt(252).  The std is computed once and the
# empty case is handled explicitly so NumPy does not warn about empty slices.
pnl_arr = np.array(pnls)
pnl_std = float(pnl_arr.std()) if pnl_arr.size else 0.0
sharpe = float(pnl_arr.mean() / pnl_std * np.sqrt(252)) if pnl_std > 0 else 0.0

# Trade-level statistics; a trade with pnl == 0 counts as a loss.
wins = [t for t in all_trades if t['pnl'] > 0]
losses = [t for t in all_trades if t['pnl'] <= 0]
gw = sum(t['pnl'] for t in wins)
gl = abs(sum(t['pnl'] for t in losses))
pf = gw / gl if gl > 0 else float('inf')
wr = len(wins) / len(all_trades) * 100 if all_trades else 0.0
n_trades = len(all_trades)

# PnL split at 2025-01-01 (dates compare as strings) and their ratio.
h1_pnl = sum(r['pnl'] for r in all_daily if r['date'] < '2025-01-01')
h2_pnl = sum(r['pnl'] for r in all_daily if r['date'] >= '2025-01-01')
h2h1 = h2_pnl / h1_pnl if h1_pnl != 0 else float('nan')

tp_rate = engine.tp_exits / n_trades * 100 if n_trades else 0.0
avg_lev = float(np.mean([t['leverage'] for t in all_trades])) if all_trades else 0.0

# Group the daily rows by 'YYYY-MM' (first seven characters of the date).
monthly_buckets = defaultdict(list)
for r in all_daily:
    monthly_buckets[r['date'][:7]].append(r)
|
||
|
||
# Final report: headline metrics, comparison against the SHORT baseline and
# the verdict on the mirror hypothesis.
initial_capital = ENGINE_KWARGS['initial_capital']
# Guarded like the 'trades_per_day' entry of the saved summary, so a run that
# processed no days still prints a report instead of dividing by zero.
trades_per_day = n_trades / len(all_daily) if all_daily else 0.0

print(f"\n{'='*65}")
print(f" 2Y KLINES LONG VALIDATION ({DATE_START} to {DATE_END})")
print(f"{'='*65}")
print(f" ROI: {roi:+.2f}%")
print(f" PF: {pf:.4f}")
print(f" Max DD: {max_dd:.2f}%")
print(f" Sharpe: {sharpe:.3f}")
print(f" Win Rate: {wr:.1f}% (W={len(wins)} L={len(losses)})")
print(f" Trades: {n_trades:,} ({trades_per_day:.1f}/day avg)")
print(f" TP rate: {tp_rate:.1f}%")
print(f" Avg lev: {avg_lev:.2f}x")
print(f" H1 (2024): {h1_pnl/initial_capital*100:+.2f}%")
print(f" H2 (2025+): {h2_pnl/initial_capital*100:+.2f}%")
print(f" H2/H1: {h2h1:.3f}x")
print(f" Runtime: {t_elapsed/60:.1f} min")
print(f"{'='*65}")
print(f" SHORT baseline: ROI=+172.34% PF=1.1482 DD=31.69% Sh=0.982 WR=58.88% T=3042")
print(f" LONG result: ROI={roi:+.2f}% PF={pf:.4f} DD={max_dd:.2f}% Sh={sharpe:.3f} WR={wr:.1f}% T={n_trades}")

# Verdict thresholds on the profit factor.  With zero trades the profit
# factor is +inf (no losses), which must not be reported as a confirmation.
if n_trades == 0:
    verdict = "INCONCLUSIVE — no LONG trades were taken. Mirror hypothesis not testable."
elif pf > 1.05:
    verdict = "CONFIRMED — LONG vel_div signal has edge. Bidirectional system viable."
elif pf > 1.00:
    verdict = "MARGINAL — weak LONG edge. Further calibration needed before bidirectional."
else:
    # Report the threshold that was actually used for this run.
    verdict = (
        f"REJECTED — LONG vel_div has no edge at +{VD_THRESHOLD_LONG} threshold. "
        "Mirror hypothesis fails."
    )
print(f"\n Mirror hypothesis: {verdict}")
|
||
|
||
# Quarterly breakdown: the processed days are cut into four consecutive
# slices of equal length (the last one absorbs the remainder).  ROI of a
# slice is measured from the closing capital of the day before it.
print(f"\n Quarterly breakdown:")
q = len(all_daily) // 4
_q_edges = [0, q, 2 * q, 3 * q, len(all_daily)]
for qi, (qa, qb) in enumerate(zip(_q_edges[:-1], _q_edges[1:])):
    sl = all_daily[qa:qb]
    if sl:
        ic = 25000.0 if qa == 0 else all_daily[qa - 1]['capital']
        r_s = (sl[-1]['capital'] - ic) / ic * 100
        t_s = sum(s['trades'] for s in sl)
        print(f" Q{qi+1} ({sl[0]['date']} – {sl[-1]['date']}): ROI={r_s:+.1f}% T={t_s}")

# Monthly breakdown: same idea per 'YYYY-MM' bucket.  A date -> first-index
# map replaces a linear search for each month's first day.
print(f"\n Monthly breakdown:")
_first_idx_by_date = {}
for _pos, _row in enumerate(all_daily):
    _first_idx_by_date.setdefault(_row['date'], _pos)
for mo in sorted(monthly_buckets):
    sl_m = monthly_buckets[mo]
    idx0 = _first_idx_by_date[sl_m[0]['date']]
    ic_m = 25000.0 if idx0 == 0 else all_daily[idx0 - 1]['capital']
    r_m = (sl_m[-1]['capital'] - ic_m) / ic_m * 100
    t_m = sum(s['trades'] for s in sl_m)
    print(f" {mo}: ROI={r_m:+.1f}% T={t_m}")
|
||
|
||
# Save: write a JSON summary and a per-day CSV into run_logs/ next to this
# script, both tagged with the same local timestamp.
ts = datetime.now().strftime('%Y%m%d_%H%M%S')
run_dir = Path(__file__).parent / 'run_logs'
run_dir.mkdir(exist_ok=True)

summary = {
    'experiment': '2y_klines_long_validation',
    'date_range': f'{DATE_START}_to_{DATE_END}',
    'direction': 'LONG',
    'vd_threshold': VD_THRESHOLD_LONG,
    'vd_extreme': VD_EXTREME_LONG,
    'roi_pct': roi,
    'pf': pf,
    'max_dd_pct': max_dd,
    'sharpe': sharpe,
    'win_rate_pct': wr,
    'n_trades': n_trades,
    'n_days': len(all_daily),
    'trades_per_day': n_trades/len(all_daily) if all_daily else 0,
    'h1_roi_pct': h1_pnl/25000*100,
    'h2_roi_pct': h2_pnl/25000*100,
    'h2h1': h2h1,
    'tp_rate_pct': tp_rate,
    'avg_leverage': avg_lev,
    'mirror_hypothesis_verdict': verdict,
    'short_baseline': {
        'roi': 172.34,
        'pf': 1.1482,
        'dd': 31.69,
        'sharpe': 0.982,
        'wr': 58.88,
        'n_trades': 3042,
    },
    'engine_kwargs': ENGINE_KWARGS,
    'runtime_s': t_elapsed,
    'run_ts': ts,
}
_json_path = run_dir / f'klines_2y_long_{ts}.json'
with _json_path.open('w') as f:
    json.dump(summary, f, indent=2)

# One CSV row per processed day; floats are fixed to four decimals.
_csv_path = run_dir / f'klines_2y_long_daily_{ts}.csv'
with _csv_path.open('w', newline='') as f:
    w = csv.writer(f)
    w.writerow(['date','pnl','capital','trades','boost','beta','mc_status'])
    w.writerows(
        [
            r['date'],
            f"{r['pnl']:.4f}",
            f"{r['capital']:.4f}",
            r['trades'],
            f"{r['boost']:.4f}",
            f"{r['beta']:.4f}",
            r['mc_status'],
        ]
        for r in all_daily
    )

print(f"\nSaved: run_logs/klines_2y_long_{ts}.json + _daily_{ts}.csv")
|