507 lines
23 KiB
Python
507 lines
23 KiB
Python
|
|
"""Full-Stack Alpha Engine — 5-Year Klines Backtest
====================================================

Dataset: vbt_cache_klines/ — 1,710 files, 2021-06-15 to 2026-03-05

Stack (identical to the test_pf_dynamic_beta_validate.py gold standard):
- Signal: vel_div <= -0.02 (SHORT only)
- Vol gate: > vol_p60 (computed from klines price returns)
- IRP/ARS: 48-asset selector, min_irp_alignment=0.45
- OB: MockOBProvider, 4D, real-calibrated biases
- ACBv6: dynamic beta, w750 from parquet column (klines fix), INVERSE_MODE
- MC-Forewarner: SVM envelope + XGB, per-date gate
- EsoF: hazard=0.0 (neutral)
- ExF: neutral (no NPZ backfill for klines dates — see caveat 6)
- TP: 95 bps, MAX_HOLD=120 bars, abs_max_leverage=6.0

KNOWN DIFFERENCES vs the NG3 5s gold standard:
1. vel_div magnitude: klines 1m data produces ~7x higher volatility than 5s
   scans. The -0.02 threshold is unchanged, which means the signal fires
   differently on klines data. The 2y run showed ~3042 trades (vs 2143 on
   the 55-day 5s run).
2. ExF ACB gate: NPZ indicator files do not exist for klines dates, so the
   ACB runs without the ExF signal boost. Dynamic beta from w750 is still
   active.
3. MC-Forewarner: models were trained on the 55-day NG3 5s feature
   distribution. Generalization to klines is unvalidated — treat the MC gate
   as informational.
4. Asset universe: klines assets may differ slightly by date (NKNUSDT in
   early data, STXUSDT later). The script must handle variable asset columns.
5. ACB w750: EIGENVALUES_PATH NPZ files do not exist for klines dates. w750
   velocity is loaded directly from the parquet column
   v750_lambda_max_velocity (median per date), bypassing the ACB's NPZ reader
   entirely.
6. ACBv6 DEGRADED — ExF stress signals (funding_btc, dvol_btc, fng, taker)
   are NOT available for klines dates (no NPZ Indicators files exist outside
   the NG3 JSON scan dates, ~Dec 31–Feb 25). ACB._load_external_factors()
   returns neutral defaults (funding=0.0, dvol=50, fng=50, taker=1.0)
   → signals=0 → boost=1.0 on ALL dates.
   CONSEQUENCE: the inverse-boost component of ACBv6 is inactive for this
   run. Only the w750-based dynamic beta (high=0.8 / low=0.2 around the p60
   threshold) remains active — ACBv6 degrades to a pure beta-switcher, not a
   stress amplifier. Leverage headroom is therefore ~5–10% lower than a
   fully-armed ACBv6 would produce on genuine stress days (where the boost
   could reach 1.3–1.6x).
   IMPLICATION: this run is CONSERVATIVE vs a system with full ExF backfill.
   For a true full-stack result, backfill ExF indicators for 2021–2026 first
   (funding rate, Deribit DVOL, FNG and taker ratio all have historical APIs
   available).

This run is RESEARCH ONLY. It does not replace or modify the gold standard.
The gold standard (test_pf_dynamic_beta_validate.py) uses vbt_cache/
(NG3 5s, 55 days).
"""
|
|||
|
|
import csv
import gc
import json
import math
import sys
import time
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd

# Force UTF-8 console output on Windows. Guarded because sys.stdout may have
# been replaced (IDE, pytest capture, redirection) by an object that has no
# TextIOWrapper.reconfigure() — the original crashed in that situation.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')

# Make sibling packages (nautilus_dolphin, mc) importable when run as a script.
sys.path.insert(0, str(Path(__file__).parent))
|
|||
|
|
|
|||
|
|
# Importing the numba-backed kernels kicks off the JIT machinery; time the
# whole warm-up so the log shows how much of startup is compilation.
print("Compiling numba kernels...")
t0c = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import (
    compute_irp_nb,
    compute_ars_nb,
    rank_assets_irp_nb,
)
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
    OBFeatureEngine,
    compute_imbalance_nb,
    compute_depth_1pct_nb,
    compute_depth_quality_nb,
    compute_fill_probability_nb,
    compute_spread_proxy_nb,
    compute_depth_asymmetry_nb,
    compute_imbalance_persistence_nb,
    compute_withdrawal_velocity_nb,
    compute_market_agreement_nb,
    compute_cascade_signal_nb,
)
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
|
|||
|
|
|
|||
|
|
# Call every jitted kernel once with tiny dummy inputs so the first real
# trading day does not pay the compilation cost.
_probe = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_probe, -1)
compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
                  np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
                  np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
check_dc_nb(_probe, 3, 1, 0.75)

# Dummy bid/ask ladders for the order-book feature kernels.
_probe_bids = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64)
_probe_asks = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64)
compute_imbalance_nb(_probe_bids, _probe_asks)
compute_depth_1pct_nb(_probe_bids, _probe_asks)
compute_depth_quality_nb(210.0, 200.0)
compute_fill_probability_nb(1.0)
compute_spread_proxy_nb(_probe_bids, _probe_asks)
compute_depth_asymmetry_nb(_probe_bids, _probe_asks)
compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2)
compute_withdrawal_velocity_nb(np.array([100.0, 110.0], dtype=np.float64), 1)
compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)
print(f" JIT: {time.time() - t0c:.1f}s")
|
|||
|
|
|
|||
|
|
# Core engine, circuit breaker and forewarner are imported after the kernel
# warm-up above so their own import-time work doesn't pollute the JIT timing.
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
from mc.mc_ml import DolphinForewarner
|
|||
|
|
|
|||
|
|
# KLINES DATA DIRECTORY
|
|||
|
|
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
|
|||
|
|
LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")
|
|||
|
|
META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
|
|||
|
|
'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div',
|
|||
|
|
'instability_50', 'instability_150'}
|
|||
|
|
|
|||
|
|
# Engine configuration — parameter values identical to the gold-standard run.
ENGINE_KWARGS = {
    # Capital & signal thresholds
    'initial_capital': 25000.0,
    'vel_div_threshold': -0.02,
    'vel_div_extreme': -0.05,
    # Leverage / sizing
    'min_leverage': 0.5,
    'max_leverage': 5.0,
    'leverage_convexity': 3.0,
    'fraction': 0.20,
    # Exits
    'fixed_tp_pct': 0.0095,
    'stop_pct': 1.0,
    'max_hold_bars': 120,
    # Direction confirmation
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.0,
    'dc_leverage_reduce': 0.5,
    # IRP/ARS asset selection
    'use_asset_selection': True,
    'min_irp_alignment': 0.45,
    # Fees / slippage
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    # Order-book edge
    'use_ob_edge': True,
    'ob_edge_bps': 5.0,
    'ob_confirm_rate': 0.40,
    # Misc
    'lookback': 100,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    'seed': 42,
}
|
|||
|
|
|
|||
|
|
# MC-Forewarner config (champion — identical to gold standard)
MC_MODELS_DIR = str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))

# Configuration vector handed to the forewarner for each date's gate decision.
MC_BASE_CFG = {
    'trial_id': 0,
    # Signal thresholds
    'vel_div_threshold': -0.020,
    'vel_div_extreme': -0.050,
    # Direction confirmation
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.00,
    'dc_leverage_reduce': 0.50,
    'vd_trend_lookback': 10,
    # Leverage / sizing
    'min_leverage': 0.50,
    'max_leverage': 5.00,
    'leverage_convexity': 3.00,
    'fraction': 0.20,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    # Exits
    'fixed_tp_pct': 0.0095,
    'stop_pct': 1.00,
    'max_hold_bars': 120,
    # Fees / slippage
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    # Order-book edge
    'use_ob_edge': True,
    'ob_edge_bps': 5.00,
    'ob_confirm_rate': 0.40,
    'ob_imbalance_bias': -0.09,
    'ob_depth_scale': 1.00,
    # Asset selection
    'use_asset_selection': True,
    'min_irp_alignment': 0.45,
    'lookback': 100,
    # ACB dynamic beta
    'acb_beta_high': 0.80,
    'acb_beta_low': 0.20,
    'acb_w750_threshold_pct': 60,
}
|
|||
|
|
|
|||
|
|
# --- Load MC-Forewarner ---
# Any failure here degrades gracefully: the backtest still runs, just
# without the per-date MC gate.
print("\nLoading MC-Forewarner trained models...")
forewarner = None
use_mc = False
try:
    forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
except Exception as e:
    print(f" WARNING: MC-Forewarner failed to load: {e}")
    print(" MC-Forewarner DISABLED for this run.")
else:
    use_mc = True
    print(" MC-Forewarner ready (One-Class SVM envelope + XGBoost)")
|
|||
|
|
|
|||
|
|
# --- Enumerate parquet files (skip any nautilus catalog artifacts) ---
parquet_files = [
    p for p in sorted(VBT_DIR.glob("*.parquet")) if 'catalog' not in str(p)
]
total_files = len(parquet_files)
# File stems are the per-day date strings (YYYY-MM-DD...).
date_strings = [p.stem for p in parquet_files]
print(f"\nLoaded {total_files} parquet files from vbt_cache_klines/")
|
|||
|
|
|
|||
|
|
# --- Collect all asset columns (sample first 100 files) ---
print(f"\nScanning {min(100, total_files)} parquet files for asset universe...")
all_assets = set()
for pf in parquet_files[:100]:
    # BUG FIX: the original used pd.read_parquet(pf, columns=[]) — an empty
    # list selects ZERO columns (it is not treated like None), so the scan
    # always produced an empty universe. Read the file fully to enumerate
    # its columns (a schema-only read would need pyarrow directly, which
    # this script does not import).
    df_cols = pd.read_parquet(pf)
    all_assets.update(c for c in df_cols.columns if c not in META_COLS)
print(f" Total assets in universe: {len(all_assets)}")
|
|||
|
|
|
|||
|
|
# --- ACB v6 with w750 from parquet column (klines fix) ---
# ACB._load_w750_velocity() normally reads NG3 NPZ scan files
# (EIGENVALUES_PATH), but klines dates have no NPZ files (~56/1710 would).
# Workaround: pre-populate _w750_vel_cache straight from the parquet column
# v750_lambda_max_velocity (median per date); preload_w750() then finds the
# cached values and only has to compute the percentile threshold.
print("\nInitializing ACB v6 + pre-populating w750 from parquet columns...")
acb = AdaptiveCircuitBreaker()
print(f" Loading w750 medians from {total_files} parquets (this takes ~1-2 min)...")
t_w750 = time.time()
w750_loaded = 0
for pf in parquet_files:
    median_vel = 0.0  # neutral fallback for read failures / empty columns
    try:
        col = pd.read_parquet(pf, columns=['v750_lambda_max_velocity'])
        vals = col['v750_lambda_max_velocity'].dropna().values
        if len(vals) > 0:
            median_vel = float(np.median(vals))
            w750_loaded += 1
    except Exception:
        pass
    acb._w750_vel_cache[pf.stem] = median_vel
print(f" w750 loaded from parquets: {w750_loaded}/{total_files} dates ({time.time()-t_w750:.1f}s)")

# With the cache pre-filled, preload_w750 only computes the p60 threshold.
acb.preload_w750(date_strings)
print(f" w750 threshold (p60): {acb._w750_threshold:.6f}")
print(f" Dates with nonzero w750: {sum(1 for v in acb._w750_vel_cache.values() if v != 0.0)}/{total_files}")
print(" *** ACBv6 DEGRADED: ExF NPZ files absent for klines dates.")
print(" *** boost=1.0 on all dates (funding/dvol/fng/taker signals unavailable).")
print(" *** Only w750 dynamic beta (high=0.8/low=0.2) remains active.")
print(" *** Results are CONSERVATIVE vs fully-armed ACBv6 with ExF backfill.")
|
|||
|
|
|
|||
|
|
# --- Compute vol_p60 from first 5 files ---
# Rolling 50-bar stdev of BTC simple returns; the 60th percentile of these
# values becomes the vol-regime gate threshold for the entire run.
print("\nComputing vol_p60 from first 5 files...")
all_vols = []
for pf in parquet_files[:5]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns:
        continue
    pr = df['BTCUSDT'].values
    # PERF: hoist the per-window diff/divide — compute the full return series
    # once instead of re-deriving it inside every window. The window
    # rets[i-50:i-1] holds exactly the same values as the original
    # np.diff(pr[i-50:i]) / pr[i-50:i][:-1], so results are unchanged.
    rets = np.diff(pr) / pr[:-1]
    for i in range(50, len(pr)):
        v = float(np.std(rets[i - 50:i - 1]))
        if v > 0:
            all_vols.append(v)
# NOTE(review): np.percentile raises on an empty sequence — this assumes at
# least one of the first 5 files has a BTCUSDT column. Confirm for new caches.
vol_p60 = float(np.percentile(all_vols, 60))
print(f" vol_p60 (60th percentile): {vol_p60:.6f}")
|
|||
|
|
|
|||
|
|
# --- Build OB engine (4 dimensions, real-calibrated MockOBProvider) ---
# Calibration from real Binance OB data (2025-01-15 observation):
#   BTC: -0.086 (sell pressure, confirms SHORT)
#   ETH: -0.092 (sell pressure, confirms SHORT)
#   BNB: +0.05  (mild buy pressure, mild contradict)
#   SOL: +0.05  (mild buy pressure, mild contradict)
OB_ASSETS = sorted(all_assets)  # sorted() already returns a list (C414 fix)
_mock_ob = MockOBProvider(
    imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
    # Per-asset overrides of the default imbalance bias.
    imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092,
                      "BNBUSDT": +0.05, "SOLUSDT": +0.05},
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)
|
|||
|
|
|
|||
|
|
# --- Assemble full stack ---
print(f"\n=== Running full stack: ACBv6 + OB 4D + MC-Forewarner + EsoF(neutral) ===")
print(f"=== Dataset: 5-Year Klines ({total_files} dates, 2021-06-15 to 2026-03-05) ===")
print(f"=== abs_max_leverage=6.0 seed=42 TP=95bps SHORT-only ===")
t0 = time.time()

engine = NDAlphaEngine(**ENGINE_KWARGS)
# Wire the layers into the orchestrator.
engine.set_ob_engine(ob_eng)    # OB Sub-1/2/3/4
engine.set_acb(acb)             # ACBv6 dynamic beta + 3-scale meta-boost
if use_mc:
    # Per-date envelope gate.
    engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
engine.set_esoteric_hazard_multiplier(0.0)  # EsoF neutral
# MUST stay False for a 1710-day run (OOM risk from per-bar logging).
engine._bar_log_enabled = False
|
|||
|
|
|
|||
|
|
# --- Main loop: lazy per-day parquet loading ---
dstats = []                      # one stats dict per processed date
year_trades = defaultdict(list)  # year -> list of NDTrade

for day_idx, pf in enumerate(parquet_files):
    ds = pf.stem
    year = ds[:4]

    df = pd.read_parquet(pf)
    acols = [c for c in df.columns if c not in META_COLS]

    # Rolling 50-bar stdev of BTC simple returns drives the vol-regime gate.
    # The return series is computed once; each window slice holds the same
    # values as the original per-window np.diff(seg)/seg[:-1].
    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    dvol = np.full(len(df), np.nan)
    if bp is not None:
        btc_rets = np.diff(bp) / bp[:-1]
        for bar in range(50, len(bp)):
            dvol[bar] = float(np.std(btc_rets[bar - 50:bar - 1]))
    vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)

    n_before = len(engine.trade_history)
    stats = engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)
    dstats.append({**stats, 'cap': engine.capital, 'date': ds})
    # Trades appended during process_day are attributed to this date's year.
    year_trades[year].extend(engine.trade_history[n_before:])

    del df, bp, dvol, vol_ok  # free parquet memory immediately

    if (day_idx + 1) % 100 == 0:
        gc.collect()

    if (day_idx + 1) % 50 == 0:
        elapsed = time.time() - t0
        rate = (day_idx + 1) / elapsed
        eta = (total_files - day_idx - 1) / rate
        print(f" [{day_idx+1}/{total_files}] {ds} cap=${engine.capital:,.0f} "
              f"elapsed={elapsed/60:.1f}m eta={eta/60:.1f}m")

elapsed = time.time() - t0
|
|||
|
|
|
|||
|
|
# --- Overall metrics ---
tr = engine.trade_history
w = [t for t in tr if t.pnl_absolute > 0]
l = [t for t in tr if t.pnl_absolute <= 0]  # zero-P&L trades count as losses
gw = sum(t.pnl_absolute for t in w) if w else 0
gl = abs(sum(t.pnl_absolute for t in l)) if l else 0
roi = (engine.capital - 25000) / 25000 * 100
pf_all = gw / gl if gl > 0 else 999
wr_all = len(w) / len(tr) * 100 if tr else 0.0

# Daily returns are measured against the fixed initial capital (25k), not
# the compounding equity curve — NOTE(review): confirm this is intentional.
dr = [s['pnl'] / 25000 * 100 for s in dstats]
dr_std = np.std(dr)
sharpe = np.mean(dr) / dr_std * np.sqrt(365) if dr_std > 0 else 0

# Max drawdown off the running equity high-water mark.
peak_cap = 25000.0
max_dd = 0.0
for s in dstats:
    peak_cap = max(peak_cap, s['cap'])
    max_dd = max(max_dd, (peak_cap - s['cap']) / peak_cap * 100)

# First-half vs second-half P&L consistency check.
mid_idx = len(parquet_files) // 2
h1 = sum(s['pnl'] for s in dstats[:mid_idx])
h2 = sum(s['pnl'] for s in dstats[mid_idx:])
h2h1 = h2 / h1 if h1 != 0 else float('nan')

avg_win = float(np.mean([t.pnl_pct for t in w]) * 100) if w else 0.0
avg_loss = float(np.mean([t.pnl_pct for t in l]) * 100) if l else 0.0
|
|||
|
|
|
|||
|
|
# --- MC-Forewarner summary ---
if use_mc:
    red_days = [s for s in dstats if s.get('mc_status') == 'RED']
    orng_days = [s for s in dstats if s.get('mc_status') == 'ORANGE']
    n_flagged = len(red_days) + len(orng_days)
    print(f"\n MC-Forewarner: {len(red_days)} RED, {len(orng_days)} ORANGE "
          f"({n_flagged/total_files*100:.1f}% of dates intervened)")
    # Show at most 10 intervened dates, then summarize the rest.
    shown = 0
    for s in dstats:
        if s.get('mc_status') not in ('RED', 'ORANGE'):
            continue
        print(f" {s['date']}: {s['mc_status']:6s} boost={s['boost']:.2f}x P&L={s['pnl']:+.0f}")
        shown += 1
        if shown >= 10:
            remaining = n_flagged - shown
            if remaining > 0:
                print(f" ... and {remaining} more")
            break
else:
    red_days = []
    orng_days = []
    print(f"\n MC-Forewarner: NOT ACTIVE (failed to load)")
|
|||
|
|
|
|||
|
|
# Headline report for the whole run, assembled once and printed in one call
# (byte-identical output to the original line-by-line prints).
_sep = '=' * 70
print("\n".join([
    "",
    _sep,
    " 5-YEAR Klines Backtest — Full Stack",
    _sep,
    f" Dataset: {total_files} dates (2021-06-15 to 2026-03-05)",
    f" ROI: {roi:+.2f}%",
    f" PF: {pf_all:.3f}",
    f" DD: {max_dd:.2f}%",
    f" Sharpe: {sharpe:.2f}",
    f" WR: {wr_all:.1f}% (W={len(w)} L={len(l)})",
    f" AvgWin: {avg_win:+.3f}% AvgLoss: {avg_loss:+.3f}%",
    f" Trades: {len(tr)}",
    f" Capital: ${engine.capital:,.2f}",
    f" H1 P&L: ${h1:+,.2f}",
    f" H2 P&L: ${h2:+,.2f}",
    f" H2/H1: {h2h1:.2f}",
    f" Time: {elapsed:.0f}s ({elapsed/60:.1f}m)",
    _sep,
]))
|
|||
|
|
|
|||
|
|
# --- Per-Year Breakdown ---
print(f"\n{'='*70}")
print(f" PER-YEAR BREAKDOWN")
print(f"{'='*70}")
print(f" {'Year':<10} {'Dates':>6} {'Trades':>8} {'ROI':>10} {'PF':>8} {'DD':>8} {'Sharpe':>8} {'WR':>8}")
print(f" {'-'*70}")

year_summary = {}
for year in ('2021', '2022', '2023', '2024', '2025', '2026'):
    year_ds = [rec for rec in dstats if rec['date'][:4] == year]
    if not year_ds:
        continue
    n_dates = len(year_ds)
    year_tr = year_trades[year]
    n_trades = len(year_tr)

    # Year-start capital = close of the day preceding the year's first date.
    first_idx = next((k for k, rec in enumerate(dstats) if rec['date'][:4] == year), 0)
    cap_start = dstats[first_idx - 1]['cap'] if first_idx > 0 else 25000.0
    cap_end = year_ds[-1]['cap']
    roi_y = (cap_end - cap_start) / cap_start * 100

    # Intra-year max drawdown off the running equity peak.
    running_peak = cap_start
    max_dd_y = 0.0
    for rec in year_ds:
        running_peak = max(running_peak, rec['cap'])
        max_dd_y = max(max_dd_y, (running_peak - rec['cap']) / running_peak * 100)

    # Per-year Sharpe against the year-start capital base.
    dr_y = [rec['pnl'] / cap_start * 100 for rec in year_ds]
    sd_y = np.std(dr_y)
    sharpe_y = np.mean(dr_y) / sd_y * np.sqrt(365) if sd_y > 0 else 0

    # Per-year win rate and profit factor from the actual trades.
    w_y = [t for t in year_tr if t.pnl_absolute > 0]
    l_y = [t for t in year_tr if t.pnl_absolute <= 0]
    wr_y = len(w_y) / len(year_tr) * 100 if year_tr else 0.0
    gw_y = sum(t.pnl_absolute for t in w_y) if w_y else 0
    gl_y = abs(sum(t.pnl_absolute for t in l_y)) if l_y else 0
    pf_y = gw_y / gl_y if gl_y > 0 else 999

    year_summary[year] = {
        'n_dates': n_dates, 'n_trades': n_trades, 'roi_pct': roi_y,
        'pf': pf_y, 'max_dd_pct': max_dd_y, 'sharpe': sharpe_y, 'wr': wr_y,
        'cap_start': cap_start, 'cap_end': cap_end,
    }
    print(f" {year:<10} {n_dates:>6} {n_trades:>8} {roi_y:>+9.1f}% {pf_y:>8.3f} {max_dd_y:>7.1f}% {sharpe_y:>+8.2f} {wr_y:>7.1f}%")

print(f" {'-'*70}")
print(f" {'TOTAL':<10} {total_files:>6} {len(tr):>8} {roi:>+9.1f}% {pf_all:>8.3f} {max_dd:>7.1f}% {sharpe:>+8.2f} {wr_all:>7.1f}%")
print(f"{'='*70}")
|
|||
|
|
|
|||
|
|
# H2/H1 per year: second-half vs first-half P&L consistency within each year.
print(f"\n H2/H1 Ratio per Year:")
for year in sorted(year_summary):
    days = [rec for rec in dstats if rec['date'][:4] == year]
    if len(days) < 10:
        continue  # too few dates for a meaningful half-split
    mid = len(days) // 2
    h1_y = sum(rec['pnl'] for rec in days[:mid])
    h2_y = sum(rec['pnl'] for rec in days[mid:])
    ratio_y = h2_y / h1_y if h1_y != 0 else float('nan')
    print(f" {year}: H1=${h1_y:+,.0f} H2=${h2_y:+,.0f} Ratio={ratio_y:.2f}")
|
|||
|
|
|
|||
|
|
# Quarterly breakdown
print(f"\n Quarterly Breakdown:")
# Bucket days by calendar quarter, recording each quarter's first index into
# dstats at creation time so the starting-capital lookup is O(1).
# FIX: the original called dstats.index(q_ds[0]) twice per quarter — an O(n)
# scan using dict equality — and carried a 'trades' counter that was never
# read; both are removed with identical printed output.
qtrs = {}
for idx, s in enumerate(dstats):
    d = s['date']
    q = (int(d[5:7]) - 1) // 3 + 1
    key = f"{d[:4]}-Q{q}"
    if key not in qtrs:
        qtrs[key] = {'dates': [], 'start_idx': idx}
    qtrs[key]['dates'].append(s)

for qkey in sorted(qtrs):
    q_ds = qtrs[qkey]['dates']
    start = qtrs[qkey]['start_idx']
    # Quarter-start capital = close of the day before the quarter began.
    cap_s = dstats[start - 1]['cap'] if start > 0 else 25000.0
    q_roi = (q_ds[-1]['cap'] - cap_s) / cap_s * 100
    q_tr = sum(s['trades'] for s in q_ds)
    print(f" {qkey}: {len(q_ds):>3} dates {q_tr:>5} trades ROI={q_roi:>+7.1f}% cap=${q_ds[-1]['cap']:,.0f}")
|
|||
|
|
|
|||
|
|
# --- Save logs ---
# parents=True so a missing intermediate directory cannot abort the save
# step after a multi-hour backtest (exist_ok alone only tolerates the leaf
# directory already existing).
LOG_DIR.mkdir(parents=True, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")  # shared suffix for all three log files
print(f"\n Saving logs...")
|
|||
|
|
|
|||
|
|
# One row per trade, with prices/sizes pre-formatted to fixed precision.
trades_path = LOG_DIR / f"trades_5y_{ts}.csv"
TRADE_FIELDS = ['trade_id', 'asset', 'direction', 'entry_price', 'exit_price',
                'entry_bar', 'exit_bar', 'bars_held', 'leverage', 'notional',
                'pnl_pct', 'pnl_absolute', 'exit_reason', 'bucket_idx']
with open(trades_path, 'w', newline='') as fh:
    writer = csv.writer(fh)
    writer.writerow(TRADE_FIELDS)
    writer.writerows(
        [t.trade_id, t.asset, t.direction,
         f"{t.entry_price:.6f}", f"{t.exit_price:.6f}",
         t.entry_bar, t.exit_bar, t.bars_held,
         f"{t.leverage:.4f}", f"{t.notional:.4f}",
         f"{t.pnl_pct:.8f}", f"{t.pnl_absolute:.4f}",
         t.exit_reason, t.bucket_idx]
        for t in tr
    )
|
|||
|
|
|
|||
|
|
# One row per trading date; drawdown is recomputed against the running peak.
daily_path = LOG_DIR / f"daily_5y_{ts}.csv"
with open(daily_path, 'w', newline='') as fh:
    writer = csv.writer(fh)
    writer.writerow(['date', 'pnl', 'capital', 'dd_pct', 'boost', 'beta', 'mc_status', 'trades'])
    running_peak = 25000.0  # equity high-water mark for the dd_pct column
    for s in dstats:
        running_peak = max(running_peak, s['cap'])
        writer.writerow([s['date'], f"{s['pnl']:.4f}", f"{s['cap']:.4f}",
                         f"{(running_peak - s['cap']) / running_peak * 100:.4f}",
                         f"{s.get('boost', 1.0):.4f}", f"{s.get('beta', 0.5):.2f}",
                         s.get('mc_status', 'N/A'), s['trades']])
|
|||
|
|
|
|||
|
|
# Machine-readable record of this run (written to JSON below). Nested
# pieces are built first so the top-level dict reads as a flat manifest.
per_year_rounded = {
    y: {k: round(v, 4) if isinstance(v, float) else v for k, v in ys.items()}
    for y, ys in year_summary.items()
}
stack_notes = [
    'OB: MockOBProvider 4D real-calibrated (BTC=-0.086, ETH=-0.092)',
    'ACBv6: w750 from parquet column (klines fix, not NPZ)',
    'ACBv6 DEGRADED: ExF boost=1.0 on ALL dates (no NPZ Indicators for klines). Only w750 beta active.',
    f'MC-Forewarner: {"ACTIVE" if use_mc else "INACTIVE (failed to load)"}',
    'EsoF: hazard=0.0 neutral',
    'ExF: neutral (no NPZ for klines dates) — results are CONSERVATIVE vs full ExF backfill',
]
summary = {
    'script': 'test_pf_5y_klines',
    'timestamp': ts,
    'date_range': f'{date_strings[0]} to {date_strings[-1]}',
    'n_dates': total_files,
    'total_trades': len(tr),
    'roi_pct': round(roi, 4),
    'pf': round(pf_all, 4),
    'max_dd_pct': round(max_dd, 4),
    'sharpe': round(sharpe, 4),
    'win_rate': round(wr_all, 4),
    'h2h1_ratio': round(h2h1, 4) if not math.isnan(h2h1) else None,
    'mc_red_days': len(red_days) if use_mc else None,
    'mc_orange_days': len(orng_days) if use_mc else None,
    'mc_active': use_mc,
    'capital_final': round(engine.capital, 4),
    'per_year': per_year_rounded,
    'engine_kwargs': ENGINE_KWARGS,
    'acb_config': {
        'w750_source': 'parquet_column_v750_lambda_max_velocity',
        'w750_threshold_pct': 60,
        'w750_threshold_value': round(float(acb._w750_threshold), 8),
        'w750_dates_loaded': w750_loaded,
        'w750_total_dates': total_files,
    },
    'stack_notes': stack_notes,
}
|
|||
|
|
# Persist the summary and report where everything landed.
summary_path = LOG_DIR / f"summary_5y_{ts}.json"
summary_path.write_text(json.dumps(summary, indent=2))

print(f" trades → {trades_path}")
print(f" daily → {daily_path}")
print(f" summary → {summary_path}")
print(f"{'='*70}")
print(f"\n 5-YEAR Klines Backtest COMPLETE")
print(f"{'='*70}")
|