507 lines
23 KiB
Python
507 lines
23 KiB
Python
|
|
"""Full-Stack Alpha Engine — 5-Year Klines Backtest
====================================================

Dataset: vbt_cache_klines/ — 1,710 files, 2021-06-15 to 2026-03-05

Stack (identical to the test_pf_dynamic_beta_validate.py gold standard):
- Signal: vel_div <= -0.02 (SHORT only)
- Vol gate: > vol_p60 (computed from klines price returns)
- IRP/ARS: 48-asset selector, min_irp_alignment=0.45
- OB: MockOBProvider, 4D, real-calibrated biases
- ACBv6: dynamic beta, w750 from parquet column (klines fix), INVERSE_MODE
- MC-Forewarner: SVM envelope + XGB, per-date gate
- EsoF: hazard=0.0 (neutral)
- ExF: neutral (no NPZ backfill for klines dates — see caveat 6)
- TP: 95 bps, MAX_HOLD=120 bars, abs_max_leverage=6.0

KNOWN DIFFERENCES vs the NG3 5s gold standard:
1. vel_div magnitude: klines 1m data produces ~7x higher volatility than 5s
   scans. The -0.02 threshold is unchanged, which means the signal fires
   differently on klines data. The 2y run showed ~3042 trades (vs 2143 on
   the 55-day 5s run).
2. ExF ACB gate: NPZ indicator files do not exist for klines dates, so the
   ACB runs without the ExF signal boost. Dynamic beta from w750 is still
   active.
3. MC-Forewarner: models were trained on the 55-day NG3 5s feature
   distribution. Generalization to klines is unvalidated — treat the MC gate
   as informational.
4. Asset universe: klines assets may differ slightly by date (NKNUSDT in
   early data, STXUSDT later). The script must handle variable asset columns.
5. ACB w750: EIGENVALUES_PATH NPZ files do not exist for klines dates. w750
   velocity is loaded directly from the parquet column
   v750_lambda_max_velocity (median per date), bypassing the ACB's NPZ reader
   entirely.
6. ACBv6 DEGRADED — ExF stress signals (funding_btc, dvol_btc, fng, taker)
   are NOT available for klines dates (no NPZ Indicators files exist outside
   the NG3 JSON scan dates, ~Dec 31–Feb 25). ACB._load_external_factors()
   returns neutral defaults (funding=0.0, dvol=50, fng=50, taker=1.0)
   → signals=0 → boost=1.0 on ALL dates.
   CONSEQUENCE: the inverse-boost component of ACBv6 is inactive for this
   run. Only the w750-based dynamic beta (high=0.8 / low=0.2 around the p60
   threshold) remains active — ACBv6 degrades to a pure beta-switcher, not a
   stress amplifier. Leverage headroom is therefore ~5–10% lower than a
   fully-armed ACBv6 would produce on genuine stress days (where the boost
   could reach 1.3–1.6x).
   IMPLICATION: this run is CONSERVATIVE vs a system with full ExF backfill.
   For a true full-stack result, backfill ExF indicators for 2021–2026 first
   (funding rate, Deribit DVOL, FNG and taker ratio all have historical APIs
   available).

This run is RESEARCH ONLY. It does not replace or modify the gold standard.
The gold standard (test_pf_dynamic_beta_validate.py) uses vbt_cache/
(NG3 5s, 55 days).
"""
|
|||
|
|
import csv
import gc
import json
import math
import sys
import time
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd

# Force UTF-8 console output on Windows. Guarded because sys.stdout may have
# been replaced (IDE, pytest capture, redirection) by an object that has no
# TextIOWrapper.reconfigure() — the original crashed in that situation.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')

# Make sibling packages (nautilus_dolphin, mc) importable when run as a script.
sys.path.insert(0, str(Path(__file__).parent))
|
|||
|
|
|
|||
|
|
# Importing the numba-backed kernels kicks off the JIT machinery; time the
# whole warm-up so the log shows how much of startup is compilation.
print("Compiling numba kernels...")
t0c = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import (
    compute_irp_nb,
    compute_ars_nb,
    rank_assets_irp_nb,
)
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
from nautilus_dolphin.nautilus.ob_features import (
    OBFeatureEngine,
    compute_imbalance_nb,
    compute_depth_1pct_nb,
    compute_depth_quality_nb,
    compute_fill_probability_nb,
    compute_spread_proxy_nb,
    compute_depth_asymmetry_nb,
    compute_imbalance_persistence_nb,
    compute_withdrawal_velocity_nb,
    compute_market_agreement_nb,
    compute_cascade_signal_nb,
)
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
|
|||
|
|
|
|||
|
|
# Call every jitted kernel once with tiny dummy inputs so the first real
# trading day does not pay the compilation cost.
_probe = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_probe, -1)
compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
                  np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
                  np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
check_dc_nb(_probe, 3, 1, 0.75)

# Dummy bid/ask ladders for the order-book feature kernels.
_probe_bids = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64)
_probe_asks = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64)
compute_imbalance_nb(_probe_bids, _probe_asks)
compute_depth_1pct_nb(_probe_bids, _probe_asks)
compute_depth_quality_nb(210.0, 200.0)
compute_fill_probability_nb(1.0)
compute_spread_proxy_nb(_probe_bids, _probe_asks)
compute_depth_asymmetry_nb(_probe_bids, _probe_asks)
compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2)
compute_withdrawal_velocity_nb(np.array([100.0, 110.0], dtype=np.float64), 1)
compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10)
print(f" JIT: {time.time() - t0c:.1f}s")
|
|||
|
|
|
|||
|
|
# Core engine, circuit breaker and forewarner are imported after the kernel
# warm-up above so their own import-time work doesn't pollute the JIT timing.
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
from mc.mc_ml import DolphinForewarner
|
|||
|
|
|
|||
|
|
# KLINES DATA DIRECTORY
|
|||
|
|
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
|
|||
|
|
LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")
|
|||
|
|
META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
|
|||
|
|
'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div',
|
|||
|
|
'instability_50', 'instability_150'}
|
|||
|
|
|
|||
|
|
# Engine configuration — parameter values identical to the gold-standard run.
ENGINE_KWARGS = {
    # Capital & signal thresholds
    'initial_capital': 25000.0,
    'vel_div_threshold': -0.02,
    'vel_div_extreme': -0.05,
    # Leverage / sizing
    'min_leverage': 0.5,
    'max_leverage': 5.0,
    'leverage_convexity': 3.0,
    'fraction': 0.20,
    # Exits
    'fixed_tp_pct': 0.0095,
    'stop_pct': 1.0,
    'max_hold_bars': 120,
    # Direction confirmation
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.0,
    'dc_leverage_reduce': 0.5,
    # IRP/ARS asset selection
    'use_asset_selection': True,
    'min_irp_alignment': 0.45,
    # Fees / slippage
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    # Order-book edge
    'use_ob_edge': True,
    'ob_edge_bps': 5.0,
    'ob_confirm_rate': 0.40,
    # Misc
    'lookback': 100,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    'seed': 42,
}
|
|||
|
|
|
|||
|
|
# MC-Forewarner config (champion — identical to gold standard)
MC_MODELS_DIR = str(Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\mc_results\models"))

# Configuration vector handed to the forewarner for each date's gate decision.
MC_BASE_CFG = {
    'trial_id': 0,
    # Signal thresholds
    'vel_div_threshold': -0.020,
    'vel_div_extreme': -0.050,
    # Direction confirmation
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.00,
    'dc_leverage_reduce': 0.50,
    'vd_trend_lookback': 10,
    # Leverage / sizing
    'min_leverage': 0.50,
    'max_leverage': 5.00,
    'leverage_convexity': 3.00,
    'fraction': 0.20,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    # Exits
    'fixed_tp_pct': 0.0095,
    'stop_pct': 1.00,
    'max_hold_bars': 120,
    # Fees / slippage
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    # Order-book edge
    'use_ob_edge': True,
    'ob_edge_bps': 5.00,
    'ob_confirm_rate': 0.40,
    'ob_imbalance_bias': -0.09,
    'ob_depth_scale': 1.00,
    # Asset selection
    'use_asset_selection': True,
    'min_irp_alignment': 0.45,
    'lookback': 100,
    # ACB dynamic beta
    'acb_beta_high': 0.80,
    'acb_beta_low': 0.20,
    'acb_w750_threshold_pct': 60,
}
|
|||
|
|
|
|||
|
|
# --- Load MC-Forewarner ---
# Any failure here degrades gracefully: the backtest still runs, just
# without the per-date MC gate.
print("\nLoading MC-Forewarner trained models...")
forewarner = None
use_mc = False
try:
    forewarner = DolphinForewarner(models_dir=MC_MODELS_DIR)
except Exception as e:
    print(f" WARNING: MC-Forewarner failed to load: {e}")
    print(" MC-Forewarner DISABLED for this run.")
else:
    use_mc = True
    print(" MC-Forewarner ready (One-Class SVM envelope + XGBoost)")
|
|||
|
|
|
|||
|
|
# --- Enumerate parquet files (skip any nautilus catalog artifacts) ---
parquet_files = [
    p for p in sorted(VBT_DIR.glob("*.parquet")) if 'catalog' not in str(p)
]
total_files = len(parquet_files)
# File stems are the per-day date strings (YYYY-MM-DD...).
date_strings = [p.stem for p in parquet_files]
print(f"\nLoaded {total_files} parquet files from vbt_cache_klines/")
|
|||
|
|
|
|||
|
|
# --- Collect all asset columns (sample first 100 files) ---
print(f"\nScanning {min(100, total_files)} parquet files for asset universe...")
all_assets = set()
for pf in parquet_files[:100]:
    # BUG FIX: the original used pd.read_parquet(pf, columns=[]) — an empty
    # list selects ZERO columns (it is not treated like None), so the scan
    # always produced an empty universe. Read the file fully to enumerate
    # its columns (a schema-only read would need pyarrow directly, which
    # this script does not import).
    df_cols = pd.read_parquet(pf)
    all_assets.update(c for c in df_cols.columns if c not in META_COLS)
print(f" Total assets in universe: {len(all_assets)}")
|
|||
|
|
|
|||
|
|
# --- ACB v6 with w750 from parquet column (klines fix) ---
# ACB._load_w750_velocity() normally reads NG3 NPZ scan files
# (EIGENVALUES_PATH), but klines dates have no NPZ files (~56/1710 would).
# Workaround: pre-populate _w750_vel_cache straight from the parquet column
# v750_lambda_max_velocity (median per date); preload_w750() then finds the
# cached values and only has to compute the percentile threshold.
print("\nInitializing ACB v6 + pre-populating w750 from parquet columns...")
acb = AdaptiveCircuitBreaker()
print(f" Loading w750 medians from {total_files} parquets (this takes ~1-2 min)...")
t_w750 = time.time()
w750_loaded = 0
for pf in parquet_files:
    median_vel = 0.0  # neutral fallback for read failures / empty columns
    try:
        col = pd.read_parquet(pf, columns=['v750_lambda_max_velocity'])
        vals = col['v750_lambda_max_velocity'].dropna().values
        if len(vals) > 0:
            median_vel = float(np.median(vals))
            w750_loaded += 1
    except Exception:
        pass
    acb._w750_vel_cache[pf.stem] = median_vel
print(f" w750 loaded from parquets: {w750_loaded}/{total_files} dates ({time.time()-t_w750:.1f}s)")

# With the cache pre-filled, preload_w750 only computes the p60 threshold.
acb.preload_w750(date_strings)
print(f" w750 threshold (p60): {acb._w750_threshold:.6f}")
print(f" Dates with nonzero w750: {sum(1 for v in acb._w750_vel_cache.values() if v != 0.0)}/{total_files}")
print(" *** ACBv6 DEGRADED: ExF NPZ files absent for klines dates.")
print(" *** boost=1.0 on all dates (funding/dvol/fng/taker signals unavailable).")
print(" *** Only w750 dynamic beta (high=0.8/low=0.2) remains active.")
print(" *** Results are CONSERVATIVE vs fully-armed ACBv6 with ExF backfill.")
|
|||
|
|
|
|||
|
|
# --- Compute vol_p60 from first 5 files ---
# Rolling 50-bar stdev of BTC simple returns; the 60th percentile of these
# values becomes the vol-regime gate threshold for the entire run.
print("\nComputing vol_p60 from first 5 files...")
all_vols = []
for pf in parquet_files[:5]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns:
        continue
    pr = df['BTCUSDT'].values
    # PERF: hoist the per-window diff/divide — compute the full return series
    # once instead of re-deriving it inside every window. The window
    # rets[i-50:i-1] holds exactly the same values as the original
    # np.diff(pr[i-50:i]) / pr[i-50:i][:-1], so results are unchanged.
    rets = np.diff(pr) / pr[:-1]
    for i in range(50, len(pr)):
        v = float(np.std(rets[i - 50:i - 1]))
        if v > 0:
            all_vols.append(v)
# NOTE(review): np.percentile raises on an empty sequence — this assumes at
# least one of the first 5 files has a BTCUSDT column. Confirm for new caches.
vol_p60 = float(np.percentile(all_vols, 60))
print(f" vol_p60 (60th percentile): {vol_p60:.6f}")
|
|||
|
|
|
|||
|
|
# --- Build OB engine (4 dimensions, real-calibrated MockOBProvider) ---
# Calibration from real Binance OB data (2025-01-15 observation):
#   BTC: -0.086 (sell pressure, confirms SHORT)
#   ETH: -0.092 (sell pressure, confirms SHORT)
#   BNB: +0.05  (mild buy pressure, mild contradict)
#   SOL: +0.05  (mild buy pressure, mild contradict)
OB_ASSETS = sorted(all_assets)  # sorted() already returns a list (C414 fix)
_mock_ob = MockOBProvider(
    imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
    # Per-asset overrides of the default imbalance bias.
    imbalance_biases={"BTCUSDT": -0.086, "ETHUSDT": -0.092,
                      "BNBUSDT": +0.05, "SOLUSDT": +0.05},
)
ob_eng = OBFeatureEngine(_mock_ob)
ob_eng.preload_date("mock", OB_ASSETS)
|
|||
|
|
|
|||
|
|
# --- Assemble full stack ---
print(f"\n=== Running full stack: ACBv6 + OB 4D + MC-Forewarner + EsoF(neutral) ===")
print(f"=== Dataset: 5-Year Klines ({total_files} dates, 2021-06-15 to 2026-03-05) ===")
print(f"=== abs_max_leverage=6.0 seed=42 TP=95bps SHORT-only ===")
t0 = time.time()

engine = NDAlphaEngine(**ENGINE_KWARGS)
# Wire the layers into the orchestrator.
engine.set_ob_engine(ob_eng)    # OB Sub-1/2/3/4
engine.set_acb(acb)             # ACBv6 dynamic beta + 3-scale meta-boost
if use_mc:
    # Per-date envelope gate.
    engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
engine.set_esoteric_hazard_multiplier(0.0)  # EsoF neutral
# MUST stay False for a 1710-day run (OOM risk from per-bar logging).
engine._bar_log_enabled = False
|
|||
|
|
|
|||
|
|
# --- Main loop: lazy per-day parquet loading ---
dstats = []                      # one stats dict per processed date
year_trades = defaultdict(list)  # year -> list of NDTrade

for day_idx, pf in enumerate(parquet_files):
    ds = pf.stem
    year = ds[:4]

    df = pd.read_parquet(pf)
    acols = [c for c in df.columns if c not in META_COLS]

    # Rolling 50-bar stdev of BTC simple returns drives the vol-regime gate.
    # The return series is computed once; each window slice holds the same
    # values as the original per-window np.diff(seg)/seg[:-1].
    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    dvol = np.full(len(df), np.nan)
    if bp is not None:
        btc_rets = np.diff(bp) / bp[:-1]
        for bar in range(50, len(bp)):
            dvol[bar] = float(np.std(btc_rets[bar - 50:bar - 1]))
    vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)

    n_before = len(engine.trade_history)
    stats = engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)
    dstats.append({**stats, 'cap': engine.capital, 'date': ds})
    # Trades appended during process_day are attributed to this date's year.
    year_trades[year].extend(engine.trade_history[n_before:])

    del df, bp, dvol, vol_ok  # free parquet memory immediately

    if (day_idx + 1) % 100 == 0:
        gc.collect()

    if (day_idx + 1) % 50 == 0:
        elapsed = time.time() - t0
        rate = (day_idx + 1) / elapsed
        eta = (total_files - day_idx - 1) / rate
        print(f" [{day_idx+1}/{total_files}] {ds} cap=${engine.capital:,.0f} "
              f"elapsed={elapsed/60:.1f}m eta={eta/60:.1f}m")

elapsed = time.time() - t0
|
|||
|
|
|
|||
|
|
# --- Overall metrics ---
tr = engine.trade_history
w = [t for t in tr if t.pnl_absolute > 0]
l = [t for t in tr if t.pnl_absolute <= 0]  # zero-P&L trades count as losses
gw = sum(t.pnl_absolute for t in w) if w else 0
gl = abs(sum(t.pnl_absolute for t in l)) if l else 0
roi = (engine.capital - 25000) / 25000 * 100
pf_all = gw / gl if gl > 0 else 999
wr_all = len(w) / len(tr) * 100 if tr else 0.0

# Daily returns are measured against the fixed initial capital (25k), not
# the compounding equity curve — NOTE(review): confirm this is intentional.
dr = [s['pnl'] / 25000 * 100 for s in dstats]
dr_std = np.std(dr)
sharpe = np.mean(dr) / dr_std * np.sqrt(365) if dr_std > 0 else 0

# Max drawdown off the running equity high-water mark.
peak_cap = 25000.0
max_dd = 0.0
for s in dstats:
    peak_cap = max(peak_cap, s['cap'])
    max_dd = max(max_dd, (peak_cap - s['cap']) / peak_cap * 100)

# First-half vs second-half P&L consistency check.
mid_idx = len(parquet_files) // 2
h1 = sum(s['pnl'] for s in dstats[:mid_idx])
h2 = sum(s['pnl'] for s in dstats[mid_idx:])
h2h1 = h2 / h1 if h1 != 0 else float('nan')

avg_win = float(np.mean([t.pnl_pct for t in w]) * 100) if w else 0.0
avg_loss = float(np.mean([t.pnl_pct for t in l]) * 100) if l else 0.0
|
|||
|
|
|
|||
|
|
# --- MC-Forewarner summary ---
if use_mc:
    red_days = [s for s in dstats if s.get('mc_status') == 'RED']
    orng_days = [s for s in dstats if s.get('mc_status') == 'ORANGE']
    n_flagged = len(red_days) + len(orng_days)
    print(f"\n MC-Forewarner: {len(red_days)} RED, {len(orng_days)} ORANGE "
          f"({n_flagged/total_files*100:.1f}% of dates intervened)")
    # Show at most 10 intervened dates, then summarize the rest.
    shown = 0
    for s in dstats:
        if s.get('mc_status') not in ('RED', 'ORANGE'):
            continue
        print(f" {s['date']}: {s['mc_status']:6s} boost={s['boost']:.2f}x P&L={s['pnl']:+.0f}")
        shown += 1
        if shown >= 10:
            remaining = n_flagged - shown
            if remaining > 0:
                print(f" ... and {remaining} more")
            break
else:
    red_days = []
    orng_days = []
    print(f"\n MC-Forewarner: NOT ACTIVE (failed to load)")
|
|||
|
|
|
|||
|
|
# Headline report for the whole run, assembled once and printed in one call
# (byte-identical output to the original line-by-line prints).
_sep = '=' * 70
print("\n".join([
    "",
    _sep,
    " 5-YEAR Klines Backtest — Full Stack",
    _sep,
    f" Dataset: {total_files} dates (2021-06-15 to 2026-03-05)",
    f" ROI: {roi:+.2f}%",
    f" PF: {pf_all:.3f}",
    f" DD: {max_dd:.2f}%",
    f" Sharpe: {sharpe:.2f}",
    f" WR: {wr_all:.1f}% (W={len(w)} L={len(l)})",
    f" AvgWin: {avg_win:+.3f}% AvgLoss: {avg_loss:+.3f}%",
    f" Trades: {len(tr)}",
    f" Capital: ${engine.capital:,.2f}",
    f" H1 P&L: ${h1:+,.2f}",
    f" H2 P&L: ${h2:+,.2f}",
    f" H2/H1: {h2h1:.2f}",
    f" Time: {elapsed:.0f}s ({elapsed/60:.1f}m)",
    _sep,
]))
|
|||
|
|
|
|||
|
|
# --- Per-Year Breakdown ---
print(f"\n{'='*70}")
print(f" PER-YEAR BREAKDOWN")
print(f"{'='*70}")
print(f" {'Year':<10} {'Dates':>6} {'Trades':>8} {'ROI':>10} {'PF':>8} {'DD':>8} {'Sharpe':>8} {'WR':>8}")
print(f" {'-'*70}")

year_summary = {}
for year in ('2021', '2022', '2023', '2024', '2025', '2026'):
    year_ds = [rec for rec in dstats if rec['date'][:4] == year]
    if not year_ds:
        continue
    n_dates = len(year_ds)
    year_tr = year_trades[year]
    n_trades = len(year_tr)

    # Year-start capital = close of the day preceding the year's first date.
    first_idx = next((k for k, rec in enumerate(dstats) if rec['date'][:4] == year), 0)
    cap_start = dstats[first_idx - 1]['cap'] if first_idx > 0 else 25000.0
    cap_end = year_ds[-1]['cap']
    roi_y = (cap_end - cap_start) / cap_start * 100

    # Intra-year max drawdown off the running equity peak.
    running_peak = cap_start
    max_dd_y = 0.0
    for rec in year_ds:
        running_peak = max(running_peak, rec['cap'])
        max_dd_y = max(max_dd_y, (running_peak - rec['cap']) / running_peak * 100)

    # Per-year Sharpe against the year-start capital base.
    dr_y = [rec['pnl'] / cap_start * 100 for rec in year_ds]
    sd_y = np.std(dr_y)
    sharpe_y = np.mean(dr_y) / sd_y * np.sqrt(365) if sd_y > 0 else 0

    # Per-year win rate and profit factor from the actual trades.
    w_y = [t for t in year_tr if t.pnl_absolute > 0]
    l_y = [t for t in year_tr if t.pnl_absolute <= 0]
    wr_y = len(w_y) / len(year_tr) * 100 if year_tr else 0.0
    gw_y = sum(t.pnl_absolute for t in w_y) if w_y else 0
    gl_y = abs(sum(t.pnl_absolute for t in l_y)) if l_y else 0
    pf_y = gw_y / gl_y if gl_y > 0 else 999

    year_summary[year] = {
        'n_dates': n_dates, 'n_trades': n_trades, 'roi_pct': roi_y,
        'pf': pf_y, 'max_dd_pct': max_dd_y, 'sharpe': sharpe_y, 'wr': wr_y,
        'cap_start': cap_start, 'cap_end': cap_end,
    }
    print(f" {year:<10} {n_dates:>6} {n_trades:>8} {roi_y:>+9.1f}% {pf_y:>8.3f} {max_dd_y:>7.1f}% {sharpe_y:>+8.2f} {wr_y:>7.1f}%")

print(f" {'-'*70}")
print(f" {'TOTAL':<10} {total_files:>6} {len(tr):>8} {roi:>+9.1f}% {pf_all:>8.3f} {max_dd:>7.1f}% {sharpe:>+8.2f} {wr_all:>7.1f}%")
print(f"{'='*70}")
|
|||
|
|
|
|||
|
|
# H2/H1 per year: second-half vs first-half P&L consistency within each year.
print(f"\n H2/H1 Ratio per Year:")
for year in sorted(year_summary):
    days = [rec for rec in dstats if rec['date'][:4] == year]
    if len(days) < 10:
        continue  # too few dates for a meaningful half-split
    mid = len(days) // 2
    h1_y = sum(rec['pnl'] for rec in days[:mid])
    h2_y = sum(rec['pnl'] for rec in days[mid:])
    ratio_y = h2_y / h1_y if h1_y != 0 else float('nan')
    print(f" {year}: H1=${h1_y:+,.0f} H2=${h2_y:+,.0f} Ratio={ratio_y:.2f}")
|
|||
|
|
|
|||
|
|
# Quarterly breakdown
print(f"\n Quarterly Breakdown:")
# Bucket days by calendar quarter, recording each quarter's first index into
# dstats at creation time so the starting-capital lookup is O(1).
# FIX: the original called dstats.index(q_ds[0]) twice per quarter — an O(n)
# scan using dict equality — and carried a 'trades' counter that was never
# read; both are removed with identical printed output.
qtrs = {}
for idx, s in enumerate(dstats):
    d = s['date']
    q = (int(d[5:7]) - 1) // 3 + 1
    key = f"{d[:4]}-Q{q}"
    if key not in qtrs:
        qtrs[key] = {'dates': [], 'start_idx': idx}
    qtrs[key]['dates'].append(s)

for qkey in sorted(qtrs):
    q_ds = qtrs[qkey]['dates']
    start = qtrs[qkey]['start_idx']
    # Quarter-start capital = close of the day before the quarter began.
    cap_s = dstats[start - 1]['cap'] if start > 0 else 25000.0
    q_roi = (q_ds[-1]['cap'] - cap_s) / cap_s * 100
    q_tr = sum(s['trades'] for s in q_ds)
    print(f" {qkey}: {len(q_ds):>3} dates {q_tr:>5} trades ROI={q_roi:>+7.1f}% cap=${q_ds[-1]['cap']:,.0f}")
|
|||
|
|
|
|||
|
|
# --- Save logs ---
# parents=True so a missing intermediate directory cannot abort the save
# step after a multi-hour backtest (exist_ok alone only tolerates the leaf
# directory already existing).
LOG_DIR.mkdir(parents=True, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")  # shared suffix for all three log files
print(f"\n Saving logs...")
|
|||
|
|
|
|||
|
|
# One row per trade, with prices/sizes pre-formatted to fixed precision.
trades_path = LOG_DIR / f"trades_5y_{ts}.csv"
TRADE_FIELDS = ['trade_id', 'asset', 'direction', 'entry_price', 'exit_price',
                'entry_bar', 'exit_bar', 'bars_held', 'leverage', 'notional',
                'pnl_pct', 'pnl_absolute', 'exit_reason', 'bucket_idx']
with open(trades_path, 'w', newline='') as fh:
    writer = csv.writer(fh)
    writer.writerow(TRADE_FIELDS)
    writer.writerows(
        [t.trade_id, t.asset, t.direction,
         f"{t.entry_price:.6f}", f"{t.exit_price:.6f}",
         t.entry_bar, t.exit_bar, t.bars_held,
         f"{t.leverage:.4f}", f"{t.notional:.4f}",
         f"{t.pnl_pct:.8f}", f"{t.pnl_absolute:.4f}",
         t.exit_reason, t.bucket_idx]
        for t in tr
    )
|
|||
|
|
|
|||
|
|
# One row per trading date; drawdown is recomputed against the running peak.
daily_path = LOG_DIR / f"daily_5y_{ts}.csv"
with open(daily_path, 'w', newline='') as fh:
    writer = csv.writer(fh)
    writer.writerow(['date', 'pnl', 'capital', 'dd_pct', 'boost', 'beta', 'mc_status', 'trades'])
    running_peak = 25000.0  # equity high-water mark for the dd_pct column
    for s in dstats:
        running_peak = max(running_peak, s['cap'])
        writer.writerow([s['date'], f"{s['pnl']:.4f}", f"{s['cap']:.4f}",
                         f"{(running_peak - s['cap']) / running_peak * 100:.4f}",
                         f"{s.get('boost', 1.0):.4f}", f"{s.get('beta', 0.5):.2f}",
                         s.get('mc_status', 'N/A'), s['trades']])
|
|||
|
|
|
|||
|
|
# Machine-readable record of this run (written to JSON below). Nested
# pieces are built first so the top-level dict reads as a flat manifest.
per_year_rounded = {
    y: {k: round(v, 4) if isinstance(v, float) else v for k, v in ys.items()}
    for y, ys in year_summary.items()
}
stack_notes = [
    'OB: MockOBProvider 4D real-calibrated (BTC=-0.086, ETH=-0.092)',
    'ACBv6: w750 from parquet column (klines fix, not NPZ)',
    'ACBv6 DEGRADED: ExF boost=1.0 on ALL dates (no NPZ Indicators for klines). Only w750 beta active.',
    f'MC-Forewarner: {"ACTIVE" if use_mc else "INACTIVE (failed to load)"}',
    'EsoF: hazard=0.0 neutral',
    'ExF: neutral (no NPZ for klines dates) — results are CONSERVATIVE vs full ExF backfill',
]
summary = {
    'script': 'test_pf_5y_klines',
    'timestamp': ts,
    'date_range': f'{date_strings[0]} to {date_strings[-1]}',
    'n_dates': total_files,
    'total_trades': len(tr),
    'roi_pct': round(roi, 4),
    'pf': round(pf_all, 4),
    'max_dd_pct': round(max_dd, 4),
    'sharpe': round(sharpe, 4),
    'win_rate': round(wr_all, 4),
    'h2h1_ratio': round(h2h1, 4) if not math.isnan(h2h1) else None,
    'mc_red_days': len(red_days) if use_mc else None,
    'mc_orange_days': len(orng_days) if use_mc else None,
    'mc_active': use_mc,
    'capital_final': round(engine.capital, 4),
    'per_year': per_year_rounded,
    'engine_kwargs': ENGINE_KWARGS,
    'acb_config': {
        'w750_source': 'parquet_column_v750_lambda_max_velocity',
        'w750_threshold_pct': 60,
        'w750_threshold_value': round(float(acb._w750_threshold), 8),
        'w750_dates_loaded': w750_loaded,
        'w750_total_dates': total_files,
    },
    'stack_notes': stack_notes,
}
|
|||
|
|
# Persist the summary and report where everything landed.
summary_path = LOG_DIR / f"summary_5y_{ts}.json"
summary_path.write_text(json.dumps(summary, indent=2))

print(f" trades → {trades_path}")
print(f" daily → {daily_path}")
print(f" summary → {summary_path}")
print(f"{'='*70}")
print(f"\n 5-YEAR Klines Backtest COMPLETE")
print(f"{'='*70}")
|