"""Compare 5s posture backtest: dvol Q1 gated vs ungated.

Hypothesis: days where dvol_btc < 47.5 show -0.79pp SHORT edge historically.
Gating them to NONE should improve PF by removing noisy low-dvol days
driven only by extreme fng (fng 6-17 in Jan 2026).

Runs both variants in a single pass and compares.
"""
import sys, time, gc
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
_here = Path(__file__).parent
sys.path.insert(0, str(_here))          # nautilus_dolphin/
sys.path.insert(0, str(_here.parent))   # project root

from pathlib import Path
from collections import defaultdict
import numpy as np
import pandas as pd

from nautilus_dolphin.nautilus.macro_posture_switcher import (
    MacroPostureSwitcher, Posture
)

# Directory globbed for the per-day "*.parquet" files that both passes iterate over.
VBT_DIR_5S = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
# Root with one sub-directory per date holding "scan_*__Indicators.npz" files (read by load_exf).
EIGEN_PATH  = Path(r"C:\Users\Lenovo\Documents\- Dolphin NG HD (NG3)\correlation_arb512\eigenvalues")
# NOTE(review): not referenced anywhere in the visible part of this script.
LOG_DIR     = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")

# vel_div threshold: entries fire at >= +ENTRY_T (SHORT) or <= -ENTRY_T (otherwise),
# and the opposite-signed threshold triggers the exit.
ENTRY_T  = 0.020
# Maximum number of bars a simulated trade is held when no exit crossing occurs.
MAX_HOLD = 240
# Indicator names load_exf looks up in each npz file's api_names / api_indicators arrays.
EXF_KEYS = ['dvol_btc', 'fng', 'funding_btc', 'taker']

def load_exf(date_str: str) -> dict:
    """Return the day's external-factor readings, falling back to defaults.

    Looks in ``EIGEN_PATH / date_str`` for ``scan_*__Indicators.npz`` files and
    uses only the first five in sorted-name order. For each key in ``EXF_KEYS``
    that appears in a file's ``api_names`` array, the matching entry of
    ``api_indicators`` is collected when it is finite. The value reported for a
    key is the median of everything collected for it.

    Args:
        date_str: Name of the per-day sub-directory under ``EIGEN_PATH``.

    Returns:
        A dict with the keys ``dvol_btc``, ``fng``, ``funding_btc`` and
        ``taker``. Keys without any finite reading keep their default
        (50.0, 50.0, 0.0 and 1.0 respectively). The defaults are also returned
        when the directory is missing or contains no matching file.
    """
    defaults = {'dvol_btc': 50.0, 'fng': 50.0, 'funding_btc': 0.0, 'taker': 1.0}
    dp = EIGEN_PATH / date_str
    if not dp.exists():
        return defaults
    files = sorted(dp.glob('scan_*__Indicators.npz'))[:5]
    if not files:
        return defaults

    buckets = defaultdict(list)
    for f in files:
        try:
            # NOTE: allow_pickle=True can execute code embedded in a crafted
            # file; only point EIGEN_PATH at trusted, locally produced scans.
            # The context manager closes the archive's file handle, which the
            # bare np.load() call left open for every file visited.
            with np.load(f, allow_pickle=True) as d:
                if 'api_names' not in d:
                    continue
                names = list(d['api_names'])
                vals  = d['api_indicators']
                for k in EXF_KEYS:
                    if k in names:
                        v = float(vals[names.index(k)])
                        if np.isfinite(v):
                            buckets[k].append(v)
        except Exception:
            # Deliberate best-effort: a corrupt or malformed scan file is
            # skipped so the remaining files (or the defaults) are still used.
            continue

    out = dict(defaults)
    for k, vs in buckets.items():
        if vs:
            out[k] = float(np.median(vs))
    return out

# Per-day parquet files in name order, skipping anything whose path mentions 'catalog'.
parquet_files = [
    path
    for path in sorted(VBT_DIR_5S.glob("*.parquet"))
    if 'catalog' not in str(path)
]
total = len(parquet_files)

# Two switchers: baseline (no dvol gate) vs gated (dvol < 47.5 → NONE)
# Settings common to both variants; only dvol_none_below differs between them.
_shared_switcher_kwargs = dict(
    enable_long_posture=True,
    rvol_pause_thresh=0.000203,
    rvol_strong_thresh=0.000337,
)
# UNGATED baseline.
switcher_base = MacroPostureSwitcher(dvol_none_below=0.0, **_shared_switcher_kwargs)
# GATED variant: dvol Q1 (below 47.5) is mapped to NONE.
switcher_gate = MacroPostureSwitcher(dvol_none_below=47.5, **_shared_switcher_kwargs)

# Pass 1: prev-day rvol
print("Pass 1: lag-1 rvol...")
t0 = time.time()
day_rvol = {}
day_btcret = {}
for pf in parquet_files:
    try:
        frame = pd.read_parquet(pf, columns=['BTCUSDT'])
    except Exception:
        # Unreadable file: the day is simply absent from the lag tables.
        continue
    prices = frame['BTCUSDT'].values.astype(np.float64)
    usable = np.isfinite(prices) & (prices > 0)
    prices = prices[usable]
    if len(prices) < 2:
        continue
    ds = pf.stem
    # Realized vol = std of bar-to-bar log returns; day return = last vs first valid price.
    day_rvol[ds] = float(np.std(np.diff(np.log(prices))))
    first_px = prices[0]
    day_btcret[ds] = float((prices[-1] - first_px) / first_px)

# Lag both series by one *available* day (the previous key in sorted order);
# the earliest day has no predecessor and maps to None.
dates_sorted = sorted(day_rvol.keys())
prev_rvol = {}
prev_btcret = {}
prev_day = None
for day in dates_sorted:
    if prev_day is None:
        prev_rvol[day] = None
        prev_btcret[day] = None
    else:
        prev_rvol[day] = day_rvol.get(prev_day)
        prev_btcret[day] = day_btcret.get(prev_day)
    prev_day = day
print(f"  done: {time.time()-t0:.1f}s")

# Accumulators for both variants
def make_acc():
    """Build a fresh, independent accumulator for one backtest variant.

    Returns:
        A dict with zeroed trade counters (``wins``, ``losses``, ``n``), gross
        win/loss sums (``gw``, ``gl``), day counters (``active``, ``paused``),
        the running ``equity`` starting at 1.0 with its ``equity_curve``, and
        empty ``day_rets`` / ``day_rows`` lists. Every call creates new list
        objects, so accumulators never share state.
    """
    acc = {}
    acc['wins'] = 0
    acc['losses'] = 0
    acc['gw'] = 0.0
    acc['gl'] = 0.0
    acc['n'] = 0
    acc['equity'] = 1.0
    acc['equity_curve'] = [1.0]
    acc['active'] = 0
    acc['paused'] = 0
    acc['day_rets'] = []
    acc['day_rows'] = []
    return acc

acc_base = make_acc()
acc_gate = make_acc()

print("Pass 2: crossover simulation...")

# Sentinel meaning "this day's parquet has not been read yet".
# (None is reserved for "read, but unusable".)
_NOT_LOADED = object()

for i, pf in enumerate(parquet_files):
    ds  = pf.stem
    pr  = prev_rvol.get(ds)
    pb  = prev_btcret.get(ds)
    exf = load_exf(ds)

    # The signal/price arrays depend only on the file, never on the variant,
    # so they are read lazily (only if some variant actually trades the day)
    # and then shared by both variants instead of being re-read for each one.
    day_arrays = _NOT_LOADED

    for acc, sw in [(acc_base, switcher_base), (acc_gate, switcher_gate)]:
        decision = sw.decide(
            dvol_btc=exf['dvol_btc'], fng=exf['fng'],
            funding_btc=exf['funding_btc'],
            realized_vol=pr, btc_day_return=pb,
        )
        if decision.posture == Posture.NONE:
            acc['paused'] += 1
            acc['day_rows'].append({
                'date': ds, 'posture': 'NONE', 'dvol': exf['dvol_btc'],
                'fng': exf['fng'], 'fear': round(decision.fear_score, 3),
                'n': 0, 'day_ret': 0.0,
            })
            continue

        if day_arrays is _NOT_LOADED:
            day_arrays = None
            try:
                df = pd.read_parquet(pf)
            except Exception:
                # Best-effort batch run: an unreadable file pauses the day.
                df = None
            if df is not None and 'vel_div' in df.columns and 'BTCUSDT' in df.columns:
                vd  = df['vel_div'].values.astype(np.float64)
                btc = df['BTCUSDT'].values.astype(np.float64)
                # Missing signal -> 0.0 (never crosses a threshold);
                # missing or non-positive price -> NaN (never traded on).
                vd  = np.where(np.isfinite(vd), vd, 0.0)
                btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)
                if len(btc) >= MAX_HOLD + 5:
                    day_arrays = (vd, btc)
            del df

        if day_arrays is None:
            # Unreadable file, missing columns or too few bars: the day counts
            # as paused only. (It used to be counted as active as well, so
            # Active + Paused could exceed the number of days processed.)
            acc['paused'] += 1
            continue
        acc['active'] += 1

        vd, btc = day_arrays
        n = len(btc)

        pos   = decision.posture
        smult = decision.size_mult
        if pos == Posture.SHORT:
            entry_mask = (vd >= ENTRY_T)  & np.isfinite(btc)
            cross_back = (vd <= -ENTRY_T)
            sign = -1
        else:
            entry_mask = (vd <= -ENTRY_T) & np.isfinite(btc)
            cross_back = (vd >= ENTRY_T)
            sign = +1

        day_rets_sized = []
        # Only bars with a full MAX_HOLD window ahead may open a trade, so every
        # index t + k below is in range. entry_mask already guarantees a finite,
        # positive entry price.
        for t in np.flatnonzero(entry_mask[:n - MAX_HOLD]):
            ep = btc[t]
            # Exit at the first opposite crossing within the next MAX_HOLD bars,
            # otherwise at the MAX_HOLD-th bar.
            hits = np.flatnonzero(cross_back[t + 1:t + MAX_HOLD + 1])
            exit_bar = int(hits[0]) + 1 if hits.size else MAX_HOLD
            xp = btc[t + exit_bar]
            if not np.isfinite(xp) or xp <= 0:
                # No valid exit price on that bar: the trade is dropped.
                continue
            raw_ret   = sign * (xp - ep) / ep
            sized_ret = raw_ret * smult
            day_rets_sized.append((raw_ret, sized_ret))

        del vd, btc, entry_mask, cross_back

        n_t = len(day_rets_sized)
        if n_t == 0:
            acc['day_rows'].append({'date': ds, 'posture': pos.value,
                'dvol': exf['dvol_btc'], 'fng': exf['fng'],
                'fear': round(decision.fear_score, 3), 'n': 0, 'day_ret': 0.0})
            continue

        # Win/loss statistics use raw returns; equity uses size-multiplied returns.
        wins   = sum(1 for r, _ in day_rets_sized if r >= 0)
        losses = n_t - wins
        gw     = sum(r for r, _ in day_rets_sized if r >= 0)
        gl     = sum(abs(r) for r, _ in day_rets_sized if r < 0)
        day_ret = sum(s for _, s in day_rets_sized)

        acc['wins']   += wins;  acc['losses'] += losses
        acc['gw']     += gw;    acc['gl']     += gl;    acc['n'] += n_t

        # Clamp the compounded daily return to [-50%, +200%]; the unclamped
        # value is what goes into day_rets and the per-day rows.
        day_ret_clamped = max(-0.5, min(day_ret, 2.0))
        acc['equity'] *= (1 + day_ret_clamped)
        acc['equity_curve'].append(acc['equity'])
        acc['day_rets'].append(day_ret)

        # 999.0 is the sentinel profit factor for a day without losing trades.
        pf_d = gw / gl if gl > 0 else 999.0
        acc['day_rows'].append({
            'date': ds, 'posture': pos.value, 'dvol': round(exf['dvol_btc'], 1),
            'fng': round(exf['fng'], 1), 'fear': round(decision.fear_score, 3),
            'n': n_t, 'wins': wins, 'losses': losses,
            'pf': round(pf_d, 4), 'day_ret': round(day_ret, 6),
        })

    # Release this day's arrays before moving on to the next file.
    day_arrays = None
    if (i + 1) % 10 == 0:
        gc.collect()

# t0 was set before Pass 1 and is not reset, so this is the total for both passes.
elapsed = time.time() - t0
print(f"Done: {elapsed:.1f}s\n")

# ── Report ──────────────────────────────────────────────────────────────────
def report(label, acc):
    """Print the summary statistics of one variant and return its key figures.

    Args:
        label: Heading printed above the statistics.
        acc: Accumulator dict as produced by ``make_acc`` and filled by the
            simulation (reads ``n``, ``wins``, ``gw``, ``gl``, ``active``,
            ``paused``, ``equity_curve`` and ``day_rets``).

    Returns:
        Tuple ``(pf, wr, n)``: profit factor (999.0 when there are no gross
        losses), win rate in percent (0.0 when there are no trades) and the
        trade count.
    """
    n = acc['n']
    gross_loss = acc['gl']

    if gross_loss > 0:
        pf = acc['gw'] / gross_loss
    else:
        pf = 999.0

    if n > 0:
        wr = acc['wins'] / n * 100
    else:
        wr = 0.0

    curve = np.array(acc['equity_curve'])
    final_equity = curve[-1]
    roi = (final_equity - 1.0) * 100

    # Max drawdown: largest relative drop from the running peak of the curve.
    peaks = np.maximum.accumulate(curve)
    dd = float(np.max((peaks - curve) / peaks)) * 100

    # Annualised Sharpe over per-day returns; needs at least two days with
    # non-zero dispersion, otherwise reported as 0.0.
    daily = np.array(acc['day_rets'])
    sharpe = 0.0
    if len(daily) > 1:
        sigma = np.std(daily)
        if sigma > 0:
            sharpe = float(np.mean(daily) / sigma * np.sqrt(252))

    print(f"  {label}")
    print(f"    Active/Paused: {acc['active']} / {acc['paused']}")
    print(f"    N trades: {n:,}  |  WR: {wr:.2f}%  |  PF: {pf:.4f}")
    print(f"    ROI: {roi:+.2f}%  |  MaxDD: {dd:.2f}%  |  Sharpe: {sharpe:.3f}")
    print(f"    Equity: {final_equity:.4f}x")
    return pf, wr, n

print("=" * 60)
print("  5s Posture Backtest — dvol Q1 Gate Comparison")
print("=" * 60)
pf_b, wr_b, n_b = report("BASELINE (no dvol gate)", acc_base)
print()
pf_g, wr_g, n_g = report("GATED    (dvol < 47.5 → NONE)", acc_gate)
print()
# A baseline profit factor of 0 (losses but no gross wins) would make the
# relative change divide by zero; report it as nan instead of crashing after
# the whole simulation has already run.
pf_change_pct = (pf_g / pf_b - 1) * 100 if pf_b > 0 else float('nan')
print(f"  Delta PF:  {pf_g - pf_b:+.4f}  ({'+' if pf_g > pf_b else ''}{pf_change_pct:.1f}% change)")
print(f"  Delta WR:  {wr_g - wr_b:+.2f}pp")
print(f"  Trades removed: {n_b - n_g:,}")

# ── Per-day detail for gated variant: show which days got removed ──────────
print(f"\n  Days gated to NONE in variant (dvol < 47.5):")
# Baseline rows keyed by date, used to look up what each gated day returned
# when it was still traded.
base_by_date = {row['date']: row for row in acc_base['day_rows']}
gated_days = [
    row for row in acc_gate['day_rows']
    if row['posture'] == 'NONE' and row.get('dvol', 99) < 47.5
]

# Keep only the days the baseline actually traded, i.e. those the gate removed.
removed = []
for gated_row in gated_days:
    base_row = base_by_date.get(gated_row['date'])
    if not base_row or base_row['posture'] == 'NONE':
        continue
    baseline_stats = tuple(
        base_row.get(col, 0) for col in ('dvol', 'fng', 'fear', 'pf', 'day_ret')
    )
    removed.append((gated_row['date'],) + baseline_stats)

# Worst baseline day first (index 5 is the baseline day_ret).
removed.sort(key=lambda rec: rec[5])
print(f"  {'Date':<12}  {'dvol':>5}  {'fng':>4}  {'fear':>6}  {'PF_base':>7}  {'ret_base':>9}")
print(f"  {'-'*55}")
for date, dvol, fng, fear, pf_d, ret in removed:
    if ret < -0.01:
        marker = " ◄ LOSS"
    elif ret > 0.01:
        marker = " ◄ WIN"
    else:
        marker = ""
    print(f"  {date:<12}  {dvol:>5.1f}  {fng:>4.0f}  {fear:>6.3f}  {pf_d:>7.4f}  {ret:>+9.4f}{marker}")

removed_rets = [rec[5] for rec in removed]
win_days  = len([day_r for day_r in removed_rets if day_r > 0])
loss_days = len([day_r for day_r in removed_rets if day_r < 0])
net_ret   = sum(removed_rets)
print(f"\n  Removed: {len(removed)} days  |  Wins: {win_days}  Losses: {loss_days}  |  Net ret removed: {net_ret:+.4f}")