"""Putative ROI estimate: OB engine calibrated to REAL OB distributions. Real OB data (2025-01-15) showed: BTC imbalance: mean=-0.086 (mild sell pressure, confirms SHORT) ETH imbalance: mean=-0.092 DASH imbalance: mean=+0.265 (buy pressure) LTC imbalance: mean=+0.095 Overall: slight negative bias across majors Depth quality: ~1.0 (median-normalized) Fill probability: ~0.86 We calibrate MockOBProvider to these observed distributions and run the full backtest to estimate putative ROI with realistic OB intelligence. """ import sys, time, math from pathlib import Path import numpy as np import pandas as pd sys.path.insert(0, str(Path(__file__).parent)) print("Compiling numba kernels...") t0c = time.time() from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine from nautilus_dolphin.nautilus.ob_provider import MockOBProvider _p = np.array([1.0, 2.0, 3.0], dtype=np.float64) compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01) rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20) compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0, np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64), np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04) check_dc_nb(_p, 3, 1, 0.75) # Warmup OB kernels from nautilus_dolphin.nautilus.ob_features import ( compute_imbalance_nb, compute_depth_1pct_nb, compute_depth_quality_nb, compute_fill_probability_nb, compute_spread_proxy_nb, compute_depth_asymmetry_nb, compute_imbalance_persistence_nb, compute_withdrawal_velocity_nb, compute_market_agreement_nb, compute_cascade_signal_nb, ) _b = np.array([100.0, 200.0, 300.0, 400.0, 500.0], dtype=np.float64) _a = np.array([110.0, 190.0, 310.0, 390.0, 510.0], dtype=np.float64) 
compute_imbalance_nb(_b, _a); compute_depth_1pct_nb(_b, _a) compute_depth_quality_nb(210.0, 200.0); compute_fill_probability_nb(1.0) compute_spread_proxy_nb(_b, _a); compute_depth_asymmetry_nb(_b, _a) compute_imbalance_persistence_nb(np.array([0.1, -0.1], dtype=np.float64), 2) compute_withdrawal_velocity_nb(np.array([100.0, 110.0], dtype=np.float64), 1) compute_market_agreement_nb(np.array([0.1, -0.05], dtype=np.float64), 2) compute_cascade_signal_nb(np.array([-0.05, -0.15], dtype=np.float64), 2, -0.10) print(f" JIT: {time.time() - t0c:.1f}s") from nautilus_dolphin.nautilus.alpha_orchestrator import NDAlphaEngine from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache") META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity', 'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150'} ENGINE_KWARGS = dict( initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05, min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0, fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120, use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75, dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5, use_asset_selection=True, min_irp_alignment=0.45, use_sp_fees=True, use_sp_slippage=True, sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50, use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40, lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42, ) VD_THRESH = -0.02; VD_EXTREME = -0.05; CONVEXITY = 3.0 parquet_files = sorted(VBT_DIR.glob("*.parquet")) # Filter out catalog subdirectory parquets parquet_files = [p for p in parquet_files if 'catalog' not in str(p)] # Initialize ACB acb = AdaptiveCircuitBreaker() date_strings = [pf.stem for pf in parquet_files] acb.preload_w750(date_strings) # Pre-load VBT 
# Pre-load VBT data.
# First pass: estimate the 60th-percentile short-horizon BTC volatility from
# the first two days; used below as the vol-regime gate threshold.
all_vols = []
for pf in parquet_files[:2]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns:
        continue
    pr = df['BTCUSDT'].values
    # NOTE(review): this warmup loop starts at 60 while the per-day loop below
    # starts at 50 — confirm the asymmetry is intended.
    for i in range(60, len(pr)):
        seg = pr[max(0, i - 50):i]
        if len(seg) < 10:
            continue
        v = float(np.std(np.diff(seg) / seg[:-1]))
        if v > 0:
            all_vols.append(v)
# Guard against an empty sample (e.g. BTCUSDT missing from the first files):
# fall back to 0.0, which makes the vol gate pass for any finite positive vol.
vol_p60 = float(np.percentile(all_vols, 60)) if all_vols else 0.0

# Second pass: cache (dataframe, asset columns, rolling BTC vol) per day so
# each engine run iterates pre-parsed data.
pq_data = {}
for pf in parquet_files:
    df = pd.read_parquet(pf)
    ac = [c for c in df.columns if c not in META_COLS]
    bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    dv = np.full(len(df), np.nan)
    if bp is not None:
        for i in range(50, len(bp)):
            seg = bp[max(0, i - 50):i]
            if len(seg) < 10:
                continue
            dv[i] = float(np.std(np.diff(seg) / seg[:-1]))
    pq_data[pf.stem] = (df, ac, dv)

# Single source of truth for starting capital (was hard-coded as 25000 in
# three places inside run_engine; must match the engine's own setting).
INITIAL_CAPITAL = float(ENGINE_KWARGS["initial_capital"])


def strength_cubic(vel_div):
    """Map a vel_div reading to a [0, 1] signal strength with cubic convexity.

    Returns 0.0 until vel_div crosses VD_THRESH, then ramps toward 1.0 as it
    approaches VD_EXTREME, raised to CONVEXITY to emphasize extreme readings.
    """
    if vel_div >= VD_THRESH:
        return 0.0
    raw = (VD_THRESH - vel_div) / (VD_THRESH - VD_EXTREME)
    return min(1.0, max(0.0, raw)) ** CONVEXITY


def run_engine(label, ob_engine_instance=None):
    """Run the full multi-day backtest and return a summary-statistics dict.

    Parameters
    ----------
    label : str
        Display name stored in the result under 'label'.
    ob_engine_instance : optional
        OB feature engine attached to the alpha engine; None runs the
        baseline without order-book intelligence.

    Returns
    -------
    dict with ROI, profit factor, drawdown, Sharpe, trade counts, win/loss
    averages, first/second-half P&L, and per-day stats.
    """
    import gc
    gc.collect()
    engine = NDAlphaEngine(**ENGINE_KWARGS)
    if ob_engine_instance is not None:
        engine.set_ob_engine(ob_engine_instance)
    bar_idx = 0
    ph = {}       # per-asset price histories fed to the engine
    dstats = []   # per-day {'date', 'pnl', 'cap'} records
    for pf in parquet_files:
        ds = pf.stem
        cs = engine.capital
        engine.regime_direction = -1   # SHORT-only study
        engine.regime_dd_halt = False
        acb_info = acb.get_dynamic_boost_for_date(ds, ob_engine=ob_engine_instance)
        base_boost = acb_info['boost']
        beta = acb_info['beta']
        df, acols, dvol = pq_data[ds]
        bid = 0   # bar index within the current day
        for ri in range(len(df)):
            row = df.iloc[ri]
            vd = row.get("vel_div")
            if vd is None or not np.isfinite(vd):
                bar_idx += 1
                bid += 1
                continue
            prices = {}
            for ac in acols:
                p = row[ac]
                if p and p > 0 and np.isfinite(p):
                    prices[ac] = float(p)
                    if ac not in ph:
                        ph[ac] = []
                    ph[ac].append(float(p))
                    # Cap history to prevent memory growth
                    if len(ph[ac]) > 500:
                        ph[ac] = ph[ac][-200:]
            if not prices:
                bar_idx += 1
                bid += 1
                continue
            # Vol-regime gate: require 100 warmup bars and above-threshold vol.
            vrok = False if bid < 100 else (np.isfinite(dvol[ri]) and dvol[ri] > vol_p60)
            if beta > 0:
                ss = strength_cubic(float(vd))
                engine.regime_size_mult = base_boost * (1.0 + beta * ss)
            else:
                engine.regime_size_mult = base_boost
            engine.process_bar(bar_idx=bar_idx, vel_div=float(vd), prices=prices,
                               vol_regime_ok=vrok, price_histories=ph)
            bar_idx += 1
            bid += 1
        dstats.append({'date': ds, 'pnl': engine.capital - cs, 'cap': engine.capital})

    # ---- summary statistics ----
    tr = engine.trade_history
    w = [t for t in tr if t.pnl_absolute > 0]
    l = [t for t in tr if t.pnl_absolute <= 0]
    gw = sum(t.pnl_absolute for t in w) if w else 0
    gl = abs(sum(t.pnl_absolute for t in l)) if l else 0
    roi = (engine.capital - INITIAL_CAPITAL) / INITIAL_CAPITAL * 100
    pf_val = gw / gl if gl > 0 else 999   # 999 = sentinel for "no losses"
    dr = [s['pnl'] / INITIAL_CAPITAL * 100 for s in dstats]
    sharpe = np.mean(dr) / np.std(dr) * np.sqrt(365) if np.std(dr) > 0 else 0
    peak_cap = INITIAL_CAPITAL
    max_dd = 0.0
    for s in dstats:
        peak_cap = max(peak_cap, s['cap'])
        dd = (peak_cap - s['cap']) / peak_cap * 100
        max_dd = max(max_dd, dd)
    mid = len(parquet_files) // 2
    h1 = sum(s['pnl'] for s in dstats[:mid])
    h2 = sum(s['pnl'] for s in dstats[mid:])
    wr = len(w) / len(tr) * 100 if tr else 0.0
    avg_win = np.mean([t.pnl_pct for t in w]) * 100 if w else 0.0
    avg_loss = np.mean([t.pnl_pct for t in l]) * 100 if l else 0.0
    return {
        'label': label, 'roi': roi, 'pf': pf_val, 'dd': max_dd,
        'sharpe': sharpe, 'trades': len(tr), 'capital': engine.capital,
        'h1': h1, 'h2': h2, 'dstats': dstats, 'wr': wr,
        'avg_win': avg_win, 'avg_loss': avg_loss,
        'n_wins': len(w), 'n_losses': len(l),
    }


print(f"\n{'='*75}")
print(f" PUTATIVE ROI WITH REAL-CALIBRATED OB INTELLIGENCE")
print(f"{'='*75}")
print(f" Real OB observations (2025-01-15):")
print(f" BTC imbalance: -0.086 ETH: -0.092 (sell pressure, confirms SHORT)")
print(f" DASH: +0.265 LTC: +0.095 (buy pressure, contradicts SHORT)")
print(f" Depth quality: ~1.0 Fill probability: ~0.86")
print(f" Market agreement: ~55% (rarely >80%)")
print(f" Backtest period: {date_strings[0]} to {date_strings[-1]} ({len(date_strings)} days)")
print(f"{'='*75}")

assets = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"]

# Extended configs: per_biases=None uses global imb_bias; dict = per-asset overrides
# Real OB Jan-15: BTC=-0.086, ETH=-0.092 confirm SHORT; DASH=+0.265, LTC=+0.095 contradict.
# Backtest uses BTC/ETH/BNB/SOL - proxying with estimated BNB/SOL biases.
configs = [
    # Label, imb_bias, depth_sc, per_biases, rationale
    ("Baseline (no OB)", None, None, None,
     "No OB engine - Monte Carlo edge only"),
    ("All-confirm SHORT", -0.09, 1.0, None,
     "All assets: sell pressure confirms SHORT (uniform -0.09)"),
    ("Real-calibrated (mixed)", -0.09, 1.0,
     {"BTCUSDT": -0.086, "ETHUSDT": -0.092, "BNBUSDT": +0.05, "SOLUSDT": +0.05},
     "BTC/ETH confirm (-0.09), BNB/SOL mildly contradict (+0.05) - realistic mixed"),
    ("Hard-skip test", -0.09, 1.0,
     {"BTCUSDT": -0.086, "ETHUSDT": -0.092, "BNBUSDT": +0.20, "SOLUSDT": +0.20},
     "BTC/ETH confirm; BNB/SOL strongly contradict (+0.20, persist>0.60) -> OB_SKIP"),
    ("All-contradict SHORT", +0.20, 1.0, None,
     "All assets: buy pressure contradicts SHORT - maximum OB_SKIP filtering"),
]

results = []
for label, imb_bias, depth_sc, per_biases, rationale in configs:
    print(f"\n Running: {label}")
    print(f" Rationale: {rationale}")
    t0 = time.time()
    if imb_bias is None:
        # Baseline: no OB engine
        r = run_engine(label, None)
    else:
        # Calibrated mock provider: global bias plus optional per-asset overrides.
        mock = MockOBProvider(
            imbalance_bias=imb_bias,
            depth_scale=depth_sc,
            assets=assets,
            imbalance_biases=per_biases or {},
        )
        ob_eng = OBFeatureEngine(mock)
        ob_eng.preload_date("mock", assets)
        r = run_engine(label, ob_eng)
    elapsed = time.time() - t0
    r['rationale'] = rationale
    results.append(r)
    print(f" ROI={r['roi']:+.2f}% PF={r['pf']:.3f} DD={r['dd']:.2f}% "
          f"Sharpe={r['sharpe']:.2f} Trades={r['trades']} [{elapsed:.0f}s]")
    print(f" WR={r['wr']:.1f}% AvgWin={r['avg_win']:+.3f}% AvgLoss={r['avg_loss']:+.3f}% "
          f"W={r['n_wins']} L={r['n_losses']}")
    # H2/H1 ratio only meaningful when first-half P&L is nonzero.
    if r['h1'] != 0:
        print(f" Capital: ${r['capital']:,.2f} H1=${r['h1']:+,.2f} H2=${r['h2']:+,.2f} "
              f"H2/H1={r['h2']/r['h1']:.2f}")

# ============================================================================
# Summary table
# ============================================================================
print(f"\n{'='*75}")
print(f" PUTATIVE ROI SUMMARY")
print(f"{'='*75}")
print(f" {'Config':<25s} {'ROI':>8s} {'PF':>7s} {'WR':>6s} {'AvgW':>7s} {'AvgL':>7s} {'Sharpe':>7s} {'Trades':>7s}")
print(f" {'-'*25} {'-'*8} {'-'*7} {'-'*6} {'-'*7} {'-'*7} {'-'*7} {'-'*7}")
for r in results:
    print(f" {r['label']:<25s} {r['roi']:+7.2f}% {r['pf']:6.3f} {r['wr']:5.1f}% "
          f"{r['avg_win']:+6.3f}% {r['avg_loss']:+6.3f}% {r['sharpe']:6.2f} {r['trades']:7d}")

# Delta vs baseline
baseline_roi = results[0]['roi']
baseline_wr = results[0]['wr']
print(f"\n {'Config':<25s} {'dROI':>9s} {'dPF':>8s} {'dWR':>7s} {'dAvgWin':>9s} {'dAvgLoss':>9s} {'dTrades':>8s}")
print(f" {'-'*25} {'-'*9} {'-'*8} {'-'*7} {'-'*9} {'-'*9} {'-'*8}")
for r in results[1:]:
    print(f" {r['label']:<25s} {r['roi']-baseline_roi:+8.2f}% "
          f"{r['pf']-results[0]['pf']:+7.3f} "
          f"{r['wr']-baseline_wr:+6.2f}% "
          f"{r['avg_win']-results[0]['avg_win']:+8.3f}% "
          f"{r['avg_loss']-results[0]['avg_loss']:+8.3f}% "
          f"{r['trades']-results[0]['trades']:+7d}")

print(f"\n NOTE: These are PUTATIVE estimates using calibrated MockOBProvider.")
print(f" Real performance requires downloading live OB data for the backtest period.")
print(f"{'='*75}")