Files
DOLPHIN/prod/reconstruct_181.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

206 lines
8.2 KiB
Python
Executable File

"""
reconstruct_181.py — Faithful reconstruction of the original certification path.
The E2E test that certified 181.81% used:
- exp_shared.load_data() that returned pq_data (all data pre-loaded, float64)
- eng.set_esoteric_hazard_multiplier(0.0) [ceiling=6.0 at time of certification]
- vol_ok = np.where(np.isfinite(dvol), dvol > d['vol_p60'], False) [STATIC threshold]
- NO per-day OB clearing (OB set up once)
- funding_btc (not fund_dbt_btc) for ACB signals
This script tests multiple sub-hypotheses to find what changed.
Tests:
R1: Exact old path (pq_data style, ceiling patched to 6.0, static vol, no OB clear)
R2: Same but ceiling=10.0 (agent's change) — to isolate ceiling effect in old path
R3: No hazard call + old path (to isolate hazard effect)
R4: Old path but funding_btc forced for ACB (isolate ACB change)
All use static vol_p60 and np.isfinite (the certification condition).
"""
import sys, time, gc, math
from pathlib import Path
import numpy as np
import pandas as pd
ROOT = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict")
sys.path.insert(0, str(ROOT / 'nautilus_dolphin'))
sys.path.insert(0, str(ROOT / 'nautilus_dolphin' / 'dvae'))
import exp_shared
from nautilus_dolphin.nautilus.proxy_boost_engine import create_d_liq_engine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
print("Ensuring JIT...")
exp_shared.ensure_jit()
# ── Pre-load all data (OLD style — what pq_data contained) ───────────────────
print("\nPre-loading all parquet data (old style, float64)...")
VBT_DIR = exp_shared.VBT_DIR
parquet_files = sorted(VBT_DIR.glob("*.parquet"))
parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]
date_strings = [p.stem for p in parquet_files]
all_vols = []
for pf in parquet_files[:3]:
tmp = pd.read_parquet(pf)
if 'BTCUSDT' in tmp.columns:
bp = tmp['BTCUSDT'].values
diffs = np.diff(bp) / bp[:-1]
for i in range(50, len(diffs)):
all_vols.append(np.std(diffs[i-50:i]))
del tmp
vol_p60_static = float(np.percentile(all_vols, 60)) if all_vols else 0.0002
print(f" Static vol_p60: {vol_p60_static:.8f}")
# Load ALL data into pq_data (the old approach)
print(" Loading all 56 parquet files...")
pq_data = {}
for day_file in parquet_files:
    day_key = day_file.stem
    day_df = pd.read_parquet(day_file)  # float64, no casting
    asset_cols = [c for c in day_df.columns if c not in exp_shared.META_COLS]
    btc = day_df['BTCUSDT'].values if 'BTCUSDT' in day_df.columns else None
    # Rolling 50-bar std of BTC pct-returns; NaN until 50 bars are available.
    # Note: unlike the threshold precompute above, the return series here is
    # full-length with a leading 0.0 (faithful to the original path).
    dvol = np.full(len(day_df), np.nan)
    if btc is not None:
        rets = np.zeros(len(btc), dtype=np.float64)
        rets[1:] = np.diff(btc) / btc[:-1]
        for t in range(50, len(btc)):
            dvol[t] = np.std(rets[t - 50:t])
    pq_data[day_key] = (day_df, asset_cols, dvol)
# OB setup (once only, like old path)
from nautilus_dolphin.nautilus.ob_features import OBFeatureEngine
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider

OB_ASSETS = ["BTCUSDT", "ETHUSDT"]
# Mock order-book provider with a global imbalance bias plus per-asset overrides.
# NOTE(review): biases are supplied for BNBUSDT/SOLUSDT even though OB_ASSETS
# only covers BTC/ETH — presumably harmless extras; confirm against the provider.
_mock_ob = MockOBProvider(
    imbalance_bias=-0.09, depth_scale=1.0, assets=OB_ASSETS,
    imbalance_biases={"BTCUSDT":-0.086,"ETHUSDT":-0.092,"BNBUSDT":+0.05,"SOLUSDT":+0.05},
)
ob_eng = OBFeatureEngine(_mock_ob)
# Preloaded exactly once for the whole run — per-day OB clearing is deliberately
# skipped, matching the certification path described in the module docstring.
ob_eng.preload_date("mock", OB_ASSETS)
print(f" Loaded {len(pq_data)} days. Ready.\n")
def run_reconstruction(label, ceiling_lev, use_hazard_call, use_static_vol):
    """Run one full backtest pass over the pre-loaded days and compare vs GOLD.

    Args:
        label: Human-readable config name printed in the banner and summary.
        ceiling_lev: Leverage ceiling used by the patched hazard stepper.
        use_hazard_call: If True, monkey-patch ``set_esoteric_hazard_multiplier``
            on the engine and invoke it with hazard 0.0 (the certification path).
        use_static_vol: Kept for interface symmetry with the sub-hypotheses.
            NOTE(review): the static vol_p60 threshold is currently ALWAYS used
            regardless of this flag — confirm whether a dynamic branch was meant.

    Returns:
        dict with keys: label, roi, trades, dd, pf, calmar, elapsed.
    """
    print(f"\n{'='*65}")
    print(f" {label}")
    print(f" ceiling={ceiling_lev} hazard_call={use_hazard_call} static_vol={use_static_vol}")
    print(f"{'='*65}")
    # Engine kwargs: force maker fills and disable SP slippage (cert conditions).
    kw = exp_shared.ENGINE_KWARGS.copy()
    kw.update({'sp_maker_entry_rate': 1.0, 'sp_maker_exit_rate': 1.0, 'use_sp_slippage': False})
    acb = AdaptiveCircuitBreaker()
    acb.preload_w750(date_strings)
    eng = create_d_liq_engine(**kw)
    eng.set_ob_engine(ob_eng)
    eng.set_acb(acb)
    if use_hazard_call:
        # Patch ceiling_lev before calling
        import nautilus_dolphin.nautilus.esf_alpha_orchestrator as orch_mod
        original_fn = orch_mod.NDAlphaEngine.set_esoteric_hazard_multiplier  # kept for reference

        def patched_hazard(self, hazard_score):
            # Hazard → leverage mapping with the ceiling under test:
            # floor fixed at 3.0, quantized upward to 8 discrete steps.
            floor_lev = 3.0
            c_lev = ceiling_lev  # capture from enclosing scope
            range_lev = c_lev - floor_lev
            target_lev = c_lev - (hazard_score * range_lev)
            step = range_lev / 8.0
            stepped_lev = math.ceil(target_lev / step) * step
            self.base_max_leverage = max(floor_lev, min(c_lev, stepped_lev))
            self.bet_sizer.max_leverage = self.base_max_leverage
            # RED MC days run at 80% of base leverage; otherwise re-assert base.
            if hasattr(self, '_day_mc_status'):
                if self._day_mc_status == 'RED':
                    self.bet_sizer.max_leverage = self.base_max_leverage * 0.8
                else:
                    self.bet_sizer.max_leverage = self.base_max_leverage

        import types
        eng.set_esoteric_hazard_multiplier = types.MethodType(patched_hazard, eng)
        eng.set_esoteric_hazard_multiplier(0.0)
        base_after = getattr(eng, 'base_max_leverage', None)
        print(f" After hazard call: base_max={base_after} abs_max={getattr(eng,'abs_max_leverage',None)}")
    else:
        print(f" No hazard call: base_max={getattr(eng,'base_max_leverage',None)} abs_max={getattr(eng,'abs_max_leverage',None)}")

    daily_caps, daily_pnls = [], []
    t0 = time.time()
    for pf in parquet_files:
        ds = pf.stem
        df, acols, dvol = pq_data[ds]  # OLD style: pre-loaded float64 data
        cap_before = eng.capital
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60_static, False)  # OLD condition
        eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
        daily_caps.append(eng.capital)
        daily_pnls.append(eng.capital - cap_before)

    tr = eng.trade_history
    n = len(tr)
    roi = (eng.capital - 25000.0) / 25000.0 * 100.0
    # Max drawdown over daily capital marks, in percent of running peak.
    peak_cap, max_dd = 25000.0, 0.0
    for cap in daily_caps:
        peak_cap = max(peak_cap, cap)
        max_dd = max(max_dd, (peak_cap - cap) / peak_cap * 100.0)
    elapsed = time.time() - t0

    def _abs(t):
        # Absolute trade PnL; fall back to pct * $250 for legacy trade objects.
        return t.pnl_absolute if hasattr(t, 'pnl_absolute') else t.pnl_pct * 250.

    if n > 0:
        wins = [t for t in tr if _abs(t) > 0]
        pf_val = sum(_abs(t) for t in wins) / max(abs(sum(_abs(t) for t in tr if _abs(t) <= 0)), 1e-9)
        dr = np.array([p/25000.*100. for p in daily_pnls])
        sharpe = float(dr.mean()/(dr.std()+1e-9)*math.sqrt(365))
        calmar = roi / max(max_dd, 0.01)
    else:
        pf_val, sharpe, calmar = 0, 0, 0
    print(f"\n ROI: {roi:+.2f}% T={n} DD={max_dd:.2f}% PF={pf_val:.3f} Calmar={calmar:.2f} ({elapsed:.0f}s)")
    gold_roi, gold_t = 181.81, 2155
    roi_ok = abs(roi - gold_roi) <= 2.0
    t_ok = abs(n - gold_t) <= 5
    # BUGFIX: both branches of the pass/fail conditionals printed '' (the
    # original glyphs were lost in encoding) — restore visible markers.
    print(f" vs GOLD (+181.81%, T=2155): ROI diff={roi-gold_roi:+.2f}pp {'OK' if roi_ok else 'X'} T diff={n-gold_t:+d} {'OK' if t_ok else 'X'}")
    return {'label': label, 'roi': roi, 'trades': n, 'dd': max_dd, 'pf': pf_val, 'calmar': calmar, 'elapsed': elapsed}
if __name__ == '__main__':
    # Run the reconstruction sub-hypotheses in sequence and tabulate them
    # against the GOLD certification numbers (+181.81%, T=2155).
    results = []
    # R1: Exact certification conditions — ceiling=6.0 (original), hazard call, static vol, np.isfinite
    results.append(run_reconstruction(
        "R1: Cert conditions (ceiling=6.0, hazard, static vol)",
        ceiling_lev=6.0, use_hazard_call=True, use_static_vol=True,
    ))
    # R2: Ceiling=10.0 (agent's change), hazard call, static vol — isolate ceiling effect
    results.append(run_reconstruction(
        "R2: Agent ceiling=10.0, hazard, static vol",
        ceiling_lev=10.0, use_hazard_call=True, use_static_vol=True,
    ))
    # R3: No hazard call, static vol — baseline replicate style (should ≈ 111%)
    results.append(run_reconstruction(
        "R3: No hazard call, static vol (replicate style)",
        ceiling_lev=6.0, use_hazard_call=False, use_static_vol=True,
    ))
    # Summary table: GOLD row first, then each config, then the current replicate.
    print(f"\n{'='*65}")
    print(" RECONSTRUCTION SUMMARY")
    print(f"{'='*65}")
    print(f" {'Config':<45} {'ROI':>8} {'T':>6} {'DD':>7} {'PF':>6}")
    print(f" {'-'*75}")
    print(f" {'GOLD STANDARD':.<45} {'+181.81%':>8} {'2155':>6} {'17.65%':>7} {'---':>6}")
    for r in results:
        print(f" {r['label']:<45} {r['roi']:>+7.2f}% {r['trades']:>6} {r['dd']:>6.2f}% {r['pf']:>6.3f}")
    print(f" {'Current replicate (no hazard, float64, static)':.<45} {'~+111%':>8} {'~1959':>6} {'~16.9%':>7}")