#!/usr/bin/env python3 """ EsoF Gate Strategies — 56-Day Gold Backtest Simulation Runs the gold-spec engine over all 56 vbt_cache parquet days, collects ~2000 trade records with real UTC entry timestamps, then evaluates all EsoF gate strategies (A–E + S6) and overfitting guard tests against that statistically substantial dataset. Timestamp reconstruction: parquet 'timestamp' column → Unix seconds or nanoseconds NDTradeRecord.entry_bar → row index in the day's dataframe entry_ts = datetime.fromtimestamp(ts_col[entry_bar], UTC) Caches trade data to /tmp/esof_bt_trades.json to avoid re-running the 56-day engine on subsequent test/analysis calls. Run: source /home/dolphin/siloqy_env/bin/activate cd /mnt/dolphinng5_predict python prod/tests/run_esof_backtest_sim.py # full run + report python prod/tests/run_esof_backtest_sim.py --cached # skip backtest, use cache """ from __future__ import annotations import argparse import json import sys import time from collections import defaultdict from datetime import datetime, timezone from pathlib import Path from typing import List, Dict, Optional import numpy as np import pandas as pd # ── paths ───────────────────────────────────────────────────────────────────── _ROOT = Path(__file__).parent.parent.parent _PROD_DIR = _ROOT / "prod" sys.path.insert(0, str(_ROOT)) sys.path.insert(0, str(_ROOT / "Observability")) sys.path.insert(0, str(_ROOT / "nautilus_dolphin")) PARQUET_DIR = _ROOT / "vbt_cache" CACHE_FILE = Path("/tmp/esof_bt_trades.json") # ── reuse gold engine infrastructure ────────────────────────────────────────── from prod.backtest_gold_verify import ( _build_engine, _load_config, _META_COLS_SET, _compute_vol_ok, INITIAL_CAPITAL, ) # ── EsoF advisory + gate ─────────────────────────────────────────────────────── from esof_advisor import compute_esof, BASELINE_WR from esof_gate import apply_gate, get_s6_mult, get_bucket, S6_BASE, S6_MULT # ── statistical helpers (reuse from overfitting test) ───────────────────────── 
import math
import random


def wr(trades):
    """Return the win rate: share of trades with ``pnl > 0``; NaN when empty."""
    if not trades:
        return float("nan")
    return sum(1 for t in trades if t["pnl"] > 0) / len(trades)


def net(trades):
    """Return the summed ``pnl`` of the given trades."""
    return sum(t["pnl"] for t in trades)


def cohen_h(p1, p2):
    """Return Cohen's h effect size between two proportions.

    Proportions are clamped to [0, 1] before the arcsine transform.  A NaN
    input (e.g. the win rate of an empty cell) yields NaN; without this guard
    ``min(1, nan)`` evaluates to 1 and the cell would be scored as if its
    win rate were 100%.
    """
    if math.isnan(p1) or math.isnan(p2):
        return float("nan")

    def phi(p):
        return 2 * math.asin(math.sqrt(max(0, min(1, p))))

    return abs(phi(p1) - phi(p2))


def bootstrap_ci(vals, n_boot=3000, ci=0.95, seed=42):
    """Return a percentile bootstrap confidence interval for the mean of *vals*.

    Args:
        vals: sequence of numbers to resample with replacement.
        n_boot: number of bootstrap resamples.
        ci: two-sided confidence level (0.95 → 2.5th / 97.5th percentiles).
        seed: seed of the private ``random.Random`` generator.

    Returns:
        ``(lo, hi)`` tuple; ``(nan, nan)`` when *vals* is empty or
        ``n_boot <= 0``.

    Note:
        The resampling stream starts directly from *seed*.  An earlier
        revision first drew and sorted ``n_boot * n`` values that were never
        used, so interval endpoints may differ marginally from older runs.
    """
    n = len(vals)
    if n == 0 or n_boot <= 0:
        return float("nan"), float("nan")
    rng = random.Random(seed)
    means = sorted(
        sum(rng.choice(vals) for _ in range(n)) / n for _ in range(n_boot)
    )
    last = n_boot - 1
    # Clamp both indices: ci=1.0 would otherwise index one past the end.
    lo = max(0, min(int((1 - ci) / 2 * n_boot), last))
    hi = max(0, min(int((1 + ci) / 2 * n_boot), last))
    return means[lo], means[hi]


def binomial_se(p, n):
    """Return the binomial standard error ``sqrt(p(1-p)/n)``; inf when n <= 0."""
    return math.sqrt(p * (1 - p) / n) if n > 0 else float("inf")


def permutation_pvalue(trades, observed_delta, key, blocked_val,
                       n_perm=2000, seed=42):
    """Estimate a one-sided permutation p-value for blocking one label value.

    For each permutation the ``key`` labels are shuffled across trades and the
    statistic ``-sum(pnl of trades whose shuffled label == blocked_val)`` is
    computed.  The result is the fraction of permutations whose statistic is
    at least *observed_delta*.

    Args:
        trades: list of trade dicts containing ``key`` and ``"pnl"``.
        observed_delta: observed statistic to compare against.
        key: trade-dict key holding the label (e.g. ``"session"``).
        blocked_val: label value whose trades would be blocked.
        n_perm: number of permutations.
        seed: seed of the private ``random.Random`` generator.
    """
    rng = random.Random(seed)
    labels = [t[key] for t in trades]  # local copy; shuffled in place below
    pnls = [t["pnl"] for t in trades]
    count = 0
    for _ in range(n_perm):
        rng.shuffle(labels)
        d = -sum(p for l, p in zip(labels, pnls) if l == blocked_val)
        if d >= observed_delta:
            count += 1
    return count / n_perm


# ── Backtest runner ────────────────────────────────────────────────────────────

# (exclusive lower bound, units per second) pairs used to guess the unit of a
# numeric epoch timestamp, checked from the finest unit to the coarsest.
_EPOCH_SCALES = (
    (1e17, 1e9),   # nanoseconds
    (1e14, 1e6),   # microseconds
    (1e11, 1e3),   # milliseconds
    (1e9, 1.0),    # seconds
)


def resolve_entry_ts(raw, date_str):
    """Convert a raw parquet timestamp value to a timezone-aware UTC datetime.

    Args:
        raw: numeric epoch value (seconds, milliseconds, microseconds or
            nanoseconds, inferred from magnitude), or ``None`` when no
            timestamp is available for the bar.
        date_str: ``YYYY-MM-DD`` day of the trade, used as the fallback.

    Returns:
        The decoded UTC datetime, or midnight UTC of *date_str* when *raw* is
        missing, non-numeric, non-finite, too small to be an epoch value
        (e.g. a fractional day), or out of the platform's datetime range.
    """
    if raw is not None:
        try:
            value = float(raw)
        except (TypeError, ValueError):
            value = float("nan")
        if math.isfinite(value):
            for floor, per_second in _EPOCH_SCALES:
                if value > floor:
                    try:
                        return datetime.fromtimestamp(value / per_second,
                                                      tz=timezone.utc)
                    except (OverflowError, OSError, ValueError):
                        break  # absurdly large value: use the day fallback
    return datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)


def run_backtest() -> List[dict]:
    """
    Run gold-spec engine over all vbt_cache parquets.
    Returns list of trade dicts with real UTC entry timestamps.

    Trades whose ``exit_reason`` is ``HIBERNATE_HALT`` or
    ``SUBDAY_ACB_NORMALIZATION`` are skipped.  Each returned dict has the keys
    ``ts, date, asset, pnl, leverage, exit_reason, bucket_id, session, dow,
    score, label, liq_bkt`` and is JSON-serialisable.

    Raises:
        FileNotFoundError: if no parquet day files are found in PARQUET_DIR.
    """
    print("[BT] Loading config from blue.yml ...")
    cfg = _load_config()

    print("[BT] Building engine ...")
    engine = _build_engine(cfg, INITIAL_CAPITAL)
    engine.set_esoteric_hazard_multiplier(0.0)  # gold spec

    parquet_files = [p for p in sorted(PARQUET_DIR.glob("*.parquet"))
                     if "catalog" not in str(p)]
    if not parquet_files:
        raise FileNotFoundError(f"No parquet day files found in {PARQUET_DIR}")
    print(f"[BT] {len(parquet_files)} parquet days: {parquet_files[0].stem} → {parquet_files[-1].stem}")

    all_trades: List[dict] = []
    pkl_map: Optional[Dict[str, int]] = None
    try:
        import pickle
        # NOTE: pickle executes arbitrary code on load; this path must only
        # ever point at a trusted, project-generated artifact.
        with open(_ROOT / "adaptive_exit/models/bucket_assignments.pkl", "rb") as f:
            pkl_map = pickle.load(f).get("assignments", {})
    except Exception as exc:
        # Best effort: continue with pkl_map=None, but say so instead of
        # silently changing the bucket assignment of every trade.
        print(f"[BT] WARNING: bucket assignments not loaded ({exc!r}); "
              f"get_bucket will be called with pkl_map=None")

    # Loop-invariant volatility threshold.
    vol_p60 = float(cfg.get("paper_trade", {}).get("vol_p60", 0.00009868))
    non_alpha_exits = ("HIBERNATE_HALT", "SUBDAY_ACB_NORMALIZATION")

    t_global = time.time()
    for pf in parquet_files:
        date_str = pf.stem
        df = pd.read_parquet(pf)

        # Save timestamp array for this day before processing
        ts_raw = df["timestamp"].values if "timestamp" in df.columns else None
        if ts_raw is not None and getattr(getattr(ts_raw, "dtype", None), "kind", None) == "M":
            # datetime64 column: normalise to integer nanoseconds since epoch
            # so every datetime64 resolution decodes the same way.
            ts_raw = ts_raw.astype("datetime64[ns]").astype("int64")

        asset_cols = [c for c in df.columns if c not in _META_COLS_SET]
        vol_ok = _compute_vol_ok(df, vol_p60)

        t_before = len(engine.trade_history)
        t0 = time.time()
        engine.process_day(date_str, df, asset_cols,
                           vol_regime_ok=vol_ok, direction=-1, posture="APEX")
        elapsed = time.time() - t0

        day_new = 0
        for tr in engine.trade_history[t_before:]:
            # Skip non-alpha exits
            if tr.exit_reason in non_alpha_exits:
                continue

            # Resolve UTC timestamp
            entry_bar = tr.entry_bar
            in_range = ts_raw is not None and 0 <= entry_bar < len(ts_raw)
            entry_ts = resolve_entry_ts(ts_raw[entry_bar] if in_range else None,
                                        date_str)

            asset = tr.asset
            bkt = get_bucket(asset, pkl_map)
            adv = compute_esof(entry_ts)

            all_trades.append({
                "ts": entry_ts.isoformat(),
                "date": date_str,
                "asset": asset,
                "pnl": round(tr.pnl_absolute, 4),
                "leverage": round(tr.leverage, 3),
                "exit_reason": tr.exit_reason,
                "bucket_id": bkt,
                "session": adv["session"],
                "dow": adv["dow"],
                "score": round(adv["advisory_score"], 4),
                "label": adv["advisory_label"],
                "liq_bkt": adv["liq_bucket_3h"],
            })
            day_new += 1

        cum_T = len(all_trades)
        cap_now = engine.capital
        roi = (cap_now / INITIAL_CAPITAL - 1) * 100
        print(f" {date_str}: +{day_new:3d} trades (cum={cum_T:4d}) "
              f"${cap_now:>10,.0f} ROI={roi:+.1f}% ({elapsed:.1f}s)", flush=True)

    total_elapsed = time.time() - t_global
    print(f"\n[BT] Done: {len(all_trades)} trades in {total_elapsed:.0f}s "
          f"ROI={((engine.capital/INITIAL_CAPITAL)-1)*100:+.2f}%")
    return all_trades


def load_or_run(use_cache: bool) -> List[dict]:
    """Return the trade list, from CACHE_FILE when allowed and present.

    When *use_cache* is false or the cache file does not exist, the backtest
    is run and its trades are written to CACHE_FILE as JSON.
    """
    if use_cache and CACHE_FILE.exists():
        print(f"[CACHE] Loading from {CACHE_FILE}")
        with open(CACHE_FILE, encoding="utf-8") as f:
            raw = json.load(f)
        print(f" {len(raw)} trades loaded.")
        return raw

    trades = run_backtest()
    # ts is already an ISO string (set at collection time)
    with open(CACHE_FILE, "w", encoding="utf-8") as f:
        json.dump(trades, f)
    print(f"[CACHE] Saved to {CACHE_FILE}")
    return trades


# ── Strategy simulation ────────────────────────────────────────────────────────
def run_strategy(strategy: str, trades: List[dict]) -> dict:
    """Replay *trades* through one gate strategy and summarise the counterfactual.

    For strategy ``"F"`` each trade's pnl is scaled by the per-bucket
    multiplier from ``apply_gate(...).s6_mult`` (default 0.4 for an unknown
    bucket); a multiplier below 1e-6 counts as blocked.  For every other
    strategy a blocked trade contributes nothing, otherwise pnl is scaled by
    ``lev_mult``.

    Returns:
        dict with ``strategy, n, n_exec, n_blk, n_scl, act_pnl, cf_pnl,
        delta, wr_act, wr_cf`` (win rates in percent, 0 for an empty input).
    """
    cf_pnl = 0.0
    act_pnl = 0.0
    n_blk = 0
    n_scl = 0
    n_win_cf = 0
    n_win_act = 0
    is_s6 = strategy == "F"

    for t in trades:
        pnl = t["pnl"]
        act_pnl += pnl
        n_win_act += pnl > 0

        adv = {"advisory_label": t["label"], "advisory_score": t["score"],
               "session": t["session"], "dow": t["dow"]}
        r = apply_gate(strategy, adv)

        if is_s6:
            mult = r.s6_mult.get(t["bucket_id"], 0.4)
            cf_pnl += pnl * mult
            n_win_cf += pnl * mult > 0
            n_blk += mult < 1e-6
            n_scl += 0 < mult < 1.0
        else:
            mult = r.lev_mult
            if r.is_blocked:
                n_blk += 1
            else:
                cf_pnl += pnl * mult
                n_win_cf += pnl * mult > 0
                n_scl += mult < 1.0

    n = len(trades)
    n_exec = n if is_s6 else n - n_blk
    wr_act = n_win_act / n * 100 if n else 0
    if is_s6:
        # "F" never removes trades, so its denominator is n (guarded for n=0).
        wr_cf = n_win_cf / n * 100 if n else 0
    else:
        wr_cf = n_win_cf / max(n_exec, 1) * 100

    return dict(strategy=strategy, n=n, n_exec=n_exec, n_blk=n_blk, n_scl=n_scl,
                act_pnl=round(act_pnl, 2), cf_pnl=round(cf_pnl, 2),
                delta=round(cf_pnl - act_pnl, 2),
                wr_act=round(wr_act, 1), wr_cf=round(wr_cf, 1))


def run_s6_base(trades):
    """Return the counterfactual of scaling every trade by its S6_BASE multiplier.

    Unknown buckets use a multiplier of 0.4.  Returns ``cf_pnl``, ``delta``
    (versus the unscaled pnl sum) and ``wr_cf`` in percent (0 when empty).
    """
    scaled = [t["pnl"] * S6_BASE.get(t["bucket_id"], 0.4) for t in trades]
    cf = sum(scaled)
    wins = sum(v > 0 for v in scaled)
    return dict(cf_pnl=round(cf, 2),
                delta=round(cf - sum(t["pnl"] for t in trades), 2),
                wr_cf=round(wins / len(trades) * 100, 1) if trades else 0)


# ── Overfitting guard (adapted for large sample) ──────────────────────────────
def _z_below_base(base, cell):
    """z-score of how far the cell win rate sits below *base*.

    Returns 0 for an empty cell, and 0.0 when the standard error is zero
    (baseline win rate of exactly 0 or 1) instead of dividing by zero.
    """
    if not cell:
        return 0
    se = binomial_se(base, len(cell))
    if not se > 0:
        return 0.0
    return (base - wr(cell)) / se


def run_overfitting_report(trades: List[dict]):
    """Compute the overfitting-guard statistics for the NY_AFTERNOON / Monday cells.

    The trade list is split into halves by position (first half = H1), so the
    caller is expected to pass trades in chronological order.

    Returns:
        dict with cell sizes, win rates and net pnl, per-half win rates and
        cell sizes (``*_h1_n`` / ``*_h2_n``), permutation p-values, Cohen's h
        effect sizes, z-scores against the overall win rate, a bootstrap CI
        on the NY_AFTERNOON win rate, top/bottom score-quartile win rates in
        H2, and the number of buckets (with >= 5 trades on each side) whose
        NY_AFTERNOON win rate is below their win rate elsewhere.  Statistics
        of empty cells are NaN.
    """
    n = len(trades)
    h1, h2 = trades[:n // 2], trades[n // 2:]
    base = wr(trades)

    def in_ny(t):
        return t["session"] == "NY_AFTERNOON"

    def on_monday(t):
        return t["dow"] == 0

    ny = [t for t in trades if in_ny(t)]
    mon = [t for t in trades if on_monday(t)]
    ldn = [t for t in trades if t["session"] == "LONDON_MORNING"]
    ny_h1 = [t for t in h1 if in_ny(t)]
    ny_h2 = [t for t in h2 if in_ny(t)]
    mon_h1 = [t for t in h1 if on_monday(t)]
    mon_h2 = [t for t in h2 if on_monday(t)]

    # Permutation tests
    ny_pnl = sum(t["pnl"] for t in ny)
    mon_pnl = sum(t["pnl"] for t in mon)
    p_ny = permutation_pvalue(trades, -ny_pnl, "session", "NY_AFTERNOON")
    p_mon = permutation_pvalue(trades, -mon_pnl, "dow", 0)

    # Effect sizes
    h_ny = cohen_h(wr(ny), base)
    h_mon = cohen_h(wr(mon), base)
    h_ldn = cohen_h(wr(ldn), base)

    # Bonferroni z
    z_ny = _z_below_base(base, ny)
    z_mon = _z_below_base(base, mon)

    # Walk-forward score prediction
    h2s = sorted(h2, key=lambda t: t["score"])
    q = max(1, len(h2s) // 4)
    wr_bot, wr_top = wr(h2s[:q]), wr(h2s[-q:])

    # Bootstrap CI on WR (approximate using mean sample)
    ny_wrs = [1 if t["pnl"] > 0 else 0 for t in ny]
    ny_lo, ny_hi = bootstrap_ci(ny_wrs, n_boot=3000)

    # Session-bucket confound check
    by_bkt_ny = defaultdict(list)
    by_bkt_out = defaultdict(list)
    for t in trades:
        (by_bkt_ny if in_ny(t) else by_bkt_out)[t["bucket_id"]].append(t)
    n_cross = sum(
        1 for b, inside in by_bkt_ny.items()
        if len(inside) >= 5
        and len(by_bkt_out.get(b, [])) >= 5
        and wr(inside) < wr(by_bkt_out[b])
    )

    return dict(
        n=n, base=base,
        ny_n=len(ny), ny_wr=round(wr(ny), 3), ny_net=round(net(ny), 0),
        mon_n=len(mon), mon_wr=round(wr(mon), 3), mon_net=round(net(mon), 0),
        ldn_n=len(ldn), ldn_wr=round(wr(ldn), 3),
        ny_h1_wr=round(wr(ny_h1), 3), ny_h2_wr=round(wr(ny_h2), 3),
        mon_h1_wr=round(wr(mon_h1), 3), mon_h2_wr=round(wr(mon_h2), 3),
        ny_h1_n=len(ny_h1), ny_h2_n=len(ny_h2),
        mon_h1_n=len(mon_h1), mon_h2_n=len(mon_h2),
        p_ny=round(p_ny, 4), p_mon=round(p_mon, 4),
        h_ny=round(h_ny, 3), h_mon=round(h_mon, 3), h_ldn=round(h_ldn, 3),
        z_ny=round(z_ny, 2), z_mon=round(z_mon, 2),
        ny_wr_ci=(round(ny_lo, 3), round(ny_hi, 3)),
        wf_top=round(wr_top, 3), wf_bot=round(wr_bot, 3),
        n_cross_bucket=n_cross,
    )


# ── Report printer ─────────────────────────────────────────────────────────────
# ANSI escape sequences: green, red, yellow, bold, dim, reset.
G = "\033[32m"
R = "\033[31m"
Y = "\033[33m"
B = "\033[1m"
D = "\033[2m"
X = "\033[0m"


def col(v, good_if_positive=True):
    """Return the colour code for *v*: green/red by sign, reset for zero.

    With ``good_if_positive=False`` the green/red mapping is inverted.
    """
    if v > 0:
        return G if good_if_positive else R
    if v < 0:
        return R if good_if_positive else G
    return X


def print_full_report(strategies, s6base, ov):
    """Print the gate-strategy table and the overfitting-guard summary.

    Args:
        strategies: non-empty list of ``run_strategy`` result dicts; the first
            entry supplies the baseline figures.  The "F vs S6_BASE" uplift
            row is printed only when a strategy ``"F"`` entry is present.
        s6base: result dict of ``run_s6_base``.
        ov: result dict of ``run_overfitting_report``.
    """
    base_pnl = strategies[0]["act_pnl"]
    base_wr = strategies[0]["wr_act"]
    n = strategies[0]["n"]

    print(f"\n{B}{'═'*74}{X}")
    print(f"{B} EsoF Gate — 56-Day Gold Backtest ({n} clean alpha trades){X}")
    print(f" Baseline: WR={base_wr:.1f}% Net=${base_pnl:+,.0f} "
          f"Period: 2025-12-31 → 2026-02-25")
    print(f"{'═'*74}{X}")

    # Gate results table
    NAMES = {"A": "A: LEV_SCALE", "B": "B: HARD_BLOCK", "C": "C: DOW_BLOCK",
             "D": "D: SESSION_BLOCK", "E": "E: COMBINED", "F": "F: S6_BUCKET"}
    hdr = f"\n {'Strategy':<22}│{'T_exec':>7}│{'T_blk':>6}│{'CF Net':>11}│{'ΔPnL':>10}│{'WR_cf':>7}│{'WR_Δ':>6}"
    sep = f" {'─'*22}┼{'─'*7}┼{'─'*6}┼{'─'*11}┼{'─'*10}┼{'─'*7}┼{'─'*6}"
    print(f"{B}{hdr}{X}\n{sep}")

    for r in strategies:
        nm = NAMES.get(r["strategy"], r["strategy"])
        dpnl = r["delta"]
        dwr = r["wr_cf"] - r["wr_act"]
        c = G if dpnl > 0 else R
        wc = G if dwr > 0 else R
        print(f" {nm:<22}│{r['n_exec']:>7}│{r['n_blk']:>6}│"
              f"{c}{r['cf_pnl']:>+11,.0f}{X}│{c}{dpnl:>+10,.0f}{X}│"
              f"{wc}{r['wr_cf']:>6.1f}%{X}│{wc}{dwr:>+5.1f}pp{X}")

    print(sep)
    f_r = next((r for r in strategies if r["strategy"] == "F"), None)
    if f_r is not None:
        fvs = f_r["cf_pnl"] - s6base["cf_pnl"]
        c = G if fvs > 0 else R
        print(f" {'F vs S6_BASE (EsoF uplift)':<22}│{'':>7}│{'':>6}│{'':>11}│"
              f"{c}{fvs:>+10,.0f}{X}│{'':>7}│")
    print(f" {'S6_BASE (flat, no EsoF)':<22}│{'':>7}│{'':>6}│"
          f"{s6base['cf_pnl']:>+11,.0f}│{s6base['delta']:>+10,.0f}│"
          f"{s6base['wr_cf']:>6.1f}%│")

    # Overfitting guard
    print(f"\n{B} Overfitting Guard — Large-Sample Results{X}")
    print(f" {'─'*68}")

    def orow(label, val, good=True, ref=None, fmt=".3f", suffix=""):
        # Green when "val below ref" matches the expectation flag `good`.
        v = f"{val:{fmt}}{suffix}"
        if ref is not None:
            c = G if (val < ref) == good else R
        else:
            c = X
        print(f" {label:<42} {c}{v}{X}")

    # Real H1 cell sizes; fall back to the old approximation for report dicts
    # produced without them.
    ny_h1_n = ov.get("ny_h1_n", ov["ny_n"] // 2)
    mon_h1_n = ov.get("mon_h1_n", ov["mon_n"] // 2)

    print(" 1. Temporal Stability")
    orow(f" NY_AFT WR H1 (n={ny_h1_n})", ov["ny_h1_wr"], ref=ov["base"])
    orow(" NY_AFT WR H2", ov["ny_h2_wr"], ref=ov["base"])
    orow(f" Monday WR H1 (n={mon_h1_n})", ov["mon_h1_wr"], ref=ov["base"])
    orow(" Monday WR H2", ov["mon_h2_wr"], ref=ov["base"])

    print("\n 2. Permutation p-values (n_perm=2000)")
    c_ny = G if ov["p_ny"] < 0.05 else Y if ov["p_ny"] < 0.15 else R
    c_mon = G if ov["p_mon"] < 0.05 else Y if ov["p_mon"] < 0.15 else R
    print(f" {' NY_AFT p-value':<42} {c_ny}{ov['p_ny']:.4f}{X} {D}(< 0.05 = significant){X}")
    print(f" {' Monday p-value':<42} {c_mon}{ov['p_mon']:.4f}{X}")

    # Plain string: a backslash-escaped quote inside an f-string expression
    # is a SyntaxError before Python 3.12.
    print("\n 3. Effect sizes (Cohen's h)")
    for label, h, n_cell in [("NY_AFT", ov["h_ny"], ov["ny_n"]),
                             ("Monday", ov["h_mon"], ov["mon_n"]),
                             ("London", ov["h_ldn"], ov["ldn_n"])]:
        if h != h:  # NaN: the cell had no trades
            grade = "n/a"
        else:
            grade = ("large" if h >= 0.8 else "medium" if h >= 0.5
                     else "small" if h >= 0.2 else "trivial")
        c = G if h >= 0.5 else Y if h >= 0.2 else R
        print(f" {' '+label:<42} {c}{h:.3f}{X} {D}{grade} (n={n_cell}){X}")

    print("\n 4. Bonferroni z-scores (35 cells, crit≈2.99)")
    crit = 2.99
    for label, z in [("NY_AFT", ov["z_ny"]), ("Monday", ov["z_mon"])]:
        c = G if z > crit else Y if z > 2.0 else R
        print(f" {' '+label:<42} {c}{z:.2f}{X}")

    print("\n 5. Bootstrap 95% CI on NY_AFT WR")
    lo, hi = ov["ny_wr_ci"]
    c = G if hi < ov["base"] else R
    print(f" {' NY_AFT WR CI':<42} {c}[{lo:.3f}, {hi:.3f}]{X} "
          f"{D}({'below' if hi < ov['base'] else 'overlaps'} baseline {ov['base']:.3f}){X}")

    print("\n 6. Walk-forward: advisory score → H2 WR")
    c = G if ov["wf_top"] > ov["wf_bot"] else R
    print(f" {' Top-quartile WR (H2)':<42} {c}{ov['wf_top']:.3f}{X}")
    print(f" {' Bot-quartile WR (H2)':<42} {c}{ov['wf_bot']:.3f}{X}")
    print(f" {' Predictive?':<42} {c}{'YES' if ov['wf_top'] > ov['wf_bot'] else 'NO — overfit'}{X}")

    print("\n 7. Cross-bucket NY_AFT confound check")
    c = G if ov["n_cross_bucket"] >= 2 else Y if ov["n_cross_bucket"] == 1 else R
    print(f" {' Buckets confirming NY_AFT drag':<42} {c}{ov['n_cross_bucket']}{X} "
          f"{D}(≥ 2 = session-driven, not bucket-proxy){X}")

    print(f"\n{'═'*74}\n")


# ── Main ───────────────────────────────────────────────────────────────────────
def main() -> None:
    """Parse CLI flags, obtain the trades, run every gate and print the report."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--cached", action="store_true",
                    help="Use cached trades (skip backtest)")
    args = ap.parse_args()

    trades = load_or_run(use_cache=args.cached)
    if len(trades) < 100:
        print(f"{R}Too few trades ({len(trades)}) — check engine setup.{X}")
        sys.exit(1)

    print(f"\n[SIM] Running gate strategies on {len(trades)} trades ...")
    strategy_results = [run_strategy(s, trades) for s in ["A", "B", "C", "D", "E", "F"]]
    s6base = run_s6_base(trades)

    print("[OV] Running overfitting guard ...")
    ov = run_overfitting_report(trades)

    print_full_report(strategy_results, s6base, ov)


if __name__ == "__main__":
    main()