# DOLPHIN/nautilus_dolphin/dvae/exp2_proxy_exit.py
# (Web-viewer export artifacts — "Files", "315 lines", "13 KiB", "Python",
#  "Raw Normal View History" — converted to this comment so the file parses.)
"""
Exp 2 proxy_B as premature exit signal, with shadow trades.
Post-hoc "what-if" analysis on the baseline trade set.
1. Run baseline engine; log per-day proxy_B and per-asset prices keyed by
(date_str, bar_idx) the composite key that matches trade.entry_bar.
2. For each trade: find which day it was on (tracked by engine override),
then check if proxy_B dropped below threshold during the hold.
3. Compute early-exit PnL at the trigger bar using the CORRECT asset price.
4. Compare vs actual PnL.
Shadow insight: avg_pnl_delta = early_exit_pnl - actual_pnl
Positive early exit would have been better
Negative holding to natural exit was better (proxy_B is NOT a useful exit signal)
Thresholds tested (rolling percentile of proxy_B, window=500):
T1: exit if proxy_B < p10 (rare trigger)
T2: exit if proxy_B < p25 (moderate)
T3: exit if proxy_B < p50 (aggressive)
Logged to exp2_proxy_exit_results.json.
"""
import sys, time, json
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
_HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(_HERE.parent))
from exp_shared import (
ensure_jit, ENGINE_KWARGS, GOLD, load_data, load_forewarner, log_results
)
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
# ── Engine that logs per-day proxy_B + asset prices + trade dates ─────────────
class ShadowLoggingEngine(NDAlphaEngine):
"""
NDAlphaEngine that captures:
- day_proxy[date][ri] = proxy_b value
- day_prices[date][ri][asset] = price
- trade_dates[trade_idx] = date_str of entry
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.day_proxy = {} # date_str → {ri: proxy_b}
self.day_prices = {} # date_str → {ri: {asset: price}}
self._cur_date = None
self._n_trades_before = 0
self.trade_dates = [] # parallel list to trade_history, entry date per trade
def process_day(self, date_str, df, asset_columns,
vol_regime_ok=None, direction=None, posture='APEX'):
self._cur_date = date_str
self.day_proxy[date_str] = {}
self.day_prices[date_str] = {}
self._n_trades_before = len(self.trade_history)
self.begin_day(date_str, posture=posture, direction=direction)
bid = 0
for ri in range(len(df)):
row = df.iloc[ri]
vd = row.get('vel_div')
if vd is None or not np.isfinite(float(vd)):
self._global_bar_idx += 1; bid += 1; continue
def gf(col):
v = row.get(col)
if v is None: return 0.0
try: f = float(v); return f if np.isfinite(f) else 0.0
except: return 0.0
v50 = gf('v50_lambda_max_velocity')
v750 = gf('v750_lambda_max_velocity')
inst = gf('instability_50')
pb = inst - v750
self.day_proxy[date_str][ri] = pb
prices = {}
for ac in asset_columns:
p = row.get(ac)
if p is not None and p > 0 and np.isfinite(p):
prices[ac] = float(p)
self.day_prices[date_str][ri] = dict(prices)
if not prices:
self._global_bar_idx += 1; bid += 1; continue
vrok = bool(vol_regime_ok[ri]) if vol_regime_ok is not None else (bid >= 100)
self.step_bar(bar_idx=ri, vel_div=float(vd), prices=prices,
vol_regime_ok=vrok, v50_vel=v50, v750_vel=v750)
bid += 1
result = self.end_day()
# Tag new trades with this date
new_trades = self.trade_history[self._n_trades_before:]
for _ in new_trades:
self.trade_dates.append(date_str)
return result
# ── Shadow analysis ───────────────────────────────────────────────────────────
def shadow_analysis(eng, threshold_pct, window=500):
    """
    For each trade, check whether proxy_B dropped below a rolling-percentile
    threshold during the hold period (same-day bars between entry_bar and
    exit_bar), and compute the "what-if" early-exit PnL at the trigger bar
    using the correct asset's price.

    Parameters
    ----------
    eng : object exposing trade_history, trade_dates, day_proxy, day_prices
        (as populated by ShadowLoggingEngine.process_day).
    threshold_pct : float
        Percentile in [0, 1] of the rolling proxy_B window used as the trigger.
    window : int
        Number of most-recent bars (chronological, across days) in the
        rolling-percentile history.

    Returns
    -------
    dict of aggregate shadow-exit statistics; PnL fields are in percent.
        The same key names are returned whether or not any trade triggered.
    """
    tr = eng.trade_history
    dates = eng.trade_dates
    if len(dates) < len(tr):
        # Pad if any trades weren't tagged (shouldn't happen)
        dates = dates + [None] * (len(tr) - len(dates))
    # Global chronological sequence of (date, bar, proxy_B) for the rolling
    # percentile. NOTE(review): assumes date_str keys sort chronologically
    # (e.g. ISO dates) — confirm upstream naming of parquet files.
    all_proxy_seq = []
    for day_key in sorted(eng.day_proxy.keys()):
        day_d = eng.day_proxy[day_key]
        for ri in sorted(day_d.keys()):
            all_proxy_seq.append((day_key, ri, day_d[ri]))
    results = []
    # Per-day sorted bar lists for hold-period lookup.
    day_bars = {d: sorted(eng.day_proxy[d].keys()) for d in eng.day_proxy}
    # (date, ri) → index in all_proxy_seq, for slicing the rolling history.
    seq_idx = {(s, r): i for i, (s, r, _) in enumerate(all_proxy_seq)}
    for t, date in zip(tr, dates):
        if date is None:
            results.append(dict(triggered=False, actual_pnl=t.pnl_pct))
            continue
        entry_bar = int(t.entry_bar) if hasattr(t, 'entry_bar') else 0
        exit_bar = int(t.exit_bar) if hasattr(t, 'exit_bar') else entry_bar
        actual_pnl = float(t.pnl_pct) if hasattr(t, 'pnl_pct') else 0.0
        entry_price = float(t.entry_price) if hasattr(t, 'entry_price') and t.entry_price else 0.0
        direction = int(t.direction) if hasattr(t, 'direction') else -1
        asset = t.asset if hasattr(t, 'asset') else 'BTCUSDT'
        # Rolling threshold: only bars strictly BEFORE the entry bar.
        eidx = seq_idx.get((date, entry_bar), 0)
        hist_window = [pb for (_, _, pb) in all_proxy_seq[max(0, eidx - window):eidx]]
        if len(hist_window) < 20:  # too little history for a stable percentile
            results.append(dict(triggered=False, actual_pnl=actual_pnl)); continue
        threshold = float(np.percentile(hist_window, threshold_pct * 100))
        # Find hold bars on the same day (exclusive of entry, inclusive of exit).
        if date not in day_bars:
            results.append(dict(triggered=False, actual_pnl=actual_pnl)); continue
        hold_bars = [ri for ri in day_bars[date]
                     if entry_bar < ri <= exit_bar]
        triggered_bar = None
        for ri in hold_bars:
            if eng.day_proxy[date].get(ri, 999) < threshold:
                triggered_bar = ri
                break
        if triggered_bar is None:
            results.append(dict(triggered=False, actual_pnl=actual_pnl)); continue
        # Correct early-exit price: same asset, triggered bar on same day.
        early_price = eng.day_prices[date].get(triggered_bar, {}).get(asset, 0.0)
        if entry_price > 0 and early_price > 0:
            early_pnl = direction * (early_price - entry_price) / entry_price
        else:
            results.append(dict(triggered=False, actual_pnl=actual_pnl)); continue
        bars_saved = exit_bar - triggered_bar
        results.append(dict(
            triggered=True,
            date=date, entry_bar=entry_bar, exit_bar=exit_bar,
            triggered_bar=triggered_bar, bars_saved=bars_saved,
            asset=asset, direction=direction,
            entry_price=entry_price, early_price=early_price,
            actual_pnl=actual_pnl,
            early_exit_pnl=early_pnl,
            pnl_delta=early_pnl - actual_pnl,
        ))
    triggered = [r for r in results if r['triggered']]
    if not triggered:
        # Key names kept identical to the triggered branch so callers can read
        # the same fields regardless of trigger count.
        return dict(n_triggered=0, n_total=len(results), pct_triggered=0,
                    avg_actual_pnl_pct=0, avg_early_exit_pnl_pct=0,
                    avg_pnl_delta_pct=0, early_better_rate=0,
                    avg_bars_saved=0, roi_impact_estimate_pp=0)
    avg_actual = float(np.mean([r['actual_pnl'] for r in triggered]))
    avg_early = float(np.mean([r['early_exit_pnl'] for r in triggered]))
    avg_delta = float(np.mean([r['pnl_delta'] for r in triggered]))
    early_better = float(np.mean([r['pnl_delta'] > 0 for r in triggered]))
    avg_bars_saved = float(np.mean([r['bars_saved'] for r in triggered]))
    # Estimated ROI impact in percentage points: sum of deltas × 0.20 position
    # fraction × 100. NOTE(review): 0.20 assumed position sizing — confirm.
    roi_impact = float(sum(r['pnl_delta'] for r in triggered) * 0.20 * 100)
    return dict(
        n_triggered=len(triggered),
        n_total=len(results),
        pct_triggered=len(triggered) / max(1, len(results)) * 100,
        avg_actual_pnl_pct=avg_actual * 100,
        avg_early_exit_pnl_pct=avg_early * 100,
        avg_pnl_delta_pct=avg_delta * 100,
        early_better_rate=early_better * 100,
        avg_bars_saved=avg_bars_saved,
        roi_impact_estimate_pp=roi_impact,
    )
def main():
    """Run the baseline engine with shadow logging, then evaluate three
    proxy_B early-exit thresholds as a post-hoc shadow analysis and log the
    results to exp2_proxy_exit_results.json."""
    ensure_jit()
    print("\nLoading data & forewarner...")
    d = load_data()
    fw = load_forewarner()
    # ENGINE_KWARGS is already imported at module top; only MC_BASE_CFG is
    # needed here.  (Removed an unused `import math` as well.)
    from exp_shared import MC_BASE_CFG
    print("\nRunning baseline with shadow logging...")
    t0 = time.time()
    kw = ENGINE_KWARGS.copy()
    acb = AdaptiveCircuitBreaker()
    acb.preload_w750(d['date_strings'])
    eng = ShadowLoggingEngine(**kw)
    eng.set_ob_engine(d['ob_eng'])
    eng.set_acb(acb)
    if fw:
        eng.set_mc_forewarner(fw, MC_BASE_CFG)
    eng.set_esoteric_hazard_multiplier(0.0)
    # Loop variable renamed from `pf` to avoid shadowing the profit-factor
    # variable below.  (Dropped dead daily_caps/daily_pnls accumulators that
    # were never read.)
    for pqf in d['parquet_files']:
        ds = pqf.stem
        df, acols, dvol = d['pq_data'][ds]
        # Vol regime OK where daily vol is finite and above the p60 cut.
        vol_ok = np.where(np.isfinite(dvol), dvol > d['vol_p60'], False)
        eng.process_day(ds, df, acols, vol_regime_ok=vol_ok)
    tr = eng.trade_history
    print(f" Done in {time.time()-t0:.0f}s Trades={len(tr)} "
          f"Tagged={len(eng.trade_dates)}")
    # Confirm baseline metrics match gold
    def _abs(t):
        # Absolute PnL per trade; 250 fallback scale — TODO confirm sizing.
        return t.pnl_absolute if hasattr(t, 'pnl_absolute') else t.pnl_pct * 250.
    wins = [t for t in tr if _abs(t) > 0]
    losses = [t for t in tr if _abs(t) <= 0]
    pf = sum(_abs(t) for t in wins) / max(abs(sum(_abs(t) for t in losses)), 1e-9)
    roi = (eng.capital - 25000) / 25000 * 100
    print(f" Baseline: ROI={roi:.2f}% PF={pf:.4f} (gold: 88.55% / 1.215)")
    THRESHOLDS = [
        ('T1: exit if proxy_B < p10', 0.10),
        ('T2: exit if proxy_B < p25', 0.25),
        ('T3: exit if proxy_B < p50', 0.50),
    ]
    all_results = []
    for tname, tpct in THRESHOLDS:
        print(f"\n{tname}")
        res = shadow_analysis(eng, threshold_pct=tpct, window=500)
        res['name'] = tname
        all_results.append(res)
        print(f" Triggered: {res['n_triggered']}/{res['n_total']} "
              f"({res['pct_triggered']:.1f}%)")
        if res['n_triggered'] > 0:
            print(f" Avg actual PnL: {res['avg_actual_pnl_pct']:+.4f}%")
            print(f" Avg early-exit PnL: {res['avg_early_exit_pnl_pct']:+.4f}%")
            print(f" Avg delta: {res['avg_pnl_delta_pct']:+.4f}% "
                  f"(+ = early exit BETTER)")
            print(f" Early exit better: {res['early_better_rate']:.1f}% of triggered")
            print(f" Avg bars saved: {res['avg_bars_saved']:.1f}")
            print(f" Est. ROI impact: {res['roi_impact_estimate_pp']:+.2f}pp")
    print("\n" + "="*75)
    print("EXP 2 — SHADOW EXIT SUMMARY")
    print("="*75)
    print(f"{'Threshold':<35} {'Trig%':>6} {'AvgDelta%':>11} "
          f"{'EarlyBetter%':>13} {'ROI_pp':>8}")
    print('-'*75)
    for r in all_results:
        if r['n_triggered'] > 0:
            print(f" {r['name']:<33} {r['pct_triggered']:>6.1f}% "
                  f"{r['avg_pnl_delta_pct']:>10.4f}% "
                  f"{r['early_better_rate']:>12.1f}% "
                  f"{r['roi_impact_estimate_pp']:>8.2f}pp")
        else:
            print(f" {r['name']:<33} (no triggers)")
    # Verdict keyed off the most conservative threshold (T1).
    verdict = all_results[0] if all_results else {}
    if verdict.get('avg_pnl_delta_pct', -1) > 0:
        print("\n → VERDICT: Early exit is BENEFICIAL (delta > 0)")
    else:
        print("\n → VERDICT: Holding to natural exit is BETTER (early exit hurts)")
    log_results(all_results, _HERE / 'exp2_proxy_exit_results.json',
                meta={'experiment': 'proxy_B exit shadow (corrected)',
                      'proxy': 'instability_50 - v750_lambda_max_velocity',
                      'n_trades': len(tr),
                      'baseline_roi': roi, 'baseline_pf': pf})


if __name__ == '__main__':
    main()