# Source: DOLPHIN/nautilus_dolphin/dvae/exp4_proxy_coupling.py (506 lines, 22 KiB)
"""
Exp 4 proxy_B Coupling & Orthogonality Sweep
Research questions:
1. Is proxy_B orthogonal to the entry signal (vel_div) and other system state?
2. Can proxy_B be *coupled* to existing system parameters to reduce DD without
reducing ROI? (position scale, hold limit, stop gate, rising-proxy exit)
3. Does proxy_B predict trades that will hit large adverse excursions (MAE)?
Method: retroactive shadow analysis on the full 2155-trade baseline.
- One full AE run with extended logging (per-bar proxy_B + vel_div + prices)
- All coupling tests applied post-hoc: O(N_trades) per config < 1s for 150+ configs
- Focus metric: DD reduction with ROI >= gold * 0.95
Note on stop_pct=1.0 in gold config:
The engine has stop_pct=1.0 (100% effectively no stop). Trades exit via:
fixed_tp (0.95%), max_hold_bars (120), or direction-reversal signal.
This means MAE can be large before trades recover — a proxy-gated stop is meaningful.
Coupling modes:
A. scale_suppress: scale down position when proxy_B high at entry
B. scale_boost: scale up position when proxy_B low at entry
C. hold_limit: exit at fraction of natural hold when proxy_B_max exceeds threshold
D. rising_exit: exit early when proxy_B trajectory during hold is strongly rising
E. pure_stop: retroactive stop simulation (benchmark, no proxy coupling)
F. gated_stop: stop applies ONLY when proxy_B at entry exceeds threshold
Statistical tests:
- Pearson + Spearman: proxy_B vs vel_div, pnl, MAE
- Mann-Whitney U: worst-10% trades vs rest on proxy_B_entry
"""
import sys, time, json, math
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
from collections import defaultdict
_HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(_HERE.parent))
from exp_shared import (
ensure_jit, ENGINE_KWARGS, GOLD, MC_BASE_CFG,
load_data, load_forewarner, log_results
)
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
# ── Extended shadow engine ────────────────────────────────────────────────────
class CouplingEngine(NDAlphaEngine):
    """Runs baseline + captures per-bar: proxy_B, vel_div, asset prices.

    process_day() wraps the parent's begin_day / step_bar / end_day cycle and
    records, per date, the per-bar proxy_B value (instability_50 minus
    v750_lambda_max_velocity), the raw vel_div, and every finite positive asset
    price — so coupling variants can be evaluated post-hoc without re-running
    the engine.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.day_proxy = {}    # date_str → {ri: proxy_B}
        self.day_veldiv = {}   # date_str → {ri: vel_div}
        self.day_prices = {}   # date_str → {ri: {asset: price}}
        self._n_before = 0     # trade_history length at the start of the current day
        self.trade_dates = []  # parallel to trade_history
    def process_day(self, date_str, df, asset_columns,
                    vol_regime_ok=None, direction=None, posture='APEX'):
        """Replay one day's bars through the parent engine while capturing
        proxy_B / vel_div / prices per bar and date-tagging any new trades."""
        # Fresh capture maps for this date.
        self.day_proxy[date_str] = {}
        self.day_veldiv[date_str] = {}
        self.day_prices[date_str] = {}
        self._n_before = len(self.trade_history)
        self.begin_day(date_str, posture=posture, direction=direction)
        bid = 0  # local bar counter; feeds the warm-up gate below
        for ri in range(len(df)):
            row = df.iloc[ri]
            vd = row.get('vel_div')
            # No usable entry signal on this bar: advance the parent's global
            # bar index and the local counter, but capture nothing.
            if vd is None or not np.isfinite(float(vd)):
                self._global_bar_idx += 1; bid += 1; continue
            def gf(col):
                # Safe float fetch: missing / non-finite / unparsable → 0.0
                v = row.get(col)
                if v is None: return 0.0
                try: return float(v) if np.isfinite(float(v)) else 0.0
                except: return 0.0
            v50 = gf('v50_lambda_max_velocity')
            v750 = gf('v750_lambda_max_velocity')
            inst = gf('instability_50')
            pb = inst - v750  # proxy_B definition (see module docstring)
            self.day_proxy[date_str][ri] = pb
            self.day_veldiv[date_str][ri] = float(vd)
            prices = {}
            for ac in asset_columns:
                p = row.get(ac)
                if p is not None and p > 0 and np.isfinite(float(p)):
                    prices[ac] = float(p)
            self.day_prices[date_str][ri] = prices
            # No tradable prices: skip stepping the engine for this bar.
            if not prices:
                self._global_bar_idx += 1; bid += 1; continue
            # Without an explicit regime mask, treat the first 100 bars as warm-up.
            vrok = bool(vol_regime_ok[ri]) if vol_regime_ok is not None else (bid >= 100)
            self.step_bar(bar_idx=ri, vel_div=float(vd), prices=prices,
                          vol_regime_ok=vrok, v50_vel=v50, v750_vel=v750)
            bid += 1
        self.end_day()
        # Tag each trade opened today with its date, keeping the lists parallel.
        for _ in self.trade_history[self._n_before:]:
            self.trade_dates.append(date_str)
# ── Build shadow data ─────────────────────────────────────────────────────────
def build_shadow(d, fw):
    """Run one full baseline pass over every parquet day with shadow capture.

    `d` is the loaded data bundle from exp_shared.load_data(); `fw` is the
    (optional) MC forewarner. Returns (engine, trade_history).
    """
    breaker = AdaptiveCircuitBreaker()
    breaker.preload_w750(d['date_strings'])
    engine = CouplingEngine(**ENGINE_KWARGS.copy())
    engine.set_ob_engine(d['ob_eng'])
    engine.set_acb(breaker)
    if fw:
        engine.set_mc_forewarner(fw, MC_BASE_CFG)
    engine.set_esoteric_hazard_multiplier(0.0)
    for parquet_file in d['parquet_files']:
        day_key = parquet_file.stem
        frame, asset_cols, day_vol = d['pq_data'][day_key]
        # Regime is "ok" only where daily vol is finite AND above the p60 cut.
        regime_ok = np.where(np.isfinite(day_vol), day_vol > d['vol_p60'], False)
        engine.process_day(day_key, frame, asset_cols, vol_regime_ok=regime_ok)
    trades = engine.trade_history
    roi = (engine.capital - 25000) / 25000 * 100
    print(f" Shadow run: ROI={roi:.2f}% Trades={len(trades)}"
          f" Tagged={len(engine.trade_dates)}")
    return engine, trades
# ── Feature extraction ────────────────────────────────────────────────────────
def extract_features(eng, tr):
    """Per-trade features for coupling analysis.

    Parameters
    ----------
    eng : object exposing day_proxy / day_veldiv / day_prices dicts and a
          trade_dates list parallel to `tr` (normally a CouplingEngine).
    tr  : list of trade records with entry_bar / direction / asset / pnl_pct
          attributes (exit_bar, pnl_absolute, entry_price optional).

    Returns a list of dicts — one per dated trade — with entry/hold proxy_B
    stats, MAE, and simulated early-exit excursions at 25/50/75% of the hold.
    """
    feats = []
    for t, date in zip(tr, eng.trade_dates):
        if date is None:
            continue
        entry_bar = int(t.entry_bar)
        exit_bar = int(getattr(t, 'exit_bar', entry_bar))
        direction = int(t.direction)
        asset = t.asset
        pnl_frac = float(t.pnl_pct)  # fraction (not %)
        pnl_abs = float(t.pnl_absolute) if hasattr(t, 'pnl_absolute') else pnl_frac * 250.
        entry_price = float(getattr(t, 'entry_price', 0) or 0)
        pb_entry = eng.day_proxy.get(date, {}).get(entry_bar, np.nan)
        vd_entry = eng.day_veldiv.get(date, {}).get(entry_bar, np.nan)
        # Hold bars (in-trade, exclusive of entry, inclusive of exit)
        hold_bars = sorted(ri for ri in eng.day_proxy.get(date, {})
                           if entry_bar < ri <= exit_bar)
        pb_hold = [eng.day_proxy[date][ri] for ri in hold_bars]
        pb_max = max(pb_hold) if pb_hold else (pb_entry if np.isfinite(pb_entry) else 0.0)
        pb_traj = (pb_hold[-1] - pb_hold[0]) if len(pb_hold) > 1 else 0.0
        # Max adverse excursion (MAE) — negative = loss
        mae = 0.0
        if entry_price > 0:
            for ri in hold_bars:
                p = eng.day_prices.get(date, {}).get(ri, {}).get(asset, 0.0)
                if p > 0:
                    exc = direction * (p - entry_price) / entry_price
                    if exc < mae:
                        mae = exc
        # Simulated early exits at hold fractions 0.25 / 0.50 / 0.75.
        # BUGFIX: early[frac] is now ALWAYS assigned. Previously a bar with a
        # missing/zero price inside the `if avail ...` branch could `continue`
        # past the fallback, leaving the key unset and raising KeyError below.
        early = {}
        for frac in (0.25, 0.50, 0.75):
            target = entry_bar + max(1, int(frac * (exit_bar - entry_bar)))
            avail = [ri for ri in hold_bars if ri >= target]
            exc = None
            if avail and entry_price > 0:
                p = eng.day_prices.get(date, {}).get(avail[0], {}).get(asset, 0.0)
                if p > 0:
                    exc = direction * (p - entry_price) / entry_price
            early[frac] = exc if exc is not None else pnl_frac  # fallback: no change
        feats.append(dict(
            date=date,
            hold_bars=exit_bar - entry_bar,
            direction=direction,
            pnl_frac=pnl_frac,
            pnl_abs=pnl_abs,
            pb_entry=pb_entry,
            vd_entry=vd_entry,
            pb_max=pb_max,
            pb_traj=pb_traj,
            mae=mae,
            e25=early[0.25],
            e50=early[0.50],
            e75=early[0.75],
        ))
    return feats
# ── Orthogonality analysis ────────────────────────────────────────────────────
def orthogonality_analysis(feats):
    """Correlation and rank tests on per-trade features.

    Tests whether proxy_B at entry is orthogonal to the entry signal
    (vel_div), PnL, MAE and hold length, and whether worst-decile trades
    carry a distinct proxy_B. Prints a report; returns a result dict.
    """
    from scipy.stats import pearsonr, spearmanr, mannwhitneyu

    def _stars(p):
        # Conventional significance markers.
        if p < 0.001:
            return '***'
        if p < 0.01:
            return '**'
        return '*' if p < 0.05 else 'ns'

    valid = [f for f in feats if np.isfinite(f['pb_entry']) and np.isfinite(f['vd_entry'])]
    col = lambda key: np.array([f[key] for f in valid])
    pb_e, vd_e = col('pb_entry'), col('vd_entry')
    pnl, mae = col('pnl_frac'), col('mae')
    pb_mx, hold = col('pb_max'), col('hold_bars')
    print(f"\n N valid (finite pb_entry + vd_entry): {len(valid)}/{len(feats)}")
    print(f" proxy_B stats: mean={pb_e.mean():.4f} std={pb_e.std():.4f} "
          f"p10={np.percentile(pb_e,10):.4f} p90={np.percentile(pb_e,90):.4f}")
    print(f" vel_div stats: mean={vd_e.mean():.4f} std={vd_e.std():.4f}")
    print()
    corr_res = {}
    for na, a, nb, b in (
            ('pb_entry', pb_e, 'vel_div_entry', vd_e),
            ('pb_entry', pb_e, 'pnl_frac', pnl),
            ('pb_entry', pb_e, 'mae', mae),
            ('pb_entry', pb_e, 'hold_bars', hold),
            ('pb_max', pb_mx, 'pnl_frac', pnl),
            ('pb_max', pb_mx, 'mae', mae)):
        pr, pp = pearsonr(a, b)
        sr, sp = spearmanr(a, b)
        sig = _stars(pp)
        print(f" corr({na}, {nb}): Pearson r={pr:+.4f} p={pp:.4f} {sig:3s}"
              f" Spearman rho={sr:+.4f}")
        corr_res[f'{na}_vs_{nb}'] = dict(pearson=float(pr), p=float(pp),
                                         spearman=float(sr), sig=sig)
    # Mann-Whitney: is proxy_B different for worst-10% trades vs rest?
    print()
    for label, metric in (('worst_pnl_10pct', pnl), ('worst_mae_10pct', mae)):
        in_worst = metric <= np.percentile(metric, 10)
        pb_w, pb_r = pb_e[in_worst], pb_e[~in_worst]
        stat, p = mannwhitneyu(pb_w, pb_r, alternative='two-sided')
        sig = _stars(p)
        print(f" MW {label}: pb_entry worst={pb_w.mean():.4f} rest={pb_r.mean():.4f} "
              f"p={p:.4f} {sig}")
        corr_res[f'mw_{label}'] = dict(stat=float(stat), p=float(p),
                                       mean_worst=float(pb_w.mean()),
                                       mean_rest=float(pb_r.mean()), sig=sig)
    return corr_res
# ── Coupling sweep ────────────────────────────────────────────────────────────
def _dd_roi(new_pnl_abs, date_order, date_to_trades):
"""Retroactive DD and ROI from modified per-trade PnL array."""
cap, peak, max_dd = 25000.0, 25000.0, 0.0
total = 0.0
for d in date_order:
for i in date_to_trades[d]:
cap += new_pnl_abs[i]
total += new_pnl_abs[i]
if cap > peak: peak = cap
dd = (peak - cap) / peak * 100.0
if dd > max_dd: max_dd = dd
return total / 25000. * 100., max_dd
def coupling_sweep(feats, n_max=None):
N = len(feats)
if n_max: feats = feats[:n_max]
# ---- Arrays ----
pnl_abs = np.array([f['pnl_abs'] for f in feats])
pnl_frac = np.array([f['pnl_frac'] for f in feats])
pb_entry = np.array([f['pb_entry'] for f in feats])
pb_max = np.array([f['pb_max'] for f in feats])
pb_traj = np.array([f['pb_traj'] for f in feats])
mae = np.array([f['mae'] for f in feats])
e25 = np.array([f['e25'] for f in feats])
e50 = np.array([f['e50'] for f in feats])
e75 = np.array([f['e75'] for f in feats])
# Replace NaN pb with median
pb_med = float(np.nanmedian(pb_entry))
pb_entry_c = np.where(np.isfinite(pb_entry), pb_entry, pb_med)
pb_max_c = np.where(np.isfinite(pb_max), pb_max, pb_med)
# Percentile ranks (0=low, 1=high)
def prank(x):
r = np.argsort(np.argsort(x)).astype(float)
return r / max(len(r) - 1, 1)
rk_e = prank(pb_entry_c)
rk_mx = prank(pb_max_c)
rk_tr = prank(pb_traj)
# Date ordering for DD computation
dates_list = [f['date'] for f in feats]
date_order = sorted(set(dates_list))
date_to_trades = defaultdict(list)
for i, d in enumerate(dates_list):
date_to_trades[d].append(i)
base_roi, base_dd = _dd_roi(pnl_abs, date_order, date_to_trades)
# Helper: new_pnl_abs from early exit fraction
def _early_abs(early_frac_arr):
ratio = np.where(np.abs(pnl_frac) > 1e-9,
early_frac_arr / pnl_frac, 1.0)
ratio = np.clip(ratio, -5.0, 5.0)
return pnl_abs * ratio
configs = []
def add(name, new_pnl, **meta):
roi, dd = _dd_roi(new_pnl, date_order, date_to_trades)
configs.append(dict(name=name, roi=roi, dd=dd,
roi_delta=roi - base_roi,
dd_delta=dd - base_dd,
**meta))
# ─── Mode A: scale_suppress — scale down when proxy_B high ───────────────
for sig_name, rk in [('pb_entry', rk_e), ('pb_max', rk_mx), ('pb_traj', rk_tr)]:
for thr in [0.50, 0.65, 0.75, 0.85]:
for alpha in [0.5, 1.0, 2.0]:
for s_min in [0.0, 0.25, 0.5]:
scales = np.maximum(s_min, 1.0 - alpha * np.maximum(0, rk - thr))
add(f'A/{sig_name}/thr{thr}/a{alpha}/min{s_min}',
pnl_abs * scales,
mode='scale_suppress', signal=sig_name,
thr=thr, alpha=alpha, s_min=s_min,
scale_mean=float(scales.mean()))
# ─── Mode B: scale_boost — scale up when proxy_B low ─────────────────────
for sig_name, rk in [('pb_entry', rk_e)]:
for thr in [0.25, 0.35, 0.50]:
for alpha in [0.5, 1.0]:
scales = 1.0 + alpha * np.maximum(0, thr - rk)
add(f'B/{sig_name}/thr{thr}/a{alpha}',
pnl_abs * scales,
mode='scale_boost', signal=sig_name,
thr=thr, alpha=alpha,
scale_mean=float(scales.mean()))
# ─── Mode C: hold_limit — exit early when pb_max high ────────────────────
for frac, early_arr in [(0.25, e25), (0.50, e50), (0.75, e75)]:
for thr_pct in [0.65, 0.75, 0.85, 0.90]:
thr_abs = np.percentile(pb_max_c, thr_pct * 100)
trigger = pb_max_c > thr_abs
new_pnl_f = np.where(trigger, early_arr, pnl_frac)
n_trig = int(trigger.sum())
add(f'C/frac{frac}/pbmax_p{thr_pct}',
_early_abs(new_pnl_f),
mode='hold_limit', frac=frac, thr_pct=thr_pct, n_triggered=n_trig)
# ─── Mode D: rising_exit — exit early when pb trajectory strongly up ──────
for frac, early_arr in [(0.25, e25), (0.50, e50)]:
for thr_pct in [0.70, 0.80, 0.90]:
thr_abs = np.percentile(pb_traj, thr_pct * 100)
trigger = pb_traj > thr_abs
new_pnl_f = np.where(trigger, early_arr, pnl_frac)
n_trig = int(trigger.sum())
add(f'D/frac{frac}/traj_p{thr_pct}',
_early_abs(new_pnl_f),
mode='rising_exit', frac=frac, thr_pct=thr_pct, n_triggered=n_trig)
# ─── Mode E: pure_stop — retroactive stop (no proxy, benchmark) ──────────
for stop_p in [0.003, 0.005, 0.008, 0.010, 0.015, 0.020, 0.030]:
# mae < -stop_p → exit was stopped; clamp pnl_frac to -stop_p
stopped = mae < -stop_p
new_pnl_f = np.where(stopped, -stop_p, pnl_frac)
n_trig = int(stopped.sum())
add(f'E/stop_{stop_p:.3f}',
_early_abs(new_pnl_f),
mode='pure_stop', stop_pct=stop_p, n_triggered=n_trig)
# ─── Mode F: gated_stop — stop applies only when pb_entry high ───────────
for stop_p in [0.005, 0.008, 0.010, 0.015]:
for gate_pct in [0.50, 0.60, 0.75, 0.85]:
gate_thr = np.percentile(pb_entry_c, gate_pct * 100)
gated = pb_entry_c > gate_thr
stopped = gated & (mae < -stop_p)
new_pnl_f = np.where(stopped, -stop_p, pnl_frac)
n_trig = int(stopped.sum())
add(f'F/stop_{stop_p:.3f}/gate_p{gate_pct}',
_early_abs(new_pnl_f),
mode='gated_stop', stop_pct=stop_p, gate_pct=gate_pct,
n_triggered=n_trig)
return base_roi, base_dd, configs
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
    """Entry point: shadow run → feature extraction → orthogonality tests →
    coupling sweep → candidate tables → JSON results file next to this script.

    Fix vs. previous revision: removed an unused `from itertools import
    groupby` (grouping is done with a defaultdict).
    """
    ensure_jit()
    print("\nLoading data & forewarner...")
    d = load_data()
    fw = load_forewarner()
    print("\nBuilding shadow data (one full AE run)...")
    t0 = time.time()
    eng, tr = build_shadow(d, fw)
    print(f" Built in {time.time()-t0:.0f}s")
    print("\nExtracting per-trade features...")
    feats = extract_features(eng, tr)
    print(f" {len(feats)} trades with valid features")
    # ── Orthogonality ─────────────────────────────────────────────────────────
    print("\n" + "="*60)
    print("ORTHOGONALITY ANALYSIS")
    print("="*60)
    corr_res = orthogonality_analysis(feats)
    # ── Coupling sweep ────────────────────────────────────────────────────────
    print("\n" + "="*60)
    print(f"COUPLING SWEEP (N={len(feats)} trades)")
    print("="*60)
    t1 = time.time()
    base_roi, base_dd, configs = coupling_sweep(feats)
    print(f" Tested {len(configs)} configs in {time.time()-t1:.2f}s")
    print(f" Baseline: ROI={base_roi:.2f}% DD={base_dd:.2f}%")
    # ── Find DD-reduction candidates ──────────────────────────────────────────
    GOLD_ROI = GOLD['roi']
    GOLD_DD = GOLD['dd']
    ROI_FLOOR = GOLD_ROI * 0.95  # allow at most -5% ROI cost
    candidates = [c for c in configs
                  if c['dd'] < GOLD_DD and c['roi'] >= ROI_FLOOR]
    candidates.sort(key=lambda c: (c['dd_delta'], -c['roi_delta']))
    print(f"\n Configs with DD < {GOLD_DD:.2f}% AND ROI >= {ROI_FLOOR:.1f}%: "
          f"{len(candidates)}")
    # Also find absolute best DD reduction regardless of ROI
    by_dd = sorted(configs, key=lambda c: c['dd'])[:10]
    # Print tables
    def hdr():
        # Column header for the result tables below.
        print(f"\n {'Config':<45} {'ROI%':>7} {'DD%':>6} {'ΔROI':>7} {'ΔDD':>7}"
              f" {'mode':<14}")
        print(' ' + '-'*90)
    def row(c):
        # One formatted table row; trailing field shows trigger count or mean scale.
        extra = ''
        if 'n_triggered' in c: extra = f" trig={c['n_triggered']}"
        if 'scale_mean' in c: extra = f" smean={c['scale_mean']:.3f}"
        print(f" {c['name']:<45} {c['roi']:>7.2f} {c['dd']:>6.2f} "
              f"{c['roi_delta']:>+7.2f} {c['dd_delta']:>+7.2f} "
              f"{c.get('mode',''):<14}{extra}")
    print(f"\n *** GOLD ***: ROI={GOLD_ROI:.2f}% DD={GOLD_DD:.2f}%")
    if candidates:
        print("\n ── DD < gold AND ROI >= 95% gold ──")
        hdr()
        for c in candidates[:20]:
            row(c)
    else:
        print("\n (no configs meet both criteria)")
    print("\n ── Top 10 by lowest DD (regardless of ROI) ──")
    hdr()
    for c in by_dd:
        row(c)
    # ── Summary by mode ───────────────────────────────────────────────────────
    print("\n ── Best config per mode (by DD delta, ROI >= floor) ──")
    hdr()
    by_mode = defaultdict(list)
    for c in configs:
        by_mode[c.get('mode', 'other')].append(c)
    for mode, cs in sorted(by_mode.items()):
        # NOTE(review): the header above says "ROI >= floor", but selection is
        # lowest DD regardless of ROI — confirm intended behavior.
        best = min(cs, key=lambda c: c['dd'])
        row(best)
    # ── Log results ───────────────────────────────────────────────────────────
    out = _HERE / 'exp4_proxy_coupling_results.json'
    payload = {
        'gold': GOLD,
        'baseline': dict(roi=base_roi, dd=base_dd),
        'orthogonality': corr_res,
        'n_configs_tested': len(configs),
        'dd_reduction_candidates': candidates[:20],
        'top10_by_dd': by_dd,
        'best_per_mode': {
            mode: min(cs, key=lambda c: c['dd'])
            for mode, cs in by_mode.items()
        },
        'all_configs': configs,
    }
    out.parent.mkdir(parents=True, exist_ok=True)
    with open(out, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2)
    print(f"\n Logged → {out}")
if __name__ == '__main__':
    main()