Files
DOLPHIN/nautilus_dolphin/dvae/exp3_longer_proxies.py

420 lines
16 KiB
Python
Raw Normal View History

"""
Exp 3: longer-window proxies × three modes (gate / size / exit).
Available proxy signals from scan parquets:
proxy_B50 = instability_50 - v750_lambda_max_velocity (original)
proxy_B150 = instability_150 - v750_lambda_max_velocity (longer instability window)
proxy_V50 = v50_lambda_max_velocity - v750_lambda_max_velocity (vel divergence short)
proxy_V150 = v150_lambda_max_velocity - v750_lambda_max_velocity (vel divergence medium)
proxy_V300 = v300_lambda_max_velocity - v750_lambda_max_velocity (vel divergence long)
For each proxy, test:
MODE_GATE: binary suppress entry when proxy < rolling threshold
MODE_SIZE: scale fraction [0.5x, 1.5x] by proxy percentile
MODE_EXIT: (shadow analysis) early exit when proxy < rolling threshold
Run order:
Step 1: fast numpy sweep across all proxy × mode × threshold
(no Alpha Engine, simplified TP/max_hold model, ~seconds per config)
Step 2: top-2 configs per proxy validated with full Alpha Engine (~200s each)
Results: exp3_fast_sweep_results.json + exp3_alpha_engine_results.json
"""
import sys, time, math, json
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
import pandas as pd
_HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(_HERE.parent))
from exp_shared import (
ensure_jit, ENGINE_KWARGS, GOLD, VBT_DIR, META_COLS,
load_data, load_forewarner, run_backtest, print_table, log_results
)
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
# ── Proxy definitions ─────────────────────────────────────────────────────────
def _spread_vs_v750(column):
    """Return a callable(row) computing ``column`` minus v750_lambda_max_velocity."""
    def proxy(row):
        # Both operands go through _get, so missing / non-finite cells count as 0.0.
        return _get(row, column) - _get(row, 'v750_lambda_max_velocity')
    return proxy


# Proxy name -> callable(row) returning that proxy's value for one scan row.
# Insertion order matters: the sweep iterates this dict in declaration order.
PROXY_DEFS = {
    proxy_name: _spread_vs_v750(source_column)
    for proxy_name, source_column in (
        ('B50', 'instability_50'),
        ('B150', 'instability_150'),
        ('V50', 'v50_lambda_max_velocity'),
        ('V150', 'v150_lambda_max_velocity'),
        ('V300', 'v300_lambda_max_velocity'),
    )
}
def _get(row, col, default=0.0):
    """
    Read ``row[col]`` as a finite float, falling back to ``default``.

    ``row`` only needs a ``.get(key)`` method (dict, pandas Series, ...).
    ``default`` is returned when the key is absent, the value is None, the
    value cannot be converted with ``float()``, or the result is NaN / ±inf.
    """
    raw = row.get(col)
    if raw is not None:
        try:
            number = float(raw)
            if np.isfinite(number):
                return number
        except Exception:
            # Best-effort read: any conversion failure means "use the default".
            pass
    return default
# ── Step 1: Fast numpy sweep ──────────────────────────────────────────────────
def fast_sweep():
    """
    Sweep all proxies × modes × thresholds with a simplified backtest.

    Simplified model: SHORT entry when vel_div < -0.02, fixed 0.95% take
    profit, 120-bar max hold, single asset (BTCUSDT), 20% base position,
    no fees, no leverage dynamics. The Alpha Engine is not involved (the
    original author's estimate is ~100x faster than the Alpha Engine).

    Side effects: prints a ranking table and hands the ranked results to
    log_results() with target path ``exp3_fast_sweep_results.json`` next to
    this file.

    Returns:
        (top_configs, baseline):
            top_configs -- up to two best-Sharpe result dicts per proxy
                (keys: roi, n_trades, wr, sharpe, key, proxy, mode,
                threshold_pct), intended for Alpha Engine validation.
            baseline -- result dict of the run with no proxy modification.
    """
    print("\n" + "="*65)
    print("STEP 1 — FAST NUMPY SWEEP (simplified, no Alpha Engine)")
    print("="*65)
    d = load_data()
    TP = 0.0095   # 95bps take profit
    MH = 120      # max hold bars
    VDT = -0.02   # vel_div entry threshold

    # Build concatenated scan data across all days, keeping only needed columns.
    wanted_cols = ['vel_div', 'BTCUSDT',
                   'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
                   'v300_lambda_max_velocity', 'v750_lambda_max_velocity',
                   'instability_50', 'instability_150']
    all_rows = []
    for pf in d['parquet_files']:
        df, _, _ = d['pq_data'][pf.stem]
        for ri in range(len(df)):
            row = df.iloc[ri]
            all_rows.append({c: row.get(c) for c in wanted_cols})
    N = len(all_rows)
    # NaN (not 0.0) marks missing vel_div / price so those bars can be skipped.
    vd = np.array([_get(r, 'vel_div', np.nan) for r in all_rows])
    price = np.array([_get(r, 'BTCUSDT', np.nan) for r in all_rows])

    # Precompute all proxy arrays
    proxy_arrays = {
        pname: np.array([pfn(r) for r in all_rows])
        for pname, pfn in PROXY_DEFS.items()
    }

    def simplified_backtest(entry_mask, proxy_arr, mode, threshold_pct, window=500):
        """
        Bar-by-bar SHORT-only backtest over the concatenated scan data.

        mode: 'gate' (suppress entry when proxy < rolling threshold),
              'size' (scale position in [0.5x, 1.5x] by proxy percentile),
              'exit' (early exit when proxy < rolling threshold);
              any other value applies no proxy modification (baseline).
        entry_mask: boolean array of candidate entry bars.
        threshold_pct: rolling percentile (0..1) used by 'gate' and 'exit'.
            NOTE(review): 'size' never reads it, so every threshold yields
            the same result for that mode.
        window: number of most recent finite proxy values in the rolling set.

        Returns: dict with roi (%), n_trades, wr (%), sharpe. Sharpe and win
        rate are computed from unscaled per-trade returns, so position
        sizing only shows up in roi.
        """
        capital = 1.0
        in_position = False
        entry_bar = 0
        entry_p = 0.0
        pb_hist = []
        trades = []
        scale = 1.0
        # Only these modes compare the proxy against the rolling percentile;
        # skipping the percentile elsewhere does not change any result.
        needs_threshold = mode in ('gate', 'exit')
        for i in range(N):
            pb = proxy_arr[i]
            if np.isfinite(pb):
                pb_hist.append(pb)
            # Most recent `window` finite proxy values (includes the current bar).
            hist_window = pb_hist[-window:]
            # Rolling threshold; -999.0 disables gate/exit until 20 samples exist.
            if needs_threshold and len(hist_window) >= 20:
                thr = float(np.percentile(hist_window, threshold_pct * 100))
            else:
                thr = -999.0

            if in_position:
                if np.isnan(price[i]) or entry_p <= 0:
                    # Position is dropped without recording a trade when the
                    # price is missing (behaviour kept from the original).
                    in_position = False
                    continue
                ret = (price[i] - entry_p) / entry_p  # LONG direction (for backtest)
                # direction=-1 (SHORT) — vel_div < 0 = eigenspace stress = SHORT signal
                pnl_pct = -ret
                bars_held = i - entry_bar
                exited = (
                    (mode == 'exit' and np.isfinite(pb) and pb < thr)  # proxy-based exit
                    or pnl_pct >= TP                                   # take profit
                    or bars_held >= MH                                 # max hold
                )
                if exited:
                    pos_size = scale * 0.20
                    capital += capital * pos_size * pnl_pct
                    trades.append(pnl_pct)
                    in_position = False
            else:
                if (not np.isnan(vd[i]) and entry_mask[i] and
                        not np.isnan(price[i]) and price[i] > 0):
                    # Gate mode: skip if proxy below threshold
                    if mode == 'gate' and np.isfinite(pb) and pb < thr:
                        continue
                    # Sizing mode: compute scale
                    if mode == 'size' and len(hist_window) >= 20:
                        if np.isfinite(pb):
                            pct = float(np.mean(np.array(hist_window) <= pb))
                        else:
                            pct = 0.5
                        scale = 0.5 + pct * 1.0  # linear [0.5, 1.5]
                    else:
                        scale = 1.0
                    in_position = True
                    entry_bar = i
                    entry_p = price[i]

        n = len(trades)
        if n == 0:
            return dict(roi=0, n_trades=0, wr=0, sharpe=0)
        roi = (capital - 1.0) * 100.0
        arr = np.array(trades)
        wr = float(np.mean(arr > 0)) * 100
        sh = float(arr.mean() / (arr.std() + 1e-9) * math.sqrt(n))
        return dict(roi=roi, n_trades=n, wr=wr, sharpe=sh)

    entry_mask = (np.isfinite(vd)) & (vd < VDT)
    MODES = ['gate', 'size', 'exit']
    THRESHOLDS = [0.10, 0.25, 0.50]
    sweep_results = []
    for pname in PROXY_DEFS:
        parr = proxy_arrays[pname]
        for mode in MODES:
            for tpct in THRESHOLDS:
                key = f"{pname}/{mode}/p{int(tpct*100)}"
                res = simplified_backtest(entry_mask, parr, mode, tpct)
                res['key'] = key; res['proxy'] = pname
                res['mode'] = mode; res['threshold_pct'] = tpct
                sweep_results.append(res)

    # Baseline (no proxy modification). The mode must be one that none of the
    # gate/size/exit branches match; the previous 'size' mode rescaled every
    # position by the B50 percentile and so distorted the baseline ROI.
    base = simplified_backtest(entry_mask, proxy_arrays['B50'], 'none', 0.0)
    base['key'] = 'BASELINE'; base['proxy'] = '-'; base['mode'] = '-'; base['threshold_pct'] = 0
    sweep_results.insert(0, base)

    # Sort by Sharpe
    ranked = sorted(sweep_results, key=lambda r: r.get('sharpe', -999), reverse=True)
    print(f"\n{'Key':<30} {'ROI%':>7} {'Trades':>7} {'WR%':>6} {'Sharpe':>8}")
    print('-'*60)
    print(f"{'BASELINE':<30} {base['roi']:>7.2f} {base['n_trades']:>7d} "
          f"{base['wr']:>6.1f}% {base['sharpe']:>8.4f}")
    print('-'*60)
    for rank, r in enumerate(ranked[:20]):
        if r['key'] == 'BASELINE':
            continue
        marker = ' ◄ TOP' if rank <= 5 else ''
        print(f"{r['key']:<30} {r['roi']:>7.2f} {r['n_trades']:>7d} "
              f"{r['wr']:>6.1f}% {r['sharpe']:>8.4f}{marker}")
    log_results(
        ranked,
        _HERE / 'exp3_fast_sweep_results.json',
        gold=None,
        meta={'experiment': 'exp3 fast numpy sweep', 'n_bars': N,
              'baseline': base, 'note': 'simplified SHORT-only, no fees, no leverage'}
    )

    # Top configs for Alpha Engine validation (top 2 per proxy, by Sharpe).
    # NOTE(review): 'exit' configs are eligible here although MultiProxyEngine
    # only changes entry behaviour for 'gate' and 'size'.
    top_configs = []
    picked_per_proxy = {}
    for r in ranked:
        if r['key'] == 'BASELINE':
            continue
        pn = r['proxy']
        if picked_per_proxy.get(pn, 0) < 2:
            top_configs.append(r)
            picked_per_proxy[pn] = picked_per_proxy.get(pn, 0) + 1

    # Return the actual baseline run, not ranked[0] (which is merely the
    # best-Sharpe row and is usually a proxy config).
    return top_configs, base
# ── Step 2: Alpha Engine validation of top configs ────────────────────────────
class MultiProxyEngine(NDAlphaEngine):
    """
    Generic engine parameterised by proxy name + mode.

    The proxy is recomputed on every processed bar and appended to a history
    from which a rolling percentile threshold / percentile rank is derived.

    Modes:
        'gate' -- suppress an entry when the proxy is below the rolling
                  ``threshold_pct`` percentile.
        'size' -- scale ``bet_sizer.base_fraction`` linearly between
                  ``size_min`` and ``size_max`` by the proxy's percentile rank
                  for the duration of the parent's entry call.
        anything else (including 'exit') -- entry logic is delegated to the
                  parent unchanged; this class implements no exit override
                  and never updates ``early_exits``.
    """

    # proxy name -> key in self._current_vals that is compared against 'v750'
    _PROXY_INPUT_KEY = {
        'B50': 'i50',
        'B150': 'i150',
        'V50': 'v50',
        'V150': 'v150',
        'V300': 'v300',
    }

    def __init__(self, *args, proxy_name='B50', mode='gate',
                 threshold_pct=0.25, window=500,
                 size_min=0.5, size_max=1.5, **kwargs):
        """Forward ``*args``/``**kwargs`` to the parent and store proxy settings."""
        super().__init__(*args, **kwargs)
        self._proxy_name = proxy_name
        self._mode = mode
        self._threshold_pct = threshold_pct
        self._window = window
        self._size_min = size_min
        self._size_max = size_max
        self._pb_history = []      # proxy value of every processed bar, in order
        self._current_vals = {}    # inputs of the bar currently being processed
        # Stats
        self.gate_suppressed = 0
        self.gate_allowed = 0
        self.early_exits = 0
        self.sizing_scales = []

    def _proxy(self):
        """Return the configured proxy for the current bar (0.0 for unknown names)."""
        key = self._PROXY_INPUT_KEY.get(self._proxy_name)
        if key is None:
            return 0.0
        v = self._current_vals
        return v.get(key, 0.) - v.get('v750', 0.)

    def _rolling_threshold(self):
        """Percentile threshold over the last ``window`` proxies; -999.0 below 20 samples."""
        h = self._pb_history[-self._window:]
        if len(h) < 20:
            return -999.0
        return float(np.percentile(h, self._threshold_pct * 100))

    def _rolling_pct(self, pb):
        """Fraction of the last ``window`` proxies that are <= ``pb``; 0.5 below 20 samples."""
        h = np.array(self._pb_history[-self._window:])
        if len(h) < 20:
            return 0.5
        return float(np.mean(h <= pb))

    def process_day(self, date_str, df, asset_columns,
                    vol_regime_ok=None, direction=None, posture='APEX'):
        """
        Feed one day's rows to the engine bar by bar, tracking the proxy.

        Rows with a missing / non-finite ``vel_div`` or with no positive
        finite price in ``asset_columns`` are skipped (the bar counters still
        advance). begin_day / step_bar / end_day are presumably inherited
        from NDAlphaEngine; the value of end_day() is returned.
        """
        def gf(row, col):
            # Finite float from the row, or 0.0 when missing / unparsable.
            v = row.get(col)
            if v is None:
                return 0.0
            try:
                f = float(v)
                return f if np.isfinite(f) else 0.0
            except Exception:
                return 0.0

        self.begin_day(date_str, posture=posture, direction=direction)
        bid = 0  # bars seen so far today (including skipped ones)
        for ri in range(len(df)):
            row = df.iloc[ri]
            vd = row.get('vel_div')
            if vd is None or not np.isfinite(float(vd)):
                self._global_bar_idx += 1; bid += 1; continue
            self._current_vals = dict(
                i50=gf(row, 'instability_50'), i150=gf(row, 'instability_150'),
                v50=gf(row, 'v50_lambda_max_velocity'),
                v150=gf(row, 'v150_lambda_max_velocity'),
                v300=gf(row, 'v300_lambda_max_velocity'),
                v750=gf(row, 'v750_lambda_max_velocity'),
            )
            pb = self._proxy()
            self._pb_history.append(pb)
            prices = {}
            for ac in asset_columns:
                p = row.get(ac)
                if p is not None and p > 0 and np.isfinite(p):
                    prices[ac] = float(p)
            if not prices:
                self._global_bar_idx += 1; bid += 1; continue
            # Without an explicit regime array, treat the regime as OK once
            # 100 bars of the day have been seen.
            vrok = bool(vol_regime_ok[ri]) if vol_regime_ok is not None else (bid >= 100)
            self.step_bar(bar_idx=ri, vel_div=float(vd), prices=prices,
                          vol_regime_ok=vrok,
                          v50_vel=self._current_vals['v50'],
                          v750_vel=self._current_vals['v750'])
            bid += 1
        return self.end_day()

    def _try_entry(self, bar_idx, vel_div, prices, price_histories,
                   v50_vel=0.0, v750_vel=0.0):
        """
        Apply the proxy gate / sizing around the parent's entry logic.

        Returns None when a 'gate' suppresses the entry, otherwise whatever
        the parent's ``_try_entry`` returns.
        """
        pb = self._proxy()
        if self._mode == 'gate':
            if pb < self._rolling_threshold():
                self.gate_suppressed += 1
                return None
            self.gate_allowed += 1
        elif self._mode == 'size':
            pct = self._rolling_pct(pb)
            scale = self._size_min + pct * (self._size_max - self._size_min)
            self.sizing_scales.append(scale)
            orig = self.bet_sizer.base_fraction
            self.bet_sizer.base_fraction = orig * scale
            try:
                return super()._try_entry(bar_idx, vel_div, prices, price_histories,
                                          v50_vel, v750_vel)
            finally:
                # Always restore, so an exception in the parent cannot leave
                # the sizer permanently rescaled.
                self.bet_sizer.base_fraction = orig
        return super()._try_entry(bar_idx, vel_div, prices, price_histories,
                                  v50_vel, v750_vel)

    @property
    def sizing_scale_mean(self):
        """Mean of all sizing scales applied so far (1.0 when none were)."""
        return float(np.mean(self.sizing_scales)) if self.sizing_scales else 1.0
def validate_with_alpha_engine(top_configs, forewarner):
    """
    Run the baseline engine and each selected sweep config through run_backtest.

    Args:
        top_configs: result dicts from the fast sweep; each must provide
            'proxy', 'mode', 'threshold_pct' and 'sharpe'.
        forewarner: passed through to run_backtest unchanged.

    Configs whose mode is not 'gate' or 'size' are skipped with a message:
    MultiProxyEngine only changes entry behaviour for those two modes, so any
    other mode would just repeat the baseline run under a misleading name.

    Returns:
        List of run_backtest results (baseline first), each with an added
        'elapsed' entry in seconds. The list is also printed via print_table.
    """
    supported_modes = ('gate', 'size')

    def timed_run(factory, name):
        # One backtest, timed, with its one-line summary printed.
        t0 = time.time()
        r = run_backtest(factory, name, forewarner=forewarner)
        r['elapsed'] = time.time() - t0
        print(f" {r['roi']:.2f}% PF={r['pf']:.4f} DD={r['dd']:.2f}% ({r['elapsed']:.0f}s)")
        return r

    print("\n" + "="*65)
    print("STEP 2 — ALPHA ENGINE VALIDATION (top configs)")
    print("="*65)
    ae_results = []

    # Baseline first
    print("\nBaseline...")
    ae_results.append(timed_run(lambda kw: NDAlphaEngine(**kw), 'Baseline'))

    for cfg in top_configs:
        pn = cfg['proxy']
        mode = cfg['mode']
        tpct = cfg['threshold_pct']
        name = f"{pn}/{mode}/p{int(tpct*100)}"
        if mode not in supported_modes:
            print(f"\n{name} skipped (mode '{mode}' is not implemented by MultiProxyEngine)")
            continue
        print(f"\n{name} (sweep rank: Sharpe={cfg['sharpe']:.4f})")

        # Defaults bind the current loop values to this factory.
        def factory(kw, pn=pn, mode=mode, tpct=tpct):
            return MultiProxyEngine(**kw, proxy_name=pn, mode=mode,
                                    threshold_pct=tpct, window=500)

        ae_results.append(timed_run(factory, name))

    print("\n" + "="*83)
    print("EXP 3 — ALPHA ENGINE RESULTS")
    print_table(ae_results, gold=GOLD)
    return ae_results
def main():
    """
    Run the experiment end to end: fast sweep, then Alpha Engine validation.

    Prints progress, then passes the Alpha Engine results to log_results()
    with target path ``exp3_alpha_engine_results.json`` next to this file.
    """
    ensure_jit()
    print("\nLoading data & forewarner...")
    load_data()
    forewarner = load_forewarner()

    # Step 1: the sweep returns the configs to validate plus a reference result.
    selected_configs, reference = fast_sweep()
    n_selected = len(selected_configs)
    print(f"\nFast sweep done. Top {n_selected} configs selected for AE validation.")
    ref_roi = reference['roi']
    ref_sharpe = reference['sharpe']
    print(f"Fast baseline: ROI={ref_roi:.2f}% Sharpe={ref_sharpe:.4f}")

    # Step 2: full Alpha Engine runs for the selected configs.
    engine_results = validate_with_alpha_engine(selected_configs, forewarner)

    run_meta = {
        'experiment': 'exp3 longer proxies alpha engine validation',
        'proxies_tested': list(PROXY_DEFS.keys()),
        'modes_tested': ['gate','size'],  # exit=shadow only, done in exp2
        'note': 'Top-2 per proxy from fast sweep, validated with full Alpha Engine',
    }
    log_results(
        engine_results,
        _HERE / 'exp3_alpha_engine_results.json',
        meta=run_meta,
    )


if __name__ == '__main__':
    main()