Files
DOLPHIN/nautilus_dolphin/dvae/exp3_longer_proxies.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

420 lines
16 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Exp 3 — Longer-window proxies × three modes (gate / size / exit).
Available proxy signals from scan parquets:
proxy_B50 = instability_50 - v750_lambda_max_velocity (original)
proxy_B150 = instability_150 - v750_lambda_max_velocity (longer instability window)
proxy_V50 = v50_lambda_max_velocity - v750_lambda_max_velocity (vel divergence short)
proxy_V150 = v150_lambda_max_velocity - v750_lambda_max_velocity (vel divergence medium)
proxy_V300 = v300_lambda_max_velocity - v750_lambda_max_velocity (vel divergence long)
For each proxy, test:
MODE_GATE: binary suppress entry when proxy < rolling threshold
MODE_SIZE: scale fraction [0.5x, 1.5x] by proxy percentile
MODE_EXIT: (shadow analysis) early exit when proxy < rolling threshold
Run order:
Step 1 — fast numpy sweep across all proxy × mode × threshold
(no Alpha Engine, simplified TP/max_hold model, ~seconds per config)
Step 2 — top-2 configs per proxy validated with full Alpha Engine (~200s each)
Results: exp3_fast_sweep_results.json + exp3_alpha_engine_results.json
"""
import sys, time, math, json
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
import pandas as pd
_HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(_HERE.parent))
from exp_shared import (
ensure_jit, ENGINE_KWARGS, GOLD, VBT_DIR, META_COLS,
load_data, load_forewarner, run_backtest, print_table, log_results
)
from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
# ── Proxy definitions ─────────────────────────────────────────────────────────
PROXY_DEFS = {
'B50': lambda row: _get(row,'instability_50') - _get(row,'v750_lambda_max_velocity'),
'B150': lambda row: _get(row,'instability_150') - _get(row,'v750_lambda_max_velocity'),
'V50': lambda row: _get(row,'v50_lambda_max_velocity') - _get(row,'v750_lambda_max_velocity'),
'V150': lambda row: _get(row,'v150_lambda_max_velocity') - _get(row,'v750_lambda_max_velocity'),
'V300': lambda row: _get(row,'v300_lambda_max_velocity') - _get(row,'v750_lambda_max_velocity'),
}
def _get(row, col, default=0.0):
v = row.get(col)
if v is None: return default
try:
f = float(v)
return f if np.isfinite(f) else default
except Exception:
return default
# ── Step 1: Fast numpy sweep ──────────────────────────────────────────────────
def fast_sweep():
    """
    Vectorized sweep across all proxies × modes × thresholds.

    Uses a simplified backtest: vel_div < -0.02 entry, fixed 0.95% TP,
    120-bar max hold.  Single asset (BTCUSDT), no fees, no leverage
    dynamics.  ~100x faster than the full Alpha Engine.

    Returns:
        (top_configs, best): up to 2 best configs per proxy (for Alpha
        Engine validation) and the single best-ranked sweep result.
    """
    print("\n" + "="*65)
    print("STEP 1 — FAST NUMPY SWEEP (simplified, no Alpha Engine)")
    print("="*65)
    d = load_data()
    TP = 0.0095   # take profit: 0.95% (95 bps)  [comment fixed: was mislabelled 99bps]
    MH = 120      # max hold, in bars
    VDT = -0.02   # vel_div entry threshold
    # Build concatenated scan data across all days
    all_rows = []
    for pf in d['parquet_files']:
        df, _, _ = d['pq_data'][pf.stem]
        for ri in range(len(df)):
            row = df.iloc[ri]
            r = {c: row.get(c) for c in ['vel_div', 'BTCUSDT',
                 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
                 'v300_lambda_max_velocity', 'v750_lambda_max_velocity',
                 'instability_50', 'instability_150']}
            all_rows.append(r)
    N = len(all_rows)
    # NaN (not 0.0) default so missing bars are excluded by the isnan checks below.
    vd = np.array([_get(r, 'vel_div', np.nan) for r in all_rows])
    price = np.array([_get(r, 'BTCUSDT', np.nan) for r in all_rows])
    # Precompute all proxy arrays
    proxy_arrays = {}
    for pname, pfn in PROXY_DEFS.items():
        proxy_arrays[pname] = np.array([pfn(r) for r in all_rows])

    def simplified_backtest(entry_mask, proxy_arr, mode, threshold_pct, window=500):
        """
        Single pass over all bars with a simplified SHORT-only trade model.

        mode: 'gate' | 'size' | 'exit'; any other value (e.g. 'none')
              applies no proxy modification at all — used for the baseline.
        entry_mask: boolean array of candidate entries
        threshold_pct: rolling-percentile level in [0, 1]
        Returns: dict(roi, n_trades, wr, sharpe)
        """
        capital = 1.0
        in_position = False
        entry_bar = 0
        entry_p = 0.0
        pb_hist = []
        trades = []
        scale = 1.0
        for i in range(N):
            pb = proxy_arr[i]
            if np.isfinite(pb):
                pb_hist.append(pb)
            hist_window = pb_hist[-window:] if len(pb_hist) >= window else pb_hist
            # Rolling threshold — needs at least 20 finite observations
            if len(hist_window) >= 20:
                thr = float(np.percentile(hist_window, threshold_pct * 100))
            else:
                thr = -999.0  # sentinel: pb < thr is (almost) never true -> no gating
            if in_position:
                if np.isnan(price[i]) or entry_p <= 0:
                    # Bad price data: abandon the open position without recording it.
                    in_position = False; continue
                ret = (price[i] - entry_p) / entry_p  # LONG-direction return
                # direction=-1 (SHORT) — vel_div < 0 = eigenspace stress = SHORT signal
                pnl_pct = -ret  # SHORT
                bars_held = i - entry_bar
                exited = False
                # Proxy-based early exit (mode='exit' only)
                if mode == 'exit' and np.isfinite(pb) and pb < thr:
                    exited = True
                # Natural exits: take profit, then max-hold
                if not exited and pnl_pct >= TP:
                    exited = True
                if not exited and bars_held >= MH:
                    exited = True
                if exited:
                    pos_size = scale * 0.20  # 20% base fraction, scaled by size mode
                    trade_pnl = capital * pos_size * pnl_pct
                    capital += trade_pnl
                    trades.append(pnl_pct)
                    in_position = False
            else:
                if (not np.isnan(vd[i]) and entry_mask[i] and
                        not np.isnan(price[i]) and price[i] > 0):
                    # Gate mode: skip the entry if proxy is below threshold
                    if mode == 'gate' and np.isfinite(pb) and pb < thr:
                        continue
                    # Sizing mode: scale by rolling percentile of the proxy
                    if mode == 'size' and len(hist_window) >= 20:
                        pct = float(np.mean(np.array(hist_window) <= pb)) if np.isfinite(pb) else 0.5
                        scale = 0.5 + pct * 1.0  # linear [0.5, 1.5]
                    else:
                        scale = 1.0
                    in_position = True
                    entry_bar = i
                    entry_p = price[i]
        n = len(trades)
        if n == 0:
            return dict(roi=0, n_trades=0, wr=0, sharpe=0)
        roi = (capital - 1.0) * 100.0
        arr = np.array(trades)
        wr = float(np.mean(arr > 0)) * 100
        # Per-trade Sharpe, annualization-free; epsilon guards zero variance.
        sh = float(arr.mean() / (arr.std() + 1e-9) * math.sqrt(n))
        return dict(roi=roi, n_trades=n, wr=wr, sharpe=sh)

    entry_mask = (np.isfinite(vd)) & (vd < VDT)
    MODES = ['gate', 'size', 'exit']
    THRESHOLDS = [0.10, 0.25, 0.50]
    sweep_results = []
    best_by_proxy = {}
    for pname in PROXY_DEFS:
        parr = proxy_arrays[pname]
        for mode in MODES:
            for tpct in THRESHOLDS:
                key = f"{pname}/{mode}/p{int(tpct*100)}"
                res = simplified_backtest(entry_mask, parr, mode, tpct)
                res['key'] = key; res['proxy'] = pname
                res['mode'] = mode; res['threshold_pct'] = tpct
                sweep_results.append(res)
    # Baseline (no proxy modification).
    # FIX: previously this ran with mode='size', which still scaled position
    # size by the proxy percentile and therefore was NOT an unmodified
    # baseline.  mode='none' matches no proxy branch, so no gate/size/exit
    # logic fires and scale stays 1.0.
    base = simplified_backtest(entry_mask, proxy_arrays['B50'], 'none', 0.0)
    base['key'] = 'BASELINE'; base['proxy'] = '-'; base['mode'] = '-'; base['threshold_pct'] = 0
    sweep_results.insert(0, base)
    # Sort by Sharpe, best first
    ranked = sorted(sweep_results, key=lambda r: r.get('sharpe', -999), reverse=True)
    print(f"\n{'Key':<30} {'ROI%':>7} {'Trades':>7} {'WR%':>6} {'Sharpe':>8}")
    print('-'*60)
    print(f"{'BASELINE':<30} {base['roi']:>7.2f} {base['n_trades']:>7d} "
          f"{base['wr']:>6.1f}% {base['sharpe']:>8.4f}")
    print('-'*60)
    # enumerate gives the true rank in O(1) per row (the old ranked.index(r)
    # was O(n) per row and ambiguous for duplicate result dicts).
    for rank, r in enumerate(ranked[:20]):
        if r['key'] == 'BASELINE':
            continue
        marker = ' ◄ TOP' if rank <= 5 else ''
        print(f"{r['key']:<30} {r['roi']:>7.2f} {r['n_trades']:>7d} "
              f"{r['wr']:>6.1f}% {r['sharpe']:>8.4f}{marker}")
    log_results(
        ranked,
        _HERE / 'exp3_fast_sweep_results.json',
        gold=None,
        meta={'experiment': 'exp3 fast numpy sweep', 'n_bars': N,
              'baseline': base, 'note': 'simplified SHORT-only, no fees, no leverage'}
    )
    # Select top configs for Alpha Engine validation (at most 2 per proxy)
    top_configs = []
    seen_proxies = {}
    for r in ranked:
        if r['key'] == 'BASELINE':
            continue
        pn = r['proxy']
        if pn not in seen_proxies:
            seen_proxies[pn] = 0
        if seen_proxies[pn] < 2:
            top_configs.append(r)
            seen_proxies[pn] += 1
    # NOTE: ranked[0] is the best-ranked sweep result, which need not be the
    # baseline — callers use it as a quick reference figure.
    return top_configs, ranked[0]
# ── Step 2: Alpha Engine validation of top configs ────────────────────────────
class MultiProxyEngine(NDAlphaEngine):
    """Generic Alpha Engine parameterised by proxy name + mode.

    Wraps NDAlphaEngine entries with one of three proxy-driven behaviours:
      * mode='gate': suppress an entry when the proxy is below its rolling
        percentile threshold;
      * mode='size': scale the bet sizer's base fraction linearly in
        [size_min, size_max] by the proxy's rolling percentile;
      * any other mode: pass entries through unchanged.
    """

    def __init__(self, *args, proxy_name='B50', mode='gate',
                 threshold_pct=0.25, window=500,
                 size_min=0.5, size_max=1.5, **kwargs):
        super().__init__(*args, **kwargs)
        self._proxy_name = proxy_name        # which proxy difference to compute
        self._mode = mode                    # 'gate' | 'size' | pass-through
        self._threshold_pct = threshold_pct  # rolling percentile level in [0, 1]
        self._window = window                # rolling history length (bars)
        self._size_min = size_min            # sizing scale at 0th percentile
        self._size_max = size_max            # sizing scale at 100th percentile
        self._pb_history = []                # proxy values seen so far
        self._current_vals = {}              # latest bar's signal values
        # Stats (inspected by callers after a run)
        self.gate_suppressed = 0
        self.gate_allowed = 0
        self.early_exits = 0
        self.sizing_scales = []

    def _proxy(self):
        """Compute the configured proxy from the current bar's values (0.0 if unknown)."""
        v = self._current_vals
        if self._proxy_name == 'B50':
            return v.get('i50', 0.) - v.get('v750', 0.)
        elif self._proxy_name == 'B150':
            return v.get('i150', 0.) - v.get('v750', 0.)
        elif self._proxy_name == 'V50':
            return v.get('v50', 0.) - v.get('v750', 0.)
        elif self._proxy_name == 'V150':
            return v.get('v150', 0.) - v.get('v750', 0.)
        elif self._proxy_name == 'V300':
            return v.get('v300', 0.) - v.get('v750', 0.)
        return 0.0

    def _rolling_threshold(self):
        """Rolling percentile threshold; -999.0 sentinel disables gating until warm."""
        h = self._pb_history[-self._window:]
        if len(h) < 20:
            return -999.0
        return float(np.percentile(h, self._threshold_pct * 100))

    def _rolling_pct(self, pb):
        """Fraction of the rolling window at or below *pb* (0.5 until warm)."""
        h = np.array(self._pb_history[-self._window:])
        if len(h) < 20:
            return 0.5
        return float(np.mean(h <= pb))

    def process_day(self, date_str, df, asset_columns,
                    vol_regime_ok=None, direction=None, posture='APEX'):
        """Feed one day's scan DataFrame through the engine, bar by bar.

        Bars without a finite vel_div or without any positive finite price
        are skipped (but still advance the bar counters).
        """
        self.begin_day(date_str, posture=posture, direction=direction)
        bid = 0
        for ri in range(len(df)):
            row = df.iloc[ri]
            vd = row.get('vel_div')
            if vd is None or not np.isfinite(float(vd)):
                self._global_bar_idx += 1; bid += 1; continue

            def gf(col):
                # Finite-float getter with 0.0 fallback (mirrors module _get).
                v = row.get(col)
                if v is None:
                    return 0.0
                try:
                    f = float(v)
                    return f if np.isfinite(f) else 0.0
                except Exception:
                    return 0.0

            self._current_vals = dict(
                i50=gf('instability_50'), i150=gf('instability_150'),
                v50=gf('v50_lambda_max_velocity'), v150=gf('v150_lambda_max_velocity'),
                v300=gf('v300_lambda_max_velocity'), v750=gf('v750_lambda_max_velocity'),
            )
            pb = self._proxy()
            self._pb_history.append(pb)
            prices = {}
            for ac in asset_columns:
                p = row.get(ac)
                if p is not None and p > 0 and np.isfinite(p):
                    prices[ac] = float(p)
            if not prices:
                self._global_bar_idx += 1; bid += 1; continue
            # NOTE(review): without an explicit vol_regime_ok array, bars are
            # considered in-regime after a 100-bar warmup — confirm intent.
            vrok = bool(vol_regime_ok[ri]) if vol_regime_ok is not None else (bid >= 100)
            # NOTE(review): _global_bar_idx is presumably advanced inside
            # step_bar on the normal path — it is only bumped here on skips.
            self.step_bar(bar_idx=ri, vel_div=float(vd), prices=prices,
                          vol_regime_ok=vrok,
                          v50_vel=self._current_vals['v50'],
                          v750_vel=self._current_vals['v750'])
            bid += 1
        return self.end_day()

    def _try_entry(self, bar_idx, vel_div, prices, price_histories,
                   v50_vel=0.0, v750_vel=0.0):
        """Gate or resize the parent entry attempt according to the proxy mode."""
        pb = self._proxy()
        thr = self._rolling_threshold()
        if self._mode == 'gate':
            if pb < thr:
                self.gate_suppressed += 1
                return None
            self.gate_allowed += 1
        elif self._mode == 'size':
            pct = self._rolling_pct(pb)
            scale = self._size_min + pct * (self._size_max - self._size_min)
            self.sizing_scales.append(scale)
            orig = self.bet_sizer.base_fraction
            self.bet_sizer.base_fraction = orig * scale
            # FIX: restore base_fraction in a finally block so an exception in
            # the parent entry path cannot leave the sizer permanently scaled.
            try:
                return super()._try_entry(bar_idx, vel_div, prices, price_histories,
                                          v50_vel, v750_vel)
            finally:
                self.bet_sizer.base_fraction = orig
        return super()._try_entry(bar_idx, vel_div, prices, price_histories,
                                  v50_vel, v750_vel)

    @property
    def sizing_scale_mean(self):
        """Mean applied sizing scale over the run (1.0 when sizing never fired)."""
        return float(np.mean(self.sizing_scales)) if self.sizing_scales else 1.0
def validate_with_alpha_engine(top_configs, forewarner):
    """Re-run the top sweep configs through the full Alpha Engine.

    Runs an unmodified baseline engine first, then one MultiProxyEngine
    backtest per config, printing per-run stats and a final comparison table.
    Returns the list of result dicts (baseline first).
    """
    print("\n" + "="*65)
    print("STEP 2 — ALPHA ENGINE VALIDATION (top configs)")
    print("="*65)
    ae_results = []

    def _run_and_record(engine_factory, label):
        # Time one full backtest, store its result row, and print a summary.
        started = time.time()
        res = run_backtest(engine_factory, label, forewarner=forewarner)
        res['elapsed'] = time.time() - started
        ae_results.append(res)
        print(f" {res['roi']:.2f}% PF={res['pf']:.4f} DD={res['dd']:.2f}% ({res['elapsed']:.0f}s)")

    # Baseline first
    print("\nBaseline...")
    _run_and_record(lambda kw: NDAlphaEngine(**kw), 'Baseline')

    for cfg in top_configs:
        proxy = cfg['proxy']
        mode = cfg['mode']
        tpct = cfg['threshold_pct']
        label = f"{proxy}/{mode}/p{int(tpct*100)}"
        print(f"\n{label} (sweep rank: Sharpe={cfg['sharpe']:.4f})")

        def factory(kw, proxy=proxy, mode=mode, tpct=tpct):
            # Default args bind loop variables early (avoids late-binding bug).
            return MultiProxyEngine(**kw, proxy_name=proxy, mode=mode,
                                    threshold_pct=tpct, window=500)

        _run_and_record(factory, label)

    print("\n" + "="*83)
    print("EXP 3 — ALPHA ENGINE RESULTS")
    print_table(ae_results, gold=GOLD)
    return ae_results
def main():
    """Entry point: fast numpy sweep, then Alpha Engine validation + logging."""
    ensure_jit()
    print("\nLoading data & forewarner...")
    load_data()
    forewarner = load_forewarner()

    # NOTE: the second return value is the best-ranked sweep result (ranked[0]),
    # which is what gets printed as the reference figure below.
    top_configs, best_sweep = fast_sweep()
    print(f"\nFast sweep done. Top {len(top_configs)} configs selected for AE validation.")
    print(f"Fast baseline: ROI={best_sweep['roi']:.2f}% Sharpe={best_sweep['sharpe']:.4f}")

    ae_results = validate_with_alpha_engine(top_configs, forewarner)
    run_meta = {
        'experiment': 'exp3 longer proxies alpha engine validation',
        'proxies_tested': list(PROXY_DEFS.keys()),
        'modes_tested': ['gate','size'],  # exit=shadow only, done in exp2
        'note': 'Top-2 per proxy from fast sweep, validated with full Alpha Engine',
    }
    log_results(ae_results, _HERE / 'exp3_alpha_engine_results.json', meta=run_meta)


if __name__ == '__main__':
    main()