Files
DOLPHIN/nautilus_dolphin/dvae/exp13_model_sweep.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

364 lines
16 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
exp13_model_sweep.py — Multi-model exp13 test harness.
For each model in the registry:
1. Auto-identify proxy_B dim (highest |r| correlation with raw proxy_B signal)
2. Validate calibration (always-positive in 56-day window required for exp13 scaling)
3. Run exp13 Phase 1 (14-day screening) + Phase 2 (full 56-day, top-k configs)
4. Save per-model results to exp13_sweep_<tag>_results.json
5. Print final comparison table across all models
All tests use IDENTICAL configs/window/threshold to exp13 v2 (the CONFIRMED baseline).
Threshold: Calmar > 7.83 (102% of D_LIQ_GOLD baseline 7.67 in the 56-day window)
Reference: v2 BOB — 9/20 PASS, best dROI=+4.59pp, best Calmar=7.87
Usage (from nautilus_dolphin/ dir):
python dvae/exp13_model_sweep.py # all available models in registry
python dvae/exp13_model_sweep.py --models v4 # single model
python dvae/exp13_model_sweep.py --models v4 v5 v6 # explicit list
python dvae/exp13_model_sweep.py --probe_only # dim probe only, no backtest
python dvae/exp13_model_sweep.py --subset 14 --top_k 20 # explicit Phase 1/2 params
Adding new models (v5, v6, v7, v8):
1. Transfer model JSON from DOLPHIN Linux to models/convnext_dvae_ML/
2. Uncomment (or add) the entry in MODEL_REGISTRY below
3. Re-run this script
"""
import sys, os, time, json, importlib, argparse
import io
# Force UTF-8 stdout (line-buffered, lossy on bad bytes) so box-drawing /
# check-mark glyphs in the report tables survive Windows console encodings.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace', line_buffering=True)
import numpy as np
import pandas as pd
from pathlib import Path
# Repo root = three levels up from this file (DOLPHIN/nautilus_dolphin/dvae/).
ROOT = Path(__file__).resolve().parent.parent.parent
DVAE_DIR = ROOT / 'nautilus_dolphin' / 'dvae'
# Make `import dvae.*` resolve without installing the package.
sys.path.insert(0, str(ROOT / 'nautilus_dolphin'))
# ── Model registry ─────────────────────────────────────────────────────────────
# Slot in v5/v6/v7/v8 when transferred from DOLPHIN Linux — just uncomment.
# Maps model tag -> JSON checkpoint path; missing files are skipped at runtime.
MODEL_REGISTRY = {
'v2_bob': ROOT / 'nautilus_dolphin' / 'dvae' / 'convnext_model_v2.json',
'v4': ROOT / 'models' / 'convnext_dvae_ML' / 'convnext_model_v4_ep22_best.json',
'v5': ROOT / 'models' / 'dolphin_training' / 'winning_models' / 'v5_ep28_best_total_loss.json',
'v6': ROOT / 'models' / 'dolphin_training' / 'winning_models' / 'v6_ep8_best_val_loss.json',
'v7': ROOT / 'models' / 'dolphin_training' / 'winning_models' / 'v7_ep10_best_generalization.json',
# v8: step=2 training (1.2M windows), only 2 epochs — val=34.92. Experimental.
'v8': ROOT / 'models' / 'dolphin_training' / 'dvae' / 'v8_step2.json',
# v6.5 (ANOMALOUS — DO NOT USE: broken during training per researcher note)
}
# Parquet feature cache used to build the shared probe set.
KLINES_DIR = ROOT / 'vbt_cache_klines'
# 56-day evaluation window (identical to the confirmed exp13 v2 baseline).
DATE_START = '2025-12-31'
DATE_END = '2026-02-25'
CALMAR_THR = 7.83 # 102% of D_LIQ_GOLD baseline 7.67
# Feature channels read from each parquet file, in model input order.
FEATURE_COLS = [
'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
'v300_lambda_max_velocity', 'v750_lambda_max_velocity',
'vel_div', 'instability_50', 'instability_150',
]
# Probe window length in bars (matches the model's T_in).
T_WIN = 32
# ── Proxy_B dim identification ─────────────────────────────────────────────────
from dvae.convnext_dvae import ConvNeXtVAE
def _load_model(path: Path):
    """Load a ConvNeXtVAE checkpoint plus its normalization statistics.

    Parameters
    ----------
    path : Path
        JSON checkpoint file containing 'architecture', weights, and
        optionally 'norm_mean' / 'norm_std'.

    Returns
    -------
    (model, norm_mean, norm_std, meta)
        norm_mean / norm_std are np.ndarray, or None when the checkpoint
        carries no normalization stats.
    """
    # JSON is UTF-8 by spec; be explicit so a Windows locale default can't break it.
    with open(path, encoding='utf-8') as f:
        meta = json.load(f)
    arch = meta['architecture']
    m = ConvNeXtVAE(
        C_in=arch['C_in'], T_in=arch['T_in'],
        z_dim=arch['z_dim'], base_ch=arch['base_ch'],
        n_blocks=arch.get('n_blocks', 3), seed=42,  # fixed seed → reproducible init
    )
    m.load(str(path))
    nm = np.array(meta['norm_mean']) if 'norm_mean' in meta else None
    ns = np.array(meta['norm_std']) if 'norm_std' in meta else None
    return m, nm, ns, meta
def _build_probe_set():
    """Sample probe windows from 56-day window; shared across all models.

    Returns
    -------
    (probes_raw, proxy_B_vals)
        probes_raw   : (N, 11, T_WIN) float64 — 7 features + raw proxy_B
                       channel + 3 zeroed external-factor channels.
        proxy_B_vals : (N,) float64 — per-window mean of the raw proxy_B
                       signal (correlation target for the dim probe).

    Raises
    ------
    RuntimeError
        When no usable window could be built (missing/short/NaN files) —
        previously np.stack([]) raised an opaque ValueError here.
    """
    files = sorted(KLINES_DIR.glob('*.parquet'))
    period = [f for f in files if DATE_START <= f.stem[:10] <= DATE_END]
    rng = np.random.default_rng(42)  # fixed seed: every model sees identical probes
    probes_raw, proxy_B_vals = [], []
    step = max(1, len(period) // 60)  # subsample to ~60 files for speed
    for f in period[::step]:
        try:
            df = pd.read_parquet(f, columns=FEATURE_COLS).dropna()
            if len(df) < T_WIN + 10:
                continue
            # Pick a window near the middle of the file (±30 bars of jitter).
            mid = len(df) // 2
            pos = int(rng.integers(max(0, mid - 30), min(len(df) - T_WIN, mid + 30)))
            arr = df[FEATURE_COLS].values[pos:pos + T_WIN].astype(np.float64)
            # Raw proxy_B = instability_50 − v750_lambda_max_velocity.
            proxy_B = (arr[:, 5] - arr[:, 3]).reshape(-1, 1)
            exf = np.zeros((T_WIN, 3), dtype=np.float64)  # external factors zeroed for probing
            arr11 = np.concatenate([arr, proxy_B, exf], axis=1).T  # (11, T)
            if not np.isfinite(arr11).all():
                continue
            probes_raw.append(arr11)
            proxy_B_vals.append(float(proxy_B.mean()))
        except Exception:
            # Best-effort: one unreadable parquet must not abort the sweep.
            pass
    if not probes_raw:
        raise RuntimeError(
            f'No probe windows built from {KLINES_DIR} '
            f'({DATE_START}..{DATE_END}) — check the parquet cache.')
    return np.stack(probes_raw), np.array(proxy_B_vals)
def probe_model(tag: str, path: Path, probes_raw: np.ndarray,
                proxy_B_arr: np.ndarray) -> dict:
    """Identify proxy_B dim and report calibration for one model.

    Encodes the shared probe set, correlates every non-collapsed latent dim
    with the raw proxy_B target, then reports the best-|r| dim, its
    calibration (sign behavior over the window), and whether the model is
    usable for exp13 scaling (always-positive AND |r| > 0.5).
    """
    print(f"\n{'='*60}")
    print(f" PROBE: {tag} ({path.name})")
    print(f"{'='*60}")
    model, nm, ns, meta = _load_model(path)
    ep = meta.get('epoch', '?')
    val = meta.get('val_loss', 0.0)
    print(f" epoch={ep} val_loss={val:.5f}")
    probes = probes_raw.copy()
    if nm is not None:
        # Per-channel z-score with the model's own training stats, clipped ±6σ.
        probes = (probes - nm[None, :, None]) / ns[None, :, None]
        np.clip(probes, -6., 6., out=probes)
    z_mu, _ = model.encode(probes)
    z_std = z_mu.std(0)
    corrs = []
    for d in range(z_mu.shape[1]):
        if z_std[d] > 0.01:  # skip collapsed (near-constant) dims
            r = float(np.corrcoef(z_mu[:, d], proxy_B_arr)[0, 1])
            if np.isfinite(r):
                corrs.append((abs(r), r, d))
    corrs.sort(reverse=True)
    best_abs_r, best_r, best_dim = corrs[0] if corrs else (0.0, 0.0, -1)
    if best_dim >= 0:
        z_best = z_mu[:, best_dim]
    else:
        # FIX: previously z_mu[:, -1] silently calibrated the LAST dim when no
        # dim correlated; use zeros so the model reads as plainly unusable.
        z_best = np.zeros(len(proxy_B_arr))
    z_min, z_max = float(z_best.min()), float(z_best.max())
    always_pos = z_min > 0
    always_neg = z_max < 0
    if always_pos:
        calib = 'ALWAYS_POSITIVE'
    elif always_neg:
        calib = 'ALWAYS_NEGATIVE'
    else:
        calib = f'MIXED[{z_min:+.3f},{z_max:+.3f}]'
    # Separation between high- and low-proxy_B quartiles in the chosen dim.
    q75, q25 = np.percentile(proxy_B_arr, 75), np.percentile(proxy_B_arr, 25)
    z_hi = float(z_best[proxy_B_arr >= q75].mean())
    z_lo = float(z_best[proxy_B_arr <= q25].mean())
    sep = abs(z_hi - z_lo)
    # Also find best POSITIVELY correlated dim (same sign as v2 z[13]).
    pos_corrs = [(abs_r, r, d) for abs_r, r, d in corrs if r > 0]
    pos_dim = pos_corrs[0][2] if pos_corrs else best_dim
    pos_r = pos_corrs[0][1] if pos_corrs else best_r
    usable = always_pos and best_abs_r > 0.5
    print(f" proxy_B dim : z[{best_dim}] r={best_r:+.4f} "
          f"(best |r|) best positive: z[{pos_dim}] r={pos_r:+.4f}")
    print(f" Top-5 : " + ' '.join(f'z[{d}]={r:+.3f}' for _, r, d in corrs[:5]))
    print(f" Calibration : {calib} sep={sep:.4f}")
    print(f" Usable : {'YES ✓' if usable else 'CAUTION ⚠ (will skip exp13 sweep)'}")
    return {
        'tag': tag, 'path': str(path),
        'epoch': ep, 'val_loss': val,
        'proxy_B_dim': best_dim, 'proxy_B_r': best_r,
        'proxy_B_dim_pos': pos_dim, 'proxy_B_r_pos': pos_r,
        'calibration': calib, 'always_positive': always_pos,
        'separation': sep, 'top5': [(r, d) for _, r, d in corrs[:5]],
    }
# ── exp13 runner per model ─────────────────────────────────────────────────────
def run_exp13_for_model(probe: dict, subset_days: int, top_k: int,
                        only_config: str = None) -> dict:
    """Patch MODEL_1M + PROXY_B_DIM into exp13, call main(), read results JSON.

    Returns a per-model summary dict (pass count vs CALMAR_THR, best config,
    dROI/dDD vs the exp13 baseline), or {'tag', 'error'} when exp13 wrote
    no results file.
    """
    import dvae.exp13_multiscale_sweep as e13
    importlib.reload(e13)  # fresh state: clears cached signals from prior model
    # Patch module-level constants AFTER reload (reload would reset them).
    e13.MODEL_1M = Path(probe['path'])
    e13.PROXY_B_DIM = probe['proxy_B_dim']
    out_file = ROOT / f"exp13_sweep_{probe['tag']}_results.json"
    e13.OUT_FILE = out_file
    # Patch sys.argv so argparse inside e13.main() picks up our params;
    # FIX: restore it afterwards so later code sees the real CLI args.
    saved_argv = sys.argv
    sys.argv = [
        'exp13_multiscale_sweep.py',
        '--subset', str(subset_days),
        '--top_k', str(top_k),
        '--skip_sets', 'B,Bp',  # skip 5s sets (no 5s model per model variant)
    ]
    if only_config:
        sys.argv += ['--only_config', only_config, '--skip_5s']
    print(f"\n{'='*60}")
    print(f" EXP13: {probe['tag']} z[{probe['proxy_B_dim']}] r={probe['proxy_B_r']:+.4f}")
    print(f" subset={subset_days}d top_k={top_k} out={out_file.name}")
    print(f"{'='*60}")
    t0 = time.time()
    try:
        e13.main()
    finally:
        sys.argv = saved_argv
    elapsed = time.time() - t0
    # Read saved results.
    if not out_file.exists():
        print(f"[ERROR] Results file not written: {out_file}")
        return {'tag': probe['tag'], 'error': 'no results file'}
    with open(out_file, encoding='utf-8') as f:
        raw = json.load(f)
    phase2 = raw.get('phase2', {})
    base = phase2.get('baseline_full') or {}  # hoisted: used for Calmar/ROI/DD below
    p2_list = phase2.get('results', [])
    if not p2_list:
        # Full run (subset=0): use phase1_results as phase 2.
        p2_list = raw.get('phase1_results', [])
    baseline_cal = base.get('Calmar', 0.0)
    n_pass = sum(1 for r in p2_list if r.get('Calmar', 0) > CALMAR_THR)
    best = max(p2_list, key=lambda r: r.get('Calmar', 0)) if p2_list else {}
    base_roi = base.get('ROI', 0.0)
    return {
        'tag': probe['tag'],
        'val_loss': probe['val_loss'],
        'proxy_B_dim': probe['proxy_B_dim'],
        'proxy_B_r': probe['proxy_B_r'],
        'calibration': probe['calibration'],
        'baseline_calmar': baseline_cal,
        'baseline_roi': base_roi,
        'n_phase2': len(p2_list),
        'n_pass': n_pass,
        'best_config': best.get('name', '?'),
        'best_roi': best.get('ROI', 0.0),
        'best_calmar': best.get('Calmar', 0.0),
        'best_droi': best.get('ROI', 0.0) - base_roi,
        'best_ddd': best.get('DD', 0.0) - base.get('DD', 0.0),
        'elapsed_s': round(elapsed),
        'results_file': str(out_file),
    }
# ── Comparison tables ──────────────────────────────────────────────────────────
def _print_probe_table(probes: dict):
print(f"\n{'='*72}")
print(f" MODEL PROBE SUMMARY")
print(f"{'='*72}")
print(f" {'Tag':10s} {'ValLoss':>8s} {'Dim':>5s} {'r':>7s} {'Sep':>6s} {'Calibration':22s} OK?")
print(f" {'-'*72}")
for tag, p in probes.items():
ok = '' if p['always_positive'] and abs(p['proxy_B_r']) > 0.5 else ''
print(f" {tag:10s} {p['val_loss']:8.4f} "
f"z[{p['proxy_B_dim']:2d}] {p['proxy_B_r']:+7.4f} "
f"{p['separation']:6.4f} {p['calibration']:22s} {ok}")
def _print_comparison_table(results: list):
    """Print the final cross-model exp13 comparison, best Calmar first.

    FIX: the per-row pass flag (both branches) and the closing separator
    were stripped to '' by the web viewer's ambiguous-Unicode escape;
    restored (✓ flag, '-'*84 rule) and re-aligned the reference row, which
    had lost its flag column.
    """
    if not results:
        return
    print(f"\n{'='*84}")
    print(f" EXP13 MULTI-MODEL FINAL COMPARISON")
    print(f" Threshold: Calmar > {CALMAR_THR} | Reference: v2_BOB — 9/20 PASS dROI=+4.59pp Cal=7.87")
    print(f"{'='*84}")
    print(f" {'Tag':10s} {'ValLoss':>8s} {'Dim':>5s} {'r':>7s} "
          f"{'Pass':>6s} {'BestCal':>8s} {'BestdROI':>9s} {'BestdDD':>8s} Best Config")
    print(f" {'-'*84}")
    for r in sorted(results, key=lambda x: x.get('best_calmar', 0), reverse=True):
        pass_str = f"{r['n_pass']:2d}/{r['n_phase2']}"
        flag = '✓' if r['n_pass'] > 0 else ' '  # at least one config beat the threshold
        print(f" {r['tag']:10s} {r['val_loss']:8.4f} "
              f"z[{r['proxy_B_dim']:2d}] {r['proxy_B_r']:+7.4f} "
              f"{pass_str:>6s} {flag} {r['best_calmar']:8.3f} "
              f"{r['best_droi']:+9.2f}pp {r['best_ddd']:+8.2f}pp {r['best_config']}")
    print(f" {'-'*84}")
    # Reference row: confirmed v2 BOB baseline, same column layout as above.
    print(f" {'v2_bob REF':10s} {'18.0024':>8s} z[13] {'+0.9332':>7s} "
          f"{'9/20':>6s} ✓ {'7.870':>8s} {'+4.59':>9s}pp {'0.00':>8s}pp A_P5_M2_W1_a0.5")
# ── Main ───────────────────────────────────────────────────────────────────────
def main():
    """CLI entry: probe all selected models, then run the exp13 sweep per model.

    Flow: parse args → select available registry models → build one shared
    probe set → probe each model's proxy_B dim → run exp13 for every
    always-positive model → print comparison table → save combined JSON.

    FIX: banner separators and date-range arrows had been stripped to empty
    strings by the web viewer's ambiguous-Unicode escape; restored using the
    file's surviving conventions ('='*60 rules, '→' as in the summary line).
    """
    parser = argparse.ArgumentParser(description='Multi-model exp13 test harness')
    parser.add_argument('--models', nargs='+', default=None,
                        help='Model tags to run (default: all available)')
    parser.add_argument('--probe_only', action='store_true',
                        help='Dim probe only — skip exp13 sweep')
    parser.add_argument('--subset', type=int, default=14,
                        help='Phase 1 days (default 14)')
    parser.add_argument('--top_k', type=int, default=20,
                        help='Phase 2 top-k configs (default 20)')
    parser.add_argument('--fast_check', type=str, default='',
                        help='Skip Phase 1; run just this config on full window. '
                             'Default known-winner: A_P5_M2_W1_a0.5')
    args = parser.parse_args()
    if args.fast_check == 'winner':
        args.fast_check = 'A_P5_M2_W1_a0.5'  # shorthand
    # Select models: known tags whose checkpoint file exists on disk.
    tags = args.models or list(MODEL_REGISTRY.keys())
    available = {t: MODEL_REGISTRY[t] for t in tags
                 if t in MODEL_REGISTRY and Path(MODEL_REGISTRY[t]).exists()}
    skipped = [t for t in tags if t not in MODEL_REGISTRY]
    missing = [t for t in tags if t in MODEL_REGISTRY and t not in available]
    if skipped:
        print(f"[WARN] Unknown tags: {skipped}")
    if missing:
        print(f"[WARN] File not found (transfer from DOLPHIN Linux): {missing}")
    if not available:
        print("No model files found. Check MODEL_REGISTRY or --models flag.")
        return
    print(f"\n{'='*60}")
    print(f" EXP13 MULTI-MODEL SWEEP")
    print(f" Models : {list(available.keys())}")
    print(f" Window : {DATE_START} → {DATE_END}")
    print(f" Threshold: Calmar > {CALMAR_THR}")
    print(f" Phase1 : {args.subset} days Phase2 top-k: {args.top_k}")
    print(f"{'='*60}")
    # Build shared probe set (identical windows for every model).
    print(f"\nBuilding probe set ({DATE_START} → {DATE_END})...")
    probes_raw, proxy_B_arr = _build_probe_set()
    print(f" {len(probes_raw)} windows proxy_B: μ={proxy_B_arr.mean():+.4f} σ={proxy_B_arr.std():.4f}")
    # Step 1: probe all models.
    probe_reports = {tag: probe_model(tag, path, probes_raw, proxy_B_arr)
                     for tag, path in available.items()}
    _print_probe_table(probe_reports)
    if args.probe_only:
        return
    # Step 2: run exp13 for each usable model (exp13 scaling requires an
    # always-positive proxy_B dim — see module docstring).
    sweep_results = []
    for tag, probe in probe_reports.items():
        if not probe['always_positive']:
            print(f"\n[SKIP] {tag}: calib={probe['calibration']} — not always-positive")
            continue
        try:
            summary = run_exp13_for_model(probe, args.subset, args.top_k,
                                          only_config=args.fast_check or None)
            sweep_results.append(summary)
        except Exception as ex:
            # One broken model must not abort the remaining sweep.
            import traceback
            print(f"\n[ERROR] {tag}: {ex}")
            traceback.print_exc()
    _print_comparison_table(sweep_results)
    # Save combined summary (default=str stringifies Path objects etc.).
    out = ROOT / 'exp13_model_sweep_results.json'
    with open(out, 'w', encoding='utf-8') as f:
        json.dump({
            'probes': probe_reports,
            'sweep': sweep_results,
            'threshold': CALMAR_THR,
            'window': {'start': DATE_START, 'end': DATE_END},
            'ref_v2_bob': {'n_pass': 9, 'best_droi': 4.59, 'best_calmar': 7.87},
        }, f, indent=2, default=str)
    print(f"\nSummary → {out}")
if __name__ == '__main__':
main()