Files
DOLPHIN/nautilus_dolphin/dvae/proto_v2_query.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

194 lines
8.0 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
proto_v2_query.py — Back-of-envelope z-space probe for convnext_model_v2.json
Queries the current best checkpoint against the 56-day backtest period
(Dec 31 2025 Feb 25 2026) to assess signal quality vs the ep=17 baseline.
Reports:
1. z_active, z_post_std (latent health)
2. proxy_B dim + r (encoding quality)
3. Calibration: is z_proxy_B still always negative for this period?
4. Split test: top-25% vs bottom-25% proxy_B days — does z separate them?
ExF columns (dvol_btc, fng, funding_btc) are zero-filled — same as exp13 fallback.
Safe to run while training is still in progress (read-only, no GPU).
"""
import os, sys, json, glob
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
import numpy as np
import pandas as pd
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
DVAE_DIR = os.path.join(ROOT, 'nautilus_dolphin', 'dvae')
sys.path.insert(0, DVAE_DIR)
MODEL_V2 = os.path.join(DVAE_DIR, 'convnext_model_v2.json')
MODEL_EP17 = os.path.join(DVAE_DIR, 'convnext_model.json')
KLINES_DIR = os.path.join(ROOT, 'vbt_cache_klines')
FEATURE_COLS = [
'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
'v300_lambda_max_velocity', 'v750_lambda_max_velocity',
'vel_div', 'instability_50', 'instability_150',
]
EXF_COLS = ['dvol_btc', 'fng', 'funding_btc'] # zero-filled
T_WIN = 32
# 56-day backtest window
DATE_START = '2025-12-31'
DATE_END = '2026-02-25'
# ── load model ───────────────────────────────────────────────────────────────
from convnext_dvae import ConvNeXtVAE


def load_model(path):
    """Build a ConvNeXtVAE from a JSON checkpoint and load its weights.

    Returns a 4-tuple (model, norm_mean, norm_std, metadata).  The
    normalisation vectors are None when the checkpoint carries no
    'norm_mean' / 'norm_std' entries.
    """
    with open(path) as fh:
        meta = json.load(fh)
    arch = meta.get('architecture', {})
    model = ConvNeXtVAE(
        C_in=arch['C_in'],
        T_in=arch['T_in'],
        z_dim=arch['z_dim'],
        base_ch=arch['base_ch'],
        n_blocks=arch.get('n_blocks', 3),
        seed=42,
    )
    model.load(path)
    norm_mean = np.array(meta['norm_mean']) if 'norm_mean' in meta else None
    norm_std = np.array(meta['norm_std']) if 'norm_std' in meta else None
    return model, norm_mean, norm_std, meta
print(f"Loading v2 checkpoint...")
model_v2, nm_v2, ns_v2, meta_v2 = load_model(MODEL_V2)
print(f" ep={meta_v2.get('epoch')} val_loss={meta_v2.get('val_loss',0):.5f}")
ep17_exists = os.path.exists(MODEL_EP17)
if ep17_exists:
print(f"Loading ep=17 baseline for comparison...")
model_17, nm_17, ns_17, meta_17 = load_model(MODEL_EP17)
print(f" ep={meta_17.get('epoch')} val_loss={meta_17.get('val_loss',0):.5f}")
# ── build probe set from 56-day window ───────────────────────────────────────
# Walks the per-day klines parquet files in the backtest range and samples
# one T_WIN-long window from the middle of ~60 evenly spaced days.
print(f"\nBuilding probe windows from {DATE_START} to {DATE_END}...")
files = sorted(glob.glob(os.path.join(KLINES_DIR, '*.parquet')))
# filter to date range (filenames are assumed to start with YYYY-MM-DD —
# TODO confirm against the cache naming convention)
period_files = [f for f in files
                if DATE_START <= os.path.basename(f)[:10] <= DATE_END]
print(f" {len(period_files)} klines files in period")
rng = np.random.default_rng(42)  # fixed seed → reproducible probe positions
probes_raw, proxy_B_vals, file_dates = [], [], []
step = max(1, len(period_files) // 60) # ~60 probes across period
for f in period_files[::step]:
    try:
        df = pd.read_parquet(f, columns=FEATURE_COLS).dropna()
        if len(df) < T_WIN + 10:
            continue  # day too short to sample a full window
        # sample from middle of each day (avoid open/close noise)
        mid = len(df) // 2
        pos = int(rng.integers(max(0, mid - 30), min(len(df) - T_WIN, mid + 30)))
        arr = df[FEATURE_COLS].values[pos:pos+T_WIN].astype(np.float64)  # (T, 7)
        proxy_B = (arr[:, 5] - arr[:, 3]).reshape(-1, 1)  # instability_50 - v750
        exf = np.zeros((T_WIN, 3), dtype=np.float64)  # zero-fill ExF
        arr11 = np.concatenate([arr, proxy_B, exf], axis=1).T  # (11, T)
        if not np.isfinite(arr11).all():
            continue  # drop windows with inf/nan features
        probes_raw.append(arr11)
        proxy_B_vals.append(float(proxy_B.mean()))
        file_dates.append(os.path.basename(f)[:10])
    except Exception as e:
        # Best-effort: one unreadable day should not kill the probe run,
        # but say which file was skipped instead of failing silently.
        print(f" WARN: skipping {os.path.basename(f)}: {e}")
if not probes_raw:
    # np.stack([]) would raise an opaque ValueError — fail with a clear message.
    sys.exit(f"No usable probe windows in {KLINES_DIR} for {DATE_START}..{DATE_END}")
probes_raw = np.stack(probes_raw)  # (N, 11, T)
proxy_B_arr = np.array(proxy_B_vals)
print(f" Probe set: {probes_raw.shape} ({len(probes_raw)} windows)")
def normalise(probes, nm, ns):
    """Standardise `probes` per channel with mean `nm` / std `ns`, clipped to ±6.

    `nm` is None when the checkpoint carried no normalisation stats; the
    input is then returned unchanged.
    """
    if nm is None:
        return probes
    scaled = (probes - nm[None, :, None]) / ns[None, :, None]
    return np.clip(scaled, -6., 6.)
def run_query(model, nm, ns, label):
    """Encode the global probe set with `model` and print a diagnostic report.

    Reads module-level `probes_raw` and `proxy_B_arr`.  Prints latent
    health, reconstruction error, proxy_B correlations, calibration and a
    quartile split test under the heading `label`.

    Returns (z_mu, corrs) where corrs is a list of (|r|, r, dim) sorted by
    |r| descending over the active latent dims.
    """
    probes = normalise(probes_raw, nm, ns)
    z_mu, z_logvar = model.encode(probes)
    x_recon = model.decode(z_mu)
    # 1. Latent health
    z_std_per_dim = z_mu.std(0)
    z_active = int((z_std_per_dim > 0.01).sum())       # dims that have not collapsed
    z_post_std = float(np.exp(0.5 * z_logvar).mean())  # mean posterior std
    # 2. Reconstruction
    recon_err = ((probes - x_recon) ** 2).mean(axis=(-1, -2))
    recon_p50 = float(np.median(recon_err))
    # 3. proxy_B correlation — find best dim
    corrs = []
    for d in range(z_mu.shape[1]):
        if z_std_per_dim[d] > 0.01:  # only correlate active dims
            r = float(np.corrcoef(z_mu[:, d], proxy_B_arr)[0, 1])
            if np.isfinite(r):
                corrs.append((abs(r), r, d))
    corrs.sort(reverse=True)
    best_abs_r, best_r, best_dim = corrs[0] if corrs else (0, 0, -1)
    # NOTE(review): when no dim correlates, best_dim=-1 silently indexes the
    # last latent dim below — tolerable for a read-only diagnostic script.
    # 4. Calibration: is best_dim always negative for this period?
    z_best = z_mu[:, best_dim]
    z_min, z_max = float(z_best.min()), float(z_best.max())
    always_neg = z_max < 0
    always_pos = z_min > 0
    calib = "ALWAYS NEGATIVE" if always_neg else ("ALWAYS POSITIVE" if always_pos else
            f"MIXED [{z_min:+.3f}, {z_max:+.3f}]")
    # 5. Split test: top-25% vs bottom-25% proxy_B days
    q75 = np.percentile(proxy_B_arr, 75)
    q25 = np.percentile(proxy_B_arr, 25)
    hi_mask = proxy_B_arr >= q75
    lo_mask = proxy_B_arr <= q25
    z_hi = float(z_best[hi_mask].mean()) if hi_mask.sum() > 2 else float('nan')
    z_lo = float(z_best[lo_mask].mean()) if lo_mask.sum() > 2 else float('nan')
    sep = abs(z_hi - z_lo)
    print(f"\n{'='*60}")
    print(f" {label}")
    print(f"{'='*60}")
    print(f" z_active : {z_active} / {z_mu.shape[1]}")
    # "0.6–1.2" restores the en-dash stripped from the healthy-range hint.
    print(f" z_post_std : {z_post_std:.4f} (healthy: 0.6–1.2)")
    print(f" recon_p50 : {recon_p50:.4f} (ep17 baseline: 0.2999)")
    print(f"\n proxy_B dim : z[{best_dim}] r={best_r:+.4f} (ep17 had z[10] r=+0.973)")
    print(f" Top-5 z×proxy_B corrs:")
    for _, r, d in corrs[:5]:
        bar = '#' * int(abs(r) * 30)
        print(f" z[{d:2d}] r={r:+.4f} {bar}")
    print(f"\n Calibration : {calib}")
    print(f" z[{best_dim}] range : [{z_min:+.4f}, {z_max:+.4f}]")
    print(f" z[{best_dim}] mean : {z_best.mean():+.4f}")
    print(f"\n Split test (proxy_B quartiles):")
    print(f" top-25% proxy_B → z[{best_dim}] mean = {z_hi:+.4f}")
    print(f" bot-25% proxy_B → z[{best_dim}] mean = {z_lo:+.4f}")
    print(f" separation = {sep:.4f} (>0.3 useful, >0.6 good)")
    return z_mu, corrs
print("\n" + "="*60)
print(f"proxy_B stats over {len(probes_raw)} probes:")
print(f" mean={proxy_B_arr.mean():+.4f} std={proxy_B_arr.std():.4f} "
f"min={proxy_B_arr.min():+.4f} max={proxy_B_arr.max():+.4f}")
z_v2, corrs_v2 = run_query(model_v2, nm_v2, ns_v2,
f"v2 ep={meta_v2.get('epoch')} val={meta_v2.get('val_loss',0):.5f} [CURRENT BEST]")
if ep17_exists:
z_17, corrs_17 = run_query(model_17, nm_17, ns_17,
f"ep17 val={meta_17.get('val_loss',0):.5f} [PRODUCTION BASELINE]")
# Side-by-side summary
print(f"\n{'='*60}")
print(" COMPARISON SUMMARY")
print(f"{'='*60}")
r_v2 = corrs_v2[0][1] if corrs_v2 else 0
r_17 = corrs_17[0][1] if corrs_17 else 0
print(f" proxy_B r : v2={r_v2:+.4f} vs ep17={r_17:+.4f} "
f"({'BETTER' if abs(r_v2) > abs(r_17) else 'WORSE' if abs(r_v2) < abs(r_17) else 'SAME'})")
print(f"\nDone.")