"""
proto_v2_query.py — Back-of-envelope z-space probe for convnext_model_v2.json

Queries the current best checkpoint against the 56-day backtest period
(Dec 31 2025 – Feb 25 2026) to assess signal quality vs the ep=17 baseline.

Reports:
  1. z_active, z_post_std (latent health)
  2. proxy_B dim + r (encoding quality)
  3. Calibration: is z_proxy_B still always negative for this period?
  4. Split test: top-25% vs bottom-25% proxy_B days — does z separate them?

ExF columns (dvol_btc, fng, funding_btc) are zero-filled — same as exp13 fallback.
Safe to run while training is still in progress (read-only, no GPU).

Klines files that cannot be read or sliced are skipped with a warning on
stderr; the report itself goes to stdout.
"""
import os, sys, json, glob
import io

# Re-wrap stdout as UTF-8 so the non-ASCII characters used in the report
# (–, →, ×) never raise on consoles with a narrower default encoding.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')

import numpy as np
import pandas as pd

# Repository root is three directory levels above this file.
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
DVAE_DIR = os.path.join(ROOT, 'nautilus_dolphin', 'dvae')
# Must happen before the `convnext_dvae` import below.
sys.path.insert(0, DVAE_DIR)

MODEL_V2 = os.path.join(DVAE_DIR, 'convnext_model_v2.json')
MODEL_EP17 = os.path.join(DVAE_DIR, 'convnext_model.json')
KLINES_DIR = os.path.join(ROOT, 'vbt_cache_klines')

FEATURE_COLS = [
    'v50_lambda_max_velocity',
    'v150_lambda_max_velocity',
    'v300_lambda_max_velocity',
    'v750_lambda_max_velocity',
    'vel_div',
    'instability_50',
    'instability_150',
]
EXF_COLS = ['dvol_btc', 'fng', 'funding_btc']  # zero-filled
T_WIN = 32

# 56-day backtest window
DATE_START = '2025-12-31'
DATE_END = '2026-02-25'

# ── load model ───────────────────────────────────────────────────────────────
from convnext_dvae import ConvNeXtVAE


def load_model(path):
    """Load a ConvNeXtVAE checkpoint plus its normalisation statistics.

    Args:
        path: Path to the JSON checkpoint file.

    Returns:
        Tuple ``(model, norm_mean, norm_std, meta)`` where ``norm_mean`` and
        ``norm_std`` are numpy arrays, or ``None`` when the corresponding key
        is absent from the checkpoint, and ``meta`` is the parsed JSON dict.

    Raises:
        KeyError: if the checkpoint's ``architecture`` entry lacks ``C_in``,
            ``T_in``, ``z_dim`` or ``base_ch``.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(path, encoding='utf-8') as f:
        meta = json.load(f)
    arch = meta.get('architecture', {})
    m = ConvNeXtVAE(
        C_in=arch['C_in'],
        T_in=arch['T_in'],
        z_dim=arch['z_dim'],
        base_ch=arch['base_ch'],
        n_blocks=arch.get('n_blocks', 3),
        seed=42,
    )
    m.load(path)
    nm = np.array(meta['norm_mean']) if 'norm_mean' in meta else None
    ns = np.array(meta['norm_std']) if 'norm_std' in meta else None
    return m, nm, ns, meta


print("Loading v2 checkpoint...")
model_v2, nm_v2, ns_v2, meta_v2 = load_model(MODEL_V2)
print(f" ep={meta_v2.get('epoch')} val_loss={meta_v2.get('val_loss',0):.5f}")

ep17_exists = os.path.exists(MODEL_EP17)
if ep17_exists:
    print("Loading ep=17 baseline for comparison...")
    model_17, nm_17, ns_17, meta_17 = load_model(MODEL_EP17)
    print(f" ep={meta_17.get('epoch')} val_loss={meta_17.get('val_loss',0):.5f}")

# ── build probe set from 56-day window ───────────────────────────────────────
print(f"\nBuilding probe windows from {DATE_START} to {DATE_END}...")
files = sorted(glob.glob(os.path.join(KLINES_DIR, '*.parquet')))
# filter to date range: the first 10 characters of each basename are compared
# as strings against the ISO-formatted window bounds
period_files = [f for f in files
                if DATE_START <= os.path.basename(f)[:10] <= DATE_END]
print(f" {len(period_files)} klines files in period")

rng = np.random.default_rng(42)
probes_raw, proxy_B_vals, file_dates = [], [], []
step = max(1, len(period_files) // 60)  # ~60 probes across period
n_skipped = 0
for f in period_files[::step]:
    try:
        df = pd.read_parquet(f, columns=FEATURE_COLS).dropna()
        if len(df) < T_WIN + 10:
            continue
        # sample from middle of each day (avoid open/close noise)
        mid = len(df) // 2
        pos = int(rng.integers(max(0, mid - 30), min(len(df) - T_WIN, mid + 30)))
        arr = df[FEATURE_COLS].values[pos:pos+T_WIN].astype(np.float64)  # (T, 7)
        proxy_B = (arr[:, 5] - arr[:, 3]).reshape(-1, 1)  # instability_50 - v750
        exf = np.zeros((T_WIN, 3), dtype=np.float64)  # zero-fill ExF
        arr11 = np.concatenate([arr, proxy_B, exf], axis=1).T  # (11, T)
        if not np.isfinite(arr11).all():
            continue
        probes_raw.append(arr11)
        proxy_B_vals.append(float(proxy_B.mean()))
        file_dates.append(os.path.basename(f)[:10])
    except Exception as exc:
        # Still best-effort (one bad file must not abort the probe), but no
        # longer silent: a missing column or corrupt file is reported on
        # stderr so a shrunken probe set can be explained.
        n_skipped += 1
        print(f"[warn] skipping {os.path.basename(f)}: "
              f"{type(exc).__name__}: {exc}", file=sys.stderr)

if n_skipped:
    print(f"[warn] {n_skipped} klines file(s) skipped due to errors",
          file=sys.stderr)

if not probes_raw:
    # np.stack([]) would raise an opaque "need at least one array" ValueError.
    sys.exit(f"No usable probe windows found in {KLINES_DIR} "
             f"for {DATE_START}..{DATE_END}; nothing to query.")

probes_raw = np.stack(probes_raw)  # (N, 11, T)
proxy_B_arr = np.array(proxy_B_vals)
print(f" Probe set: {probes_raw.shape} ({len(probes_raw)} windows)")


def normalise(probes, nm, ns):
    """Standardise probes per channel and clip the result to [-6, 6].

    Args:
        probes: Array indexed (window, channel, time).
        nm: Per-channel mean, or ``None`` to skip normalisation.
        ns: Per-channel std; only read when ``nm`` is not ``None``.

    Returns:
        ``probes`` itself when ``nm`` is ``None``; otherwise a new array.
    """
    if nm is None:
        return probes
    p = (probes - nm[None, :, None]) / ns[None, :, None]
    np.clip(p, -6., 6., out=p)
    return p


def run_query(model, nm, ns, label):
    """Encode the module-level probe set with ``model`` and print a report.

    Reads the globals ``probes_raw`` and ``proxy_B_arr``.

    Args:
        model: Object exposing ``encode(x) -> (z_mu, z_logvar)`` and
            ``decode(z)``; outputs are used as numpy arrays
            (presumably what ConvNeXtVAE returns — confirm).
        nm: Normalisation mean passed to ``normalise`` (or ``None``).
        ns: Normalisation std passed to ``normalise``.
        label: Heading printed at the top of the report.

    Returns:
        ``(z_mu, corrs)`` where ``corrs`` is a list of ``(abs_r, r, dim)``
        tuples for the active latent dims with a finite correlation against
        proxy_B, sorted in descending tuple order (largest ``|r|`` first).
    """
    probes = normalise(probes_raw, nm, ns)
    z_mu, z_logvar = model.encode(probes)
    x_recon = model.decode(z_mu)

    # 1. Latent health
    z_std_per_dim = z_mu.std(0)
    z_active = int((z_std_per_dim > 0.01).sum())
    z_post_std = float(np.exp(0.5 * z_logvar).mean())
    z_mean_all = float(z_mu.mean())  # computed but not reported

    # 2. Reconstruction (error is measured in normalised space)
    recon_err = ((probes - x_recon) ** 2).mean(axis=(-1, -2))
    recon_p50 = float(np.median(recon_err))

    # 3. proxy_B correlation — find best dim
    corrs = []
    for d in range(z_mu.shape[1]):
        if z_std_per_dim[d] > 0.01:
            r = float(np.corrcoef(z_mu[:, d], proxy_B_arr)[0, 1])
            if np.isfinite(r):
                corrs.append((abs(r), r, d))
    corrs.sort(reverse=True)
    # With no usable dim, best_dim is -1, so the indexing below reads the
    # last latent dim and the report labels it z[-1].
    best_abs_r, best_r, best_dim = corrs[0] if corrs else (0, 0, -1)

    # 4. Calibration: is best_dim always negative for this period?
    z_best = z_mu[:, best_dim]
    z_min, z_max = float(z_best.min()), float(z_best.max())
    always_neg = z_max < 0
    always_pos = z_min > 0
    if always_neg:
        calib = "ALWAYS NEGATIVE"
    elif always_pos:
        calib = "ALWAYS POSITIVE"
    else:
        calib = f"MIXED [{z_min:+.3f}, {z_max:+.3f}]"

    # 5. Split test: top-25% vs bottom-25% proxy_B days
    q75 = np.percentile(proxy_B_arr, 75)
    q25 = np.percentile(proxy_B_arr, 25)
    hi_mask = proxy_B_arr >= q75
    lo_mask = proxy_B_arr <= q25
    # Need more than two probes in a quartile for the mean to be reported.
    z_hi = float(z_best[hi_mask].mean()) if hi_mask.sum() > 2 else float('nan')
    z_lo = float(z_best[lo_mask].mean()) if lo_mask.sum() > 2 else float('nan')
    sep = abs(z_hi - z_lo)

    print(f"\n{'='*60}")
    print(f" {label}")
    print(f"{'='*60}")
    print(f" z_active : {z_active} / {z_mu.shape[1]}")
    print(f" z_post_std : {z_post_std:.4f} (healthy: 0.6–1.2)")
    print(f" recon_p50 : {recon_p50:.4f} (ep17 baseline: 0.2999)")
    print(f"\n proxy_B dim : z[{best_dim}] r={best_r:+.4f} (ep17 had z[10] r=+0.973)")
    print(f" Top-5 z×proxy_B corrs:")
    for _, r, d in corrs[:5]:
        bar = '#' * int(abs(r) * 30)
        print(f" z[{d:2d}] r={r:+.4f} {bar}")
    print(f"\n Calibration : {calib}")
    print(f" z[{best_dim}] range : [{z_min:+.4f}, {z_max:+.4f}]")
    print(f" z[{best_dim}] mean : {z_best.mean():+.4f}")
    print(f"\n Split test (proxy_B quartiles):")
    print(f" top-25% proxy_B → z[{best_dim}] mean = {z_hi:+.4f}")
    print(f" bot-25% proxy_B → z[{best_dim}] mean = {z_lo:+.4f}")
    print(f" separation = {sep:.4f} (>0.3 useful, >0.6 good)")

    return z_mu, corrs


print("\n" + "="*60)
print(f"proxy_B stats over {len(probes_raw)} probes:")
print(f" mean={proxy_B_arr.mean():+.4f} std={proxy_B_arr.std():.4f} "
      f"min={proxy_B_arr.min():+.4f} max={proxy_B_arr.max():+.4f}")

z_v2, corrs_v2 = run_query(
    model_v2, nm_v2, ns_v2,
    f"v2 ep={meta_v2.get('epoch')} val={meta_v2.get('val_loss',0):.5f} [CURRENT BEST]")

if ep17_exists:
    z_17, corrs_17 = run_query(
        model_17, nm_17, ns_17,
        f"ep17 val={meta_17.get('val_loss',0):.5f} [PRODUCTION BASELINE]")

    # Side-by-side summary
    print(f"\n{'='*60}")
    print(" COMPARISON SUMMARY")
    print(f"{'='*60}")
    r_v2 = corrs_v2[0][1] if corrs_v2 else 0
    r_17 = corrs_17[0][1] if corrs_17 else 0
    print(f" proxy_B r : v2={r_v2:+.4f} vs ep17={r_17:+.4f} "
          f"({'BETTER' if abs(r_v2) > abs(r_17) else 'WORSE' if abs(r_v2) < abs(r_17) else 'SAME'})")

print("\nDone.")