""" Exp 5 — Two-pass β VAE training. The question: does high-β pass (β=4) to "map features" followed by low-β pass (β=0.1) for "fidelity" outperform single-pass β=0.1? Theory: Pass 1 (high β): forces encoder to compress — ideally clusters similar market states together, even at cost of reconstruction quality. Acts as a structured initializer. Pass 2 (low β): fine-tunes with more fidelity, starting from the structured initializer rather than random weights. We test three variants: A. Single-pass β=0.1 (baseline, AUC≈0.6918 from flint_precursor_sweep) B. Two-pass sequential: β=4 (20ep) → β=0.1 (20ep) on same model C. Two-pass sequential: β=2 (20ep) → β=0.1 (20ep) (softer first pass) D. Dual encoder: β=4 encoder + β=0.1 encoder, z concatenated (16-dim total) Metric: OOS AUC for eigenspace stress prediction (K=5, same as e2e_precursor_auc.py). Gate: if two-pass AUC > single-pass AUC + 0.02 → meaningful improvement. Note on β=12 (the user's original suggestion): β=12 would cause complete posterior collapse even with warmup (β=6 collapsed at 0/20 dims). β=4 is the practical upper bound where some structure survives. We test β=2 and β=4 to find the sweet spot. """ import sys sys.stdout.reconfigure(encoding='utf-8', errors='replace') from pathlib import Path import numpy as np _HERE = Path(__file__).resolve().parent sys.path.insert(0, str(_HERE)) _CORPUS_PATH = str(_HERE / 'corpus_cache.npz') # ── Load T1 corpus ──────────────────────────────────────────────────────────── print("Loading 16K eigen corpus...") from corpus_builder import DolphinCorpus, OFF, T1 as T1_DIM corpus = DolphinCorpus.load(_CORPUS_PATH) mask = corpus.mask[:, 1] X_e = corpus.X[mask] T1_data = X_e[:, OFF[1]:OFF[1]+T1_DIM].copy() # (16607, 20) N = len(T1_data) print(f" N={N} T1 shape={T1_data.shape}") # ── Stress labels (K=5) ─────────────────────────────────────────────────────── K = 5 inst_w50 = T1_data[:, 3] gap_w50 = T1_data[:, 2] vel_w750 = T1_data[:, 16] inst_p90 = np.percentile(inst_w50, 90) gap_p10 = np.percentile(gap_w50, 10) labels = np.zeros(N, dtype=np.float32) for i in range(N - K): if np.any(inst_w50[i+1:i+1+K] > inst_p90) and np.any(gap_w50[i+1:i+1+K] < gap_p10): labels[i] = 1.0 print(f" Stress labels: {labels.mean()*100:.1f}% positive") # Chronological split n_test = N // 4 idx_tr = slice(0, N - n_test) idx_te = slice(N - n_test, N) # ── AUC helpers ─────────────────────────────────────────────────────────────── from sklearn.linear_model import LogisticRegression from sklearn.metrics import roc_auc_score def eval_auc(z_all, labels, n_test): X_lr = z_all[:-K]; y_lr = labels[:-K] valid = np.isfinite(X_lr).all(1) & np.isfinite(y_lr) X_lr, y_lr = X_lr[valid], y_lr[valid] n = len(X_lr) // 4 X_tr, X_te = X_lr[:-n], X_lr[-n:] y_tr, y_te = y_lr[:-n], y_lr[-n:] clf = LogisticRegression(class_weight='balanced', max_iter=500, C=0.1) clf.fit(X_tr, y_tr) preds = clf.predict_proba(X_te)[:,1] auc = roc_auc_score(y_te, preds) return max(auc, 1-auc) # ── Import FlintHDVAE ───────────────────────────────────────────────────────── from flint_hd_vae import FlintHDVAE def build_model(seed=42): return FlintHDVAE(input_dim=20, hd_dim=512, latent_dim=8, beta=0.1, seed=seed, use_flint_norm=False) n_vae_train = int(N * 0.8) T1_train = T1_data[:n_vae_train] results = {} # ── Variant A: Single-pass β=0.1 (baseline) ────────────────────────────────── print("\n" + "="*55) print("A. SINGLE-PASS β=0.1 (baseline)") print("="*55) m_a = build_model(seed=42) m_a.fit(T1_train, epochs=40, lr=1e-3, batch_size=256, verbose=True, warmup_frac=0.3) z_a = m_a.encode(T1_data) print(f" z var per dim: {z_a.var(0).round(3)}") print(f" Active dims (var>0.1): {int((z_a.var(0)>0.1).sum())}/8") auc_a = eval_auc(z_a, labels, n_test) print(f" OOS AUC = {auc_a:.4f}") results['A_single_pass_b0.1'] = dict(auc=auc_a, active_dims=int((z_a.var(0)>0.1).sum()), z_var=z_a.var(0).tolist()) # ── Variant B: Two-pass β=4 → β=0.1 ───────────────────────────────────────── print("\n" + "="*55) print("B. TWO-PASS β=4 (20ep) → β=0.1 (20ep)") print("="*55) m_b = build_model(seed=42) print(" Pass 1: β=4, 20 epochs") m_b.beta = 4.0 m_b.fit(T1_train, epochs=20, lr=1e-3, batch_size=256, verbose=True, warmup_frac=0.3) print(" Pass 2: β=0.1, 20 epochs (continuing from Pass 1 weights)") m_b.beta = 0.1 m_b.fit(T1_train, epochs=20, lr=5e-4, batch_size=256, verbose=True, warmup_frac=0.1) z_b = m_b.encode(T1_data) print(f" z var per dim: {z_b.var(0).round(3)}") print(f" Active dims (var>0.1): {int((z_b.var(0)>0.1).sum())}/8") auc_b = eval_auc(z_b, labels, n_test) print(f" OOS AUC = {auc_b:.4f} (vs A: {auc_b-auc_a:+.4f})") results['B_twopass_b4_b0.1'] = dict(auc=auc_b, active_dims=int((z_b.var(0)>0.1).sum()), z_var=z_b.var(0).tolist()) # ── Variant C: Two-pass β=2 → β=0.1 ───────────────────────────────────────── print("\n" + "="*55) print("C. TWO-PASS β=2 (20ep) → β=0.1 (20ep)") print("="*55) m_c = build_model(seed=42) print(" Pass 1: β=2, 20 epochs") m_c.beta = 2.0 m_c.fit(T1_train, epochs=20, lr=1e-3, batch_size=256, verbose=True, warmup_frac=0.3) print(" Pass 2: β=0.1, 20 epochs") m_c.beta = 0.1 m_c.fit(T1_train, epochs=20, lr=5e-4, batch_size=256, verbose=True, warmup_frac=0.1) z_c = m_c.encode(T1_data) print(f" z var per dim: {z_c.var(0).round(3)}") print(f" Active dims (var>0.1): {int((z_c.var(0)>0.1).sum())}/8") auc_c = eval_auc(z_c, labels, n_test) print(f" OOS AUC = {auc_c:.4f} (vs A: {auc_c-auc_a:+.4f})") results['C_twopass_b2_b0.1'] = dict(auc=auc_c, active_dims=int((z_c.var(0)>0.1).sum()), z_var=z_c.var(0).tolist()) # ── Variant D: Dual encoder (β=4 ‖ β=0.1, z concatenated) ─────────────────── print("\n" + "="*55) print("D. DUAL ENCODER: β=4 encoder ‖ β=0.1 encoder (z concat → 16-dim)") print("="*55) m_d_hi = build_model(seed=42) m_d_hi.beta = 4.0 print(" Training β=4 encoder (20 epochs)...") m_d_hi.fit(T1_train, epochs=20, lr=1e-3, batch_size=256, verbose=False, warmup_frac=0.3) m_d_lo = build_model(seed=123) m_d_lo.beta = 0.1 print(" Training β=0.1 encoder (40 epochs)...") m_d_lo.fit(T1_train, epochs=40, lr=1e-3, batch_size=256, verbose=False, warmup_frac=0.3) z_hi = m_d_hi.encode(T1_data) # (N, 8) z_lo = m_d_lo.encode(T1_data) # (N, 8) z_d = np.concatenate([z_hi, z_lo], axis=1) # (N, 16) print(f" β=4 z var: {z_hi.var(0).round(3)}") print(f" β=0.1 z var: {z_lo.var(0).round(3)}") print(f" Combined z shape: {z_d.shape}") auc_d = eval_auc(z_d, labels, n_test) print(f" OOS AUC = {auc_d:.4f} (vs A: {auc_d-auc_a:+.4f})") results['D_dual_b4_b0.1'] = dict(auc=auc_d, active_dims_hi=int((z_hi.var(0)>0.1).sum()), active_dims_lo=int((z_lo.var(0)>0.1).sum()), z_var_hi=z_hi.var(0).tolist(), z_var_lo=z_lo.var(0).tolist()) # ── Summary ─────────────────────────────────────────────────────────────────── GATE = 0.02 # improvement threshold print("\n" + "="*55) print("EXP 5 — TWO-PASS β SUMMARY") print("="*55) print(f"{'Variant':<35} {'AUC':>8} {'vs A':>8} {'ActiveDims':>11}") print('-'*65) for k, v in results.items(): ad = v.get('active_dims', v.get('active_dims_lo', '?')) delta = v['auc'] - auc_a flag = ' ◄ GAIN' if delta >= GATE else (' △' if delta > 0 else '') print(f" {k:<33} {v['auc']:>8.4f} {delta:>+8.4f} {str(ad):>11}{flag}") best = max(results, key=lambda k: results[k]['auc']) best_auc = results[best]['auc'] print(f"\n Best: {best} AUC={best_auc:.4f}") if best_auc - auc_a >= GATE: print(f" GATE PASS: improvement {best_auc-auc_a:+.4f} ≥ {GATE}") print(f" → Two-pass training IS beneficial. Adopt for FlintHDVAE.") else: print(f" GATE FAIL: best improvement {best_auc-auc_a:+.4f} < {GATE}") print(f" → Two-pass training offers NO meaningful gain on this dataset.") # Save import json out = _HERE / 'exp5_dvae_twopass_results.json' with open(out, 'w', encoding='utf-8') as f: json.dump({'results': results, 'baseline_auc': float(auc_a), 'gate_threshold': GATE, 'winner': best, 'note': 'beta=12 not tested (collapses; beta=6 already showed 0/20 active dims)'}, f, indent=2) print(f"\n Logged → {out}")