""" Unit tests: LSTM weight save/load fix ====================================== Tests that DisentangledVAEGenerator.save_model() correctly persists W_ih/W_hh/b_h and that TitanSensor loads them instead of random re-initialising. Run BEFORE and AFTER retrain to catch regressions. Usage: cd nautilus_dolphin python dvae/test_lstm_weight_fix.py """ import sys, json, os, tempfile sys.stdout.reconfigure(encoding='utf-8', errors='replace') from pathlib import Path import numpy as np ROOT = Path(__file__).parent.parent # nautilus_dolphin/ PROJECT = ROOT.parent # project root (disentangled_vae_joint_generator.py lives here) sys.path.insert(0, str(ROOT)) sys.path.insert(0, str(PROJECT)) MODEL_PATH = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict" r"\dvae_regime_model_TITAN_ULTRA_250_ULTRA261_MCDAIN.json") MODEL_PATH_GD = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict" r"\dvae_regime_model_TITAN_ULTRA_GD.json") _PASS = "[PASS]" _FAIL = "[FAIL]" # ── Helpers ─────────────────────────────────────────────────────────────────── def _make_dummy_generator(): """ Build a minimal DisentangledVAEGenerator-like object with known numpy weights, bypassing FLINT _init_weights(). Used to test save/load roundtrip without requiring FLINT/arb to be present. 
""" import types, importlib # Import the class but intercept _init_weights so it doesn't call crypto_random_arb mod = importlib.import_module('disentangled_vae_joint_generator') cls = mod.DisentangledVAEGenerator obj = cls.__new__(cls) obj.input_dim = 8 obj.hidden_dim = 4 obj.latent_dim = 2 obj.regime_dim = 2 obj.prec = 64 obj.beta = 1.0 obj.is_trained = True obj.edain = None obj.latent_names = {0: "A", 1: "B"} rng = np.random.RandomState(7777) obj.W_ih = rng.randn(8, 16).astype(np.float64) # (input_dim, hidden*4) obj.W_hh = rng.randn(4, 16).astype(np.float64) # (hidden_dim, hidden*4) obj.b_h = rng.randn(16).astype(np.float64) obj.W_mu = rng.randn(4, 2).astype(np.float64) obj.W_logvar = rng.randn(4, 2).astype(np.float64) obj.b_mu = rng.randn(2).astype(np.float64) obj.b_logvar = rng.randn(2).astype(np.float64) obj.W_dec = rng.randn(2, 4).astype(np.float64) obj.W_out = rng.randn(4, 8).astype(np.float64) obj.b_dec = rng.randn(4).astype(np.float64) obj.b_out = rng.randn(8).astype(np.float64) return obj def _load_sensor(): from dvae.titan_sensor import TitanSensor return TitanSensor(str(MODEL_PATH)) # ── Test 1: save_model() roundtrip (no FLINT needed) ───────────────────────── def test_save_model_includes_lstm(): print("\n[T1] save_model() includes W_ih / W_hh / b_h ...") gen = _make_dummy_generator() with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: tmp = f.name try: gen.save_model(tmp) with open(tmp) as f: m = json.load(f) for key in ('W_ih', 'W_hh', 'b_h', 'W_mu', 'W_logvar', 'W_dec', 'W_out', 'b_mu', 'b_logvar', 'b_dec', 'b_out'): assert key in m, f"{_FAIL}: key '{key}' missing from saved JSON" W_ih_rt = np.array(m['W_ih']) assert W_ih_rt.shape == gen.W_ih.shape, \ f"{_FAIL}: W_ih shape mismatch {W_ih_rt.shape} vs {gen.W_ih.shape}" assert np.allclose(W_ih_rt, gen.W_ih, atol=1e-15), \ f"{_FAIL}: W_ih values differ after roundtrip (max={np.max(np.abs(W_ih_rt - gen.W_ih)):.2e})" W_hh_rt = np.array(m['W_hh']) assert np.allclose(W_hh_rt, 
gen.W_hh, atol=1e-15), \ f"{_FAIL}: W_hh values differ after roundtrip" b_h_rt = np.array(m['b_h']) assert np.allclose(b_h_rt, gen.b_h, atol=1e-15), \ f"{_FAIL}: b_h values differ after roundtrip" print(f" {_PASS} W_ih {gen.W_ih.shape} roundtrip exact") print(f" {_PASS} W_hh {gen.W_hh.shape} roundtrip exact") print(f" {_PASS} b_h {gen.b_h.shape} roundtrip exact") print(f" {_PASS} all 11 weight keys present") finally: os.unlink(tmp) # ── Test 2: TitanSensor loads W_ih from JSON, not random seed=42 ───────────── def test_sensor_loads_from_json_not_random(): print("\n[T2] TitanSensor loads LSTM weights from JSON (not seed=42 random) ...") assert MODEL_PATH.exists(), f"{_FAIL}: model not found at {MODEL_PATH}" with open(MODEL_PATH) as f: m = json.load(f) assert 'W_ih' in m, \ f"{_FAIL}: W_ih missing from model JSON — model was saved before the fix. Retrain first." sensor = _load_sensor() assert sensor.lstm_weights_valid, \ f"{_FAIL}: sensor.lstm_weights_valid=False — W_ih missing from JSON" W_ih_json = np.array(m['W_ih'], dtype=np.float64) max_diff = np.max(np.abs(sensor.W_ih - W_ih_json)) assert max_diff < 1e-12, \ f"{_FAIL}: sensor.W_ih != JSON W_ih (max_diff={max_diff:.3e})" print(f" {_PASS} sensor.W_ih == JSON W_ih (max_diff={max_diff:.2e})") W_hh_json = np.array(m['W_hh'], dtype=np.float64) max_diff_hh = np.max(np.abs(sensor.W_hh - W_hh_json)) assert max_diff_hh < 1e-12, \ f"{_FAIL}: sensor.W_hh != JSON W_hh (max_diff={max_diff_hh:.3e})" print(f" {_PASS} sensor.W_hh == JSON W_hh (max_diff={max_diff_hh:.2e})") # Confirm it is NOT the seed=42 random initialisation rng_42 = np.random.RandomState(42) W_ih_42 = rng_42.randn(261, 512) * 0.1 diff_vs_42 = np.max(np.abs(sensor.W_ih - W_ih_42)) assert diff_vs_42 > 0.01, \ f"{_FAIL}: sensor.W_ih matches seed=42 random (diff={diff_vs_42:.3e}) — LSTM still wrong" print(f" {_PASS} sensor.W_ih is NOT seed=42 random (diff_vs_42={diff_vs_42:.3f})") # ── Test 3: recon_err is finite and in plausible range ──────────────────────── 
def test_recon_err_plausible():
    """recon_err must be finite and orders of magnitude below the pre-fix ~1e14."""
    print("\n[T3] recon_err is finite and << 10^6 (was ~10^14 pre-fix) ...")
    sensor = _load_sensor()
    # NOTE: the original imported build_feature_vector here but never used it;
    # the unused import has been removed.
    rng = np.random.RandomState(42)
    results = {}
    for label, x in [
        ("zeros", np.zeros(261)),
        ("ones", np.ones(261) * 0.01),
        ("random_s", rng.randn(261) * 0.05),
        ("random_l", rng.randn(261) * 2.0),
    ]:
        z_mu, recon_err, z_logvar = sensor.encode(x)
        results[label] = recon_err
        assert np.isfinite(recon_err), \
            f"{_FAIL}: recon_err not finite for '{label}' input: {recon_err}"
        # Pre-fix: ~10^14. Post-fix: should be O(1) or O(100) at worst.
        assert recon_err < 1e8, \
            f"{_FAIL}: recon_err={recon_err:.3e} suspiciously large for '{label}'"
        print(f" {_PASS} [{label:10s}] recon_err={recon_err:.4e} z_mu[0:4]={z_mu[:4].round(4)}")
    # Distribution check: recon_err should vary with input (not uniform noise)
    errs = list(results.values())
    assert max(errs) / (min(errs) + 1e-12) > 2.0, \
        f"{_FAIL}: recon_err suspiciously uniform across inputs ({errs}) — LSTM may still be wrong"
    print(f" {_PASS} recon_err varies meaningfully across inputs (ratio={max(errs)/(min(errs)+1e-12):.1f}x)")


# ── Test 4: Encoding is deterministic ─────────────────────────────────────────

def test_encoding_deterministic():
    """Two independently-loaded sensors must encode the same input identically."""
    print("\n[T4] encode() is deterministic across two sensor instances ...")
    sensor1 = _load_sensor()
    sensor2 = _load_sensor()
    x = np.random.RandomState(99).randn(261) * 0.1
    z1, e1, _ = sensor1.encode(x)
    z2, e2, _ = sensor2.encode(x)
    assert np.allclose(z1, z2, atol=1e-14), \
        f"{_FAIL}: z_mu differs between two sensors (max={np.max(np.abs(z1-z2)):.2e})"
    assert abs(e1 - e2) < 1e-10, \
        f"{_FAIL}: recon_err differs between two sensors ({e1:.6e} vs {e2:.6e})"
    print(f" {_PASS} z_mu identical (max_diff={np.max(np.abs(z1-z2)):.2e})")
    print(f" {_PASS} recon_err identical ({e1:.6e})")


# ── Test 5: Legacy model emits RuntimeWarning, sensor.lstm_weights_valid=False ─

def test_legacy_model_warns():
    """Loading a pre-fix JSON (no W_ih/W_hh) must warn, not silently re-init."""
    print("\n[T5] Legacy model (no W_ih in JSON) emits RuntimeWarning ...")
    from dvae.titan_sensor import TitanSensor
    import warnings
    # Build a minimal legacy-style JSON (no W_ih/W_hh)
    legacy = {
        "W_mu": np.zeros((128, 32)).tolist(),
        "W_logvar": np.zeros((128, 32)).tolist(),
        "W_dec": np.zeros((32, 128)).tolist(),
        "W_out": np.zeros((128, 261)).tolist(),
        "latent_names": {},
        "precision_bits": 512,
    }
    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
        json.dump(legacy, f)
        tmp = f.name
    try:
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            s = TitanSensor(tmp)
        assert not s.lstm_weights_valid, \
            f"{_FAIL}: lstm_weights_valid should be False for legacy model"
        assert any(issubclass(w.category, RuntimeWarning) for w in caught), \
            f"{_FAIL}: no RuntimeWarning emitted for legacy model"
        print(f" {_PASS} lstm_weights_valid=False")
        print(f" {_PASS} RuntimeWarning emitted")
    finally:
        os.unlink(tmp)


# ── Test 6: z_mu dimensionality and range ─────────────────────────────────────

def test_latent_dimensionality():
    """z_mu/z_logvar must be finite (32,) vectors with plausible magnitudes."""
    print("\n[T6] z_mu has correct dimensionality and plausible range ...")
    sensor = _load_sensor()
    x = np.random.RandomState(55).randn(261) * 0.1
    z_mu, recon_err, z_logvar = sensor.encode(x)
    assert z_mu.shape == (32,), f"{_FAIL}: z_mu shape {z_mu.shape} != (32,)"
    assert z_logvar.shape == (32,), f"{_FAIL}: z_logvar shape {z_logvar.shape} != (32,)"
    assert np.all(np.isfinite(z_mu)), f"{_FAIL}: z_mu contains non-finite values"
    assert np.all(np.isfinite(z_logvar)), f"{_FAIL}: z_logvar contains non-finite values"
    assert np.abs(z_mu).max() < 1e3, \
        f"{_FAIL}: z_mu values suspiciously large (max={np.abs(z_mu).max():.2e})"
    print(f" {_PASS} z_mu.shape=(32,) z_mu range=[{z_mu.min():.3f}, {z_mu.max():.3f}]")
    print(f" {_PASS} z_logvar.shape=(32,) range=[{z_logvar.min():.3f}, {z_logvar.max():.3f}]")


# ── Test 7: GD-trained model has correct LSTM weights and sane recon_err ──────

def test_gd_model():
    """GD model must share the MCDAIN LSTM basis and reconstruct in-dist inputs."""
    print("\n[T7] GD-trained model: W_ih present, recon_err << MCDAIN model ...")
    assert MODEL_PATH_GD.exists(), f"{_FAIL}: GD model not found at {MODEL_PATH_GD}"
    from dvae.titan_sensor import TitanSensor
    sensor_gd = TitanSensor(str(MODEL_PATH_GD))
    assert sensor_gd.lstm_weights_valid, f"{_FAIL}: GD sensor.lstm_weights_valid=False"
    print(f" {_PASS} GD model: lstm_weights_valid=True")
    # Verify W_ih from GD model matches W_ih from MCDAIN model
    # (they should be THE SAME — GD model was initialized from MCDAIN model's W_ih)
    sensor_mc = _load_sensor()
    diff_wih = np.max(np.abs(sensor_gd.W_ih - sensor_mc.W_ih))
    assert diff_wih < 1e-12, \
        f"{_FAIL}: GD model W_ih != MCDAIN model W_ih (diff={diff_wih:.3e}) — should be same LSTM basis"
    print(f" {_PASS} GD model W_ih == MCDAIN model W_ih (same LSTM basis, diff={diff_wih:.2e})")
    # Reconstruction error: generate inputs GUARANTEED to be in-distribution by
    # sampling x = norm_mean + norm_std * randn (within ±2σ of training corpus).
    # After encode()'s normalization step: v_norm = clip(randn, -2, 2) → perfectly O(1).
    # recon_err should be close to the training-set value (p50=0.59).
    with open(MODEL_PATH_GD) as fj:
        gd_json = json.load(fj)
    nm_gd = np.array(gd_json['norm_mean'])
    ns_gd = np.array(gd_json['norm_std'])
    rng = np.random.RandomState(42)
    errs_gd = []
    errs_mc = []
    for _ in range(20):
        # Guaranteed in-distribution: raw input = corpus_mean + corpus_std * noise
        x_raw = nm_gd + ns_gd * np.clip(rng.randn(261), -2, 2)
        x_raw[78:] = 0.0   # T3/T4/T5 are always 0 in corpus
        _, e_gd, _ = sensor_gd.encode(x_raw)
        _, e_mc, _ = sensor_mc.encode(x_raw)
        errs_gd.append(e_gd)
        errs_mc.append(e_mc)
    med_gd = float(np.median(errs_gd))
    med_mc = float(np.median(errs_mc))
    print(f" GD median recon_err={med_gd:.4e} (in-distribution) MCDAIN median recon_err={med_mc:.4e}")
    # GD trained with proper GD — should reconstruct in-distribution inputs well
    assert med_gd < 10.0, f"{_FAIL}: GD recon_err too large ({med_gd:.4e}) — model didn't learn"
    print(f" {_PASS} GD recon_err < 10.0 for in-distribution inputs (model learned)")


# ── Test 8: GD-v2 normalization stored and applied at inference ───────────────

def test_normalization_stored_and_applied():
    """norm_mean/norm_std must be in the JSON, loaded, and applied by encode()."""
    print("\n[T8] GD-v2 model: norm_mean/norm_std present and applied by TitanSensor ...")
    assert MODEL_PATH_GD.exists(), f"{_FAIL}: GD model not found at {MODEL_PATH_GD}"
    from dvae.titan_sensor import TitanSensor
    # 8a: JSON must contain norm_mean / norm_std
    with open(MODEL_PATH_GD) as f:
        m = json.load(f)
    assert 'norm_mean' in m, f"{_FAIL}: GD model JSON missing 'norm_mean'"
    assert 'norm_std' in m, f"{_FAIL}: GD model JSON missing 'norm_std'"
    nm = np.array(m['norm_mean'])
    ns = np.array(m['norm_std'])
    assert nm.shape == (261,), f"{_FAIL}: norm_mean shape {nm.shape} != (261,)"
    assert ns.shape == (261,), f"{_FAIL}: norm_std shape {ns.shape} != (261,)"
    assert np.all(ns > 0), f"{_FAIL}: norm_std has zero or negative entries"
    print(f" {_PASS} norm_mean/norm_std present, shape=(261,), all std>0")
    # 8b: TitanSensor must load them (not None)
    sensor = TitanSensor(str(MODEL_PATH_GD))
    assert sensor.norm_mean is not None, f"{_FAIL}: sensor.norm_mean is None — not loaded"
    assert sensor.norm_std is not None, f"{_FAIL}: sensor.norm_std is None — not loaded"
    assert np.allclose(sensor.norm_mean, nm, atol=1e-12), \
        f"{_FAIL}: sensor.norm_mean != JSON norm_mean"
    print(f" {_PASS} sensor.norm_mean loaded from JSON")
    # 8c: recon_err on realistic inputs (matching build_feature_vector() output structure)
    # Before fix (MCDAIN-trained, no normalization stored): encode(raw) → huge recon_err
    # After fix (GD-v2, normalization stored and applied): encode(raw) → O(1-50)
    rng = np.random.RandomState(42)
    errs = []
    for _ in range(30):
        x = np.zeros(261)
        x[0:8] = rng.randn(8) * 0.7                 # T0: time encoding
        x[8:28] = rng.randn(20) * 0.02              # T1: eigenvalue velocity scale
        x[28:78] = np.clip(rng.randn(50), -5, 5)    # T2: return z-scores [-5,5]
        # T3/T4/T5 = 0 (Stage 1 contract — matches training corpus)
        _, e, _ = sensor.encode(x)
        errs.append(e)
    med_err = float(np.median(errs))
    max_err = float(np.max(errs))
    print(f" GD-v2 realistic-input recon_err: median={med_err:.4e} max={max_err:.4e}")
    assert med_err < 100.0, \
        f"{_FAIL}: recon_err too large ({med_err:.4e}) — normalization may not be applied"
    print(f" {_PASS} recon_err O(1-100) for realistic-scale inputs")
    # 8d: T5 dims are zero after normalization (non-zero norm_mean would shift them)
    x_zeros = np.zeros(261)
    _, e_z, _ = sensor.encode(x_zeros)
    # The sensor zeros T5 after normalization — this just checks it doesn't crash
    assert np.isfinite(e_z), f"{_FAIL}: encode(zeros) returned non-finite recon_err={e_z}"
    print(f" {_PASS} encode(zeros) is finite after normalization: recon_err={e_z:.4e}")


# ── Runner ────────────────────────────────────────────────────────────────────

if __name__ == '__main__':
    print("=" * 60)
    print("TitanSensor LSTM weight fix — unit tests")
    print("=" * 60)
    n_pass = 0
    n_fail = 0
    tests = [
        ("T1: save_model roundtrip", test_save_model_includes_lstm),
        ("T2: sensor loads JSON weights", test_sensor_loads_from_json_not_random),
        ("T3: recon_err plausible", test_recon_err_plausible),
        ("T4: encoding deterministic", test_encoding_deterministic),
        ("T5: legacy model warns", test_legacy_model_warns),
        ("T6: latent dimensionality", test_latent_dimensionality),
        ("T7: GD model quality", test_gd_model),
        ("T8: normalization stored and applied", test_normalization_stored_and_applied),
    ]
    for name, fn in tests:
        try:
            fn()
            n_pass += 1
        except AssertionError as e:
            # Include the test name for parity with the EXCEPTION branch below.
            print(f" {_FAIL} ASSERTION in {name}: {e}")
            n_fail += 1
        except Exception as e:
            print(f" {_FAIL} EXCEPTION in {name}: {type(e).__name__}: {e}")
            n_fail += 1
    print()
    print("=" * 60)
    print(f"Results: {n_pass}/{n_pass+n_fail} PASSED {n_fail} FAILED")
    print("=" * 60)
    if n_fail > 0:
        sys.exit(1)