# DOLPHIN/nautilus_dolphin/dvae/test_lstm_weight_fix.py

"""
Unit tests: LSTM weight save/load fix
======================================
Tests that DisentangledVAEGenerator.save_model() correctly persists W_ih/W_hh/b_h
and that TitanSensor loads them instead of random re-initialising.
Run BEFORE and AFTER retrain to catch regressions.
Usage:
cd nautilus_dolphin
python dvae/test_lstm_weight_fix.py
"""
import sys, json, os, tempfile
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
ROOT = Path(__file__).parent.parent # nautilus_dolphin/
PROJECT = ROOT.parent # project root (disentangled_vae_joint_generator.py lives here)
sys.path.insert(0, str(ROOT))
sys.path.insert(0, str(PROJECT))
MODEL_PATH = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict"
r"\dvae_regime_model_TITAN_ULTRA_250_ULTRA261_MCDAIN.json")
MODEL_PATH_GD = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict"
r"\dvae_regime_model_TITAN_ULTRA_GD.json")
_PASS = "[PASS]"
_FAIL = "[FAIL]"
# ── Helpers ───────────────────────────────────────────────────────────────────
def _make_dummy_generator():
"""
Build a minimal DisentangledVAEGenerator-like object with known numpy weights,
bypassing FLINT _init_weights(). Used to test save/load roundtrip without
requiring FLINT/arb to be present.
"""
import types, importlib
# Import the class but intercept _init_weights so it doesn't call crypto_random_arb
mod = importlib.import_module('disentangled_vae_joint_generator')
cls = mod.DisentangledVAEGenerator
obj = cls.__new__(cls)
obj.input_dim = 8
obj.hidden_dim = 4
obj.latent_dim = 2
obj.regime_dim = 2
obj.prec = 64
obj.beta = 1.0
obj.is_trained = True
obj.edain = None
obj.latent_names = {0: "A", 1: "B"}
rng = np.random.RandomState(7777)
obj.W_ih = rng.randn(8, 16).astype(np.float64) # (input_dim, hidden*4)
obj.W_hh = rng.randn(4, 16).astype(np.float64) # (hidden_dim, hidden*4)
obj.b_h = rng.randn(16).astype(np.float64)
obj.W_mu = rng.randn(4, 2).astype(np.float64)
obj.W_logvar = rng.randn(4, 2).astype(np.float64)
obj.b_mu = rng.randn(2).astype(np.float64)
obj.b_logvar = rng.randn(2).astype(np.float64)
obj.W_dec = rng.randn(2, 4).astype(np.float64)
obj.W_out = rng.randn(4, 8).astype(np.float64)
obj.b_dec = rng.randn(4).astype(np.float64)
obj.b_out = rng.randn(8).astype(np.float64)
return obj
def _load_sensor():
from dvae.titan_sensor import TitanSensor
return TitanSensor(str(MODEL_PATH))
# ── Test 1: save_model() roundtrip (no FLINT needed) ─────────────────────────
def test_save_model_includes_lstm():
print("\n[T1] save_model() includes W_ih / W_hh / b_h ...")
gen = _make_dummy_generator()
with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
tmp = f.name
try:
gen.save_model(tmp)
with open(tmp) as f:
m = json.load(f)
for key in ('W_ih', 'W_hh', 'b_h', 'W_mu', 'W_logvar', 'W_dec', 'W_out',
'b_mu', 'b_logvar', 'b_dec', 'b_out'):
assert key in m, f"{_FAIL}: key '{key}' missing from saved JSON"
W_ih_rt = np.array(m['W_ih'])
assert W_ih_rt.shape == gen.W_ih.shape, \
f"{_FAIL}: W_ih shape mismatch {W_ih_rt.shape} vs {gen.W_ih.shape}"
assert np.allclose(W_ih_rt, gen.W_ih, atol=1e-15), \
f"{_FAIL}: W_ih values differ after roundtrip (max={np.max(np.abs(W_ih_rt - gen.W_ih)):.2e})"
W_hh_rt = np.array(m['W_hh'])
assert np.allclose(W_hh_rt, gen.W_hh, atol=1e-15), \
f"{_FAIL}: W_hh values differ after roundtrip"
b_h_rt = np.array(m['b_h'])
assert np.allclose(b_h_rt, gen.b_h, atol=1e-15), \
f"{_FAIL}: b_h values differ after roundtrip"
print(f" {_PASS} W_ih {gen.W_ih.shape} roundtrip exact")
print(f" {_PASS} W_hh {gen.W_hh.shape} roundtrip exact")
print(f" {_PASS} b_h {gen.b_h.shape} roundtrip exact")
print(f" {_PASS} all 11 weight keys present")
finally:
os.unlink(tmp)
# ── Test 2: TitanSensor loads W_ih from JSON, not random seed=42 ─────────────
def test_sensor_loads_from_json_not_random():
print("\n[T2] TitanSensor loads LSTM weights from JSON (not seed=42 random) ...")
assert MODEL_PATH.exists(), f"{_FAIL}: model not found at {MODEL_PATH}"
with open(MODEL_PATH) as f:
m = json.load(f)
assert 'W_ih' in m, \
f"{_FAIL}: W_ih missing from model JSON — model was saved before the fix. Retrain first."
sensor = _load_sensor()
assert sensor.lstm_weights_valid, \
f"{_FAIL}: sensor.lstm_weights_valid=False — W_ih missing from JSON"
W_ih_json = np.array(m['W_ih'], dtype=np.float64)
max_diff = np.max(np.abs(sensor.W_ih - W_ih_json))
assert max_diff < 1e-12, \
f"{_FAIL}: sensor.W_ih != JSON W_ih (max_diff={max_diff:.3e})"
print(f" {_PASS} sensor.W_ih == JSON W_ih (max_diff={max_diff:.2e})")
W_hh_json = np.array(m['W_hh'], dtype=np.float64)
max_diff_hh = np.max(np.abs(sensor.W_hh - W_hh_json))
assert max_diff_hh < 1e-12, \
f"{_FAIL}: sensor.W_hh != JSON W_hh (max_diff={max_diff_hh:.3e})"
print(f" {_PASS} sensor.W_hh == JSON W_hh (max_diff={max_diff_hh:.2e})")
# Confirm it is NOT the seed=42 random initialisation
rng_42 = np.random.RandomState(42)
W_ih_42 = rng_42.randn(261, 512) * 0.1
diff_vs_42 = np.max(np.abs(sensor.W_ih - W_ih_42))
assert diff_vs_42 > 0.01, \
f"{_FAIL}: sensor.W_ih matches seed=42 random (diff={diff_vs_42:.3e}) — LSTM still wrong"
print(f" {_PASS} sensor.W_ih is NOT seed=42 random (diff_vs_42={diff_vs_42:.3f})")
# ── Test 3: recon_err is finite and in plausible range ────────────────────────
def test_recon_err_plausible():
print("\n[T3] recon_err is finite and << 10^6 (was ~10^14 pre-fix) ...")
sensor = _load_sensor()
from dvae.titan_sensor import build_feature_vector
rng = np.random.RandomState(42)
results = {}
for label, x in [
("zeros", np.zeros(261)),
("ones", np.ones(261) * 0.01),
("random_s", rng.randn(261) * 0.05),
("random_l", rng.randn(261) * 2.0),
]:
z_mu, recon_err, z_logvar = sensor.encode(x)
results[label] = recon_err
assert np.isfinite(recon_err), \
f"{_FAIL}: recon_err not finite for '{label}' input: {recon_err}"
# Pre-fix: ~10^14. Post-fix: should be O(1) or O(100) at worst.
assert recon_err < 1e8, \
f"{_FAIL}: recon_err={recon_err:.3e} suspiciously large for '{label}'"
print(f" {_PASS} [{label:10s}] recon_err={recon_err:.4e} z_mu[0:4]={z_mu[:4].round(4)}")
# Distribution check: recon_err should vary with input (not uniform noise)
errs = list(results.values())
assert max(errs) / (min(errs) + 1e-12) > 2.0, \
f"{_FAIL}: recon_err suspiciously uniform across inputs ({errs}) — LSTM may still be wrong"
print(f" {_PASS} recon_err varies meaningfully across inputs (ratio={max(errs)/(min(errs)+1e-12):.1f}x)")
# ── Test 4: Encoding is deterministic ─────────────────────────────────────────
def test_encoding_deterministic():
print("\n[T4] encode() is deterministic across two sensor instances ...")
sensor1 = _load_sensor()
sensor2 = _load_sensor()
x = np.random.RandomState(99).randn(261) * 0.1
z1, e1, _ = sensor1.encode(x)
z2, e2, _ = sensor2.encode(x)
assert np.allclose(z1, z2, atol=1e-14), \
f"{_FAIL}: z_mu differs between two sensors (max={np.max(np.abs(z1-z2)):.2e})"
assert abs(e1 - e2) < 1e-10, \
f"{_FAIL}: recon_err differs between two sensors ({e1:.6e} vs {e2:.6e})"
print(f" {_PASS} z_mu identical (max_diff={np.max(np.abs(z1-z2)):.2e})")
print(f" {_PASS} recon_err identical ({e1:.6e})")
# ── Test 5: Legacy model emits RuntimeWarning, sensor.lstm_weights_valid=False ─
def test_legacy_model_warns():
print("\n[T5] Legacy model (no W_ih in JSON) emits RuntimeWarning ...")
from dvae.titan_sensor import TitanSensor
import warnings
# Build a minimal legacy-style JSON (no W_ih/W_hh)
legacy = {
"W_mu": np.zeros((128, 32)).tolist(),
"W_logvar": np.zeros((128, 32)).tolist(),
"W_dec": np.zeros((32, 128)).tolist(),
"W_out": np.zeros((128, 261)).tolist(),
"latent_names": {},
"precision_bits": 512,
}
with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
json.dump(legacy, f)
tmp = f.name
try:
with warnings.catch_warnings(record=True) as caught:
warnings.simplefilter("always")
s = TitanSensor(tmp)
assert not s.lstm_weights_valid, \
f"{_FAIL}: lstm_weights_valid should be False for legacy model"
assert any(issubclass(w.category, RuntimeWarning) for w in caught), \
f"{_FAIL}: no RuntimeWarning emitted for legacy model"
print(f" {_PASS} lstm_weights_valid=False")
print(f" {_PASS} RuntimeWarning emitted")
finally:
os.unlink(tmp)
# ── Test 6: z_mu dimensionality and range ─────────────────────────────────────
def test_latent_dimensionality():
print("\n[T6] z_mu has correct dimensionality and plausible range ...")
sensor = _load_sensor()
x = np.random.RandomState(55).randn(261) * 0.1
z_mu, recon_err, z_logvar = sensor.encode(x)
assert z_mu.shape == (32,), f"{_FAIL}: z_mu shape {z_mu.shape} != (32,)"
assert z_logvar.shape == (32,), f"{_FAIL}: z_logvar shape {z_logvar.shape} != (32,)"
assert np.all(np.isfinite(z_mu)), f"{_FAIL}: z_mu contains non-finite values"
assert np.all(np.isfinite(z_logvar)), f"{_FAIL}: z_logvar contains non-finite values"
assert np.abs(z_mu).max() < 1e3, \
f"{_FAIL}: z_mu values suspiciously large (max={np.abs(z_mu).max():.2e})"
print(f" {_PASS} z_mu.shape=(32,) z_mu range=[{z_mu.min():.3f}, {z_mu.max():.3f}]")
print(f" {_PASS} z_logvar.shape=(32,) range=[{z_logvar.min():.3f}, {z_logvar.max():.3f}]")
# ── Test 7: GD-trained model has correct LSTM weights and sane recon_err ──────
def test_gd_model():
print("\n[T7] GD-trained model: W_ih present, recon_err << MCDAIN model ...")
assert MODEL_PATH_GD.exists(), f"{_FAIL}: GD model not found at {MODEL_PATH_GD}"
from dvae.titan_sensor import TitanSensor
sensor_gd = TitanSensor(str(MODEL_PATH_GD))
assert sensor_gd.lstm_weights_valid, f"{_FAIL}: GD sensor.lstm_weights_valid=False"
print(f" {_PASS} GD model: lstm_weights_valid=True")
# Verify W_ih from GD model matches W_ih from MCDAIN model
# (they should be THE SAME — GD model was initialized from MCDAIN model's W_ih)
sensor_mc = _load_sensor()
diff_wih = np.max(np.abs(sensor_gd.W_ih - sensor_mc.W_ih))
assert diff_wih < 1e-12, \
f"{_FAIL}: GD model W_ih != MCDAIN model W_ih (diff={diff_wih:.3e}) — should be same LSTM basis"
print(f" {_PASS} GD model W_ih == MCDAIN model W_ih (same LSTM basis, diff={diff_wih:.2e})")
# Reconstruction error: generate inputs GUARANTEED to be in-distribution by
# sampling x = norm_mean + norm_std * randn (within ±2σ of training corpus).
# After encode()'s normalization step: v_norm = clip(randn, -2, 2) → perfectly O(1).
# recon_err should be close to the training-set value (p50=0.59).
with open(MODEL_PATH_GD) as fj:
gd_json = json.load(fj)
nm_gd = np.array(gd_json['norm_mean']); ns_gd = np.array(gd_json['norm_std'])
rng = np.random.RandomState(42)
errs_gd = []; errs_mc = []
for _ in range(20):
# Guaranteed in-distribution: raw input = corpus_mean + corpus_std * noise
x_raw = nm_gd + ns_gd * np.clip(rng.randn(261), -2, 2)
x_raw[78:] = 0.0 # T3/T4/T5 are always 0 in corpus
_, e_gd, _ = sensor_gd.encode(x_raw)
_, e_mc, _ = sensor_mc.encode(x_raw)
errs_gd.append(e_gd); errs_mc.append(e_mc)
med_gd = float(np.median(errs_gd)); med_mc = float(np.median(errs_mc))
print(f" GD median recon_err={med_gd:.4e} (in-distribution) MCDAIN median recon_err={med_mc:.4e}")
# GD trained with proper GD — should reconstruct in-distribution inputs well
assert med_gd < 10.0, f"{_FAIL}: GD recon_err too large ({med_gd:.4e}) — model didn't learn"
print(f" {_PASS} GD recon_err < 10.0 for in-distribution inputs (model learned)")
# ── Test 8: GD-v2 normalization stored and applied at inference ───────────────
def test_normalization_stored_and_applied():
print("\n[T8] GD-v2 model: norm_mean/norm_std present and applied by TitanSensor ...")
assert MODEL_PATH_GD.exists(), f"{_FAIL}: GD model not found at {MODEL_PATH_GD}"
from dvae.titan_sensor import TitanSensor
# 8a: JSON must contain norm_mean / norm_std
with open(MODEL_PATH_GD) as f:
m = json.load(f)
assert 'norm_mean' in m, f"{_FAIL}: GD model JSON missing 'norm_mean'"
assert 'norm_std' in m, f"{_FAIL}: GD model JSON missing 'norm_std'"
nm = np.array(m['norm_mean']); ns = np.array(m['norm_std'])
assert nm.shape == (261,), f"{_FAIL}: norm_mean shape {nm.shape} != (261,)"
assert ns.shape == (261,), f"{_FAIL}: norm_std shape {ns.shape} != (261,)"
assert np.all(ns > 0), f"{_FAIL}: norm_std has zero or negative entries"
print(f" {_PASS} norm_mean/norm_std present, shape=(261,), all std>0")
# 8b: TitanSensor must load them (not None)
sensor = TitanSensor(str(MODEL_PATH_GD))
assert sensor.norm_mean is not None, f"{_FAIL}: sensor.norm_mean is None — not loaded"
assert sensor.norm_std is not None, f"{_FAIL}: sensor.norm_std is None — not loaded"
assert np.allclose(sensor.norm_mean, nm, atol=1e-12), \
f"{_FAIL}: sensor.norm_mean != JSON norm_mean"
print(f" {_PASS} sensor.norm_mean loaded from JSON")
# 8c: recon_err on realistic inputs (matching build_feature_vector() output structure)
# Before fix (MCDAIN-trained, no normalization stored): encode(raw) → huge recon_err
# After fix (GD-v2, normalization stored and applied): encode(raw) → O(1-50)
rng = np.random.RandomState(42)
errs = []
for _ in range(30):
x = np.zeros(261)
x[0:8] = rng.randn(8) * 0.7 # T0: time encoding
x[8:28] = rng.randn(20) * 0.02 # T1: eigenvalue velocity scale
x[28:78] = np.clip(rng.randn(50), -5, 5) # T2: return z-scores [-5,5]
# T3/T4/T5 = 0 (Stage 1 contract — matches training corpus)
_, e, _ = sensor.encode(x)
errs.append(e)
med_err = float(np.median(errs))
max_err = float(np.max(errs))
print(f" GD-v2 realistic-input recon_err: median={med_err:.4e} max={max_err:.4e}")
assert med_err < 100.0, \
f"{_FAIL}: recon_err too large ({med_err:.4e}) — normalization may not be applied"
print(f" {_PASS} recon_err O(1-100) for realistic-scale inputs")
# 8d: T5 dims are zero after normalization (non-zero norm_mean would shift them)
x_zeros = np.zeros(261)
_, e_z, _ = sensor.encode(x_zeros)
# The sensor zeros T5 after normalization — this just checks it doesn't crash
assert np.isfinite(e_z), f"{_FAIL}: encode(zeros) returned non-finite recon_err={e_z}"
print(f" {_PASS} encode(zeros) is finite after normalization: recon_err={e_z:.4e}")
# ── Runner ────────────────────────────────────────────────────────────────────
if __name__ == '__main__':
print("=" * 60)
print("TitanSensor LSTM weight fix — unit tests")
print("=" * 60)
n_pass = 0
n_fail = 0
tests = [
("T1: save_model roundtrip", test_save_model_includes_lstm),
("T2: sensor loads JSON weights", test_sensor_loads_from_json_not_random),
("T3: recon_err plausible", test_recon_err_plausible),
("T4: encoding deterministic", test_encoding_deterministic),
("T5: legacy model warns", test_legacy_model_warns),
("T6: latent dimensionality", test_latent_dimensionality),
("T7: GD model quality", test_gd_model),
("T8: normalization stored and applied", test_normalization_stored_and_applied),
]
for name, fn in tests:
try:
fn()
n_pass += 1
except AssertionError as e:
print(f" {_FAIL} ASSERTION: {e}")
n_fail += 1
except Exception as e:
print(f" {_FAIL} EXCEPTION in {name}: {type(e).__name__}: {e}")
n_fail += 1
print()
print("=" * 60)
print(f"Results: {n_pass}/{n_pass+n_fail} PASSED {n_fail} FAILED")
print("=" * 60)
if n_fail > 0:
sys.exit(1)