Files
DOLPHIN/nautilus_dolphin/dvae/test_lstm_weight_fix.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

395 lines
18 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Unit tests: LSTM weight save/load fix
======================================
Tests that DisentangledVAEGenerator.save_model() correctly persists W_ih/W_hh/b_h
and that TitanSensor loads them instead of random re-initialising.
Run BEFORE and AFTER retrain to catch regressions.
Usage:
cd nautilus_dolphin
python dvae/test_lstm_weight_fix.py
"""
import sys, json, os, tempfile
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
ROOT = Path(__file__).parent.parent # nautilus_dolphin/
PROJECT = ROOT.parent # project root (disentangled_vae_joint_generator.py lives here)
sys.path.insert(0, str(ROOT))
sys.path.insert(0, str(PROJECT))
MODEL_PATH = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict"
r"\dvae_regime_model_TITAN_ULTRA_250_ULTRA261_MCDAIN.json")
MODEL_PATH_GD = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict"
r"\dvae_regime_model_TITAN_ULTRA_GD.json")
_PASS = "[PASS]"
_FAIL = "[FAIL]"
# ── Helpers ───────────────────────────────────────────────────────────────────
def _make_dummy_generator():
    """
    Build a minimal DisentangledVAEGenerator-like object with known numpy weights,
    bypassing FLINT _init_weights(). Used to test save/load roundtrip without
    requiring FLINT/arb to be present.

    Returns:
        A DisentangledVAEGenerator built via ``cls.__new__`` (so ``__init__``
        and any FLINT-backed weight initialisation never run), populated with
        deterministic seed-7777 weights of the shapes noted inline.
    """
    import importlib  # fix: dropped unused `types` from the original import line

    # Import the class but intercept _init_weights so it doesn't call crypto_random_arb
    mod = importlib.import_module('disentangled_vae_joint_generator')
    cls = mod.DisentangledVAEGenerator
    obj = cls.__new__(cls)  # bypass __init__ entirely
    obj.input_dim = 8
    obj.hidden_dim = 4
    obj.latent_dim = 2
    obj.regime_dim = 2
    obj.prec = 64
    obj.beta = 1.0
    obj.is_trained = True
    obj.edain = None
    obj.latent_names = {0: "A", 1: "B"}
    # Fixed seed → the roundtrip test can compare values exactly.
    rng = np.random.RandomState(7777)
    obj.W_ih = rng.randn(8, 16).astype(np.float64)  # (input_dim, hidden*4)
    obj.W_hh = rng.randn(4, 16).astype(np.float64)  # (hidden_dim, hidden*4)
    obj.b_h = rng.randn(16).astype(np.float64)
    obj.W_mu = rng.randn(4, 2).astype(np.float64)
    obj.W_logvar = rng.randn(4, 2).astype(np.float64)
    obj.b_mu = rng.randn(2).astype(np.float64)
    obj.b_logvar = rng.randn(2).astype(np.float64)
    obj.W_dec = rng.randn(2, 4).astype(np.float64)
    obj.W_out = rng.randn(4, 8).astype(np.float64)
    obj.b_dec = rng.randn(4).astype(np.float64)
    obj.b_out = rng.randn(8).astype(np.float64)
    return obj
def _load_sensor():
    """Instantiate a TitanSensor backed by the canonical MCDAIN model JSON."""
    from dvae.titan_sensor import TitanSensor
    sensor = TitanSensor(str(MODEL_PATH))
    return sensor
# ── Test 1: save_model() roundtrip (no FLINT needed) ─────────────────────────
def test_save_model_includes_lstm():
    """T1 — save_model() must persist all 11 weight arrays, and the LSTM
    weights (W_ih / W_hh / b_h) must survive a JSON roundtrip bit-exactly."""
    print("\n[T1] save_model() includes W_ih / W_hh / b_h ...")
    gen = _make_dummy_generator()
    # Reserve a temp path; the handle closes at the end of the `with` so that
    # save_model() can reopen the file for writing (required on Windows).
    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as handle:
        tmp = handle.name
    try:
        gen.save_model(tmp)
        with open(tmp) as handle:
            saved = json.load(handle)
        expected_keys = ('W_ih', 'W_hh', 'b_h', 'W_mu', 'W_logvar', 'W_dec', 'W_out',
                         'b_mu', 'b_logvar', 'b_dec', 'b_out')
        for key in expected_keys:
            assert key in saved, f"{_FAIL}: key '{key}' missing from saved JSON"
        # Exact-value roundtrip checks for the three LSTM arrays.
        W_ih_rt = np.array(saved['W_ih'])
        assert W_ih_rt.shape == gen.W_ih.shape, \
            f"{_FAIL}: W_ih shape mismatch {W_ih_rt.shape} vs {gen.W_ih.shape}"
        assert np.allclose(W_ih_rt, gen.W_ih, atol=1e-15), \
            f"{_FAIL}: W_ih values differ after roundtrip (max={np.max(np.abs(W_ih_rt - gen.W_ih)):.2e})"
        W_hh_rt = np.array(saved['W_hh'])
        assert np.allclose(W_hh_rt, gen.W_hh, atol=1e-15), \
            f"{_FAIL}: W_hh values differ after roundtrip"
        b_h_rt = np.array(saved['b_h'])
        assert np.allclose(b_h_rt, gen.b_h, atol=1e-15), \
            f"{_FAIL}: b_h values differ after roundtrip"
        print(f" {_PASS} W_ih {gen.W_ih.shape} roundtrip exact")
        print(f" {_PASS} W_hh {gen.W_hh.shape} roundtrip exact")
        print(f" {_PASS} b_h {gen.b_h.shape} roundtrip exact")
        print(f" {_PASS} all 11 weight keys present")
    finally:
        os.unlink(tmp)
# ── Test 2: TitanSensor loads W_ih from JSON, not random seed=42 ─────────────
def test_sensor_loads_from_json_not_random():
    """T2 — the sensor must adopt W_ih/W_hh directly from the model JSON and
    must NOT fall back to the old seed-42 random initialisation."""
    print("\n[T2] TitanSensor loads LSTM weights from JSON (not seed=42 random) ...")
    assert MODEL_PATH.exists(), f"{_FAIL}: model not found at {MODEL_PATH}"

    def _max_abs_diff(a, b):
        # Largest elementwise absolute difference between two arrays.
        return np.max(np.abs(a - b))

    with open(MODEL_PATH) as fh:
        model = json.load(fh)
    assert 'W_ih' in model, \
        f"{_FAIL}: W_ih missing from model JSON — model was saved before the fix. Retrain first."
    sensor = _load_sensor()
    assert sensor.lstm_weights_valid, \
        f"{_FAIL}: sensor.lstm_weights_valid=False — W_ih missing from JSON"
    W_ih_json = np.array(model['W_ih'], dtype=np.float64)
    max_diff = _max_abs_diff(sensor.W_ih, W_ih_json)
    assert max_diff < 1e-12, \
        f"{_FAIL}: sensor.W_ih != JSON W_ih (max_diff={max_diff:.3e})"
    print(f" {_PASS} sensor.W_ih == JSON W_ih (max_diff={max_diff:.2e})")
    W_hh_json = np.array(model['W_hh'], dtype=np.float64)
    max_diff_hh = _max_abs_diff(sensor.W_hh, W_hh_json)
    assert max_diff_hh < 1e-12, \
        f"{_FAIL}: sensor.W_hh != JSON W_hh (max_diff={max_diff_hh:.3e})"
    print(f" {_PASS} sensor.W_hh == JSON W_hh (max_diff={max_diff_hh:.2e})")
    # Confirm it is NOT the seed=42 random initialisation
    rng_42 = np.random.RandomState(42)
    W_ih_42 = rng_42.randn(261, 512) * 0.1
    diff_vs_42 = _max_abs_diff(sensor.W_ih, W_ih_42)
    assert diff_vs_42 > 0.01, \
        f"{_FAIL}: sensor.W_ih matches seed=42 random (diff={diff_vs_42:.3e}) — LSTM still wrong"
    print(f" {_PASS} sensor.W_ih is NOT seed=42 random (diff_vs_42={diff_vs_42:.3f})")
# ── Test 3: recon_err is finite and in plausible range ────────────────────────
def test_recon_err_plausible():
    """T3 — recon_err must be finite, bounded, and input-dependent.

    Pre-fix the sensor re-initialised the LSTM randomly and recon_err blew up
    to ~1e14; post-fix it should be O(1)-O(100) and vary across inputs.
    """
    print("\n[T3] recon_err is finite and << 10^6 (was ~10^14 pre-fix) ...")
    sensor = _load_sensor()
    # fix: removed `from dvae.titan_sensor import build_feature_vector` — the
    # imported name was never used in this test.
    rng = np.random.RandomState(42)
    results = {}
    for label, x in [
        ("zeros", np.zeros(261)),
        ("ones", np.ones(261) * 0.01),
        ("random_s", rng.randn(261) * 0.05),
        ("random_l", rng.randn(261) * 2.0),
    ]:
        z_mu, recon_err, z_logvar = sensor.encode(x)
        results[label] = recon_err
        assert np.isfinite(recon_err), \
            f"{_FAIL}: recon_err not finite for '{label}' input: {recon_err}"
        # Pre-fix: ~10^14. Post-fix: should be O(1) or O(100) at worst.
        assert recon_err < 1e8, \
            f"{_FAIL}: recon_err={recon_err:.3e} suspiciously large for '{label}'"
        print(f" {_PASS} [{label:10s}] recon_err={recon_err:.4e} z_mu[0:4]={z_mu[:4].round(4)}")
    # Distribution check: recon_err should vary with input (not uniform noise)
    errs = list(results.values())
    assert max(errs) / (min(errs) + 1e-12) > 2.0, \
        f"{_FAIL}: recon_err suspiciously uniform across inputs ({errs}) — LSTM may still be wrong"
    print(f" {_PASS} recon_err varies meaningfully across inputs (ratio={max(errs)/(min(errs)+1e-12):.1f}x)")
# ── Test 4: Encoding is deterministic ─────────────────────────────────────────
def test_encoding_deterministic():
    """T4 — two independently constructed sensors must encode a fixed input
    to identical z_mu and recon_err (no hidden randomness at inference)."""
    print("\n[T4] encode() is deterministic across two sensor instances ...")
    first = _load_sensor()
    second = _load_sensor()
    probe = np.random.RandomState(99).randn(261) * 0.1
    z1, e1, _ = first.encode(probe)
    z2, e2, _ = second.encode(probe)
    assert np.allclose(z1, z2, atol=1e-14), \
        f"{_FAIL}: z_mu differs between two sensors (max={np.max(np.abs(z1-z2)):.2e})"
    assert abs(e1 - e2) < 1e-10, \
        f"{_FAIL}: recon_err differs between two sensors ({e1:.6e} vs {e2:.6e})"
    print(f" {_PASS} z_mu identical (max_diff={np.max(np.abs(z1-z2)):.2e})")
    print(f" {_PASS} recon_err identical ({e1:.6e})")
# ── Test 5: Legacy model emits RuntimeWarning, sensor.lstm_weights_valid=False ─
def test_legacy_model_warns():
    """T5 — loading a pre-fix JSON (no LSTM keys) must raise a RuntimeWarning
    and leave sensor.lstm_weights_valid=False rather than failing silently."""
    print("\n[T5] Legacy model (no W_ih in JSON) emits RuntimeWarning ...")
    import warnings
    from dvae.titan_sensor import TitanSensor
    # Build a minimal legacy-style JSON (no W_ih/W_hh)
    legacy_model = {
        "W_mu": np.zeros((128, 32)).tolist(),
        "W_logvar": np.zeros((128, 32)).tolist(),
        "W_dec": np.zeros((32, 128)).tolist(),
        "W_out": np.zeros((128, 261)).tolist(),
        "latent_names": {},
        "precision_bits": 512,
    }
    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as fh:
        json.dump(legacy_model, fh)
        tmp = fh.name
    try:
        with warnings.catch_warnings(record=True) as records:
            warnings.simplefilter("always")
            sensor = TitanSensor(tmp)
        assert not sensor.lstm_weights_valid, \
            f"{_FAIL}: lstm_weights_valid should be False for legacy model"
        assert any(issubclass(w.category, RuntimeWarning) for w in records), \
            f"{_FAIL}: no RuntimeWarning emitted for legacy model"
        print(f" {_PASS} lstm_weights_valid=False")
        print(f" {_PASS} RuntimeWarning emitted")
    finally:
        os.unlink(tmp)
# ── Test 6: z_mu dimensionality and range ─────────────────────────────────────
def test_latent_dimensionality():
    """T6 — encode() must return 32-dimensional, finite z_mu / z_logvar with
    z_mu magnitudes in a plausible range."""
    print("\n[T6] z_mu has correct dimensionality and plausible range ...")
    sensor = _load_sensor()
    probe = np.random.RandomState(55).randn(261) * 0.1
    z_mu, recon_err, z_logvar = sensor.encode(probe)
    assert z_mu.shape == (32,), f"{_FAIL}: z_mu shape {z_mu.shape} != (32,)"
    assert z_logvar.shape == (32,), f"{_FAIL}: z_logvar shape {z_logvar.shape} != (32,)"
    assert np.all(np.isfinite(z_mu)), f"{_FAIL}: z_mu contains non-finite values"
    assert np.all(np.isfinite(z_logvar)), f"{_FAIL}: z_logvar contains non-finite values"
    assert np.abs(z_mu).max() < 1e3, \
        f"{_FAIL}: z_mu values suspiciously large (max={np.abs(z_mu).max():.2e})"
    print(f" {_PASS} z_mu.shape=(32,) z_mu range=[{z_mu.min():.3f}, {z_mu.max():.3f}]")
    print(f" {_PASS} z_logvar.shape=(32,) range=[{z_logvar.min():.3f}, {z_logvar.max():.3f}]")
# ── Test 7: GD-trained model has correct LSTM weights and sane recon_err ──────
def test_gd_model():
    """T7 — GD-retrained model: LSTM weights present, identical to the MCDAIN
    model's (shared basis), and small recon_err on in-distribution inputs."""
    print("\n[T7] GD-trained model: W_ih present, recon_err << MCDAIN model ...")
    assert MODEL_PATH_GD.exists(), f"{_FAIL}: GD model not found at {MODEL_PATH_GD}"
    from dvae.titan_sensor import TitanSensor
    gd = TitanSensor(str(MODEL_PATH_GD))
    assert gd.lstm_weights_valid, f"{_FAIL}: GD sensor.lstm_weights_valid=False"
    print(f" {_PASS} GD model: lstm_weights_valid=True")
    # Verify W_ih from GD model matches W_ih from MCDAIN model
    # (they should be THE SAME — GD model was initialized from MCDAIN model's W_ih)
    mc = _load_sensor()
    diff_wih = np.max(np.abs(gd.W_ih - mc.W_ih))
    assert diff_wih < 1e-12, \
        f"{_FAIL}: GD model W_ih != MCDAIN model W_ih (diff={diff_wih:.3e}) — should be same LSTM basis"
    print(f" {_PASS} GD model W_ih == MCDAIN model W_ih (same LSTM basis, diff={diff_wih:.2e})")
    # Reconstruction error: generate inputs GUARANTEED to be in-distribution by
    # sampling x = norm_mean + norm_std * randn (within ±2σ of training corpus).
    # After encode()'s normalization step: v_norm = clip(randn, -2, 2) → perfectly O(1).
    # recon_err should be close to the training-set value (p50=0.59).
    with open(MODEL_PATH_GD) as fh:
        gd_json = json.load(fh)
    nm_gd = np.array(gd_json['norm_mean'])
    ns_gd = np.array(gd_json['norm_std'])
    rng = np.random.RandomState(42)
    errs_gd = []
    errs_mc = []
    for _ in range(20):
        # Guaranteed in-distribution: raw input = corpus_mean + corpus_std * noise
        x_raw = nm_gd + ns_gd * np.clip(rng.randn(261), -2, 2)
        x_raw[78:] = 0.0  # T3/T4/T5 are always 0 in corpus
        _, e_gd, _ = gd.encode(x_raw)
        _, e_mc, _ = mc.encode(x_raw)
        errs_gd.append(e_gd)
        errs_mc.append(e_mc)
    med_gd = float(np.median(errs_gd))
    med_mc = float(np.median(errs_mc))
    print(f" GD median recon_err={med_gd:.4e} (in-distribution) MCDAIN median recon_err={med_mc:.4e}")
    # GD trained with proper GD — should reconstruct in-distribution inputs well
    assert med_gd < 10.0, f"{_FAIL}: GD recon_err too large ({med_gd:.4e}) — model didn't learn"
    print(f" {_PASS} GD recon_err < 10.0 for in-distribution inputs (model learned)")
# ── Test 8: GD-v2 normalization stored and applied at inference ───────────────
def test_normalization_stored_and_applied():
    """T8 — GD-v2 model must store norm_mean/norm_std in its JSON, the sensor
    must load them, and realistic-scale inputs must encode with bounded error."""
    print("\n[T8] GD-v2 model: norm_mean/norm_std present and applied by TitanSensor ...")
    assert MODEL_PATH_GD.exists(), f"{_FAIL}: GD model not found at {MODEL_PATH_GD}"
    from dvae.titan_sensor import TitanSensor
    # 8a: JSON must contain norm_mean / norm_std
    with open(MODEL_PATH_GD) as fh:
        meta = json.load(fh)
    assert 'norm_mean' in meta, f"{_FAIL}: GD model JSON missing 'norm_mean'"
    assert 'norm_std' in meta, f"{_FAIL}: GD model JSON missing 'norm_std'"
    nm = np.array(meta['norm_mean'])
    ns = np.array(meta['norm_std'])
    assert nm.shape == (261,), f"{_FAIL}: norm_mean shape {nm.shape} != (261,)"
    assert ns.shape == (261,), f"{_FAIL}: norm_std shape {ns.shape} != (261,)"
    assert np.all(ns > 0), f"{_FAIL}: norm_std has zero or negative entries"
    print(f" {_PASS} norm_mean/norm_std present, shape=(261,), all std>0")
    # 8b: TitanSensor must load them (not None)
    sensor = TitanSensor(str(MODEL_PATH_GD))
    assert sensor.norm_mean is not None, f"{_FAIL}: sensor.norm_mean is None — not loaded"
    assert sensor.norm_std is not None, f"{_FAIL}: sensor.norm_std is None — not loaded"
    assert np.allclose(sensor.norm_mean, nm, atol=1e-12), \
        f"{_FAIL}: sensor.norm_mean != JSON norm_mean"
    print(f" {_PASS} sensor.norm_mean loaded from JSON")
    # 8c: recon_err on realistic inputs (matching build_feature_vector() output structure)
    # Before fix (MCDAIN-trained, no normalization stored): encode(raw) → huge recon_err
    # After fix (GD-v2, normalization stored and applied): encode(raw) → O(1-50)
    rng = np.random.RandomState(42)
    errs = []
    for _ in range(30):
        sample = np.zeros(261)
        sample[0:8] = rng.randn(8) * 0.7  # T0: time encoding
        sample[8:28] = rng.randn(20) * 0.02  # T1: eigenvalue velocity scale
        sample[28:78] = np.clip(rng.randn(50), -5, 5)  # T2: return z-scores [-5,5]
        # T3/T4/T5 = 0 (Stage 1 contract — matches training corpus)
        _, err, _ = sensor.encode(sample)
        errs.append(err)
    med_err = float(np.median(errs))
    max_err = float(np.max(errs))
    print(f" GD-v2 realistic-input recon_err: median={med_err:.4e} max={max_err:.4e}")
    assert med_err < 100.0, \
        f"{_FAIL}: recon_err too large ({med_err:.4e}) — normalization may not be applied"
    print(f" {_PASS} recon_err O(1-100) for realistic-scale inputs")
    # 8d: T5 dims are zero after normalization (non-zero norm_mean would shift them)
    x_zeros = np.zeros(261)
    _, e_z, _ = sensor.encode(x_zeros)
    # The sensor zeros T5 after normalization — this just checks it doesn't crash
    assert np.isfinite(e_z), f"{_FAIL}: encode(zeros) returned non-finite recon_err={e_z}"
    print(f" {_PASS} encode(zeros) is finite after normalization: recon_err={e_z:.4e}")
# ── Runner ────────────────────────────────────────────────────────────────────
if __name__ == '__main__':
    # Sequential runner: executes every test, counts passes/failures, and
    # exits non-zero on any failure so CI / shell scripts can gate on it.
    print("=" * 60)
    print("TitanSensor LSTM weight fix — unit tests")
    print("=" * 60)
    n_pass = 0
    n_fail = 0
    tests = [
        ("T1: save_model roundtrip", test_save_model_includes_lstm),
        ("T2: sensor loads JSON weights", test_sensor_loads_from_json_not_random),
        ("T3: recon_err plausible", test_recon_err_plausible),
        ("T4: encoding deterministic", test_encoding_deterministic),
        ("T5: legacy model warns", test_legacy_model_warns),
        ("T6: latent dimensionality", test_latent_dimensionality),
        ("T7: GD model quality", test_gd_model),
        ("T8: normalization stored and applied", test_normalization_stored_and_applied),
    ]
    for name, fn in tests:
        try:
            fn()
            n_pass += 1
        except AssertionError as e:
            # fix: include the test name (the EXCEPTION branch already did),
            # so assertion failures stay attributable in filtered/redirected logs.
            print(f" {_FAIL} ASSERTION in {name}: {e}")
            n_fail += 1
        except Exception as e:
            print(f" {_FAIL} EXCEPTION in {name}: {type(e).__name__}: {e}")
            n_fail += 1
        print()  # blank separator line between tests
    print("=" * 60)
    print(f"Results: {n_pass}/{n_pass+n_fail} PASSED {n_fail} FAILED")
    print("=" * 60)
    if n_fail > 0:
        sys.exit(1)