Files
DOLPHIN/nautilus_dolphin/dvae/flint_hd_vae.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

275 lines
13 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
flint_hd_vae.py
===============
SILOQY-compatible HD-VAE with inverse projection decoder.
Architecture:
Encoder:
T1 (20-dim)
→ MCDAIN 550-bit normalisation (no upstream modification — read-only call)
→ HD random projection W_enc (20×512), ReLU → h (512)
→ Linear bottleneck: W_mu (512×8), W_lv (512×8) → mu, logvar (8)
→ reparameterisation → z (8)
Decoder (inverse projection — THE NEW PIECE):
z (8)
→ Linear W_dec (8×512), ReLU → h_hat (512) *inverse of bottleneck*
→ Linear W_out (512×20) → T1_hat (20) *pseudo-inverse of HD proj*
Loss:
recon = MSE(T1_hat, T1_norm)
KL = -0.5 * sum(1 + logvar - mu^2 - exp(logvar)) [standard VAE KL]
total = recon + beta * KL
No upstream files are modified. All SILOQY calls are read-only.
"""
import sys, os
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
sys.path.insert(0, os.path.dirname(__file__))
sys.path.insert(0, r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict")
import numpy as np
from pathlib import Path
from SILOQY_NN_Kernel_COMPLETE6 import arb, safe_float, FLINT_AVAILABLE, with_precision
EPS = 1e-8
# ── MCDAIN 550-bit normalisation (read-only logic, no upstream changes) ────
def mcdain_550bit(X_raw: np.ndarray) -> np.ndarray:
    """Apply MCDAIN analytical normalisation at 550-bit precision.

    For every column: take the mean absolute magnitude of its finite
    entries, derive an analytical shift / scale / sigmoid-gate triple from
    it using flint arbs inside a 550-bit precision context, then apply
    (x - shift) * scale * gate and clip to [-10, 10].  Columns with no
    finite entries or near-zero magnitude stay at zero.  Read-only with
    respect to upstream SILOQY state — nothing upstream is modified.
    """
    n_cols = X_raw.shape[1]
    out = np.zeros_like(X_raw, dtype=np.float64)
    with with_precision(550):
        for c in range(n_cols):
            column = X_raw[:, c]
            finite_abs = np.abs(column[np.isfinite(column)])
            # Skip degenerate columns: all-NaN/inf, or magnitude ~0.
            if finite_abs.size == 0:
                continue
            mean_mag = finite_abs.mean()
            if mean_mag < 1e-12:
                continue
            # High-precision derivation of the three per-column parameters.
            mag = arb(str(float(mean_mag)))
            lm = mag.log()
            shift = safe_float(mag * arb("0.1"))
            scale = safe_float(arb("1.0") / (lm + arb("1e-8")))
            gate = safe_float(arb("1.0") / (arb("1.0") + (-lm).exp()))
            out[:, c] = np.clip((X_raw[:, c] - shift) * scale * gate, -10, 10)
    return np.nan_to_num(out, nan=0.0, posinf=5.0, neginf=-5.0)
# ── Adam optimiser state ───────────────────────────────────────────────────
class AdamParam:
    """A single weight matrix bundled with its Adam optimiser state.

    Weights are He-initialised (std = sqrt(2 / fan_in), suited to the
    ReLU layers that consume them) from a seeded ``RandomState`` so that
    construction is fully reproducible.

    Fix/generalisation: ``step`` previously relied on the module-global
    ``EPS`` for its denominator fuzz; it is now an explicit ``eps``
    keyword defaulting to the same value (1e-8), so existing callers see
    identical behaviour while the class no longer depends on module state.
    """
    def __init__(self, shape, seed=0):
        rng = np.random.RandomState(seed)
        scale = np.sqrt(2.0 / shape[0])  # He init: variance 2/fan_in
        self.W = rng.randn(*shape).astype(np.float64) * scale
        self.m = np.zeros_like(self.W)   # first-moment (mean) estimate
        self.v = np.zeros_like(self.W)   # second-moment (uncentred var) estimate
        self.t = 0                       # update count, drives bias correction

    def step(self, grad, lr=1e-3, b1=0.9, b2=0.999, eps=1e-8):
        """Apply one bias-corrected Adam update to ``self.W`` in place.

        Parameters
        ----------
        grad : np.ndarray
            Gradient of the loss w.r.t. ``W`` (same shape as ``W``).
        lr : float
            Learning rate.
        b1, b2 : float
            Exponential decay rates for the first/second moment estimates.
        eps : float
            Denominator fuzz to avoid division by zero (default matches
            the module-level EPS of 1e-8).
        """
        self.t += 1
        self.m = b1 * self.m + (1 - b1) * grad
        self.v = b2 * self.v + (1 - b2) * grad**2
        m_hat = self.m / (1 - b1**self.t)  # bias-corrected first moment
        v_hat = self.v / (1 - b2**self.t)  # bias-corrected second moment
        self.W -= lr * m_hat / (np.sqrt(v_hat) + eps)
# ── FlintHDVAE ────────────────────────────────────────────────────────────
class FlintHDVAE:
    """
    HD-VAE with 550-bit MCDAIN encoder normalisation.
    Inverse projection decoder: z(8) → Linear+ReLU(512) → Linear(20).

    All math is plain numpy with hand-derived analytical gradients (no
    autograd).  Only the bottleneck heads (P_mu, P_lv) and the decoder
    (P_dec, P_out) are trained; the encoder's random HD projection W_hd
    stays fixed for the lifetime of the model.
    """
    def __init__(self, input_dim=20, hd_dim=512, latent_dim=8,
                 beta=0.5, seed=42, use_flint_norm=True):
        # beta weights the KL term in the total loss; during fit() it is
        # temporarily overwritten for warmup and restored afterwards.
        self.input_dim = input_dim
        self.hd_dim = hd_dim
        self.latent_dim = latent_dim
        self.beta = beta
        # MCDAIN is only used when both requested AND flint is importable.
        self.use_flint = use_flint_norm and FLINT_AVAILABLE
        rng = np.random.RandomState(seed)
        # Fixed random HD projection (encoder side, non-trainable)
        self.W_hd = rng.randn(input_dim, hd_dim).astype(np.float64) * np.sqrt(2.0/input_dim)
        # Trainable parameters — encoder bottleneck
        self.P_mu = AdamParam((hd_dim, latent_dim), seed=seed+1)
        self.P_lv = AdamParam((hd_dim, latent_dim), seed=seed+2)
        # Trainable parameters — DECODER (inverse projection, THE NEW PIECE)
        self.P_dec = AdamParam((latent_dim, hd_dim), seed=seed+3)  # z→h_hat
        self.P_out = AdamParam((hd_dim, input_dim), seed=seed+4)  # h_hat→T1_hat
        # Normaliser stats (fitted once)
        self._norm_fitted = False
        self._norm_mu = np.zeros(input_dim)   # z-score fallback mean
        self._norm_sd = np.ones(input_dim)    # z-score fallback std
        self.train_losses = []                # per-epoch mean total loss

    # ── Normalisation ──────────────────────────────────────────────────────
    def fit_normaliser(self, X: np.ndarray):
        """Fit normaliser stats from the FULL training set (called once).
        For MCDAIN: computes global per-column m/s/g and stores them so that
        all subsequent _normalise() calls are deterministic (no batch-dependency).
        Falls back to z-score if FLINT unavailable."""
        # z-score stats are always fitted (the non-flint fallback path).
        self._norm_mu = X.mean(0)
        self._norm_sd = X.std(0) + EPS
        if self.use_flint:
            # Compute MCDAIN params column-wise on full X, store as fixed stats
            X_norm_full = mcdain_550bit(X)
            # Store the effective per-column shift/scale as z-score of the MCDAIN output
            self._mcdain_mu = X_norm_full.mean(0)
            self._mcdain_sd = X_norm_full.std(0) + EPS
            # Also store the raw MCDAIN params by fitting a passthrough
            self._mcdain_fitted = True
            self._X_norm_ref = X_norm_full  # kept for diagnostics only (not used in loops)
        self._norm_fitted = True

    def _normalise(self, X: np.ndarray) -> np.ndarray:
        # NOTE(review): mcdain_550bit re-derives its column params from X
        # itself here, so only the subsequent standardisation uses training
        # stats — confirm that is the intended "deterministic" contract.
        if self.use_flint and self._norm_fitted and hasattr(self, '_mcdain_fitted'):
            # Apply MCDAIN then standardise using TRAINING statistics
            # This makes normalisation deterministic regardless of batch size
            raw = mcdain_550bit(X)
            return (raw - self._mcdain_mu) / self._mcdain_sd
        return (X - self._norm_mu) / self._norm_sd

    # ── Forward pass ──────────────────────────────────────────────────────
    def _encode(self, X_norm: np.ndarray, rng):
        """X_norm (B,20) → h (B,512) → mu,logvar (B,8) → z (B,8)"""
        h = np.maximum(0, X_norm @ self.W_hd)  # (B, 512) ReLU
        mu = h @ self.P_mu.W  # (B, 8)
        # logvar clipped to [-4, 4] to keep exp() and the KL term bounded
        lv = np.clip(h @ self.P_lv.W, -4, 4)  # (B, 8)
        eps = rng.randn(*mu.shape)
        z = mu + np.exp(0.5 * lv) * eps  # reparam
        return h, mu, lv, z

    def _decode(self, z: np.ndarray):
        """z (B,8) → h_hat (B,512) → T1_hat (B,20) — INVERSE PROJECTION"""
        h_hat = np.maximum(0, z @ self.P_dec.W)  # (B, 512) ReLU
        T1_hat = h_hat @ self.P_out.W  # (B, 20) linear
        return h_hat, T1_hat

    # ── Loss ──────────────────────────────────────────────────────────────
    def _loss(self, T1_norm, T1_hat, mu, lv):
        """Return (total, recon, kl); recon and kl are element-wise means."""
        B = len(T1_norm)  # NOTE(review): unused — both terms below use np.mean
        recon = np.mean((T1_hat - T1_norm)**2)
        # Standard diagonal-Gaussian KL vs N(0, I), averaged over B*latent_dim.
        kl = -0.5 * np.mean(1 + lv - mu**2 - np.exp(lv))
        total = recon + self.beta * kl
        return total, recon, kl

    # ── Backward (analytical gradients) ───────────────────────────────────
    def _backward(self, T1_norm, T1_hat, h, h_hat, mu, lv, z, lr):
        """One analytical backprop + Adam step for all trainable matrices.

        Gradients flow: recon loss → P_out → P_dec → z → (reparam) → mu, lv,
        with the KL gradients added directly onto mu and lv.  Each
        AdamParam.step() mutates its weights in place.
        """
        B = len(T1_norm)
        # ── Decoder gradients ────────────────────────────────────────────
        # dL/dT1_hat = 2*(T1_hat - T1_norm) / (B*D)
        dT1 = 2.0 * (T1_hat - T1_norm) / (B * self.input_dim)
        # W_out: h_hat.T @ dT1
        dW_out = h_hat.T @ dT1  # (512, 20)
        self.P_out.step(dW_out, lr)
        # Back through ReLU of h_hat
        dh_hat = (dT1 @ self.P_out.W.T) * (h_hat > 0)  # (B, 512)
        # W_dec: z.T @ dh_hat
        dW_dec = z.T @ dh_hat  # (8, 512)
        self.P_dec.step(dW_dec, lr)
        # dz from decoder
        dz_dec = dh_hat @ self.P_dec.W.T  # (B, 8)
        # ── KL gradients (standard VAE) ──────────────────────────────────
        # dKL/dmu = mu/B; dKL/dlv = 0.5*(exp(lv)-1)/B
        # NOTE(review): _loss averages KL over B*latent_dim elements but
        # these divide by B only — the extra factor latent_dim is absorbed
        # into the effective beta; confirm that scaling is intentional.
        dmu_kl = self.beta * mu / B
        dlv_kl = self.beta * 0.5 * (np.exp(lv) - 1) / B
        # ── Reparameterisation: dz flows back to mu and lv ───────────────
        # z = mu + exp(0.5*lv)*eps → dmu = dz, dlv = dz*0.5*z (approx)
        dmu = dz_dec + dmu_kl
        # 0.5*(z - mu) = 0.5*exp(0.5*lv)*eps = dz/dlv, so this is exact.
        dlv = dz_dec * 0.5 * (z - mu) + dlv_kl  # chain rule
        # ── Encoder bottleneck gradients ─────────────────────────────────
        dW_mu = h.T @ dmu  # (512, 8)
        dW_lv = h.T @ dlv
        self.P_mu.step(dW_mu, lr)
        self.P_lv.step(dW_lv, lr)
        # (W_hd is fixed, no gradient needed for it)

    # ── Training ──────────────────────────────────────────────────────────
    def fit(self, X: np.ndarray, epochs=30, lr=1e-3,
            batch_size=256, verbose=True, warmup_frac=0.3):
        """
        warmup_frac: fraction of epochs over which beta ramps 0 → self.beta.
        Prevents KL from dominating before the decoder learns to reconstruct.

        Mutates self.beta during training (warmup) and restores it before
        returning.  Returns self for chaining.
        """
        rng = np.random.RandomState(42)
        self.fit_normaliser(X)  # computes global MCDAIN stats once
        X_norm = self._normalise(X)  # normalise full dataset once; stable across batches
        N = len(X_norm)
        target_beta = self.beta
        warmup_epochs = max(1, int(epochs * warmup_frac))
        for epoch in range(1, epochs + 1):
            # KL warmup: ramp beta from 0 to target over first warmup_epochs
            if epoch <= warmup_epochs:
                self.beta = target_beta * (epoch / warmup_epochs)
            else:
                self.beta = target_beta
            idx = rng.permutation(N)
            ep_loss = ep_recon = ep_kl = 0.0
            n_batches = 0
            for start in range(0, N, batch_size):
                bi = idx[start:start + batch_size]
                Xb = X_norm[bi]  # already normalised with global stats
                h, mu, lv, z = self._encode(Xb, rng)
                h_hat, T1_hat = self._decode(z)
                loss, recon, kl = self._loss(Xb, T1_hat, mu, lv)
                self._backward(Xb, T1_hat, h, h_hat, mu, lv, z, lr)
                ep_loss += loss; ep_recon += recon; ep_kl += kl
                n_batches += 1
            ep_loss /= n_batches; ep_recon /= n_batches; ep_kl /= n_batches
            self.train_losses.append(ep_loss)
            if verbose and (epoch % 5 == 0 or epoch == 1):
                # Anti-collapse diagnostic: encode a fixed held-out sample
                # (per-dim variance of mu ~0 would indicate posterior collapse)
                sample_norm = X_norm[:min(1000, N)]
                _, mu_s, _, _ = self._encode(sample_norm, rng)
                var_per_dim = mu_s.var(0)
                print(f" ep{epoch:3d}/{epochs} beta={self.beta:.3f} "
                      f"loss={ep_loss:.4f} recon={ep_recon:.4f} kl={ep_kl:.4f} "
                      f"z_var=[{' '.join(f'{v:.3f}' for v in var_per_dim)}]")
        self.beta = target_beta  # restore after training
        return self

    # ── Encode for downstream use ─────────────────────────────────────────
    def encode(self, X: np.ndarray) -> np.ndarray:
        """Return deterministic mu (B, latent_dim) for all samples.
        Normalisation is deterministic (global MCDAIN stats from fit_normaliser)."""
        # rng is consumed by _encode's reparam draw but z is discarded here;
        # only the deterministic mu head is returned.
        rng = np.random.RandomState(0)
        STEP = 512  # chunk size keeps peak memory of the (B,512) hidden bounded
        mus = []
        for s in range(0, len(X), STEP):
            Xb = self._normalise(X[s:s+STEP])
            _, mu, _, _ = self._encode(Xb, rng)
            mus.append(mu)
        return np.concatenate(mus)

    def reconstruct(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Returns (T1_hat, X_norm) both in the same normalised space.
        Normalisation is deterministic (global MCDAIN stats from fit_normaliser)."""
        rng = np.random.RandomState(0)
        Xn = self._normalise(X)
        STEP = 512
        hats = []
        for s in range(0, len(Xn), STEP):
            # Decode from mu (not a sampled z) → deterministic reconstruction
            _, mu, _, _ = self._encode(Xn[s:s+STEP], rng)
            _, T1_hat = self._decode(mu)
            hats.append(T1_hat)
        return np.concatenate(hats), Xn