# File: DOLPHIN/nautilus_dolphin/dvae/flint_hd_vae.py (275 lines, 13 KiB)
"""
flint_hd_vae.py
===============
SILOQY-compatible HD-VAE with inverse projection decoder.
Architecture:
Encoder:
T1 (20-dim)
MCDAIN 550-bit normalisation (no upstream modification read-only call)
HD random projection W_enc (20×512), ReLU h (512)
Linear bottleneck: W_mu (512×8), W_lv (512×8) mu, logvar (8)
reparameterisation z (8)
Decoder (inverse projection THE NEW PIECE):
z (8)
Linear W_dec (8×512), ReLU h_hat (512) *inverse of bottleneck*
Linear W_out (512×20) T1_hat (20) *pseudo-inverse of HD proj*
Loss:
recon = MSE(T1_hat, T1_norm)
KL = -0.5 * sum(1 + logvar - mu^2 - exp(logvar)) [standard VAE KL]
total = recon + beta * KL
No upstream files are modified. All SILOQY calls are read-only.
"""
import sys, os
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
sys.path.insert(0, os.path.dirname(__file__))
sys.path.insert(0, r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict")
import numpy as np
from pathlib import Path
from SILOQY_NN_Kernel_COMPLETE6 import arb, safe_float, FLINT_AVAILABLE, with_precision
EPS = 1e-8
# ── MCDAIN 550-bit normalisation (read-only logic, no upstream changes) ────
def mcdain_550bit(X_raw: np.ndarray) -> np.ndarray:
    """Apply MCDAIN analytical normalisation at 550-bit precision.

    For each column: derives a shift (m), scale (s) and sigmoid gate (g) from
    the column's mean absolute magnitude, computed with 550-bit arb arithmetic,
    then applies (x - m) * s * g clipped to [-10, 10]. Columns that are all
    non-finite or have near-zero magnitude are left as zeros.

    Read-only with respect to upstream SILOQY code: only calls `arb`,
    `safe_float` and `with_precision` from the imported kernel module.

    Parameters:
        X_raw: 2-D array of raw features, shape (rows, cols). May contain
               NaN/inf; those are ignored when estimating column magnitude.

    Returns:
        float64 array of the same shape, NaN→0, +inf→5, -inf→-5.
    """
    rows, cols = X_raw.shape  # rows unused; unpack documents expected 2-D shape
    X_norm = np.zeros_like(X_raw, dtype=np.float64)
    with with_precision(550):  # all arb ops below evaluated at 550-bit precision
        for j in range(cols):
            col = X_raw[:, j]
            # Magnitude estimate from finite entries only.
            col_abs = np.abs(col[np.isfinite(col)])
            if len(col_abs) == 0 or col_abs.mean() < 1e-12:
                continue  # degenerate column: leave zeros
            magnitude = arb(str(float(col_abs.mean())))
            log_mag = magnitude.log()
            mean_val = magnitude * arb("0.1")                      # shift m
            scale_val = arb("1.0") / (log_mag + arb("1e-8"))       # scale s
            gate_val = arb("1.0") / (arb("1.0") + (-log_mag).exp())  # sigmoid(log_mag)
            m = safe_float(mean_val)
            s = safe_float(scale_val)
            g = safe_float(gate_val)
            X_norm[:, j] = np.clip((X_raw[:, j] - m) * s * g, -10, 10)
    return np.nan_to_num(X_norm, nan=0.0, posinf=5.0, neginf=-5.0)
# ── Adam optimiser state ───────────────────────────────────────────────────
class AdamParam:
    """A weight matrix bundled with its own Adam optimiser state."""

    def __init__(self, shape, seed=0):
        """He-initialise W with the given shape; zero the Adam moments."""
        rng = np.random.RandomState(seed)
        self.W = rng.randn(*shape).astype(np.float64) * np.sqrt(2.0 / shape[0])
        self.m = np.zeros_like(self.W)  # first-moment running average
        self.v = np.zeros_like(self.W)  # second-moment running average
        self.t = 0                      # update count, for bias correction

    def step(self, grad, lr=1e-3, b1=0.9, b2=0.999):
        """Apply one bias-corrected Adam update of W in place, given grad."""
        self.t += 1
        # EMA updates written in incremental form: m ← m + (1-b1)(g - m)
        # is algebraically identical to m ← b1·m + (1-b1)·g.
        self.m += (1.0 - b1) * (grad - self.m)
        self.v += (1.0 - b2) * (grad * grad - self.v)
        bias1 = 1.0 - b1 ** self.t
        bias2 = 1.0 - b2 ** self.t
        self.W -= lr * (self.m / bias1) / (np.sqrt(self.v / bias2) + EPS)
# ── FlintHDVAE ────────────────────────────────────────────────────────────
class FlintHDVAE:
    """
    HD-VAE with 550-bit MCDAIN encoder normalisation and an inverse-projection
    decoder.

    Encoder : T1 (input_dim) → fixed random HD projection + ReLU (hd_dim)
              → linear mu / logvar heads (latent_dim) → reparameterised z.
    Decoder : z (latent_dim) → Linear + ReLU (hd_dim) → Linear (input_dim).
    Loss    : MSE reconstruction + beta * standard diagonal-Gaussian KL.

    Parameters
    ----------
    input_dim : int
        Width of the raw T1 feature vector.
    hd_dim : int
        Width of the hyper-dimensional hidden layer.
    latent_dim : int
        Width of the latent bottleneck.
    beta : float
        Weight of the KL term in the total loss.
    seed : int
        Seed for all weight initialisation.
    use_flint_norm : bool
        Use MCDAIN 550-bit normalisation when FLINT is available; otherwise
        fall back to a plain z-score.
    """

    def __init__(self, input_dim=20, hd_dim=512, latent_dim=8,
                 beta=0.5, seed=42, use_flint_norm=True):
        self.input_dim = input_dim
        self.hd_dim = hd_dim
        self.latent_dim = latent_dim
        self.beta = beta
        self.use_flint = use_flint_norm and FLINT_AVAILABLE
        rng = np.random.RandomState(seed)
        # Fixed random HD projection (encoder side, non-trainable).
        self.W_hd = rng.randn(input_dim, hd_dim).astype(np.float64) * np.sqrt(2.0 / input_dim)
        # Trainable parameters — encoder bottleneck.
        self.P_mu = AdamParam((hd_dim, latent_dim), seed=seed + 1)
        self.P_lv = AdamParam((hd_dim, latent_dim), seed=seed + 2)
        # Trainable parameters — decoder (inverse projection, THE NEW PIECE).
        self.P_dec = AdamParam((latent_dim, hd_dim), seed=seed + 3)  # z → h_hat
        self.P_out = AdamParam((hd_dim, input_dim), seed=seed + 4)   # h_hat → T1_hat
        # Normaliser statistics (fitted once by fit_normaliser).
        self._norm_fitted = False
        self._norm_mu = np.zeros(input_dim)
        self._norm_sd = np.ones(input_dim)
        self.train_losses = []

    # ── Normalisation ──────────────────────────────────────────────────────
    def fit_normaliser(self, X: np.ndarray):
        """Fit normaliser statistics from the FULL training set (called once).

        For MCDAIN: runs mcdain_550bit on the whole of X and stores the
        per-column mean/std of its output so that every subsequent
        _normalise() call is deterministic (no batch dependency).
        Falls back to z-score statistics if FLINT is unavailable.
        """
        self._norm_mu = X.mean(0)
        self._norm_sd = X.std(0) + EPS
        if self.use_flint:
            # Compute MCDAIN output column-wise on full X, store fixed stats.
            X_norm_full = mcdain_550bit(X)
            # Effective per-column shift/scale = z-score of the MCDAIN output.
            self._mcdain_mu = X_norm_full.mean(0)
            self._mcdain_sd = X_norm_full.std(0) + EPS
            self._mcdain_fitted = True
            self._X_norm_ref = X_norm_full  # diagnostics only (not used in loops)
        self._norm_fitted = True

    def _normalise(self, X: np.ndarray) -> np.ndarray:
        """Normalise X using the statistics frozen by fit_normaliser."""
        if self.use_flint and self._norm_fitted and hasattr(self, '_mcdain_fitted'):
            # MCDAIN then standardise with TRAINING statistics — deterministic
            # regardless of batch size.
            raw = mcdain_550bit(X)
            return (raw - self._mcdain_mu) / self._mcdain_sd
        return (X - self._norm_mu) / self._norm_sd

    # ── Forward pass ──────────────────────────────────────────────────────
    def _encode(self, X_norm, rng):
        """X_norm (B, input_dim) → h (B, hd_dim) → mu, logvar → sampled z."""
        h = np.maximum(0, X_norm @ self.W_hd)    # (B, hd_dim) ReLU
        mu = h @ self.P_mu.W                     # (B, latent_dim)
        lv = np.clip(h @ self.P_lv.W, -4, 4)     # clipped logvar for stability
        eps = rng.randn(*mu.shape)
        z = mu + np.exp(0.5 * lv) * eps          # reparameterisation trick
        return h, mu, lv, z

    def _decode(self, z):
        """z (B, latent_dim) → h_hat (B, hd_dim) → T1_hat — INVERSE PROJECTION."""
        h_hat = np.maximum(0, z @ self.P_dec.W)  # (B, hd_dim) ReLU
        T1_hat = h_hat @ self.P_out.W            # (B, input_dim) linear
        return h_hat, T1_hat

    # ── Loss ──────────────────────────────────────────────────────────────
    def _loss(self, T1_norm, T1_hat, mu, lv):
        """Return (total, recon, kl). Both terms are element-wise means."""
        recon = np.mean((T1_hat - T1_norm) ** 2)
        kl = -0.5 * np.mean(1 + lv - mu ** 2 - np.exp(lv))
        return recon + self.beta * kl, recon, kl

    # ── Backward (analytical gradients) ───────────────────────────────────
    def _backward(self, T1_norm, T1_hat, h, h_hat, mu, lv, z, lr):
        """One Adam step on all trainable matrices from a single batch."""
        B = len(T1_norm)
        # ── Decoder gradients ────────────────────────────────────────────
        # recon = mean over B*input_dim elements → gradient divided by both.
        dT1 = 2.0 * (T1_hat - T1_norm) / (B * self.input_dim)
        dW_out = h_hat.T @ dT1                          # (hd_dim, input_dim)
        self.P_out.step(dW_out, lr)
        dh_hat = (dT1 @ self.P_out.W.T) * (h_hat > 0)   # back through ReLU
        dW_dec = z.T @ dh_hat                           # (latent_dim, hd_dim)
        self.P_dec.step(dW_dec, lr)
        dz_dec = dh_hat @ self.P_dec.W.T                # (B, latent_dim)
        # ── KL gradients (standard diagonal-Gaussian VAE) ────────────────
        # _loss averages the KL over B*latent_dim elements, so the gradient
        # must be divided by the same count. (Bug fix: previous code divided
        # by B only, silently scaling the effective beta by latent_dim.)
        denom = B * self.latent_dim
        dmu_kl = self.beta * mu / denom
        dlv_kl = self.beta * 0.5 * (np.exp(lv) - 1) / denom
        # ── Reparameterisation: z = mu + exp(0.5*lv)*eps ─────────────────
        # dz/dmu = 1 ; dz/dlv = 0.5*exp(0.5*lv)*eps = 0.5*(z - mu)  (exact)
        dmu = dz_dec + dmu_kl
        dlv = dz_dec * 0.5 * (z - mu) + dlv_kl
        # ── Encoder bottleneck gradients (W_hd is fixed, no gradient) ────
        self.P_mu.step(h.T @ dmu, lr)
        self.P_lv.step(h.T @ dlv, lr)

    # ── Training ──────────────────────────────────────────────────────────
    def fit(self, X: np.ndarray, epochs=30, lr=1e-3,
            batch_size=256, verbose=True, warmup_frac=0.3):
        """Train on X with mini-batch Adam and a linear KL warm-up.

        warmup_frac: fraction of epochs over which beta ramps 0 → self.beta.
        Prevents KL from dominating before the decoder learns to reconstruct.
        Returns self.
        """
        rng = np.random.RandomState(42)
        self.fit_normaliser(X)       # computes global MCDAIN stats once
        X_norm = self._normalise(X)  # normalise once; stable across batches
        N = len(X_norm)
        target_beta = self.beta
        warmup_epochs = max(1, int(epochs * warmup_frac))
        for epoch in range(1, epochs + 1):
            # KL warm-up: ramp beta from 0 to target over first warmup_epochs.
            if epoch <= warmup_epochs:
                self.beta = target_beta * (epoch / warmup_epochs)
            else:
                self.beta = target_beta
            idx = rng.permutation(N)
            ep_loss = ep_recon = ep_kl = 0.0
            n_batches = 0
            for start in range(0, N, batch_size):
                bi = idx[start:start + batch_size]
                Xb = X_norm[bi]  # already normalised with global stats
                h, mu, lv, z = self._encode(Xb, rng)
                h_hat, T1_hat = self._decode(z)
                loss, recon, kl = self._loss(Xb, T1_hat, mu, lv)
                self._backward(Xb, T1_hat, h, h_hat, mu, lv, z, lr)
                ep_loss += loss; ep_recon += recon; ep_kl += kl
                n_batches += 1
            ep_loss /= n_batches; ep_recon /= n_batches; ep_kl /= n_batches
            self.train_losses.append(ep_loss)
            if verbose and (epoch % 5 == 0 or epoch == 1):
                # Anti-collapse diagnostic on a fixed prefix. mu is computed
                # directly (no sampling) so the training RNG state is not
                # advanced — toggling verbose no longer changes the run.
                # (Bug fix: previously _encode(..., rng) drew eps here.)
                sample_norm = X_norm[:min(1000, N)]
                mu_s = np.maximum(0, sample_norm @ self.W_hd) @ self.P_mu.W
                var_per_dim = mu_s.var(0)
                print(f" ep{epoch:3d}/{epochs} beta={self.beta:.3f} "
                      f"loss={ep_loss:.4f} recon={ep_recon:.4f} kl={ep_kl:.4f} "
                      f"z_var=[{' '.join(f'{v:.3f}' for v in var_per_dim)}]")
        self.beta = target_beta  # restore after warm-up mutation
        return self

    # ── Encode for downstream use ─────────────────────────────────────────
    def encode(self, X: np.ndarray) -> np.ndarray:
        """Return deterministic mu (B, latent_dim) for all samples.

        Normalisation uses the global stats frozen by fit_normaliser, so the
        result does not depend on how X is batched here.
        """
        rng = np.random.RandomState(0)  # _encode samples z; only mu is kept
        STEP = 512
        mus = []
        for s in range(0, len(X), STEP):
            Xb = self._normalise(X[s:s + STEP])
            _, mu, _, _ = self._encode(Xb, rng)
            mus.append(mu)
        return np.concatenate(mus)

    def reconstruct(self, X: np.ndarray) -> np.ndarray:
        """Return (T1_hat, X_norm), both in the same normalised space.

        Decodes from the deterministic mu (z sampling discarded), so output
        is reproducible. Normalisation uses global fit_normaliser stats.
        """
        rng = np.random.RandomState(0)
        Xn = self._normalise(X)
        STEP = 512
        hats = []
        for s in range(0, len(Xn), STEP):
            _, mu, _, _ = self._encode(Xn[s:s + STEP], rng)
            _, T1_hat = self._decode(mu)
            hats.append(T1_hat)
        return np.concatenate(hats), Xn