"""
flint_hd_vae.py
===============
SILOQY-compatible HD-VAE with inverse projection decoder.

Architecture:
    Encoder:
        T1 (20-dim)
        → MCDAIN 550-bit normalisation (no upstream modification — read-only call)
        → HD random projection W_enc (20×512), ReLU → h (512)
        → Linear bottleneck: W_mu (512×8), W_lv (512×8) → mu, logvar (8)
        → reparameterisation → z (8)

    Decoder (inverse projection — THE NEW PIECE):
        z (8)
        → Linear W_dec (8×512), ReLU → h_hat (512)   *inverse of bottleneck*
        → Linear W_out (512×20) → T1_hat (20)        *pseudo-inverse of HD proj*

    Loss:
        recon = MSE(T1_hat, T1_norm)
        KL    = -0.5 * mean(1 + logvar - mu^2 - exp(logvar))  [standard VAE KL,
                averaged over batch AND latent dims to match the analytic grads]
        total = recon + beta * KL

No upstream files are modified. All SILOQY calls are read-only.
"""
import sys
import os

sys.stdout.reconfigure(encoding='utf-8', errors='replace')
sys.path.insert(0, os.path.dirname(__file__))
sys.path.insert(0, r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict")

import numpy as np
from pathlib import Path

from SILOQY_NN_Kernel_COMPLETE6 import arb, safe_float, FLINT_AVAILABLE, with_precision

EPS = 1e-8


# ── MCDAIN 550-bit normalisation (read-only logic, no upstream changes) ────
def mcdain_550bit(X_raw: np.ndarray) -> np.ndarray:
    """Apply MCDAIN analytical normalisation at 550-bit precision.

    Per column: derive shift m, scale s and sigmoid gate g from the mean
    absolute magnitude (computed at 550-bit precision via flint `arb`),
    then emit clip((x - m) * s * g, -10, 10). Columns that are all
    non-finite or have near-zero magnitude are left as zeros.

    Parameters
    ----------
    X_raw : (rows, cols) array of raw T1 features.

    Returns
    -------
    (rows, cols) float64 array with NaN/inf sanitised.
    """
    rows, cols = X_raw.shape
    X_norm = np.zeros_like(X_raw, dtype=np.float64)
    with with_precision(550):
        for j in range(cols):
            col = X_raw[:, j]
            col_abs = np.abs(col[np.isfinite(col)])
            # Skip empty or effectively-zero columns (avoid log(0) below).
            mean_abs = col_abs.mean() if len(col_abs) else 0.0
            if mean_abs < 1e-12:
                continue
            magnitude = arb(str(float(mean_abs)))
            log_mag = magnitude.log()
            mean_val = magnitude * arb("0.1")
            scale_val = arb("1.0") / (log_mag + arb("1e-8"))
            gate_val = arb("1.0") / (arb("1.0") + (-log_mag).exp())  # sigmoid(log_mag)
            m = safe_float(mean_val)
            s = safe_float(scale_val)
            g = safe_float(gate_val)
            X_norm[:, j] = np.clip((X_raw[:, j] - m) * s * g, -10, 10)
    return np.nan_to_num(X_norm, nan=0.0, posinf=5.0, neginf=-5.0)


# ── Adam optimiser state ───────────────────────────────────────────────────
class AdamParam:
    """A single trainable weight matrix with its own Adam moment buffers."""

    def __init__(self, shape, seed=0):
        rng = np.random.RandomState(seed)
        scale = np.sqrt(2.0 / shape[0])  # He-style fan-in init
        self.W = rng.randn(*shape).astype(np.float64) * scale
        self.m = np.zeros_like(self.W)   # first-moment estimate
        self.v = np.zeros_like(self.W)   # second-moment estimate
        self.t = 0                       # step counter for bias correction

    def step(self, grad, lr=1e-3, b1=0.9, b2=0.999):
        """One bias-corrected Adam update of self.W against `grad`."""
        self.t += 1
        self.m = b1 * self.m + (1 - b1) * grad
        self.v = b2 * self.v + (1 - b2) * grad**2
        m_hat = self.m / (1 - b1**self.t)
        v_hat = self.v / (1 - b2**self.t)
        self.W -= lr * m_hat / (np.sqrt(v_hat) + EPS)


# ── FlintHDVAE ─────────────────────────────────────────────────────────────
class FlintHDVAE:
    """
    HD-VAE with 550-bit MCDAIN encoder normalisation.
    Inverse projection decoder: z(8) → Linear+ReLU(512) → Linear(20).
    """

    def __init__(self, input_dim=20, hd_dim=512, latent_dim=8,
                 beta=0.5, seed=42, use_flint_norm=True):
        self.input_dim = input_dim
        self.hd_dim = hd_dim
        self.latent_dim = latent_dim
        self.beta = beta
        self.use_flint = use_flint_norm and FLINT_AVAILABLE
        rng = np.random.RandomState(seed)
        # Fixed random HD projection (encoder side, non-trainable)
        self.W_hd = rng.randn(input_dim, hd_dim).astype(np.float64) * np.sqrt(2.0 / input_dim)
        # Trainable parameters — encoder bottleneck
        self.P_mu = AdamParam((hd_dim, latent_dim), seed=seed + 1)
        self.P_lv = AdamParam((hd_dim, latent_dim), seed=seed + 2)
        # Trainable parameters — DECODER (inverse projection, THE NEW PIECE)
        self.P_dec = AdamParam((latent_dim, hd_dim), seed=seed + 3)  # z→h_hat
        self.P_out = AdamParam((hd_dim, input_dim), seed=seed + 4)   # h_hat→T1_hat
        # Normaliser stats (fitted once)
        self._norm_fitted = False
        self._norm_mu = np.zeros(input_dim)
        self._norm_sd = np.ones(input_dim)
        self.train_losses = []

    # ── Normalisation ──────────────────────────────────────────────────────
    def fit_normaliser(self, X: np.ndarray):
        """Fit normaliser stats from the FULL training set (called once).

        For MCDAIN: computes global per-column m/s/g and stores them so that
        all subsequent _normalise() calls are deterministic (no batch-dependency).
        Falls back to z-score if FLINT unavailable."""
        self._norm_mu = X.mean(0)
        self._norm_sd = X.std(0) + EPS
        if self.use_flint:
            # Compute MCDAIN params column-wise on full X, store as fixed stats
            X_norm_full = mcdain_550bit(X)
            # Store the effective per-column shift/scale as z-score of the MCDAIN output
            self._mcdain_mu = X_norm_full.mean(0)
            self._mcdain_sd = X_norm_full.std(0) + EPS
            self._mcdain_fitted = True
            self._X_norm_ref = X_norm_full  # kept for diagnostics only (not used in loops)
        self._norm_fitted = True

    def _normalise(self, X: np.ndarray) -> np.ndarray:
        """Map raw X to the training-time normalised space (deterministic)."""
        if self.use_flint and self._norm_fitted and hasattr(self, '_mcdain_fitted'):
            # Apply MCDAIN then standardise using TRAINING statistics.
            # NOTE(review): mcdain_550bit re-derives per-column m/s/g from the
            # batch passed in, so strict batch-independence relies on those
            # params being stable across batches — confirm against MCDAIN spec.
            raw = mcdain_550bit(X)
            return (raw - self._mcdain_mu) / self._mcdain_sd
        return (X - self._norm_mu) / self._norm_sd

    # ── Forward pass ───────────────────────────────────────────────────────
    def _encode(self, X_norm, rng):
        """X_norm (B,20) → h (B,512) → mu,logvar (B,8) → z (B,8)"""
        h = np.maximum(0, X_norm @ self.W_hd)      # (B, 512) ReLU
        mu = h @ self.P_mu.W                       # (B, 8)
        lv = np.clip(h @ self.P_lv.W, -4, 4)       # (B, 8) clamp for stability
        eps = rng.randn(*mu.shape)
        z = mu + np.exp(0.5 * lv) * eps            # reparameterisation trick
        return h, mu, lv, z

    def _decode(self, z):
        """z (B,8) → h_hat (B,512) → T1_hat (B,20) — INVERSE PROJECTION"""
        h_hat = np.maximum(0, z @ self.P_dec.W)    # (B, 512) ReLU
        T1_hat = h_hat @ self.P_out.W              # (B, 20) linear
        return h_hat, T1_hat

    # ── Loss ───────────────────────────────────────────────────────────────
    def _loss(self, T1_norm, T1_hat, mu, lv):
        """Return (total, recon, kl); both terms are element-wise means."""
        recon = np.mean((T1_hat - T1_norm) ** 2)
        kl = -0.5 * np.mean(1 + lv - mu**2 - np.exp(lv))
        total = recon + self.beta * kl
        return total, recon, kl

    # ── Backward (analytical gradients) ────────────────────────────────────
    def _backward(self, T1_norm, T1_hat, h, h_hat, mu, lv, z, lr):
        """One SGD/Adam step from analytic gradients of _loss.

        All gradients are computed against the CURRENT weights first; the
        Adam steps are applied only at the end. (Previously P_out/P_dec were
        updated before being used to backpropagate into upstream layers,
        so upstream gradients were taken w.r.t. already-updated weights.)
        """
        B = len(T1_norm)
        # ── Decoder gradients ────────────────────────────────────────────
        # recon = mean over B*D elements → dL/dT1_hat = 2*(T1_hat - T1_norm)/(B*D)
        dT1 = 2.0 * (T1_hat - T1_norm) / (B * self.input_dim)
        dW_out = h_hat.T @ dT1                             # (512, 20)
        # Back through ReLU of h_hat (use pre-update W_out)
        dh_hat = (dT1 @ self.P_out.W.T) * (h_hat > 0)      # (B, 512)
        dW_dec = z.T @ dh_hat                              # (8, 512)
        # dz from decoder (use pre-update W_dec)
        dz_dec = dh_hat @ self.P_dec.W.T                   # (B, 8)
        # ── KL gradients (standard VAE) ──────────────────────────────────
        # KL in _loss is a mean over ALL B*latent_dim elements, so the
        # analytic gradient divides by mu.size (not just B) — this keeps the
        # optimised objective consistent with the reported loss.
        dmu_kl = self.beta * mu / mu.size
        dlv_kl = self.beta * 0.5 * (np.exp(lv) - 1) / lv.size
        # ── Reparameterisation: dz flows back to mu and lv ───────────────
        # z = mu + exp(0.5*lv)*eps → dz/dmu = 1, dz/dlv = 0.5*(z - mu)
        dmu = dz_dec + dmu_kl
        dlv = dz_dec * 0.5 * (z - mu) + dlv_kl
        # ── Encoder bottleneck gradients ─────────────────────────────────
        dW_mu = h.T @ dmu                                  # (512, 8)
        dW_lv = h.T @ dlv
        # Apply all updates after every gradient has been computed.
        self.P_out.step(dW_out, lr)
        self.P_dec.step(dW_dec, lr)
        self.P_mu.step(dW_mu, lr)
        self.P_lv.step(dW_lv, lr)
        # (W_hd is fixed, no gradient needed for it)

    # ── Training ───────────────────────────────────────────────────────────
    def fit(self, X: np.ndarray, epochs=30, lr=1e-3, batch_size=256,
            verbose=True, warmup_frac=0.3):
        """
        Train on raw X. Normalisation stats are fitted once up front, so the
        whole dataset is normalised a single time (stable across batches).

        warmup_frac: fraction of epochs over which beta ramps 0 → self.beta.
        Prevents KL from dominating before the decoder learns to reconstruct.
        """
        rng = np.random.RandomState(42)
        self.fit_normaliser(X)            # computes global MCDAIN stats once
        X_norm = self._normalise(X)       # normalise full dataset once
        N = len(X_norm)
        target_beta = self.beta
        warmup_epochs = max(1, int(epochs * warmup_frac))
        for epoch in range(1, epochs + 1):
            # KL warmup: ramp beta from 0 to target over first warmup_epochs
            if epoch <= warmup_epochs:
                self.beta = target_beta * (epoch / warmup_epochs)
            else:
                self.beta = target_beta
            idx = rng.permutation(N)
            ep_loss = ep_recon = ep_kl = 0.0
            n_batches = 0
            for start in range(0, N, batch_size):
                bi = idx[start:start + batch_size]
                Xb = X_norm[bi]           # already normalised with global stats
                h, mu, lv, z = self._encode(Xb, rng)
                h_hat, T1_hat = self._decode(z)
                loss, recon, kl = self._loss(Xb, T1_hat, mu, lv)
                self._backward(Xb, T1_hat, h, h_hat, mu, lv, z, lr)
                ep_loss += loss; ep_recon += recon; ep_kl += kl
                n_batches += 1
            ep_loss /= n_batches; ep_recon /= n_batches; ep_kl /= n_batches
            self.train_losses.append(ep_loss)
            if verbose and (epoch % 5 == 0 or epoch == 1):
                # Anti-collapse diagnostic: encode a fixed held-out sample.
                # Use a dedicated RNG so the diagnostic does not consume the
                # training rng's state (verbose on/off must train identically).
                sample_norm = X_norm[:min(1000, N)]
                _, mu_s, _, _ = self._encode(sample_norm, np.random.RandomState(0))
                var_per_dim = mu_s.var(0)
                print(f" ep{epoch:3d}/{epochs} beta={self.beta:.3f} "
                      f"loss={ep_loss:.4f} recon={ep_recon:.4f} kl={ep_kl:.4f} "
                      f"z_var=[{' '.join(f'{v:.3f}' for v in var_per_dim)}]")
        self.beta = target_beta  # restore after training
        return self

    # ── Encode for downstream use ──────────────────────────────────────────
    def encode(self, X: np.ndarray) -> np.ndarray:
        """Return deterministic mu (B, latent_dim) for all samples.
        Normalisation is deterministic (global MCDAIN stats from fit_normaliser)."""
        rng = np.random.RandomState(0)  # mu is deterministic; rng only feeds z
        STEP = 512
        mus = []
        for s in range(0, len(X), STEP):
            Xb = self._normalise(X[s:s + STEP])
            _, mu, _, _ = self._encode(Xb, rng)
            mus.append(mu)
        return np.concatenate(mus)

    def reconstruct(self, X: np.ndarray) -> np.ndarray:
        """Returns (T1_hat, X_norm) both in the same normalised space.
        Normalisation is deterministic (global MCDAIN stats from fit_normaliser)."""
        rng = np.random.RandomState(0)  # decoding from mu, so no sampling noise
        Xn = self._normalise(X)
        STEP = 512
        hats = []
        for s in range(0, len(Xn), STEP):
            _, mu, _, _ = self._encode(Xn[s:s + STEP], rng)
            _, T1_hat = self._decode(mu)
            hats.append(T1_hat)
        return np.concatenate(hats), Xn