# 246 lines, 9.3 KiB, Python
|
|
"""
z1[7] Mechanical Simplification + FLINT 512-bit amplification.

z1[7] VAE finding:
w750_vel_norm r=-0.674 (long-term velocity reversing)
w300_vel_norm r=-0.357
w50_instability r=+0.421 (short-term chaos spiking)
= "eigenspace stress reversal"

Steps:
1. Mechanical proxy: linear combo from correlations
2. Different activations
3. PCA comparison
4. EDAIN-KL + HD 512-dim projection (FLINT)
"""
import sys, os

# Re-encode stdout so prints cannot raise UnicodeEncodeError on consoles
# (e.g. Windows codepages) that cannot represent every character.
sys.stdout.reconfigure(encoding='utf-8', errors='replace')

# Make sibling modules (corpus_builder) importable, plus the external
# DOLPHIN project directory that provides the FLINT kernel.
sys.path.insert(0, os.path.dirname(__file__))
sys.path.insert(0, r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict")

import numpy as np
from pathlib import Path

# Directory of this script; used to locate corpus_cache.npz.
HERE = Path(__file__).parent
|
||
|
|
|
||
|
|
# Rolling-window lengths used by the tier-1 eigen features.
WINDOWS = [50, 150, 300, 750]

# Tier-1 column names: five spectral features per window, in window-major
# order, named "w{window}_{feature}" (20 names total).
T1_NAMES = [
    f"w{w}_{f}"
    for w in WINDOWS
    for f in ['log_lmax', 'vel_norm', 'gap_ratio', 'instability', 'rtp']
]

# Tier-0 context columns: regime percentages, cyclic time encodings, and an
# eigen-availability flag.
T0_NAMES = ['bull_pct', 'bear_pct', 'side_pct', 'sin_hour', 'cos_hour', 'sin_day', 'cos_day', 'has_eigen']
|
||
|
|
|
||
|
|
print("Loading corpus...")
from corpus_builder import DolphinCorpus, OFF, T1 as T1_DIM
# Load the cached corpus built by corpus_builder; OFF gives the column
# offsets of each feature tier inside corpus.X.
corpus = DolphinCorpus.load(str(HERE / 'corpus_cache.npz'))
# Select rows where tier-1 features are valid; assumes mask[:, 1] is a
# per-row boolean validity flag for tier 1 -- TODO confirm in corpus_builder.
idx = corpus.mask[:, 1]
X_e = corpus.X[idx]
mask_e = corpus.mask[idx]  # NOTE(review): not used anywhere below
# Tier-1 eigen features; original comment claims shape (16607, 20).
t1 = X_e[:, OFF[1]:OFF[1]+T1_DIM].copy() # (16607, 20)
# First 8 tier-0 context features (matches T0_NAMES above).
t0 = X_e[:, OFF[0]:OFF[0]+8].copy()
print(f"T1 shape: {t1.shape}")
|
||
|
|
|
||
|
|
# Feature shortcuts
# Column positions are resolved by name from T1_NAMES rather than as bare
# magic indices, making the t1 layout explicit at the point of use.
vel_w50 = t1[:, T1_NAMES.index('w50_vel_norm')]        # col 1
vel_w150 = t1[:, T1_NAMES.index('w150_vel_norm')]      # col 6
vel_w300 = t1[:, T1_NAMES.index('w300_vel_norm')]      # col 11
vel_w750 = t1[:, T1_NAMES.index('w750_vel_norm')]      # col 16
inst_w50 = t1[:, T1_NAMES.index('w50_instability')]    # col 3
inst_w300 = t1[:, T1_NAMES.index('w300_instability')]  # col 13
lmax_w50 = t1[:, T1_NAMES.index('w50_log_lmax')]       # col 0
lmax_w750 = t1[:, T1_NAMES.index('w750_log_lmax')]     # col 15
|
||
|
|
|
||
|
|
# ================================================================
# 1. MECHANICAL PROXIES
# ================================================================
print("\n" + "="*65)
print("1. MECHANICAL z1[7] PROXIES")
print("="*65)

# Hand-built candidate stand-ins for the VAE's z1[7] latent, built from the
# correlations reported in the module docstring.
proxy_A = -0.674*vel_w750 - 0.357*vel_w300 + 0.421*inst_w50
proxy_B = inst_w50 - vel_w750
proxy_C = vel_w50 - vel_w750
proxy_D = inst_w50 * (-vel_w750)
proxy_E = (inst_w50 - inst_w300) - (vel_w50 - vel_w750) # instability delta + vel divergence

proxy_table = [
    ('A: linear VAE weights', proxy_A),
    ('B: inst_w50 - vel_w750', proxy_B),
    ('C: vel_w50 - vel_w750 (cross-scale divergence)', proxy_C),
    ('D: inst_w50 * -vel_w750 (interaction)', proxy_D),
    ('E: dinst - dvel (delta-delta)', proxy_E),
]
for name, p in proxy_table:
    nans = np.isnan(p).sum()
    pv = p[np.isfinite(p)]  # distribution stats over finite values only
    print(f"\n{name}:")
    print(f" std={pv.std():.4f} p5={np.percentile(pv,5):.4f} "
          f"p50={np.percentile(pv,50):.4f} p95={np.percentile(pv,95):.4f} NaN={nans}")
    # Central moments normalized by std -> sample skewness and kurtosis.
    centered = pv - pv.mean()
    sd = pv.std()
    skew = float((centered**3).mean() / (sd**3 + 1e-8))
    kurt = float((centered**4).mean() / (sd**4 + 1e-8))
    print(f" skew={skew:.3f} kurt={kurt:.2f} (>3 = heavy tails = signal potential)")
|
||
|
|
|
||
|
|
# ================================================================
# 2. WHAT DOES HIGH proxy_B LOOK LIKE IN FULL T1 SPACE?
# ================================================================
print("\n" + "="*65)
print("2. HIGH vs LOW proxy_B = inst_w50 - vel_w750")
print("="*65)
# Compare the bottom and top deciles of proxy_B across every t1 feature.
pb = proxy_B
lo = pb < np.percentile(pb, 10)
hi = pb > np.percentile(pb, 90)
print(f"N low={lo.sum()} N high={hi.sum()}")
print(f"\n{'Feature':<22} {'ALL':>8} {'LOW10%':>8} {'HIGH10%':>8} {'diff':>8}")
for col, name in enumerate(T1_NAMES):
    overall = t1[:, col].mean()
    low_mean = t1[lo, col].mean()
    high_mean = t1[hi, col].mean()
    delta = high_mean - low_mean
    if abs(delta) <= 0.02:
        continue  # only show features that actually separate the deciles
    print(f" {name:<20} {overall:8.4f} {low_mean:8.4f} {high_mean:8.4f} {delta:+8.4f}")
print(f"\n T0 context:")
# Regime percentages only (first three tier-0 columns).
for col, name in enumerate(T0_NAMES[:3]):
    ctx_all = t0[:, col].mean()
    ctx_lo = t0[lo, col].mean()
    ctx_hi = t0[hi, col].mean()
    print(f" {name:<20} {ctx_all:8.3f} {ctx_lo:8.3f} {ctx_hi:8.3f} {ctx_hi-ctx_lo:+8.3f}")
|
||
|
|
|
||
|
|
# ================================================================
# 3. ACTIVATION FUNCTIONS
# ================================================================
banner = "=" * 65
print("\n" + banner)
print("3. ACTIVATION FUNCTIONS ON proxy_B")
print(banner)
|
||
|
|
|
||
|
|
def softplus(x):
    """Softplus log(1 + e^x), with the input clipped to [-30, 30] so that
    np.exp can neither overflow nor underflow meaningfully."""
    clipped = np.clip(x, -30, 30)
    return np.log1p(np.exp(clipped))
|
||
|
|
|
||
|
|
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x); clips the input to [-30, 30] to
    keep np.exp finite for extreme arguments."""
    clipped = np.clip(x, -30, 30)
    return 1.0 / (1.0 + np.exp(-clipped))
|
||
|
|
|
||
|
|
# Z-score proxy_B once; each activation is applied to the same input.
pb_z = (proxy_B - proxy_B.mean()) / (proxy_B.std() + 1e-8)

activation_table = [
    ('raw z-score', pb_z),
    ('relu', np.maximum(0, pb_z)),
    ('tanh', np.tanh(pb_z)),
    ('softplus', softplus(pb_z)),
    ('sigmoid', sigmoid(pb_z)),
    ('sign*log1p(|x|)', np.sign(pb_z) * np.log1p(np.abs(pb_z))),
    ('x^3 (cubic)', pb_z**3),
]
for name, act in activation_table:
    if np.isnan(act).any():
        print(f" {name:<22} NaN!")
        continue
    # Sample skewness / kurtosis from normalized central moments.
    centered = act - act.mean()
    sd = act.std()
    skew = float((centered**3).mean() / (sd**3 + 1e-8))
    kurt = float((centered**4).mean() / (sd**4 + 1e-8))
    # High kurtosis = heavy tails = outliers preserved = signal in extremes
    print(f" {name:<22} std={sd:.4f} skew={skew:+.3f} kurt={kurt:6.1f}")
|
||
|
|
|
||
|
|
# ================================================================
# 4. PCA vs VAE
# ================================================================
print("\n" + "="*65)
print("4. PCA ON T1: linear basis vs VAE z1 basis")
print("="*65)

# Z-score every feature, then SVD: rows of Vt are the PC loading vectors.
# t1_z and Vt are reused by section 5 below.
t1_z = (t1 - t1.mean(0)) / (t1.std(0) + 1e-8)
U, S, Vt = np.linalg.svd(t1_z, full_matrices=False)
explained = S**2 / (S**2).sum()
print(f"Explained variance: {np.round(100*explained[:8], 1)}%")
for k in range(5):
    weights = Vt[k]
    # Three largest |loadings|, strongest first.
    strongest = np.argsort(np.abs(weights))[::-1][:3]
    scores = t1_z @ weights
    r_pb = np.corrcoef(scores, proxy_B)[0, 1]
    summary = " ".join(f"{T1_NAMES[j]}={weights[j]:+.3f}" for j in strongest)
    print(f"PC{k+1} ({100*explained[k]:.1f}%) r(proxy_B)={r_pb:+.3f}: " + summary)
|
||
|
|
|
||
|
|
# ================================================================
# 5. FLINT EDAIN-KL + HD PROJECTION
# ================================================================
print("\n" + "="*65)
print("5. FLINT: EDAIN-KL + HD PROJECTION 20->512")
print("="*65)

# Optional dependency: MCDAINLayer comes from the external FLINT kernel.
# All of section 5 is gated on FLINT_OK so the script degrades gracefully.
try:
    from SILOQY_NN_Kernel_COMPLETE6 import MCDAINLayer
except ImportError as e:
    FLINT_OK = False
    print(f"FLINT import failed: {e}")
else:
    FLINT_OK = True
    print("FLINT OK")
|
||
|
|
|
||
|
|
if FLINT_OK:
    # 5a: MCDAINLayer (no training needed, NaN-safe)
    # NOTE(review): MCDAIN internals are opaque here; "NaN-safe" is the
    # original author's claim -- confirm against SILOQY_NN_Kernel_COMPLETE6.
    print("\n5a: MCDAINLayer normalization:")
    t1_f32 = t1.astype(np.float32)
    try:
        import torch
        t1_torch = torch.from_numpy(t1_f32)
        mc = MCDAINLayer(input_dim=20)
        with torch.no_grad():
            t1_mc = mc(t1_torch).numpy()
        mc_std = t1_mc.std(0)
        print(f" Per-dim std after MCDAIN: {mc_std.round(3)}")
        print(f" Max std: {mc_std.max():.4f} Min std: {mc_std.min():.4f}")
        # Proxy_B in MCDAIN space
        # Columns 3 and 16 are w50_instability and w750_vel_norm per the
        # T1_NAMES layout, mirroring proxy_B = inst_w50 - vel_w750.
        pb_mc = t1_mc[:, 3] - t1_mc[:, 16] # inst_w50 - vel_w750
        print(f" proxy_B (MCDAIN): std={pb_mc.std():.4f} "
              f"skew={float(((pb_mc-pb_mc.mean())**3).mean()/(pb_mc.std()**3+1e-8)):.3f} "
              f"kurt={float(((pb_mc-pb_mc.mean())**4).mean()/(pb_mc.std()**4+1e-8)):.1f}")
        print(f" Variance ratio MCDAIN/raw: {pb_mc.var()/proxy_B.var():.3f}x")
    except Exception as ex:
        # Best-effort: 5a failing (e.g. torch missing) must not kill 5b/5c.
        print(f" MCDAINLayer: {ex}")

    # 5b: HD projection 20->512 with ReLU
    print("\n5b: HD projection (20->512, ReLU):")
    try:
        # Fixed seed so the random projection matrix is reproducible;
        # scale sqrt(2/fan_in) matches He initialization for ReLU.
        np.random.seed(42)
        W_hd = np.random.randn(20, 512).astype(np.float32) * np.sqrt(2.0/20)
        # Fall back to the z-scored features when 5a did not produce t1_mc.
        # NOTE(review): 'in dir()' inspects module globals here -- fragile;
        # an explicit success flag from 5a would be safer.
        t1_n = t1_mc if 't1_mc' in dir() else t1_z.astype(np.float32)
        H = np.maximum(0, t1_n @ W_hd) # (16607, 512)
        hd_var = H.var(0)
        active = (hd_var > 0.01).sum()
        print(f" Active HD dims (var>0.01): {active}/512")
        # Which HD dims correlate most with proxy_B?
        pb_n = (proxy_B - proxy_B.mean()) / (proxy_B.std() + 1e-8)
        hd_r = np.array([np.corrcoef(pb_n, H[:, d])[0, 1] for d in range(512)])
        print(f" |r(HD, proxy_B)| > 0.3: {(np.abs(hd_r)>0.3).sum()} dims")
        print(f" |r(HD, proxy_B)| > 0.5: {(np.abs(hd_r)>0.5).sum()} dims")
        print(f" max |r|: {np.abs(hd_r).max():.4f}")
        top5 = np.argsort(np.abs(hd_r))[-5:][::-1]
        print(f" Top HD dims:")
        for d in top5:
            print(f" HD[{d:3d}]: r={hd_r[d]:+.4f} var={hd_var[d]:.4f}")

        # 5c: Can HD space reconstruct proxy_B better than PCA?
        # NOTE(review): despite the printed label this is plain (unridged)
        # least squares via lstsq, not ridge regression.
        print("\n5c: Ridge regression HD->proxy_B (R^2 comparison):")
        from numpy.linalg import lstsq
        # PCA baseline: PC1 alone
        pc1_scores = (t1_z @ Vt[0]).reshape(-1, 1)
        coef_pc1, _, _, _ = lstsq(pc1_scores, proxy_B, rcond=None)
        pred_pc1 = pc1_scores @ coef_pc1
        ss_res = ((proxy_B - pred_pc1)**2).sum()
        ss_tot = ((proxy_B - proxy_B.mean())**2).sum()
        r2_pc1 = 1 - ss_res/ss_tot
        print(f" PC1 alone R2={r2_pc1:.4f}")

        # Top 8 PCs
        pc8 = (t1_z @ Vt[:8].T)
        coef8, _, _, _ = lstsq(pc8, proxy_B, rcond=None)
        pred8 = pc8 @ coef8
        r2_pc8 = 1 - ((proxy_B - pred8)**2).sum()/ss_tot
        print(f" Top 8 PCs R2={r2_pc8:.4f}")

        # Top 8 HD dims
        # Order of top8_hd is ascending |r|, which is irrelevant for the
        # least-squares fit below.
        top8_hd = np.argsort(np.abs(hd_r))[-8:]
        H8 = H[:, top8_hd]
        coef_hd8, _, _, _ = lstsq(H8, proxy_B, rcond=None)
        pred_hd8 = H8 @ coef_hd8
        r2_hd8 = 1 - ((proxy_B - pred_hd8)**2).sum()/ss_tot
        print(f" Top 8 HD dims R2={r2_hd8:.4f}")
        # max(..., 1e-8) guards against division by a ~zero PCA baseline.
        print(f" HD/PCA improvement: {r2_hd8/max(r2_pc8, 1e-8):.3f}x")

    except Exception as ex:
        import traceback; traceback.print_exc()

print("\nDone.")
|