"""
TitanSensor — Non-invasive latent state extractor (Stage 1, read-only).

Architecture notes
------------------
The LSTM weights (W_ih, W_hh) are fixed random projections used as the
encoder basis during training. W_mu/W_logvar/W_dec/W_out were trained to
map the output of THESE specific W_ih/W_hh matrices. Loading the correct
W_ih/W_hh is therefore mandatory for valid reconstruction errors.

Models saved before 2026-03-15 did not persist W_ih/W_hh — those fall back
to legacy seed=42 re-init (meaningless recon_err ~10^14) and are flagged.

T5 oracle-leakage fix
---------------------
Training included T5 dims [111:261] = spectral path coefficients derived from
FUTURE prices [t+1, t+51]. At inference we zero those dims unconditionally.

Normalisation (GD-v2 models, training >= 2026-03-15)
------------------------------------------------------
GD-v2 models store per-feature z-score stats (norm_mean, norm_std) computed
from the raw training corpus. encode() applies v = (v - norm_mean)/norm_std
before the LSTM step so that training and inference see the same distribution.
Models without norm_mean (legacy) skip this step — recon_err is in raw-feature
space and remains valid for relative comparison within that model.
"""
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import numpy as np
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TitanSensor:
    """Non-invasive latent state extractor (Stage 1, read-only).

    Loads a JSON model and exposes :meth:`encode`, mapping a 261-dim feature
    row to ``(z_mu, recon_err, z_logvar)``.  The LSTM weights (W_ih/W_hh/b_h)
    are the fixed encoder basis the trained heads expect; models that did not
    persist them fall back to a seed-based re-init and are flagged via
    ``lstm_weights_valid = False`` (recon_err is then meaningless).
    """

    INPUT_DIM = 261   # total feature dims (T0..T5)
    HIDDEN_DIM = 128  # LSTM hidden size
    LATENT_DIM = 32   # latent (z) size
    T5_START = 111    # dims [111:261] = spectral (oracle) → always zero

    def __init__(self, model_path: str, lstm_seed: int = 42):
        """Load all weights and normalisation stats from *model_path* (JSON).

        Parameters
        ----------
        model_path : path to the JSON model file.
        lstm_seed  : RNG seed for the legacy LSTM re-init fallback; only used
                     when the model does not persist W_ih/W_hh/b_h.
        """
        with open(model_path) as f:
            m = json.load(f)

        def _arr(key, default_shape):
            # Missing trained heads silently degrade to zeros — encode()
            # stays runnable, but those heads contribute nothing.
            return np.array(m[key]) if key in m else np.zeros(default_shape)

        self.W_mu = _arr('W_mu', (self.HIDDEN_DIM, self.LATENT_DIM))
        self.W_logvar = _arr('W_logvar', (self.HIDDEN_DIM, self.LATENT_DIM))
        self.W_dec = _arr('W_dec', (self.LATENT_DIM, self.HIDDEN_DIM))
        self.W_out = _arr('W_out', (self.HIDDEN_DIM, self.INPUT_DIM))
        self.b_mu = _arr('b_mu', (self.LATENT_DIM,))
        self.b_logvar = _arr('b_logvar', (self.LATENT_DIM,))
        self.b_dec = _arr('b_dec', (self.HIDDEN_DIM,))
        self.b_out = _arr('b_out', (self.INPUT_DIM,))

        # LSTM weights: load from JSON if present (fixed since 2026-03-15).
        # Legacy models (no W_ih in JSON) fall back to seed=42 re-init —
        # recon_err will be ~10^14 for those; a warning is emitted.
        if 'W_ih' in m:
            self.W_ih = np.array(m['W_ih'], dtype=np.float64)
            self.W_hh = np.array(m['W_hh'], dtype=np.float64)
            self.b_h = np.array(m['b_h'], dtype=np.float64)
            self.lstm_weights_valid = True
        else:
            import warnings
            warnings.warn(
                f"TitanSensor: model at '{model_path}' does not contain LSTM weights "
                "(W_ih/W_hh/b_h). Falling back to seed=42 re-init. "
                "recon_err will be ~10^14 (meaningless). Retrain with fixed save_model().",
                RuntimeWarning, stacklevel=2,
            )
            rng = np.random.RandomState(lstm_seed)
            s = 0.1
            self.W_ih = rng.randn(self.INPUT_DIM, self.HIDDEN_DIM * 4) * s
            self.W_hh = rng.randn(self.HIDDEN_DIM, self.HIDDEN_DIM * 4) * s
            self.b_h = np.zeros(self.HIDDEN_DIM * 4)
            self.lstm_weights_valid = False

        # Optional human-readable names per latent dim (JSON keys are strings).
        self.latent_names = {int(k): v for k, v in m.get('latent_names', {}).items()}

        # Per-feature normalisation stats (present in GD-v2 models trained 2026-03-15+).
        # If absent (legacy models): no normalization applied — recon_err will be in
        # raw-feature space and is still valid for relative comparison within that model.
        if 'norm_mean' in m:
            self.norm_mean = np.array(m['norm_mean'], dtype=np.float64)
            self.norm_std = np.array(m['norm_std'], dtype=np.float64)
        else:
            self.norm_mean = None
            self.norm_std = None

    # ------------------------------------------------------------------
    def _sigmoid(self, x):
        # Clip the pre-activation so np.exp cannot overflow for huge inputs.
        return 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))

    def _lstm_step(self, x, h, c):
        """One LSTM cell step; gate order in the fused matmul is i, f, g, o."""
        g = x @ self.W_ih + h @ self.W_hh + self.b_h
        i_, f_, g_, o_ = np.split(g, 4, axis=-1)
        i_ = self._sigmoid(i_); f_ = self._sigmoid(f_); o_ = self._sigmoid(o_)
        g_ = np.tanh(g_)
        c2 = f_ * c + i_ * g_
        h2 = o_ * np.tanh(c2)
        return h2, c2

    # ------------------------------------------------------------------
    def encode(self, x: np.ndarray):
        """
        Encode one 261-dim feature row.

        Returns
        -------
        z_mu : np.ndarray (32,) — latent mean
        recon_err : float — MSE on T0-T4 reconstruction (in normalised space)
        z_logvar : np.ndarray (32,) — per-dim log-variance
        """
        v = np.array(x, dtype=np.float64).ravel()
        # Pad with zeros / truncate to INPUT_DIM.
        # BUG FIX: the previous np.resize(v, INPUT_DIM) *tiles* the input when
        # growing it, injecting repeated copies of the data into dims the
        # training corpus never populated; zero-padding is the intended "pad".
        if v.size < self.INPUT_DIM:
            v = np.concatenate([v, np.zeros(self.INPUT_DIM - v.size)])
        elif v.size > self.INPUT_DIM:
            v = v[:self.INPUT_DIM]
        # Stage-1 zero: T3 (ExF, 78-102), T4 (esoteric, 103-110) and T5 (oracle, 111+)
        # are always zero in the training corpus — enforce this here so that
        # callers not using build_feature_vector() don't blow up normalization.
        v[78:] = 0.0
        v[self.T5_START:] = 0.0  # T5 oracle fix (redundant but explicit)
        v = np.nan_to_num(v, nan=0.0, posinf=0.0, neginf=0.0)

        # Apply stored per-feature normalisation (GD-v2 models only).
        # This reproduces the same transform applied during training so that
        # the LSTM sees the same input distribution and recon_err is O(1).
        if self.norm_mean is not None:
            # Guard features that were constant in the training corpus
            # (std == 0): dividing by them produced inf/nan; treat their
            # std as 1 so the feature simply becomes (v - mean).
            safe_std = np.where(self.norm_std > 0, self.norm_std, 1.0)
            v = (v - self.norm_mean) / safe_std
            v[78:] = 0.0  # re-zero Stage-1 dims after norm
            # Clip to [-50, 50] for safety — training post-norm max was ~37
            v = np.clip(v, -50.0, 50.0)

        # Single LSTM step from a zero state over the one feature row.
        h = np.zeros((1, self.HIDDEN_DIM))
        c = np.zeros((1, self.HIDDEN_DIM))
        h, c = self._lstm_step(v.reshape(1, -1), h, c)

        z_mu = (h @ self.W_mu + self.b_mu)[0]
        z_logvar = (h @ self.W_logvar + self.b_logvar)[0]

        # Decode from the latent mean; error is measured only on the
        # non-oracle dims [0:T5_START] so the zeroed T5 block cannot
        # dilute or mask reconstruction drift.
        h_dec = np.tanh(z_mu @ self.W_dec + self.b_dec)
        recon = h_dec @ self.W_out + self.b_out
        recon_err = float(np.mean((recon[:self.T5_START] - v[:self.T5_START]) ** 2))

        return z_mu, recon_err, z_logvar
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ------------------------------------------------------------------
# Feature builder — constructs a 261-dim vector from a parquet row.
# T3 (ExF, 78-102), T4 (esoteric, 103-110), T5 (111-260) are zeroed.
# ------------------------------------------------------------------
# NOTE(review): _ASSET_CACHE is not referenced anywhere in this file's
# visible code — presumably populated/read by callers elsewhere; confirm
# against the rest of the project before removing.
_ASSET_CACHE = {}  # parquet-path → ordered asset list
|
|||
|
|
|
|||
|
|
|
|||
|
|
def build_feature_vector(df, ri: int, assets: list) -> np.ndarray:
    """
    Build 261-dim T0-T4 feature vector (T5 always zero).

    Layout
    ------
    T0 (0-7)    : time encoding + rolling breadth
    T1 (8-27)   : eigenvalue velocity features (4 windows × 5 dims)
    T2 (28-77)  : per-asset return z-scores (up to 50 assets, rolling 50 bars)
    T3 (78-102) : ExF macro — zeroed (Stage 1)
    T4 (103-110): esoteric — zeroed (Stage 1)
    T5 (111-260): spectral — zeroed (oracle fix)

    Parameters
    ----------
    df     : DataFrame with a 'timestamp' column, per-asset price columns and
             the T1 velocity/instability columns (missing columns degrade to 0).
    ri     : row index into *df* (positional, via .iloc).
    assets : ordered asset column names; only the first 50 map to dims 28-77.

    Returns
    -------
    np.ndarray of shape (261,), float64; dims 78+ are always zero.
    """
    x = np.zeros(261, dtype=np.float64)
    row = df.iloc[ri]

    # --- T0: cyclical time encoding (hour-of-day, day-of-week) ---
    try:
        ts = row['timestamp']
        if not hasattr(ts, 'hour'):
            # Raw string / epoch value — coerce through pandas first.
            import pandas as pd
            ts = pd.Timestamp(ts)
        h = ts.hour + ts.minute / 60.0
        d = ts.dayofweek
        x[0] = np.sin(2 * np.pi * h / 24)
        x[1] = np.cos(2 * np.pi * h / 24)
        x[2] = np.sin(2 * np.pi * d / 7)
        x[3] = np.cos(2 * np.pi * d / 7)
    except Exception:
        # Best-effort: a missing/unparseable timestamp leaves dims 0-3 at zero.
        pass
    x[4] = 1.0  # has_eigen always true for NG3

    # rolling BTC breadth (T0 dims 5-7): share of up/down/flat bars over 20 steps
    if ri >= 20 and 'BTCUSDT' in df.columns:
        diffs = np.diff(df['BTCUSDT'].values[ri - 20:ri + 1])
        x[5] = float(np.mean(diffs > 0))
        x[6] = float(np.mean(diffs < 0))
        x[7] = float(np.mean(diffs == 0))

    # --- T1: eigenvalue velocity features ---
    def _g(col):
        # Missing column, None, NaN (v != v) or non-numeric cell → 0.0.
        # The try/except guards float() against stray string values, which
        # previously crashed the whole builder.
        v = row.get(col, 0.0)
        try:
            return float(v) if v is not None and v == v else 0.0
        except (TypeError, ValueError):
            return 0.0

    v50 = _g('v50_lambda_max_velocity')
    v150 = _g('v150_lambda_max_velocity')
    v300 = _g('v300_lambda_max_velocity')
    v750 = _g('v750_lambda_max_velocity')
    i50 = _g('instability_50')
    i150 = _g('instability_150')
    vd = _g('vel_div')

    # window-0 (8-12): v50 group
    x[8] = v50; x[9] = i50; x[10] = v50 - v150
    x[11] = v50 - v300; x[12] = v50 - v750
    # window-1 (13-17): v150 group
    x[13] = v150; x[14] = i150; x[15] = v150 - v300
    x[16] = v150 - v750; x[17] = abs(v150)
    # window-2 (18-22): v300 group
    x[18] = v300; x[19] = abs(v300); x[20] = v300 - v750
    x[21] = v50 * v300; x[22] = i50 - v750  # proxy_B equivalent
    # window-3 (23-27): v750 + composite
    x[23] = v750; x[24] = abs(v750)
    x[25] = v50 / (abs(v750) + 1e-8)
    x[26] = i50 - i150; x[27] = vd

    # --- T2: per-asset return z-scores (needs a full 50-bar lookback) ---
    if ri >= 50:
        # Only the first 50 assets map to dims 28-77, so don't materialize
        # price arrays for anything beyond that.
        used = assets[:50]
        prices_cache = {a: df[a].values for a in used if a in df.columns}
        for j, asset in enumerate(used):
            if asset not in prices_cache:
                continue
            seg = prices_cache[asset][ri - 50:ri + 1]
            # Skip windows containing non-positive or non-finite prices.
            if np.any(seg <= 0) or np.any(~np.isfinite(seg)):
                continue
            rets = np.diff(seg) / seg[:-1]
            std = np.std(rets)
            if std > 0:
                # z-score of the latest return, clipped to ±5
                x[28 + j] = float(np.clip((rets[-1] - np.mean(rets)) / std, -5.0, 5.0))

    return x
|