""" exp13_multiscale_sweep.py — Multi-scale ConvNeXt z-signal sweep on D_LIQ_GOLD ============================================================================== Tests ConvNeXt z-signals from two models injected at multiple points in the D_LIQ_GOLD Alpha Engine stack. Signal sources: S2: 1m ConvNeXt (ep=17, val_loss=19.26, trained on 5y klines) macro regime, updated every 12 5s bars S1: 5s ConvNeXt (trained on 56d scans, model at convnext_model_5s.json) micro regime, updated every 16 5s bars Experiment sets: A : Alpha engine + S2 (1m model only), balanced weights B : Alpha engine + S1 + S2 (both models), balanced weights A' : Same as A but with recency bias (1m model discounted) B' : Same as B but with recency bias (5s model favored, 1m discounted) Injection points (P): P1: ACBv6 beta modulation — modify _day_beta after begin_day super() call P3: Entry gate — block entry if z_combined > gate_threshold P4: regime_size_mult modulation — multiply before entry P5: Notional scale — multiply notional after entry result Combination modes (M): M1: tanh soft-clamp: alpha * tanh(z / K_TANH) M2: confidence-weighted: multiply each z by 1 / max(0.1, z_post_std) M3: rank-based: percentile rank [0,1] mapped to [-1, +1] M4: macro-gated micro: sigmoid(z_s2) * tanh(z_s1 / K_TANH) [B/B' only] Weight schemes (W): W1 balanced: [w_s1=0.5, w_s2=0.5] (B/B'), [w_s2=1.0] (A/A') W2 mild recency: [w_s1=0.6, w_s2=0.3] (B/B'), [w_s2=0.5] (A/A') W3 strong recency:[w_s1=0.8, w_s2=0.15] (B/B'), [w_s2=0.25] (A/A') Signal strengths alpha: {0.2, 0.3, 0.5} Total configs: 252 + 1 baseline = 253 K_TANH = 1.5 P3 gate threshold (M3): 0.75 (top quartile) P3 gate threshold (others): 0.5 sigma """ import sys, time, json, warnings, argparse sys.stdout.reconfigure(encoding='utf-8', errors='replace') warnings.filterwarnings('ignore') from pathlib import Path import numpy as np import pandas as pd ROOT = Path(__file__).parent.parent sys.path.insert(0, str(ROOT)) from nautilus_dolphin.nautilus.alpha_asset_selector 
import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb from nautilus_dolphin.nautilus.ob_features import ( OBFeatureEngine, compute_imbalance_nb, compute_depth_1pct_nb, compute_depth_quality_nb, compute_fill_probability_nb, compute_spread_proxy_nb, compute_depth_asymmetry_nb, compute_imbalance_persistence_nb, compute_withdrawal_velocity_nb, compute_market_agreement_nb, compute_cascade_signal_nb, ) from nautilus_dolphin.nautilus.ob_provider import MockOBProvider from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker from nautilus_dolphin.nautilus.proxy_boost_engine import LiquidationGuardEngine, create_d_liq_engine from mc.mc_ml import DolphinForewarner from dvae.convnext_sensor import ConvNextSensor, PROXY_B_DIM # ── JIT warmup ──────────────────────────────────────────────────────────────── print("Warming up JIT...") _p = np.array([1., 2., 3.], dtype=np.float64) compute_irp_nb(_p, -1); compute_ars_nb(1., .5, .01) rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500., 20, 0.20) compute_sizing_nb(-.03, -.02, -.05, 3., .5, 5., .20, True, True, 0., np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64), np.zeros(5, dtype=np.float64), 0, -1, .01, .04) check_dc_nb(_p, 3, 1, .75) _b = np.array([100., 200., 300., 400., 500.], dtype=np.float64) _a = np.array([110., 190., 310., 390., 510.], dtype=np.float64) compute_imbalance_nb(_b, _a); compute_depth_1pct_nb(_b, _a) compute_depth_quality_nb(210., 200.); compute_fill_probability_nb(1.) 
compute_spread_proxy_nb(_b, _a); compute_depth_asymmetry_nb(_b, _a) compute_imbalance_persistence_nb(np.array([.1, -.1], dtype=np.float64), 2) compute_withdrawal_velocity_nb(np.array([100., 110.], dtype=np.float64), 1) compute_market_agreement_nb(np.array([.1, -.05], dtype=np.float64), 2) compute_cascade_signal_nb(np.array([-.05, -.15], dtype=np.float64), 2, -.10) print(" JIT ready.") # ── Paths ───────────────────────────────────────────────────────────────────── VBT5s = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache") VBT1m = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines") MODEL_1M = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\dvae\convnext_model.json") MODEL_5S = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\dvae\convnext_model_5s.json") MC_MODELS = str(ROOT / "mc_results" / "models") OUT_FILE = Path(__file__).parent / "exp13_multiscale_sweep_results.json" META_COLS = { 'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity', 'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150', } FEATURE_COLS = [ 'v50_lambda_max_velocity', 'v150_lambda_max_velocity', 'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150', ] BASE_ENGINE_KWARGS = dict( initial_capital=25000., vel_div_threshold=-.02, vel_div_extreme=-.05, min_leverage=.5, max_leverage=5., leverage_convexity=3., fraction=.20, fixed_tp_pct=.0095, stop_pct=1., max_hold_bars=120, use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=.75, dc_skip_contradicts=True, dc_leverage_boost=1., dc_leverage_reduce=.5, use_asset_selection=True, min_irp_alignment=.45, use_sp_fees=True, use_sp_slippage=True, sp_maker_entry_rate=.62, sp_maker_exit_rate=.50, use_ob_edge=True, ob_edge_bps=5., ob_confirm_rate=.40, lookback=100, use_alpha_layers=True, 
use_dynamic_leverage=True, seed=42, ) D_LIQ_KWARGS = dict( extended_soft_cap=8., extended_abs_cap=9., mc_leverage_ref=5., margin_buffer=.95, threshold=.35, alpha=1., adaptive_beta=True, ) MC_BASE_CFG = { 'trial_id': 0, 'vel_div_threshold': -.020, 'vel_div_extreme': -.050, 'use_direction_confirm': True, 'dc_lookback_bars': 7, 'dc_min_magnitude_bps': .75, 'dc_skip_contradicts': True, 'dc_leverage_boost': 1.00, 'dc_leverage_reduce': .50, 'vd_trend_lookback': 10, 'min_leverage': .50, 'max_leverage': 5.00, 'leverage_convexity': 3.00, 'fraction': .20, 'use_alpha_layers': True, 'use_dynamic_leverage': True, 'fixed_tp_pct': .0095, 'stop_pct': 1.00, 'max_hold_bars': 120, 'use_sp_fees': True, 'use_sp_slippage': True, 'sp_maker_entry_rate': .62, 'sp_maker_exit_rate': .50, 'use_ob_edge': True, 'ob_edge_bps': 5.00, 'ob_confirm_rate': .40, 'ob_imbalance_bias': -.09, 'ob_depth_scale': 1.00, 'use_asset_selection': True, 'min_irp_alignment': .45, 'lookback': 100, 'acb_beta_high': .80, 'acb_beta_low': .20, 'acb_w750_threshold_pct': 60, } K_TANH = 1.5 T_WIN = 32 STEP_5S = 16 # 5s window stride P3_GATE_RAW = 0.5 # gate threshold for M1/M2 (sigma units) P3_GATE_RANK = 0.75 # gate threshold for M3 (top quartile of pct rank = adverse) PCT_RANK_WINDOW = 500 # rolling window for percentile rank precomputation # ══════════════════════════════════════════════════════════════════════════════ # Signal precomputation # ══════════════════════════════════════════════════════════════════════════════ def _compute_rolling_pct_rank(values: np.ndarray, window: int = PCT_RANK_WINDOW) -> np.ndarray: """ Compute rolling percentile rank in [0, 1] for each element in values. Uses a window of preceding elements (exclusive of current). At positions with < 2 preceding elements, returns 0.5 (neutral). 
""" N = len(values) rank = np.full(N, 0.5, dtype=np.float64) for i in range(1, N): lo = max(0, i - window) seg = values[lo:i] if len(seg) < 2: rank[i] = 0.5 else: rank[i] = float(np.searchsorted(np.sort(seg), values[i]) / len(seg)) return rank def precompute_1m_signals(parquet_files_5s, sensor_1m: ConvNextSensor) -> dict: """ Precompute 1m ConvNext z-signals for every day, mapped to 5s resolution. Returns ------- dict[date_str -> { 'z1m_5s' : np.ndarray (N5s,) — proxy_B z mapped to 5s bars 'zstd1m_5s' : np.ndarray (N5s,) — z_post_std mapped to 5s bars 'pct1m_5s' : np.ndarray (N5s,) — rolling pct rank of z (for M3) 'z_sod' : float — start-of-day z (first window) }] or None entry when 1m file missing. """ print("Pre-computing 1m ConvNext z-signals...") all_z = [] # accumulate across all days for global pct rank all_ds = [] # date strings in order raw_per_day = {} # date_str -> (z1m_5s, zstd1m_5s, z_sod) for pf5 in parquet_files_5s: ds = pf5.stem pf1 = VBT1m / f"{ds}.parquet" if not pf1.exists(): raw_per_day[ds] = None all_ds.append(ds) continue df1 = pd.read_parquet(pf1).replace([np.inf, -np.inf], np.nan).fillna(0.) 
n1 = len(df1) n5 = len(pd.read_parquet(pf5, columns=['timestamp'])) z_arr = np.zeros(n1, dtype=np.float64) zstd_arr = np.zeros(n1, dtype=np.float64) for j in range(n1): z_mu, z_post_std = sensor_1m.encode_window(df1, j) z_arr[j] = float(z_mu[PROXY_B_DIM]) zstd_arr[j] = z_post_std # Map 1m -> 5s by nearest index z1m_5s = np.array([z_arr[min(int(i * n1 / n5), n1 - 1)] for i in range(n5)]) zstd1m_5s = np.array([zstd_arr[min(int(i * n1 / n5), n1 - 1)] for i in range(n5)]) # Start-of-day z: first fully valid window (T_WIN-1 in 1m = bar index 31) z_sod = float(z_arr[min(T_WIN - 1, n1 - 1)]) raw_per_day[ds] = (z1m_5s, zstd1m_5s, z_sod) all_z.append(z1m_5s) all_ds.append(ds) print(f" {ds}: z=[{z1m_5s.min():.2f},{z1m_5s.max():.2f}] " f"zstd=[{zstd1m_5s.min():.3f},{zstd1m_5s.max():.3f}]") # Global percentile rank across all concatenated z values all_z_cat = np.concatenate([z for z in all_z if z is not None and len(z) > 0]) all_pct = _compute_rolling_pct_rank(all_z_cat, window=PCT_RANK_WINDOW) # Split back per day signals = {} offset = 0 for ds in all_ds: entry = raw_per_day.get(ds) if entry is None: signals[ds] = None continue z1m_5s, zstd1m_5s, z_sod = entry N = len(z1m_5s) pct_slice = all_pct[offset: offset + N] # Pad if lengths differ (should not happen but defensive) if len(pct_slice) < N: pct_slice = np.concatenate([np.full(N - len(pct_slice), 0.5), pct_slice]) signals[ds] = { 'z1m_5s': z1m_5s, 'zstd1m_5s': zstd1m_5s, 'pct1m_5s': pct_slice, 'z_sod': z_sod, } offset += N n_ok = sum(1 for v in signals.values() if v is not None) print(f" 1m signals ready: {n_ok}/{len(signals)} days\n") return signals def find_proxy_b_dim_5s(parquet_files_5s, sensor_5s) -> int: """ Find the 5s z-dim most correlated with proxy_B (ch7 mean) using 20 probe windows sampled uniformly from the 5s corpus. Returns proxy_b_dim_5s (int). 
    """
    from dvae.convnext_5s_sensor import ConvNext5sSensor  # noqa: F401 (type hint)
    print("Finding proxy_B dim for 5s model...")
    probe_windows = []
    # Sample ~20 days uniformly across the corpus for probe windows.
    step = max(1, len(parquet_files_5s) // 20)
    for pf in parquet_files_5s[::step]:
        try:
            df = pd.read_parquet(pf).replace([np.inf, -np.inf], np.nan).fillna(0.)
            avail = [c for c in FEATURE_COLS if c in df.columns]
            if len(avail) < 7:
                continue
            feats = df[FEATURE_COLS].values.astype(np.float64)
            if len(feats) < T_WIN:
                continue
            # Pick one window near the middle of the day
            mid = len(feats) // 2
            start = max(0, mid - T_WIN)
            arr7 = feats[start: start + T_WIN]
            # proxy_B channel = instability_50 - v750_lambda_max_velocity
            proxy_b = arr7[:, 5] - arr7[:, 3]
            arr8 = np.concatenate([arr7, proxy_b[:, np.newaxis]], axis=1)  # (T, 8)
            probe_windows.append(arr8.T)  # (C_IN, T_WIN)
        except Exception as e:
            print(f" Warning: could not build probe from {pf.stem}: {e}")
            continue
        if len(probe_windows) >= 20:
            break
    if not probe_windows:
        print(" No probe windows built — defaulting to dim 0")
        return 0
    probe_arr = np.stack(probe_windows, axis=0)  # (N, C_IN, T_WIN)
    dim_idx, corr = sensor_5s.find_proxy_b_dim(probe_arr)
    print(f" 5s proxy_B dim = {dim_idx} (r={corr:+.3f})\n")
    return dim_idx


def precompute_5s_signals(parquet_files_5s, sensor_5s, proxy_b_dim_5s: int) -> dict:
    """
    Precompute 5s ConvNext z-signals for every day using sliding 32-bar
    windows with stride 16 (zero-order hold between updates).

    Returns
    -------
    dict[date_str -> {
        'z5s'     : np.ndarray (N,) — z per 5s bar (ZOH between windows)
        'zstd5s'  : np.ndarray (N,) — z_post_std (ZOH)
        'pct5s'   : np.ndarray (N,) — rolling pct rank (for M3)
        'z_sod_5s': float — first window z
    }]
    """
    print("Pre-computing 5s ConvNext z-signals...")
    all_z = []
    all_ds = []
    raw_per_day = {}
    for pf in parquet_files_5s:
        ds = pf.stem
        try:
            df = pd.read_parquet(pf).replace([np.inf, -np.inf], np.nan).fillna(0.)
            avail = [c for c in FEATURE_COLS if c in df.columns]
            if len(avail) < 7:
                raw_per_day[ds] = None
                all_ds.append(ds)
                continue
            feats = df[FEATURE_COLS].values.astype(np.float64)
            N = len(feats)
            z_arr = np.zeros(N, dtype=np.float64)
            zstd_arr = np.ones(N, dtype=np.float64)
            last_z = 0.0
            last_zstd = 1.0
            z_sod_5s = None
            for i in range(0, N, STEP_5S):
                # Window ends at i+T_WIN (clamped); start backs up so the
                # window is as close to T_WIN long as the day allows.
                end = min(i + T_WIN, N)
                start = max(0, end - T_WIN)
                arr7 = feats[start: end]
                actual = len(arr7)
                if actual < 4:
                    # NOTE(review): skipped strides leave z_arr/zstd_arr at
                    # their 0.0/1.0 defaults for those bars instead of the
                    # held last value — confirm intended.
                    continue
                # Pad to T_WIN if needed
                if actual < T_WIN:
                    pad = np.zeros((T_WIN - actual, 7), dtype=np.float64)
                    arr7 = np.concatenate([pad, arr7], axis=0)
                proxy_b = arr7[:, 5] - arr7[:, 3]
                arr8 = np.concatenate([arr7, proxy_b[:, np.newaxis]], axis=1)  # (T_WIN, 8)
                z_mu, z_post_std = sensor_5s.encode_raw(arr8.T)
                last_z = float(z_mu[proxy_b_dim_5s])
                last_zstd = z_post_std
                if z_sod_5s is None:
                    z_sod_5s = last_z
                # ZOH: fill from i to i+STEP_5S
                hi = min(i + STEP_5S, N)
                z_arr[i:hi] = last_z
                zstd_arr[i:hi] = last_zstd
            if z_sod_5s is None:
                z_sod_5s = 0.0
            raw_per_day[ds] = (z_arr, zstd_arr, z_sod_5s)
            all_z.append(z_arr)
            all_ds.append(ds)
            print(f" {ds}: z=[{z_arr.min():.2f},{z_arr.max():.2f}] "
                  f"zstd=[{zstd_arr.min():.3f},{zstd_arr.max():.3f}]")
        except Exception as e:
            print(f" Warning: failed to process {ds}: {e}")
            raw_per_day[ds] = None
            all_ds.append(ds)
    # Global percentile rank (same cross-day concatenation as the 1m path).
    all_z_cat = np.concatenate([z for z in all_z if z is not None and len(z) > 0])
    all_pct = _compute_rolling_pct_rank(all_z_cat, window=PCT_RANK_WINDOW)
    signals = {}
    offset = 0
    for ds in all_ds:
        entry = raw_per_day.get(ds)
        if entry is None:
            signals[ds] = None
            continue
        z_arr, zstd_arr, z_sod_5s = entry
        N = len(z_arr)
        pct_slice = all_pct[offset: offset + N]
        if len(pct_slice) < N:
            pct_slice = np.concatenate([np.full(N - len(pct_slice), 0.5), pct_slice])
        signals[ds] = {
            'z5s': z_arr,
            'zstd5s': zstd_arr,
            'pct5s': pct_slice,
            'z_sod_5s': z_sod_5s,
        }
        offset += N
    n_ok = sum(1 for v in signals.values() if v is not None)
    print(f" 5s signals ready: {n_ok}/{len(signals)}
days\n") return signals # ══════════════════════════════════════════════════════════════════════════════ # Signal combination # ══════════════════════════════════════════════════════════════════════════════ def combine_signals( z1m_day: np.ndarray, zstd1m_day: np.ndarray, pct1m_day: np.ndarray, z5s_day: np.ndarray, zstd5s_day: np.ndarray, pct5s_day: np.ndarray, exp_set: str, mode: str, weights: tuple, z_sod_1m: float, z_sod_5s: float, ): """ Combine z-signals from 1m and (optionally) 5s models into a single per-bar signal array and a start-of-day scalar. Parameters ---------- z1m_day, zstd1m_day, pct1m_day : (N,) arrays — 1m signal mapped to 5s bars z5s_day, zstd5s_day, pct5s_day : (N,) arrays — 5s signal (or zeros if set A/A') exp_set : 'A', 'B', 'Ap', 'Bp' mode : 'M1', 'M2', 'M3', 'M4' weights : (w_s1, w_s2) — w_s1 for 5s model, w_s2 for 1m model z_sod_1m : float — start-of-day 1m z z_sod_5s : float — start-of-day 5s z (0 if set A/A') Returns ------- bar_z : (N,) float64 — combined signal per 5s bar sod_z : float — combined start-of-day signal for P1 """ use_5s = exp_set in ('B', 'Bp') w_s1, w_s2 = weights # w_s1 = 5s model weight, w_s2 = 1m model weight N = len(z1m_day) if mode == 'M3': # Rank-based: percentile arrays already [0, 1]; map to [-1, +1] z2 = pct1m_day * 2.0 - 1.0 z1 = (pct5s_day * 2.0 - 1.0) if use_5s else np.zeros(N) sod2 = float(pct1m_day[0] * 2.0 - 1.0) if len(pct1m_day) > 0 else 0.0 sod1 = float(pct5s_day[0] * 2.0 - 1.0) if (use_5s and len(pct5s_day) > 0) else 0.0 elif mode == 'M2': # Confidence-weighted: z / z_post_std z2 = z1m_day / np.maximum(0.1, zstd1m_day) z1 = (z5s_day / np.maximum(0.1, zstd5s_day)) if use_5s else np.zeros(N) sod2 = z_sod_1m / max(0.1, float(zstd1m_day[0]) if len(zstd1m_day) > 0 else 1.0) sod1 = (z_sod_5s / max(0.1, float(zstd5s_day[0]) if (use_5s and len(zstd5s_day) > 0) else 1.0)) if use_5s else 0.0 else: # M1 or M4: use raw z values z2 = z1m_day z1 = z5s_day if use_5s else np.zeros(N) sod2 = z_sod_1m sod1 = z_sod_5s if 
use_5s else 0.0 if mode == 'M4' and use_5s: # Macro-gated micro: sigmoid(z_1m) × tanh(z_5s / K_TANH) gate = 1.0 / (1.0 + np.exp(-z2)) micro = np.tanh(z1 / K_TANH) bar_z = gate * micro sod_gate = 1.0 / (1.0 + np.exp(-sod2)) sod_z = sod_gate * float(np.tanh(sod1 / K_TANH)) else: if use_5s: denom = w_s1 + w_s2 if denom < 1e-12: denom = 1.0 bar_z = (w_s1 * z1 + w_s2 * z2) / denom sod_z = float((w_s1 * sod1 + w_s2 * sod2) / denom) else: # Single signal — w_s2 acts as overall weight scalar bar_z = w_s2 * z2 sod_z = float(w_s2 * sod2) return bar_z.astype(np.float64), float(sod_z) # ══════════════════════════════════════════════════════════════════════════════ # Config generation # ══════════════════════════════════════════════════════════════════════════════ def generate_configs(): """ Generate all 252 experiment configs (plus baseline handled separately). Returns list of dicts with keys: name, exp_set, point, mode, weights, strength """ configs = [] POINTS = ['P1', 'P3', 'P4', 'P5'] STRENGTHS = [0.2, 0.3, 0.5] # Set A: 1m only, balanced W1 for p in POINTS: for m in ['M1', 'M2', 'M3']: for a in STRENGTHS: configs.append({ 'name': f'A_{p}_{m}_W1_a{a:.1f}', 'exp_set': 'A', 'point': p, 'mode': m, 'weights': (0.0, 1.0), # (w_s1, w_s2): 1m only 'strength': a, }) # Set B: 5s+1m, balanced W1 for p in POINTS: for m in ['M1', 'M2', 'M3', 'M4']: for a in STRENGTHS: configs.append({ 'name': f'B_{p}_{m}_W1_a{a:.1f}', 'exp_set': 'B', 'point': p, 'mode': m, 'weights': (0.5, 0.5), 'strength': a, }) # Set A': 1m only, recency bias W2 and W3 for p in POINTS: for m in ['M1', 'M2', 'M3']: for a in STRENGTHS: for wname, w in [('W2', (0.0, 0.5)), ('W3', (0.0, 0.25))]: configs.append({ 'name': f'Ap_{p}_{m}_{wname}_a{a:.1f}', 'exp_set': 'Ap', 'point': p, 'mode': m, 'weights': w, 'strength': a, }) # Set B': 5s+1m, recency bias W2 and W3 for p in POINTS: for m in ['M1', 'M2', 'M3', 'M4']: for a in STRENGTHS: for wname, w in [('W2', (0.6, 0.3)), ('W3', (0.8, 0.15))]: configs.append({ 'name': 
                        f'Bp_{p}_{m}_{wname}_a{a:.1f}',
                        'exp_set': 'Bp', 'point': p, 'mode': m,
                        'weights': w,
                        'strength': a,
                    })
    return configs


# ══════════════════════════════════════════════════════════════════════════════
# ZInjectionEngine
# ══════════════════════════════════════════════════════════════════════════════
class ZInjectionEngine(LiquidationGuardEngine):
    """
    Injects a combined z-signal at one of P1/P3/P4/P5 in the stack.
    Signals are precomputed — just array lookups at runtime (negligible
    overhead).

    Injection points:
        P1 — begin_day: modify _day_beta using start-of-day z
        P3 — _try_entry: block entry if bar z > gate threshold
        P4 — _try_entry: scale regime_size_mult before entry
        P5 — _try_entry: scale position notional after entry
    """
    def __init__(self, inject_point: str, strength: float,
                 p3_gate_thr: float = P3_GATE_RAW, **kwargs):
        super().__init__(**kwargs)
        # Exactly one of these is True per instance.
        self._inject_p1 = (inject_point == 'P1')
        self._inject_p3 = (inject_point == 'P3')
        self._inject_p4 = (inject_point == 'P4')
        self._inject_p5 = (inject_point == 'P5')
        self._strength = strength
        self._p3_gate_thr = p3_gate_thr
        # Signal arrays — set per day via set_day_signals()
        self._bar_z: np.ndarray = None
        self._sod_z: float = 0.0
        self._scale_history = []  # P5 notional multipliers applied (for reporting)

    def set_day_signals(self, bar_z: np.ndarray, sod_z: float):
        """Called before each process_day() call."""
        self._bar_z = bar_z
        self._sod_z = sod_z

    def _get_bar_z(self, bar_idx: int) -> float:
        # Out-of-range or unset signal reads as neutral 0.0.
        if self._bar_z is None or bar_idx >= len(self._bar_z):
            return 0.0
        return float(self._bar_z[bar_idx])

    def begin_day(self, date_str, posture='APEX', direction=None):
        super().begin_day(date_str, posture, direction)
        if self._inject_p1:
            # Modulate the ACB day beta by ±strength, clipped to [0, 2].
            beta_mod = 1.0 + self._strength * float(np.tanh(self._sod_z / K_TANH))
            self._day_beta = float(np.clip(self._day_beta * beta_mod, 0.0, 2.0))

    def _try_entry(self, bar_idx, vel_div, prices, price_histories,
                   v50_vel=0., v750_vel=0.):
        if self._inject_p4:
            # P4: scale regime_size_mult BEFORE the base entry logic runs.
            z = self._get_bar_z(bar_idx)
            mod = 1.0 + self._strength * float(np.tanh(z / K_TANH))
            self.regime_size_mult = float(np.clip(self.regime_size_mult * mod, 0.01, 20.0))
        result = super()._try_entry(bar_idx, vel_div, prices, price_histories,
                                    v50_vel, v750_vel)
        if self._inject_p3 and result is not None:
            # P3: gate on adverse z.
            # NOTE(review): this gates AFTER super()._try_entry() has already
            # executed — if the base engine opened a position there, returning
            # None here may leave engine state holding that position while the
            # caller sees "no entry". Confirm super's contract or gate before
            # the super() call.
            z = self._get_bar_z(bar_idx)
            if z > self._p3_gate_thr:
                return None
        if self._inject_p5 and result is not None and self.position is not None:
            # P5: scale the freshly-opened position's notional, clip to [0.2, 2].
            z = self._get_bar_z(bar_idx)
            s = float(np.clip(1.0 + self._strength * np.tanh(z / K_TANH), 0.2, 2.0))
            self.position.notional *= s
            self._scale_history.append(s)
        return result

    def reset(self):
        super().reset()
        self._scale_history = []


# ══════════════════════════════════════════════════════════════════════════════
# Per-config runner
# ══════════════════════════════════════════════════════════════════════════════
def run_one_config(cfg, parquet_files, pq_data, signals_1m, signals_5s, vol_p60):
    """
    Run one config dict. Returns a metrics dict.

    Parameters
    ----------
    cfg           : config dict from generate_configs()
    parquet_files : list of 5s parquet Paths to iterate over
    pq_data       : dict[date_str -> (df, asset_cols, dvol_array)]
    signals_1m    : dict[date_str -> {...}] or None entry
    signals_5s    : dict[date_str -> {...}] or None (None = sets A/A' only)
    vol_p60       : float — 60th percentile vol threshold
    """
    # Union of asset columns across all loaded days.
    OB_ASSETS = sorted({a for ds, (df, ac, _) in pq_data.items() for a in ac})
    mock_ob = MockOBProvider(
        imbalance_bias=-.09, depth_scale=1., assets=OB_ASSETS,
        imbalance_biases={
            "BTCUSDT": -.086, "ETHUSDT": -.092, "BNBUSDT": +.05, "SOLUSDT": +.05,
        },
    )
    ob_eng = OBFeatureEngine(mock_ob)
    ob_eng.preload_date("mock", OB_ASSETS)
    forewarner = DolphinForewarner(models_dir=MC_MODELS)
    acb = AdaptiveCircuitBreaker()
    acb.preload_w750([pf.stem for pf in parquet_files])
    # Choose P3 gate threshold based on mode
    p3_thr = P3_GATE_RANK if cfg['mode'] == 'M3' else P3_GATE_RAW
    engine = ZInjectionEngine(
        inject_point=cfg['point'], strength=cfg['strength'], p3_gate_thr=p3_thr,
        **BASE_ENGINE_KWARGS, **D_LIQ_KWARGS,
    )
    engine.set_ob_engine(ob_eng)
    engine.set_acb(acb)
    engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
    engine.set_esoteric_hazard_multiplier(0.)
    t0 = time.time()
    for pf in parquet_files:
        ds = pf.stem
        df, acols, dvol = pq_data[ds]
        # Vol regime gate: True only where dvol is finite and above threshold.
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)
        sig1m = signals_1m.get(ds) if signals_1m else None
        sig5s = signals_5s.get(ds) if signals_5s else None
        use_5s = cfg['exp_set'] in ('B', 'Bp') and sig5s is not None
        if sig1m is not None:
            N = len(df)
            z1m_arr = sig1m['z1m_5s']
            zstd1m = sig1m['zstd1m_5s']
            pct1m = sig1m['pct1m_5s']
            z_sod_1m = sig1m['z_sod']
            if use_5s:
                z5s_arr = sig5s['z5s']
                zstd5s = sig5s['zstd5s']
                pct5s = sig5s['pct5s']
                z_sod_5s = sig5s['z_sod_5s']
            else:
                # Neutral placeholders for the (absent) 5s signal.
                z5s_arr = np.zeros(N, dtype=np.float64)
                zstd5s = np.ones(N, dtype=np.float64)
                pct5s = np.full(N, 0.5, dtype=np.float64)
                z_sod_5s = 0.0
            # Resize arrays to match df length (safety)
            def _resize(arr, n):
                if len(arr) == n:
                    return arr
                if len(arr) > n:
                    return arr[:n]
                # Shorter: left-pad with zeros so the tail stays aligned.
                return np.concatenate([np.zeros(n - len(arr), dtype=arr.dtype), arr])
            z1m_arr = _resize(z1m_arr, N)
            zstd1m = _resize(zstd1m, N)
            pct1m = _resize(pct1m, N)
            z5s_arr = _resize(z5s_arr, N)
            zstd5s = _resize(zstd5s, N)
            pct5s = _resize(pct5s, N)
            bar_z, sod_z = combine_signals(
                z1m_arr, zstd1m, pct1m, z5s_arr, zstd5s, pct5s,
                cfg['exp_set'], cfg['mode'], cfg['weights'],
                z_sod_1m, z_sod_5s,
            )
            engine.set_day_signals(bar_z, sod_z)
        else:
            # No 1m signal for this day — run the day with a neutral signal.
            engine.set_day_signals(np.zeros(len(df)), 0.0)
        engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)
    elapsed = time.time() - t0
    trades = engine.trade_history
    roi = (engine.capital - 25000.) / 25000. * 100.
    # Rebuild an equity curve from trade PnLs (ordered by exit bar) to
    # estimate max drawdown.
    cap_curve = [25000.]
    for t_ in sorted(trades, key=lambda x: getattr(x, 'exit_bar', 0)):
        cap_curve.append(cap_curve[-1] + getattr(t_, 'pnl_absolute', 0.))
    cap_arr = np.array(cap_curve)
    peak = np.maximum.accumulate(cap_arr)
    dd = float(np.max((peak - cap_arr) / (peak + 1e-10)) * 100.)
    calmar = roi / max(dd, 1e-4)  # dd floor avoids divide-by-zero
    sh = engine._scale_history
    return {
        'name': cfg['name'],
        'exp_set': cfg['exp_set'],
        'point': cfg['point'],
        'mode': cfg['mode'],
        'weights': list(cfg['weights']),
        'strength': cfg['strength'],
        'T': len(trades),
        'ROI': round(roi, 4),
        'DD': round(dd, 4),
        'Calmar': round(calmar, 4),
        'elapsed_s': round(elapsed, 1),
        'scale_mean': round(float(np.mean(sh)), 4) if sh else 1.0,
    }


# ══════════════════════════════════════════════════════════════════════════════
# Data loading helpers
# ══════════════════════════════════════════════════════════════════════════════
def _load_pq_data(parquet_files):
    """Load all 5s parquet files into pq_data dict."""
    print("Loading 5s parquet data...")
    pq_data = {}
    for pf in parquet_files:
        df = pd.read_parquet(pf)
        ac = [c for c in df.columns if c not in META_COLS]
        bp = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
        # dv[i] = std of BTC returns over the trailing 50-bar window.
        dv = np.full(len(df), np.nan)
        if bp is not None:
            for i in range(50, len(bp)):
                seg = bp[max(0, i - 50):i]
                if len(seg) >= 10:
                    dv[i] = float(np.std(np.diff(seg) / seg[:-1]))
        pq_data[pf.stem] = (df, ac, dv)
    return pq_data


def _compute_vol_p60(parquet_files):
    """Compute 60th percentile vol threshold from the first 2 days."""
    all_vols = []
    for pf in parquet_files[:2]:
        df = pd.read_parquet(pf)
        if 'BTCUSDT' not in df.columns:
            continue
        pr = df['BTCUSDT'].values
        for i in range(60, len(pr)):
            seg = pr[max(0, i - 50):i]
            if len(seg) >= 10:
                v = float(np.std(np.diff(seg) / seg[:-1]))
                if v > 0:
                    all_vols.append(v)
    return float(np.percentile(all_vols, 60)) if all_vols else 0.
def _run_baseline(parquet_files, pq_data, vol_p60):
    """Run D_LIQ_GOLD baseline (no injection).

    Mirrors run_one_config's engine wiring and metrics, but uses the plain
    create_d_liq_engine factory and no per-day z-signals.
    """
    OB_ASSETS = sorted({a for ds, (df, ac, _) in pq_data.items() for a in ac})
    mock_ob = MockOBProvider(
        imbalance_bias=-.09, depth_scale=1., assets=OB_ASSETS,
        imbalance_biases={
            "BTCUSDT": -.086, "ETHUSDT": -.092, "BNBUSDT": +.05, "SOLUSDT": +.05,
        },
    )
    ob_eng = OBFeatureEngine(mock_ob)
    ob_eng.preload_date("mock", OB_ASSETS)
    forewarner = DolphinForewarner(models_dir=MC_MODELS)
    acb = AdaptiveCircuitBreaker()
    acb.preload_w750([pf.stem for pf in parquet_files])
    engine = create_d_liq_engine(**BASE_ENGINE_KWARGS)
    engine.set_ob_engine(ob_eng)
    engine.set_acb(acb)
    engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
    engine.set_esoteric_hazard_multiplier(0.)
    t0 = time.time()
    for pf in parquet_files:
        ds = pf.stem
        df, acols, dvol = pq_data[ds]
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)
        engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)
    elapsed = time.time() - t0
    trades = engine.trade_history
    roi = (engine.capital - 25000.) / 25000. * 100.
    # Same drawdown reconstruction as run_one_config.
    cap_curve = [25000.]
    for t_ in sorted(trades, key=lambda x: getattr(x, 'exit_bar', 0)):
        cap_curve.append(cap_curve[-1] + getattr(t_, 'pnl_absolute', 0.))
    cap_arr = np.array(cap_curve)
    peak = np.maximum.accumulate(cap_arr)
    dd = float(np.max((peak - cap_arr) / (peak + 1e-10)) * 100.)
    calmar = roi / max(dd, 1e-4)
    return {
        'name': 'baseline',
        'exp_set': 'baseline',
        'point': 'none',
        'mode': 'none',
        'weights': [0., 0.],
        'strength': 0.,
        'T': len(trades),
        'ROI': round(roi, 4),
        'DD': round(dd, 4),
        'Calmar': round(calmar, 4),
        'elapsed_s': round(elapsed, 1),
        'scale_mean': 1.0,
    }


def _print_results_table(results, base):
    """Print sorted results table relative to baseline."""
    width = 115
    print("=" * width)
    print(f"{'Name':<36} {'Set':>4} {'P':>3} {'M':>3} {'T':>5} "
          f"{'ROI%':>8} {'DD%':>7} {'Calmar':>8} "
          f"{'dROI':>7} {'dDD':>6} {'dCal':>7} {'s_mean':>7}")
    print("-" * width)
    for r in results:
        # Deltas vs baseline; '**' flags a >2% Calmar improvement.
        dr = r['ROI'] - base['ROI']
        ddd = r['DD'] - base['DD']
        dcal = r['Calmar'] - base['Calmar']
        flag = ' **' if r['Calmar'] > base['Calmar'] * 1.02 else ''
        print(f"{r['name']:<36} {r['exp_set']:>4} {r['point']:>3} {r['mode']:>3} "
              f"{r['T']:>5} {r['ROI']:>8.2f} {r['DD']:>7.2f} {r['Calmar']:>8.2f} "
              f"{dr:>+7.2f} {ddd:>+6.2f} {dcal:>+7.2f} {r['scale_mean']:>7.3f}{flag}")
    print("=" * width)


# ══════════════════════════════════════════════════════════════════════════════
# Main
# ══════════════════════════════════════════════════════════════════════════════
def main():
    parser = argparse.ArgumentParser(description='exp13 multiscale ConvNeXt sweep')
    parser.add_argument('--subset', type=int, default=14,
                        help='Days to run in phase-1 screening (0 = full 56 days)')
    parser.add_argument('--top_k', type=int, default=20,
                        help='Top-K configs to validate on full 56 days in phase-2')
    parser.add_argument('--skip_sets', type=str, default='',
                        help='Comma-separated sets to skip, e.g. "B,Bp"')
    parser.add_argument('--only_config', type=str, default='',
                        help='Skip Phase-1 entirely; run just this named config on full window')
    parser.add_argument('--skip_5s', action='store_true',
                        help='Skip 5s sensor load + pre-compute (saves ~3 GB RAM; safe when only running sets A/Ap)')
    args = parser.parse_args()
    skip_sets = {s.strip() for s in args.skip_sets.split(',') if s.strip()}
    # ── 1.
    # ── 1. Load 1m sensor ─────────────────────────────────────────────────
    # Macro-regime model (S2). Constructor path comes from MODEL_1M
    # (defined earlier in the file — not visible in this chunk).
    print(f"Loading 1m ConvNextSensor from {MODEL_1M}...")
    sensor_1m = ConvNextSensor(str(MODEL_1M))
    print(f" epoch={sensor_1m.epoch} val_loss={sensor_1m.val_loss:.4f} " f"z_dim={sensor_1m.z_dim}\n")

    # ── 2. Try to load 5s sensor ─────────────────────────────────────────────
    # Micro-regime model (S1) is optional: if the user passes --skip_5s, the
    # checkpoint file is missing, or construction raises, experiment sets
    # B and B' (keyed 'B'/'Bp' in skip_sets) are excluded from the sweep.
    sensor_5s = None
    proxy_b_dim_5s = 0  # stays 0 unless the 5s sensor loads (see step 5)
    if args.skip_5s:
        print("5s sensor: SKIPPED (--skip_5s) — sets B/Bp will be excluded.\n")
        skip_sets.update({'B', 'Bp'})
    elif MODEL_5S.exists():
        try:
            # Imported lazily so the script still runs when the dvae package
            # (or this optional sensor) is unavailable.
            from dvae.convnext_5s_sensor import ConvNext5sSensor
            sensor_5s = ConvNext5sSensor(str(MODEL_5S))
            print(f"5s sensor loaded: epoch={sensor_5s.epoch} " f"val_loss={sensor_5s.val_loss:.4f} z_dim={sensor_5s.z_dim}\n")
        except Exception as e:
            # Deliberate best-effort: any load failure degrades the sweep to
            # 1m-only sets rather than aborting the whole experiment.
            print(f"WARNING: Failed to load 5s sensor: {e}")
            print(" Sets B and B' will be skipped.\n")
            skip_sets.update({'B', 'Bp'})
    else:
        print(f"WARNING: {MODEL_5S} not found — sets B and B' will be skipped.\n")
        skip_sets.update({'B', 'Bp'})

    # ── 3. Enumerate all parquet files ────────────────────────────────────
    # One parquet file per trading day of 5s scans; 'catalog' artifacts in
    # the same directory are filtered out.
    all_parquet_files = sorted(VBT5s.glob("*.parquet"))
    all_parquet_files = [p for p in all_parquet_files if 'catalog' not in str(p)]
    print(f"Dataset: {len(all_parquet_files)} days (5s scans)")
    # subset <= 0 means "use every day" (phase 1 then IS the full run; see step 9).
    n_subset = args.subset if args.subset > 0 else len(all_parquet_files)
    subset_files = all_parquet_files[:n_subset]
    print(f"Phase-1 subset: {n_subset} days\n")

    # ── 4. Precompute 1m signals (all 56 days) ────────────────────────────
    # Signals are computed once over the FULL dataset so phase 2 can reuse
    # them without re-running the sensors.
    signals_1m_all = precompute_1m_signals(all_parquet_files, sensor_1m)

    # ── 5. Precompute 5s signals (all 56 days) if sensor available ────────
    signals_5s_all = None
    if sensor_5s is not None:
        proxy_b_dim_5s = find_proxy_b_dim_5s(all_parquet_files, sensor_5s)
        signals_5s_all = precompute_5s_signals(all_parquet_files, sensor_5s, proxy_b_dim_5s)

    # ── 6. Load all 56 day pq_data ────────────────────────────────────────
    pq_data_all = _load_pq_data(all_parquet_files)
    # vol_p60 is a single scalar computed over the whole window and shared by
    # every backtest run below (baseline and all configs).
    vol_p60 = _compute_vol_p60(all_parquet_files)
    print(f"vol_p60 = {vol_p60:.6f}\n")

    # Build subset pq_data (keyed by file stem, same convention as pq_data_all)
    pq_data_sub = {pf.stem: pq_data_all[pf.stem] for pf in subset_files}

    # ── 7. Generate configs ───────────────────────────────────────────────
    # Full cartesian sweep, then drop any config whose experiment set was
    # disabled in step 2.
    all_configs = generate_configs()
    active_configs = [c for c in all_configs if c['exp_set'] not in skip_sets]
    print(f"Total configs: {len(all_configs)} Active (after skips): {len(active_configs)}")
    if skip_sets:
        print(f" Skipped sets: {skip_sets}")
    print()

    # ── 8. PHASE 1: baseline + all configs on subset ──────────────────────
    if args.only_config:
        # Fast-check mode: skip Phase 1 entirely, inject named config into Phase 2
        named_cfg = next((c for c in all_configs if c['name'] == args.only_config), None)
        if named_cfg is None:
            valid = [c['name'] for c in all_configs[:8]]
            print(f"[ERROR] Config '{args.only_config}' not found. Example names: {valid}")
            return
        print(f"\n[FAST CHECK] Skipping Phase 1 — '{args.only_config}' goes straight to full-window Phase 2\n")
        # Sentinel phase-1 record: Calmar=999.0 guarantees the named config
        # sorts to the top so the phase-2 loop below picks it up. The zeroed
        # baseline stub is only used as a placeholder in the JSON output.
        phase1_results = [{'name': args.only_config, 'ROI': 0.0, 'DD': 0.0, 'Calmar': 999.0}]
        phase1_sorted = phase1_results
        baseline_result = {'ROI': 0.0, 'DD': 0.0, 'Calmar': 0.0, 'T': 0, 'elapsed_s': 0}
    else:
        print("=" * 80)
        print(f"PHASE 1: Running baseline + {len(active_configs)} configs on {n_subset} days")
        print("=" * 80)

        print("[baseline]", flush=True)
        baseline_result = _run_baseline(subset_files, pq_data_sub, vol_p60)
        print(f" T={baseline_result['T']} ROI={baseline_result['ROI']:+.2f}% "
              f"DD={baseline_result['DD']:.2f}% Calmar={baseline_result['Calmar']:.2f} "
              f"({baseline_result['elapsed_s']:.0f}s)\n")

        phase1_results = []
        for idx, cfg in enumerate(active_configs, 1):
            print(f"[{idx:3d}/{len(active_configs)}] {cfg['name']}", flush=True)
            r = run_one_config(
                cfg, subset_files, pq_data_sub, signals_1m_all, signals_5s_all,
                vol_p60,
            )
            phase1_results.append(r)
            # Deltas vs baseline ("pp" = percentage points) for the live log.
            dr = r['ROI'] - baseline_result['ROI']
            ddd = r['DD'] - baseline_result['DD']
            dcal = r['Calmar'] - baseline_result['Calmar']
            print(f" T={r['T']} ROI={r['ROI']:+.2f}% DD={r['DD']:.2f}% "
                  f"Calmar={r['Calmar']:.2f} dROI={dr:+.2f}pp dDD={ddd:+.2f}pp "
                  f"dCal={dcal:+.2f} s_mean={r['scale_mean']:.3f} ({r['elapsed_s']:.0f}s)")

    # Sort by Calmar descending
    phase1_sorted = sorted(phase1_results, key=lambda x: x['Calmar'], reverse=True)
    if not args.only_config:
        print(f"\n--- Phase-1 Top 20 (subset={n_subset}d) ---")
        _print_results_table(phase1_sorted[:20], baseline_result)

    # ── 9. PHASE 2: validate top_k on full 56 days ────────────────────────
    # Runs when a real subset sweep was requested (subset>0 and top_k>0) or
    # in --only_config fast-check mode; otherwise phase 1 already covered
    # the full window and we fall through to the else-branch verdict.
    phase2_results = []
    phase2_validated = {}
    if (args.subset > 0 and args.top_k > 0) or args.only_config:
        top_k_configs = phase1_sorted if args.only_config else phase1_sorted[:args.top_k]
        # Get config dicts for top-K names
        top_k_cfg_map = {c['name']: c for c in all_configs}

        print(f"\n{'=' * 80}")
        # NOTE(review): in --only_config mode this banner still prints
        # args.top_k even though the whole phase1_sorted list is used.
        print(f"PHASE 2: Validating top {args.top_k} configs on full {len(all_parquet_files)} days")
        print(f"{'=' * 80}")

        print("[baseline_full]", flush=True)
        baseline_full = _run_baseline(all_parquet_files, pq_data_all, vol_p60)
        print(f" T={baseline_full['T']} ROI={baseline_full['ROI']:+.2f}% "
              f"DD={baseline_full['DD']:.2f}% Calmar={baseline_full['Calmar']:.2f} "
              f"({baseline_full['elapsed_s']:.0f}s)\n")

        for idx, r_sub in enumerate(top_k_configs, 1):
            cfg = top_k_cfg_map.get(r_sub['name'])
            # Skip configs whose set was disabled after phase 1 (or unknown names).
            if cfg is None or cfg['exp_set'] in skip_sets:
                continue
            print(f"[{idx:3d}/{len(top_k_configs)}] {cfg['name']} (phase2)", flush=True)
            r_full = run_one_config(
                cfg, all_parquet_files, pq_data_all, signals_1m_all,
                signals_5s_all, vol_p60,
            )
            phase2_results.append(r_full)
            dr = r_full['ROI'] - baseline_full['ROI']
            ddd = r_full['DD'] - baseline_full['DD']
            dcal = r_full['Calmar'] - baseline_full['Calmar']
            print(f" T={r_full['T']} ROI={r_full['ROI']:+.2f}% DD={r_full['DD']:.2f}% "
                  f"Calmar={r_full['Calmar']:.2f} dROI={dr:+.2f}pp dDD={ddd:+.2f}pp "
                  f"dCal={dcal:+.2f} ({r_full['elapsed_s']:.0f}s)")

        phase2_sorted = sorted(phase2_results, key=lambda x: x['Calmar'], reverse=True)
        print(f"\n--- Phase-2 Final Results (full {len(all_parquet_files)}d) ---")
        _print_results_table(phase2_sorted, baseline_full)

        # Verdict
        # A config "wins" only if its full-window Calmar beats baseline by 2%.
        print("\n=== VERDICT ===")
        threshold = baseline_full['Calmar'] * 1.02
        print(f"Baseline (full): ROI={baseline_full['ROI']:.2f}% "
              f"DD={baseline_full['DD']:.2f}% Calmar={baseline_full['Calmar']:.2f}")
        print(f"Threshold: Calmar > {threshold:.2f} (1.02x baseline)")
        winners = [r for r in phase2_sorted if r['Calmar'] > threshold]
        if winners:
            best = winners[0]
            print(f"SIGNAL CONFIRMED — {len(winners)} config(s) beat threshold")
            print(f" Best: [{best['name']}] Calmar={best['Calmar']:.2f} "
                  f"ROI={best['ROI']:.2f}% DD={best['DD']:.2f}%")
        else:
            if phase2_sorted:
                best = phase2_sorted[0]
                print(f"NO improvement over D_LIQ_GOLD on full dataset")
                print(f" Best: [{best['name']}] Calmar={best['Calmar']:.2f} "
                      f"(threshold={threshold:.2f})")
            else:
                print(" No phase-2 results available.")
        phase2_validated = {
            'baseline_full': baseline_full,
            'results': phase2_results,
        }
    else:
        # Full run in phase-1 (subset=0) — just report
        phase2_sorted = phase1_sorted
        baseline_full = baseline_result
        print("\n=== VERDICT (full run) ===")
        threshold = baseline_full['Calmar'] * 1.02
        print(f"Baseline: ROI={baseline_full['ROI']:.2f}% "
              f"DD={baseline_full['DD']:.2f}% Calmar={baseline_full['Calmar']:.2f}")
        print(f"Threshold: Calmar > {threshold:.2f} (1.02x baseline)")
        winners = [r for r in phase1_sorted if r['Calmar'] > threshold]
        if winners:
            best = winners[0]
            print(f"SIGNAL CONFIRMED — {len(winners)} config(s) beat threshold")
            print(f" Best: [{best['name']}] Calmar={best['Calmar']:.2f} "
                  f"ROI={best['ROI']:.2f}% DD={best['DD']:.2f}%")
        else:
            if phase1_sorted:
                best = phase1_sorted[0]
                print(f"NO improvement over D_LIQ_GOLD")
                print(f" Best: [{best['name']}] Calmar={best['Calmar']:.2f} "
                      f"(threshold={threshold:.2f})")

    # ── 10. Write results ─────────────────────────────────────────────────
    # Single JSON artifact capturing model metadata, run parameters, and
    # both phases' raw results for offline analysis.
    output = {
        'experiment': 'exp13_multiscale_sweep',
        'model_1m_epoch': sensor_1m.epoch,
        'model_1m_val_loss': sensor_1m.val_loss,
        # getattr guards keep serialization safe even if the 5s sensor class
        # lacks these attributes; None when the sensor never loaded.
        'model_5s_epoch': getattr(sensor_5s, 'epoch', None) if sensor_5s else None,
        'model_5s_val_loss': getattr(sensor_5s, 'val_loss', None) if sensor_5s else None,
        'proxy_b_dim_5s': proxy_b_dim_5s,
        'skip_sets': list(skip_sets),
        'subset_days': n_subset,
        'n_all_days': len(all_parquet_files),
        'baseline_subset': baseline_result,
        'phase1_results': phase1_results,
        'phase2': phase2_validated,
    }
    with open(OUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)
    print(f"\nResults -> {OUT_FILE}")


if __name__ == '__main__':
    main()