1117 lines
49 KiB
Python
1117 lines
49 KiB
Python
|
|
"""
|
|||
|
|
exp13_multiscale_sweep.py — Multi-scale ConvNeXt z-signal sweep on D_LIQ_GOLD
|
|||
|
|
==============================================================================
|
|||
|
|
|
|||
|
|
Tests ConvNeXt z-signals from two models injected at multiple points in the
|
|||
|
|
D_LIQ_GOLD Alpha Engine stack.
|
|||
|
|
|
|||
|
|
Signal sources:
|
|||
|
|
S2: 1m ConvNeXt (ep=17, val_loss=19.26, trained on 5y klines)
|
|||
|
|
macro regime, updated every 12 5s bars
|
|||
|
|
S1: 5s ConvNeXt (trained on 56d scans, model at convnext_model_5s.json)
|
|||
|
|
micro regime, updated every 16 5s bars
|
|||
|
|
|
|||
|
|
Experiment sets:
|
|||
|
|
A : Alpha engine + S2 (1m model only), balanced weights
|
|||
|
|
B : Alpha engine + S1 + S2 (both models), balanced weights
|
|||
|
|
A' : Same as A but with recency bias (1m model discounted)
|
|||
|
|
B' : Same as B but with recency bias (5s model favored, 1m discounted)
|
|||
|
|
|
|||
|
|
Injection points (P):
|
|||
|
|
P1: ACBv6 beta modulation — modify _day_beta after begin_day super() call
|
|||
|
|
P3: Entry gate — block entry if z_combined > gate_threshold
|
|||
|
|
P4: regime_size_mult modulation — multiply before entry
|
|||
|
|
P5: Notional scale — multiply notional after entry result
|
|||
|
|
|
|||
|
|
Combination modes (M):
|
|||
|
|
M1: tanh soft-clamp: alpha * tanh(z / K_TANH)
|
|||
|
|
M2: confidence-weighted: multiply each z by 1 / max(0.1, z_post_std)
|
|||
|
|
M3: rank-based: percentile rank [0,1] mapped to [-1, +1]
|
|||
|
|
M4: macro-gated micro: sigmoid(z_s2) * tanh(z_s1 / K_TANH) [B/B' only]
|
|||
|
|
|
|||
|
|
Weight schemes (W):
|
|||
|
|
W1 balanced: [w_s1=0.5, w_s2=0.5] (B/B'), [w_s2=1.0] (A/A')
|
|||
|
|
W2 mild recency: [w_s1=0.6, w_s2=0.3] (B/B'), [w_s2=0.5] (A/A')
|
|||
|
|
W3 strong recency:[w_s1=0.8, w_s2=0.15] (B/B'), [w_s2=0.25] (A/A')
|
|||
|
|
|
|||
|
|
Signal strengths alpha: {0.2, 0.3, 0.5}
|
|||
|
|
Total configs: 252 + 1 baseline = 253
|
|||
|
|
|
|||
|
|
K_TANH = 1.5
|
|||
|
|
P3 gate threshold (M3): 0.75 (top quartile)
|
|||
|
|
P3 gate threshold (others): 0.5 sigma
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import sys, time, json, warnings, argparse
|
|||
|
|
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
|
|||
|
|
warnings.filterwarnings('ignore')
|
|||
|
|
from pathlib import Path
|
|||
|
|
import numpy as np
|
|||
|
|
import pandas as pd
|
|||
|
|
|
|||
|
|
ROOT = Path(__file__).parent.parent
|
|||
|
|
sys.path.insert(0, str(ROOT))
|
|||
|
|
|
|||
|
|
from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
|
|||
|
|
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
|
|||
|
|
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
|
|||
|
|
from nautilus_dolphin.nautilus.ob_features import (
|
|||
|
|
OBFeatureEngine, compute_imbalance_nb, compute_depth_1pct_nb,
|
|||
|
|
compute_depth_quality_nb, compute_fill_probability_nb, compute_spread_proxy_nb,
|
|||
|
|
compute_depth_asymmetry_nb, compute_imbalance_persistence_nb,
|
|||
|
|
compute_withdrawal_velocity_nb, compute_market_agreement_nb, compute_cascade_signal_nb,
|
|||
|
|
)
|
|||
|
|
from nautilus_dolphin.nautilus.ob_provider import MockOBProvider
|
|||
|
|
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
|
|||
|
|
from nautilus_dolphin.nautilus.proxy_boost_engine import LiquidationGuardEngine, create_d_liq_engine
|
|||
|
|
from mc.mc_ml import DolphinForewarner
|
|||
|
|
from dvae.convnext_sensor import ConvNextSensor, PROXY_B_DIM
|
|||
|
|
|
|||
|
|
# ── JIT warmup ────────────────────────────────────────────────────────────────
# Touch every `_nb` kernel once with tiny inputs so compilation cost is paid
# up-front, before the timed sweep loops run.
print("Warming up JIT...")
_p = np.array([1., 2., 3.], dtype=np.float64)
compute_irp_nb(_p, -1); compute_ars_nb(1., .5, .01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500., 20, 0.20)
compute_sizing_nb(-.03, -.02, -.05, 3., .5, 5., .20, True, True, 0.,
                  np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
                  np.zeros(5, dtype=np.float64), 0, -1, .01, .04)
check_dc_nb(_p, 3, 1, .75)
# Dummy bid/ask depth arrays for the orderbook-feature kernels.
_b = np.array([100., 200., 300., 400., 500.], dtype=np.float64)
_a = np.array([110., 190., 310., 390., 510.], dtype=np.float64)
compute_imbalance_nb(_b, _a); compute_depth_1pct_nb(_b, _a)
compute_depth_quality_nb(210., 200.); compute_fill_probability_nb(1.)
compute_spread_proxy_nb(_b, _a); compute_depth_asymmetry_nb(_b, _a)
compute_imbalance_persistence_nb(np.array([.1, -.1], dtype=np.float64), 2)
compute_withdrawal_velocity_nb(np.array([100., 110.], dtype=np.float64), 1)
compute_market_agreement_nb(np.array([.1, -.05], dtype=np.float64), 2)
compute_cascade_signal_nb(np.array([-.05, -.15], dtype=np.float64), 2, -.10)
print(" JIT ready.")
|
|||
|
|
|
|||
|
|
# ── Paths ─────────────────────────────────────────────────────────────────────
# 5s scan parquets (one file per day; file stem = date string).
VBT5s = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
# 1m kline parquets, matched to the 5s files by stem.
VBT1m = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
# ConvNeXt weight files: 1m model (signal S2) and 5s model (signal S1).
MODEL_1M = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\dvae\convnext_model.json")
MODEL_5S = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\dvae\convnext_model_5s.json")
MC_MODELS = str(ROOT / "mc_results" / "models")  # DolphinForewarner model dir
OUT_FILE = Path(__file__).parent / "exp13_multiscale_sweep_results.json"  # sweep output

# Columns that are NOT tradeable assets in a day's 5s DataFrame; every other
# column is treated as an asset price series downstream.
META_COLS = {
    'timestamp', 'scan_number',
    'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
    'v300_lambda_max_velocity', 'v750_lambda_max_velocity',
    'vel_div', 'instability_50', 'instability_150',
}

# The 7 sensor input channels, in the order the ConvNeXt models expect.
# Index positions matter: the proxy_B channel is derived downstream as
# column 5 minus column 3 (instability_50 - v750_lambda_max_velocity).
FEATURE_COLS = [
    'v50_lambda_max_velocity',
    'v150_lambda_max_velocity',
    'v300_lambda_max_velocity',
    'v750_lambda_max_velocity',
    'vel_div',
    'instability_50',
    'instability_150',
]

# Baseline engine construction kwargs shared by every config in the sweep.
BASE_ENGINE_KWARGS = dict(
    initial_capital=25000., vel_div_threshold=-.02, vel_div_extreme=-.05,
    min_leverage=.5, max_leverage=5., leverage_convexity=3.,
    fraction=.20, fixed_tp_pct=.0095, stop_pct=1., max_hold_bars=120,
    use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=.75,
    dc_skip_contradicts=True, dc_leverage_boost=1., dc_leverage_reduce=.5,
    use_asset_selection=True, min_irp_alignment=.45,
    use_sp_fees=True, use_sp_slippage=True,
    sp_maker_entry_rate=.62, sp_maker_exit_rate=.50,
    use_ob_edge=True, ob_edge_bps=5., ob_confirm_rate=.40,
    lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
)
# Liquidation-guard (D_LIQ) specific kwargs layered on top of the baseline.
D_LIQ_KWARGS = dict(
    extended_soft_cap=8., extended_abs_cap=9., mc_leverage_ref=5.,
    margin_buffer=.95, threshold=.35, alpha=1., adaptive_beta=True,
)
# Config handed to the MC forewarner; values mirror BASE_ENGINE_KWARGS.
MC_BASE_CFG = {
    'trial_id': 0, 'vel_div_threshold': -.020, 'vel_div_extreme': -.050,
    'use_direction_confirm': True, 'dc_lookback_bars': 7, 'dc_min_magnitude_bps': .75,
    'dc_skip_contradicts': True, 'dc_leverage_boost': 1.00, 'dc_leverage_reduce': .50,
    'vd_trend_lookback': 10, 'min_leverage': .50, 'max_leverage': 5.00,
    'leverage_convexity': 3.00, 'fraction': .20, 'use_alpha_layers': True,
    'use_dynamic_leverage': True, 'fixed_tp_pct': .0095, 'stop_pct': 1.00,
    'max_hold_bars': 120, 'use_sp_fees': True, 'use_sp_slippage': True,
    'sp_maker_entry_rate': .62, 'sp_maker_exit_rate': .50, 'use_ob_edge': True,
    'ob_edge_bps': 5.00, 'ob_confirm_rate': .40, 'ob_imbalance_bias': -.09,
    'ob_depth_scale': 1.00, 'use_asset_selection': True, 'min_irp_alignment': .45,
    'lookback': 100, 'acb_beta_high': .80, 'acb_beta_low': .20,
    'acb_w750_threshold_pct': 60,
}

# Sweep-wide tunables (see the module docstring for their roles).
K_TANH = 1.5             # tanh soft-clamp scale for z-signals
T_WIN = 32               # sensor window length, in bars
STEP_5S = 16             # 5s window stride
P3_GATE_RAW = 0.5        # gate threshold for M1/M2 (sigma units)
P3_GATE_RANK = 0.75      # gate threshold for M3 (top quartile of pct rank = adverse)
PCT_RANK_WINDOW = 500    # rolling window for percentile rank precomputation
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
# Signal precomputation
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
|
|||
|
|
def _compute_rolling_pct_rank(values: np.ndarray, window: int = PCT_RANK_WINDOW) -> np.ndarray:
    """
    Compute the rolling percentile rank in [0, 1] of each element of *values*.

    The rank of values[i] is the fraction of the preceding *window* elements
    (exclusive of the current one) that are strictly less than values[i].
    Positions with fewer than 2 preceding elements (including position 0)
    get 0.5 (neutral).

    Parameters
    ----------
    values : (N,) array of z-scores
    window : number of preceding elements to rank against

    Returns
    -------
    (N,) float64 array of ranks in [0, 1]
    """
    N = len(values)
    rank = np.full(N, 0.5, dtype=np.float64)
    for i in range(1, N):
        lo = max(0, i - window)
        seg = values[lo:i]
        if len(seg) < 2:
            rank[i] = 0.5
        else:
            # Count of strictly-smaller elements / window size.  Identical to
            # the previous searchsorted(np.sort(seg), v) (side='left') result,
            # but O(W) per position instead of O(W log W) from re-sorting the
            # window every iteration.
            rank[i] = float(np.count_nonzero(seg < values[i]) / len(seg))
    return rank
|
|||
|
|
|
|||
|
|
|
|||
|
|
def precompute_1m_signals(parquet_files_5s, sensor_1m: ConvNextSensor) -> dict:
    """
    Precompute 1m ConvNext z-signals for every day, mapped to 5s resolution.

    Parameters
    ----------
    parquet_files_5s : iterable of Path — one 5s parquet per day; the matching
        1m file is looked up in VBT1m by the same stem (date string).
    sensor_1m : ConvNextSensor — must provide
        encode_window(df, j) -> (z_mu, z_post_std).

    Returns
    -------
    dict[date_str -> {
        'z1m_5s'    : np.ndarray (N5s,) — proxy_B z mapped to 5s bars
        'zstd1m_5s' : np.ndarray (N5s,) — z_post_std mapped to 5s bars
        'pct1m_5s'  : np.ndarray (N5s,) — rolling pct rank of z (for M3)
        'z_sod'     : float — start-of-day z (first window)
    }] or None entry when 1m file missing.
    """
    print("Pre-computing 1m ConvNext z-signals...")
    all_z = []        # accumulate across all days for global pct rank
    all_ds = []       # date strings in order
    raw_per_day = {}  # date_str -> (z1m_5s, zstd1m_5s, z_sod)

    for pf5 in parquet_files_5s:
        ds = pf5.stem
        pf1 = VBT1m / f"{ds}.parquet"
        if not pf1.exists():
            # No 1m data for this day: keep the day in order but mark it
            # None so callers fall back to a zero signal.
            raw_per_day[ds] = None
            all_ds.append(ds)
            continue

        # Sanitize inf -> NaN -> 0 before feeding the sensor.
        df1 = pd.read_parquet(pf1).replace([np.inf, -np.inf], np.nan).fillna(0.)
        n1 = len(df1)
        # Only the 5s row count is needed to build the 1m -> 5s index map.
        n5 = len(pd.read_parquet(pf5, columns=['timestamp']))

        z_arr = np.zeros(n1, dtype=np.float64)
        zstd_arr = np.zeros(n1, dtype=np.float64)

        # One encode per 1m bar; PROXY_B_DIM selects the proxy_B latent dim.
        for j in range(n1):
            z_mu, z_post_std = sensor_1m.encode_window(df1, j)
            z_arr[j] = float(z_mu[PROXY_B_DIM])
            zstd_arr[j] = z_post_std

        # Map 1m -> 5s by nearest index
        z1m_5s = np.array([z_arr[min(int(i * n1 / n5), n1 - 1)] for i in range(n5)])
        zstd1m_5s = np.array([zstd_arr[min(int(i * n1 / n5), n1 - 1)] for i in range(n5)])

        # Start-of-day z: first fully valid window (T_WIN-1 in 1m = bar index 31)
        z_sod = float(z_arr[min(T_WIN - 1, n1 - 1)])

        raw_per_day[ds] = (z1m_5s, zstd1m_5s, z_sod)
        all_z.append(z1m_5s)
        all_ds.append(ds)

        print(f" {ds}: z=[{z1m_5s.min():.2f},{z1m_5s.max():.2f}] "
              f"zstd=[{zstd1m_5s.min():.3f},{zstd1m_5s.max():.3f}]")

    # Global percentile rank across all concatenated z values
    # NOTE(review): np.concatenate raises on an empty list — if no day has a
    # matching 1m file this will fail; confirm that cannot occur upstream.
    all_z_cat = np.concatenate([z for z in all_z if z is not None and len(z) > 0])
    all_pct = _compute_rolling_pct_rank(all_z_cat, window=PCT_RANK_WINDOW)

    # Split back per day — `offset` walks all_pct in the same order that days
    # were appended to all_z (None days contributed no slots).
    signals = {}
    offset = 0
    for ds in all_ds:
        entry = raw_per_day.get(ds)
        if entry is None:
            signals[ds] = None
            continue
        z1m_5s, zstd1m_5s, z_sod = entry
        N = len(z1m_5s)
        pct_slice = all_pct[offset: offset + N]
        # Pad if lengths differ (should not happen but defensive)
        if len(pct_slice) < N:
            pct_slice = np.concatenate([np.full(N - len(pct_slice), 0.5), pct_slice])
        signals[ds] = {
            'z1m_5s': z1m_5s,
            'zstd1m_5s': zstd1m_5s,
            'pct1m_5s': pct_slice,
            'z_sod': z_sod,
        }
        offset += N

    n_ok = sum(1 for v in signals.values() if v is not None)
    print(f" 1m signals ready: {n_ok}/{len(signals)} days\n")
    return signals
|
|||
|
|
|
|||
|
|
|
|||
|
|
def find_proxy_b_dim_5s(parquet_files_5s, sensor_5s) -> int:
    """
    Find the 5s z-dim most correlated with proxy_B (ch7 mean) using 20 probe
    windows sampled uniformly from the 5s corpus.

    Parameters
    ----------
    parquet_files_5s : list of Path — 5s parquet files to sample from
    sensor_5s : object with find_proxy_b_dim((N, C_IN, T_WIN) array)
        -> (dim_idx, corr)

    Returns proxy_b_dim_5s (int); falls back to dim 0 when no probe window
    can be built.
    """
    from dvae.convnext_5s_sensor import ConvNext5sSensor  # noqa: F401 (type hint)
    print("Finding proxy_B dim for 5s model...")
    probe_windows = []
    # Stride so that ~20 files are sampled uniformly across the corpus.
    step = max(1, len(parquet_files_5s) // 20)
    for pf in parquet_files_5s[::step]:
        try:
            df = pd.read_parquet(pf).replace([np.inf, -np.inf], np.nan).fillna(0.)
            avail = [c for c in FEATURE_COLS if c in df.columns]
            if len(avail) < 7:
                # FEATURE_COLS has exactly 7 entries, so <7 means some missing.
                continue
            feats = df[FEATURE_COLS].values.astype(np.float64)
            if len(feats) < T_WIN:
                continue
            # Pick one window near the middle of the day
            mid = len(feats) // 2
            start = max(0, mid - T_WIN)
            arr7 = feats[start: start + T_WIN]
            # proxy_B channel = instability_50 - v750_lambda_max_velocity
            # (columns 5 and 3 of FEATURE_COLS).
            proxy_b = arr7[:, 5] - arr7[:, 3]
            arr8 = np.concatenate([arr7, proxy_b[:, np.newaxis]], axis=1)  # (T, 8)
            probe_windows.append(arr8.T)  # (C_IN, T_WIN)
        except Exception as e:
            print(f" Warning: could not build probe from {pf.stem}: {e}")
            continue
        if len(probe_windows) >= 20:
            break

    if not probe_windows:
        print(" No probe windows built — defaulting to dim 0")
        return 0

    probe_arr = np.stack(probe_windows, axis=0)  # (N, C_IN, T_WIN)
    dim_idx, corr = sensor_5s.find_proxy_b_dim(probe_arr)
    print(f" 5s proxy_B dim = {dim_idx} (r={corr:+.3f})\n")
    return dim_idx
|
|||
|
|
|
|||
|
|
|
|||
|
|
def precompute_5s_signals(parquet_files_5s, sensor_5s, proxy_b_dim_5s: int) -> dict:
    """
    Precompute 5s ConvNext z-signals for every day using sliding 32-bar windows
    with stride 16 (zero-order hold between updates).

    Parameters
    ----------
    parquet_files_5s : iterable of Path — one 5s parquet per day
    sensor_5s : object with encode_raw((C, T) array) -> (z_mu, z_post_std)
    proxy_b_dim_5s : int — latent dim selected by find_proxy_b_dim_5s()

    Returns
    -------
    dict[date_str -> {
        'z5s'     : np.ndarray (N,) — z per 5s bar (ZOH between windows)
        'zstd5s'  : np.ndarray (N,) — z_post_std (ZOH)
        'pct5s'   : np.ndarray (N,) — rolling pct rank (for M3)
        'z_sod_5s': float — first window z
    }]  (None entry when feature columns are missing or processing fails)
    """
    print("Pre-computing 5s ConvNext z-signals...")
    all_z = []        # per-day z arrays, for the global pct-rank pass
    all_ds = []       # date strings in order
    raw_per_day = {}  # date_str -> (z_arr, zstd_arr, z_sod_5s) or None

    for pf in parquet_files_5s:
        ds = pf.stem
        try:
            # Sanitize inf -> NaN -> 0 before feeding the sensor.
            df = pd.read_parquet(pf).replace([np.inf, -np.inf], np.nan).fillna(0.)
            avail = [c for c in FEATURE_COLS if c in df.columns]
            if len(avail) < 7:
                # FEATURE_COLS has exactly 7 entries, so <7 means some missing.
                raw_per_day[ds] = None
                all_ds.append(ds)
                continue

            feats = df[FEATURE_COLS].values.astype(np.float64)
            N = len(feats)

            z_arr = np.zeros(N, dtype=np.float64)
            zstd_arr = np.ones(N, dtype=np.float64)

            last_z = 0.0
            last_zstd = 1.0
            z_sod_5s = None

            # Slide a T_WIN window forward with stride STEP_5S; the encoded
            # value is held (ZOH) over the bars until the next update.
            for i in range(0, N, STEP_5S):
                end = min(i + T_WIN, N)
                start = max(0, end - T_WIN)
                arr7 = feats[start: end]
                actual = len(arr7)
                if actual < 4:
                    # Too little data to encode; skip (z_arr keeps defaults).
                    continue
                # Pad to T_WIN if needed
                if actual < T_WIN:
                    pad = np.zeros((T_WIN - actual, 7), dtype=np.float64)
                    arr7 = np.concatenate([pad, arr7], axis=0)
                # proxy_B channel = instability_50 - v750_lambda_max_velocity
                proxy_b = arr7[:, 5] - arr7[:, 3]
                arr8 = np.concatenate([arr7, proxy_b[:, np.newaxis]], axis=1)  # (T_WIN, 8)
                z_mu, z_post_std = sensor_5s.encode_raw(arr8.T)
                last_z = float(z_mu[proxy_b_dim_5s])
                last_zstd = z_post_std
                if z_sod_5s is None:
                    z_sod_5s = last_z
                # ZOH: fill from i to i+STEP_5S
                hi = min(i + STEP_5S, N)
                z_arr[i:hi] = last_z
                zstd_arr[i:hi] = last_zstd

            if z_sod_5s is None:
                # No window ever produced a value (very short day): neutral.
                z_sod_5s = 0.0

            raw_per_day[ds] = (z_arr, zstd_arr, z_sod_5s)
            all_z.append(z_arr)
            all_ds.append(ds)
            print(f" {ds}: z=[{z_arr.min():.2f},{z_arr.max():.2f}] "
                  f"zstd=[{zstd_arr.min():.3f},{zstd_arr.max():.3f}]")
        except Exception as e:
            # Best-effort: a bad day becomes a None entry, not a crash.
            print(f" Warning: failed to process {ds}: {e}")
            raw_per_day[ds] = None
            all_ds.append(ds)

    # Global percentile rank
    # NOTE(review): np.concatenate raises on an empty list if every day
    # failed — confirm that cannot occur upstream.
    all_z_cat = np.concatenate([z for z in all_z if z is not None and len(z) > 0])
    all_pct = _compute_rolling_pct_rank(all_z_cat, window=PCT_RANK_WINDOW)

    # Split the global pct array back per day, in append order.
    signals = {}
    offset = 0
    for ds in all_ds:
        entry = raw_per_day.get(ds)
        if entry is None:
            signals[ds] = None
            continue
        z_arr, zstd_arr, z_sod_5s = entry
        N = len(z_arr)
        pct_slice = all_pct[offset: offset + N]
        # Pad if lengths differ (defensive)
        if len(pct_slice) < N:
            pct_slice = np.concatenate([np.full(N - len(pct_slice), 0.5), pct_slice])
        signals[ds] = {
            'z5s': z_arr,
            'zstd5s': zstd_arr,
            'pct5s': pct_slice,
            'z_sod_5s': z_sod_5s,
        }
        offset += N

    n_ok = sum(1 for v in signals.values() if v is not None)
    print(f" 5s signals ready: {n_ok}/{len(signals)} days\n")
    return signals
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
# Signal combination
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
|
|||
|
|
def combine_signals(
    z1m_day: np.ndarray,
    zstd1m_day: np.ndarray,
    pct1m_day: np.ndarray,
    z5s_day: np.ndarray,
    zstd5s_day: np.ndarray,
    pct5s_day: np.ndarray,
    exp_set: str,
    mode: str,
    weights: tuple,
    z_sod_1m: float,
    z_sod_5s: float,
):
    """
    Blend the 1m (macro, S2) and optional 5s (micro, S1) z-signals into a
    per-bar array plus a start-of-day scalar.

    Parameters
    ----------
    z1m_day, zstd1m_day, pct1m_day : (N,) arrays — 1m signal at 5s resolution
    z5s_day, zstd5s_day, pct5s_day : (N,) arrays — 5s signal (zeros for A/A')
    exp_set : 'A', 'B', 'Ap', 'Bp' — the 5s signal participates only for B/Bp
    mode    : 'M1' raw, 'M2' confidence-weighted, 'M3' rank-mapped,
              'M4' macro-gated micro (B/Bp only)
    weights : (w_s1, w_s2) — 5s-model weight, 1m-model weight
    z_sod_1m, z_sod_5s : floats — start-of-day z values (5s one is 0 for A/A')

    Returns
    -------
    bar_z : (N,) float64 — combined signal per 5s bar
    sod_z : float — combined start-of-day signal (used by P1)
    """
    dual = exp_set in ('B', 'Bp')
    w_micro, w_macro = weights
    n_bars = len(z1m_day)
    neutral = np.zeros(n_bars)

    # Step 1: per-mode transform of each raw signal.
    if mode == 'M3':
        # Percentile ranks in [0, 1] are re-centered onto [-1, +1].
        macro = pct1m_day * 2.0 - 1.0
        micro = pct5s_day * 2.0 - 1.0 if dual else neutral
        sod_macro = float(pct1m_day[0] * 2.0 - 1.0) if len(pct1m_day) > 0 else 0.0
        sod_micro = float(pct5s_day[0] * 2.0 - 1.0) if (dual and len(pct5s_day) > 0) else 0.0
    elif mode == 'M2':
        # Divide by the posterior std (floored at 0.1) — confident windows count more.
        macro = z1m_day / np.maximum(0.1, zstd1m_day)
        micro = z5s_day / np.maximum(0.1, zstd5s_day) if dual else neutral
        sod_macro = z_sod_1m / max(0.1, float(zstd1m_day[0]) if len(zstd1m_day) > 0 else 1.0)
        if dual:
            sod_micro = z_sod_5s / max(0.1, float(zstd5s_day[0]) if len(zstd5s_day) > 0 else 1.0)
        else:
            sod_micro = 0.0
    else:
        # M1 and M4 operate on the raw z values.
        macro = z1m_day
        micro = z5s_day if dual else neutral
        sod_macro = z_sod_1m
        sod_micro = z_sod_5s if dual else 0.0

    # Step 2: combine macro and micro.
    if mode == 'M4' and dual:
        # Macro gates (sigmoid) the soft-clamped micro signal.
        gate = 1.0 / (1.0 + np.exp(-macro))
        bar_z = gate * np.tanh(micro / K_TANH)
        sod_gate = 1.0 / (1.0 + np.exp(-sod_macro))
        sod_z = sod_gate * float(np.tanh(sod_micro / K_TANH))
    elif dual:
        # Weighted average, guarded against a zero weight sum.
        denom = w_micro + w_macro
        if denom < 1e-12:
            denom = 1.0
        bar_z = (w_micro * micro + w_macro * macro) / denom
        sod_z = float((w_micro * sod_micro + w_macro * sod_macro) / denom)
    else:
        # Single-signal sets: w_s2 acts as an overall scale.
        bar_z = w_macro * macro
        sod_z = float(w_macro * sod_macro)

    return bar_z.astype(np.float64), float(sod_z)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
# Config generation
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
|
|||
|
|
def generate_configs():
    """
    Build the full grid of 252 experiment configs (the baseline run is
    handled separately by the caller).

    Returns
    -------
    list of dicts with keys: name, exp_set, point, mode, weights, strength.
    Ordering is: all of set A, then B, then A' ('Ap'), then B' ('Bp').
    """
    points = ['P1', 'P3', 'P4', 'P5']
    strengths = [0.2, 0.3, 0.5]
    single_modes = ['M1', 'M2', 'M3']        # M4 needs both models
    dual_modes = ['M1', 'M2', 'M3', 'M4']

    def _cfg(exp_set, point, mode, wname, w, alpha):
        # One grid cell; weights are (w_s1 for 5s, w_s2 for 1m).
        return {
            'name': f'{exp_set}_{point}_{mode}_{wname}_a{alpha:.1f}',
            'exp_set': exp_set,
            'point': point,
            'mode': mode,
            'weights': w,
            'strength': alpha,
        }

    configs = []

    # Set A: 1m only, balanced W1 — 4 * 3 * 3 = 36 configs.
    for p in points:
        for m in single_modes:
            for a in strengths:
                configs.append(_cfg('A', p, m, 'W1', (0.0, 1.0), a))

    # Set B: 5s + 1m, balanced W1 — 4 * 4 * 3 = 48 configs.
    for p in points:
        for m in dual_modes:
            for a in strengths:
                configs.append(_cfg('B', p, m, 'W1', (0.5, 0.5), a))

    # Set A': 1m only with recency discount (W2/W3) — 36 * 2 = 72 configs.
    for p in points:
        for m in single_modes:
            for a in strengths:
                for wname, w in [('W2', (0.0, 0.5)), ('W3', (0.0, 0.25))]:
                    configs.append(_cfg('Ap', p, m, wname, w, a))

    # Set B': 5s favored, 1m discounted (W2/W3) — 48 * 2 = 96 configs.
    for p in points:
        for m in dual_modes:
            for a in strengths:
                for wname, w in [('W2', (0.6, 0.3)), ('W3', (0.8, 0.15))]:
                    configs.append(_cfg('Bp', p, m, wname, w, a))

    return configs
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
# ZInjectionEngine
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
|
|||
|
|
class ZInjectionEngine(LiquidationGuardEngine):
    """
    Injects a combined z-signal at one of P1/P3/P4/P5 in the stack.
    Signals are precomputed — just array lookups at runtime (negligible overhead).

    Injection points:
      P1 — begin_day: modify _day_beta using start-of-day z
      P3 — _try_entry: block entry if bar z > gate threshold
      P4 — _try_entry: scale regime_size_mult before entry
      P5 — _try_entry: scale position notional after entry
    """

    def __init__(self, inject_point: str, strength: float,
                 p3_gate_thr: float = P3_GATE_RAW, **kwargs):
        super().__init__(**kwargs)
        # One flag per injection point; at most one is True for a given engine.
        self._inject_p1 = (inject_point == 'P1')
        self._inject_p3 = (inject_point == 'P3')
        self._inject_p4 = (inject_point == 'P4')
        self._inject_p5 = (inject_point == 'P5')
        self._strength = strength        # alpha from the sweep grid
        self._p3_gate_thr = p3_gate_thr  # sigma units (M1/M2) or pct rank (M3)
        # Signal arrays — set per day via set_day_signals()
        self._bar_z: np.ndarray = None   # combined per-bar z; None until set
        self._sod_z: float = 0.0         # combined start-of-day z (for P1)
        self._scale_history = []         # notional scale factors applied by P5

    def set_day_signals(self, bar_z: np.ndarray, sod_z: float):
        """Called before each process_day() call."""
        self._bar_z = bar_z
        self._sod_z = sod_z

    def _get_bar_z(self, bar_idx: int) -> float:
        # Unset or out-of-range signal reads as neutral (0.0).
        if self._bar_z is None or bar_idx >= len(self._bar_z):
            return 0.0
        return float(self._bar_z[bar_idx])

    def begin_day(self, date_str, posture='APEX', direction=None):
        super().begin_day(date_str, posture, direction)
        if self._inject_p1:
            # tanh soft-clamps z, so the modulation stays within ±strength;
            # the resulting beta is hard-clipped to [0, 2].
            beta_mod = 1.0 + self._strength * float(np.tanh(self._sod_z / K_TANH))
            self._day_beta = float(np.clip(self._day_beta * beta_mod, 0.0, 2.0))

    def _try_entry(self, bar_idx, vel_div, prices, price_histories,
                   v50_vel=0., v750_vel=0.):
        if self._inject_p4:
            # P4: modulate the sizing multiplier BEFORE the base entry runs.
            z = self._get_bar_z(bar_idx)
            mod = 1.0 + self._strength * float(np.tanh(z / K_TANH))
            self.regime_size_mult = float(np.clip(self.regime_size_mult * mod, 0.01, 20.0))

        result = super()._try_entry(bar_idx, vel_div, prices, price_histories,
                                    v50_vel, v750_vel)

        if self._inject_p3 and result is not None:
            # P3: gate is applied AFTER the base entry attempt.
            # NOTE(review): when the gate fires, super()._try_entry has already
            # executed — confirm that returning None here fully discards any
            # position state the base engine created for this bar.
            z = self._get_bar_z(bar_idx)
            if z > self._p3_gate_thr:
                return None

        if self._inject_p5 and result is not None and self.position is not None:
            # P5: rescale the freshly opened position's notional; the scale
            # factor is clipped to [0.2, 2.0] and recorded for reporting.
            z = self._get_bar_z(bar_idx)
            s = float(np.clip(1.0 + self._strength * np.tanh(z / K_TANH), 0.2, 2.0))
            self.position.notional *= s
            self._scale_history.append(s)

        return result

    def reset(self):
        super().reset()
        self._scale_history = []
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
# Per-config runner
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
|
|||
|
|
def run_one_config(cfg, parquet_files, pq_data, signals_1m, signals_5s, vol_p60):
    """
    Run one config dict over all days. Returns a metrics dict.

    Parameters
    ----------
    cfg           : config dict from generate_configs()
    parquet_files : list of 5s parquet Paths to iterate over
    pq_data       : dict[date_str -> (df, asset_cols, dvol_array)]
    signals_1m    : dict[date_str -> {...}] or None entry
    signals_5s    : dict[date_str -> {...}] or None (None = sets A/A' only)
    vol_p60       : float — 60th percentile vol threshold

    Returns
    -------
    dict with the config identity (name/exp_set/point/mode/weights/strength)
    plus metrics: T (trade count), ROI (%), DD (max drawdown %), Calmar,
    elapsed_s, scale_mean (mean P5 notional scale; 1.0 when P5 unused).
    """
    # Fresh OB/forewarner/ACB stack per config so runs don't share state.
    OB_ASSETS = sorted({a for ds, (df, ac, _) in pq_data.items() for a in ac})
    mock_ob = MockOBProvider(
        imbalance_bias=-.09, depth_scale=1., assets=OB_ASSETS,
        imbalance_biases={
            "BTCUSDT": -.086, "ETHUSDT": -.092,
            "BNBUSDT": +.05, "SOLUSDT": +.05,
        },
    )
    ob_eng = OBFeatureEngine(mock_ob)
    ob_eng.preload_date("mock", OB_ASSETS)

    forewarner = DolphinForewarner(models_dir=MC_MODELS)
    acb = AdaptiveCircuitBreaker()
    acb.preload_w750([pf.stem for pf in parquet_files])

    # Choose P3 gate threshold based on mode
    p3_thr = P3_GATE_RANK if cfg['mode'] == 'M3' else P3_GATE_RAW

    engine = ZInjectionEngine(
        inject_point=cfg['point'],
        strength=cfg['strength'],
        p3_gate_thr=p3_thr,
        **BASE_ENGINE_KWARGS,
        **D_LIQ_KWARGS,
    )
    engine.set_ob_engine(ob_eng)
    engine.set_acb(acb)
    engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
    engine.set_esoteric_hazard_multiplier(0.)

    t0 = time.time()
    for pf in parquet_files:
        ds = pf.stem
        df, acols, dvol = pq_data[ds]
        # Vol-regime flag per bar; NaN vol counts as "not ok".
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)

        sig1m = signals_1m.get(ds) if signals_1m else None
        sig5s = signals_5s.get(ds) if signals_5s else None
        use_5s = cfg['exp_set'] in ('B', 'Bp') and sig5s is not None

        # NOTE(review): when sig1m is None the whole day runs with a zero
        # signal even if a 5s signal exists — confirm this is intended.
        if sig1m is not None:
            N = len(df)
            z1m_arr = sig1m['z1m_5s']
            zstd1m = sig1m['zstd1m_5s']
            pct1m = sig1m['pct1m_5s']
            z_sod_1m = sig1m['z_sod']

            if use_5s:
                z5s_arr = sig5s['z5s']
                zstd5s = sig5s['zstd5s']
                pct5s = sig5s['pct5s']
                z_sod_5s = sig5s['z_sod_5s']
            else:
                # Neutral placeholders for sets A/A' (z=0, std=1, rank=0.5).
                z5s_arr = np.zeros(N, dtype=np.float64)
                zstd5s = np.ones(N, dtype=np.float64)
                pct5s = np.full(N, 0.5, dtype=np.float64)
                z_sod_5s = 0.0

            # Resize arrays to match df length (safety)
            # NOTE(review): zero left-padding is neutral for raw z but NOT
            # for pct-rank arrays (0.0 = extreme-low rank; neutral is 0.5).
            def _resize(arr, n):
                if len(arr) == n:
                    return arr
                if len(arr) > n:
                    return arr[:n]
                return np.concatenate([np.zeros(n - len(arr), dtype=arr.dtype), arr])

            z1m_arr = _resize(z1m_arr, N)
            zstd1m = _resize(zstd1m, N)
            pct1m = _resize(pct1m, N)
            z5s_arr = _resize(z5s_arr, N)
            zstd5s = _resize(zstd5s, N)
            pct5s = _resize(pct5s, N)

            bar_z, sod_z = combine_signals(
                z1m_arr, zstd1m, pct1m,
                z5s_arr, zstd5s, pct5s,
                cfg['exp_set'], cfg['mode'], cfg['weights'],
                z_sod_1m, z_sod_5s,
            )
            engine.set_day_signals(bar_z, sod_z)
        else:
            engine.set_day_signals(np.zeros(len(df)), 0.0)

        engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)

    elapsed = time.time() - t0
    trades = engine.trade_history
    roi = (engine.capital - 25000.) / 25000. * 100.

    # Rebuild an equity curve from realized trade PnL ordered by exit bar,
    # then take the max peak-to-trough drawdown.
    cap_curve = [25000.]
    for t_ in sorted(trades, key=lambda x: getattr(x, 'exit_bar', 0)):
        cap_curve.append(cap_curve[-1] + getattr(t_, 'pnl_absolute', 0.))
    cap_arr = np.array(cap_curve)
    peak = np.maximum.accumulate(cap_arr)
    dd = float(np.max((peak - cap_arr) / (peak + 1e-10)) * 100.)
    calmar = roi / max(dd, 1e-4)  # dd floored to avoid divide-by-zero
    sh = engine._scale_history

    return {
        'name': cfg['name'],
        'exp_set': cfg['exp_set'],
        'point': cfg['point'],
        'mode': cfg['mode'],
        'weights': list(cfg['weights']),
        'strength': cfg['strength'],
        'T': len(trades),
        'ROI': round(roi, 4),
        'DD': round(dd, 4),
        'Calmar': round(calmar, 4),
        'elapsed_s': round(elapsed, 1),
        'scale_mean': round(float(np.mean(sh)), 4) if sh else 1.0,
    }
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
# Data loading helpers
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
|
|||
|
|
def _load_pq_data(parquet_files):
    """Load every 5s parquet day file into a dict keyed by file stem.

    Each entry maps ``pf.stem -> (df, asset_cols, rolling_vol)`` where
    ``asset_cols`` is every column not in META_COLS, and ``rolling_vol[i]``
    is the std-dev of 1-bar BTC returns over the 50 bars preceding bar
    ``i`` (NaN during the warm-up window or when BTCUSDT is absent).
    """
    print("Loading 5s parquet data...")
    loaded = {}
    for pf in parquet_files:
        frame = pd.read_parquet(pf)
        asset_cols = [c for c in frame.columns if c not in META_COLS]
        vol = np.full(len(frame), np.nan)
        if 'BTCUSDT' in frame.columns:
            btc = frame['BTCUSDT'].values
            for i in range(50, len(btc)):
                window = btc[max(0, i - 50):i]
                # Guard mirrors the warm-up check; window is 50 long once i >= 50.
                if len(window) >= 10:
                    vol[i] = float(np.std(np.diff(window) / window[:-1]))
        loaded[pf.stem] = (frame, asset_cols, vol)
    return loaded
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _compute_vol_p60(parquet_files):
    """Return the 60th-percentile BTC rolling-vol threshold.

    Samples rolling 50-bar return std-devs from the first two day files
    only; returns 0.0 when no positive samples are found.

    NOTE(review): the scan starts at bar 60 while _load_pq_data starts at
    bar 50 — presumably an extra warm-up margin; confirm it is intentional.
    """
    samples = []
    for pf in parquet_files[:2]:
        day = pd.read_parquet(pf)
        if 'BTCUSDT' not in day.columns:
            continue
        prices = day['BTCUSDT'].values
        for i in range(60, len(prices)):
            window = prices[max(0, i - 50):i]
            if len(window) < 10:
                continue
            vol = float(np.std(np.diff(window) / window[:-1]))
            # Only strictly positive vols contribute to the percentile.
            if vol > 0:
                samples.append(vol)
    if not samples:
        return 0.
    return float(np.percentile(samples, 60))
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _run_baseline(parquet_files, pq_data, vol_p60):
    """Run the D_LIQ_GOLD baseline stack with no z-signal injection.

    Parameters
    ----------
    parquet_files : list[Path]
        Day files to replay, in order; day key is ``pf.stem``.
    pq_data : dict
        Preloaded ``stem -> (df, asset_cols, rolling_vol)`` from
        ``_load_pq_data``.
    vol_p60 : float
        Volatility-regime threshold; bars at or below it get
        ``vol_regime_ok=False``.

    Returns
    -------
    dict
        Same schema as per-config results (name/exp_set = 'baseline',
        neutral weights/strength, scale_mean fixed at 1.0).
    """
    OB_ASSETS = sorted({a for ds, (df, ac, _) in pq_data.items() for a in ac})
    # Mock order-book provider with the calibrated per-asset imbalance biases.
    mock_ob = MockOBProvider(
        imbalance_bias=-.09, depth_scale=1., assets=OB_ASSETS,
        imbalance_biases={
            "BTCUSDT": -.086, "ETHUSDT": -.092,
            "BNBUSDT": +.05, "SOLUSDT": +.05,
        },
    )
    ob_eng = OBFeatureEngine(mock_ob)
    ob_eng.preload_date("mock", OB_ASSETS)
    forewarner = DolphinForewarner(models_dir=MC_MODELS)
    acb = AdaptiveCircuitBreaker()
    acb.preload_w750([pf.stem for pf in parquet_files])
    engine = create_d_liq_engine(**BASE_ENGINE_KWARGS)
    engine.set_ob_engine(ob_eng)
    engine.set_acb(acb)
    engine.set_mc_forewarner(forewarner, MC_BASE_CFG)
    engine.set_esoteric_hazard_multiplier(0.)

    t0 = time.time()
    for pf in parquet_files:
        ds = pf.stem
        df, acols, dvol = pq_data[ds]
        # NaN warm-up bars are treated as "vol regime not ok".
        vol_ok = np.where(np.isfinite(dvol), dvol > vol_p60, False)
        engine.process_day(ds, df, acols, vol_regime_ok=vol_ok)
    elapsed = time.time() - t0

    trades = engine.trade_history
    roi, dd, calmar = _equity_stats(trades, engine.capital)

    return {
        'name': 'baseline',
        'exp_set': 'baseline',
        'point': 'none',
        'mode': 'none',
        'weights': [0., 0.],
        'strength': 0.,
        'T': len(trades),
        'ROI': round(roi, 4),
        'DD': round(dd, 4),
        'Calmar': round(calmar, 4),
        'elapsed_s': round(elapsed, 1),
        'scale_mean': 1.0,
    }


def _equity_stats(trades, capital, start_capital=25000.):
    """Return (roi_pct, max_drawdown_pct, calmar) from closed trades.

    Rebuilds the equity curve by applying each trade's absolute PnL in
    exit order; max drawdown is measured against the running peak
    (epsilon-guarded), and Calmar = ROI / max(DD, 1e-4).
    """
    roi = (capital - start_capital) / start_capital * 100.
    curve = [start_capital]
    for t_ in sorted(trades, key=lambda x: getattr(x, 'exit_bar', 0)):
        curve.append(curve[-1] + getattr(t_, 'pnl_absolute', 0.))
    cap_arr = np.array(curve)
    peak = np.maximum.accumulate(cap_arr)
    dd = float(np.max((peak - cap_arr) / (peak + 1e-10)) * 100.)
    return roi, dd, roi / max(dd, 1e-4)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _print_results_table(results, base):
    """Render a fixed-width results table with deltas vs. the baseline.

    Each row shows absolute metrics plus dROI/dDD/dCal relative to
    ``base``; rows whose Calmar exceeds baseline by more than 2% are
    flagged with a trailing ' **'.
    """
    width = 115
    rule, dash = "=" * width, "-" * width
    print(rule)
    header = (f"{'Name':<36} {'Set':>4} {'P':>3} {'M':>3} {'T':>5} "
              f"{'ROI%':>8} {'DD%':>7} {'Calmar':>8} "
              f"{'dROI':>7} {'dDD':>6} {'dCal':>7} {'s_mean':>7}")
    print(header)
    print(dash)
    win_bar = base['Calmar'] * 1.02
    for row in results:
        d_roi = row['ROI'] - base['ROI']
        d_dd = row['DD'] - base['DD']
        d_cal = row['Calmar'] - base['Calmar']
        mark = ' **' if row['Calmar'] > win_bar else ''
        line = (f"{row['name']:<36} {row['exp_set']:>4} {row['point']:>3} {row['mode']:>3} "
                f"{row['T']:>5} {row['ROI']:>8.2f} {row['DD']:>7.2f} {row['Calmar']:>8.2f} "
                f"{d_roi:>+7.2f} {d_dd:>+6.2f} {d_cal:>+7.2f} {row['scale_mean']:>7.3f}{mark}")
        print(line)
    print(rule)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
# Main
|
|||
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|||
|
|
|
|||
|
|
def main():
    """Run the exp13 two-phase multi-scale ConvNeXt sweep.

    Pipeline: parse CLI args -> load the 1m sensor (required) and the 5s
    sensor (optional; sets B/Bp are dropped if unavailable) -> precompute
    per-day z-signals -> Phase 1 screens baseline + all active configs on
    a day subset -> Phase 2 re-validates the top-K (by Calmar) on the full
    window against a full-window baseline -> dump everything to OUT_FILE
    as JSON.  ``--only_config`` bypasses Phase 1 and sends one named
    config straight to the full-window run.
    """
    parser = argparse.ArgumentParser(description='exp13 multiscale ConvNeXt sweep')
    parser.add_argument('--subset', type=int, default=14,
                        help='Days to run in phase-1 screening (0 = full 56 days)')
    parser.add_argument('--top_k', type=int, default=20,
                        help='Top-K configs to validate on full 56 days in phase-2')
    parser.add_argument('--skip_sets', type=str, default='',
                        help='Comma-separated sets to skip, e.g. "B,Bp"')
    parser.add_argument('--only_config', type=str, default='',
                        help='Skip Phase-1 entirely; run just this named config on full window')
    parser.add_argument('--skip_5s', action='store_true',
                        help='Skip 5s sensor load + pre-compute (saves ~3 GB RAM; safe when only running sets A/Ap)')
    args = parser.parse_args()

    skip_sets = {s.strip() for s in args.skip_sets.split(',') if s.strip()}

    # ── 1. Load 1m sensor ─────────────────────────────────────────────────────
    print(f"Loading 1m ConvNextSensor from {MODEL_1M}...")
    sensor_1m = ConvNextSensor(str(MODEL_1M))
    print(f" epoch={sensor_1m.epoch} val_loss={sensor_1m.val_loss:.4f} "
          f"z_dim={sensor_1m.z_dim}\n")

    # ── 2. Try to load 5s sensor ───────────────────────────────────────────────
    # Any failure path (flag, missing file, load error) removes sets B/Bp.
    sensor_5s = None
    proxy_b_dim_5s = 0
    if args.skip_5s:
        print("5s sensor: SKIPPED (--skip_5s) — sets B/Bp will be excluded.\n")
        skip_sets.update({'B', 'Bp'})
    elif MODEL_5S.exists():
        try:
            # Deferred import: the 5s sensor module is only needed here.
            from dvae.convnext_5s_sensor import ConvNext5sSensor
            sensor_5s = ConvNext5sSensor(str(MODEL_5S))
            print(f"5s sensor loaded: epoch={sensor_5s.epoch} "
                  f"val_loss={sensor_5s.val_loss:.4f} z_dim={sensor_5s.z_dim}\n")
        except Exception as e:
            print(f"WARNING: Failed to load 5s sensor: {e}")
            print(" Sets B and B' will be skipped.\n")
            skip_sets.update({'B', 'Bp'})
    else:
        print(f"WARNING: {MODEL_5S} not found — sets B and B' will be skipped.\n")
        skip_sets.update({'B', 'Bp'})

    # ── 3. Enumerate all parquet files ────────────────────────────────────────
    all_parquet_files = sorted(VBT5s.glob("*.parquet"))
    all_parquet_files = [p for p in all_parquet_files if 'catalog' not in str(p)]
    print(f"Dataset: {len(all_parquet_files)} days (5s scans)")

    # subset=0 means "screen on the full window" (Phase 2 is then skipped below).
    n_subset = args.subset if args.subset > 0 else len(all_parquet_files)
    subset_files = all_parquet_files[:n_subset]
    print(f"Phase-1 subset: {n_subset} days\n")

    # ── 4. Precompute 1m signals (all 56 days) ────────────────────────────────
    signals_1m_all = precompute_1m_signals(all_parquet_files, sensor_1m)

    # ── 5. Precompute 5s signals (all 56 days) if sensor available ────────────
    signals_5s_all = None
    if sensor_5s is not None:
        proxy_b_dim_5s = find_proxy_b_dim_5s(all_parquet_files, sensor_5s)
        signals_5s_all = precompute_5s_signals(all_parquet_files, sensor_5s, proxy_b_dim_5s)

    # ── 6. Load all 56 day pq_data ────────────────────────────────────────────
    pq_data_all = _load_pq_data(all_parquet_files)
    vol_p60 = _compute_vol_p60(all_parquet_files)
    print(f"vol_p60 = {vol_p60:.6f}\n")

    # Build subset pq_data (views into the already-loaded full-window dict).
    pq_data_sub = {pf.stem: pq_data_all[pf.stem] for pf in subset_files}

    # ── 7. Generate configs ───────────────────────────────────────────────────
    all_configs = generate_configs()
    active_configs = [c for c in all_configs if c['exp_set'] not in skip_sets]
    print(f"Total configs: {len(all_configs)} Active (after skips): {len(active_configs)}")
    if skip_sets:
        print(f" Skipped sets: {skip_sets}")
    print()

    # ── 8. PHASE 1: baseline + all configs on subset ──────────────────────────
    if args.only_config:
        # Fast-check mode: skip Phase 1 entirely, inject named config into Phase 2.
        # The placeholder Calmar=999 guarantees the config survives the top-K cut.
        named_cfg = next((c for c in all_configs if c['name'] == args.only_config), None)
        if named_cfg is None:
            valid = [c['name'] for c in all_configs[:8]]
            print(f"[ERROR] Config '{args.only_config}' not found. Example names: {valid}")
            return
        print(f"\n[FAST CHECK] Skipping Phase 1 — '{args.only_config}' goes straight to full-window Phase 2\n")
        phase1_results = [{'name': args.only_config, 'ROI': 0.0, 'DD': 0.0, 'Calmar': 999.0}]
        phase1_sorted = phase1_results
        baseline_result = {'ROI': 0.0, 'DD': 0.0, 'Calmar': 0.0, 'T': 0, 'elapsed_s': 0}
    else:
        print("=" * 80)
        print(f"PHASE 1: Running baseline + {len(active_configs)} configs on {n_subset} days")
        print("=" * 80)

        print("[baseline]", flush=True)
        baseline_result = _run_baseline(subset_files, pq_data_sub, vol_p60)
        print(f" T={baseline_result['T']} ROI={baseline_result['ROI']:+.2f}% "
              f"DD={baseline_result['DD']:.2f}% Calmar={baseline_result['Calmar']:.2f} "
              f"({baseline_result['elapsed_s']:.0f}s)\n")

        phase1_results = []
        for idx, cfg in enumerate(active_configs, 1):
            print(f"[{idx:3d}/{len(active_configs)}] {cfg['name']}", flush=True)
            r = run_one_config(
                cfg, subset_files, pq_data_sub,
                signals_1m_all, signals_5s_all, vol_p60,
            )
            phase1_results.append(r)
            dr = r['ROI'] - baseline_result['ROI']
            ddd = r['DD'] - baseline_result['DD']
            dcal = r['Calmar'] - baseline_result['Calmar']
            print(f" T={r['T']} ROI={r['ROI']:+.2f}% DD={r['DD']:.2f}% "
                  f"Calmar={r['Calmar']:.2f} dROI={dr:+.2f}pp dDD={ddd:+.2f}pp "
                  f"dCal={dcal:+.2f} s_mean={r['scale_mean']:.3f} ({r['elapsed_s']:.0f}s)")

        # Sort by Calmar descending
        phase1_sorted = sorted(phase1_results, key=lambda x: x['Calmar'], reverse=True)

    if not args.only_config:
        print(f"\n--- Phase-1 Top 20 (subset={n_subset}d) ---")
        _print_results_table(phase1_sorted[:20], baseline_result)

    # ── 9. PHASE 2: validate top_k on full 56 days ────────────────────────────
    phase2_results = []
    phase2_validated = {}

    # Phase 2 runs only when Phase 1 was a true subset, or in fast-check mode.
    if (args.subset > 0 and args.top_k > 0) or args.only_config:
        top_k_configs = phase1_sorted if args.only_config else phase1_sorted[:args.top_k]
        # Get config dicts for top-K names
        top_k_cfg_map = {c['name']: c for c in all_configs}

        print(f"\n{'=' * 80}")
        print(f"PHASE 2: Validating top {args.top_k} configs on full {len(all_parquet_files)} days")
        print(f"{'=' * 80}")

        print("[baseline_full]", flush=True)
        baseline_full = _run_baseline(all_parquet_files, pq_data_all, vol_p60)
        print(f" T={baseline_full['T']} ROI={baseline_full['ROI']:+.2f}% "
              f"DD={baseline_full['DD']:.2f}% Calmar={baseline_full['Calmar']:.2f} "
              f"({baseline_full['elapsed_s']:.0f}s)\n")

        for idx, r_sub in enumerate(top_k_configs, 1):
            cfg = top_k_cfg_map.get(r_sub['name'])
            if cfg is None or cfg['exp_set'] in skip_sets:
                continue
            print(f"[{idx:3d}/{len(top_k_configs)}] {cfg['name']} (phase2)", flush=True)
            r_full = run_one_config(
                cfg, all_parquet_files, pq_data_all,
                signals_1m_all, signals_5s_all, vol_p60,
            )
            phase2_results.append(r_full)
            dr = r_full['ROI'] - baseline_full['ROI']
            ddd = r_full['DD'] - baseline_full['DD']
            dcal = r_full['Calmar'] - baseline_full['Calmar']
            print(f" T={r_full['T']} ROI={r_full['ROI']:+.2f}% DD={r_full['DD']:.2f}% "
                  f"Calmar={r_full['Calmar']:.2f} dROI={dr:+.2f}pp dDD={ddd:+.2f}pp "
                  f"dCal={dcal:+.2f} ({r_full['elapsed_s']:.0f}s)")

        phase2_sorted = sorted(phase2_results, key=lambda x: x['Calmar'], reverse=True)

        print(f"\n--- Phase-2 Final Results (full {len(all_parquet_files)}d) ---")
        _print_results_table(phase2_sorted, baseline_full)

        # Verdict: a config "wins" only if it beats 1.02x the full-window baseline Calmar.
        print("\n=== VERDICT ===")
        threshold = baseline_full['Calmar'] * 1.02
        print(f"Baseline (full): ROI={baseline_full['ROI']:.2f}% "
              f"DD={baseline_full['DD']:.2f}% Calmar={baseline_full['Calmar']:.2f}")
        print(f"Threshold: Calmar > {threshold:.2f} (1.02x baseline)")
        winners = [r for r in phase2_sorted if r['Calmar'] > threshold]
        if winners:
            best = winners[0]
            print(f"SIGNAL CONFIRMED — {len(winners)} config(s) beat threshold")
            print(f" Best: [{best['name']}] Calmar={best['Calmar']:.2f} "
                  f"ROI={best['ROI']:.2f}% DD={best['DD']:.2f}%")
        else:
            if phase2_sorted:
                best = phase2_sorted[0]
                print(f"NO improvement over D_LIQ_GOLD on full dataset")
                print(f" Best: [{best['name']}] Calmar={best['Calmar']:.2f} "
                      f"(threshold={threshold:.2f})")
            else:
                print(" No phase-2 results available.")

        phase2_validated = {
            'baseline_full': baseline_full,
            'results': phase2_results,
        }

    else:
        # Full run in phase-1 (subset=0) — just report
        phase2_sorted = phase1_sorted
        baseline_full = baseline_result

        print("\n=== VERDICT (full run) ===")
        threshold = baseline_full['Calmar'] * 1.02
        print(f"Baseline: ROI={baseline_full['ROI']:.2f}% "
              f"DD={baseline_full['DD']:.2f}% Calmar={baseline_full['Calmar']:.2f}")
        print(f"Threshold: Calmar > {threshold:.2f} (1.02x baseline)")
        winners = [r for r in phase1_sorted if r['Calmar'] > threshold]
        if winners:
            best = winners[0]
            print(f"SIGNAL CONFIRMED — {len(winners)} config(s) beat threshold")
            print(f" Best: [{best['name']}] Calmar={best['Calmar']:.2f} "
                  f"ROI={best['ROI']:.2f}% DD={best['DD']:.2f}%")
        else:
            if phase1_sorted:
                best = phase1_sorted[0]
                print(f"NO improvement over D_LIQ_GOLD")
                print(f" Best: [{best['name']}] Calmar={best['Calmar']:.2f} "
                      f"(threshold={threshold:.2f})")

    # ── 10. Write results ─────────────────────────────────────────────────────
    output = {
        'experiment': 'exp13_multiscale_sweep',
        'model_1m_epoch': sensor_1m.epoch,
        'model_1m_val_loss': sensor_1m.val_loss,
        'model_5s_epoch': getattr(sensor_5s, 'epoch', None) if sensor_5s else None,
        'model_5s_val_loss': getattr(sensor_5s, 'val_loss', None) if sensor_5s else None,
        'proxy_b_dim_5s': proxy_b_dim_5s,
        'skip_sets': list(skip_sets),
        'subset_days': n_subset,
        'n_all_days': len(all_parquet_files),
        'baseline_subset': baseline_result,
        'phase1_results': phase1_results,
        'phase2': phase2_validated,
    }

    with open(OUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)
    print(f"\nResults -> {OUT_FILE}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Script entry point: run the full exp13 sweep when executed directly.
if __name__ == '__main__':
    main()
|