initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)
Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
577
nautilus_dolphin/test_1m_vs_5s_comparison.py
Executable file
577
nautilus_dolphin/test_1m_vs_5s_comparison.py
Executable file
@@ -0,0 +1,577 @@
|
||||
"""
|
||||
1m Klines vs 5s NG5 — Overlapping Period Comparison Study
|
||||
===========================================================
|
||||
Overlapping window: 2026-01-01 to 2026-03-05 (64 days)
|
||||
- 1m klines data: vbt_cache_klines/2026-*.parquet (1439 rows/day, 1-min bars)
|
||||
- 5s NG5 data: vbt_cache_ng5/2026-*.parquet (~6154 rows/day, 5s bars)
|
||||
|
||||
Analyses:
|
||||
1. Signal distribution comparison (vel_div, v50, v150, v750, instability)
|
||||
2. Cross-correlation and lead-lag structure (1m vel_div vs 5s vel_div)
|
||||
3. PCA on both signal spaces (shared vs unique variance)
|
||||
4. Backtest performance comparison (same engine, same dates, both data sources)
|
||||
5. Signal alignment quantification (how often do both timescales agree?)
|
||||
6. Statistical characteristics: skew, kurtosis, autocorrelation, stationarity tests
|
||||
|
||||
Run: python test_1m_vs_5s_comparison.py
|
||||
Output: run_logs/1m_vs_5s_comparison_TIMESTAMP.json + .md report
|
||||
"""
|
||||
import sys, time, json, warnings
|
||||
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
|
||||
warnings.filterwarnings('ignore')
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy import stats
|
||||
from scipy.stats import spearmanr, pearsonr, ks_2samp
|
||||
|
||||
# Root of the DOLPHIN working tree (Windows box this study runs on).
HCM = Path(r'C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict')
KLINES_DIR = HCM / 'vbt_cache_klines'  # per-day parquets of 1-minute kline signals
NG5_DIR = HCM / 'vbt_cache_ng5'        # per-day parquets of 5-second NG5 signals
LOGS_DIR = HCM / 'nautilus_dolphin' / 'run_logs'
# NOTE(review): no parents=True — assumes nautilus_dolphin/ (the script's own
# directory) already exists; a missing parent would raise FileNotFoundError.
LOGS_DIR.mkdir(exist_ok=True)

# Inclusive study window shared by both data sources.
OVERLAP_START = '2026-01-01'
OVERLAP_END = '2026-03-05'

# Signal columns compared across timescales; later sections skip any column
# that is missing from either frame.
SIGNAL_COLS = ['vel_div', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
               'v300_lambda_max_velocity', 'v750_lambda_max_velocity',
               'instability_50', 'instability_150']

t0 = time.time()                                   # wall-clock start, feeds results['runtime_s']
run_ts = datetime.now().strftime('%Y%m%d_%H%M%S')  # timestamp suffix for output files

print("=" * 70)
print("1m vs 5s Signal Comparison Study")
print(f"Overlap window: {OVERLAP_START} to {OVERLAP_END}")
print("=" * 70)
|
||||
|
||||
# ── 1. Load data ────────────────────────────────────────────────────────────────

def load_overlap(data_dir: Path, label: str, start=None, end=None) -> pd.DataFrame:
    """Concatenate one parquet per day from *data_dir* over [start, end].

    Parameters
    ----------
    data_dir : Path
        Directory containing per-day ``YYYY-MM-DD.parquet`` files.
    label : str
        Display name used in status/error prints.
    start, end : str, optional
        Inclusive 'YYYY-MM-DD' bounds. Default to the module-level
        OVERLAP_START / OVERLAP_END window (generalized from the previously
        hard-coded window so other ranges can be loaded too).

    Returns
    -------
    pd.DataFrame
        All found days concatenated, with a 'date_str' column added so later
        sections can group rows by day; an empty frame (plus an ERROR print)
        when no parquet exists in the range.
    """
    from datetime import datetime as dt, timedelta
    start = OVERLAP_START if start is None else start
    end = OVERLAP_END if end is None else end
    day = dt.strptime(start, '%Y-%m-%d')
    last = dt.strptime(end, '%Y-%m-%d')
    frames = []
    while day <= last:
        ds = day.strftime('%Y-%m-%d')
        pf = data_dir / f'{ds}.parquet'
        if pf.exists():  # missing days are simply skipped, not errors
            df = pd.read_parquet(pf)
            df['date_str'] = ds
            frames.append(df)
        day += timedelta(days=1)
    if not frames:
        print(f" ERROR: No parquets found for {label} in {data_dir}")
        return pd.DataFrame()
    full = pd.concat(frames, ignore_index=True)
    print(f" {label}: {len(frames)} dates, {len(full):,} rows, cols={list(full.columns[:8])}")
    return full
|
||||
|
||||
print("\n--- Loading data ---")
|
||||
df_1m = load_overlap(KLINES_DIR, '1m klines')
|
||||
df_5s = load_overlap(NG5_DIR, '5s NG5')
|
||||
|
||||
if df_1m.empty or df_5s.empty:
|
||||
print("ABORT: Missing data for one or both timescales")
|
||||
sys.exit(1)
|
||||
|
||||
# ── 2. Signal distribution comparison ─────────────────────────────────────────

print("\n" + "=" * 70)
print("SECTION 1: Signal Distribution Comparison")
print("=" * 70)

# Top-level payload for the final JSON dump; sections append their findings.
results = {'run_ts': run_ts, 'overlap': f'{OVERLAP_START}/{OVERLAP_END}'}
dist_results = {}


def _dist_summary(prefix, s):
    """Summary-stat dict (mean/std/quantiles/shape) keyed '<prefix>_<stat>'."""
    return {
        f'{prefix}_mean': float(s.mean()), f'{prefix}_std': float(s.std()),
        f'{prefix}_p5': float(s.quantile(0.05)), f'{prefix}_p50': float(s.median()),
        f'{prefix}_p95': float(s.quantile(0.95)),
        f'{prefix}_skew': float(s.skew()), f'{prefix}_kurt': float(s.kurtosis()),
    }


for col in SIGNAL_COLS:
    # Compare only columns that exist at both timescales.
    if col not in df_1m.columns or col not in df_5s.columns:
        continue
    sig_1m = df_1m[col].dropna()
    sig_5s = df_5s[col].dropna()

    # Two-sample Kolmogorov-Smirnov: are the two distributions different?
    ks_d, ks_pval = ks_2samp(sig_1m.values, sig_5s.values)

    entry = _dist_summary('1m', sig_1m)
    entry.update(_dist_summary('5s', sig_5s))
    entry['ks_stat'] = float(ks_d)
    entry['ks_p'] = float(ks_pval)
    entry['scale_ratio_std'] = (float(sig_1m.std() / sig_5s.std())
                                if sig_5s.std() > 0 else None)
    dist_results[col] = entry

    print(f"\n{col}:")
    print(f" 1m: mean={sig_1m.mean():.4f} std={sig_1m.std():.4f} p5={sig_1m.quantile(0.05):.4f} "
          f"p50={sig_1m.median():.4f} p95={sig_1m.quantile(0.95):.4f} "
          f"skew={sig_1m.skew():.3f} kurt={sig_1m.kurtosis():.3f}")
    print(f" 5s: mean={sig_5s.mean():.4f} std={sig_5s.std():.4f} p5={sig_5s.quantile(0.05):.4f} "
          f"p50={sig_5s.median():.4f} p95={sig_5s.quantile(0.95):.4f} "
          f"skew={sig_5s.skew():.3f} kurt={sig_5s.kurtosis():.3f}")
    print(f" Scale ratio (1m_std/5s_std): {sig_1m.std()/sig_5s.std():.2f}x | "
          f"KS stat={ks_d:.4f} p={ks_pval:.4f} ({'DIFFERENT' if ks_pval < 0.05 else 'similar'})")

results['distributions'] = dist_results
|
||||
|
||||
# ── 3. Cross-correlation and lead-lag ─────────────────────────────────────────

print("\n" + "=" * 70)
print("SECTION 2: Cross-Correlation and Lead-Lag (1m vs 5s vel_div)")
print("=" * 70)
print("Methodology: resample both to common 5-min bars, compute cross-corr at lags 0-12")
# NOTE(review): the lag loop below actually scans lags -6..+12, so the printed
# methodology line understates the scanned range.
|
||||
|
||||
# Resample both to 5-minute bars (common time grid)
def resample_to_5min(df: pd.DataFrame, col: str = 'vel_div') -> pd.Series:
    """Collapse one signal column of *df* onto a 5-minute grid.

    Parameters
    ----------
    df : pd.DataFrame
        Must carry a 'timestamp' column plus the signal column *col*.
    col : str
        Signal column to resample (default 'vel_div').

    Returns
    -------
    pd.Series
        Last observation per 5-min bucket, NaN buckets dropped. An empty
        float Series when 'timestamp' or *col* is missing (the original
        only guarded 'timestamp' and raised KeyError on a missing *col*).
    """
    if 'timestamp' not in df.columns or col not in df.columns:
        return pd.Series(dtype=float)
    ts = pd.to_datetime(df['timestamp'])
    s = pd.Series(df[col].values, index=ts)
    # Resample to 5-min, take last value (most recent state at bucket close).
    return s.resample('5min').last().dropna()
|
||||
|
||||
# Project both vel_div series onto the shared 5-minute grid, then align.
vd_1m_5min = resample_to_5min(df_1m, 'vel_div')
vd_5s_5min = resample_to_5min(df_5s, 'vel_div')

# Align on common index and drop any bar missing on either side.
common_idx = vd_1m_5min.index.intersection(vd_5s_5min.index)
a1 = vd_1m_5min.reindex(common_idx)
a5 = vd_5s_5min.reindex(common_idx)
a1_clean = a1.dropna()
a5_clean = a5.reindex(a1_clean.index).dropna()
common_clean = a1_clean.index.intersection(a5_clean.index)
x1 = a1_clean.reindex(common_clean).values
x5 = a5_clean.reindex(common_clean).values

print(f" Common 5-min bars: {len(common_clean)}")

crosscorr = {}
print(f"\n {'Lag':>6} {'Pearson r':>10} {'p-value':>10} {'Spearman r':>11}")
print(f" {'-'*6} {'-'*10} {'-'*10} {'-'*11}")
# Lag convention: at lag k > 0 the pair is (x1[t], x5[t+k]); at lag k < 0 it
# is (x1[t+|k|], x5[t]).  Hence k > 0 => 1m anticipates 5s (1m LEADS), and
# k < 0 => 5s anticipates 1m (5s LEADS).
# FIX: the original marker/interpretation strings had this orientation
# inverted (negative lags were labeled "1m LEADS").
for lag in range(-6, 13):  # -6 to +12 lags (negative = 5s leads 1m)
    if lag < 0:
        a_1m = x1[-lag:]   # 1m shifted forward: x1[t+|lag|] vs x5[t]
        a_5s = x5[:lag]
    elif lag == 0:
        a_1m = x1
        a_5s = x5
    else:
        a_1m = x1[:-lag]   # x1[t] vs x5[t+lag]
        a_5s = x5[lag:]
    n = min(len(a_1m), len(a_5s))
    a_1m, a_5s = a_1m[:n], a_5s[:n]
    if n < 10:
        continue  # too few overlapping bars for a meaningful correlation
    pr, pp = pearsonr(a_1m, a_5s)
    sr, sp = spearmanr(a_1m, a_5s)
    crosscorr[lag] = {'pearson_r': float(pr), 'pearson_p': float(pp),
                      'spearman_r': float(sr), 'spearman_p': float(sp), 'n': n}
    marker = ' <-- PEAK' if lag == 0 else (' <-- 5s LEADS' if lag < 0 and abs(pr) > 0.3 else '')
    print(f" lag={lag:+3d} r={pr:+.4f} p={pp:.4f} rho={sr:+.4f}{marker}")

results['crosscorr_5min'] = crosscorr

# Find best lag (largest |pearson r|); guard the empty scan — the original
# crashed on max() over an empty dict when fewer than 10 bars aligned.
if crosscorr:
    best_lag = max(crosscorr.items(), key=lambda x: abs(x[1]['pearson_r']))
    print(f"\n Best lag: {best_lag[0]:+d} (r={best_lag[1]['pearson_r']:+.4f})")
    if best_lag[0] < 0:
        print(f" INTERPRETATION: 5s signal LEADS 1m by {abs(best_lag[0])} × 5min = {abs(best_lag[0])*5} minutes")
    elif best_lag[0] > 0:
        print(f" INTERPRETATION: 1m signal LEADS 5s by {best_lag[0]} × 5min = {best_lag[0]*5} minutes")
    else:
        print(f" INTERPRETATION: Signals are contemporaneous (no lead-lag at 5-min resolution)")
else:
    print("\n Not enough aligned 5-min bars for cross-correlation analysis")
|
||||
|
||||
# ── 4. PCA on both signal spaces ───────────────────────────────────────────────

print("\n" + "=" * 70)
print("SECTION 3: PCA — Shared vs Unique Variance")
print("=" * 70)

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

pca_results = {}

# Fit a full PCA on each timescale's z-scored signal matrix independently.
for tag, frame in [('1m', df_1m), ('5s', df_5s)]:
    feat_cols = [c for c in SIGNAL_COLS if c in frame.columns]
    feats = frame[feat_cols].dropna()
    if len(feats) < 100:
        continue  # too few complete rows to fit

    decomp = PCA()
    decomp.fit(StandardScaler().fit_transform(feats))

    var_ratio = decomp.explained_variance_ratio_
    cum_ratio = np.cumsum(var_ratio)
    # Smallest component count whose cumulative variance reaches 90%.
    k90 = int(np.searchsorted(cum_ratio, 0.90)) + 1

    pca_results[tag] = {
        'n_features': len(feat_cols),
        'n_samples': len(feats),
        'explained_variance_ratio': var_ratio.tolist(),
        'cumulative_variance': cum_ratio.tolist(),
        'n_components_90pct': k90,
        'components_top3': decomp.components_[:3].tolist(),
    }

    print(f"\n{tag} PCA ({len(feats):,} samples, {len(feat_cols)} features):")
    for i, (ev, cv) in enumerate(zip(var_ratio[:6], cum_ratio[:6])):
        print(f" PC{i+1}: var={ev:.4f} ({ev*100:.1f}%) cumul={cv*100:.1f}%")
    print(f" Components for 90% variance: {k90}")
    print(" PC1 loadings: " + ", ".join(f"{c}={v:.3f}" for c, v in zip(feat_cols, decomp.components_[0])))
    print(" PC2 loadings: " + ", ".join(f"{c}={v:.3f}" for c, v in zip(feat_cols, decomp.components_[1])))
|
||||
|
||||
# Joint PCA on 5-min resampled aligned data
print("\n--- Joint PCA on aligned 5-min data ---")
joint_cols = [c for c in SIGNAL_COLS if c in df_1m.columns and c in df_5s.columns]
joint_frames = []
for ds in sorted(df_1m['date_str'].unique()):
    sub_1m = df_1m[df_1m['date_str'] == ds]
    sub_5s = df_5s[df_5s['date_str'] == ds]
    # Skip days that are too thin on either timescale.
    if len(sub_1m) < 50 or len(sub_5s) < 100:
        continue
    # Validation pass: every joint column must resample onto a usable common
    # 5-min grid for this date, otherwise the day is dropped (via break).
    for col in joint_cols:
        if 'timestamp' not in sub_1m.columns:
            break
        ts_1m = pd.to_datetime(sub_1m['timestamp'])
        s1m = pd.Series(sub_1m[col].values, index=ts_1m).resample('5min').last()
        ts_5s = pd.to_datetime(sub_5s['timestamp'])
        s5s = pd.Series(sub_5s[col].values, index=ts_5s).resample('5min').last()
        idx = s1m.index.intersection(s5s.index)
        if len(idx) < 10:
            break
    else:
        # for/else: reached only when no column broke out above.
        # FIX: removed a dead `row = {...}` rebuild loop and an unused `n`
        # binding — their results were never read.
        # NOTE(review): `idx` below is the grid of the LAST column from the
        # validation pass; every column is reindexed onto that same grid.
        try:
            date_df = pd.DataFrame(
                {f'1m_{c}': pd.Series(sub_1m[c].values, index=pd.to_datetime(sub_1m['timestamp'])).resample('5min').last().reindex(idx).values for c in joint_cols}
                | {f'5s_{c}': pd.Series(sub_5s[c].values, index=pd.to_datetime(sub_5s['timestamp'])).resample('5min').last().reindex(idx).values for c in joint_cols})
            joint_frames.append(date_df)
        except Exception:
            # Best-effort: a malformed day must not kill the whole study.
            pass

if joint_frames:
    joint_df = pd.concat(joint_frames, ignore_index=True).dropna()
    print(f" Joint aligned data: {len(joint_df):,} 5-min bars × {len(joint_df.columns)} features")
    if len(joint_df) > 50:
        Xj = StandardScaler().fit_transform(joint_df)
        pca_j = PCA()
        pca_j.fit(Xj)
        evr_j = pca_j.explained_variance_ratio_
        cumvar_j = np.cumsum(evr_j)
        # "Shared" variance read-out: PC1 of the stacked 1m+5s feature space
        # is interpreted as the common component across timescales.
        pc1_load = pca_j.components_[0]
        cols_j = list(joint_df.columns)
        pc1_1m_load = [abs(pc1_load[i]) for i, c in enumerate(cols_j) if c.startswith('1m_')]
        pc1_5s_load = [abs(pc1_load[i]) for i, c in enumerate(cols_j) if c.startswith('5s_')]

        pca_results['joint'] = {
            'n_samples': len(joint_df),
            'n_features': len(joint_df.columns),
            'explained_variance_ratio': evr_j.tolist(),
            'pc1_variance': float(evr_j[0]),
            'pc1_1m_mean_loading': float(np.mean(pc1_1m_load)),
            'pc1_5s_mean_loading': float(np.mean(pc1_5s_load)),
        }

        print(f"\n Joint PCA variance explained:")
        for i, (ev, cv) in enumerate(zip(evr_j[:6], cumvar_j[:6])):
            print(f" PC{i+1}: {ev*100:.1f}% (cumul {cv*100:.1f}%)")
        print(f"\n PC1 (shared) explains {evr_j[0]*100:.1f}% of joint variance")
        print(f" PC1 mean |loading| — 1m features: {np.mean(pc1_1m_load):.4f}")
        print(f" PC1 mean |loading| — 5s features: {np.mean(pc1_5s_load):.4f}")
        if np.mean(pc1_1m_load) > 0.1 and np.mean(pc1_5s_load) > 0.1:
            print(f" INTERPRETATION: PC1 loads strongly on BOTH timescales -> genuine shared variance")
        else:
            print(f" INTERPRETATION: PC1 loads unevenly -> signals are largely independent")

results['pca'] = pca_results
|
||||
|
||||
# ── 5. Signal alignment quantification ────────────────────────────────────────

print("\n" + "=" * 70)
print("SECTION 4: Signal Alignment Quantification")
print("=" * 70)
print("How often does 1m vel_div < -0.50 align with 5s vel_div < -0.02?")

# Daily-level alignment: scale-matched entry thresholds per timescale.
VD_1M_THRESH = -0.50
VD_5S_THRESH = -0.02

align_results = []
for ds in sorted(df_1m['date_str'].unique()):
    sub_1m = df_1m[df_1m['date_str'] == ds]
    sub_5s = df_5s[df_5s['date_str'] == ds]
    if len(sub_1m) < 100 or len(sub_5s) < 100:
        continue  # day too thin on either side
    # Fraction of bars below threshold = "signaling" intensity for the day.
    frac_1m = (sub_1m['vel_div'] < VD_1M_THRESH).mean()
    frac_5s = (sub_5s['vel_div'] < VD_5S_THRESH).mean()
    # Daily minimum vel_div = peak signal strength.
    min_1m = sub_1m['vel_div'].min()
    min_5s = sub_5s['vel_div'].min()
    align_results.append({
        'date': ds, 'frac_1m': frac_1m, 'frac_5s': frac_5s,
        'min_1m': min_1m, 'min_5s': min_5s,
        'both_signal': (frac_1m > 0) and (frac_5s > 0),
        'only_1m': (frac_1m > 0) and (frac_5s == 0),
        'only_5s': (frac_1m == 0) and (frac_5s > 0),
        'neither': (frac_1m == 0) and (frac_5s == 0),
    })

# FIX: guard the empty case — the original raised KeyError /
# ZeroDivisionError when no day passed the row-count filter.
if align_results:
    align_df = pd.DataFrame(align_results)
    n = len(align_df)
    both = align_df['both_signal'].sum()
    only_1m = align_df['only_1m'].sum()
    only_5s = align_df['only_5s'].sum()
    neither = align_df['neither'].sum()

    print(f" Analysis over {n} overlapping days:")
    print(f" Both signal : {both}/{n} ({both/n*100:.1f}%) — MTF alignment days")
    print(f" Only 1m signals: {only_1m}/{n} ({only_1m/n*100:.1f}%)")
    print(f" Only 5s signals: {only_5s}/{n} ({only_5s/n*100:.1f}%)")
    print(f" Neither signals: {neither}/{n} ({neither/n*100:.1f}%)")

    # Correlation between daily signal fractions / daily minima.
    corr_frac, p_frac = pearsonr(align_df['frac_1m'], align_df['frac_5s'])
    corr_min, p_min = pearsonr(align_df['min_1m'], align_df['min_5s'])
    print(f"\n Pearson corr (daily signal fraction 1m vs 5s): r={corr_frac:.4f} p={p_frac:.4f}")
    print(f" Pearson corr (daily min vel_div 1m vs 5s): r={corr_min:.4f} p={p_min:.4f}")

    results['alignment'] = {
        'n_days': n,
        'both_signal_pct': float(both/n*100),
        'only_1m_pct': float(only_1m/n*100),
        'only_5s_pct': float(only_5s/n*100),
        'neither_pct': float(neither/n*100),
        'corr_daily_frac': float(corr_frac), 'p_daily_frac': float(p_frac),
        'corr_daily_min': float(corr_min), 'p_daily_min': float(p_min),
    }
else:
    print(" No overlapping days passed the row-count filter — alignment analysis skipped")
    results['alignment'] = {'n_days': 0}
|
||||
|
||||
# ── 6. Autocorrelation structure ───────────────────────────────────────────────

# Section 5 looks at the serial dependence of each raw vel_div series.
print("\n" + "=" * 70)
print("SECTION 5: Autocorrelation and Stationarity")
print("=" * 70)
|
||||
|
||||
def acf_lags(series, max_lag=20):
    """Sample autocorrelation of *series* at lags 1..max_lag.

    Parameters
    ----------
    series : pd.Series
        Input signal; NaNs are dropped before the mean is removed.
    max_lag : int
        Largest lag to compute (lags >= len(series) are omitted).

    Returns
    -------
    dict mapping lag -> ACF value, using the biased estimator
    cov(lag) / var; 0.0 is returned for a lag when variance is zero.
    """
    s = series.dropna().values
    s = s - s.mean()
    # FIX (perf): variance does not depend on the lag — hoisted out of the
    # loop (the original recomputed np.mean(s**2) for every lag).
    var = np.mean(s ** 2)
    result = {}
    for lag in range(1, max_lag + 1):
        if len(s) <= lag:
            break
        cov = np.mean(s[lag:] * s[:-lag])
        result[lag] = float(cov / var) if var > 0 else 0.0
    return result
|
||||
|
||||
print(f"\nACF (vel_div, lags 1-10):")
|
||||
acf_1m = acf_lags(df_1m['vel_div'], 10)
|
||||
acf_5s = acf_lags(df_5s['vel_div'], 10)
|
||||
print(f" {'Lag':>4} {'1m ACF':>8} {'5s ACF':>8}")
|
||||
for lag in range(1, 11):
|
||||
print(f" {lag:>4} {acf_1m.get(lag, 0):>+8.4f} {acf_5s.get(lag, 0):>+8.4f}")
|
||||
|
||||
# ADF stationarity test
|
||||
try:
|
||||
from statsmodels.tsa.stattools import adfuller
|
||||
for label, ser in [('1m', df_1m['vel_div']), ('5s', df_5s['vel_div'])]:
|
||||
adf_stat, adf_p, _, _, crit, _ = adfuller(ser.dropna().values[:5000], maxlag=5)
|
||||
print(f"\n ADF test {label} vel_div: stat={adf_stat:.4f} p={adf_p:.6f} "
|
||||
f"{'STATIONARY' if adf_p < 0.05 else 'NON-STATIONARY'}")
|
||||
results[f'adf_{label}'] = {'stat': float(adf_stat), 'p': float(adf_p)}
|
||||
except ImportError:
|
||||
print(" statsmodels not available — skipping ADF test")
|
||||
|
||||
results['acf_1m'] = acf_1m
|
||||
results['acf_5s'] = acf_5s
|
||||
|
||||
# ── 7. Backtest performance comparison ────────────────────────────────────────

print("\n" + "=" * 70)
print("SECTION 6: Backtest Performance — 1m vs 5s on Overlapping 64 Days")
print("=" * 70)
print("NOTE: 1m system uses VD_THRESHOLD=-0.50, 5s uses -0.02. Engine identical otherwise.")
print("Running 1m system on 64 overlap days...")

# Make the project packages importable for the engine/ACB/MC imports below.
sys.path.insert(0, str(HCM / 'nautilus_dolphin'))
|
||||
|
||||
try:
    # Project-local imports — wrapped in try so any failure in this whole
    # section degrades to a recorded error (see the except at the bottom of
    # the section) instead of killing the earlier analyses.
    from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
    from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker, ACBConfig
    from mc.mc_ml import DolphinForewarner

    # Monte-Carlo forewarner model directory and its base runtime config.
    MC_MODELS_DIR = str(HCM / 'nautilus_dolphin' / 'mc_results' / 'models')
    # NOTE(review): 'vel_div_threshold' here is the 5s-scale value (-0.02);
    # the per-run thresholds are passed separately to run_overlap_backtest.
    MC_BASE_CFG = {'trial_id': 0, 'vel_div_threshold': -0.02, 'vel_div_extreme': -0.05,
                   'use_direction_confirm': True, 'dc_lookback_bars': 7,
                   'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True,
                   'dc_leverage_boost': 1.0, 'dc_leverage_reduce': 0.5,
                   'vd_trend_lookback': 10, 'min_leverage': 0.5, 'max_leverage': 5.0,
                   'leverage_convexity': 3.0, 'fraction': 0.2, 'use_alpha_layers': True,
                   'use_dynamic_leverage': True, 'fixed_tp_pct': 0.0099, 'stop_pct': 1.0,
                   'max_hold_bars': 120, 'use_sp_fees': True, 'use_sp_slippage': True,
                   'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.5,
                   'use_ob_edge': True, 'ob_edge_bps': 5.0, 'ob_confirm_rate': 0.4,
                   'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.0,
                   'use_asset_selection': True, 'min_irp_alignment': 0.45,
                   'lookback': 100, 'acb_beta_high': 0.8, 'acb_beta_low': 0.2,
                   'acb_w750_threshold_pct': 60}

    # Columns that are signal/engine metadata rather than per-asset series;
    # everything else in a day's parquet is treated as an asset column.
    META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
                 'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div',
                 'instability_50', 'instability_150'}
|
||||
|
||||
def run_overlap_backtest(data_dir, vd_thresh, vd_extreme, label):
|
||||
engine_kwargs = dict(
|
||||
initial_capital=25000.0,
|
||||
vel_div_threshold=vd_thresh, vel_div_extreme=vd_extreme,
|
||||
min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
|
||||
fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
|
||||
use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
|
||||
dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
|
||||
use_asset_selection=True, min_irp_alignment=0.45,
|
||||
use_sp_fees=True, use_sp_slippage=True,
|
||||
sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
|
||||
use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
|
||||
lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
|
||||
)
|
||||
|
||||
from datetime import datetime as dt, timedelta
|
||||
d = dt.strptime(OVERLAP_START, '%Y-%m-%d')
|
||||
end = dt.strptime(OVERLAP_END, '%Y-%m-%d')
|
||||
date_strings = []
|
||||
while d <= end:
|
||||
pf = data_dir / f'{d.strftime("%Y-%m-%d")}.parquet'
|
||||
if pf.exists():
|
||||
date_strings.append(d.strftime('%Y-%m-%d'))
|
||||
d += timedelta(days=1)
|
||||
|
||||
pq_data = {}
|
||||
for ds in date_strings:
|
||||
df = pd.read_parquet(data_dir / f'{ds}.parquet')
|
||||
asset_cols = [c for c in df.columns if c not in META_COLS]
|
||||
dvol_arr = df['v50_lambda_max_velocity'].fillna(0).values
|
||||
pq_data[ds] = (df, asset_cols, dvol_arr)
|
||||
|
||||
acb = AdaptiveCircuitBreaker(ACBConfig(W750_THRESHOLD_PCT=60, BETA_HIGH=0.8, BETA_LOW=0.2))
|
||||
acb.preload_w750(date_strings)
|
||||
# Populate w750 from parquet
|
||||
for ds, (df, _, _) in pq_data.items():
|
||||
if 'v750_lambda_max_velocity' in df.columns:
|
||||
v750 = df['v750_lambda_max_velocity'].dropna()
|
||||
if len(v750) > 0:
|
||||
acb._w750_vel_cache[ds] = float(v750.median())
|
||||
w750_vals = [v for v in acb._w750_vel_cache.values() if v != 0.0]
|
||||
if w750_vals:
|
||||
acb._w750_threshold = float(np.percentile(w750_vals, acb.config.W750_THRESHOLD_PCT))
|
||||
|
||||
try:
|
||||
fw = DolphinForewarner(MC_MODELS_DIR)
|
||||
except Exception:
|
||||
fw = None
|
||||
|
||||
engine = NDAlphaEngine(**engine_kwargs)
|
||||
engine.set_acb(acb)
|
||||
if fw:
|
||||
engine.set_mc_forewarner(fw, MC_BASE_CFG)
|
||||
|
||||
daily = []
|
||||
for ds in date_strings:
|
||||
df, asset_cols, dvol_arr = pq_data[ds]
|
||||
if len(df) < 200:
|
||||
continue
|
||||
vol_p60 = np.nanpercentile(dvol_arr, 60)
|
||||
vol_ok = np.where(np.isfinite(dvol_arr), dvol_arr > vol_p60, False)
|
||||
result = engine.process_day(ds, df, asset_cols, vol_regime_ok=vol_ok)
|
||||
daily.append(result)
|
||||
|
||||
all_trades = [{'pnl': t.pnl_absolute, 'pnl_pct': t.pnl_pct * 100,
|
||||
'bars_held': t.bars_held, 'exit_reason': t.exit_reason,
|
||||
'leverage': t.leverage}
|
||||
for t in engine.trade_history]
|
||||
|
||||
cap_series = [engine_kwargs['initial_capital']] + [r['capital'] for r in daily]
|
||||
peak = max(cap_series)
|
||||
min_cap = min(cap_series)
|
||||
dd = (min_cap - peak) / peak * 100
|
||||
|
||||
roi = (cap_series[-1] / cap_series[0] - 1) * 100
|
||||
wins = [t for t in all_trades if t['pnl'] > 0]
|
||||
losses = [t for t in all_trades if t['pnl'] < 0]
|
||||
wr = len(wins) / len(all_trades) * 100 if all_trades else 0
|
||||
pf = sum(t['pnl'] for t in wins) / abs(sum(t['pnl'] for t in losses)) if losses else float('inf')
|
||||
tp_exits = sum(1 for t in all_trades if t['exit_reason'] == 'FIXED_TP')
|
||||
mh_exits = sum(1 for t in all_trades if t['exit_reason'] == 'MAX_HOLD')
|
||||
|
||||
print(f"\n {label} ({OVERLAP_START} to {OVERLAP_END}, {len(daily)} days):")
|
||||
print(f" ROI: {roi:+.2f}%")
|
||||
print(f" PF: {pf:.4f}")
|
||||
print(f" Max DD: {dd:.2f}%")
|
||||
print(f" WR: {wr:.2f}%")
|
||||
print(f" Trades: {len(all_trades)} ({len(all_trades)/len(daily):.2f}/day)")
|
||||
print(f" TP exits: {tp_exits} ({tp_exits/max(1,len(all_trades))*100:.1f}%)")
|
||||
print(f" MH exits: {mh_exits} ({mh_exits/max(1,len(all_trades))*100:.1f}%)")
|
||||
print(f" Avg lev: {np.mean([t['leverage'] for t in all_trades]):.3f}x" if all_trades else "")
|
||||
|
||||
return {
|
||||
'label': label, 'n_days': len(daily), 'n_trades': len(all_trades),
|
||||
'roi_pct': float(roi), 'pf': float(pf), 'max_dd_pct': float(dd),
|
||||
'wr_pct': float(wr), 'trades_per_day': float(len(all_trades)/max(1,len(daily))),
|
||||
'tp_exits_pct': float(tp_exits/max(1,len(all_trades))*100),
|
||||
'mh_exits_pct': float(mh_exits/max(1,len(all_trades))*100),
|
||||
'avg_leverage': float(np.mean([t['leverage'] for t in all_trades])) if all_trades else 0,
|
||||
}
|
||||
|
||||
    # Run the identical engine on both data sources over the same window;
    # only the vel_div thresholds differ (scale-matched per timescale).
    res_1m = run_overlap_backtest(KLINES_DIR, -0.50, -1.25, '1m klines (64d overlap)')
    res_5s = run_overlap_backtest(NG5_DIR, -0.02, -0.05, '5s NG5 (64d overlap)')

    results['backtest_comparison'] = {'1m': res_1m, '5s': res_5s}

    # Side-by-side metric table; Delta = 1m value minus 5s value.
    print(f"\n COMPARISON TABLE:")
    print(f" {'Metric':<20} {'1m klines':>12} {'5s NG5':>12} {'Delta':>10}")
    print(f" {'-'*56}")
    for k, label in [('roi_pct','ROI %'), ('pf','PF'), ('max_dd_pct','Max DD %'),
                     ('wr_pct','WR %'), ('trades_per_day','Trades/day'),
                     ('tp_exits_pct','TP exits %'), ('avg_leverage','Avg leverage')]:
        v1 = res_1m.get(k, 0)
        v5 = res_5s.get(k, 0)
        delta = v1 - v5
        print(f" {label:<20} {v1:>12.3f} {v5:>12.3f} {delta:>+10.3f}")

except Exception as e:
    # Any failure in this section (imports, data, engine) is recorded in the
    # JSON payload instead of aborting the study's earlier sections.
    import traceback
    print(f" Backtest comparison failed: {e}")
    traceback.print_exc()
    results['backtest_comparison'] = {'error': str(e)}
|
||||
|
||||
# ── 8. Save results ────────────────────────────────────────────────────────────

# Total wall-clock runtime for the whole study.
total_s = time.time() - t0
results['runtime_s'] = float(total_s)

out_json = LOGS_DIR / f'1m_vs_5s_comparison_{run_ts}.json'
# default=str stringifies anything json can't encode natively (np scalars, dates).
out_json.write_text(json.dumps(results, indent=2, default=str))

banner = "=" * 70
print("\n" + banner)
print(f"COMPLETE in {total_s:.1f}s")
print(f"Results saved: {out_json}")
print(banner)
|
||||
Reference in New Issue
Block a user