""" 1m Klines vs 5s NG5 — Overlapping Period Comparison Study =========================================================== Overlapping window: 2026-01-01 to 2026-03-05 (64 days) - 1m klines data: vbt_cache_klines/2026-*.parquet (1439 rows/day, 1-min bars) - 5s NG5 data: vbt_cache_ng5/2026-*.parquet (~6154 rows/day, 5s bars) Analyses: 1. Signal distribution comparison (vel_div, v50, v150, v750, instability) 2. Cross-correlation and lead-lag structure (1m vel_div vs 5s vel_div) 3. PCA on both signal spaces (shared vs unique variance) 4. Backtest performance comparison (same engine, same dates, both data sources) 5. Signal alignment quantification (how often do both timescales agree?) 6. Statistical characteristics: skew, kurtosis, autocorrelation, stationarity tests Run: python test_1m_vs_5s_comparison.py Output: run_logs/1m_vs_5s_comparison_TIMESTAMP.json + .md report """ import sys, time, json, warnings sys.stdout.reconfigure(encoding='utf-8', errors='replace') warnings.filterwarnings('ignore') from pathlib import Path from datetime import datetime import numpy as np import pandas as pd from scipy import stats from scipy.stats import spearmanr, pearsonr, ks_2samp HCM = Path(r'C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict') KLINES_DIR = HCM / 'vbt_cache_klines' NG5_DIR = HCM / 'vbt_cache_ng5' LOGS_DIR = HCM / 'nautilus_dolphin' / 'run_logs' LOGS_DIR.mkdir(exist_ok=True) OVERLAP_START = '2026-01-01' OVERLAP_END = '2026-03-05' SIGNAL_COLS = ['vel_div', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity', 'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'instability_50', 'instability_150'] t0 = time.time() run_ts = datetime.now().strftime('%Y%m%d_%H%M%S') print("=" * 70) print("1m vs 5s Signal Comparison Study") print(f"Overlap window: {OVERLAP_START} to {OVERLAP_END}") print("=" * 70) # ── 1. Load data ──────────────────────────────────────────────────────────────── def load_overlap(data_dir: Path, label: str) -> pd.DataFrame: from datetime import datetime as dt, timedelta d = dt.strptime(OVERLAP_START, '%Y-%m-%d') end = dt.strptime(OVERLAP_END, '%Y-%m-%d') frames = [] while d <= end: ds = d.strftime('%Y-%m-%d') pf = data_dir / f'{ds}.parquet' if pf.exists(): df = pd.read_parquet(pf) df['date_str'] = ds frames.append(df) d += timedelta(days=1) if not frames: print(f" ERROR: No parquets found for {label} in {data_dir}") return pd.DataFrame() full = pd.concat(frames, ignore_index=True) print(f" {label}: {len(frames)} dates, {len(full):,} rows, cols={list(full.columns[:8])}") return full print("\n--- Loading data ---") df_1m = load_overlap(KLINES_DIR, '1m klines') df_5s = load_overlap(NG5_DIR, '5s NG5') if df_1m.empty or df_5s.empty: print("ABORT: Missing data for one or both timescales") sys.exit(1) # ── 2. Signal distribution comparison ───────────────────────────────────────── print("\n" + "=" * 70) print("SECTION 1: Signal Distribution Comparison") print("=" * 70) results = {'run_ts': run_ts, 'overlap': f'{OVERLAP_START}/{OVERLAP_END}'} dist_results = {} for col in SIGNAL_COLS: if col not in df_1m.columns or col not in df_5s.columns: continue s1 = df_1m[col].dropna() s5 = df_5s[col].dropna() # KS test: are distributions different? 
# ── 2. Signal distribution comparison ────────────────────────────────────────

print("\n" + "=" * 70)
print("SECTION 1: Signal Distribution Comparison")
print("=" * 70)

results = {'run_ts': run_ts, 'overlap': f'{OVERLAP_START}/{OVERLAP_END}'}
dist_results = {}
for col in SIGNAL_COLS:
    if col not in df_1m.columns or col not in df_5s.columns:
        continue
    s1 = df_1m[col].dropna()
    s5 = df_5s[col].dropna()
    # KS test: are the two marginal distributions different?
    ks_stat, ks_p = ks_2samp(s1.values, s5.values)
    dist_results[col] = {
        '1m_mean': float(s1.mean()), '1m_std': float(s1.std()),
        '1m_p5': float(s1.quantile(0.05)), '1m_p50': float(s1.median()),
        '1m_p95': float(s1.quantile(0.95)),
        '1m_skew': float(s1.skew()), '1m_kurt': float(s1.kurtosis()),
        '5s_mean': float(s5.mean()), '5s_std': float(s5.std()),
        '5s_p5': float(s5.quantile(0.05)), '5s_p50': float(s5.median()),
        '5s_p95': float(s5.quantile(0.95)),
        '5s_skew': float(s5.skew()), '5s_kurt': float(s5.kurtosis()),
        'ks_stat': float(ks_stat), 'ks_p': float(ks_p),
        'scale_ratio_std': float(s1.std() / s5.std()) if s5.std() > 0 else None,
    }
    print(f"\n{col}:")
    print(f"  1m: mean={s1.mean():.4f} std={s1.std():.4f} p5={s1.quantile(0.05):.4f} "
          f"p50={s1.median():.4f} p95={s1.quantile(0.95):.4f} "
          f"skew={s1.skew():.3f} kurt={s1.kurtosis():.3f}")
    print(f"  5s: mean={s5.mean():.4f} std={s5.std():.4f} p5={s5.quantile(0.05):.4f} "
          f"p50={s5.median():.4f} p95={s5.quantile(0.95):.4f} "
          f"skew={s5.skew():.3f} kurt={s5.kurtosis():.3f}")
    print(f"  Scale ratio (1m_std/5s_std): {s1.std()/s5.std():.2f}x | "
          f"KS stat={ks_stat:.4f} p={ks_p:.4f} ({'DIFFERENT' if ks_p < 0.05 else 'similar'})")
results['distributions'] = dist_results

# ── 3. Cross-correlation and lead-lag ────────────────────────────────────────

print("\n" + "=" * 70)
print("SECTION 2: Cross-Correlation and Lead-Lag (1m vs 5s vel_div)")
print("=" * 70)
print("Methodology: resample both to common 5-min bars, compute cross-corr at lags -6..+12")

def resample_to_5min(df: pd.DataFrame, col: str = 'vel_div') -> pd.Series:
    """Put a signal column onto a common 5-minute grid (last value in each bin)."""
    if 'timestamp' not in df.columns:
        return pd.Series(dtype=float)
    ts = pd.to_datetime(df['timestamp'])
    s = pd.Series(df[col].values, index=ts)
    return s.resample('5min').last().dropna()

vd_1m_5min = resample_to_5min(df_1m, 'vel_div')
vd_5s_5min = resample_to_5min(df_5s, 'vel_div')

# Align on the common index and drop bars where either side is missing
common_idx = vd_1m_5min.index.intersection(vd_5s_5min.index)
pair = pd.DataFrame({'x1': vd_1m_5min.reindex(common_idx),
                     'x5': vd_5s_5min.reindex(common_idx)}).dropna()
x1 = pair['x1'].values
x5 = pair['x5'].values
print(f"  Common 5-min bars: {len(pair)}")

crosscorr = {}
print(f"\n  {'Lag':>6} {'Pearson r':>10} {'p-value':>10} {'Spearman r':>11}")
print(f"  {'-'*6} {'-'*10} {'-'*10} {'-'*11}")
# Convention: positive lag pairs x1[t] with x5[t+lag]. A peak at lag > 0
# therefore means today's 1m value correlates with the 5s value `lag` bars
# LATER, i.e. 1m LEADS 5s; a peak at lag < 0 means 5s leads 1m. (The original
# comments stated the opposite, which contradicted the slicing below.)
for lag in range(-6, 13):
    if lag < 0:
        a_1m, a_5s = x1[-lag:], x5[:lag]
    elif lag == 0:
        a_1m, a_5s = x1, x5
    else:
        a_1m, a_5s = x1[:-lag], x5[lag:]
    n = min(len(a_1m), len(a_5s))
    a_1m, a_5s = a_1m[:n], a_5s[:n]
    if n < 10:
        continue
    pr, pp = pearsonr(a_1m, a_5s)
    sr, sp = spearmanr(a_1m, a_5s)
    crosscorr[lag] = {'pearson_r': float(pr), 'pearson_p': float(pp),
                      'spearman_r': float(sr), 'spearman_p': float(sp), 'n': n}
    marker = ' <-- contemporaneous' if lag == 0 else (' <-- 1m LEADS' if lag > 0 and abs(pr) > 0.3 else '')
    print(f"  lag={lag:+3d} r={pr:+.4f} p={pp:.4f} rho={sr:+.4f}{marker}")
results['crosscorr_5min'] = crosscorr

# Find the lag with the strongest absolute correlation
best_lag = max(crosscorr.items(), key=lambda x: abs(x[1]['pearson_r']))
print(f"\n  Best lag: {best_lag[0]:+d} (r={best_lag[1]['pearson_r']:+.4f})")
if best_lag[0] > 0:
    print(f"  INTERPRETATION: 1m signal LEADS 5s by {best_lag[0]} × 5min = {best_lag[0]*5} minutes")
elif best_lag[0] < 0:
    print(f"  INTERPRETATION: 5s signal LEADS 1m by {abs(best_lag[0])} × 5min = {abs(best_lag[0])*5} minutes")
else:
    print("  INTERPRETATION: Signals are contemporaneous (no lead-lag at 5-min resolution)")
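# Convention check (editor's sketch, uncalled by default): verify the lag
# sign convention above on synthetic data. The series, `shift`, and the
# helper name are illustrative assumptions, not study data. With
# y[t] = x[t - shift] (y is a delayed copy of x), the peak should land at
# lag = +shift, matching "positive lag => first series leads".
def _lag_convention_demo(shift: int = 3, n: int = 500, seed: int = 0) -> int:
    rng = np.random.default_rng(seed)
    x = rng.standard_normal(n).cumsum()   # a persistent random walk
    y = np.roll(x, shift)                 # y lags x by `shift` bars
    best, best_r = 0, 0.0
    for lag in range(-6, 13):
        a = x[:-lag] if lag > 0 else (x[-lag:] if lag < 0 else x)
        b = y[lag:] if lag > 0 else (y[:lag] if lag < 0 else y)
        m = min(len(a), len(b))
        r, _ = pearsonr(a[:m], b[:m])
        if abs(r) > abs(best_r):
            best, best_r = lag, r
    return best  # expected: +shift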
{best_lag[0]*5} minutes") else: print(f" INTERPRETATION: Signals are contemporaneous (no lead-lag at 5-min resolution)") # ── 4. PCA on both signal spaces ─────────────────────────────────────────────── print("\n" + "=" * 70) print("SECTION 3: PCA — Shared vs Unique Variance") print("=" * 70) from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler pca_results = {} for label, df in [('1m', df_1m), ('5s', df_5s)]: cols_avail = [c for c in SIGNAL_COLS if c in df.columns] X = df[cols_avail].dropna() if len(X) < 100: continue scaler = StandardScaler() Xs = scaler.fit_transform(X) pca = PCA() pca.fit(Xs) evr = pca.explained_variance_ratio_ cumvar = np.cumsum(evr) n_for_90 = int(np.searchsorted(cumvar, 0.90)) + 1 pca_results[label] = { 'n_features': len(cols_avail), 'n_samples': len(X), 'explained_variance_ratio': evr.tolist(), 'cumulative_variance': cumvar.tolist(), 'n_components_90pct': n_for_90, 'components_top3': pca.components_[:3].tolist(), } print(f"\n{label} PCA ({len(X):,} samples, {len(cols_avail)} features):") for i, (ev, cv) in enumerate(zip(evr[:6], cumvar[:6])): print(f" PC{i+1}: var={ev:.4f} ({ev*100:.1f}%) cumul={cv*100:.1f}%") print(f" Components for 90% variance: {n_for_90}") print(f" PC1 loadings: " + ", ".join(f"{c}={v:.3f}" for c, v in zip(cols_avail, pca.components_[0]))) print(f" PC2 loadings: " + ", ".join(f"{c}={v:.3f}" for c, v in zip(cols_avail, pca.components_[1]))) # Joint PCA on 5-min resampled aligned data print("\n--- Joint PCA on aligned 5-min data ---") joint_cols = [c for c in SIGNAL_COLS if c in df_1m.columns and c in df_5s.columns] joint_frames = [] for ds in sorted(df_1m['date_str'].unique()): sub_1m = df_1m[df_1m['date_str'] == ds] sub_5s = df_5s[df_5s['date_str'] == ds] if len(sub_1m) < 50 or len(sub_5s) < 100: continue for col in joint_cols: if 'timestamp' not in sub_1m.columns: break ts_1m = pd.to_datetime(sub_1m['timestamp']) s1m = pd.Series(sub_1m[col].values, index=ts_1m).resample('5min').last() ts_5s = pd.to_datetime(sub_5s['timestamp']) s5s = pd.Series(sub_5s[col].values, index=ts_5s).resample('5min').last() idx = s1m.index.intersection(s5s.index) if len(idx) < 10: break else: row = {} for col in joint_cols: ts_1m = pd.to_datetime(sub_1m['timestamp']) s1m = pd.Series(sub_1m[col].values, index=ts_1m).resample('5min').last() ts_5s = pd.to_datetime(sub_5s['timestamp']) s5s = pd.Series(sub_5s[col].values, index=ts_5s).resample('5min').last() idx = s1m.index.intersection(s5s.index) row[f'1m_{col}'] = s1m.reindex(idx).values.tolist() row[f'5s_{col}'] = s5s.reindex(idx).values.tolist() # Build aligned dataframe for this date try: n = len(s1m.reindex(idx)) date_df = pd.DataFrame({f'1m_{c}': pd.Series(sub_1m[c].values, index=pd.to_datetime(sub_1m['timestamp'])).resample('5min').last().reindex(idx).values for c in joint_cols} | {f'5s_{c}': pd.Series(sub_5s[c].values, index=pd.to_datetime(sub_5s['timestamp'])).resample('5min').last().reindex(idx).values for c in joint_cols}) joint_frames.append(date_df) except Exception: pass if joint_frames: joint_df = pd.concat(joint_frames, ignore_index=True).dropna() print(f" Joint aligned data: {len(joint_df):,} 5-min bars × {len(joint_df.columns)} features") if len(joint_df) > 50: Xj = StandardScaler().fit_transform(joint_df) pca_j = PCA() pca_j.fit(Xj) evr_j = pca_j.explained_variance_ratio_ cumvar_j = np.cumsum(evr_j) # How much variance is "shared" (first PC explains variance from both 1m and 5s features)? 
# ── 5. Signal alignment quantification ───────────────────────────────────────

print("\n" + "=" * 70)
print("SECTION 4: Signal Alignment Quantification")
print("=" * 70)
print("How often does 1m vel_div < -0.50 align with 5s vel_div < -0.02?")

# Day-level alignment: does each timescale fire its own threshold on the same dates?
VD_1M_THRESH = -0.50
VD_5S_THRESH = -0.02
align_results = []
for ds in sorted(df_1m['date_str'].unique()):
    sub_1m = df_1m[df_1m['date_str'] == ds]
    sub_5s = df_5s[df_5s['date_str'] == ds]
    if len(sub_1m) < 100 or len(sub_5s) < 100:
        continue
    # Fraction of bars signaling on each timescale
    frac_1m = (sub_1m['vel_div'] < VD_1M_THRESH).mean()
    frac_5s = (sub_5s['vel_div'] < VD_5S_THRESH).mean()
    # Daily minimum vel_div (peak signal strength)
    min_1m = sub_1m['vel_div'].min()
    min_5s = sub_5s['vel_div'].min()
    align_results.append({
        'date': ds, 'frac_1m': frac_1m, 'frac_5s': frac_5s,
        'min_1m': min_1m, 'min_5s': min_5s,
        'both_signal': (frac_1m > 0) and (frac_5s > 0),
        'only_1m': (frac_1m > 0) and (frac_5s == 0),
        'only_5s': (frac_1m == 0) and (frac_5s > 0),
        'neither': (frac_1m == 0) and (frac_5s == 0),
    })

align_df = pd.DataFrame(align_results)
n = len(align_df)
both = align_df['both_signal'].sum()
only_1m = align_df['only_1m'].sum()
only_5s = align_df['only_5s'].sum()
neither = align_df['neither'].sum()
print(f"  Analysis over {n} overlapping days:")
print(f"  Both signal    : {both}/{n} ({both/n*100:.1f}%) — MTF alignment days")
print(f"  Only 1m signals: {only_1m}/{n} ({only_1m/n*100:.1f}%)")
print(f"  Only 5s signals: {only_5s}/{n} ({only_5s/n*100:.1f}%)")
print(f"  Neither signals: {neither}/{n} ({neither/n*100:.1f}%)")

# Correlation between daily signal fractions and between daily minima
corr_frac, p_frac = pearsonr(align_df['frac_1m'], align_df['frac_5s'])
corr_min, p_min = pearsonr(align_df['min_1m'], align_df['min_5s'])
print(f"\n  Pearson corr (daily signal fraction 1m vs 5s): r={corr_frac:.4f} p={p_frac:.4f}")
print(f"  Pearson corr (daily min vel_div 1m vs 5s):     r={corr_min:.4f} p={p_min:.4f}")

results['alignment'] = {
    'n_days': n,
    'both_signal_pct': float(both/n*100), 'only_1m_pct': float(only_1m/n*100),
    'only_5s_pct': float(only_5s/n*100), 'neither_pct': float(neither/n*100),
    'corr_daily_frac': float(corr_frac), 'p_daily_frac': float(p_frac),
    'corr_daily_min': float(corr_min), 'p_daily_min': float(p_min),
}
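# Chance-corrected agreement (editor's sketch, uncalled by default): the raw
# both/only/neither percentages overstate agreement when one timescale fires
# on most days. Cohen's kappa on the day-level 2x2 table corrects for that.
# Standard formula; the helper name is an editor's invention.
def _day_level_kappa(both_n: int, only_1m_n: int, only_5s_n: int, neither_n: int) -> float:
    total = both_n + only_1m_n + only_5s_n + neither_n
    if total == 0:
        return float('nan')
    po = (both_n + neither_n) / total          # observed agreement
    p1 = (both_n + only_1m_n) / total          # P(1m fires)
    p5 = (both_n + only_5s_n) / total          # P(5s fires)
    pe = p1 * p5 + (1 - p1) * (1 - p5)         # agreement expected by chance
    return (po - pe) / (1 - pe) if pe < 1 else float('nan')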
# ── 6. Autocorrelation structure ─────────────────────────────────────────────

print("\n" + "=" * 70)
print("SECTION 5: Autocorrelation and Stationarity")
print("=" * 70)

def acf_lags(series: pd.Series, max_lag: int = 20) -> dict:
    """Sample autocorrelation at lags 1..max_lag (mean-removed, biased estimator)."""
    s = series.dropna().values
    s = s - s.mean()
    var = np.mean(s ** 2)
    result = {}
    for lag in range(1, max_lag + 1):
        if len(s) <= lag:
            break
        cov = np.mean(s[lag:] * s[:-lag])
        result[lag] = float(cov / var) if var > 0 else 0.0
    return result

print(f"\nACF (vel_div, lags 1-10):")
acf_1m = acf_lags(df_1m['vel_div'], 10)
acf_5s = acf_lags(df_5s['vel_div'], 10)
print(f"  {'Lag':>4} {'1m ACF':>8} {'5s ACF':>8}")
for lag in range(1, 11):
    print(f"  {lag:>4} {acf_1m.get(lag, 0):>+8.4f} {acf_5s.get(lag, 0):>+8.4f}")

# ADF stationarity test (first 5000 points to keep runtime bounded)
try:
    from statsmodels.tsa.stattools import adfuller
    for label, ser in [('1m', df_1m['vel_div']), ('5s', df_5s['vel_div'])]:
        adf_stat, adf_p, _, _, crit, _ = adfuller(ser.dropna().values[:5000], maxlag=5)
        print(f"\n  ADF test {label} vel_div: stat={adf_stat:.4f} p={adf_p:.6f} "
              f"{'STATIONARY' if adf_p < 0.05 else 'NON-STATIONARY'}")
        results[f'adf_{label}'] = {'stat': float(adf_stat), 'p': float(adf_p)}
except ImportError:
    print("  statsmodels not available — skipping ADF test")
results['acf_1m'] = acf_1m
results['acf_5s'] = acf_5s
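# Caveat on the ACF table above (editor's note): lag k is measured in native
# bars, so lag 1 spans 60 s for the 1m series but only 5 s for the 5s series.
# To compare persistence at matched horizons, put both series on a common
# grid first — a minimal sketch (uncalled; reuses resample_to_5min from
# Section 2, so one lag equals 5 minutes on both sides):
def _acf_common_grid(max_lag: int = 10) -> tuple:
    return (acf_lags(resample_to_5min(df_1m, 'vel_div'), max_lag),
            acf_lags(resample_to_5min(df_5s, 'vel_div'), max_lag))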
# ── 7. Backtest performance comparison ───────────────────────────────────────

print("\n" + "=" * 70)
print("SECTION 6: Backtest Performance — 1m vs 5s on Overlapping 64 Days")
print("=" * 70)
print("NOTE: thresholds are scale-specific (1m: VD_THRESHOLD=-0.50, extreme=-1.25;")
print("      5s: -0.02 / -0.05). The engine and all other settings are identical.")
print("Running both systems on the 64 overlap days...")

sys.path.insert(0, str(HCM / 'nautilus_dolphin'))
try:
    from nautilus_dolphin.nautilus.esf_alpha_orchestrator import NDAlphaEngine
    from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker, ACBConfig
    from mc.mc_ml import DolphinForewarner

    MC_MODELS_DIR = str(HCM / 'nautilus_dolphin' / 'mc_results' / 'models')
    MC_BASE_CFG = {'trial_id': 0, 'vel_div_threshold': -0.02, 'vel_div_extreme': -0.05,
                   'use_direction_confirm': True, 'dc_lookback_bars': 7,
                   'dc_min_magnitude_bps': 0.75, 'dc_skip_contradicts': True,
                   'dc_leverage_boost': 1.0, 'dc_leverage_reduce': 0.5,
                   'vd_trend_lookback': 10, 'min_leverage': 0.5, 'max_leverage': 5.0,
                   'leverage_convexity': 3.0, 'fraction': 0.2, 'use_alpha_layers': True,
                   'use_dynamic_leverage': True, 'fixed_tp_pct': 0.0099, 'stop_pct': 1.0,
                   'max_hold_bars': 120, 'use_sp_fees': True, 'use_sp_slippage': True,
                   'sp_maker_entry_rate': 0.62, 'sp_maker_exit_rate': 0.5,
                   'use_ob_edge': True, 'ob_edge_bps': 5.0, 'ob_confirm_rate': 0.4,
                   'ob_imbalance_bias': -0.09, 'ob_depth_scale': 1.0,
                   'use_asset_selection': True, 'min_irp_alignment': 0.45,
                   'lookback': 100, 'acb_beta_high': 0.8, 'acb_beta_low': 0.2,
                   'acb_w750_threshold_pct': 60}
    META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity',
                 'v150_lambda_max_velocity', 'v300_lambda_max_velocity',
                 'v750_lambda_max_velocity', 'vel_div', 'instability_50',
                 'instability_150'}

    def run_overlap_backtest(data_dir, vd_thresh, vd_extreme, label):
        engine_kwargs = dict(
            initial_capital=25000.0,
            vel_div_threshold=vd_thresh, vel_div_extreme=vd_extreme,
            min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
            fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
            use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
            dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
            use_asset_selection=True, min_irp_alignment=0.45,
            use_sp_fees=True, use_sp_slippage=True,
            sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
            use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
            lookback=100, use_alpha_layers=True, use_dynamic_leverage=True,
            seed=42,
        )
        from datetime import datetime as dt, timedelta
        d = dt.strptime(OVERLAP_START, '%Y-%m-%d')
        end = dt.strptime(OVERLAP_END, '%Y-%m-%d')
        date_strings = []
        while d <= end:
            pf = data_dir / f'{d.strftime("%Y-%m-%d")}.parquet'
            if pf.exists():
                date_strings.append(d.strftime('%Y-%m-%d'))
            d += timedelta(days=1)

        pq_data = {}
        for ds in date_strings:
            df = pd.read_parquet(data_dir / f'{ds}.parquet')
            asset_cols = [c for c in df.columns if c not in META_COLS]
            dvol_arr = df['v50_lambda_max_velocity'].fillna(0).values
            pq_data[ds] = (df, asset_cols, dvol_arr)

        acb = AdaptiveCircuitBreaker(ACBConfig(W750_THRESHOLD_PCT=60, BETA_HIGH=0.8, BETA_LOW=0.2))
        acb.preload_w750(date_strings)
        # Populate the w750 cache from the parquet data itself
        for ds, (df, _, _) in pq_data.items():
            if 'v750_lambda_max_velocity' in df.columns:
                v750 = df['v750_lambda_max_velocity'].dropna()
                if len(v750) > 0:
                    acb._w750_vel_cache[ds] = float(v750.median())
        w750_vals = [v for v in acb._w750_vel_cache.values() if v != 0.0]
        if w750_vals:
            acb._w750_threshold = float(np.percentile(w750_vals, acb.config.W750_THRESHOLD_PCT))

        try:
            fw = DolphinForewarner(MC_MODELS_DIR)
        except Exception:
            fw = None

        engine = NDAlphaEngine(**engine_kwargs)
        engine.set_acb(acb)
        if fw:
            engine.set_mc_forewarner(fw, MC_BASE_CFG)

        daily = []
        for ds in date_strings:
            df, asset_cols, dvol_arr = pq_data[ds]
            if len(df) < 200:
                continue
            vol_p60 = np.nanpercentile(dvol_arr, 60)
            vol_ok = np.where(np.isfinite(dvol_arr), dvol_arr > vol_p60, False)
            result = engine.process_day(ds, df, asset_cols, vol_regime_ok=vol_ok)
            daily.append(result)

        all_trades = [{'pnl': t.pnl_absolute, 'pnl_pct': t.pnl_pct * 100,
                       'bars_held': t.bars_held, 'exit_reason': t.exit_reason,
                       'leverage': t.leverage} for t in engine.trade_history]
        cap_series = [engine_kwargs['initial_capital']] + [r['capital'] for r in daily]
        # Max drawdown against the running peak. (The original used
        # min-vs-global-max, which overstates DD when the minimum
        # precedes the peak.)
        cap_arr = np.asarray(cap_series, dtype=float)
        running_peak = np.maximum.accumulate(cap_arr)
        dd = float(((cap_arr - running_peak) / running_peak).min() * 100)
        roi = (cap_series[-1] / cap_series[0] - 1) * 100
        wins = [t for t in all_trades if t['pnl'] > 0]
        losses = [t for t in all_trades if t['pnl'] < 0]
        wr = len(wins) / len(all_trades) * 100 if all_trades else 0
        pf = sum(t['pnl'] for t in wins) / abs(sum(t['pnl'] for t in losses)) if losses else float('inf')
        tp_exits = sum(1 for t in all_trades if t['exit_reason'] == 'FIXED_TP')
        mh_exits = sum(1 for t in all_trades if t['exit_reason'] == 'MAX_HOLD')

        print(f"\n  {label} ({OVERLAP_START} to {OVERLAP_END}, {len(daily)} days):")
        print(f"    ROI:      {roi:+.2f}%")
        print(f"    PF:       {pf:.4f}")
        print(f"    Max DD:   {dd:.2f}%")
        print(f"    WR:       {wr:.2f}%")
        print(f"    Trades:   {len(all_trades)} ({len(all_trades)/len(daily):.2f}/day)")
        print(f"    TP exits: {tp_exits} ({tp_exits/max(1,len(all_trades))*100:.1f}%)")
        print(f"    MH exits: {mh_exits} ({mh_exits/max(1,len(all_trades))*100:.1f}%)")
        if all_trades:
            print(f"    Avg lev:  {np.mean([t['leverage'] for t in all_trades]):.3f}x")
        return {
            'label': label, 'n_days': len(daily), 'n_trades': len(all_trades),
            'roi_pct': float(roi), 'pf': float(pf), 'max_dd_pct': float(dd),
            'wr_pct': float(wr),
            'trades_per_day': float(len(all_trades)/max(1,len(daily))),
            'tp_exits_pct': float(tp_exits/max(1,len(all_trades))*100),
            'mh_exits_pct': float(mh_exits/max(1,len(all_trades))*100),
            'avg_leverage': float(np.mean([t['leverage'] for t in all_trades])) if all_trades else 0,
        }

    res_1m = run_overlap_backtest(KLINES_DIR, -0.50, -1.25, '1m klines (64d overlap)')
    res_5s = run_overlap_backtest(NG5_DIR, -0.02, -0.05, '5s NG5 (64d overlap)')
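    # Cross-check of the scale-specific thresholds (editor's sketch): if the
    # 1m threshold is just the 5s threshold rescaled by signal dispersion,
    # -0.50 divided by the Section-1 std ratio should land near -0.02.
    # Purely diagnostic; not used by either backtest.
    _vd = dist_results.get('vel_div', {})
    if _vd.get('scale_ratio_std'):
        implied_5s = -0.50 / _vd['scale_ratio_std']
        print(f"\n  Threshold check: -0.50 (1m) / std-ratio {_vd['scale_ratio_std']:.2f} "
              f"= {implied_5s:.4f} vs configured -0.02")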
    results['backtest_comparison'] = {'1m': res_1m, '5s': res_5s}

    print("\n  COMPARISON TABLE:")
    print(f"  {'Metric':<20} {'1m klines':>12} {'5s NG5':>12} {'Delta':>10}")
    print(f"  {'-'*56}")
    for k, label in [('roi_pct', 'ROI %'), ('pf', 'PF'), ('max_dd_pct', 'Max DD %'),
                     ('wr_pct', 'WR %'), ('trades_per_day', 'Trades/day'),
                     ('tp_exits_pct', 'TP exits %'), ('avg_leverage', 'Avg leverage')]:
        v1 = res_1m.get(k, 0)
        v5 = res_5s.get(k, 0)
        delta = v1 - v5
        print(f"  {label:<20} {v1:>12.3f} {v5:>12.3f} {delta:>+10.3f}")
except Exception as e:
    import traceback
    print(f"  Backtest comparison failed: {e}")
    traceback.print_exc()
    results['backtest_comparison'] = {'error': str(e)}

# ── 8. Save results ──────────────────────────────────────────────────────────

elapsed = time.time() - t0
results['runtime_s'] = float(elapsed)
out_json = LOGS_DIR / f'1m_vs_5s_comparison_{run_ts}.json'
with open(out_json, 'w') as f:
    json.dump(results, f, indent=2, default=str)

print("\n" + "=" * 70)
print(f"COMPLETE in {elapsed:.1f}s")
print(f"Results saved: {out_json}")
print("=" * 70)
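# Markdown companion report (editor's sketch): the module docstring promises a
# ".md report" alongside the JSON, but this section never wrote one. This
# minimal version dumps the backtest comparison as a Markdown table; the exact
# layout is an assumption, not the original format.
out_md = LOGS_DIR / f'1m_vs_5s_comparison_{run_ts}.md'
md_lines = [f'# 1m vs 5s Comparison — {run_ts}', '',
            f'Overlap window: {OVERLAP_START} to {OVERLAP_END}', '']
bt = results.get('backtest_comparison', {})
if '1m' in bt and '5s' in bt:
    md_lines += ['| Metric | 1m klines | 5s NG5 |', '| --- | --- | --- |']
    for k in ('roi_pct', 'pf', 'max_dd_pct', 'wr_pct', 'trades_per_day', 'avg_leverage'):
        md_lines.append(f"| {k} | {bt['1m'].get(k, 0):.3f} | {bt['5s'].get(k, 0):.3f} |")
else:
    md_lines.append(f"Backtest comparison unavailable: {bt.get('error', 'n/a')}")
with open(out_md, 'w', encoding='utf-8') as f:
    f.write('\n'.join(md_lines) + '\n')
print(f"Markdown summary saved: {out_md}")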