"""Crossover Scalp — 5s Gold Standard Data ========================================== Option 4: Port vel_div crossover to 5s resolution. Signal: vel_div <= -ENTRY_T → LONG Exit: vel_div >= +ENTRY_T (mean-reversion complete) OR MAX_HOLD bars reached (safety cap) 1 bar = ~5 seconds on this dataset. Legacy optimal hold: 120 bars × 5s = 600s = 10 min. Sweep: ENTRY_T = [0.020, 0.050, 0.100, 0.200] MAX_HOLD = [10, 20, 60, 120, 240] bars (50s, 100s, 5m, 10m, 20m) Compare PF vs 1m klines crossover result (PF=1.007 ungated). Output: run_logs/crossover_5s_YYYYMMDD_HHMMSS.csv run_logs/crossover_5s_top_YYYYMMDD_HHMMSS.txt Runtime: ~10s """ import sys, time, csv, gc sys.stdout.reconfigure(encoding='utf-8', errors='replace') from pathlib import Path from datetime import datetime from collections import defaultdict import numpy as np import pandas as pd VBT_DIR_5S = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache") LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs") # Sweep parameters ENTRY_Ts = [0.020, 0.050, 0.100, 0.200] MAX_HOLDS = [10, 20, 60, 120, 240] # bars (× 5s = real seconds) # stats[(entry_t, max_hold)] = {wins, losses, gw, gl, n, total_hold} stats = defaultdict(lambda: {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0}) # Also track per-date stats for PF per file daily_rows = [] parquet_files = sorted(VBT_DIR_5S.glob("*.parquet")) parquet_files = [p for p in parquet_files if 'catalog' not in str(p)] total = len(parquet_files) print(f"Files: {total} (5s gold standard data)") print(f"Entry Ts: {ENTRY_Ts}") print(f"MaxHold: {MAX_HOLDS} bars (×5s = {[h*5 for h in MAX_HOLDS]}s)") print() t0 = time.time() # Control baseline: fraction of bars where BTCUSDT moves ±0.95% within 120 bars ctrl_stats = defaultdict(lambda: {'dn': 0, 'up': 0, 'n': 0}) for i, pf in enumerate(parquet_files): ds = pf.stem try: df = pd.read_parquet(pf) except Exception: continue if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns: continue vd = df['vel_div'].values.astype(np.float64) btc = df['BTCUSDT'].values.astype(np.float64) vd = np.where(np.isfinite(vd), vd, 0.0) btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan) n = len(btc) del df MAX_H = max(MAX_HOLDS) if n < MAX_H + 5: del vd, btc continue # cross_back[t] = True when vel_div has returned to >= +ENTRY_T (computed per threshold) # Iterate per ENTRY_T for entry_t in ENTRY_Ts: entry_mask = (vd <= -entry_t) & np.isfinite(btc) cross_back = (vd >= entry_t) # Build trades for all max_holds at once # For each entry, find the first cross_back within each max_hold window # Vectorized approach: build the trade list once, then tally by max_hold # trade list: (exit_bar_first_crossover, ret_at_crossover, ret_at_each_hold[]) # Since MAX_HOLDS is [10,20,60,120,240], we find crossover for max_hold=240 first # then earlier exits apply to smaller max_hold caps too for t in range(n - MAX_H): if not entry_mask[t]: continue ep = btc[t] if not np.isfinite(ep) or ep <= 0: continue # Find first crossover bar first_cross = MAX_H # default: no crossover within max window for k in range(1, MAX_H + 1): tb = t + k if tb >= n: first_cross = k break if cross_back[tb]: first_cross = k break for max_hold in MAX_HOLDS: # Actual exit: min(first_cross, max_hold) exit_bar = min(first_cross, max_hold) tb = t + exit_bar if tb >= n: continue xp = btc[tb] if not np.isfinite(xp) or xp <= 0: continue ret = (xp - ep) / ep # LONG return key = (entry_t, max_hold) s = stats[key] if ret >= 0: s['wins'] += 1 s['gw'] += ret else: s['losses'] += 1 s['gl'] += abs(ret) s['n'] += 1 s['total_hold'] += exit_bar del entry_mask, cross_back del vd, btc if (i + 1) % 10 == 0: gc.collect() print(f" [{i+1}/{total}] {ds} {time.time()-t0:.0f}s") elapsed = time.time() - t0 print(f"\nPass complete: {elapsed:.0f}s\n") # ─── Results Table ───────────────────────────────────────────────────────────── rows = [] for entry_t in ENTRY_Ts: for max_hold in MAX_HOLDS: key = (entry_t, max_hold) s = stats.get(key, {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0}) n_t = s['wins'] + s['losses'] if n_t == 0: continue pf = s['gw'] / s['gl'] if s['gl'] > 0 else (999.0 if s['gw'] > 0 else float('nan')) wr = s['wins'] / n_t * 100 avg_hold = s['total_hold'] / n_t avg_win = s['gw'] / s['wins'] if s['wins'] > 0 else 0.0 avg_loss = s['gl'] / s['losses'] if s['losses'] > 0 else 0.0 hold_sec = avg_hold * 5 # 5s per bar rows.append({ 'entry_t': entry_t, 'max_hold_bars': max_hold, 'max_hold_sec': max_hold * 5, 'n_trades': n_t, 'pf': round(pf, 4), 'wr': round(wr, 3), 'avg_hold_bars': round(avg_hold, 2), 'avg_hold_sec': round(hold_sec, 1), 'avg_win_pct': round(avg_win * 100, 4), 'avg_loss_pct': round(avg_loss * 100, 4), 'gross_win': round(s['gw'], 6), 'gross_loss': round(s['gl'], 6), }) # ─── Console ────────────────────────────────────────────────────────────────── print(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} {'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}") print("-" * 90) for r in rows: marker = " ◄" if r['pf'] > 1.01 else "" print(f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s " f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% " f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%{marker}") # Highlight best best = max(rows, key=lambda r: r['pf']) if rows else None if best: print(f"\n Best: T={best['entry_t']:.3f} MaxH={best['max_hold_bars']}b ({best['max_hold_sec']}s) " f"PF={best['pf']:.4f} WR={best['wr']:.1f}% AvgHold={best['avg_hold_sec']:.0f}s " f"N={best['n_trades']:,}") # ─── Comparison with 1m ──────────────────────────────────────────────────────── print(f"\n{'='*50}") print(f" 1m KLINES REFERENCE (ungated crossover):") print(f" PF=1.0073 N=1,005,665 AvgHold=2.2 bars (2.2 min)") print(f" BEST3 (9h,12h,18h): PF=1.0429 N=127,760") print(f" 5s GOLD STANDARD ({total} days, 2025-12-31 to 2026-02-26):") # ─── Save ────────────────────────────────────────────────────────────────────── LOG_DIR.mkdir(exist_ok=True) ts_str = datetime.now().strftime("%Y%m%d_%H%M%S") out_csv = LOG_DIR / f"crossover_5s_{ts_str}.csv" if rows: with open(out_csv, 'w', newline='') as f: w = csv.DictWriter(f, fieldnames=rows[0].keys()) w.writeheader(); w.writerows(rows) print(f"\n → {out_csv}") out_txt = LOG_DIR / f"crossover_5s_top_{ts_str}.txt" with open(out_txt, 'w', encoding='utf-8') as f: f.write(f"Crossover Scalp — 5s Gold Standard Data\n") f.write(f"56 days 2025-12-31 to 2026-02-26\n") f.write(f"Runtime: {elapsed:.0f}s\n\n") f.write(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} " f"{'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}\n") f.write("-" * 90 + "\n") for r in rows: f.write(f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s " f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% " f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%\n") f.write(f"\n1m reference (ungated): PF=1.0073 BEST3: PF=1.0429\n") print(f" → {out_txt}") print(f"\n Runtime: {elapsed:.0f}s") print(f" KEY: PF > 1.01 on 5s with decent N = potential real edge at 5s resolution.") print(f" AvgHold short (< 30s) = micro scalp viable. AvgHold > 60s = slow mean reversion.")