"""Crossover Scalp — 5s Gold Standard Data
==========================================

Option 4: Port vel_div crossover to 5s resolution.

Signal: vel_div <= -ENTRY_T → LONG
Exit:   vel_div >= +ENTRY_T (mean-reversion complete)
        OR MAX_HOLD bars reached (safety cap)

1 bar = ~5 seconds on this dataset.
Legacy optimal hold: 120 bars × 5s = 600s = 10 min.

Sweep:
    ENTRY_T  = [0.020, 0.050, 0.100, 0.200]
    MAX_HOLD = [10, 20, 60, 120, 240] bars (50s, 100s, 5m, 10m, 20m)

Compare PF vs 1m klines crossover result (PF=1.007 ungated).

Output:
    run_logs/crossover_5s_YYYYMMDD_HHMMSS.csv
    run_logs/crossover_5s_top_YYYYMMDD_HHMMSS.txt

Runtime: ~10s
"""
import sys, time, csv, gc

# Keep console output from crashing on non-ASCII glyphs (e.g. '◄', '─', '×')
# when stdout uses a narrow codepage (common on Windows consoles).
sys.stdout.reconfigure(encoding='utf-8', errors='replace')

from pathlib import Path
from datetime import datetime
from collections import defaultdict

import numpy as np
import pandas as pd
VBT_DIR_5S = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
|
|||
|
|
LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")
|
|||
|
|
|
|||
|
|
# Sweep parameters
|
|||
|
|
ENTRY_Ts = [0.020, 0.050, 0.100, 0.200]
|
|||
|
|
MAX_HOLDS = [10, 20, 60, 120, 240] # bars (× 5s = real seconds)
|
|||
|
|
|
|||
|
|
# stats[(entry_t, max_hold)] = {wins, losses, gw, gl, n, total_hold}
|
|||
|
|
stats = defaultdict(lambda: {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0})
|
|||
|
|
# Also track per-date stats for PF per file
|
|||
|
|
daily_rows = []
|
|||
|
|
|
|||
|
|
# Discover input files, skipping anything whose path mentions 'catalog'.
parquet_files = [
    p for p in sorted(VBT_DIR_5S.glob("*.parquet")) if 'catalog' not in str(p)
]
total = len(parquet_files)

print(f"Files: {total} (5s gold standard data)")
print(f"Entry Ts: {ENTRY_Ts}")
print(f"MaxHold: {MAX_HOLDS} bars (×5s = {[h*5 for h in MAX_HOLDS]}s)")
print()

t0 = time.time()

# Control baseline: fraction of bars where BTCUSDT moves ±0.95% within 120 bars
# NOTE(review): ctrl_stats is never updated or read below — confirm whether the
# control-baseline pass was removed intentionally.
ctrl_stats = defaultdict(lambda: {'dn': 0, 'up': 0, 'n': 0})
# ─── Sweep pass ────────────────────────────────────────────────────────────────
# One pass over all files; tally trades for every (entry_t, max_hold) combo.
#
# Entry:  vel_div <= -entry_t at a bar with a finite BTC price → open LONG.
# Exit:   first later bar where vel_div >= +entry_t, capped at max_hold bars.

# MAX_H depends only on MAX_HOLDS — hoist it out of the per-file loop.
MAX_H = max(MAX_HOLDS)

for i, pf in enumerate(parquet_files):
    ds = pf.stem

    try:
        df = pd.read_parquet(pf)
    except Exception:
        continue  # unreadable file: skip it, keep sweeping
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        continue

    vd = df['vel_div'].values.astype(np.float64)
    btc = df['BTCUSDT'].values.astype(np.float64)
    vd = np.where(np.isfinite(vd), vd, 0.0)                     # NaN/inf vel_div → "no signal"
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)   # non-positive/inf prices → NaN
    n = len(btc)
    del df  # release the DataFrame early; arrays are all we need

    if n < MAX_H + 5:
        # Too short to evaluate the widest hold window.
        del vd, btc
        continue

    for entry_t in ENTRY_Ts:
        entry_mask = (vd <= -entry_t) & np.isfinite(btc)
        cross_back = (vd >= entry_t)

        # Only bars with a full MAX_H look-ahead can be entries, so every t
        # below satisfies t + MAX_H < n — no bounds checks needed later.
        for t in np.flatnonzero(entry_mask[:n - MAX_H]):
            ep = btc[t]  # entry price: finite and > 0 by construction of entry_mask

            # First mean-reversion crossover within the widest window;
            # MAX_H means "no crossover seen" (exit at the cap).
            hits = np.flatnonzero(cross_back[t + 1:t + MAX_H + 1])
            first_cross = int(hits[0]) + 1 if hits.size else MAX_H

            for max_hold in MAX_HOLDS:
                # Actual exit: the crossover if it came early enough, else the cap.
                exit_bar = min(first_cross, max_hold)
                xp = btc[t + exit_bar]
                if not np.isfinite(xp) or xp <= 0:
                    continue  # exit price missing: drop this trade

                ret = (xp - ep) / ep  # LONG return

                s = stats[(entry_t, max_hold)]
                if ret >= 0:
                    s['wins'] += 1
                    s['gw'] += ret
                else:
                    s['losses'] += 1
                    s['gl'] += abs(ret)
                s['n'] += 1
                s['total_hold'] += exit_bar

        del entry_mask, cross_back

    del vd, btc

    if (i + 1) % 10 == 0:
        gc.collect()
        print(f" [{i+1}/{total}] {ds} {time.time()-t0:.0f}s")

elapsed = time.time() - t0
print(f"\nPass complete: {elapsed:.0f}s\n")
# ─── Results Table ─────────────────────────────────────────────────────────────
rows = []
for entry_t in ENTRY_Ts:
    for max_hold in MAX_HOLDS:
        # .get (not []) so empty combos don't create defaultdict entries.
        s = stats.get((entry_t, max_hold),
                      {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0})
        n_t = s['wins'] + s['losses']
        if not n_t:
            continue

        gw, gl = s['gw'], s['gl']
        if gl > 0:
            pf = gw / gl
        else:
            pf = 999.0 if gw > 0 else float('nan')  # all wins → capped PF; no trades cut both ways → NaN
        avg_hold = s['total_hold'] / n_t

        rows.append({
            'entry_t': entry_t,
            'max_hold_bars': max_hold,
            'max_hold_sec': max_hold * 5,
            'n_trades': n_t,
            'pf': round(pf, 4),
            'wr': round(s['wins'] / n_t * 100, 3),
            'avg_hold_bars': round(avg_hold, 2),
            'avg_hold_sec': round(avg_hold * 5, 1),  # 5s per bar
            'avg_win_pct': round((gw / s['wins'] if s['wins'] > 0 else 0.0) * 100, 4),
            'avg_loss_pct': round((gl / s['losses'] if s['losses'] > 0 else 0.0) * 100, 4),
            'gross_win': round(gw, 6),
            'gross_loss': round(gl, 6),
        })
# ─── Console ──────────────────────────────────────────────────────────────────
def _row_line(r):
    """Render one sweep result as a fixed-width table line (no trailing newline)."""
    return (f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s "
            f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% "
            f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%")

print(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} {'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}")
print("-" * 90)
for r in rows:
    # Mark combos whose profit factor clears the 1.01 interest threshold.
    print(_row_line(r) + (" ◄" if r['pf'] > 1.01 else ""))

# Highlight best
if rows:
    best = max(rows, key=lambda r: r['pf'])
    print(f"\n Best: T={best['entry_t']:.3f} MaxH={best['max_hold_bars']}b ({best['max_hold_sec']}s) "
          f"PF={best['pf']:.4f} WR={best['wr']:.1f}% AvgHold={best['avg_hold_sec']:.0f}s "
          f"N={best['n_trades']:,}")
else:
    best = None
# ─── Comparison with 1m ────────────────────────────────────────────────────────
# Reference numbers from the earlier 1m-klines crossover sweep, printed for a
# side-by-side read against the 5s results above.
# (Plain strings where there is nothing to interpolate — ruff F541.)
print(f"\n{'='*50}")
print(" 1m KLINES REFERENCE (ungated crossover):")
print(" PF=1.0073 N=1,005,665 AvgHold=2.2 bars (2.2 min)")
print(" BEST3 (9h,12h,18h): PF=1.0429 N=127,760")
print(f" 5s GOLD STANDARD ({total} days, 2025-12-31 to 2026-02-26):")
# ─── Save ──────────────────────────────────────────────────────────────────────
# parents=True so a missing intermediate directory doesn't abort the run
# after the (long) sweep has already completed.
LOG_DIR.mkdir(parents=True, exist_ok=True)
ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")

out_csv = LOG_DIR / f"crossover_5s_{ts_str}.csv"
if rows:
    # newline='' per the csv module contract; explicit UTF-8 for portability.
    with open(out_csv, 'w', newline='', encoding='utf-8') as f:
        w = csv.DictWriter(f, fieldnames=rows[0].keys())
        w.writeheader()
        w.writerows(rows)
    print(f"\n → {out_csv}")
# Human-readable top-line report alongside the CSV.
out_txt = LOG_DIR / f"crossover_5s_top_{ts_str}.txt"
with open(out_txt, 'w', encoding='utf-8') as f:
    f.write("Crossover Scalp — 5s Gold Standard Data\n")
    # Use the actual file count instead of the hard-coded "56 days" so the
    # report stays consistent with the console output when the range changes.
    # NOTE(review): the date range below is still hard-coded — confirm it.
    f.write(f"{total} days 2025-12-31 to 2026-02-26\n")
    f.write(f"Runtime: {elapsed:.0f}s\n\n")
    f.write(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} "
            f"{'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}\n")
    f.write("-" * 90 + "\n")
    for r in rows:
        f.write(f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s "
                f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% "
                f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%\n")
    f.write(f"\n1m reference (ungated): PF=1.0073 BEST3: PF=1.0429\n")
print(f" → {out_txt}")
print(f"\n Runtime: {elapsed:.0f}s")
# Interpretation guide for the sweep table (plain strings — nothing to interpolate).
print(" KEY: PF > 1.01 on 5s with decent N = potential real edge at 5s resolution.")
print(" AvgHold short (< 30s) = micro scalp viable. AvgHold > 60s = slow mean reversion.")