"""Crossover Scalp — 5s Gold Standard Data
==========================================

Option 4: Port vel_div crossover to 5s resolution.

Signal: vel_div <= -ENTRY_T → LONG
Exit:   vel_div >= +ENTRY_T (mean-reversion complete)
        OR MAX_HOLD bars reached (safety cap)

1 bar = ~5 seconds on this dataset.
Legacy optimal hold: 120 bars × 5s = 600s = 10 min.

Sweep:
    ENTRY_T  = [0.020, 0.050, 0.100, 0.200]
    MAX_HOLD = [10, 20, 60, 120, 240] bars (50s, 100s, 5m, 10m, 20m)

Compare PF vs 1m klines crossover result (PF=1.007 ungated).

Output:
    run_logs/crossover_5s_YYYYMMDD_HHMMSS.csv
    run_logs/crossover_5s_top_YYYYMMDD_HHMMSS.txt

Runtime: ~10s
"""
import sys, time, csv, gc

# Keep console output from crashing on non-ASCII glyphs (e.g. '◄', '─', '×')
# when stdout uses a narrow codepage (common on Windows consoles).
sys.stdout.reconfigure(encoding='utf-8', errors='replace')

from pathlib import Path
from datetime import datetime
from collections import defaultdict

import numpy as np
import pandas as pd
VBT_DIR_5S = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
|
|||
|
|
LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")
|
|||
|
|
|
|||
|
|
# Sweep parameters
|
|||
|
|
ENTRY_Ts = [0.020, 0.050, 0.100, 0.200]
|
|||
|
|
MAX_HOLDS = [10, 20, 60, 120, 240] # bars (× 5s = real seconds)
|
|||
|
|
|
|||
|
|
# stats[(entry_t, max_hold)] = {wins, losses, gw, gl, n, total_hold}
|
|||
|
|
stats = defaultdict(lambda: {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0})
|
|||
|
|
# Also track per-date stats for PF per file
|
|||
|
|
daily_rows = []
|
|||
|
|
|
|||
|
|
# Discover input files, skipping anything whose path mentions 'catalog'.
parquet_files = [
    p for p in sorted(VBT_DIR_5S.glob("*.parquet")) if 'catalog' not in str(p)
]
total = len(parquet_files)

print(f"Files: {total} (5s gold standard data)")
print(f"Entry Ts: {ENTRY_Ts}")
print(f"MaxHold: {MAX_HOLDS} bars (×5s = {[h*5 for h in MAX_HOLDS]}s)")
print()

t0 = time.time()

# Control baseline: fraction of bars where BTCUSDT moves ±0.95% within 120 bars
# NOTE(review): ctrl_stats is never updated or read below — confirm whether the
# control-baseline pass was removed intentionally.
ctrl_stats = defaultdict(lambda: {'dn': 0, 'up': 0, 'n': 0})
# ─── Sweep pass ────────────────────────────────────────────────────────────────
# One pass over all files; tally trades for every (entry_t, max_hold) combo.
#
# Entry:  vel_div <= -entry_t at a bar with a finite BTC price → open LONG.
# Exit:   first later bar where vel_div >= +entry_t, capped at max_hold bars.

# MAX_H depends only on MAX_HOLDS — hoist it out of the per-file loop.
MAX_H = max(MAX_HOLDS)

for i, pf in enumerate(parquet_files):
    ds = pf.stem

    try:
        df = pd.read_parquet(pf)
    except Exception:
        continue  # unreadable file: skip it, keep sweeping
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        continue

    vd = df['vel_div'].values.astype(np.float64)
    btc = df['BTCUSDT'].values.astype(np.float64)
    vd = np.where(np.isfinite(vd), vd, 0.0)                     # NaN/inf vel_div → "no signal"
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)   # non-positive/inf prices → NaN
    n = len(btc)
    del df  # release the DataFrame early; arrays are all we need

    if n < MAX_H + 5:
        # Too short to evaluate the widest hold window.
        del vd, btc
        continue

    for entry_t in ENTRY_Ts:
        entry_mask = (vd <= -entry_t) & np.isfinite(btc)
        cross_back = (vd >= entry_t)

        # Only bars with a full MAX_H look-ahead can be entries, so every t
        # below satisfies t + MAX_H < n — no bounds checks needed later.
        for t in np.flatnonzero(entry_mask[:n - MAX_H]):
            ep = btc[t]  # entry price: finite and > 0 by construction of entry_mask

            # First mean-reversion crossover within the widest window;
            # MAX_H means "no crossover seen" (exit at the cap).
            hits = np.flatnonzero(cross_back[t + 1:t + MAX_H + 1])
            first_cross = int(hits[0]) + 1 if hits.size else MAX_H

            for max_hold in MAX_HOLDS:
                # Actual exit: the crossover if it came early enough, else the cap.
                exit_bar = min(first_cross, max_hold)
                xp = btc[t + exit_bar]
                if not np.isfinite(xp) or xp <= 0:
                    continue  # exit price missing: drop this trade

                ret = (xp - ep) / ep  # LONG return

                s = stats[(entry_t, max_hold)]
                if ret >= 0:
                    s['wins'] += 1
                    s['gw'] += ret
                else:
                    s['losses'] += 1
                    s['gl'] += abs(ret)
                s['n'] += 1
                s['total_hold'] += exit_bar

        del entry_mask, cross_back

    del vd, btc

    if (i + 1) % 10 == 0:
        gc.collect()
        print(f" [{i+1}/{total}] {ds} {time.time()-t0:.0f}s")

elapsed = time.time() - t0
print(f"\nPass complete: {elapsed:.0f}s\n")
# ─── Results Table ─────────────────────────────────────────────────────────────
rows = []
for entry_t in ENTRY_Ts:
    for max_hold in MAX_HOLDS:
        # .get (not []) so empty combos don't create defaultdict entries.
        s = stats.get((entry_t, max_hold),
                      {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0})
        n_t = s['wins'] + s['losses']
        if not n_t:
            continue

        gw, gl = s['gw'], s['gl']
        if gl > 0:
            pf = gw / gl
        else:
            pf = 999.0 if gw > 0 else float('nan')  # all wins → capped PF; no trades cut both ways → NaN
        avg_hold = s['total_hold'] / n_t

        rows.append({
            'entry_t': entry_t,
            'max_hold_bars': max_hold,
            'max_hold_sec': max_hold * 5,
            'n_trades': n_t,
            'pf': round(pf, 4),
            'wr': round(s['wins'] / n_t * 100, 3),
            'avg_hold_bars': round(avg_hold, 2),
            'avg_hold_sec': round(avg_hold * 5, 1),  # 5s per bar
            'avg_win_pct': round((gw / s['wins'] if s['wins'] > 0 else 0.0) * 100, 4),
            'avg_loss_pct': round((gl / s['losses'] if s['losses'] > 0 else 0.0) * 100, 4),
            'gross_win': round(gw, 6),
            'gross_loss': round(gl, 6),
        })
# ─── Console ──────────────────────────────────────────────────────────────────
def _row_line(r):
    """Render one sweep result as a fixed-width table line (no trailing newline)."""
    return (f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s "
            f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% "
            f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%")

print(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} {'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}")
print("-" * 90)
for r in rows:
    # Mark combos whose profit factor clears the 1.01 interest threshold.
    print(_row_line(r) + (" ◄" if r['pf'] > 1.01 else ""))

# Highlight best
if rows:
    best = max(rows, key=lambda r: r['pf'])
    print(f"\n Best: T={best['entry_t']:.3f} MaxH={best['max_hold_bars']}b ({best['max_hold_sec']}s) "
          f"PF={best['pf']:.4f} WR={best['wr']:.1f}% AvgHold={best['avg_hold_sec']:.0f}s "
          f"N={best['n_trades']:,}")
else:
    best = None
# ─── Comparison with 1m ────────────────────────────────────────────────────────
# Reference numbers from the earlier 1m-klines crossover sweep, printed for a
# side-by-side read against the 5s results above.
# (Plain strings where there is nothing to interpolate — ruff F541.)
print(f"\n{'='*50}")
print(" 1m KLINES REFERENCE (ungated crossover):")
print(" PF=1.0073 N=1,005,665 AvgHold=2.2 bars (2.2 min)")
print(" BEST3 (9h,12h,18h): PF=1.0429 N=127,760")
print(f" 5s GOLD STANDARD ({total} days, 2025-12-31 to 2026-02-26):")
# ─── Save ──────────────────────────────────────────────────────────────────────
# parents=True so a missing intermediate directory doesn't abort the run
# after the (long) sweep has already completed.
LOG_DIR.mkdir(parents=True, exist_ok=True)
ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")

out_csv = LOG_DIR / f"crossover_5s_{ts_str}.csv"
if rows:
    # newline='' per the csv module contract; explicit UTF-8 for portability.
    with open(out_csv, 'w', newline='', encoding='utf-8') as f:
        w = csv.DictWriter(f, fieldnames=rows[0].keys())
        w.writeheader()
        w.writerows(rows)
    print(f"\n → {out_csv}")
# Human-readable top-line report alongside the CSV.
out_txt = LOG_DIR / f"crossover_5s_top_{ts_str}.txt"
with open(out_txt, 'w', encoding='utf-8') as f:
    f.write("Crossover Scalp — 5s Gold Standard Data\n")
    # Use the actual file count instead of the hard-coded "56 days" so the
    # report stays consistent with the console output when the range changes.
    # NOTE(review): the date range below is still hard-coded — confirm it.
    f.write(f"{total} days 2025-12-31 to 2026-02-26\n")
    f.write(f"Runtime: {elapsed:.0f}s\n\n")
    f.write(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} "
            f"{'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}\n")
    f.write("-" * 90 + "\n")
    for r in rows:
        f.write(f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s "
                f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% "
                f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%\n")
    f.write(f"\n1m reference (ungated): PF=1.0073 BEST3: PF=1.0429\n")
print(f" → {out_txt}")
print(f"\n Runtime: {elapsed:.0f}s")
# Interpretation guide for the sweep table (plain strings — nothing to interpolate).
print(" KEY: PF > 1.01 on 5s with decent N = potential real edge at 5s resolution.")
print(" AvgHold short (< 30s) = micro scalp viable. AvgHold > 60s = slow mean reversion.")