Files
DOLPHIN/nautilus_dolphin/crossover_5s_test.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

226 lines
9.2 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Crossover Scalp — 5s Gold Standard Data
==========================================
Option 4: Port vel_div crossover to 5s resolution.
Signal: vel_div <= -ENTRY_T → LONG
Exit: vel_div >= +ENTRY_T (mean-reversion complete)
OR MAX_HOLD bars reached (safety cap)
1 bar = ~5 seconds on this dataset.
Legacy optimal hold: 120 bars × 5s = 600s = 10 min.
Sweep:
ENTRY_T = [0.020, 0.050, 0.100, 0.200]
MAX_HOLD = [10, 20, 60, 120, 240] bars (50s, 100s, 5m, 10m, 20m)
Compare PF vs 1m klines crossover result (PF=1.007 ungated).
Output:
run_logs/crossover_5s_YYYYMMDD_HHMMSS.csv
run_logs/crossover_5s_top_YYYYMMDD_HHMMSS.txt
Runtime: ~10s
"""
import sys, time, csv, gc
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import numpy as np
import pandas as pd
VBT_DIR_5S = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")
# Sweep parameters
ENTRY_Ts = [0.020, 0.050, 0.100, 0.200]
MAX_HOLDS = [10, 20, 60, 120, 240] # bars (× 5s = real seconds)
# stats[(entry_t, max_hold)] = {wins, losses, gw, gl, n, total_hold}
stats = defaultdict(lambda: {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0})
# Also track per-date stats for PF per file
daily_rows = []
parquet_files = sorted(VBT_DIR_5S.glob("*.parquet"))
parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]
total = len(parquet_files)
print(f"Files: {total} (5s gold standard data)")
print(f"Entry Ts: {ENTRY_Ts}")
print(f"MaxHold: {MAX_HOLDS} bars (×5s = {[h*5 for h in MAX_HOLDS]}s)")
print()
t0 = time.time()
# Control baseline: fraction of bars where BTCUSDT moves ±0.95% within 120 bars
ctrl_stats = defaultdict(lambda: {'dn': 0, 'up': 0, 'n': 0})
# ── Per-file sweep ────────────────────────────────────────────────────────────
# For each day file: load vel_div + BTCUSDT, find LONG entries (vel_div <= -T),
# locate the first mean-reversion crossover (vel_div >= +T) strictly after the
# entry, then tally the trade under every MAX_HOLD cap.
#
# IMPROVEMENTS vs original:
#   * MAX_H = max(MAX_HOLDS) hoisted out of the file loop (loop-invariant).
#   * The per-entry Python scan (O(entries × MAX_H) interpreted work) is replaced
#     by a vectorized first-crossover lookup via searchsorted — same trades,
#     same exits, same tallies.
MAX_H = max(MAX_HOLDS)  # widest lookahead window, in bars

for i, pf in enumerate(parquet_files):
    ds = pf.stem
    try:
        df = pd.read_parquet(pf)
    except Exception:
        # Unreadable/corrupt day file: skip and keep sweeping (best-effort).
        continue
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        continue
    vd = df['vel_div'].values.astype(np.float64)
    btc = df['BTCUSDT'].values.astype(np.float64)
    vd = np.where(np.isfinite(vd), vd, 0.0)
    # Non-finite / non-positive prices become NaN so they can never trade.
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)
    n = len(btc)
    del df
    if n < MAX_H + 5:
        del vd, btc
        continue

    for entry_t in ENTRY_Ts:
        # Entry bars: deep negative divergence with a usable price. Only bars
        # with a full MAX_H lookahead are eligible (matches the original
        # `range(n - MAX_H)` bound, so every exit index is in range).
        entry_idx = np.flatnonzero((vd[:n - MAX_H] <= -entry_t)
                                   & np.isfinite(btc[:n - MAX_H]))
        if entry_idx.size == 0:
            continue
        # Bars where divergence has reverted to >= +entry_t.
        cross_idx = np.flatnonzero(vd >= entry_t)

        # first_cross[j] = bars until the first crossover strictly after entry
        # j, capped at MAX_H when none occurs inside the widest window.
        first_cross = np.full(entry_idx.size, MAX_H, dtype=np.int64)
        if cross_idx.size:
            pos = np.searchsorted(cross_idx, entry_idx + 1)  # first crossover > entry
            found = pos < cross_idx.size
            nxt = cross_idx[np.minimum(pos, cross_idx.size - 1)]
            hit = found & (nxt - entry_idx <= MAX_H)
            first_cross[hit] = nxt[hit] - entry_idx[hit]

        ep = btc[entry_idx]  # entry prices: finite and > 0 by construction
        for max_hold in MAX_HOLDS:
            # Actual exit: first crossover or the hold cap — whichever is first.
            exit_bar = np.minimum(first_cross, max_hold)
            xp = btc[entry_idx + exit_bar]
            ok = np.isfinite(xp)  # NaN exit price ⇒ trade not counted
            if not ok.any():
                continue
            ret = (xp[ok] - ep[ok]) / ep[ok]  # LONG return
            win = ret >= 0
            s = stats[(entry_t, max_hold)]
            s['wins'] += int(win.sum())
            s['gw'] += float(ret[win].sum())
            s['losses'] += int((~win).sum())
            s['gl'] += float((-ret[~win]).sum())
            s['n'] += int(ok.sum())
            s['total_hold'] += int(exit_bar[ok].sum())
        del entry_idx, cross_idx, first_cross, ep
    del vd, btc
    if (i + 1) % 10 == 0:
        gc.collect()
        print(f" [{i+1}/{total}] {ds} {time.time()-t0:.0f}s")

elapsed = time.time() - t0
print(f"\nPass complete: {elapsed:.0f}s\n")
# ─── Results Table ─────────────────────────────────────────────────────────────
# One row per (entry threshold, hold cap) combination that produced trades.
rows = []
_EMPTY = {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0}
for entry_t in ENTRY_Ts:
    for max_hold in MAX_HOLDS:
        s = stats.get((entry_t, max_hold), _EMPTY)
        trades = s['wins'] + s['losses']
        if not trades:
            continue
        # Profit factor: gross wins / gross losses. 999 is a "no losses"
        # sentinel; NaN when there were neither wins nor losses in value.
        if s['gl'] > 0:
            pf = s['gw'] / s['gl']
        elif s['gw'] > 0:
            pf = 999.0
        else:
            pf = float('nan')
        win_rate = s['wins'] / trades * 100
        mean_hold = s['total_hold'] / trades
        mean_win = s['gw'] / s['wins'] if s['wins'] > 0 else 0.0
        mean_loss = s['gl'] / s['losses'] if s['losses'] > 0 else 0.0
        rows.append({
            'entry_t': entry_t,
            'max_hold_bars': max_hold,
            'max_hold_sec': max_hold * 5,
            'n_trades': trades,
            'pf': round(pf, 4),
            'wr': round(win_rate, 3),
            'avg_hold_bars': round(mean_hold, 2),
            'avg_hold_sec': round(mean_hold * 5, 1),  # 5s per bar
            'avg_win_pct': round(mean_win * 100, 4),
            'avg_loss_pct': round(mean_loss * 100, 4),
            'gross_win': round(s['gw'], 6),
            'gross_loss': round(s['gl'], 6),
        })
# ─── Console ──────────────────────────────────────────────────────────────────
print(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} {'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}")
print("-" * 90)
for r in rows:
    # BUGFIX: both branches of this conditional were "" (the marker glyph was
    # evidently lost — the original file carried an ambiguous-Unicode warning),
    # so the PF > 1.01 edge threshold was never flagged. Restored with ASCII.
    marker = " *" if r['pf'] > 1.01 else ""
    print(f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s "
          f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% "
          f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%{marker}")

# Highlight best combo by profit factor.
# NOTE(review): NaN pf rows compare unpredictably under max() — confirm rows
# with pf=NaN cannot occur in practice (requires zero gross win AND loss).
best = max(rows, key=lambda r: r['pf']) if rows else None
if best:
    print(f"\n Best: T={best['entry_t']:.3f} MaxH={best['max_hold_bars']}b ({best['max_hold_sec']}s) "
          f"PF={best['pf']:.4f} WR={best['wr']:.1f}% AvgHold={best['avg_hold_sec']:.0f}s "
          f"N={best['n_trades']:,}")
# ─── Comparison with 1m ────────────────────────────────────────────────────────
# Reference numbers below are constants from a prior 1m-klines run.
print(f"\n{'='*50}")
print(f" 1m KLINES REFERENCE (ungated crossover):")
print(f" PF=1.0073 N=1,005,665 AvgHold=2.2 bars (2.2 min)")
print(f" BEST3 (9h,12h,18h): PF=1.0429 N=127,760")
print(f" 5s GOLD STANDARD ({total} days, 2025-12-31 to 2026-02-26):")

# ─── Save ──────────────────────────────────────────────────────────────────────
LOG_DIR.mkdir(parents=True, exist_ok=True)  # robust if the run_logs tree is missing
ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")
out_csv = LOG_DIR / f"crossover_5s_{ts_str}.csv"
if rows:
    # Explicit utf-8 for consistency with the TXT report below.
    with open(out_csv, 'w', newline='', encoding='utf-8') as f:
        w = csv.DictWriter(f, fieldnames=rows[0].keys())
        w.writeheader()
        w.writerows(rows)
    print(f"\n{out_csv}")

out_txt = LOG_DIR / f"crossover_5s_top_{ts_str}.txt"
with open(out_txt, 'w', encoding='utf-8') as f:
    f.write(f"Crossover Scalp — 5s Gold Standard Data\n")
    # BUGFIX: day count was hard-coded to 56; report the actual file count,
    # matching the console line above.
    # NOTE(review): the date span is still hard-coded — confirm against data.
    f.write(f"{total} days 2025-12-31 to 2026-02-26\n")
    f.write(f"Runtime: {elapsed:.0f}s\n\n")
    f.write(f"{'EntryT':>8} {'MaxH':>5} {'MaxSec':>6} {'N':>8} {'PF':>7} {'WR%':>6} "
            f"{'AvgH_s':>7} {'AvgW%':>7} {'AvgL%':>7}\n")
    f.write("-" * 90 + "\n")
    for r in rows:
        f.write(f" T={r['entry_t']:.3f} {r['max_hold_bars']:>5}b {r['max_hold_sec']:>5}s "
                f"{r['n_trades']:>8,} {r['pf']:>7.4f} {r['wr']:>6.2f}% "
                f"{r['avg_hold_sec']:>7.1f}s {r['avg_win_pct']:>7.4f}% {r['avg_loss_pct']:>7.4f}%\n")
    f.write(f"\n1m reference (ungated): PF=1.0073 BEST3: PF=1.0429\n")
print(f"{out_txt}")
print(f"\n Runtime: {elapsed:.0f}s")
print(f" KEY: PF > 1.01 on 5s with decent N = potential real edge at 5s resolution.")
print(f" AvgHold short (< 30s) = micro scalp viable. AvgHold > 60s = slow mean reversion.")