initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
299
nautilus_dolphin/rvol_gated_crossover_5y.py
Executable file
299
nautilus_dolphin/rvol_gated_crossover_5y.py
Executable file
@@ -0,0 +1,299 @@
|
||||
"""Realized-Vol Gated Crossover — 5y Klines
|
||||
==========================================
|
||||
Critical fee-viability test.
|
||||
|
||||
EV per trade on ungated crossover: +0.0002% (UNGATED)
|
||||
EV per trade on BEST3 hour gate: +0.0014%
|
||||
After 4bps maker fee: -0.039% ← NEGATIVE
|
||||
|
||||
Hypothesis: Q4 realized_vol days (volatile) have +23pp edge on the old
|
||||
directional strategy. In the crossover framing, volatile days should generate
|
||||
LARGER per-trade moves → higher EV per trade → potentially fee-viable.
|
||||
|
||||
This script:
|
||||
1. Computes per-day realized_vol
|
||||
2. Classifies days into quartiles (Q1=calm, Q4=volatile)
|
||||
3. Runs crossover (vel_div <= -0.020 → LONG, exit vel_div >= +0.020) per quartile
|
||||
4. Reports per-trade stats (avg_win%, avg_loss%, EV%) and PF per quartile
|
||||
5. Also tests SHORT direction (vel_div >= +0.020 → SHORT, exit vel_div <= -0.020)
|
||||
in Q4 volatile days (should benefit from +23pp historical edge)
|
||||
|
||||
Fee thresholds:
|
||||
Maker RT: 4bps = 0.04% ← minimum viable EV per trade
|
||||
Taker RT: 10bps = 0.10% ← typical market order
|
||||
|
||||
Output:
|
||||
run_logs/rvol_gated_crossover_YYYYMMDD_HHMMSS.csv
|
||||
Runtime: ~15s
|
||||
"""
|
||||
import sys, time, csv, gc
|
||||
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import os

# Input/output locations. The defaults are the original workstation paths;
# set DOLPHIN_VBT_DIR / DOLPHIN_LOG_DIR to run from another checkout or OS.
# An empty variable is treated as unset so it cannot silently become ".".
VBT_DIR = Path(
    os.environ.get("DOLPHIN_VBT_DIR")
    or r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines"
)
LOG_DIR = Path(
    os.environ.get("DOLPHIN_LOG_DIR")
    or r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs"
)

# vel_div magnitude used for both legs of the crossover: entry when vel_div
# reaches one side of +/-ENTRY_T, exit when it reaches the opposite side.
ENTRY_T = 0.020
MAX_HOLD = 20  # bars safety cap
# Year labels used for the per-year profit-factor columns and the Q4 table.
YEARS = ['2021', '2022', '2023', '2024', '2025', '2026']
|
||||
|
||||
# One parquet file per day; anything whose path mentions 'catalog' is skipped.
parquet_files = [
    path for path in sorted(VBT_DIR.glob("*.parquet"))
    if 'catalog' not in str(path)
]
total = len(parquet_files)
print(f"Files: {total}")

# Pass 1: compute realized_vol per day
# daily_rvol[file stem] = std of BTCUSDT log returns over the valid bars of that file
# daily_n[file stem]    = number of valid (finite, positive) BTCUSDT prices
daily_rvol = {}
daily_n = {}

t0 = time.time()
for day_file in parquet_files:
    try:
        frame = pd.read_parquet(day_file, columns=['BTCUSDT'])
    except Exception:
        # Best-effort: a file that cannot be read (or lacks the column) is skipped.
        continue
    prices = frame['BTCUSDT'].values.astype(np.float64)
    valid = prices[np.isfinite(prices) & (prices > 0)]
    # At least two prices are needed to form a single log return.
    if len(valid) >= 2:
        day_key = day_file.stem
        daily_rvol[day_key] = float(np.std(np.diff(np.log(valid))))
        daily_n[day_key] = len(valid)
|
||||
|
||||
# Quartile breakpoints
# NOTE: the breakpoints are taken over every day in the sample, so each day's
# quartile label depends on the whole sample (in-sample classification).
rvols = np.array(list(daily_rvol.values()))
if rvols.size == 0:
    # Without this guard np.percentile is called on an empty array, which
    # fails obscurely or yields NaN breakpoints; stop with a clear message.
    raise SystemExit(
        f"No usable BTCUSDT data found in {VBT_DIR} - nothing to analyse."
    )
q25, q50, q75 = np.percentile(rvols, [25, 50, 75])
print(f"Realized-vol quartiles: Q1<{q25:.6f} Q2<{q50:.6f} Q3<{q75:.6f} Q4>={q75:.6f}")
|
||||
|
||||
def rvol_quartile(rv):
    """Return the realized-vol quartile label for one day's realized vol.

    Compares ``rv`` against the module-level breakpoints ``q25``, ``q50`` and
    ``q75`` in ascending order and returns the label of the first breakpoint
    that ``rv`` is strictly below; anything else is ``'Q4_volatile'``.
    """
    for upper_bound, label in ((q25, 'Q1_calm'), (q50, 'Q2'), (q75, 'Q3')):
        if rv < upper_bound:
            return label
    return 'Q4_volatile'
|
||||
|
||||
def _new_bucket():
    """Return a zeroed accumulator for one statistics cell."""
    return {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0, 'n': 0, 'total_hold': 0}


def _simulate_crossover(entry_mask, cross_back, prices, direction, max_hold):
    """Run the crossover rule over one day and return its accumulated totals.

    Every bar ``t`` with ``entry_mask[t]`` true opens its own trade (trades
    may overlap). The trade exits at the first bar ``t + k`` (``1 <= k <=
    max_hold``) where ``cross_back`` is true, otherwise at ``t + max_hold``.
    Only entries with ``t < len(prices) - max_hold`` are considered, so the
    exit index is always inside the array. Trades whose entry or exit price
    is non-finite or non-positive are dropped. A return of exactly zero
    counts as a win.

    Args:
        entry_mask: boolean array, true where a trade is opened.
        cross_back: boolean array, true where an open trade is closed.
        prices: float array of prices aligned with the masks.
        direction: ``'L'`` for long returns; any other value is treated as short.
        max_hold: maximum holding period in bars.

    Returns:
        dict with keys ``wins``, ``losses``, ``gw`` (sum of winning returns),
        ``gl`` (sum of absolute losing returns), ``n`` (trade count) and
        ``total_hold`` (sum of holding periods in bars).
    """
    totals = _new_bucket()
    for t in range(len(prices) - max_hold):
        if not entry_mask[t]:
            continue
        ep = prices[t]
        if not np.isfinite(ep) or ep <= 0:
            continue

        # Find exit: first cross-back inside the window, else the hold cap.
        exit_bar = max_hold
        for k in range(1, max_hold + 1):
            if cross_back[t + k]:
                exit_bar = k
                break

        xp = prices[t + exit_bar]
        if not np.isfinite(xp) or xp <= 0:
            continue

        if direction == 'L':
            ret = (xp - ep) / ep
        else:
            ret = (ep - xp) / ep  # SHORT return

        if ret >= 0:
            totals['wins'] += 1
            totals['gw'] += ret
        else:
            totals['losses'] += 1
            totals['gl'] += abs(ret)
        totals['n'] += 1
        totals['total_hold'] += exit_bar
    return totals


# stats[(rvol_q, direction)] = {wins, losses, gw, gl, n, total_hold}
stats = defaultdict(_new_bucket)
# Also per year breakdown: stats_yr[(rvol_q, direction, year)]
stats_yr = defaultdict(_new_bucket)
daily_rows = []

print("\nPass 2: running crossover per rvol quartile...")

for i, pf in enumerate(parquet_files):
    ds = pf.stem
    year = ds[:4]  # the first four characters of the file stem are used as the year
    rv = daily_rvol.get(ds)
    if rv is None:
        # Day was skipped in pass 1, so it has no quartile.
        continue
    rvq = rvol_quartile(rv)

    try:
        df = pd.read_parquet(pf)
    except Exception:
        # Best-effort: unreadable files are skipped.
        continue
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        continue

    vd = df['vel_div'].values.astype(np.float64)
    btc = df['BTCUSDT'].values.astype(np.float64)
    # Non-finite vel_div becomes 0.0 (never a signal); invalid prices become NaN.
    vd = np.where(np.isfinite(vd), vd, 0.0)
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)
    n = len(btc)
    del df

    if n < MAX_HOLD + 5:
        continue

    # LONG crossover: enter vel_div <= -ENTRY_T, exit vel_div >= +ENTRY_T
    long_entry = (vd <= -ENTRY_T) & np.isfinite(btc)
    long_cross = (vd >= ENTRY_T)
    # SHORT crossover: enter vel_div >= +ENTRY_T, exit vel_div <= -ENTRY_T
    short_entry = (vd >= ENTRY_T) & np.isfinite(btc)
    short_cross = (vd <= -ENTRY_T)

    day_stats = {'L': {}, 'S': {}}

    for direction, entry_mask, cross_back in (
            ('L', long_entry, long_cross),
            ('S', short_entry, short_cross)):
        day = _simulate_crossover(entry_mask, cross_back, btc, direction, MAX_HOLD)

        # Fold the day's totals into the overall and the per-year cell.
        for bucket in (stats[(rvq, direction)], stats_yr[(rvq, direction, year)]):
            for field, value in day.items():
                bucket[field] += value

        day_stats[direction] = {
            'n': day['n'],
            'wr': day['wins'] / day['n'] * 100 if day['n'] > 0 else 0,
        }

    daily_rows.append({
        'date': ds, 'year': year, 'rvol_q': rvq, 'rvol': round(rv, 8),
        'n_long': day_stats['L'].get('n', 0),
        'wr_long': round(day_stats['L'].get('wr', 0), 2),
        'n_short': day_stats['S'].get('n', 0),
        'wr_short': round(day_stats['S'].get('wr', 0), 2),
    })

    # Release the day's arrays before the next file is loaded.
    del vd, btc, long_entry, long_cross, short_entry, short_cross

    if (i + 1) % 200 == 0:
        gc.collect()
        print(f" [{i+1}/{total}] {ds} {time.time()-t0:.0f}s")
|
||||
|
||||
elapsed = time.time() - t0
print(f"\nPass complete: {elapsed:.0f}s\n")

# ─── Results ──────────────────────────────────────────────────────────────────
RVOL_BINS = ['Q1_calm', 'Q2', 'Q3', 'Q4_volatile']
DIRS = ['L', 'S']


def _profit_factor(gross_win, gross_loss):
    """Gross wins over gross losses; 999.0 stands in when there are no losses."""
    if gross_loss > 0:
        return gross_win / gross_loss
    return 999.0


rows = []
print(f"{'RVolQ':<14} {'Dir'} {'N':>8} {'PF':>7} {'WR%':>6} {'AvgH':>5} "
      f"{'AvgW%':>8} {'AvgL%':>8} {'EV%':>9} {'EV-4bp':>9} {'EV-10bp':>10}")
print("-" * 110)

for vol_bin in RVOL_BINS:
    for side in DIRS:
        cell = stats.get((vol_bin, side))
        if cell is None or cell['n'] == 0:
            # Nothing traded in this quartile/direction: no table row, no CSV row.
            continue

        trades = cell['n']
        gross_win = cell['gw']
        gross_loss = cell['gl']

        win_rate = cell['wins'] / trades * 100
        profit_factor = _profit_factor(gross_win, gross_loss)
        mean_hold = cell['total_hold'] / trades
        mean_win = gross_win / cell['wins'] * 100 if cell['wins'] > 0 else 0.0
        mean_loss = gross_loss / cell['losses'] * 100 if cell['losses'] > 0 else 0.0
        ev = (gross_win - gross_loss) / trades * 100  # EV per trade in %
        ev_4bp = ev - 0.04   # after maker RT fee
        ev_10bp = ev - 0.10  # after taker RT fee

        if ev_4bp > 0:
            marker = " ◄◄ FEE-VIABLE (maker)"
        elif ev > 0.03:
            marker = " ◄ BORDERLINE"
        else:
            marker = ""

        print(f"{vol_bin:<14} {side} {trades:>8,} {profit_factor:>7.4f} {win_rate:>6.2f}% {mean_hold:>5.2f} "
              f"{mean_win:>8.4f}% {mean_loss:>8.4f}% {ev:>+9.4f}% {ev_4bp:>+9.4f}% {ev_10bp:>+10.4f}%{marker}")

        # Per-year PF columns; NaN marks a year with no trades in this cell.
        year_columns = {}
        for yr in YEARS:
            year_cell = stats_yr.get((vol_bin, side, yr))
            if year_cell and year_cell['n'] > 0:
                year_columns[f'pf_{yr}'] = round(
                    _profit_factor(year_cell['gw'], year_cell['gl']), 3)
            else:
                year_columns[f'pf_{yr}'] = float('nan')

        row = {
            'rvol_q': vol_bin,
            'direction': side,
            'n_trades': trades,
            'pf': round(profit_factor, 4),
            'wr': round(win_rate, 3),
            'avg_hold_bars': round(mean_hold, 2),
            'avg_win_pct': round(mean_win, 5),
            'avg_loss_pct': round(mean_loss, 5),
            'ev_pct': round(ev, 5),
            'ev_minus_4bp': round(ev_4bp, 5),
            'ev_minus_10bp': round(ev_10bp, 5),
        }
        row.update(year_columns)
        rows.append(row)
    # Blank line between quartile groups.
    print()
|
||||
|
||||
# Per-year breakdown for Q4 only
print(f"\n{'='*80}")
print(f" Q4 VOLATILE — Per-Year PF (LONG and SHORT crossover)")
print(f"{'='*80}")
print(f" {'Year':<6} {'L_PF':>7} {'L_N':>8} {'L_EV%':>9} | {'S_PF':>7} {'S_N':>8} {'S_EV%':>9}")
print(f" {'-'*70}")
for yr in YEARS:
    # One formatted "PF N EV%" cell per direction, kept in plain locals rather
    # than being stashed in dynamically named module globals.
    cells = []
    for direction in ('L', 'S'):
        s = stats_yr.get(('Q4_volatile', direction, yr))
        n_trades = s['n'] if s else 0
        if n_trades > 0:
            # 999.0 stands in for the profit factor when there are no losses.
            pfy = s['gw'] / s['gl'] if s['gl'] > 0 else 999.0
            evy = (s['gw'] - s['gl']) / n_trades * 100
        else:
            # No trades for this year/direction: shown as nan.
            pfy = evy = float('nan')
        cells.append(f"{pfy:>7.3f} {n_trades:>8,} {evy:>+9.4f}%")
    print(f" {yr:<6} " + " | ".join(cells))
|
||||
|
||||
# ─── Save ──────────────────────────────────────────────────────────────────────
def _write_csv(path, records):
    """Write a non-empty list of dicts to ``path``; the header comes from the first record."""
    with open(path, 'w', newline='') as f:
        w = csv.DictWriter(f, fieldnames=list(records[0].keys()))
        w.writeheader()
        w.writerows(records)


# parents=True: a missing parent directory must not discard a finished run.
LOG_DIR.mkdir(parents=True, exist_ok=True)
ts_str = datetime.now().strftime("%Y%m%d_%H%M%S")

# Summary: one row per (rvol quartile, direction) that had trades.
out_csv = LOG_DIR / f"rvol_gated_crossover_{ts_str}.csv"
if rows:
    _write_csv(out_csv, rows)
    print(f"\n → {out_csv}")

# Daily detail: one row per processed day.
daily_csv = LOG_DIR / f"rvol_gated_daily_{ts_str}.csv"
if daily_rows:
    _write_csv(daily_csv, daily_rows)
    print(f" → {daily_csv}")

print(f"\n Runtime: {elapsed:.0f}s")
print(f"\n KEY:")
print(f" EV-4bp > 0 = fee-viable with maker orders")
print(f" EV-10bp > 0 = fee-viable with market orders")
print(f" Q4_volatile = top 25% most volatile days — target regime")
|
||||
Reference in New Issue
Block a user