# File: DOLPHIN/nautilus_dolphin/test_edge_signal_5y.py  (154 lines, 5.9 KiB, Python)

"""Fast Vectorized Edge Signal Existence Test — 5-Year Klines
=============================================================
Tests four hypotheses simultaneously:
A. vel_div <= -0.02 SHORT (original system)
B. vel_div <= -0.02 LONG (inverse: signal predicts UP)
C. vel_div >= +0.02 SHORT (opposite sign as SHORT trigger)
D. vel_div >= +0.02 LONG (opposite sign predicts UP)
Compares each against unconditional 120-bar baseline.
No engine stack — raw signal only. ~1-3 min runtime.
"""
import sys, time
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
import pandas as pd
from collections import defaultdict
# Directory holding the kline parquet cache that the scan walks over.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")

# Take-profit distance (basis points) and maximum look-ahead window (bars).
TP_BPS = 95
MAX_HOLD = 120
tp_pct = TP_BPS / 10000.0  # the same take-profit expressed as a price fraction

# Every *.parquet directly under VBT_DIR, in sorted order, except paths that
# mention "catalog".
parquet_files = sorted(
    path for path in VBT_DIR.glob("*.parquet") if 'catalog' not in str(path)
)
total = len(parquet_files)
print(f"Files: {total} TP={TP_BPS}bps MAX_HOLD={MAX_HOLD} bars\n")


def _vd_at_or_below_minus_2pct(vd):
    """Signal mask: vel_div at or below -0.02."""
    return vd <= -0.02


def _vd_at_or_above_plus_2pct(vd):
    """Signal mask: vel_div at or above +0.02."""
    return vd >= +0.02


# One row per hypothesis: (label, signal mask function, direction).
# direction: -1=SHORT (price must fall tp_pct), +1=LONG (price must rise tp_pct)
TESTS = [
    ('A: vd<=-0.02 SHORT', _vd_at_or_below_minus_2pct, -1),
    ('B: vd<=-0.02 LONG', _vd_at_or_below_minus_2pct, +1),
    ('C: vd>=+0.02 SHORT', _vd_at_or_above_plus_2pct, -1),
    ('D: vd>=+0.02 LONG', _vd_at_or_above_plus_2pct, +1),
]


def _empty_year_stats():
    """Return a fresh, zeroed accumulator for one (hypothesis, year) cell."""
    return {
        'total_bars': 0,
        'signal_bars': 0,
        'wins': 0,
        'losses': 0,
        'loss_returns': [],
    }


# stats[label][year] -> accumulator dict; year buckets are created on first access.
stats = {entry[0]: defaultdict(_empty_year_stats) for entry in TESTS}

# ctrl[year] -> counters for the unconditional control sample.
ctrl = defaultdict(lambda: dict(up=0, dn=0, n=0))
def _finite_forward_window(prices, j, max_hold):
    """Return the finite prices in bars j+1 .. j+max_hold.

    The slice is clipped at the end of ``prices``, so the result can be shorter
    than ``max_hold`` (or empty) near the end of a file.
    """
    ahead = prices[j + 1:j + 1 + max_hold]
    return ahead[np.isfinite(ahead)]


def _touched_tp(entry, window, direction, tp):
    """Tell whether ``window`` ever reaches the take-profit level.

    ``direction`` -1 (SHORT) needs the window low at or below ``entry * (1 - tp)``;
    any other direction (LONG) needs the window high at or above
    ``entry * (1 + tp)``.  ``window`` must be non-empty and finite.
    """
    if direction == -1:
        return window.min() <= entry * (1.0 - tp)
    return window.max() >= entry * (1.0 + tp)


t0 = time.time()
for i, path in enumerate(parquet_files):
    ds = path.stem
    year = ds[:4]  # the first four characters of the file stem select the year bucket
    try:
        df = pd.read_parquet(path)
    except Exception as exc:
        # Best-effort scan: skip an unreadable file, but report it so a broken
        # cache cannot silently shrink the sample.
        print(f"  [skip] {path.name}: {type(exc).__name__}: {exc}")
        continue
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        continue

    raw_vd = df['vel_div'].values
    # Non-finite signal values become 0.0, which satisfies neither threshold in TESTS.
    vd = np.where(np.isfinite(raw_vd), raw_vd, 0.0)
    btc = df['BTCUSDT'].values
    # Non-finite or non-positive prices are treated as missing.
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)
    n = len(vd)

    for label, mask_fn, direction in TESTS:
        ys = stats[label][year]
        ys['total_bars'] += n
        for j in np.where(mask_fn(vd))[0]:
            ep = btc[j]
            if not np.isfinite(ep):
                continue
            window = _finite_forward_window(btc, j, MAX_HOLD)
            if window.size == 0:
                continue
            if _touched_tp(ep, window, direction, tp_pct):
                ys['wins'] += 1
            else:
                # No TP touch: book the sign-adjusted return at the last finite
                # price inside the window.
                ys['losses'] += 1
                ys['loss_returns'].append((window[-1] - ep) / ep * direction)
            ys['signal_bars'] += 1

    # Control: unconditional sample on every 60th bar that has a full look-ahead.
    # It applies the same touch-TP-within-the-window rule as the signal tests.
    # Measuring only the return at exactly bar +MAX_HOLD would understate the
    # baseline, because a touch anywhere in the window is at least as likely
    # as finishing beyond the level, and would inflate every reported edge.
    cy = ctrl[year]
    for j in range(0, n - MAX_HOLD, 60):
        ep = btc[j]
        if not np.isfinite(ep):
            continue
        window = _finite_forward_window(btc, j, MAX_HOLD)
        if window.size == 0:
            continue
        cy['up'] += int(_touched_tp(ep, window, +1, tp_pct))
        cy['dn'] += int(_touched_tp(ep, window, -1, tp_pct))
        cy['n'] += 1

    if (i + 1) % 200 == 0:
        el = time.time() - t0
        print(f" [{i+1}/{total}] {ds} {el:.0f}s")

elapsed = time.time() - t0
# ── Print results ──────────────────────────────────────────────────────────────
# Horizontal rules for the report. The original expressions multiplied an empty
# string, so the separators printed as blank lines.
_RULE_60 = '─' * 60
_RULE_46 = '─' * 46


def _pct(part, whole):
    """Return ``part / whole`` as a percentage, or 0 when ``whole`` is zero."""
    return part / whole * 100 if whole else 0


def _profit_factor(n_wins, other_returns):
    """Return gross TP winnings divided by the net loss of the non-TP exits.

    ``other_returns`` are the sign-adjusted returns of signals that never hit
    TP; they can be positive.  When they do not add up to a net loss there is
    nothing to divide by, so the sentinel 999 is returned.  Taking ``abs()`` of
    a net-positive sum would misreport that gain as a loss.
    """
    net_other = sum(other_returns)
    if net_other >= 0:
        return 999
    return (n_wins * tp_pct) / -net_other


ctrl_dn_tot = sum(cy['dn'] for cy in ctrl.values())
ctrl_up_tot = sum(cy['up'] for cy in ctrl.values())
ctrl_n_tot = sum(cy['n'] for cy in ctrl.values())
ctrl_dn_pct = _pct(ctrl_dn_tot, ctrl_n_tot)
ctrl_up_pct = _pct(ctrl_up_tot, ctrl_n_tot)
print(f"\nControl (unconditional 120-bar):")
print(f" DOWN {TP_BPS}bps: {ctrl_dn_pct:.1f}% UP {TP_BPS}bps: {ctrl_up_pct:.1f}% n={ctrl_n_tot:,}")

for label, _, direction in TESTS:
    all_yrs = stats[label]
    tot_sig = sum(y['signal_bars'] for y in all_yrs.values())
    tot_win = sum(y['wins'] for y in all_yrs.values())
    tot_los = sum(y['losses'] for y in all_yrs.values())
    wr = _pct(tot_win, tot_win + tot_los)
    profit_factor = _profit_factor(
        tot_win, [r for y in all_yrs.values() for r in y['loss_returns']]
    )
    # SHORT hypotheses are compared with the control's DOWN rate, LONG with UP.
    ctrl_key = 'dn' if direction == -1 else 'up'
    ctrl_bl = ctrl_dn_pct if direction == -1 else ctrl_up_pct
    edge = wr - ctrl_bl

    print(f"\n{_RULE_60}")
    print(f" {label}")
    print(f" {'Year':<6} {'Sigs':>8} {'WR':>7} {'PF':>7} {'Edge':>8}")
    print(f" {_RULE_46}")
    for year in sorted(all_yrs.keys()):
        ys = all_yrs[year]
        ywr = _pct(ys['wins'], ys['wins'] + ys['losses'])
        ypf = _profit_factor(ys['wins'], ys['loss_returns'])
        # Judge each year against that year's own control sample; fall back to
        # the pooled baseline only when the year has no control observations.
        # .get() avoids creating empty buckets in the defaultdict.
        year_ctrl = ctrl.get(year)
        if year_ctrl and year_ctrl['n']:
            year_bl = _pct(year_ctrl[ctrl_key], year_ctrl['n'])
        else:
            year_bl = ctrl_bl
        yedge = ywr - year_bl
        print(f" {year:<6} {ys['signal_bars']:>8,} {ywr:>6.1f}% {ypf:>7.3f} {yedge:>+7.1f}pp")
    print(f" {_RULE_46}")
    print(f" {'TOTAL':<6} {tot_sig:>8,} {wr:>6.1f}% {profit_factor:>7.3f} {edge:>+7.1f}pp ← {'EDGE' if edge > 2 else 'WEAK' if edge > 0 else 'NO EDGE'}")

print(f"\n{'='*60}")
print(f" Runtime: {elapsed:.0f}s")
print(f"{'='*60}")