154 lines
5.9 KiB
Python
154 lines
5.9 KiB
Python
|
|
"""Fast Vectorized Edge Signal Existence Test — 5-Year Klines
|
||
|
|
=============================================================
|
||
|
|
Tests four hypotheses simultaneously:
|
||
|
|
A. vel_div <= -0.02 SHORT (original system)
|
||
|
|
B. vel_div <= -0.02 LONG (inverse: signal predicts UP)
|
||
|
|
C. vel_div >= +0.02 SHORT (opposite sign as SHORT trigger)
|
||
|
|
D. vel_div >= +0.02 LONG (opposite sign predicts UP)
|
||
|
|
|
||
|
|
Compares each against unconditional 120-bar baseline.
|
||
|
|
No engine stack — raw signal only. ~1-3 min runtime.
|
||
|
|
"""
|
||
|
|
import sys, time
|
||
|
|
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
|
||
|
|
from pathlib import Path
|
||
|
|
import numpy as np
|
||
|
|
import pandas as pd
|
||
|
|
from collections import defaultdict
|
||
|
|
|
||
|
|
# Directory scanned (non-recursively) for *.parquet input files.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
# Take-profit distance in basis points, and the same value as a fraction.
TP_BPS = 95
# Maximum number of bars a position is held while waiting for the TP level.
MAX_HOLD = 120
tp_pct = TP_BPS / 10000.0

# Filter on the file NAME only: testing the full path would drop every file
# as soon as any parent directory happened to contain "catalog".
parquet_files = sorted(
    p for p in (VBT_DIR.glob("*.parquet") if VBT_DIR.is_dir() else ())
    if 'catalog' not in p.name
)
total = len(parquet_files)
if not parquet_files:
    # Without this the script silently prints an all-zero report.
    print(f"WARNING: no parquet files found in {VBT_DIR}", file=sys.stderr)
print(f"Files: {total} TP={TP_BPS}bps MAX_HOLD={MAX_HOLD} bars\n")
|
||
|
|
|
||
|
|
# Magnitude of vel_div at which a bar counts as a signal. Shared by all four
# hypotheses so the masks and the labels cannot drift apart.
VD_THRESHOLD = 0.02


def _vd_negative_extreme(vd):
    """Mask of bars whose vel_div is at or below -VD_THRESHOLD."""
    return vd <= -VD_THRESHOLD


def _vd_positive_extreme(vd):
    """Mask of bars whose vel_div is at or above +VD_THRESHOLD."""
    return vd >= VD_THRESHOLD


# Four buckets: (label, signal_mask_fn, direction)
# direction: -1=SHORT (price must fall tp_pct), +1=LONG (price must rise tp_pct)
TESTS = [
    (f'A: vd<=-{VD_THRESHOLD} SHORT', _vd_negative_extreme, -1),
    (f'B: vd<=-{VD_THRESHOLD} LONG', _vd_negative_extreme, +1),
    (f'C: vd>=+{VD_THRESHOLD} SHORT', _vd_positive_extreme, -1),
    (f'D: vd>=+{VD_THRESHOLD} LONG', _vd_positive_extreme, +1),
]
|
||
|
|
|
||
|
|
def _new_year_stats():
    """Return a fresh, zeroed accumulator for one (hypothesis, year) pair."""
    return dict(
        total_bars=0,
        signal_bars=0,
        wins=0,
        losses=0,
        loss_returns=[],
    )


def _new_ctrl_counts():
    """Return a fresh, zeroed control counter for one year."""
    return dict(up=0, dn=0, n=0)


# stats[label][year] -> accumulator; years are created lazily on first access.
stats = {entry[0]: defaultdict(_new_year_stats) for entry in TESTS}

# ctrl[year] -> unconditional control counts for that year.
ctrl = defaultdict(_new_ctrl_counts)
|
||
|
|
|
||
|
|
def _future_prices(prices, entry_idx, max_hold):
    """Return the finite prices in the up-to-`max_hold` bars after `entry_idx`.

    The slice is clipped at the end of `prices`, so an entry close to the end
    of a file gets a shorter window. An empty result means there is no usable
    future bar at all.
    """
    window = prices[entry_idx + 1:entry_idx + 1 + max_hold]
    return window[np.isfinite(window)]


def _tp_touched(entry_price, future, direction, tp_frac):
    """Return True if any price in `future` reaches the take-profit level.

    direction -1 (SHORT) needs a price at or below entry * (1 - tp_frac);
    direction +1 (LONG) needs a price at or above entry * (1 + tp_frac).
    `future` must be non-empty.
    """
    if direction == -1:
        return future.min() <= entry_price * (1.0 - tp_frac)
    return future.max() >= entry_price * (1.0 + tp_frac)


# Spacing, in bars, between unconditional control entries.
CTRL_STRIDE = 60

t0 = time.time()
skipped_files = 0
for i, path in enumerate(parquet_files):
    ds = path.stem
    year = ds[:4]  # the first four characters of the file stem are used as the year key
    try:
        df = pd.read_parquet(path)
    except Exception as exc:
        # Best-effort scan: one unreadable file must not abort the whole run,
        # but it must not disappear silently either.
        skipped_files += 1
        print(f"  [skip] {path.name}: {type(exc).__name__}: {exc}", file=sys.stderr)
        continue
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        skipped_files += 1
        continue

    raw_vd = df['vel_div'].values
    # Non-finite vel_div becomes 0.0 so it can never satisfy either threshold.
    vd = np.where(np.isfinite(raw_vd), raw_vd, 0.0)
    btc = df['BTCUSDT'].values
    # Non-finite or non-positive prices become NaN; every later check only
    # needs np.isfinite().
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)
    n = len(vd)

    for label, mask_fn, direction in TESTS:
        ys = stats[label][year]
        ys['total_bars'] += n

        for j in np.flatnonzero(mask_fn(vd)):
            ep = btc[j]
            if not np.isfinite(ep):
                continue
            future = _future_prices(btc, j, MAX_HOLD)
            if future.size == 0:
                continue

            if _tp_touched(ep, future, direction, tp_pct):
                ys['wins'] += 1
            else:
                # Not a TP hit: exit at the last finite price in the window.
                # The return is sign-adjusted so positive means profit.
                ys['losses'] += 1
                ys['loss_returns'].append((future[-1] - ep) / ep * direction)
            ys['signal_bars'] += 1

    # Control: unconditional entry every CTRL_STRIDE-th bar that has a full
    # MAX_HOLD window. It uses the SAME touch criterion as the hypotheses;
    # comparing touch-based win rates against an end-of-window return would
    # overstate every edge, because touching TP is always at least as likely
    # as finishing beyond it.
    cy = ctrl[year]
    for j in range(0, n - MAX_HOLD, CTRL_STRIDE):
        ep = btc[j]
        if not np.isfinite(ep):
            continue
        future = _future_prices(btc, j, MAX_HOLD)
        if future.size == 0:
            continue
        cy['up'] += int(_tp_touched(ep, future, +1, tp_pct))
        cy['dn'] += int(_tp_touched(ep, future, -1, tp_pct))
        cy['n'] += 1

    if (i + 1) % 200 == 0:
        el = time.time() - t0
        print(f" [{i+1}/{total}] {ds} {el:.0f}s")

elapsed = time.time() - t0
if skipped_files:
    print(
        f"  Skipped {skipped_files} file(s): unreadable or missing vel_div/BTCUSDT",
        file=sys.stderr,
    )
|
||
|
|
|
||
|
|
# ── Print results ──────────────────────────────────────────────────────────────
def _pct(part, whole):
    """Return part/whole as a percentage, or 0 when `whole` is zero."""
    return part / whole * 100 if whole else 0


def _profit_factor(wins, loss_returns, tp_frac):
    """Return TP gains divided by the net loss of the non-TP exits.

    `loss_returns` holds the sign-adjusted returns of every exit that did not
    reach TP, and these can be positive. When their sum is not negative there
    is no net loss to divide by, so the sentinel 999 is returned. Taking
    abs() of a positive sum would report a net profit as if it were a loss.
    """
    net_non_tp = sum(loss_returns)
    if net_non_tp >= 0:
        return 999
    return wins * tp_frac / -net_non_tp


def _baseline_pct(counts, direction):
    """Return the control hit rate (in %) that matches `direction`."""
    return _pct(counts['dn' if direction == -1 else 'up'], counts['n'])


ctrl_dn_tot = sum(cy['dn'] for cy in ctrl.values())
ctrl_up_tot = sum(cy['up'] for cy in ctrl.values())
ctrl_n_tot = sum(cy['n'] for cy in ctrl.values())
ctrl_dn_pct = _pct(ctrl_dn_tot, ctrl_n_tot)
ctrl_up_pct = _pct(ctrl_up_tot, ctrl_n_tot)

print(f"\nControl (unconditional {MAX_HOLD}-bar):")
print(f" DOWN {TP_BPS}bps: {ctrl_dn_pct:.1f}% UP {TP_BPS}bps: {ctrl_up_pct:.1f}% n={ctrl_n_tot:,}")

for label, _, direction in TESTS:
    all_yrs = stats[label]
    tot_sig = sum(y['signal_bars'] for y in all_yrs.values())
    tot_win = sum(y['wins'] for y in all_yrs.values())
    tot_los = sum(y['losses'] for y in all_yrs.values())
    wr = _pct(tot_win, tot_win + tot_los)
    all_lr = [r for y in all_yrs.values() for r in y['loss_returns']]
    tot_pf = _profit_factor(tot_win, all_lr, tp_pct)
    ctrl_bl = ctrl_dn_pct if direction == -1 else ctrl_up_pct
    edge = wr - ctrl_bl

    print(f"\n{'─'*60}")
    print(f" {label}")
    print(f" {'Year':<6} {'Sigs':>8} {'WR':>7} {'PF':>7} {'Edge':>8}")
    print(f" {'─'*46}")
    for year in sorted(all_yrs.keys()):
        ys = all_yrs[year]
        ywr = _pct(ys['wins'], ys['wins'] + ys['losses'])
        ypf = _profit_factor(ys['wins'], ys['loss_returns'], tp_pct)
        # Measure each year against its OWN control rate, so a volatile year
        # does not look like an edge. Fall back to the pooled baseline when
        # the year has no control samples. .get() avoids creating entries in
        # the defaultdict.
        year_ctrl = ctrl.get(year)
        if year_ctrl and year_ctrl['n']:
            year_bl = _baseline_pct(year_ctrl, direction)
        else:
            year_bl = ctrl_bl
        yedge = ywr - year_bl
        print(f" {year:<6} {ys['signal_bars']:>8,} {ywr:>6.1f}% {ypf:>7.3f} {yedge:>+7.1f}pp")
    print(f" {'─'*46}")
    print(f" {'TOTAL':<6} {tot_sig:>8,} {wr:>6.1f}% {tot_pf:>7.3f} {edge:>+7.1f}pp ← {'EDGE' if edge > 2 else 'WEAK' if edge > 0 else 'NO EDGE'}")

print(f"\n{'='*60}")
print(f" Runtime: {elapsed:.0f}s")
print(f"{'='*60}")
|