"""Fast Vectorized Edge Signal Existence Test — 5-Year Klines
=============================================================
Tests four hypotheses simultaneously:
  A. vel_div <= -0.02 SHORT (original system)
  B. vel_div <= -0.02 LONG  (inverse: signal predicts UP)
  C. vel_div >= +0.02 SHORT (opposite sign as SHORT trigger)
  D. vel_div >= +0.02 LONG  (opposite sign predicts UP)

A signal "wins" when BTCUSDT touches the take-profit level (TP_BPS) at any
bar within the next MAX_HOLD bars; otherwise it is closed at the last finite
price of that window.  Each win rate is compared against an unconditional
baseline measured with the SAME touch-within-window criterion, so the
reported edge is like-for-like.

No engine stack — raw signal only. ~1-3 min runtime.
"""
import sys
import time
from collections import defaultdict
from pathlib import Path

import numpy as np
import pandas as pd

VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
TP_BPS = 95
MAX_HOLD = 120
tp_pct = TP_BPS / 10000.0
CONTROL_STEP = 60   # the control samples every 60th bar
PF_CAP = 999.0      # displayed profit factor when there is profit but no loss

# Four buckets: (label, signal_mask_fn, direction)
# direction: -1=SHORT (price must fall tp_pct), +1=LONG (price must rise tp_pct)
TESTS = [
    ('A: vd<=-0.02 SHORT', lambda vd: vd <= -0.02, -1),
    ('B: vd<=-0.02 LONG', lambda vd: vd <= -0.02, +1),
    ('C: vd>=+0.02 SHORT', lambda vd: vd >= +0.02, -1),
    ('D: vd>=+0.02 LONG', lambda vd: vd >= +0.02, +1),
]


def new_year_stats():
    """Return an empty per-year accumulator for one hypothesis."""
    return {
        'total_bars': 0,
        'signal_bars': 0,
        'wins': 0,
        'losses': 0,
        'loss_returns': [],
    }


def evaluate_entry(prices, j, direction, tp=tp_pct, max_hold=MAX_HOLD):
    """Evaluate one hypothetical entry at bar ``j``.

    Args:
        prices: 1-D float array of prices; unusable bars are NaN.
        j: index of the entry bar.
        direction: -1 for SHORT, anything else is treated as LONG.
        tp: take-profit distance as a fraction of the entry price.
        max_hold: number of bars after ``j`` that may be inspected.

    Returns:
        ``None`` when the entry price is unusable or no finite price follows
        within the window; otherwise ``(hit, ret)`` where ``hit`` says whether
        the take-profit was touched and ``ret`` is ``tp`` on a hit, else the
        direction-adjusted return at the last finite price of the window.
    """
    entry = prices[j]
    if not np.isfinite(entry) or entry <= 0:
        return None
    # Slicing clips at the array end, so late entries get a shorter window.
    future = prices[j + 1:j + 1 + max_hold]
    valid = future[np.isfinite(future)]
    if valid.size == 0:
        return None
    if direction == -1:
        hit = valid.min() <= entry * (1.0 - tp)
    else:
        hit = valid.max() >= entry * (1.0 + tp)
    if hit:
        return True, tp
    return False, float((valid[-1] - entry) / entry * direction)


def scan_signals(vd, prices, mask_fn, direction, tp=tp_pct, max_hold=MAX_HOLD):
    """Evaluate every bar selected by ``mask_fn(vd)``.

    Returns:
        ``(wins, timeout_returns)``: the number of take-profit hits and the
        list of direction-adjusted returns of entries that never hit it.
        Entries that cannot be evaluated are skipped.
    """
    wins = 0
    timeout_returns = []
    for j in np.flatnonzero(mask_fn(vd)):
        outcome = evaluate_entry(prices, j, direction, tp, max_hold)
        if outcome is None:
            continue
        hit, ret = outcome
        if hit:
            wins += 1
        else:
            timeout_returns.append(ret)
    return wins, timeout_returns


def scan_control(prices, tp=tp_pct, max_hold=MAX_HOLD, step=CONTROL_STEP):
    """Measure unconditional take-profit touch rates on every ``step``-th bar.

    Uses the same touch-within-window rule as the signal tests; comparing a
    touch rate against a close-at-horizon rate would overstate the edge.
    Only bars with a full ``max_hold`` window ahead of them are sampled.

    Returns:
        ``(up, dn, n)``: samples that touched +tp, samples that touched -tp,
        and the number of evaluable samples.
    """
    up = dn = n = 0
    for j in range(0, len(prices) - max_hold, step):
        entry = prices[j]
        if not np.isfinite(entry) or entry <= 0:
            continue
        future = prices[j + 1:j + 1 + max_hold]
        valid = future[np.isfinite(future)]
        if valid.size == 0:
            continue
        up += int(valid.max() >= entry * (1.0 + tp))
        dn += int(valid.min() <= entry * (1.0 - tp))
        n += 1
    return up, dn, n


def profit_factor(wins, timeout_returns, tp=tp_pct):
    """Return gross profit divided by gross loss.

    Take-profit hits earn ``tp`` each.  Entries that timed out contribute to
    gross profit when their return is positive and to gross loss when it is
    negative, so a profitable time-out is never counted as a loss.

    Returns ``PF_CAP`` when there is profit but no loss, and 0.0 when there
    is neither (no trades).
    """
    gross_win = wins * tp + sum(r for r in timeout_returns if r > 0)
    gross_loss = -sum(r for r in timeout_returns if r < 0)
    if gross_loss > 0:
        return gross_win / gross_loss
    return PF_CAP if gross_win > 0 else 0.0


def rate_pct(count, n):
    """Return ``count / n`` as a percentage, or 0 when ``n`` is zero."""
    return count / n * 100 if n else 0


def load_series(path):
    """Read one parquet file and return sanitized ``(vel_div, btc)`` arrays.

    Non-finite ``vel_div`` values become 0.0 (so they never trigger a signal)
    and non-finite or non-positive prices become NaN.  Returns ``None`` when
    a required column is missing.
    """
    df = pd.read_parquet(path)
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        return None
    raw_vd = df['vel_div'].values
    vd = np.where(np.isfinite(raw_vd), raw_vd, 0.0)
    btc = df['BTCUSDT'].values
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)
    return vd, btc


def print_report(stats, ctrl, elapsed, skipped=0):
    """Print the control baseline and one per-year table per hypothesis."""
    ctrl_dn_tot = sum(cy['dn'] for cy in ctrl.values())
    ctrl_up_tot = sum(cy['up'] for cy in ctrl.values())
    ctrl_n_tot = sum(cy['n'] for cy in ctrl.values())
    ctrl_dn_pct = rate_pct(ctrl_dn_tot, ctrl_n_tot)
    ctrl_up_pct = rate_pct(ctrl_up_tot, ctrl_n_tot)

    print(f"\nControl (unconditional {MAX_HOLD}-bar):")
    print(f" DOWN {TP_BPS}bps: {ctrl_dn_pct:.1f}% UP {TP_BPS}bps: {ctrl_up_pct:.1f}% n={ctrl_n_tot:,}")

    for label, _, direction in TESTS:
        all_yrs = stats[label]
        side = 'dn' if direction == -1 else 'up'
        tot_sig = sum(y['signal_bars'] for y in all_yrs.values())
        tot_win = sum(y['wins'] for y in all_yrs.values())
        tot_los = sum(y['losses'] for y in all_yrs.values())
        wr = rate_pct(tot_win, tot_win + tot_los)
        all_lr = [r for y in all_yrs.values() for r in y['loss_returns']]
        total_pf = profit_factor(tot_win, all_lr)
        ctrl_bl = ctrl_dn_pct if direction == -1 else ctrl_up_pct
        edge = wr - ctrl_bl

        print(f"\n{'─'*60}")
        print(f" {label}")
        print(f" {'Year':<6} {'Sigs':>8} {'WR':>7} {'PF':>7} {'Edge':>8}")
        print(f" {'─'*46}")
        for year in sorted(all_yrs.keys()):
            ys = all_yrs[year]
            ywr = rate_pct(ys['wins'], ys['wins'] + ys['losses'])
            ypf = profit_factor(ys['wins'], ys['loss_returns'])
            # Yearly rows are judged against that year's own baseline; fall
            # back to the overall rate if the year has no control samples.
            cy = ctrl.get(year)
            year_bl = rate_pct(cy[side], cy['n']) if cy and cy['n'] else ctrl_bl
            yedge = ywr - year_bl
            print(f" {year:<6} {ys['signal_bars']:>8,} {ywr:>6.1f}% {ypf:>7.3f} {yedge:>+7.1f}pp")
        print(f" {'─'*46}")
        print(f" {'TOTAL':<6} {tot_sig:>8,} {wr:>6.1f}% {total_pf:>7.3f} {edge:>+7.1f}pp ← {'EDGE' if edge > 2 else 'WEAK' if edge > 0 else 'NO EDGE'}")

    print(f"\n{'='*60}")
    print(f" Runtime: {elapsed:.0f}s")
    if skipped:
        print(f" Skipped files: {skipped}")
    print(f"{'='*60}")


def main(vbt_dir=VBT_DIR):
    """Scan every parquet file under ``vbt_dir`` and print the edge report."""
    # Box-drawing characters in the report need a UTF-8 capable stdout.
    if hasattr(sys.stdout, 'reconfigure'):
        sys.stdout.reconfigure(encoding='utf-8', errors='replace')

    parquet_files = sorted(
        p for p in Path(vbt_dir).glob("*.parquet") if 'catalog' not in p.name
    )
    total = len(parquet_files)
    print(f"Files: {total} TP={TP_BPS}bps MAX_HOLD={MAX_HOLD} bars\n")

    # Per-year per-test stats
    stats = {label: defaultdict(new_year_stats) for label, _, _ in TESTS}
    ctrl = defaultdict(lambda: {'up': 0, 'dn': 0, 'n': 0})  # control per year
    skipped = 0

    t0 = time.time()
    for i, path in enumerate(parquet_files):
        ds = path.stem
        year = ds[:4]  # assumes the file stem starts with the 4-digit year — TODO confirm
        try:
            series = load_series(path)
        except Exception as exc:  # best-effort batch: report the file, keep going
            skipped += 1
            print(f" [skip] {path.name}: {exc}", file=sys.stderr)
            continue
        if series is None:
            skipped += 1
            continue
        vd, btc = series
        n = len(vd)

        for label, mask_fn, direction in TESTS:
            ys = stats[label][year]
            ys['total_bars'] += n
            wins, timeout_returns = scan_signals(vd, btc, mask_fn, direction)
            ys['wins'] += wins
            ys['losses'] += len(timeout_returns)
            ys['loss_returns'].extend(timeout_returns)
            ys['signal_bars'] += wins + len(timeout_returns)

        # Control: unconditional every 60th bar
        up, dn, samples = scan_control(btc)
        cy = ctrl[year]
        cy['up'] += up
        cy['dn'] += dn
        cy['n'] += samples

        if (i + 1) % 200 == 0:
            el = time.time() - t0
            print(f" [{i+1}/{total}] {ds} {el:.0f}s")

    elapsed = time.time() - t0

    # ── Print results ──────────────────────────────────────────────────────────────
    print_report(stats, ctrl, elapsed, skipped)


if __name__ == "__main__":
    main()