DOLPHIN/nautilus_dolphin/test_edge_signal_5y.py

"""Fast Vectorized Edge Signal Existence Test — 5-Year Klines
=============================================================
Tests four hypotheses simultaneously:
  A. vel_div <= -0.02  SHORT (original system)
  B. vel_div <= -0.02  LONG  (inverse: signal predicts UP)
  C. vel_div >= +0.02  SHORT (opposite sign as SHORT trigger)
  D. vel_div >= +0.02  LONG  (opposite sign predicts UP)

Compares each against unconditional 120-bar baseline.
No engine stack — raw signal only. ~1-3 min runtime.
"""
import sys, time
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
import numpy as np
import pandas as pd
from collections import defaultdict

VBT_DIR  = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
TP_BPS   = 95
MAX_HOLD = 120
tp_pct   = TP_BPS / 10000.0
CTRL_STRIDE = 60    # spacing (in bars) between unconditional control samples
NO_LOSS_PF  = 999   # profit-factor sentinel printed when there is no net loss


def pct(num, den):
    """Return ``num / den`` as a percentage, or 0 when ``den`` is zero."""
    return num / den * 100 if den else 0


def window_outcome(prices, j, direction, tp, max_hold):
    """Evaluate one hypothetical trade entered at bar ``j``.

    The trade wins if any finite price in bars ``j+1 .. j+max_hold`` (clipped
    at the end of the array) touches the take-profit level: ``ep * (1 - tp)``
    for ``direction == -1`` (SHORT), ``ep * (1 + tp)`` otherwise (LONG).

    Args:
        prices: 1-D float array of prices; non-finite entries are ignored.
        j: entry bar index.
        direction: -1 for SHORT, +1 for LONG.
        tp: take-profit distance as a fraction of the entry price.
        max_hold: maximum number of forward bars to inspect.

    Returns:
        ``None`` when the entry price is non-finite / non-positive or the
        forward window holds no finite price. Otherwise ``(hit, ret)`` where
        ``hit`` is a bool and ``ret`` is ``tp`` on a hit, else the
        direction-adjusted return at the last finite bar of the window.
    """
    ep = prices[j]
    if not np.isfinite(ep) or ep <= 0:
        return None
    fut = prices[j + 1:j + 1 + max_hold]     # slicing clips at the array end
    fut = fut[np.isfinite(fut)]
    if fut.size == 0:
        return None

    if direction == -1:          # SHORT: need price to fall
        hit = fut.min() <= ep * (1.0 - tp)
    else:                        # LONG: need price to rise
        hit = fut.max() >= ep * (1.0 + tp)

    if hit:
        return True, tp
    return False, float((fut[-1] - ep) / ep * direction)   # sign-adjusted


def sample_control(prices, tp, max_hold, stride=60):
    """Unconditional baseline: first-touch hit counts on every ``stride``-th bar.

    Uses the same first-touch rule as ``window_outcome`` so the baseline is
    directly comparable with the signal win rate (an end-of-window return
    test would understate the baseline and inflate every reported edge).
    Only bars with a full ``max_hold``-bar forward window are sampled.

    Returns:
        ``(up_hits, down_hits, n_samples)``.
    """
    up = dn = cnt = 0
    for j in range(0, len(prices) - max_hold, stride):
        long_res = window_outcome(prices, j, +1, tp, max_hold)
        if long_res is None:
            continue
        # Same validity conditions as the LONG evaluation, so never None here.
        short_res = window_outcome(prices, j, -1, tp, max_hold)
        up  += int(long_res[0])
        dn  += int(short_res[0])
        cnt += 1
    return up, dn, cnt


def profit_factor(n_wins, timeout_returns, tp, no_loss=NO_LOSS_PF):
    """Ratio of take-profit gains to the net loss of trades that timed out.

    ``timeout_returns`` are the direction-adjusted returns of trades that
    never reached the take-profit; they may be positive. When their sum is
    not negative there is no net loss to divide by, so ``no_loss`` is
    returned instead of treating a net gain as a loss.
    """
    net_timeout = sum(timeout_returns)
    if net_timeout >= 0:
        return no_loss
    return n_wins * tp / -net_timeout


parquet_files = sorted(VBT_DIR.glob("*.parquet"))
parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]
total = len(parquet_files)
print(f"Files: {total}   TP={TP_BPS}bps   MAX_HOLD={MAX_HOLD} bars\n")

# Four buckets: (label, signal_mask_fn, direction)
# direction: -1=SHORT (price must fall tp_pct), +1=LONG (price must rise tp_pct)
TESTS = [
    ('A: vd<=-0.02 SHORT', lambda vd: vd <= -0.02, -1),
    ('B: vd<=-0.02 LONG',  lambda vd: vd <= -0.02, +1),
    ('C: vd>=+0.02 SHORT', lambda vd: vd >= +0.02, -1),
    ('D: vd>=+0.02 LONG',  lambda vd: vd >= +0.02, +1),
]

# Per-year per-test stats. 'loss_returns' holds the sign-adjusted return of
# every trade that did not reach the take-profit (values can be positive).
stats = {label: defaultdict(lambda: {
    'total_bars': 0, 'signal_bars': 0,
    'wins': 0, 'losses': 0,
    'loss_returns': [],
}) for label, _, _ in TESTS}

ctrl = defaultdict(lambda: {'up': 0, 'dn': 0, 'n': 0})  # control per year

t0 = time.time()
skipped = 0
for i, path in enumerate(parquet_files):
    ds   = path.stem
    year = ds[:4]          # first four characters of the file stem
    try:
        df = pd.read_parquet(path)
    except Exception as exc:
        # Best effort: keep scanning, but do not hide which file was dropped.
        skipped += 1
        print(f"  [skip] {path.name}: {exc!r}", file=sys.stderr)
        continue
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        skipped += 1
        continue

    raw_vd = df['vel_div'].values
    vd  = np.where(np.isfinite(raw_vd), raw_vd, 0.0)   # non-finite -> 0.0 (never triggers)
    btc = df['BTCUSDT'].values
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)
    n   = len(vd)

    for label, mask_fn, direction in TESTS:
        ys = stats[label][year]
        ys['total_bars'] += n

        for j in np.flatnonzero(mask_fn(vd)):
            outcome = window_outcome(btc, j, direction, tp_pct, MAX_HOLD)
            if outcome is None:
                continue
            hit, ret = outcome
            if hit:
                ys['wins'] += 1
            else:
                ys['losses'] += 1
                ys['loss_returns'].append(ret)
            ys['signal_bars'] += 1

    # Control: unconditional every CTRL_STRIDE-th bar, same first-touch rule
    cy = ctrl[year]
    c_up, c_dn, c_n = sample_control(btc, tp_pct, MAX_HOLD, CTRL_STRIDE)
    cy['up'] += c_up
    cy['dn'] += c_dn
    cy['n']  += c_n

    if (i + 1) % 200 == 0:
        el = time.time() - t0
        print(f"  [{i+1}/{total}] {ds}  {el:.0f}s")

elapsed = time.time() - t0

# ── Print results ──────────────────────────────────────────────────────────────
ctrl_dn_tot = sum(cy['dn'] for cy in ctrl.values())
ctrl_up_tot = sum(cy['up'] for cy in ctrl.values())
ctrl_n_tot  = sum(cy['n']  for cy in ctrl.values())
ctrl_dn_pct = pct(ctrl_dn_tot, ctrl_n_tot)
ctrl_up_pct = pct(ctrl_up_tot, ctrl_n_tot)

print(f"\nControl (unconditional {MAX_HOLD}-bar, first-touch):")
print(f"  DOWN {TP_BPS}bps: {ctrl_dn_pct:.1f}%   UP {TP_BPS}bps: {ctrl_up_pct:.1f}%   n={ctrl_n_tot:,}")

for label, _, direction in TESTS:
    all_yrs = stats[label]
    tot_sig = sum(y['signal_bars'] for y in all_yrs.values())
    tot_win = sum(y['wins']        for y in all_yrs.values())
    tot_los = sum(y['losses']      for y in all_yrs.values())
    wr      = pct(tot_win, tot_win + tot_los)
    all_lr  = [r for y in all_yrs.values() for r in y['loss_returns']]
    pf      = profit_factor(tot_win, all_lr, tp_pct)
    hit_key = 'dn' if direction == -1 else 'up'
    ctrl_bl = ctrl_dn_pct if direction == -1 else ctrl_up_pct
    edge    = wr - ctrl_bl

    print(f"\n{'─'*60}")
    print(f"  {label}")
    print(f"  {'Year':<6} {'Sigs':>8} {'WR':>7} {'PF':>7} {'Ctrl':>7} {'Edge':>8}")
    print(f"  {'─'*54}")
    for year in sorted(all_yrs):
        ys  = all_yrs[year]
        ywr = pct(ys['wins'], ys['wins'] + ys['losses'])
        ypf = profit_factor(ys['wins'], ys['loss_returns'], tp_pct)
        # Compare each year with the control drawn from that same year; fall
        # back to the all-years baseline when the year has no control samples.
        ycy = ctrl.get(year)
        ybl = pct(ycy[hit_key], ycy['n']) if ycy and ycy['n'] else ctrl_bl
        yedge = ywr - ybl
        print(f"  {year:<6} {ys['signal_bars']:>8,} {ywr:>6.1f}% {ypf:>7.3f} {ybl:>6.1f}% {yedge:>+7.1f}pp")
    print(f"  {'─'*54}")
    print(f"  {'TOTAL':<6} {tot_sig:>8,} {wr:>6.1f}% {pf:>7.3f} {ctrl_bl:>6.1f}% {edge:>+7.1f}pp  ← {'EDGE' if edge > 2 else 'WEAK' if edge > 0 else 'NO EDGE'}")

print(f"\n{'='*60}")
if skipped:
    print(f"  Skipped files: {skipped}")
print(f"  Runtime: {elapsed:.0f}s")
print(f"{'='*60}")
initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore. 2026-04-21 16:58:38 +02:00			`"""Fast Vectorized Edge Signal Existence Test — 5-Year Klines`
			`=============================================================`
			`Tests four hypotheses simultaneously:`
			`A. vel_div <= -0.02 SHORT (original system)`
			`B. vel_div <= -0.02 LONG (inverse: signal predicts UP)`
			`C. vel_div >= +0.02 SHORT (opposite sign as SHORT trigger)`
			`D. vel_div >= +0.02 LONG (opposite sign predicts UP)`

			`Compares each against unconditional 120-bar baseline.`
			`No engine stack — raw signal only. ~1-3 min runtime.`
			`"""`
			`import sys, time`
			`sys.stdout.reconfigure(encoding='utf-8', errors='replace')`
			`from pathlib import Path`
			`import numpy as np`
			`import pandas as pd`
			`from collections import defaultdict`

			`VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")`
			`TP_BPS = 95`
			`MAX_HOLD = 120`
			`tp_pct = TP_BPS / 10000.0`

			`parquet_files = sorted(VBT_DIR.glob("*.parquet"))`
			`parquet_files = [p for p in parquet_files if 'catalog' not in str(p)]`
			`total = len(parquet_files)`
			`print(f"Files: {total} TP={TP_BPS}bps MAX_HOLD={MAX_HOLD} bars\n")`

			`# Four buckets: (signal_mask_fn, direction)`
			`# direction: -1=SHORT (price must fall tp_pct), +1=LONG (price must rise tp_pct)`
			`TESTS = [`
			`('A: vd<=-0.02 SHORT', lambda vd: vd <= -0.02, -1),`
			`('B: vd<=-0.02 LONG', lambda vd: vd <= -0.02, +1),`
			`('C: vd>=+0.02 SHORT', lambda vd: vd >= +0.02, -1),`
			`('D: vd>=+0.02 LONG', lambda vd: vd >= +0.02, +1),`
			`]`

			`# Per-year per-test stats`
			`stats = {label: defaultdict(lambda: {`
			`'total_bars': 0, 'signal_bars': 0,`
			`'wins': 0, 'losses': 0,`
			`'loss_returns': [],`
			`}) for label, _, _ in TESTS}`

			`ctrl = defaultdict(lambda: {'up': 0, 'dn': 0, 'n': 0}) # control per year`

			`t0 = time.time()`
			`for i, pf in enumerate(parquet_files):`
			`ds = pf.stem`
			`year = ds[:4]`
			`try:`
			`df = pd.read_parquet(pf)`
			`except Exception:`
			`continue`
			`if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:`
			`continue`

			`vd = np.where(np.isfinite(df['vel_div'].values), df['vel_div'].values, 0.0)`
			`btc = df['BTCUSDT'].values`
			`btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)`
			`n = len(vd)`

			`for label, mask_fn, direction in TESTS:`
			`ys = stats[label][year]`
			`ys['total_bars'] += n`
			`sig_idx = np.where(mask_fn(vd))[0]`

			`for j in sig_idx:`
			`ep = btc[j]`
			`if not np.isfinite(ep) or ep <= 0:`
			`continue`
			`end_j = min(j + MAX_HOLD, n - 1)`
			`fut = btc[j+1:end_j+1]`
			`fm = np.isfinite(fut)`
			`if not np.any(fm):`
			`continue`

			`fin = np.where(fm, fut, np.nan)`
			`if direction == -1: # SHORT: need price to fall`
			`tp_price = ep * (1.0 - tp_pct)`
			`hit = np.nanmin(fin) <= tp_price`
			`else: # LONG: need price to rise`
			`tp_price = ep * (1.0 + tp_pct)`
			`hit = np.nanmax(fin) >= tp_price`

			`if hit:`
			`ys['wins'] += 1`
			`else:`
			`last = fin[~np.isnan(fin)][-1]`
			`ret = (last - ep) / ep * direction # sign-adjusted`
			`ys['losses'] += 1`
			`ys['loss_returns'].append(ret)`
			`ys['signal_bars'] += 1`

			`# Control: unconditional every 60th bar`
			`cy = ctrl[year]`
			`for j in range(0, n - MAX_HOLD, 60):`
			`ep = btc[j]; ex = btc[j + MAX_HOLD]`
			`if np.isfinite(ep) and np.isfinite(ex) and ep > 0:`
			`ret = (ex - ep) / ep`
			`cy['up'] += int(ret >= tp_pct)`
			`cy['dn'] += int(ret <= -tp_pct)`
			`cy['n'] += 1`

			`if (i + 1) % 200 == 0:`
			`el = time.time() - t0`
			`print(f" [{i+1}/{total}] {ds} {el:.0f}s")`

			`elapsed = time.time() - t0`

			`# ── Print results ──────────────────────────────────────────────────────────────`
			`ctrl_dn_tot = sum(cy['dn'] for cy in ctrl.values())`
			`ctrl_up_tot = sum(cy['up'] for cy in ctrl.values())`
			`ctrl_n_tot = sum(cy['n'] for cy in ctrl.values())`
			`ctrl_dn_pct = ctrl_dn_tot / ctrl_n_tot * 100 if ctrl_n_tot else 0`
			`ctrl_up_pct = ctrl_up_tot / ctrl_n_tot * 100 if ctrl_n_tot else 0`

			`print(f"\nControl (unconditional 120-bar):")`
			`print(f" DOWN {TP_BPS}bps: {ctrl_dn_pct:.1f}% UP {TP_BPS}bps: {ctrl_up_pct:.1f}% n={ctrl_n_tot:,}")`

			`for label, _, direction in TESTS:`
			`all_yrs = stats[label]`
			`tot_sig = sum(y['signal_bars'] for y in all_yrs.values())`
			`tot_win = sum(y['wins'] for y in all_yrs.values())`
			`tot_los = sum(y['losses'] for y in all_yrs.values())`
			`tot_n = tot_win + tot_los`
			`wr = tot_win / tot_n * 100 if tot_n else 0`
			`gw = tot_win * tp_pct`
			`all_lr = [r for y in all_yrs.values() for r in y['loss_returns']]`
			`gl = abs(sum(all_lr)) if all_lr else 0`
			`pf = gw / gl if gl > 0 else 999`
			`ctrl_bl = ctrl_dn_pct if direction == -1 else ctrl_up_pct`
			`edge = wr - ctrl_bl`

			`print(f"\n{'─'*60}")`
			`print(f" {label}")`
			`print(f" {'Year':<6} {'Sigs':>8} {'WR':>7} {'PF':>7} {'Edge':>8}")`
			`print(f" {'─'*46}")`
			`for year in sorted(all_yrs.keys()):`
			`ys = all_yrs[year]`
			`yn = ys['wins'] + ys['losses']`
			`ywr = ys['wins'] / yn * 100 if yn else 0`
			`ygw = ys['wins'] * tp_pct`
			`ygl = abs(sum(ys['loss_returns'])) if ys['loss_returns'] else 0`
			`ypf = ygw / ygl if ygl > 0 else 999`
			`yedge = ywr - ctrl_bl`
			`print(f" {year:<6} {ys['signal_bars']:>8,} {ywr:>6.1f}% {ypf:>7.3f} {yedge:>+7.1f}pp")`
			`print(f" {'─'*46}")`
			`print(f" {'TOTAL':<6} {tot_sig:>8,} {wr:>6.1f}% {pf:>7.3f} {edge:>+7.1f}pp ← {'EDGE' if edge > 2 else 'WEAK' if edge > 0 else 'NO EDGE'}")`

			`print(f"\n{'='*60}")`
			`print(f" Runtime: {elapsed:.0f}s")`
			`print(f"{'='*60}")`