"""vel_div Signal Freshness / Depletion Test
============================================

Hypothesis: "firing late into a depleted move"

On 1m klines, vel_div may have been below threshold for many bars already by
the time we "see" a signal. The NG3 5s system catches it at bar 1; the 1m
system catches it at bar N (the move is already mostly done).

Test: compute edge conditional on HOW MANY BARS the signal has been continuously
active (bars_since_trigger). If edge decays with signal age → hypothesis confirmed.

Also tests: does tightening to FIRST FIRES ONLY (cooldown between signals)
recover the edge?

Outputs:
    run_logs/depletion_test_YYYYMMDD_HHMMSS.csv — one row per
        (direction, bars_since_trigger_bucket, year) with WR / PF / edge
    Console: edge decay tables (SHORT and LONG) and cooldown-filter summary
"""
# --- standard library ---
import csv
import gc
import sys
import time
from collections import defaultdict
from datetime import datetime
from pathlib import Path

# --- third-party ---
import numpy as np
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view

# The console may use a legacy Windows codepage; replace unencodable
# characters (arrows, em-dashes in the report) instead of crashing.
sys.stdout.reconfigure(encoding='utf-8', errors='replace')

# Input kline cache and output log directory.
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")

# Trade parameters: take-profit in basis points, max holding period in bars.
TP_BPS = 95
MAX_HOLD = 120
tp_pct = TP_BPS / 10000.0  # take-profit expressed as a fraction

# Focus threshold: the current system threshold
SHORT_T = -0.020
LONG_T = +0.020

# Buckets: how many bars has this signal been continuously active?
# "fresh" = 1st bar, "stale" = been below threshold for a long time
AGE_BUCKETS = [
    ('fresh_1', 1, 1),
    ('young_2_5', 2, 5),
    ('mid_6_20', 6, 20),
    ('old_21_60', 21, 60),
    ('stale_61+', 61, 9999),
]

# Also test with a cooldown filter: only fire on the FIRST bar of each trigger
# episode (simulates catching the signal fresh, like a 5s system would).
COOLDOWN_BARS = 60  # min bars between consecutive signals
# Discover input parquet files, skipping anything under a catalog path.
parquet_files = sorted(
    p for p in VBT_DIR.glob("*.parquet") if 'catalog' not in str(p)
)
total = len(parquet_files)
print(f"Files: {total} SHORT_T={SHORT_T} LONG_T={LONG_T}")
print(f"TP={TP_BPS}bps MAX_HOLD={MAX_HOLD} COOLDOWN={COOLDOWN_BARS} bars\n")


def _new_trade_cell():
    # One accumulator cell: win/loss counts plus gross win / gross loss sums.
    return {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0}


# stats[(direction, age_bucket, year)] -> trade cell
stats = defaultdict(_new_trade_cell)
# cooldown-filtered stats, keyed by (direction, year)
cd_stats = defaultdict(_new_trade_cell)
# unconditional control sample: counts of TP-sized up/down moves
ctrl = defaultdict(lambda: {'up': 0, 'dn': 0, 'n': 0})

t0 = time.time()
# ---- Main pass: one parquet file per dataset ---------------------------------
# Each file is expected to carry a 'vel_div' signal column and a 'BTCUSDT'
# price column on the same 1m grid.  The first 4 chars of the file stem are
# used as the year key (assumes stems start with YYYY — TODO confirm naming).
for i, pf in enumerate(parquet_files):
    ds = pf.stem
    year = ds[:4]  # presumably a YYYY prefix; verify against cache naming

    try:
        df = pd.read_parquet(pf)
    except Exception:
        # Unreadable/corrupt file: skip silently (deliberate best-effort pass).
        continue
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        continue

    # Sanitize: non-finite signal -> 0.0 (treated as "not active");
    # non-finite or non-positive price -> NaN (excluded from entries/extrema).
    vd = df['vel_div'].values.astype(np.float64)
    btc = df['BTCUSDT'].values.astype(np.float64)
    vd = np.where(np.isfinite(vd), vd, 0.0)
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)
    n = len(btc)
    if n < MAX_HOLD + 5:
        # Too short to form a meaningful number of forward windows.
        del df, vd, btc
        continue

    # Control: unconditional entries every 60 bars — what fraction of random
    # entries see a +/- TP-sized move within MAX_HOLD bars?  This is the
    # baseline that signal win-rates are compared against.
    ck = (year,)
    for j in range(0, n - MAX_HOLD, 60):
        ep = btc[j]; ex = btc[j + MAX_HOLD]
        if np.isfinite(ep) and np.isfinite(ex) and ep > 0:
            r = (ex - ep) / ep
            ctrl[ck]['up'] += int(r >= tp_pct)
            ctrl[ck]['dn'] += int(r <= -tp_pct)
            ctrl[ck]['n'] += 1

    # Precompute rolling windows (only for bars where we can look forward MAX_HOLD)
    n_usable = n - MAX_HOLD
    windows = sliding_window_view(btc, MAX_HOLD + 1)[:n_usable]  # (n_usable, MAX_HOLD+1) zero-copy view
    ep_arr = windows[:, 0]                       # entry price at bar j
    fut_min = np.nanmin(windows[:, 1:], axis=1)  # lowest price over the next MAX_HOLD bars
    fut_max = np.nanmax(windows[:, 1:], axis=1)  # highest price over the next MAX_HOLD bars
    last_px = windows[:, -1]                     # price at forced exit (bar j + MAX_HOLD)
    valid = np.isfinite(ep_arr) & (ep_arr > 0)

    for direction, threshold in [('S', SHORT_T), ('L', LONG_T)]:
        # Compute continuous trigger age for each bar
        # age[j] = number of consecutive bars (including j) where signal has been active
        if direction == 'S':
            active = (vd[:n_usable] <= threshold)
        else:
            active = (vd[:n_usable] >= threshold)

        age = np.zeros(n_usable, dtype=np.int32)
        for j in range(n_usable):
            if active[j]:
                age[j] = age[j-1] + 1 if j > 0 else 1
            else:
                age[j] = 0

        sig_idx = np.where(active & valid)[0]
        if len(sig_idx) == 0:
            continue

        # Restrict the precomputed forward stats to signal bars only.
        ep_s = ep_arr[sig_idx]
        fmin_s = fut_min[sig_idx]
        fmax_s = fut_max[sig_idx]
        last_s = last_px[sig_idx]
        age_s = age[sig_idx]

        if direction == 'S':
            # Win = price touches entry*(1 - tp) at any point within the hold.
            hit = fmin_s <= ep_s * (1.0 - tp_pct)
            # Timeout-exit return from the short's perspective (0.0 if exit price is NaN).
            lret = np.where(np.isfinite(last_s), (ep_s - last_s) / ep_s, 0.0)
        else:
            hit = fmax_s >= ep_s * (1.0 + tp_pct)
            lret = np.where(np.isfinite(last_s), (last_s - ep_s) / ep_s, 0.0)

        # Age-bucketed stats
        for bucket_name, age_lo, age_hi in AGE_BUCKETS:
            mask = (age_s >= age_lo) & (age_s <= age_hi)
            if not np.any(mask):
                continue
            w = int(np.sum(hit[mask]))
            l = int(np.sum(~hit[mask]))
            gw = w * tp_pct  # every winner pays exactly tp_pct
            gl = float(np.sum(np.abs(lret[~hit & mask])))  # losers charged |timeout return|
            k = (direction, bucket_name, year)
            stats[k]['wins'] += w
            stats[k]['losses'] += l
            stats[k]['gw'] += gw
            stats[k]['gl'] += gl

        # Cooldown filter: only fire on FIRST bar of each episode (age == 1)
        # OR any bar after COOLDOWN_BARS since last fire
        last_fire = -COOLDOWN_BARS - 1  # sentinel so the very first signal always fires
        for idx_pos in range(len(sig_idx)):
            j = sig_idx[idx_pos]
            if age_s[idx_pos] == 1 or (j - last_fire) >= COOLDOWN_BARS:
                last_fire = j
                w = int(hit[idx_pos])
                l = 1 - w
                gw = w * tp_pct
                gl = float(abs(lret[idx_pos])) if not hit[idx_pos] else 0.0
                ck2 = (direction, year)
                cd_stats[ck2]['wins'] += w
                cd_stats[ck2]['losses'] += l
                cd_stats[ck2]['gw'] += gw
                cd_stats[ck2]['gl'] += gl

    # Drop the large per-file arrays before the next iteration to cap peak RSS.
    del df, vd, btc, windows, ep_arr, fut_min, fut_max, last_px, valid, age

    # Periodic GC + progress line every 100 files.
    if (i + 1) % 100 == 0:
        gc.collect()
        elapsed = time.time() - t0
        print(f" [{i+1}/{total}] {ds} {elapsed/60:.1f}m")
elapsed = time.time() - t0
print(f"\nPass complete: {elapsed:.0f}s")

# Aggregate the unconditional control sample across all years to get the
# baseline probability of a TP-sized move in each direction.
ctrl_dn = ctrl_up = ctrl_n = 0
for cell in ctrl.values():
    ctrl_dn += cell['dn']
    ctrl_up += cell['up']
    ctrl_n += cell['n']
ctrl_dn_pct = (ctrl_dn / ctrl_n * 100) if ctrl_n else 0
ctrl_up_pct = (ctrl_up / ctrl_n * 100) if ctrl_n else 0
print(f"\nControl: DOWN={ctrl_dn_pct:.1f}% UP={ctrl_up_pct:.1f}% n={ctrl_n:,}")

# Year columns shown in the report tables.
YEARS = ['2021', '2022', '2023', '2024', '2025', '2026']
def print_depletion_table(direction, ctrl_bl):
    """Print the edge-vs-signal-age table for one direction.

    Args:
        direction: 'S' (short) or 'L' (long) — key prefix into the global ``stats``.
        ctrl_bl: control baseline win-rate in percent; edge = WR - ctrl_bl.

    Reads module-level ``stats``, ``AGE_BUCKETS`` and ``YEARS``; prints only.
    """
    print(f"\n{'='*90}")
    print(f" SIGNAL FRESHNESS / DEPLETION — {direction} ctrl={ctrl_bl:.1f}%")
    print(f" (reading: does edge decay as vel_div has been active for longer?)")
    print(f"{'='*90}")
    hdr = f" {'Bucket':<16}" + "".join(f" {yr:>10}" for yr in YEARS) + f" {'TOTAL':>10} {'n_trades':>9}"
    print(hdr)
    print(f" {'-'*88}")
    for bucket_name, _, _ in AGE_BUCKETS:
        yr_edges = []
        tot_w = tot_l = tot_n = 0
        for yr in YEARS:
            k = (direction, bucket_name, yr)
            s = stats.get(k, {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0})
            n_t = s['wins'] + s['losses']
            wr = s['wins'] / n_t * 100 if n_t > 0 else float('nan')
            edge = wr - ctrl_bl if n_t > 0 else float('nan')
            yr_edges.append(f"{edge:>+8.1f}pp" if n_t > 0 else " ---")
            tot_w += s['wins']; tot_l += s['losses']; tot_n += n_t
        tot_wr = tot_w / tot_n * 100 if tot_n > 0 else float('nan')
        tot_edge = tot_wr - ctrl_bl if tot_n > 0 else float('nan')
        print(f" {bucket_name:<16}" + "".join(f" {e:>10}" for e in yr_edges) +
              f" {tot_edge:>+8.1f}pp {tot_n:>9,}")
    print(f" {'-'*88}")
    # Fix: the original interpolated a no-op ternary here
    # ('freshest' if direction=='S' else 'freshest') — both branches identical,
    # so the literal is used directly; output is unchanged.
    print(" (freshest = strongest edge → confirms 'firing late' hypothesis if edge decays)")
# Edge-decay tables for both directions, each against its own baseline.
print_depletion_table('S', ctrl_dn_pct)
print_depletion_table('L', ctrl_up_pct)

# Cooldown filter summary: per-year and total stats for the "fresh fires only"
# variant accumulated in cd_stats during the main pass.
print(f"\n{'='*70}")
print(f" COOLDOWN FILTER (fire only on fresh signal OR after {COOLDOWN_BARS}-bar gap)")
print(f" (simulates catching the signal at the same moment a faster system would)")
print(f"{'='*70}")
print(f" {'Dir':<5} {'Year':<6} {'n_trades':>9} {'WR':>8} {'PF':>8} {'Edge':>9}")
print(f" {'-'*50}")
_EMPTY_CELL = {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0}
for direction, ctrl_bl in [('S', ctrl_dn_pct), ('L', ctrl_up_pct)]:
    sum_w = 0
    sum_l = 0
    sum_gw = 0.0
    sum_gl = 0.0
    for yr in YEARS:
        cell = cd_stats.get((direction, yr), _EMPTY_CELL)
        n_t = cell['wins'] + cell['losses']
        if n_t == 0:
            continue
        wr = cell['wins'] / n_t * 100
        pf = cell['gw'] / cell['gl'] if cell['gl'] > 0 else 999
        edge = wr - ctrl_bl
        print(f" {direction:<5} {yr:<6} {n_t:>9,} {wr:>7.1f}% {pf:>8.3f} {edge:>+8.1f}pp")
        sum_w += cell['wins']
        sum_l += cell['losses']
        sum_gw += cell['gw']
        sum_gl += cell['gl']
    tot_n = sum_w + sum_l
    if tot_n > 0:
        tot_wr = sum_w / tot_n * 100
        tot_pf = sum_gw / sum_gl if sum_gl > 0 else 999
        tot_edge = tot_wr - ctrl_bl
        print(f" {direction:<5} {'TOTAL':<6} {tot_n:>9,} {tot_wr:>7.1f}% {tot_pf:>8.3f} {tot_edge:>+8.1f}pp")
    print()
# Save per-(direction, age_bucket, year) stats to CSV.
LOG_DIR.mkdir(parents=True, exist_ok=True)  # parents=True: also create missing ancestors
ts = datetime.now().strftime("%Y%m%d_%H%M%S")

# Fixed column order — used instead of rows[0].keys() so a run that produced
# zero signal rows still writes a valid header instead of raising IndexError.
FIELDNAMES = ['direction', 'age_bucket', 'year', 'n_trades', 'wins', 'losses',
              'wr', 'pf', 'edge_pp', 'gross_win', 'gross_loss']

rows = []
for (direction, bucket_name, yr), s in stats.items():
    n_t = s['wins'] + s['losses']
    # Edge is measured against the direction-matched control baseline.
    ctrl_bl = ctrl_dn_pct if direction == 'S' else ctrl_up_pct
    wr = s['wins'] / n_t * 100 if n_t > 0 else float('nan')
    # PF = gross win / gross loss; 999.0 is a sentinel for "no losses recorded".
    pf = s['gw'] / s['gl'] if s['gl'] > 0 else (999.0 if s['gw'] > 0 else float('nan'))
    edge = wr - ctrl_bl if n_t > 0 else float('nan')
    rows.append({'direction': direction, 'age_bucket': bucket_name, 'year': yr,
                 'n_trades': n_t, 'wins': s['wins'], 'losses': s['losses'],
                 'wr': round(wr, 3), 'pf': round(pf, 4), 'edge_pp': round(edge, 3),
                 'gross_win': round(s['gw'], 6), 'gross_loss': round(s['gl'], 6)})

out_path = LOG_DIR / f"depletion_test_{ts}.csv"
with open(out_path, 'w', newline='', encoding='utf-8') as f:
    w = csv.DictWriter(f, fieldnames=FIELDNAMES)
    w.writeheader()
    w.writerows(rows)

print(f"\n → {out_path}")
print(f" Runtime: {elapsed:.0f}s")