"""MAX_HOLD Sweep — 5y Klines
=============================
Root cause of -99.9% ROI: MAX_HOLD=120 bars × 60s = 2 HOURS.
Legacy optimal hold = 120 bars × 5s = 600s = 10 MINUTES.
On 1m klines that's 10 bars.

This sweep tests MAX_HOLD = [3, 5, 8, 10, 12, 15, 20, 30, 60, 120] bars
on SHORT and LONG directions at the default threshold (+-0.020).

Metric: Profit Factor (gross_win / gross_loss) and edge (WR - baseline).
PF > 1.0 = profitable raw (before fees/leverage). Target: find the sweet spot.

Also sweeps TP_BPS = [60, 95, 120, 150] to find the right TP/hold combo.

Output: run_logs/maxhold_sweep_YYYYMMDD_HHMMSS.csv + console summary
Runtime: ~2-3 minutes
"""
import csv
import gc
import sys
import time
import warnings
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view

VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")

SHORT_T = -0.020
LONG_T = +0.020

# Sweep parameters
HOLD_TIMES = [3, 5, 8, 10, 12, 15, 20, 30, 60, 120]
TP_BPS_LIST = [60, 95, 120, 150]

MAX_HOLD_MAX = max(HOLD_TIMES)  # precompute windows up to this size

# Control baseline samples every CTRL_STRIDE-th bar to keep the pass fast.
CTRL_STRIDE = 30

# Year columns of the console summary. Years outside this list still reach
# the CSV but are not shown (or totalled) on the console.
YEARS = ['2021', '2022', '2023', '2024', '2025', '2026']

# Sentinel profit factor reported when there are wins but no losses.
PF_NO_LOSS = 999.0

# Explicit CSV header, so an empty result set still yields a valid file.
CSV_FIELDS = [
    'direction', 'hold_bars', 'hold_min', 'tp_bps', 'year',
    'n_trades', 'wins', 'losses', 'wr', 'pf', 'edge_pp',
    'gross_win', 'gross_loss', 'ctrl_bl',
]


def new_stats():
    """Return an empty accumulator: stats[(direction, hold, tp_bps, year)]."""
    return defaultdict(lambda: {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0})


def new_ctrl():
    """Return an empty control accumulator: ctrl[(hold, tp_bps, year)]."""
    return defaultdict(lambda: {'up': 0, 'dn': 0, 'n': 0})


def profit_factor(gross_win, gross_loss):
    """Return gross_win / gross_loss.

    With no losses the result is PF_NO_LOSS when there was any gross win and
    NaN otherwise (nothing traded, so the ratio is undefined).
    """
    if gross_loss > 0:
        return gross_win / gross_loss
    return PF_NO_LOSS if gross_win > 0 else float('nan')


def baseline_pct(c, direction):
    """Return the control TP-hit rate in percent for *direction*.

    *c* is one ctrl record or None; NaN is returned when there are no
    control samples, rather than pretending the baseline is 0%.
    """
    if c is None or c['n'] <= 0:
        return float('nan')
    hits = c['dn'] if direction == 'S' else c['up']
    return hits / c['n'] * 100


def accumulate_file(vd, btc, year, stats, ctrl):
    """Fold one file's signal and price series into *stats* and *ctrl*.

    vd    -- signal values, one per bar; non-finite entries are treated as 0.
    btc   -- prices, one per bar; non-finite or non-positive become NaN.
    year  -- key component under which results are accumulated.

    Returns False (leaving the accumulators untouched) when the series has
    fewer than MAX_HOLD_MAX + 5 bars, True otherwise.
    """
    vd = np.asarray(vd, dtype=np.float64)
    btc = np.asarray(btc, dtype=np.float64)
    vd = np.where(np.isfinite(vd), vd, 0.0)
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)
    n = len(btc)
    if n < MAX_HOLD_MAX + 5:
        return False

    n_usable = n - MAX_HOLD_MAX
    # Precompute the largest window; sub-windows reuse slices of it.
    big_windows = sliding_window_view(btc, MAX_HOLD_MAX + 1)[:n_usable]
    ep_arr = big_windows[:, 0]
    valid = np.isfinite(ep_arr) & (ep_arr > 0)

    # Entry bars per direction do not depend on hold or TP: find them once.
    signals = [
        ('S', np.flatnonzero((vd[:n_usable] <= SHORT_T) & valid)),
        ('L', np.flatnonzero((vd[:n_usable] >= LONG_T) & valid)),
    ]
    ctrl_idx = np.arange(0, n_usable, CTRL_STRIDE)
    ctrl_idx = ctrl_idx[valid[ctrl_idx]]

    for hold in HOLD_TIMES:
        # big_windows[:, 1:hold+1] covers bars 1..hold after the entry bar.
        sub = big_windows[:, 1:hold + 1]  # shape (n_usable, hold)
        with warnings.catch_warnings():
            # Rows whose future bars are all NaN yield NaN; every comparison
            # below is then False, so the warning carries no information.
            warnings.filterwarnings('ignore', message='All-NaN slice encountered')
            fut_min = np.nanmin(sub, axis=1)
            fut_max = np.nanmax(sub, axis=1)
        last_px = big_windows[:, hold]  # bar at exactly `hold`

        # Control baseline: best excursion each way from every sampled bar
        # with a usable exit price, regardless of signal.
        ci = ctrl_idx[np.isfinite(last_px[ctrl_idx])]
        ep_c = ep_arr[ci]
        r_dn = (ep_c - fut_min[ci]) / ep_c
        r_up = (fut_max[ci] - ep_c) / ep_c

        for tp_bps in TP_BPS_LIST:
            tp_pct = tp_bps / 10_000.0

            if ci.size:
                c = ctrl[(hold, tp_bps, year)]
                c['dn'] += int(np.count_nonzero(r_dn >= tp_pct))
                c['up'] += int(np.count_nonzero(r_up >= tp_pct))
                c['n'] += int(ci.size)

            for direction, idx in signals:
                if idx.size == 0:
                    continue
                ep_s = ep_arr[idx]
                last_s = last_px[idx]
                if direction == 'S':
                    hit = fut_min[idx] <= ep_s * (1.0 - tp_pct)
                    lret = np.where(np.isfinite(last_s), (ep_s - last_s) / ep_s, 0.0)
                else:
                    hit = fut_max[idx] >= ep_s * (1.0 + tp_pct)
                    lret = np.where(np.isfinite(last_s), (last_s - ep_s) / ep_s, 0.0)

                wins = int(np.count_nonzero(hit))
                s = stats[(direction, hold, tp_bps, year)]
                s['wins'] += wins
                s['losses'] += int(hit.size) - wins
                s['gw'] += wins * tp_pct
                # NOTE(review): abs() books every non-TP exit as a loss, even
                # when the trade expired in profit (below TP). That understates
                # PF; kept as-is so results stay comparable with earlier runs
                # — confirm whether this is intended.
                s['gl'] += float(np.sum(np.abs(lret[~hit])))
    return True


def run_sweep(parquet_files):
    """Accumulate sweep statistics over *parquet_files*.

    Files that cannot be read are reported on stderr and skipped; files
    lacking the 'vel_div' or 'BTCUSDT' column, or too short, are skipped
    silently. Returns (stats, ctrl, elapsed_seconds).
    """
    stats = new_stats()
    ctrl = new_ctrl()
    total = len(parquet_files)
    t0 = time.time()

    for i, path in enumerate(parquet_files):
        ds = path.stem
        year = ds[:4]

        try:
            df = pd.read_parquet(path)
        except Exception as exc:  # best-effort sweep: report and move on
            print(f"skipped {path.name}: {exc}", file=sys.stderr)
            continue
        if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
            continue

        vd = df['vel_div'].values.astype(np.float64)
        btc = df['BTCUSDT'].values.astype(np.float64)
        del df

        processed = accumulate_file(vd, btc, year, stats, ctrl)
        del vd, btc
        if not processed:
            continue

        if (i + 1) % 200 == 0:
            gc.collect()
            elapsed = time.time() - t0
            print(f" [{i+1}/{total}] {ds} {elapsed:.0f}s")

    return stats, ctrl, time.time() - t0


def summarize_hold(stats, ctrl, direction, hold, tp_bps, years=None):
    """Aggregate one (direction, hold, tp_bps) summary row across *years*.

    Returns (year_cells, total_pf, total_wr_pct, edge_pp), where year_cells
    holds the pre-formatted per-year PF strings. Years without control
    samples contribute nothing to the baseline denominator.
    """
    years = YEARS if years is None else years
    side = 'dn' if direction == 'S' else 'up'
    year_cells = []
    tot_w = tot_l = 0
    tot_gw = tot_gl = 0.0
    ctrl_hits = ctrl_n = 0

    for yr in years:
        s = stats.get((direction, hold, tp_bps, yr))
        if s is None or s['wins'] + s['losses'] == 0:
            year_cells.append(" ---")
        else:
            year_cells.append(f"{profit_factor(s['gw'], s['gl']):>8.3f}")
        if s is not None:
            tot_w += s['wins']
            tot_l += s['losses']
            tot_gw += s['gw']
            tot_gl += s['gl']
        c = ctrl.get((hold, tp_bps, yr))
        if c is not None:
            ctrl_hits += c[side]
            ctrl_n += c['n']

    tot_n = tot_w + tot_l
    tot_pf = profit_factor(tot_gw, tot_gl)
    tot_wr = tot_w / tot_n * 100 if tot_n > 0 else 0.0
    ctrl_bl = ctrl_hits / ctrl_n * 100 if ctrl_n > 0 else float('nan')
    return year_cells, tot_pf, tot_wr, tot_wr - ctrl_bl


def print_summary(stats, ctrl):
    """Print, for each direction × tp_bps, a table of PF versus hold time."""
    for direction in ['S', 'L']:
        for tp_bps in TP_BPS_LIST:
            print(f"\n{'='*85}")
            print(f" {direction} TP={tp_bps}bps — PF by hold time (bars=minutes on 1m klines)")
            print(f"{'='*85}")
            hdr = f" {'hold':>6}" + "".join(f" {yr:>10}" for yr in YEARS) + f" {'TOTAL_PF':>9} {'WR%':>7} {'Edge':>7}"
            print(hdr)
            print(f" {'-'*83}")
            for hold in HOLD_TIMES:
                cells, tot_pf, tot_wr, edge = summarize_hold(stats, ctrl, direction, hold, tp_bps)
                hold_min = hold  # on 1m klines, 1 bar = 1 minute
                print(f" {hold:>3}b={hold_min:>2}m" + "".join(f" {cell:>10}" for cell in cells) +
                      f" {tot_pf:>9.3f} {tot_wr:>6.1f}% {edge:>+6.1f}pp")


def build_rows(stats, ctrl):
    """Return one CSV row dict per (direction, hold, tp_bps, year) in *stats*."""
    rows = []
    for (direction, hold, tp_bps, yr), s in stats.items():
        bl = baseline_pct(ctrl.get((hold, tp_bps, yr)), direction)
        n_t = s['wins'] + s['losses']
        wr = s['wins'] / n_t * 100 if n_t > 0 else float('nan')
        pf_val = profit_factor(s['gw'], s['gl'])
        edge = wr - bl if n_t > 0 else float('nan')
        rows.append({
            'direction': direction, 'hold_bars': hold, 'hold_min': hold,
            'tp_bps': tp_bps, 'year': yr,
            'n_trades': n_t, 'wins': s['wins'], 'losses': s['losses'],
            'wr': round(wr, 3), 'pf': round(pf_val, 4), 'edge_pp': round(edge, 3),
            'gross_win': round(s['gw'], 6), 'gross_loss': round(s['gl'], 6),
            'ctrl_bl': round(bl, 3),
        })
    return rows


def save_csv(rows, log_dir=LOG_DIR, ts=None):
    """Write *rows* to log_dir/maxhold_sweep_<ts>.csv and return the path.

    The directory (including missing parents) is created if needed. With no
    rows a header-only file is written. *ts* defaults to the current local
    time formatted as YYYYMMDD_HHMMSS.
    """
    log_dir = Path(log_dir)
    log_dir.mkdir(parents=True, exist_ok=True)
    if ts is None:
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = log_dir / f"maxhold_sweep_{ts}.csv"
    with open(out_path, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=CSV_FIELDS)
        writer.writeheader()
        writer.writerows(rows)
    return out_path


def main():
    """Run the sweep over VBT_DIR, print the summary and save the CSV."""
    # Keep the non-ASCII summary characters printable on legacy consoles.
    if hasattr(sys.stdout, 'reconfigure'):
        sys.stdout.reconfigure(encoding='utf-8', errors='replace')

    parquet_files = sorted(VBT_DIR.glob("*.parquet"))
    parquet_files = [p for p in parquet_files if 'catalog' not in p.name]
    total = len(parquet_files)
    print(f"Files: {total}")
    print(f"Hold times (bars): {HOLD_TIMES}")
    print(f"TP (bps): {TP_BPS_LIST}")
    print(f"Thresholds: SHORT<={SHORT_T} LONG>={LONG_T}")
    print()

    stats, ctrl, elapsed = run_sweep(parquet_files)
    print(f"\nPass complete: {elapsed:.0f}s\n")

    print_summary(stats, ctrl)

    out_path = save_csv(build_rows(stats, ctrl))
    print(f"\n → {out_path}")
    print(f" Runtime: {elapsed:.0f}s")
    print(f"\n KEY: Look for PF > 1.0 rows — that's the profitable hold/TP combination.")
    print(f" Legacy optimal: 10 bars (10 min on 1m klines) at 95bps TP.")


if __name__ == "__main__":
    main()