Files
DOLPHIN/nautilus_dolphin/maxhold_sweep_5y.py
hjnormey 01c19662cb initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00

219 lines
8.9 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""MAX_HOLD Sweep — 5y Klines
=============================
Root cause of -99.9% ROI: MAX_HOLD=120 bars × 60s = 2 HOURS.
Legacy optimal hold = 120 bars × 5s = 600s = 10 MINUTES.
On 1m klines that's 10 bars.
This sweep tests MAX_HOLD = [3, 5, 8, 10, 12, 15, 20, 30, 60, 120] bars
on SHORT and LONG directions at the default threshold (+-0.020).
Metric: Profit Factor (gross_win / gross_loss) and edge (WR - baseline).
PF > 1.0 = profitable raw (before fees/leverage). Target: find the sweet spot.
Also sweeps TP_BPS = [60, 95, 120, 150] to find the right TP/hold combo.
Output: run_logs/maxhold_sweep_YYYYMMDD_HHMMSS.csv + console summary
Runtime: ~2-3 minutes
"""
import sys, time, csv, gc
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import numpy as np
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view
# Input: per-dataset parquet klines cache (one file per dataset, stem = YYYY...).
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines")
# Output: CSV summaries land here (created on demand at save time).
LOG_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\nautilus_dolphin\run_logs")
# Signal thresholds on the `vel_div` column: SHORT fires at/below SHORT_T,
# LONG at/above LONG_T (the "default threshold" from the module docstring).
SHORT_T = -0.020
LONG_T = +0.020
# Sweep parameters
HOLD_TIMES = [3, 5, 8, 10, 12, 15, 20, 30, 60, 120]  # max hold, in bars (= minutes on 1m klines)
TP_BPS_LIST = [60, 95, 120, 150]  # take-profit levels, basis points
MAX_HOLD_MAX = max(HOLD_TIMES) # precompute windows up to this size
# Enumerate datasets, skipping anything under a Nautilus catalog directory.
parquet_files = [
    p for p in sorted(VBT_DIR.glob("*.parquet")) if 'catalog' not in str(p)
]
total = len(parquet_files)

# Echo the sweep configuration before the long pass starts.
print(f"Files: {total}")
print(f"Hold times (bars): {HOLD_TIMES}")
print(f"TP (bps): {TP_BPS_LIST}")
print(f"Thresholds: SHORT<={SHORT_T} LONG>={LONG_T}")
print()

# stats[(direction, hold, tp_bps, year)] = {wins, losses, gw, gl}
stats = defaultdict(lambda: dict(wins=0, losses=0, gw=0.0, gl=0.0))
# ctrl[(hold, tp_bps, year)] = {up, dn, n} — signal-agnostic chance baseline
ctrl = defaultdict(lambda: dict(up=0, dn=0, n=0))
t0 = time.time()
# ---- Pass 1: scan every dataset once, accumulating trade stats -------------
# Per parquet file (stem prefix YYYY buckets stats by year):
#   * entry price = window bar 0; future prices = bars 1..hold
#   * a signal fires wherever vel_div crosses the SHORT/LONG threshold
#   * win  = TP level touched within the hold window (credited exactly tp_pct)
#   * loss = timed exit at bar `hold`; |return| accumulated into gross loss
# A control sample (every 30th bar, ignoring signals) measures how often the
# same TP would be touched by chance — used for the WR-edge columns later.
for i, pf in enumerate(parquet_files):
    ds = pf.stem
    year = ds[:4]  # dataset names start with the year
    try:
        df = pd.read_parquet(pf)
    except Exception:
        continue  # unreadable file: best-effort sweep, just skip it
    if 'vel_div' not in df.columns or 'BTCUSDT' not in df.columns:
        continue
    vd = df['vel_div'].values.astype(np.float64)
    btc = df['BTCUSDT'].values.astype(np.float64)
    vd = np.where(np.isfinite(vd), vd, 0.0)                     # NaN signal -> neutral
    btc = np.where(np.isfinite(btc) & (btc > 0), btc, np.nan)   # bad price -> NaN
    n = len(btc)
    del df
    if n < MAX_HOLD_MAX + 5:
        del vd, btc
        continue  # too short to fit even one max-hold window
    n_usable = n - MAX_HOLD_MAX
    # Precompute the largest window; sub-windows reuse slices
    big_windows = sliding_window_view(btc, MAX_HOLD_MAX + 1)[:n_usable]
    ep_arr = big_windows[:, 0]  # entry price per bar
    valid = np.isfinite(ep_arr) & (ep_arr > 0)
    short_active = (vd[:n_usable] <= SHORT_T) & valid
    long_active = (vd[:n_usable] >= LONG_T) & valid
    ctrl_js = np.arange(0, n_usable, 30)  # control baseline: sample every 30 bars
    for hold in HOLD_TIMES:
        # big_windows[:, 1:hold+1] covers future bars 1..hold for this hold.
        sub = big_windows[:, 1:hold + 1]            # shape (n_usable, hold)
        fut_min = np.nanmin(sub, axis=1)
        fut_max = np.nanmax(sub, axis=1)
        last_px = big_windows[:, hold]              # timed-exit price at bar `hold`
        # Control excursions are TP-independent: compute them once per hold,
        # then only the threshold comparison varies with tp_bps below.
        sel = ctrl_js[valid[ctrl_js] & np.isfinite(last_px[ctrl_js])]
        ctrl_ep = ep_arr[sel]
        ctrl_r_dn = (ctrl_ep - fut_min[sel]) / ctrl_ep  # max favorable move for a SHORT
        ctrl_r_up = (fut_max[sel] - ctrl_ep) / ctrl_ep  # max favorable move for a LONG
        for tp_bps in TP_BPS_LIST:
            tp_pct = tp_bps / 10_000.0
            ctrl_key = (hold, tp_bps, year)
            ctrl[ctrl_key]['dn'] += int(np.sum(ctrl_r_dn >= tp_pct))
            ctrl[ctrl_key]['up'] += int(np.sum(ctrl_r_up >= tp_pct))
            ctrl[ctrl_key]['n'] += len(sel)
            for direction, sig_mask in [('S', short_active), ('L', long_active)]:
                idx = np.where(sig_mask)[0]
                if len(idx) == 0:
                    continue
                ep_s = ep_arr[idx]
                fmin_s = fut_min[idx]
                fmax_s = fut_max[idx]
                last_s = last_px[idx]
                if direction == 'S':
                    hit = fmin_s <= ep_s * (1.0 - tp_pct)  # TP touched within hold
                    lret = np.where(np.isfinite(last_s), (ep_s - last_s) / ep_s, 0.0)
                else:
                    hit = fmax_s >= ep_s * (1.0 + tp_pct)
                    lret = np.where(np.isfinite(last_s), (last_s - ep_s) / ep_s, 0.0)
                w = int(np.sum(hit))
                l = int(np.sum(~hit))
                k = (direction, hold, tp_bps, year)
                stats[k]['wins'] += w
                stats[k]['losses'] += l
                stats[k]['gw'] += w * tp_pct  # winners credited exactly at TP
                stats[k]['gl'] += float(np.sum(np.abs(lret[~hit])))
    del vd, btc, big_windows, ep_arr, valid, short_active, long_active
    del sub, fut_min, fut_max, last_px, sel, ctrl_ep, ctrl_r_dn, ctrl_r_up
    if (i + 1) % 200 == 0:
        gc.collect()
        elapsed = time.time() - t0
        print(f" [{i+1}/{total}] {ds} {elapsed:.0f}s")
elapsed = time.time() - t0
print(f"\nPass complete: {elapsed:.0f}s\n")
YEARS = ['2021', '2022', '2023', '2024', '2025', '2026']
# ---- Console summary: one PF table per direction x TP, one row per hold ----
for direction in ['S', 'L']:
    for tp_bps in TP_BPS_LIST:
        print(f"\n{'='*85}")
        print(f" {direction} TP={tp_bps}bps — PF by hold time (bars=minutes on 1m klines)")
        print(f"{'='*85}")
        hdr = f" {'hold':>6}" + "".join(f" {yr:>10}" for yr in YEARS) + f" {'TOTAL_PF':>9} {'WR%':>7} {'Edge':>7}"
        print(hdr)
        print(f" {'-'*83}")
        for hold in HOLD_TIMES:
            yr_pfs = []
            tot_w = tot_l = 0
            tot_gw = tot_gl = 0.0
            for yr in YEARS:
                s = stats.get((direction, hold, tp_bps, yr),
                              {'wins': 0, 'losses': 0, 'gw': 0.0, 'gl': 0.0})
                n_t = s['wins'] + s['losses']
                # PF = gross win / gross loss; 999 flags "wins, zero losses".
                pf = s['gw'] / s['gl'] if s['gl'] > 0 else (999.0 if s['gw'] > 0 else float('nan'))
                yr_pfs.append(f"{pf:>8.3f}" if n_t > 0 else " ---")
                tot_w += s['wins']; tot_l += s['losses']
                tot_gw += s['gw']; tot_gl += s['gl']
            # Totals across all years for this (direction, hold, tp) cell.
            tot_n = tot_w + tot_l
            tot_pf = tot_gw / tot_gl if tot_gl > 0 else 999.0
            tot_wr = tot_w / tot_n * 100 if tot_n > 0 else 0.0
            # Control baseline (aggregate across years) -> chance TP hit-rate.
            tot_ctrl_dn = sum(ctrl.get((hold, tp_bps, yr), {'dn': 0, 'n': 1})['dn'] for yr in YEARS)
            tot_ctrl_up = sum(ctrl.get((hold, tp_bps, yr), {'up': 0, 'n': 1})['up'] for yr in YEARS)
            tot_ctrl_n = sum(ctrl.get((hold, tp_bps, yr), {'n': 1})['n'] for yr in YEARS)
            ctrl_bl = (tot_ctrl_dn / tot_ctrl_n * 100) if direction == 'S' else (tot_ctrl_up / tot_ctrl_n * 100)
            edge = tot_wr - ctrl_bl  # WR minus chance hit-rate, percentage points
            hold_min = hold  # on 1m klines, 1 bar = 1 minute
            print(f" {hold:>3}b={hold_min:>2}m" + "".join(f" {pf:>10}" for pf in yr_pfs) +
                  f" {tot_pf:>9.3f} {tot_wr:>6.1f}% {edge:>+6.1f}pp")
# Save CSV
# ---- Persist the full sweep grid to CSV ------------------------------------
LOG_DIR.mkdir(exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
rows = []
for (direction, hold, tp_bps, yr), s in stats.items():
    c = ctrl.get((hold, tp_bps, yr), {'dn': 0, 'up': 0, 'n': 1})
    # Chance baseline for this cell: % of control samples that touched the TP.
    bl = (c['dn'] / c['n'] * 100) if direction == 'S' else (c['up'] / c['n'] * 100)
    n_t = s['wins'] + s['losses']
    wr = s['wins'] / n_t * 100 if n_t > 0 else float('nan')
    pf = s['gw'] / s['gl'] if s['gl'] > 0 else (999.0 if s['gw'] > 0 else float('nan'))
    edge = wr - bl if n_t > 0 else float('nan')
    rows.append({
        'direction': direction, 'hold_bars': hold, 'hold_min': hold,
        'tp_bps': tp_bps, 'year': yr,
        'n_trades': n_t, 'wins': s['wins'], 'losses': s['losses'],
        'wr': round(wr, 3), 'pf': round(pf, 4), 'edge_pp': round(edge, 3),
        'gross_win': round(s['gw'], 6), 'gross_loss': round(s['gl'], 6),
        'ctrl_bl': round(bl, 3),
    })
out_path = LOG_DIR / f"maxhold_sweep_{ts}.csv"
# Explicit fieldnames: rows[0].keys() would raise IndexError when no dataset
# produced any stats; this way an empty sweep still writes a valid header-only CSV.
fieldnames = ['direction', 'hold_bars', 'hold_min', 'tp_bps', 'year',
              'n_trades', 'wins', 'losses', 'wr', 'pf', 'edge_pp',
              'gross_win', 'gross_loss', 'ctrl_bl']
with open(out_path, 'w', newline='') as f:
    w = csv.DictWriter(f, fieldnames=fieldnames)
    w.writeheader()
    w.writerows(rows)
print(f"\n{out_path}")
print(f" Runtime: {elapsed:.0f}s")
print(f"\n KEY: Look for PF > 1.0 rows — that's the profitable hold/TP combination.")
print(f" Legacy optimal: 10 bars (10 min on 1m klines) at 95bps TP.")