291 lines
12 KiB
Python
291 lines
12 KiB
Python
|
|
"""Inverse ACB sweep: multiple boost curves + drawdown (DD) guard + overfitting check.

Approaches:
  1. Linear:     boost = 1.0 + k * signals
  2. v5-stepped: discrete levels per signal count (legacy-inspired)
  3. Log:        boost = 1.0 + k * log(1 + signals)
  4. Convex:     boost = 1.0 + k * signals^2
  5. Fat-tail:   boost = 1.0 + k * signals^1.5
  6. Adaptive:   boost based on trailing stress-day win rate (WR)
                 (no adaptive curve is defined among the strategies in this script)

The formula-based curves (1, 3, 4, 5) return 1.0 (no boost) when signals < 1.

Overfitting check: split the dates into a first and a second half and verify
that the best approach holds out-of-sample (OOS).
"""
|
||
|
|
import sys, time, math
|
||
|
|
from pathlib import Path
|
||
|
|
import numpy as np
|
||
|
|
import pandas as pd
|
||
|
|
|
||
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
||
|
|
|
||
|
|
print("Compiling numba kernels...")
|
||
|
|
t_jit = time.time()
|
||
|
|
from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
|
||
|
|
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
|
||
|
|
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
|
||
|
|
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
|
||
|
|
compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01)
|
||
|
|
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
|
||
|
|
compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
|
||
|
|
np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
|
||
|
|
np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
|
||
|
|
check_dc_nb(_p, 3, 1, 0.75)
|
||
|
|
print(f" JIT compile: {time.time() - t_jit:.1f}s")
|
||
|
|
|
||
|
|
from nautilus_dolphin.nautilus.alpha_orchestrator import NDAlphaEngine
|
||
|
|
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
|
||
|
|
|
||
|
|
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")
|
||
|
|
META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
|
||
|
|
'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div',
|
||
|
|
'instability_50', 'instability_150'}
|
||
|
|
|
||
|
|
ENGINE_KWARGS = dict(
|
||
|
|
initial_capital=25000.0,
|
||
|
|
vel_div_threshold=-0.02, vel_div_extreme=-0.05,
|
||
|
|
min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0,
|
||
|
|
fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120,
|
||
|
|
use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75,
|
||
|
|
dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5,
|
||
|
|
use_asset_selection=True, min_irp_alignment=0.45,
|
||
|
|
use_sp_fees=True, use_sp_slippage=True,
|
||
|
|
sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50,
|
||
|
|
use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40,
|
||
|
|
lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42,
|
||
|
|
)
|
||
|
|
|
||
|
|
acb = AdaptiveCircuitBreaker()
|
||
|
|
parquet_files = sorted(VBT_DIR.glob("*.parquet"))
|
||
|
|
acb_signals = {pf.stem: acb.get_cut_for_date(pf.stem)['signals'] for pf in parquet_files}
|
||
|
|
|
||
|
|
# Vol percentiles
# Calibrate the volatility threshold (60th percentile of the trailing 50-bar
# BTCUSDT return std) from the first two files only.
# NOTE(review): sampling only parquet_files[:2] looks deliberate (threshold
# fixed before most of the test period) -- confirm before widening it.
all_vols = []
for pf in parquet_files[:2]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns:
        continue
    prices = df['BTCUSDT'].values
    # i starts at 60, so the trailing window always holds exactly 50 prices.
    for i in range(60, len(prices)):
        seg = prices[i - 50:i]
        rets = np.diff(seg) / seg[:-1]
        v = float(np.std(rets))
        # NaN and zero volatilities fail this test and are skipped.
        if v > 0:
            all_vols.append(v)

if not all_vols:
    # Fail loudly: without samples the percentile is undefined and every
    # later vol-regime comparison against it would be meaningless.
    raise RuntimeError(
        f"No BTCUSDT volatility samples found in the first two parquet files "
        f"under {VBT_DIR}; cannot compute vol_p60"
    )
vol_p60 = float(np.percentile(all_vols, 60))
|
||
|
|
|
||
|
|
# Pre-load all parquet data to avoid re-reading
def _trailing_btc_vol(btc_prices, n_rows):
    """Return a length-``n_rows`` array of trailing 50-bar return std values.

    Entries stay NaN for the first 50 rows, and for every row when
    ``btc_prices`` is None.
    """
    vol = np.full(n_rows, np.nan)
    if btc_prices is None:
        return vol
    for i in range(50, len(btc_prices)):
        window = btc_prices[i - 50:i]
        vol[i] = float(np.std(np.diff(window) / window[:-1]))
    return vol


print("Pre-loading parquet data...")
pq_data = {}
for pf in parquet_files:
    df = pd.read_parquet(pf)
    # Everything that is not a metadata column is an asset price column.
    asset_cols = [c for c in df.columns if c not in META_COLS]
    btc_prices = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    date_vol = _trailing_btc_vol(btc_prices, len(df))
    pq_data[pf.stem] = (df, asset_cols, date_vol)
print(f" Loaded {len(pq_data)} dates")
|
||
|
|
|
||
|
|
|
||
|
|
# ── Boost curve definitions ──────────────────────────────────────────────────
|
||
|
|
|
||
|
|
def curve_baseline(signals):
    """Return the neutral multiplier 1.0 regardless of ``signals``."""
    no_boost = 1.0
    return no_boost
|
||
|
|
|
||
|
|
def make_linear(k):
    """Build a linear boost curve: ``1.0 + k * signals`` once signals >= 1.

    The returned callable is named ``linear_k<k>`` and yields 1.0 (no boost)
    for any signal value below 1.
    """
    def f(signals):
        if signals >= 1.0:
            return 1.0 + k * signals
        return 1.0

    f.__name__ = f"linear_k{k}"
    return f
|
||
|
|
|
||
|
|
def make_v5_stepped(levels):
    """Build a stepped boost curve from ``levels``.

    ``levels`` is a dict ``{signal_threshold: boost}``. The returned callable
    yields the boost of the highest threshold that ``signals`` reaches, or 1.0
    when it reaches none. It is named ``v5_<number of levels>lvl``.
    """
    def f(signals):
        # Scan thresholds from highest to lowest; the first one reached wins.
        for threshold in sorted(levels.keys(), reverse=True):
            if signals >= threshold:
                return levels[threshold]
        return 1.0

    f.__name__ = f"v5_{len(levels)}lvl"
    return f
|
||
|
|
|
||
|
|
def make_log(k):
    """Build a logarithmic boost curve: ``1.0 + k * log(1 + signals)``.

    The returned callable is named ``log_k<k>`` and yields 1.0 (no boost) for
    any signal value below 1.
    """
    def f(signals):
        if signals >= 1.0:
            return 1.0 + k * math.log1p(signals)
        return 1.0

    f.__name__ = f"log_k{k}"
    return f
|
||
|
|
|
||
|
|
def make_convex(k, power=2.0):
    """Build a power-law boost curve: ``1.0 + k * signals ** power``.

    ``power`` defaults to 2.0 (quadratic). The returned callable is named
    ``convex_k<k>_p<power>`` and yields 1.0 (no boost) for signals below 1.
    """
    def f(signals):
        if signals >= 1.0:
            return 1.0 + k * (signals ** power)
        return 1.0

    f.__name__ = f"convex_k{k}_p{power}"
    return f
|
||
|
|
|
||
|
|
def make_fat_tail(k):
    """Build a fat-tail boost curve: ``1.0 + k * signals ** 1.5``.

    The returned callable is named ``fat_tail_k<k>`` and yields 1.0 (no boost)
    for any signal value below 1.
    """
    def f(signals):
        if signals >= 1.0:
            return 1.0 + k * (signals ** 1.5)
        return 1.0

    f.__name__ = f"fat_tail_k{k}"
    return f
|
||
|
|
|
||
|
|
|
||
|
|
# ── Strategies to test ────────────────────────────────────────────────────────

# Drawdown-guard fraction shared by most variants, and the value used where the
# guard is effectively off (a 100% intraday drawdown).
_DD_GUARD_DEFAULT = 0.03
_DD_GUARD_OFF = 1.0

# Step levels reused by the "v5_moderate" family; copied per strategy so each
# curve owns its own dict.
_V5_MODERATE_LEVELS = {1: 1.0, 2: 1.3, 3: 1.6}

# label -> (boost_fn, dd_guard_pct); insertion order is the report order.
strategies = {
    "baseline": (curve_baseline, _DD_GUARD_OFF),
    # Linear variants
    "linear_0.15": (make_linear(0.15), _DD_GUARD_DEFAULT),
    "linear_0.25": (make_linear(0.25), _DD_GUARD_DEFAULT),
    "linear_0.40": (make_linear(0.40), _DD_GUARD_DEFAULT),
    # v5-stepped (legacy inspired)
    "v5_conservative": (make_v5_stepped({1: 1.0, 2: 1.2, 3: 1.4}), _DD_GUARD_DEFAULT),
    "v5_moderate": (make_v5_stepped(dict(_V5_MODERATE_LEVELS)), _DD_GUARD_DEFAULT),
    "v5_aggressive": (make_v5_stepped({1: 1.1, 2: 1.5, 3: 2.0}), _DD_GUARD_DEFAULT),
    # Logarithmic
    "log_0.3": (make_log(0.3), _DD_GUARD_DEFAULT),
    "log_0.5": (make_log(0.5), _DD_GUARD_DEFAULT),
    # Convex (quadratic)
    "convex_0.08": (make_convex(0.08), _DD_GUARD_DEFAULT),
    "convex_0.15": (make_convex(0.15), _DD_GUARD_DEFAULT),
    # Fat-tailed
    "fat_tail_0.10": (make_fat_tail(0.10), _DD_GUARD_DEFAULT),
    "fat_tail_0.20": (make_fat_tail(0.20), _DD_GUARD_DEFAULT),
    # DD guard variants (with v5_moderate boost)
    "v5mod_dd2pct": (make_v5_stepped(dict(_V5_MODERATE_LEVELS)), 0.02),
    "v5mod_dd4pct": (make_v5_stepped(dict(_V5_MODERATE_LEVELS)), 0.04),
    "v5mod_noguard": (make_v5_stepped(dict(_V5_MODERATE_LEVELS)), _DD_GUARD_OFF),
}
|
||
|
|
|
||
|
|
|
||
|
|
def run_backtest(file_list, boost_fn, dd_guard_pct):
    """Replay the pre-loaded bars of ``file_list`` through a fresh engine.

    Args:
        file_list: Sequence of parquet paths; each ``stem`` must be a key of
            the module-level ``pq_data`` and ``acb_signals`` dicts.
        boost_fn: Callable mapping a date's ACB ``signals`` value to the size
            multiplier written to ``engine.regime_size_mult`` for that date.
        dd_guard_pct: Fractional drawdown from the intraday capital peak.
            On dates whose multiplier is above 1.0, exceeding it sets
            ``engine.regime_dd_halt`` for the rest of that date.

    Returns:
        dict with keys ``roi`` (percent of initial capital), ``pf`` (gross
        wins / gross losses, ``inf`` when there are no losses), ``max_dd``
        (percent, measured on end-of-date capital only), ``sharpe``
        (mean/std of per-date returns scaled by sqrt(365)), ``trades``,
        ``wr`` (percent of trades with positive pnl), ``capital`` and ``fees``.
    """
    # Use the configured starting capital rather than a hard-coded literal so
    # ROI and daily returns stay correct if ENGINE_KWARGS is changed.
    initial_capital = ENGINE_KWARGS['initial_capital']

    engine = NDAlphaEngine(**ENGINE_KWARGS)
    bar_idx = 0  # global bar counter, continues across dates
    price_histories = {}  # asset -> list of all valid prices seen so far
    peak_capital = engine.capital
    max_dd = 0.0
    date_pnls = []

    for day_file in file_list:
        date_str = day_file.stem
        signals = acb_signals[date_str]
        size_mult = boost_fn(signals)

        # Per-date regime settings; the halt flag is cleared at every new date.
        engine.regime_direction = -1
        engine.regime_size_mult = size_mult
        engine.regime_dd_halt = False

        cap_start = engine.capital
        day_peak = cap_start
        # The intraday guard only applies on boosted dates.
        use_dd_guard = size_mult > 1.0

        df, asset_cols, date_vol = pq_data[date_str]

        bars_in_date = 0
        for row_i in range(len(df)):
            row = df.iloc[row_i]
            vel_div = row.get("vel_div")
            if vel_div is None or not np.isfinite(vel_div):
                # Unusable bar: advance the counters but do not call the engine.
                bar_idx += 1
                bars_in_date += 1
                continue

            prices = {}
            for ac in asset_cols:
                p = row[ac]
                if p and p > 0 and np.isfinite(p):
                    price = float(p)
                    prices[ac] = price
                    price_histories.setdefault(ac, []).append(price)
            if not prices:
                bar_idx += 1
                bars_in_date += 1
                continue

            # No vol-regime approval during the first 100 bars of a date.
            if bars_in_date < 100:
                vol_regime_ok = False
            else:
                v = date_vol[row_i]
                vol_regime_ok = (np.isfinite(v) and v > vol_p60)

            engine.process_bar(bar_idx=bar_idx, vel_div=float(vel_div),
                               prices=prices, vol_regime_ok=vol_regime_ok,
                               price_histories=price_histories)

            if use_dd_guard:
                day_peak = max(day_peak, engine.capital)
                if day_peak > 0 and (day_peak - engine.capital) / day_peak > dd_guard_pct:
                    engine.regime_dd_halt = True

            bar_idx += 1
            bars_in_date += 1

        cap_end = engine.capital
        date_pnls.append(cap_end - cap_start)
        peak_capital = max(peak_capital, cap_end)
        dd = (peak_capital - cap_end) / peak_capital * 100 if peak_capital > 0 else 0
        max_dd = max(max_dd, dd)

    trades = engine.trade_history
    wins = [t for t in trades if t.pnl_absolute > 0]
    losses = [t for t in trades if t.pnl_absolute <= 0]
    gross_win = sum(t.pnl_absolute for t in wins)
    gross_loss = abs(sum(t.pnl_absolute for t in losses))
    # Named profit_factor (not "pf") so it cannot be confused with a file path.
    profit_factor = gross_win / gross_loss if gross_loss > 0 else float("inf")
    roi = (engine.capital - initial_capital) / initial_capital * 100
    daily_rets = [p / initial_capital * 100 for p in date_pnls]  # approx
    # Evaluate the std once; skip numpy entirely when there are no dates.
    ret_std = np.std(daily_rets) if daily_rets else 0.0
    sharpe = np.mean(daily_rets) / ret_std * np.sqrt(365) if ret_std > 0 else 0

    return {
        'roi': roi, 'pf': profit_factor, 'max_dd': max_dd, 'sharpe': sharpe,
        'trades': len(trades), 'wr': len(wins)/len(trades)*100 if trades else 0,
        'capital': engine.capital, 'fees': engine.total_fees,
    }
|
||
|
|
|
||
|
|
|
||
|
|
# ── Run all strategies ────────────────────────────────────────────────────────

print(f"\n{'='*110}")
print(f"{'STRATEGY':<22} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'SHARPE':>7} {'TRADES':>7} {'WR%':>6} {'CAPITAL':>10} {'FEES':>9}")
print(f"{'='*110}")

results_full = {}
t0 = time.time()

for name, spec in strategies.items():
    boost_fn, dd_guard = spec
    t1 = time.time()
    r = run_backtest(parquet_files, boost_fn, dd_guard)
    elapsed = time.time() - t1
    results_full[name] = r

    # Flag every non-baseline strategy whose ROI beats the baseline's
    # (-999 stands in when no baseline result is recorded).
    baseline_roi = results_full.get('baseline', {}).get('roi', -999)
    if name != "baseline" and r['roi'] > baseline_roi:
        marker = " <--"
    else:
        marker = ""

    print(f"{name:<22} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['max_dd']:>6.2f} {r['sharpe']:>7.2f} "
          f"{r['trades']:>7} {r['wr']:>6.1f} {r['capital']:>10.2f} {r['fees']:>9.2f}{marker}")

print(f"\nFull sweep: {time.time()-t0:.0f}s")
|
||
|
|
|
||
|
|
# ── Overfitting check: split in half ─────────────────────────────────────────

mid = len(parquet_files) // 2
first_half = parquet_files[:mid]
second_half = parquet_files[mid:]

print(f"\n{'='*110}")
print(f" OVERFITTING CHECK: First half ({first_half[0].stem} to {first_half[-1].stem}) vs Second half ({second_half[0].stem} to {second_half[-1].stem})")
print(f"{'='*110}")
print(f"{'STRATEGY':<22} {'H1 ROI%':>8} {'H2 ROI%':>8} {'H1 PF':>6} {'H2 PF':>6} {'H1 DD%':>7} {'H2 DD%':>7} {'STABLE?':>8}")

# Only a shortlist of strategies is re-run on the halves to save time.
top_strats = ["baseline", "linear_0.15", "linear_0.25", "v5_conservative", "v5_moderate",
              "log_0.3", "fat_tail_0.10", "convex_0.08", "v5mod_noguard"]

for name in top_strats:
    boost_fn, dd_guard = strategies[name]
    r1 = run_backtest(first_half, boost_fn, dd_guard)
    r2 = run_backtest(second_half, boost_fn, dd_guard)
    if name == "baseline":
        # Baseline counts as stable when both halves are profitable.
        stable = "YES" if (r1['roi'] > 0 and r2['roi'] > 0) else "NO"
    else:
        # Boosted strategies: second-half ROI must reach 30% of the
        # full-period baseline ROI.
        # NOTE(review): this compares a half-period ROI with a full-period
        # baseline ROI -- confirm that is the intended yardstick.
        stable = "YES" if r2['roi'] >= results_full['baseline']['roi'] * 0.3 else "OVERFIT?"
    print(f"{name:<22} {r1['roi']:>+8.2f} {r2['roi']:>+8.2f} {r1['pf']:>6.3f} {r2['pf']:>6.3f} "
          f"{r1['max_dd']:>7.2f} {r2['max_dd']:>7.2f} {stable:>8}")

print(f"\nTotal time: {time.time()-t0:.0f}s")
|
||
|
|
|
||
|
|
# Best strategy
def _roi_excluding_baseline(item):
    """Sort key: a strategy's ROI, with the baseline pinned to -999."""
    label, result = item
    if label == "baseline":
        return -999
    return result['roi']


best = max(results_full.items(), key=_roi_excluding_baseline)
base_r = results_full['baseline']
best_name, best_r = best
roi_delta = best_r['roi'] - base_r['roi']
print(f"\n=== BEST: {best_name} ===")
print(f"ROI: {base_r['roi']:+.2f}% -> {best_r['roi']:+.2f}% ({roi_delta:+.2f}%)")
print(f"PF: {base_r['pf']:.3f} -> {best_r['pf']:.3f}")
print(f"DD: {base_r['max_dd']:.2f}% -> {best_r['max_dd']:.2f}%")
|