Files
DOLPHIN/nautilus_dolphin/test_pf_acb_sweep.py

291 lines
12 KiB
Python
Raw Normal View History

"""Inverse ACB sweep: multiple boost curves + DD guard + overfitting check.
Approaches:
1. Linear: boost = 1.0 + k * signals
2. v5-stepped: discrete levels per signal count (legacy-inspired)
3. Log: boost = 1.0 + k * log(1 + signals)
4. Convex: boost = 1.0 + k * signals^2
5. Fat-tail: boost = 1.0 + k * signals^1.5
6. Adaptive: boost based on trailing stress-day win rate (WR); listed as an idea only, no adaptive curve is among the strategies swept below
Overfitting check: split into first/second half, verify best approach holds OOS.
"""
import sys, time, math
from pathlib import Path
import numpy as np
import pandas as pd
sys.path.insert(0, str(Path(__file__).parent))
# Warm-up section: each imported `*_nb` function is called once with small
# dummy arguments before any backtest runs, and the elapsed wall-clock time is
# printed as "JIT compile". The timer starts before the three kernel imports,
# so their import time is included in the reported figure.
# NOTE(review): presumably these are numba-jitted functions, as the printed
# message says -- confirm in the imported modules.
print("Compiling numba kernels...")
t_jit = time.time()
from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb
from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb
from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb
# Tiny float64 vector reused as the dummy input for the array-taking kernels.
_p = np.array([1.0, 2.0, 3.0], dtype=np.float64)
compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01)
rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20)
# Return values of the warm-up calls are discarded; only the call itself matters here.
compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0,
np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64),
np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04)
check_dc_nb(_p, 3, 1, 0.75)
print(f" JIT compile: {time.time() - t_jit:.1f}s")
# These two imports happen after the timing print, so they are not part of the
# reported JIT figure.
from nautilus_dolphin.nautilus.alpha_orchestrator import NDAlphaEngine
from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker
# Directory that is globbed for the per-date ``*.parquet`` files
# (machine-specific absolute path).
VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache")

# Parquet columns that are NOT asset price columns; every other column is
# treated as an asset when the data is loaded.
META_COLS = {
    'timestamp',
    'scan_number',
    'v50_lambda_max_velocity',
    'v150_lambda_max_velocity',
    'v300_lambda_max_velocity',
    'v750_lambda_max_velocity',
    'vel_div',
    'instability_50',
    'instability_150',
}

# Keyword arguments passed unchanged to every NDAlphaEngine the sweep builds.
ENGINE_KWARGS = {
    'initial_capital': 25000.0,
    # vel_div thresholds
    'vel_div_threshold': -0.02,
    'vel_div_extreme': -0.05,
    # leverage / sizing
    'min_leverage': 0.5,
    'max_leverage': 5.0,
    'leverage_convexity': 3.0,
    'fraction': 0.20,
    # exits
    'fixed_tp_pct': 0.0099,
    'stop_pct': 1.0,
    'max_hold_bars': 120,
    # direction confirmation (dc_*)
    'use_direction_confirm': True,
    'dc_lookback_bars': 7,
    'dc_min_magnitude_bps': 0.75,
    'dc_skip_contradicts': True,
    'dc_leverage_boost': 1.0,
    'dc_leverage_reduce': 0.5,
    # asset selection
    'use_asset_selection': True,
    'min_irp_alignment': 0.45,
    # fees / slippage (sp_*)
    'use_sp_fees': True,
    'use_sp_slippage': True,
    'sp_maker_entry_rate': 0.62,
    'sp_maker_exit_rate': 0.50,
    # order-book edge (ob_*)
    'use_ob_edge': True,
    'ob_edge_bps': 5.0,
    'ob_confirm_rate': 0.40,
    # misc
    'lookback': 100,
    'use_alpha_layers': True,
    'use_dynamic_leverage': True,
    'seed': 42,
}
# Circuit breaker instance; in this script it is only queried for the
# per-date 'signals' value.
acb = AdaptiveCircuitBreaker()

# One parquet file per date (the file stem is used as the date key everywhere
# below). sorted() fixes the replay order.
parquet_files = sorted(VBT_DIR.glob("*.parquet"))

# date stem -> 'signals' entry of the circuit breaker's result for that date
acb_signals = dict(
    (path.stem, acb.get_cut_for_date(path.stem)['signals'])
    for path in parquet_files
)
# Vol percentiles
# Calibrate the volatility gate used by run_backtest(): collect the standard
# deviation of BTCUSDT simple returns over a trailing 50-price window, using
# only the first two dates, and take the 60th percentile as the threshold.
all_vols = []
for pf in parquet_files[:2]:
    df = pd.read_parquet(pf)
    if 'BTCUSDT' not in df.columns:
        continue
    prices = df['BTCUSDT'].values
    if len(prices) <= 60:
        # The first sample is taken at bar 60; shorter files contribute nothing.
        continue
    # Bar-to-bar simple returns, computed once per file. Every window below is
    # a slice of this array instead of a fresh diff/divide per bar.
    rets_all = np.diff(prices) / prices[:-1]
    for i in range(60, len(prices)):
        # Returns inside prices[i-50:i]: the 49 returns ending at bar i-1.
        v = float(np.std(rets_all[i - 50:i - 1]))
        if v > 0:
            all_vols.append(v)
if not all_vols:
    # Without samples the percentile is undefined and the volatility gate in
    # run_backtest() would be meaningless, so fail with a clear message.
    raise RuntimeError(
        f"No BTCUSDT volatility samples found in the first two parquet files "
        f"under {VBT_DIR}; cannot compute vol_p60"
    )
vol_p60 = float(np.percentile(all_vols, 60))
# Pre-load all parquet data to avoid re-reading
# pq_data maps a date stem to (DataFrame, asset column names, per-bar BTCUSDT
# volatility). The volatility at bar i is the standard deviation of the simple
# returns inside prices[i-50:i]; bars 0..49 (and every bar when the file has
# no BTCUSDT column) stay NaN.
print("Pre-loading parquet data...")
pq_data = {}
for pf in parquet_files:
    df = pd.read_parquet(pf)
    asset_cols = [c for c in df.columns if c not in META_COLS]
    btc_prices = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None
    date_vol = np.full(len(df), np.nan)
    if btc_prices is not None and len(btc_prices) > 50:
        # Bar-to-bar simple returns computed once; each window is a slice of
        # this array rather than a new diff/divide per bar.
        rets_all = np.diff(btc_prices) / btc_prices[:-1]
        for i in range(50, len(btc_prices)):
            # 49 returns ending at bar i-1 (the returns of prices[i-50:i]).
            date_vol[i] = float(np.std(rets_all[i - 50:i - 1]))
    pq_data[pf.stem] = (df, asset_cols, date_vol)
print(f" Loaded {len(pq_data)} dates")
# ── Boost curve definitions ──────────────────────────────────────────────────
def curve_baseline(signals):
    """Return the neutral size multiplier (1.0) for any ``signals`` value.

    Reference curve for the sweep: the argument is accepted only so this
    function can be called the same way as the other boost curves.
    """
    neutral_multiplier = 1.0
    return neutral_multiplier
def make_linear(k):
    """Build a linear boost curve with slope ``k``.

    The returned callable maps a signal count to ``1.0 + k * signals`` when
    the count is at least 1.0 and to exactly 1.0 otherwise. Its ``__name__``
    is set to ``linear_k<k>``.
    """
    def f(signals):
        if signals >= 1.0:
            return 1.0 + k * signals
        return 1.0

    f.__name__ = f"linear_k{k}"
    return f
def make_v5_stepped(levels):
    """Build a stepped boost curve from a ``{signal_threshold: boost}`` dict.

    The returned callable yields the boost stored for the highest threshold
    that ``signals`` reaches (``signals >= threshold``), or 1.0 when it
    reaches none. Its ``__name__`` is set to ``v5_<number of levels>lvl``.
    """
    def f(signals):
        # Walk the thresholds from highest to lowest; the first one reached
        # is the highest one reached, so its boost is the answer.
        for thresh in sorted(levels, reverse=True):
            if signals >= thresh:
                return levels[thresh]
        return 1.0

    f.__name__ = f"v5_{len(levels)}lvl"
    return f
def make_log(k):
    """Build a logarithmic boost curve with scale ``k``.

    The returned callable maps a signal count to
    ``1.0 + k * log(1 + signals)`` when the count is at least 1.0 and to
    exactly 1.0 otherwise. Its ``__name__`` is set to ``log_k<k>``.
    """
    def f(signals):
        # Written as a negated ">=" so that anything failing the comparison
        # (including NaN) takes the no-boost path.
        if not (signals >= 1.0):
            return 1.0
        return 1.0 + k * math.log1p(signals)

    f.__name__ = f"log_k{k}"
    return f
def make_convex(k, power=2.0):
    """Build a power-law boost curve ``1.0 + k * signals ** power``.

    The returned callable applies the formula when the signal count is at
    least 1.0 and returns exactly 1.0 otherwise. Its ``__name__`` is set to
    ``convex_k<k>_p<power>``.
    """
    def f(signals):
        if signals >= 1.0:
            scaled = k * (signals ** power)
            return 1.0 + scaled
        return 1.0

    f.__name__ = f"convex_k{k}_p{power}"
    return f
def make_fat_tail(k):
    """Build a boost curve ``1.0 + k * signals ** 1.5``.

    The returned callable applies the formula when the signal count is at
    least 1.0 and returns exactly 1.0 otherwise. Its ``__name__`` is set to
    ``fat_tail_k<k>``.
    """
    def f(signals):
        if signals >= 1.0:
            scaled = k * (signals ** 1.5)
            return 1.0 + scaled
        return 1.0

    f.__name__ = f"fat_tail_k{k}"
    return f
# ── Strategies to test ────────────────────────────────────────────────────────
# Each entry maps a display name to (boost_fn, dd_guard_pct). Insertion order
# is the order in which the sweep runs and prints the strategies.
_DD_GUARD_DEFAULT = 0.03   # guard level shared by most variants
_DD_GUARD_OFF = 1.0        # value used by the baseline and the "noguard" variant
_V5_MODERATE_LEVELS = {1: 1.0, 2: 1.3, 3: 1.6}   # only read by the curves built from it

strategies = {
    "baseline": (curve_baseline, _DD_GUARD_OFF),
    # Linear variants
    "linear_0.15": (make_linear(0.15), _DD_GUARD_DEFAULT),
    "linear_0.25": (make_linear(0.25), _DD_GUARD_DEFAULT),
    "linear_0.40": (make_linear(0.40), _DD_GUARD_DEFAULT),
    # v5-stepped (legacy inspired)
    "v5_conservative": (make_v5_stepped({1: 1.0, 2: 1.2, 3: 1.4}), _DD_GUARD_DEFAULT),
    "v5_moderate": (make_v5_stepped(_V5_MODERATE_LEVELS), _DD_GUARD_DEFAULT),
    "v5_aggressive": (make_v5_stepped({1: 1.1, 2: 1.5, 3: 2.0}), _DD_GUARD_DEFAULT),
    # Logarithmic
    "log_0.3": (make_log(0.3), _DD_GUARD_DEFAULT),
    "log_0.5": (make_log(0.5), _DD_GUARD_DEFAULT),
    # Convex (quadratic)
    "convex_0.08": (make_convex(0.08), _DD_GUARD_DEFAULT),
    "convex_0.15": (make_convex(0.15), _DD_GUARD_DEFAULT),
    # Fat-tailed
    "fat_tail_0.10": (make_fat_tail(0.10), _DD_GUARD_DEFAULT),
    "fat_tail_0.20": (make_fat_tail(0.20), _DD_GUARD_DEFAULT),
    # DD guard variants (with v5_moderate boost)
    "v5mod_dd2pct": (make_v5_stepped(_V5_MODERATE_LEVELS), 0.02),
    "v5mod_dd4pct": (make_v5_stepped(_V5_MODERATE_LEVELS), 0.04),
    "v5mod_noguard": (make_v5_stepped(_V5_MODERATE_LEVELS), _DD_GUARD_OFF),
}
def run_backtest(file_list, boost_fn, dd_guard_pct):
    """Replay the pre-loaded bars of ``file_list`` through a fresh engine.

    Args:
        file_list: Parquet paths, one per date, in replay order. Only
            ``.stem`` is used, as the key into ``acb_signals`` and ``pq_data``.
        boost_fn: Callable mapping the date's ACB signal count to the size
            multiplier written to ``engine.regime_size_mult``.
        dd_guard_pct: Fraction of the day's capital peak; once the intraday
            drawdown exceeds it, ``engine.regime_dd_halt`` is set for the rest
            of that date. Only active on dates whose multiplier is above 1.0.

    Returns:
        dict with keys ``roi`` (%), ``pf`` (gross wins / gross losses, ``inf``
        when there are no losses), ``max_dd`` (%, measured on end-of-day
        capital only), ``sharpe`` (mean/std of daily returns scaled by
        sqrt(365)), ``trades``, ``wr`` (%), ``capital`` and ``fees``.
    """
    engine = NDAlphaEngine(**ENGINE_KWARGS)
    # Starting capital is read from the engine instead of a hard-coded 25000,
    # so ROI and daily returns stay correct if the configuration changes.
    # Assumes engine.capital starts at the configured initial capital (the
    # peak tracking below relies on the same assumption).
    initial_capital = engine.capital
    bar_idx = 0                 # global bar counter across all dates
    price_histories = {}        # asset -> list of every valid price seen so far
    peak_capital = initial_capital
    max_dd = 0.0
    date_pnls = []

    for path in file_list:
        date_str = path.stem
        size_mult = boost_fn(acb_signals[date_str])
        engine.regime_direction = -1
        engine.regime_size_mult = size_mult
        engine.regime_dd_halt = False
        cap_start = engine.capital
        day_peak = cap_start
        use_dd_guard = size_mult > 1.0
        df, asset_cols, date_vol = pq_data[date_str]

        for row_i in range(len(df)):
            # Every row consumes one global bar index, whether or not it is
            # forwarded to the engine.
            this_bar = bar_idx
            bar_idx += 1

            row = df.iloc[row_i]
            vel_div = row.get("vel_div")
            if vel_div is None or not np.isfinite(vel_div):
                continue

            prices = {}
            for ac in asset_cols:
                p = row[ac]
                if p and p > 0 and np.isfinite(p):
                    price = float(p)
                    prices[ac] = price
                    price_histories.setdefault(ac, []).append(price)
            if not prices:
                continue

            # The volatility gate stays closed for the first 100 rows of each
            # date; afterwards it needs a finite volatility above vol_p60.
            if row_i < 100:
                vol_regime_ok = False
            else:
                v = date_vol[row_i]
                vol_regime_ok = (np.isfinite(v) and v > vol_p60)

            engine.process_bar(bar_idx=this_bar, vel_div=float(vel_div),
                               prices=prices, vol_regime_ok=vol_regime_ok,
                               price_histories=price_histories)

            if use_dd_guard:
                day_peak = max(day_peak, engine.capital)
                if day_peak > 0 and (day_peak - engine.capital) / day_peak > dd_guard_pct:
                    engine.regime_dd_halt = True

        # End-of-day bookkeeping: daily PnL and drawdown from the running
        # peak of end-of-day capital.
        cap_end = engine.capital
        date_pnls.append(cap_end - cap_start)
        peak_capital = max(peak_capital, cap_end)
        dd = (peak_capital - cap_end) / peak_capital * 100 if peak_capital > 0 else 0
        max_dd = max(max_dd, dd)

    trades = engine.trade_history
    wins = [t for t in trades if t.pnl_absolute > 0]
    losses = [t for t in trades if t.pnl_absolute <= 0]
    gross_win = sum(t.pnl_absolute for t in wins)
    gross_loss = abs(sum(t.pnl_absolute for t in losses))
    profit_factor = gross_win / gross_loss if gross_loss > 0 else float("inf")

    roi = (engine.capital - initial_capital) / initial_capital * 100
    # Daily returns are approximated against the initial capital.
    daily_rets = [p / initial_capital * 100 for p in date_pnls]
    # An empty file_list would otherwise feed empty input to np.mean/np.std.
    ret_std = np.std(daily_rets) if daily_rets else 0.0
    sharpe = np.mean(daily_rets) / ret_std * np.sqrt(365) if ret_std > 0 else 0

    return {
        'roi': roi, 'pf': profit_factor, 'max_dd': max_dd, 'sharpe': sharpe,
        'trades': len(trades), 'wr': len(wins) / len(trades) * 100 if trades else 0,
        'capital': engine.capital, 'fees': engine.total_fees,
    }
# ── Run all strategies ────────────────────────────────────────────────────────
# Full-period sweep: every strategy is replayed over all dates and printed as
# one table row. Rows whose ROI beats the baseline row are tagged " <--".
print(f"\n{'='*110}")
print(f"{'STRATEGY':<22} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'SHARPE':>7} {'TRADES':>7} {'WR%':>6} {'CAPITAL':>10} {'FEES':>9}")
print(f"{'='*110}")

results_full = {}
t0 = time.time()
for name, (boost_fn, dd_guard) in strategies.items():
    t1 = time.time()
    r = run_backtest(parquet_files, boost_fn, dd_guard)
    elapsed = time.time() - t1  # per-strategy wall time; measured but not printed
    results_full[name] = r

    # The -999 fallback only applies while no "baseline" result is stored yet.
    baseline_roi = results_full.get('baseline', {}).get('roi', -999)
    if name != "baseline" and r['roi'] > baseline_roi:
        marker = " <--"
    else:
        marker = ""

    print(f"{name:<22} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['max_dd']:>6.2f} {r['sharpe']:>7.2f} "
          f"{r['trades']:>7} {r['wr']:>6.1f} {r['capital']:>10.2f} {r['fees']:>9.2f}{marker}")

print(f"\nFull sweep: {time.time()-t0:.0f}s")
# ── Overfitting check: split in half ─────────────────────────────────────────
# Each selected strategy is replayed separately on the first and second half
# of the dates (each run starts from a fresh engine).
mid = len(parquet_files) // 2
first_half = parquet_files[:mid]
second_half = parquet_files[mid:]
# Only test top strategies to save time
top_strats = ["baseline", "linear_0.15", "linear_0.25", "v5_conservative", "v5_moderate",
              "log_0.3", "fat_tail_0.10", "convex_0.08", "v5mod_noguard"]
if not first_half:
    # With fewer than two dates the first half is empty and cannot be reported.
    print("\n OVERFITTING CHECK skipped: at least two dates are required for a split")
else:
    print(f"\n{'='*110}")
    print(f" OVERFITTING CHECK: First half ({first_half[0].stem} to {first_half[-1].stem}) vs Second half ({second_half[0].stem} to {second_half[-1].stem})")
    print(f"{'='*110}")
    print(f"{'STRATEGY':<22} {'H1 ROI%':>8} {'H2 ROI%':>8} {'H1 PF':>6} {'H2 PF':>6} {'H1 DD%':>7} {'H2 DD%':>7} {'STABLE?':>8}")
    for name in top_strats:
        boost_fn, dd_guard = strategies[name]
        r1 = run_backtest(first_half, boost_fn, dd_guard)
        r2 = run_backtest(second_half, boost_fn, dd_guard)
        # Stability label as implemented:
        #   baseline -> "YES" when both halves have a positive ROI, else "NO"
        #   others   -> "YES" when the second-half ROI reaches at least 30% of
        #               the baseline's FULL-period ROI, else "OVERFIT?"
        # NOTE(review): the non-baseline rule compares a half-period ROI with a
        # full-period one; confirm this heuristic is intended.
        if name == "baseline":
            stable = "YES" if (r1['roi'] > 0 and r2['roi'] > 0) else "NO"
        else:
            stable = "YES" if r2['roi'] >= results_full['baseline']['roi'] * 0.3 else "OVERFIT?"
        print(f"{name:<22} {r1['roi']:>+8.2f} {r2['roi']:>+8.2f} {r1['pf']:>6.3f} {r2['pf']:>6.3f} "
              f"{r1['max_dd']:>7.2f} {r2['max_dd']:>7.2f} {stable:>8}")
print(f"\nTotal time: {time.time()-t0:.0f}s")
# Best strategy
# Pick the entry with the highest full-period ROI. The baseline is scored with
# the sentinel -999 so that it is effectively excluded from the ranking.
def _roi_excluding_baseline(item):
    """Sort key for (name, result) pairs: the ROI, or -999 for the baseline."""
    strat_name, strat_result = item
    if strat_name == "baseline":
        return -999
    return strat_result['roi']


best = max(results_full.items(), key=_roi_excluding_baseline)
base_r = results_full['baseline']

# Side-by-side report: baseline value -> best strategy value.
print(f"\n=== BEST: {best[0]} ===")
print(f"ROI: {base_r['roi']:+.2f}% -> {best[1]['roi']:+.2f}% ({best[1]['roi']-base_r['roi']:+.2f}%)")
print(f"PF: {base_r['pf']:.3f} -> {best[1]['pf']:.3f}")
print(f"DD: {base_r['max_dd']:.2f}% -> {best[1]['max_dd']:.2f}%")