"""Inverse ACB sweep: multiple boost curves + DD guard + overfitting check. Approaches: 1. Linear: boost = 1.0 + k * signals 2. v5-stepped: discrete levels per signal count (legacy-inspired) 3. Log: boost = 1.0 + k * log(1 + signals) 4. Convex: boost = 1.0 + k * signals^2 5. Fat-tail: boost = 1.0 + k * signals^1.5 6. Adaptive: boost based on trailing stress-day WR Overfitting check: split into first/second half, verify best approach holds OOS. """ import sys, time, math from pathlib import Path import numpy as np import pandas as pd sys.path.insert(0, str(Path(__file__).parent)) print("Compiling numba kernels...") t_jit = time.time() from nautilus_dolphin.nautilus.alpha_asset_selector import compute_irp_nb, compute_ars_nb, rank_assets_irp_nb from nautilus_dolphin.nautilus.alpha_bet_sizer import compute_sizing_nb from nautilus_dolphin.nautilus.alpha_signal_generator import check_dc_nb _p = np.array([1.0, 2.0, 3.0], dtype=np.float64) compute_irp_nb(_p, -1); compute_ars_nb(1.0, 0.5, 0.01) rank_assets_irp_nb(np.ones((10, 2), dtype=np.float64), 8, -1, 5, 500.0, 20, 0.20) compute_sizing_nb(-0.03, -0.02, -0.05, 3.0, 0.5, 5.0, 0.20, True, True, 0.0, np.zeros(4, dtype=np.int64), np.zeros(4, dtype=np.int64), np.zeros(5, dtype=np.float64), 0, -1, 0.01, 0.04) check_dc_nb(_p, 3, 1, 0.75) print(f" JIT compile: {time.time() - t_jit:.1f}s") from nautilus_dolphin.nautilus.alpha_orchestrator import NDAlphaEngine from nautilus_dolphin.nautilus.adaptive_circuit_breaker import AdaptiveCircuitBreaker VBT_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache") META_COLS = {'timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity', 'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150'} ENGINE_KWARGS = dict( initial_capital=25000.0, vel_div_threshold=-0.02, vel_div_extreme=-0.05, min_leverage=0.5, max_leverage=5.0, leverage_convexity=3.0, fraction=0.20, fixed_tp_pct=0.0099, stop_pct=1.0, max_hold_bars=120, use_direction_confirm=True, dc_lookback_bars=7, dc_min_magnitude_bps=0.75, dc_skip_contradicts=True, dc_leverage_boost=1.0, dc_leverage_reduce=0.5, use_asset_selection=True, min_irp_alignment=0.45, use_sp_fees=True, use_sp_slippage=True, sp_maker_entry_rate=0.62, sp_maker_exit_rate=0.50, use_ob_edge=True, ob_edge_bps=5.0, ob_confirm_rate=0.40, lookback=100, use_alpha_layers=True, use_dynamic_leverage=True, seed=42, ) acb = AdaptiveCircuitBreaker() parquet_files = sorted(VBT_DIR.glob("*.parquet")) acb_signals = {pf.stem: acb.get_cut_for_date(pf.stem)['signals'] for pf in parquet_files} # Vol percentiles all_vols = [] for pf in parquet_files[:2]: df = pd.read_parquet(pf) if 'BTCUSDT' not in df.columns: continue prices = df['BTCUSDT'].values for i in range(60, len(prices)): seg = prices[max(0, i-50):i] if len(seg) < 10: continue rets = np.diff(seg) / seg[:-1] v = float(np.std(rets)) if v > 0: all_vols.append(v) vol_p60 = float(np.percentile(all_vols, 60)) # Pre-load all parquet data to avoid re-reading print("Pre-loading parquet data...") pq_data = {} for pf in parquet_files: df = pd.read_parquet(pf) asset_cols = [c for c in df.columns if c not in META_COLS] btc_prices = df['BTCUSDT'].values if 'BTCUSDT' in df.columns else None date_vol = np.full(len(df), np.nan) if btc_prices is not None: for i in range(50, len(btc_prices)): seg = btc_prices[max(0, i-50):i] if len(seg) < 10: continue rets = np.diff(seg) / seg[:-1] date_vol[i] = float(np.std(rets)) pq_data[pf.stem] = (df, asset_cols, date_vol) print(f" Loaded 
{len(pq_data)} dates") # ── Boost curve definitions ────────────────────────────────────────────────── def curve_baseline(signals): return 1.0 def make_linear(k): def f(signals): return 1.0 + k * signals if signals >= 1.0 else 1.0 f.__name__ = f"linear_k{k}" return f def make_v5_stepped(levels): """levels: dict {signal_threshold: boost}""" def f(signals): boost = 1.0 for thresh in sorted(levels.keys()): if signals >= thresh: boost = levels[thresh] return boost f.__name__ = f"v5_{len(levels)}lvl" return f def make_log(k): def f(signals): return 1.0 + k * math.log1p(signals) if signals >= 1.0 else 1.0 f.__name__ = f"log_k{k}" return f def make_convex(k, power=2.0): def f(signals): return 1.0 + k * (signals ** power) if signals >= 1.0 else 1.0 f.__name__ = f"convex_k{k}_p{power}" return f def make_fat_tail(k): def f(signals): return 1.0 + k * (signals ** 1.5) if signals >= 1.0 else 1.0 f.__name__ = f"fat_tail_k{k}" return f # ── Strategies to test ──────────────────────────────────────────────────────── strategies = { "baseline": (curve_baseline, 1.0), # (boost_fn, dd_guard_pct) # Linear variants "linear_0.15": (make_linear(0.15), 0.03), "linear_0.25": (make_linear(0.25), 0.03), "linear_0.40": (make_linear(0.40), 0.03), # v5-stepped (legacy inspired) "v5_conservative": (make_v5_stepped({1: 1.0, 2: 1.2, 3: 1.4}), 0.03), "v5_moderate": (make_v5_stepped({1: 1.0, 2: 1.3, 3: 1.6}), 0.03), "v5_aggressive": (make_v5_stepped({1: 1.1, 2: 1.5, 3: 2.0}), 0.03), # Logarithmic "log_0.3": (make_log(0.3), 0.03), "log_0.5": (make_log(0.5), 0.03), # Convex (quadratic) "convex_0.08": (make_convex(0.08), 0.03), "convex_0.15": (make_convex(0.15), 0.03), # Fat-tailed "fat_tail_0.10": (make_fat_tail(0.10), 0.03), "fat_tail_0.20": (make_fat_tail(0.20), 0.03), # DD guard variants (with v5_moderate boost) "v5mod_dd2pct": (make_v5_stepped({1: 1.0, 2: 1.3, 3: 1.6}), 0.02), "v5mod_dd4pct": (make_v5_stepped({1: 1.0, 2: 1.3, 3: 1.6}), 0.04), "v5mod_noguard": (make_v5_stepped({1: 1.0, 2: 1.3, 3: 1.6}), 1.0), } def run_backtest(file_list, boost_fn, dd_guard_pct): engine = NDAlphaEngine(**ENGINE_KWARGS) bar_idx = 0 price_histories = {} peak_capital = engine.capital max_dd = 0.0 date_pnls = [] for pf in file_list: date_str = pf.stem signals = acb_signals[date_str] size_mult = boost_fn(signals) engine.regime_direction = -1 engine.regime_size_mult = size_mult engine.regime_dd_halt = False cap_start = engine.capital day_peak = cap_start use_dd_guard = size_mult > 1.0 df, asset_cols, date_vol = pq_data[date_str] bars_in_date = 0 for row_i in range(len(df)): row = df.iloc[row_i] vel_div = row.get("vel_div") if vel_div is None or not np.isfinite(vel_div): bar_idx += 1; bars_in_date += 1; continue prices = {} for ac in asset_cols: p = row[ac] if p and p > 0 and np.isfinite(p): prices[ac] = float(p) if ac not in price_histories: price_histories[ac] = [] price_histories[ac].append(float(p)) if not prices: bar_idx += 1; bars_in_date += 1; continue if bars_in_date < 100: vol_regime_ok = False else: v = date_vol[row_i] vol_regime_ok = (np.isfinite(v) and v > vol_p60) engine.process_bar(bar_idx=bar_idx, vel_div=float(vel_div), prices=prices, vol_regime_ok=vol_regime_ok, price_histories=price_histories) if use_dd_guard: day_peak = max(day_peak, engine.capital) if day_peak > 0 and (day_peak - engine.capital) / day_peak > dd_guard_pct: engine.regime_dd_halt = True bar_idx += 1; bars_in_date += 1 cap_end = engine.capital date_pnls.append(cap_end - cap_start) peak_capital = max(peak_capital, cap_end) dd = (peak_capital - cap_end) / 
def run_backtest(file_list, boost_fn, dd_guard_pct):
    engine = NDAlphaEngine(**ENGINE_KWARGS)
    bar_idx = 0
    price_histories = {}
    peak_capital = engine.capital
    max_dd = 0.0
    date_pnls = []

    for pf in file_list:
        date_str = pf.stem
        signals = acb_signals[date_str]
        size_mult = boost_fn(signals)
        engine.regime_direction = -1
        engine.regime_size_mult = size_mult
        engine.regime_dd_halt = False
        cap_start = engine.capital
        day_peak = cap_start
        use_dd_guard = size_mult > 1.0  # the guard only applies on boosted days

        df, asset_cols, date_vol = pq_data[date_str]
        bars_in_date = 0
        for row_i in range(len(df)):
            row = df.iloc[row_i]
            vel_div = row.get("vel_div")
            if vel_div is None or not np.isfinite(vel_div):
                bar_idx += 1; bars_in_date += 1
                continue
            prices = {}
            for ac in asset_cols:
                p = row[ac]
                if p and p > 0 and np.isfinite(p):
                    prices[ac] = float(p)
                    if ac not in price_histories:
                        price_histories[ac] = []
                    price_histories[ac].append(float(p))
            if not prices:
                bar_idx += 1; bars_in_date += 1
                continue
            # Vol-regime filter: skip the first 100 bars of each date (warm-up),
            # then require the rolling BTC vol to exceed the fixed 60th percentile.
            if bars_in_date < 100:
                vol_regime_ok = False
            else:
                v = date_vol[row_i]
                vol_regime_ok = (np.isfinite(v) and v > vol_p60)
            engine.process_bar(bar_idx=bar_idx, vel_div=float(vel_div), prices=prices,
                               vol_regime_ok=vol_regime_ok, price_histories=price_histories)
            # Intraday DD guard: halt the day once drawdown from the day's peak
            # exceeds dd_guard_pct.
            if use_dd_guard:
                day_peak = max(day_peak, engine.capital)
                if day_peak > 0 and (day_peak - engine.capital) / day_peak > dd_guard_pct:
                    engine.regime_dd_halt = True
            bar_idx += 1; bars_in_date += 1

        cap_end = engine.capital
        date_pnls.append(cap_end - cap_start)
        peak_capital = max(peak_capital, cap_end)
        dd = (peak_capital - cap_end) / peak_capital * 100 if peak_capital > 0 else 0
        max_dd = max(max_dd, dd)

    trades = engine.trade_history
    wins = [t for t in trades if t.pnl_absolute > 0]
    losses = [t for t in trades if t.pnl_absolute <= 0]
    gw = sum(t.pnl_absolute for t in wins) if wins else 0
    gl = abs(sum(t.pnl_absolute for t in losses)) if losses else 0
    profit_factor = gw / gl if gl > 0 else float("inf")  # renamed from `pf` to avoid shadowing the file loop var
    roi = (engine.capital - 25000) / 25000 * 100
    daily_rets = [p / 25000 * 100 for p in date_pnls]  # approx: per-date PnL as % of initial capital
    sharpe = np.mean(daily_rets) / np.std(daily_rets) * np.sqrt(365) if np.std(daily_rets) > 0 else 0
    return {
        'roi': roi, 'pf': profit_factor, 'max_dd': max_dd, 'sharpe': sharpe,
        'trades': len(trades),
        'wr': len(wins) / len(trades) * 100 if trades else 0,
        'capital': engine.capital, 'fees': engine.total_fees,
    }


# ── Run all strategies ───────────────────────────────────────────────────────
print(f"\n{'='*110}")
print(f"{'STRATEGY':<22} {'ROI%':>7} {'PF':>6} {'DD%':>6} {'SHARPE':>7} {'TRADES':>7} "
      f"{'WR%':>6} {'CAPITAL':>10} {'FEES':>9}")
print(f"{'='*110}")

results_full = {}
t0 = time.time()
for name, (boost_fn, dd_guard) in strategies.items():
    r = run_backtest(parquet_files, boost_fn, dd_guard)
    results_full[name] = r
    marker = " <--" if name != "baseline" and r['roi'] > results_full.get('baseline', {}).get('roi', -999) else ""
    print(f"{name:<22} {r['roi']:>+7.2f} {r['pf']:>6.3f} {r['max_dd']:>6.2f} {r['sharpe']:>7.2f} "
          f"{r['trades']:>7} {r['wr']:>6.1f} {r['capital']:>10.2f} {r['fees']:>9.2f}{marker}")
print(f"\nFull sweep: {time.time()-t0:.0f}s")

# ── Overfitting check: split in half ─────────────────────────────────────────
mid = len(parquet_files) // 2
first_half = parquet_files[:mid]
second_half = parquet_files[mid:]

print(f"\n{'='*110}")
print(f" OVERFITTING CHECK: First half ({first_half[0].stem} to {first_half[-1].stem}) "
      f"vs Second half ({second_half[0].stem} to {second_half[-1].stem})")
print(f"{'='*110}")
print(f"{'STRATEGY':<22} {'H1 ROI%':>8} {'H2 ROI%':>8} {'H1 PF':>6} {'H2 PF':>6} "
      f"{'H1 DD%':>7} {'H2 DD%':>7} {'STABLE?':>8}")

# Only re-test the most interesting strategies to keep runtime down
top_strats = ["baseline", "linear_0.15", "linear_0.25", "v5_conservative", "v5_moderate",
              "log_0.3", "fat_tail_0.10", "convex_0.08", "v5mod_noguard"]
for name in top_strats:
    boost_fn, dd_guard = strategies[name]
    r1 = run_backtest(first_half, boost_fn, dd_guard)
    r2 = run_backtest(second_half, boost_fn, dd_guard)
    # "Stable": the baseline just needs positive ROI in both halves; any other
    # strategy must retain at least 30% of the full-period baseline ROI in the
    # second (out-of-sample) half, otherwise it is flagged as likely overfit.
    stable = "YES" if (r1['roi'] > 0 and r2['roi'] > 0) else "NO"
    if name != "baseline":
        stable = "YES" if r2['roi'] >= results_full['baseline']['roi'] * 0.3 else "OVERFIT?"
    print(f"{name:<22} {r1['roi']:>+8.2f} {r2['roi']:>+8.2f} {r1['pf']:>6.3f} {r2['pf']:>6.3f} "
          f"{r1['max_dd']:>7.2f} {r2['max_dd']:>7.2f} {stable:>8}")

print(f"\nTotal time: {time.time()-t0:.0f}s")

# Best strategy by full-period ROI (the baseline is excluded from the argmax)
best = max(results_full.items(), key=lambda x: x[1]['roi'] if x[0] != "baseline" else -999)
base_r = results_full['baseline']
print(f"\n=== BEST: {best[0]} ===")
print(f"ROI: {base_r['roi']:+.2f}% -> {best[1]['roi']:+.2f}% ({best[1]['roi']-base_r['roi']:+.2f}%)")
print(f"PF: {base_r['pf']:.3f} -> {best[1]['pf']:.3f}")
print(f"DD: {base_r['max_dd']:.2f}% -> {best[1]['max_dd']:.2f}%")
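# --- Added convenience (not in the original script) ---------------------------
# Persist the full-period sweep results so the table can be re-examined without
# re-running the backtests. The output filename is an assumption; adjust to taste.
pd.DataFrame(results_full).T.to_csv("inverse_acb_sweep_results.csv")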