""" Nautilus Arrow Long-Running Alpha Engine Test =============================================== Reads Arrow IPC files directly (no intermediate Parquet cache), converts on-the-fly to VBT DataFrames, and runs run_full_backtest() day-by-day using champion_5x_f20. Optimized: reads all Arrow files per day in bulk (open/read, not mmap) to avoid the extreme overhead of 8k+ individual memory-map calls. Usage (activate Siloqy first): python test_nautilus_arrow_longrun.py python test_nautilus_arrow_longrun.py --start 2026-02-01 --end 2026-02-25 python test_nautilus_arrow_longrun.py --poll # continuous polling """ import sys import json import time import argparse import warnings from pathlib import Path from datetime import datetime, timedelta import numpy as np import pandas as pd import pyarrow as pa import pyarrow.ipc as ipc warnings.filterwarnings('ignore') PROJECT_ROOT = Path(r'C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict') sys.path.insert(0, str(PROJECT_ROOT)) sys.path.insert(0, str(PROJECT_ROOT / 'nautilus_dolphin')) DEFAULT_ARROW_BASE = PROJECT_ROOT / 'arrow_backfill' EXCLUDED_ASSETS = {'TUSDUSDT', 'USDCUSDT'} def load_arrow_day_fast(date_dir: Path) -> pd.DataFrame: """ Bulk-read all scan_*.arrow files for one day into a VBT-compatible DataFrame. Uses batched file reads (much faster than individual memory-maps). """ arrow_files = sorted(date_dir.glob('scan_*.arrow')) if not arrow_files: return pd.DataFrame() rows = [] last_prices = {} errors = 0 for af in arrow_files: try: raw = af.read_bytes() reader = ipc.open_file(pa.BufferReader(raw)) table = reader.read_all() if len(table) == 0: continue row = {col: table.column(col)[0].as_py() for col in table.column_names} ts_ns = row.get('timestamp_ns') or 0 if not ts_ns: continue ts = pd.Timestamp(ts_ns, unit='ns') v50 = float(row.get('w50_velocity', 0) or 0) v150 = float(row.get('w150_velocity', 0) or 0) if v50 == 0.0 and v150 == 0.0: continue v300 = row.get('w300_velocity') v750 = row.get('w750_velocity') vd = float(row.get('vel_div', v50 - v150) or (v50 - v150)) i50 = row.get('w50_instability') i150 = row.get('w150_instability') assets_raw = json.loads(row.get('assets_json', '[]') or '[]') prices_raw = json.loads(row.get('asset_prices_json', '[]') or '[]') price_map = {} for asset, price in zip(assets_raw, prices_raw): if asset in EXCLUDED_ASSETS: continue if price is not None and float(price) > 0: price_map[asset] = float(price) last_prices[asset] = float(price) elif asset in last_prices: price_map[asset] = last_prices[asset] if 'BTCUSDT' not in price_map: continue rec = { 'timestamp': ts, 'scan_number': int(row.get('scan_number', 0) or 0), 'v50_lambda_max_velocity': v50, 'v150_lambda_max_velocity': v150, 'v300_lambda_max_velocity': float(v300) if v300 is not None else np.nan, 'v750_lambda_max_velocity': float(v750) if v750 is not None else np.nan, 'vel_div': vd, 'instability_50': float(i50) if i50 is not None else np.nan, 'instability_150': float(i150) if i150 is not None else np.nan, } rec.update(price_map) rows.append(rec) except Exception: errors += 1 continue if not rows: return pd.DataFrame() df = pd.DataFrame(rows).sort_values('timestamp').reset_index(drop=True) core = ['timestamp', 'scan_number', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity', 'v300_lambda_max_velocity', 'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150'] price_cols = [c for c in df.columns if c not in core] if price_cols: df[price_cols] = df[price_cols].ffill() return df def discover_arrow_dates(arrow_base: Path, start=None, end=None): dates = [] if not arrow_base.exists(): return dates for d in sorted(arrow_base.iterdir()): if d.is_dir() and len(d.name) == 10 and d.name[4] == '-': if '_SKIP' in d.name: continue if any(d.glob('scan_*.arrow')): if start and d.name < start: continue if end and d.name > end: continue dates.append(d.name) return dates def run_longrun_test(arrow_base, start, end, poll=False, poll_interval=30): from dolphin_vbt_real import run_full_backtest from dolphin_paper_trade_adaptive_cb_v2 import STRATEGIES, INIT_CAPITAL champion = STRATEGIES['champion_5x_f20'] print('=' * 70) print(' NAUTILUS ARROW LONG-RUNNING ALPHA ENGINE TEST') print(f' Strategy: champion_5x_f20 | Capital: ${INIT_CAPITAL:,.0f}') print(f' Arrow source: {arrow_base}') print(f' Date range: {start} -> {end}') print(f' Mode: {"POLL (continuous)" if poll else "BATCH (one-shot)"}') print('=' * 70) sys.stdout.flush() capital = INIT_CAPITAL total_tr = 0 total_wins = 0 total_fees = 0.0 peak = capital max_dd = 0.0 processed = set() day_results = [] def process_date(date_str): nonlocal capital, total_tr, total_wins, total_fees, peak, max_dd date_dir = arrow_base / date_str t0 = time.time() df = load_arrow_day_fast(date_dir) load_time = time.time() - t0 if len(df) < 200: print(f' {date_str}: {len(df)} scans (< 200) -- SKIPPED') sys.stdout.flush() return None t0 = time.time() result = run_full_backtest(df, champion, init_cash=capital, seed=42, verbose=False) bt_time = time.time() - t0 capital = result['capital'] total_tr += result['trades'] total_wins += result['wins'] total_fees += result['total_fees'] if capital > peak: peak = capital dd = (peak - capital) / peak * 100 if dd > max_dd: max_dd = dd wr = total_wins / max(total_tr, 1) * 100 roi = (capital - INIT_CAPITAL) / INIT_CAPITAL * 100 day_rec = { 'date': date_str, 'scans': len(df), 'day_trades': result['trades'], 'day_wins': result['wins'], 'capital': round(capital, 2), 'cum_trades': total_tr, 'cum_wr': round(wr, 2), 'cum_roi': round(roi, 4), 'max_dd': round(max_dd, 4), 'load_ms': int(load_time * 1000), 'bt_ms': int(bt_time * 1000), } day_results.append(day_rec) print(f' {date_str}: {len(df):>5} scans | ' f'{result["trades"]:>2} tr ({result["wins"]}W) | ' f'cap=${capital:>10,.2f} | ' f'WR={wr:.1f}% ROI={roi:+.2f}% DD={max_dd:.1f}% | ' f'[{int(load_time*1000)}ms+{int(bt_time*1000)}ms]') sys.stdout.flush() return day_rec dates = discover_arrow_dates(arrow_base, start, end) print(f'\nFound {len(dates)} Arrow dates to process\n') sys.stdout.flush() for date_str in dates: process_date(date_str) processed.add(date_str) wr = total_wins / max(total_tr, 1) * 100 roi = (capital - INIT_CAPITAL) / INIT_CAPITAL * 100 print('\n' + '=' * 70) print(' FINAL RESULTS') print('=' * 70) print(f' Days processed: {len(day_results)}') print(f' Total trades: {total_tr}') print(f' Total wins: {total_wins}') print(f' Win rate: {wr:.2f}%') print(f' Final capital: ${capital:,.2f}') print(f' ROI: {roi:+.4f}%') print(f' Max drawdown: {max_dd:.4f}%') print(f' Total fees: ${total_fees:,.2f}') print('=' * 70) checks = [('WR >= 40%', wr >= 40.0), ('DD <= 20%', max_dd <= 20.0)] all_pass = True print('\n BENCHMARK CHECK:') for label, ok in checks: status = 'OK' if ok else 'FAIL' print(f' {label:20s} -> [{status}]') if not ok: all_pass = False if all_pass: print('\n VERDICT: PASS -- Arrow pipeline produces valid champion signals') else: print('\n VERDICT: WARN -- some benchmarks missed') ts_str = datetime.now().strftime('%Y%m%d_%H%M%S') results_dir = PROJECT_ROOT / 'vbt_results' results_dir.mkdir(exist_ok=True) out_path = results_dir / f'nautilus_arrow_longrun_{ts_str}.json' out = { 'timestamp': datetime.now().isoformat(), 'strategy': 'champion_5x_f20', 'arrow_source': str(arrow_base), 'date_range': [start, end], 'summary': { 'days': len(day_results), 'trades': total_tr, 'wins': total_wins, 'win_rate': round(wr, 2), 'final_capital': round(capital, 2), 'roi_pct': round(roi, 4), 'max_dd_pct': round(max_dd, 4), 'total_fees': round(total_fees, 2), }, 'daily': day_results, } with open(out_path, 'w') as f: json.dump(out, f, indent=2) print(f'\n Results saved -> {out_path}') sys.stdout.flush() if poll: print(f'\n POLL mode: checking every {poll_interval}s for new dates...') sys.stdout.flush() while True: time.sleep(poll_interval) new_dates = discover_arrow_dates(arrow_base) for d in new_dates: if d not in processed: print(f'\n [POLL] New date: {d}') process_date(d) processed.add(d) return out def main(): parser = argparse.ArgumentParser(description='Nautilus Arrow long-running test') parser.add_argument('--arrow-base', default=str(DEFAULT_ARROW_BASE)) parser.add_argument('--start', default='2026-01-01') parser.add_argument('--end', default='2026-02-25') parser.add_argument('--poll', action='store_true') parser.add_argument('--poll-interval', type=int, default=30) args = parser.parse_args() run_longrun_test( arrow_base=Path(args.arrow_base), start=args.start, end=args.end, poll=args.poll, poll_interval=args.poll_interval, ) if __name__ == '__main__': main()