306 lines
11 KiB
Python
306 lines
11 KiB
Python
|
|
"""
|
||
|
|
Nautilus Arrow Long-Running Alpha Engine Test
|
||
|
|
===============================================
|
||
|
|
Reads Arrow IPC files directly (no intermediate Parquet cache), converts
|
||
|
|
on-the-fly to VBT DataFrames, and runs run_full_backtest() day-by-day
|
||
|
|
using champion_5x_f20.
|
||
|
|
|
||
|
|
Optimized: reads all Arrow files per day in bulk (open/read, not mmap)
|
||
|
|
to avoid the extreme overhead of 8k+ individual memory-map calls.
|
||
|
|
|
||
|
|
Usage (activate Siloqy first):
|
||
|
|
python test_nautilus_arrow_longrun.py
|
||
|
|
python test_nautilus_arrow_longrun.py --start 2026-02-01 --end 2026-02-25
|
||
|
|
python test_nautilus_arrow_longrun.py --poll # continuous polling
|
||
|
|
"""
|
||
|
|
|
||
|
|
import sys
|
||
|
|
import json
|
||
|
|
import time
|
||
|
|
import argparse
|
||
|
|
import warnings
|
||
|
|
from pathlib import Path
|
||
|
|
from datetime import datetime, timedelta
|
||
|
|
|
||
|
|
import numpy as np
|
||
|
|
import pandas as pd
|
||
|
|
import pyarrow as pa
|
||
|
|
import pyarrow.ipc as ipc
|
||
|
|
|
||
|
|
# Silence library warning chatter for clean day-by-day progress output.
warnings.filterwarnings('ignore')

# Local project root; both the root and the nautilus_dolphin package dir are
# prepended to sys.path so the deferred project imports below resolve.
PROJECT_ROOT = Path(r'C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict')
sys.path.insert(0, str(PROJECT_ROOT))
sys.path.insert(0, str(PROJECT_ROOT / 'nautilus_dolphin'))

# Default location of the per-day Arrow IPC scan dumps (one dir per date).
DEFAULT_ARROW_BASE = PROJECT_ROOT / 'arrow_backfill'
# Symbols dropped from the price map (stablecoin pairs, by the look of them).
EXCLUDED_ASSETS = {'TUSDUSDT', 'USDCUSDT'}
|
def load_arrow_day_fast(date_dir: Path) -> pd.DataFrame:
    """
    Bulk-read every scan_*.arrow file in ``date_dir`` into one VBT-compatible
    DataFrame (one row per scan; per-asset price columns forward-filled).

    Each file is slurped with read_bytes() and parsed from an in-memory
    buffer, which is far cheaper than issuing thousands of individual
    memory-map calls. Unreadable or incomplete scans are skipped silently.
    """
    scan_paths = sorted(date_dir.glob('scan_*.arrow'))
    if not scan_paths:
        return pd.DataFrame()

    records = []
    last_prices = {}  # most recent good price per asset, carried across scans
    errors = 0

    for path in scan_paths:
        try:
            payload = path.read_bytes()
            table = ipc.open_file(pa.BufferReader(payload)).read_all()
            if len(table) == 0:
                continue

            # Single-row Arrow table -> plain dict of Python scalars.
            row = {name: table.column(name)[0].as_py() for name in table.column_names}

            ts_ns = row.get('timestamp_ns') or 0
            if not ts_ns:
                continue

            v50 = float(row.get('w50_velocity', 0) or 0)
            v150 = float(row.get('w150_velocity', 0) or 0)
            if v50 == 0.0 and v150 == 0.0:
                # No velocity signal at all -- drop the scan.
                continue

            # NOTE(review): a stored vel_div of exactly 0.0 falls back to the
            # derived v50 - v150; presumably "0 means missing" -- confirm.
            vel_div = float(row.get('vel_div', v50 - v150) or (v50 - v150))
            v300 = row.get('w300_velocity')
            v750 = row.get('w750_velocity')
            i50 = row.get('w50_instability')
            i150 = row.get('w150_instability')

            assets = json.loads(row.get('assets_json', '[]') or '[]')
            prices = json.loads(row.get('asset_prices_json', '[]') or '[]')

            price_map = {}
            for asset, price in zip(assets, prices):
                if asset in EXCLUDED_ASSETS:
                    continue
                if price is not None and float(price) > 0:
                    price_map[asset] = float(price)
                    last_prices[asset] = float(price)
                elif asset in last_prices:
                    # Fall back to the last good price seen earlier today.
                    price_map[asset] = last_prices[asset]

            if 'BTCUSDT' not in price_map:
                # A scan with no BTC price is unusable downstream.
                continue

            record = {
                'timestamp': pd.Timestamp(ts_ns, unit='ns'),
                'scan_number': int(row.get('scan_number', 0) or 0),
                'v50_lambda_max_velocity': v50,
                'v150_lambda_max_velocity': v150,
                'v300_lambda_max_velocity': float(v300) if v300 is not None else np.nan,
                'v750_lambda_max_velocity': float(v750) if v750 is not None else np.nan,
                'vel_div': vel_div,
                'instability_50': float(i50) if i50 is not None else np.nan,
                'instability_150': float(i150) if i150 is not None else np.nan,
            }
            record.update(price_map)
            records.append(record)
        except Exception:
            errors += 1  # best-effort: one bad file never aborts the day
            continue

    if not records:
        return pd.DataFrame()

    df = pd.DataFrame(records).sort_values('timestamp').reset_index(drop=True)
    core = ['timestamp', 'scan_number', 'v50_lambda_max_velocity',
            'v150_lambda_max_velocity', 'v300_lambda_max_velocity',
            'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150']
    price_cols = [c for c in df.columns if c not in core]
    if price_cols:
        # Bridge gaps where an asset had no quote in some scans.
        df[price_cols] = df[price_cols].ffill()
    return df
|
def discover_arrow_dates(arrow_base: Path, start=None, end=None):
    """
    Return sorted YYYY-MM-DD directory names under ``arrow_base`` that contain
    at least one scan_*.arrow file.

    ``start``/``end`` are inclusive bounds compared lexically, which is valid
    for ISO-8601 date strings. Directories carrying a '_SKIP' marker are
    ignored (note: a name with '_SKIP' appended also fails the 10-char gate,
    so the marker check is mostly defensive).
    """
    if not arrow_base.exists():
        return []
    dates = []
    for entry in sorted(arrow_base.iterdir()):
        name = entry.name
        # Run the cheap name-shape and date-range filters first so we only
        # glob directories that can actually be selected (previously every
        # date dir was globbed even when the range filter discarded it).
        if not entry.is_dir() or len(name) != 10 or name[4] != '-':
            continue
        if '_SKIP' in name:
            continue
        if start and name < start:
            continue
        if end and name > end:
            continue
        if any(entry.glob('scan_*.arrow')):
            dates.append(name)
    return dates
||
|
|
def run_longrun_test(arrow_base, start, end, poll=False, poll_interval=30):
    """
    Replay Arrow scan days through run_full_backtest() with champion_5x_f20,
    compounding capital day over day.

    Prints a per-day progress line, a final summary with benchmark checks
    (WR >= 40%, DD <= 20%), and writes a JSON report to vbt_results/.
    With poll=True the function then loops forever, re-scanning for new
    date directories every ``poll_interval`` seconds (it never returns
    in that mode).
    """
    # Project-local imports are deferred so this module can be imported
    # without the Siloqy environment on sys.path.
    from dolphin_vbt_real import run_full_backtest
    from dolphin_paper_trade_adaptive_cb_v2 import STRATEGIES, INIT_CAPITAL
    champion = STRATEGIES['champion_5x_f20']

    print('=' * 70)
    print(' NAUTILUS ARROW LONG-RUNNING ALPHA ENGINE TEST')
    print(f' Strategy: champion_5x_f20 | Capital: ${INIT_CAPITAL:,.0f}')
    print(f' Arrow source: {arrow_base}')
    print(f' Date range: {start} -> {end}')
    print(f' Mode: {"POLL (continuous)" if poll else "BATCH (one-shot)"}')
    print('=' * 70)
    sys.stdout.flush()

    # Cumulative run state, mutated by the process_date closure below.
    capital = INIT_CAPITAL
    total_tr = 0        # total trades across all days
    total_wins = 0
    total_fees = 0.0
    peak = capital      # high-water mark for drawdown tracking
    max_dd = 0.0        # worst drawdown seen, in percent
    processed = set()   # date strings already run (used by poll mode)
    day_results = []    # per-day summary dicts for the JSON report

    def process_date(date_str):
        # Run one day's scans through the backtest and fold the result into
        # the cumulative state. Returns the day record, or None if skipped.
        nonlocal capital, total_tr, total_wins, total_fees, peak, max_dd

        date_dir = arrow_base / date_str
        t0 = time.time()
        df = load_arrow_day_fast(date_dir)
        load_time = time.time() - t0

        # Too few scans to produce meaningful signals -- skip the day.
        if len(df) < 200:
            print(f' {date_str}: {len(df)} scans (< 200) -- SKIPPED')
            sys.stdout.flush()
            return None

        t0 = time.time()
        # seed=42: presumably for reproducible fills -- confirm in
        # run_full_backtest. Capital compounds: each day starts from the
        # previous day's closing capital.
        result = run_full_backtest(df, champion, init_cash=capital, seed=42, verbose=False)
        bt_time = time.time() - t0

        capital = result['capital']
        total_tr += result['trades']
        total_wins += result['wins']
        total_fees += result['total_fees']
        if capital > peak:
            peak = capital
        # Drawdown is measured from the running peak, at day granularity.
        dd = (peak - capital) / peak * 100
        if dd > max_dd:
            max_dd = dd

        wr = total_wins / max(total_tr, 1) * 100
        roi = (capital - INIT_CAPITAL) / INIT_CAPITAL * 100

        day_rec = {
            'date': date_str, 'scans': len(df),
            'day_trades': result['trades'], 'day_wins': result['wins'],
            'capital': round(capital, 2),
            'cum_trades': total_tr, 'cum_wr': round(wr, 2),
            'cum_roi': round(roi, 4), 'max_dd': round(max_dd, 4),
            'load_ms': int(load_time * 1000), 'bt_ms': int(bt_time * 1000),
        }
        day_results.append(day_rec)

        print(f' {date_str}: {len(df):>5} scans | '
              f'{result["trades"]:>2} tr ({result["wins"]}W) | '
              f'cap=${capital:>10,.2f} | '
              f'WR={wr:.1f}% ROI={roi:+.2f}% DD={max_dd:.1f}% | '
              f'[{int(load_time*1000)}ms+{int(bt_time*1000)}ms]')
        sys.stdout.flush()
        return day_rec

    dates = discover_arrow_dates(arrow_base, start, end)
    print(f'\nFound {len(dates)} Arrow dates to process\n')
    sys.stdout.flush()

    # One-shot batch pass over every discovered date, in order.
    for date_str in dates:
        process_date(date_str)
        processed.add(date_str)

    wr = total_wins / max(total_tr, 1) * 100
    roi = (capital - INIT_CAPITAL) / INIT_CAPITAL * 100

    print('\n' + '=' * 70)
    print(' FINAL RESULTS')
    print('=' * 70)
    print(f' Days processed: {len(day_results)}')
    print(f' Total trades: {total_tr}')
    print(f' Total wins: {total_wins}')
    print(f' Win rate: {wr:.2f}%')
    print(f' Final capital: ${capital:,.2f}')
    print(f' ROI: {roi:+.4f}%')
    print(f' Max drawdown: {max_dd:.4f}%')
    print(f' Total fees: ${total_fees:,.2f}')
    print('=' * 70)

    # Pass/fail benchmark gates for the whole run.
    checks = [('WR >= 40%', wr >= 40.0), ('DD <= 20%', max_dd <= 20.0)]
    all_pass = True
    print('\n BENCHMARK CHECK:')
    for label, ok in checks:
        status = 'OK' if ok else 'FAIL'
        print(f' {label:20s} -> [{status}]')
        if not ok:
            all_pass = False

    if all_pass:
        print('\n VERDICT: PASS -- Arrow pipeline produces valid champion signals')
    else:
        print('\n VERDICT: WARN -- some benchmarks missed')

    # Persist the full run report as timestamped JSON under vbt_results/.
    ts_str = datetime.now().strftime('%Y%m%d_%H%M%S')
    results_dir = PROJECT_ROOT / 'vbt_results'
    results_dir.mkdir(exist_ok=True)
    out_path = results_dir / f'nautilus_arrow_longrun_{ts_str}.json'
    out = {
        'timestamp': datetime.now().isoformat(),
        'strategy': 'champion_5x_f20',
        'arrow_source': str(arrow_base),
        'date_range': [start, end],
        'summary': {
            'days': len(day_results), 'trades': total_tr, 'wins': total_wins,
            'win_rate': round(wr, 2), 'final_capital': round(capital, 2),
            'roi_pct': round(roi, 4), 'max_dd_pct': round(max_dd, 4),
            'total_fees': round(total_fees, 2),
        },
        'daily': day_results,
    }
    with open(out_path, 'w') as f:
        json.dump(out, f, indent=2)
    print(f'\n Results saved -> {out_path}')
    sys.stdout.flush()

    # Poll mode: keep watching for newly-arrived date dirs forever.
    # NOTE: this loop never exits, so the return below is unreachable
    # when poll=True.
    if poll:
        print(f'\n POLL mode: checking every {poll_interval}s for new dates...')
        sys.stdout.flush()
        while True:
            time.sleep(poll_interval)
            new_dates = discover_arrow_dates(arrow_base)
            for d in new_dates:
                if d not in processed:
                    print(f'\n [POLL] New date: {d}')
                    process_date(d)
                    processed.add(d)

    return out
|
|
def main():
    """CLI entry point: parse arguments and launch the long-running test."""
    parser = argparse.ArgumentParser(description='Nautilus Arrow long-running test')
    parser.add_argument('--arrow-base', default=str(DEFAULT_ARROW_BASE))
    parser.add_argument('--start', default='2026-01-01')
    parser.add_argument('--end', default='2026-02-25')
    parser.add_argument('--poll', action='store_true')
    parser.add_argument('--poll-interval', type=int, default=30)
    opts = parser.parse_args()

    run_longrun_test(
        arrow_base=Path(opts.arrow_base),
        start=opts.start,
        end=opts.end,
        poll=opts.poll,
        poll_interval=opts.poll_interval,
    )
|
||
|
|
# Script entry point (guarded so the module can also be imported for reuse).
if __name__ == '__main__':
    main()