initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
305
nautilus_dolphin/test_nautilus_arrow_longrun.py
Executable file
@@ -0,0 +1,305 @@
"""
Nautilus Arrow Long-Running Alpha Engine Test
=============================================
Reads Arrow IPC files directly (no intermediate Parquet cache), converts
on-the-fly to VBT DataFrames, and runs run_full_backtest() day-by-day
using champion_5x_f20.

Optimized: reads all Arrow files per day in bulk (open/read, not mmap)
to avoid the extreme overhead of 8k+ individual memory-map calls.

Usage (activate Siloqy first):
    python test_nautilus_arrow_longrun.py
    python test_nautilus_arrow_longrun.py --start 2026-02-01 --end 2026-02-25
    python test_nautilus_arrow_longrun.py --poll   # continuous polling
"""

import sys
import json
import time
import argparse
import warnings
from pathlib import Path
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.ipc as ipc

warnings.filterwarnings('ignore')

PROJECT_ROOT = Path(r'C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict')
sys.path.insert(0, str(PROJECT_ROOT))
sys.path.insert(0, str(PROJECT_ROOT / 'nautilus_dolphin'))

DEFAULT_ARROW_BASE = PROJECT_ROOT / 'arrow_backfill'
EXCLUDED_ASSETS = {'TUSDUSDT', 'USDCUSDT'}

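# Schema sketch: column names below are taken from the reader code, not from
# a formal spec. Each scan_*.arrow file is expected to hold one snapshot row
# with (at least):
#   timestamp_ns                          int, epoch nanoseconds
#   scan_number                           int
#   w50/w150/w300/w750_velocity           float (lambda-max velocities)
#   vel_div                               float, falls back to w50 - w150
#   w50_instability, w150_instability     float
#   assets_json, asset_prices_json        JSON-encoded parallel lists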
def load_arrow_day_fast(date_dir: Path) -> pd.DataFrame:
    """
    Bulk-read all scan_*.arrow files for one day into a VBT-compatible DataFrame.
    Uses batched file reads (much faster than individual memory-maps).
    """
    arrow_files = sorted(date_dir.glob('scan_*.arrow'))
    if not arrow_files:
        return pd.DataFrame()

    rows = []
    last_prices = {}
    errors = 0

    for af in arrow_files:
        try:
            raw = af.read_bytes()
            reader = ipc.open_file(pa.BufferReader(raw))
            table = reader.read_all()
            if len(table) == 0:
                continue

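            # One snapshot per file: only the first row of each column is used.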
            row = {col: table.column(col)[0].as_py() for col in table.column_names}

            ts_ns = row.get('timestamp_ns') or 0
            if not ts_ns:
                continue
            ts = pd.Timestamp(ts_ns, unit='ns')

            v50 = float(row.get('w50_velocity', 0) or 0)
            v150 = float(row.get('w150_velocity', 0) or 0)
            if v50 == 0.0 and v150 == 0.0:
                continue

            v300 = row.get('w300_velocity')
            v750 = row.get('w750_velocity')
            vd = float(row.get('vel_div', v50 - v150) or (v50 - v150))
            i50 = row.get('w50_instability')
            i150 = row.get('w150_instability')

            assets_raw = json.loads(row.get('assets_json', '[]') or '[]')
            prices_raw = json.loads(row.get('asset_prices_json', '[]') or '[]')

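            # Build the per-scan price vector: stablecoin pairs are excluded,
            # and a missing/zero price falls back to that asset's last good
            # price seen earlier in the day.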
            price_map = {}
            for asset, price in zip(assets_raw, prices_raw):
                if asset in EXCLUDED_ASSETS:
                    continue
                if price is not None and float(price) > 0:
                    price_map[asset] = float(price)
                    last_prices[asset] = float(price)
                elif asset in last_prices:
                    price_map[asset] = last_prices[asset]

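            # BTCUSDT acts as the reference asset; scans without a BTC price
            # are presumed unusable downstream and dropped.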
            if 'BTCUSDT' not in price_map:
                continue

            rec = {
                'timestamp': ts,
                'scan_number': int(row.get('scan_number', 0) or 0),
                'v50_lambda_max_velocity': v50,
                'v150_lambda_max_velocity': v150,
                'v300_lambda_max_velocity': float(v300) if v300 is not None else np.nan,
                'v750_lambda_max_velocity': float(v750) if v750 is not None else np.nan,
                'vel_div': vd,
                'instability_50': float(i50) if i50 is not None else np.nan,
                'instability_150': float(i150) if i150 is not None else np.nan,
            }
            rec.update(price_map)
            rows.append(rec)
        except Exception:
            errors += 1
            continue

    if not rows:
        return pd.DataFrame()

    df = pd.DataFrame(rows).sort_values('timestamp').reset_index(drop=True)
    core = ['timestamp', 'scan_number', 'v50_lambda_max_velocity',
            'v150_lambda_max_velocity', 'v300_lambda_max_velocity',
            'v750_lambda_max_velocity', 'vel_div', 'instability_50', 'instability_150']
    price_cols = [c for c in df.columns if c not in core]
    if price_cols:
        df[price_cols] = df[price_cols].ffill()
    return df


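# Usage sketch (hypothetical date directory; output shape as built above):
#   df = load_arrow_day_fast(DEFAULT_ARROW_BASE / '2026-02-01')
#   -> columns: timestamp, scan_number, v*_lambda_max_velocity, vel_div,
#      instability_50/150, plus one forward-filled price column per asset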
def discover_arrow_dates(arrow_base: Path, start=None, end=None):
    dates = []
    if not arrow_base.exists():
        return dates
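    # A date directory is any YYYY-MM-DD-shaped name (cheap length/dash test,
    # not strict parsing) that contains at least one scan file; '_SKIP' names
    # are ignored.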
    for d in sorted(arrow_base.iterdir()):
        if d.is_dir() and len(d.name) == 10 and d.name[4] == '-':
            if '_SKIP' in d.name:
                continue
            if any(d.glob('scan_*.arrow')):
                if start and d.name < start:
                    continue
                if end and d.name > end:
                    continue
                dates.append(d.name)
    return dates


def run_longrun_test(arrow_base, start, end, poll=False, poll_interval=30):
    from dolphin_vbt_real import run_full_backtest
    from dolphin_paper_trade_adaptive_cb_v2 import STRATEGIES, INIT_CAPITAL
    champion = STRATEGIES['champion_5x_f20']

    print('=' * 70)
    print(' NAUTILUS ARROW LONG-RUNNING ALPHA ENGINE TEST')
    print(f' Strategy: champion_5x_f20 | Capital: ${INIT_CAPITAL:,.0f}')
    print(f' Arrow source: {arrow_base}')
    print(f' Date range: {start} -> {end}')
    print(f' Mode: {"POLL (continuous)" if poll else "BATCH (one-shot)"}')
    print('=' * 70)
    sys.stdout.flush()

    capital = INIT_CAPITAL
    total_tr = 0
    total_wins = 0
    total_fees = 0.0
    peak = capital
    max_dd = 0.0
    processed = set()
    day_results = []

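    # Day-by-day compounding: each day's backtest is seeded with the previous
    # day's ending capital, so cumulative ROI and drawdown span the whole run.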
    def process_date(date_str):
        nonlocal capital, total_tr, total_wins, total_fees, peak, max_dd

        date_dir = arrow_base / date_str
        t0 = time.time()
        df = load_arrow_day_fast(date_dir)
        load_time = time.time() - t0

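        # Sparse-day guard: fewer than 200 scans is treated as too little
        # signal history for a meaningful backtest (heuristic threshold).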
        if len(df) < 200:
            print(f' {date_str}: {len(df)} scans (< 200) -- SKIPPED')
            sys.stdout.flush()
            return None

        t0 = time.time()
        result = run_full_backtest(df, champion, init_cash=capital, seed=42, verbose=False)
        bt_time = time.time() - t0

        capital = result['capital']
        total_tr += result['trades']
        total_wins += result['wins']
        total_fees += result['total_fees']
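        # Peak and drawdown are tracked on end-of-day capital only; intraday
        # drawdown inside run_full_backtest is not visible at this level.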
        if capital > peak:
            peak = capital
        dd = (peak - capital) / peak * 100
        if dd > max_dd:
            max_dd = dd

        wr = total_wins / max(total_tr, 1) * 100
        roi = (capital - INIT_CAPITAL) / INIT_CAPITAL * 100

        day_rec = {
            'date': date_str, 'scans': len(df),
            'day_trades': result['trades'], 'day_wins': result['wins'],
            'capital': round(capital, 2),
            'cum_trades': total_tr, 'cum_wr': round(wr, 2),
            'cum_roi': round(roi, 4), 'max_dd': round(max_dd, 4),
            'load_ms': int(load_time * 1000), 'bt_ms': int(bt_time * 1000),
        }
        day_results.append(day_rec)

        print(f' {date_str}: {len(df):>5} scans | '
              f'{result["trades"]:>2} tr ({result["wins"]}W) | '
              f'cap=${capital:>10,.2f} | '
              f'WR={wr:.1f}% ROI={roi:+.2f}% DD={max_dd:.1f}% | '
              f'[{int(load_time*1000)}ms+{int(bt_time*1000)}ms]')
        sys.stdout.flush()
        return day_rec

    dates = discover_arrow_dates(arrow_base, start, end)
    print(f'\nFound {len(dates)} Arrow dates to process\n')
    sys.stdout.flush()

    for date_str in dates:
        process_date(date_str)
        processed.add(date_str)

    wr = total_wins / max(total_tr, 1) * 100
    roi = (capital - INIT_CAPITAL) / INIT_CAPITAL * 100

    print('\n' + '=' * 70)
    print(' FINAL RESULTS')
    print('=' * 70)
    print(f' Days processed: {len(day_results)}')
    print(f' Total trades: {total_tr}')
    print(f' Total wins: {total_wins}')
    print(f' Win rate: {wr:.2f}%')
    print(f' Final capital: ${capital:,.2f}')
    print(f' ROI: {roi:+.4f}%')
    print(f' Max drawdown: {max_dd:.4f}%')
    print(f' Total fees: ${total_fees:,.2f}')
    print('=' * 70)

    checks = [('WR >= 40%', wr >= 40.0), ('DD <= 20%', max_dd <= 20.0)]
    all_pass = True
    print('\n BENCHMARK CHECK:')
    for label, ok in checks:
        status = 'OK' if ok else 'FAIL'
        print(f' {label:20s} -> [{status}]')
        if not ok:
            all_pass = False

    if all_pass:
        print('\n VERDICT: PASS -- Arrow pipeline produces valid champion signals')
    else:
        print('\n VERDICT: WARN -- some benchmarks missed')

    ts_str = datetime.now().strftime('%Y%m%d_%H%M%S')
    results_dir = PROJECT_ROOT / 'vbt_results'
    results_dir.mkdir(exist_ok=True)
    out_path = results_dir / f'nautilus_arrow_longrun_{ts_str}.json'
    out = {
        'timestamp': datetime.now().isoformat(),
        'strategy': 'champion_5x_f20',
        'arrow_source': str(arrow_base),
        'date_range': [start, end],
        'summary': {
            'days': len(day_results), 'trades': total_tr, 'wins': total_wins,
            'win_rate': round(wr, 2), 'final_capital': round(capital, 2),
            'roi_pct': round(roi, 4), 'max_dd_pct': round(max_dd, 4),
            'total_fees': round(total_fees, 2),
        },
        'daily': day_results,
    }
    with open(out_path, 'w') as f:
        json.dump(out, f, indent=2)
    print(f'\n Results saved -> {out_path}')
    sys.stdout.flush()

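    # POLL mode keeps the process alive and backtests newly backfilled dates
    # as they appear. Note the JSON summary above is written once, before
    # polling begins; polled days update the in-memory totals only.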
    if poll:
        print(f'\n POLL mode: checking every {poll_interval}s for new dates...')
        sys.stdout.flush()
        while True:
            time.sleep(poll_interval)
            new_dates = discover_arrow_dates(arrow_base)
            for d in new_dates:
                if d not in processed:
                    print(f'\n [POLL] New date: {d}')
                    process_date(d)
                    processed.add(d)

    return out


def main():
    parser = argparse.ArgumentParser(description='Nautilus Arrow long-running test')
    parser.add_argument('--arrow-base', default=str(DEFAULT_ARROW_BASE))
    parser.add_argument('--start', default='2026-01-01')
    parser.add_argument('--end', default='2026-02-25')
    parser.add_argument('--poll', action='store_true')
    parser.add_argument('--poll-interval', type=int, default=30)
    args = parser.parse_args()

    run_longrun_test(
        arrow_base=Path(args.arrow_base),
        start=args.start, end=args.end,
        poll=args.poll, poll_interval=args.poll_interval,
    )


if __name__ == '__main__':
    main()