"""
Nautilus-Dolphin Backtest with Existing Parquet Data
======================================================

Runs full backtest using existing vbt_cache parquet data.

Author: Claude
Date: 2026-02-19
"""

import os
import sys
import json
import asyncio
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any
import logging

import pandas as pd

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(levelname)-8s | %(message)s',
    datefmt='%H:%M:%S'
)
logger = logging.getLogger(__name__)

# Add project root to path (must happen before the nautilus_dolphin imports)
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

from nautilus_dolphin.nautilus.parquet_data_adapter import ParquetDataAdapter

# Check Nautilus availability
try:
    from nautilus_trader.backtest.node import BacktestNode
    from nautilus_trader.backtest.config import (
        BacktestRunConfig,
        BacktestEngineConfig,
        BacktestVenueConfig,
        BacktestDataConfig,
    )
    from nautilus_trader.config import ImportableStrategyConfig
    from nautilus_trader.execution.config import ImportableExecAlgorithmConfig, ExecAlgorithmConfig
    from nautilus_trader.persistence.catalog import ParquetDataCatalog
    from nautilus_trader.model.identifiers import Venue, InstrumentId
    from nautilus_trader.model.data import QuoteTick
    from nautilus_trader.risk.config import RiskEngineConfig
    from nautilus_trader.cache.config import CacheConfig
    NAUTILUS_AVAILABLE = True
    logger.info("[OK] Nautilus Trader imports successful")
except ImportError as e:
    logger.error(f"[FAIL] Nautilus import error: {e}")
    NAUTILUS_AVAILABLE = False
    # Chain the original ImportError so the missing module stays visible.
    raise RuntimeError("Nautilus Trader is required") from e

# Import Dolphin modules
from nautilus_dolphin.nautilus.strategy_config import (
    create_tight_3_3_config,
    DolphinStrategyConfig,
)


class NDBacktestWithExistingData:
    """
    Runs Nautilus-Dolphin backtest using existing vbt_cache parquet data.
    """

    # Single source of truth for the simulated account's starting balance.
    # Used both for the venue's starting_balances and as the ROI denominator,
    # so the two can never drift apart.
    STARTING_BALANCE_USDT = 100_000

    def __init__(
        self,
        vbt_cache_path: str = "vbt_cache",
        output_dir: str = "backtest_results",
    ):
        """
        Initialize backtest runner.

        Creates the output directory if it does not exist and builds the
        ParquetDataAdapter, whose catalog is written to
        ``<vbt_cache_path>/catalog``.

        Args:
            vbt_cache_path: Path to vbt_cache directory
            output_dir: Directory for results
        """
        self.vbt_cache_path = Path(vbt_cache_path)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Create data adapter
        self.adapter = ParquetDataAdapter(
            vbt_cache_path=vbt_cache_path,
            output_catalog_path=str(self.vbt_cache_path / "catalog"),
        )

        logger.info("[OK] NDBacktestWithExistingData initialized")

    def prepare_data_catalog(
        self,
        assets: list,
        start_date: str,
        end_date: str,
        venue: str = "BINANCE_FUTURES",
    ) -> str:
        """
        Prepare Nautilus data catalog from existing parquet data.

        Delegates to ``ParquetDataAdapter.create_nautilus_catalog``.

        Args:
            assets: List of assets to include
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
            venue: Venue for the instruments

        Returns:
            Path to catalog
        """
        logger.info("=" * 80)
        logger.info("PREPARING DATA CATALOG")
        logger.info("=" * 80)

        catalog_path = self.adapter.create_nautilus_catalog(
            assets=assets,
            start_date=start_date,
            end_date=end_date,
            venue=venue,
        )

        return catalog_path

    @staticmethod
    def _config_value(strategy_config: Any, key: str, default: Any) -> Any:
        """Read ``key`` from a config given either as a dict or as an object."""
        if isinstance(strategy_config, dict):
            return strategy_config.get(key, default)
        return getattr(strategy_config, key, default)

    @staticmethod
    def _strategy_config_as_dict(strategy_config: Any) -> Dict[str, Any]:
        """
        Convert the caller's strategy config into the plain dict that
        ImportableStrategyConfig expects.

        Accepts a non-empty dict or any object exposing a callable ``dict()``
        (as the object returned by ``create_tight_3_3_config`` does). Anything
        else falls back to the tight_3_3 preset.
        """
        if isinstance(strategy_config, dict):
            if strategy_config:
                return dict(strategy_config)
        else:
            to_dict = getattr(strategy_config, 'dict', None)
            if callable(to_dict):
                return to_dict()

        logger.warning("No usable strategy_config supplied, falling back to tight_3_3 preset")
        return create_tight_3_3_config().dict()

    def _build_run_config(
        self,
        catalog_path: str,
        strategy_config: Any,
        assets: list,
        venue_id: str,
    ):
        """Assemble the BacktestRunConfig (venue, data, strategy, engine)."""
        # Venue configuration
        max_leverage = self._config_value(strategy_config, 'max_leverage', 2.5)
        venue_config = BacktestVenueConfig(
            name=venue_id,
            oms_type="NETTING",
            account_type="MARGIN",
            base_currency="USDT",
            starting_balances=[f"{self.STARTING_BALANCE_USDT} USDT"],
            default_leverage=str(max_leverage),
        )
        logger.info(f"[OK] Venue: {venue_id} (leverage: {max_leverage}x)")

        # Data configuration for each asset
        data_configs = []
        for asset in assets:
            # Tick data config
            data_configs.append(
                BacktestDataConfig(
                    catalog_path=catalog_path,
                    data_cls="nautilus_trader.model.data:QuoteTick",
                    instrument_id=f"{asset}.{venue_id}",
                )
            )
            # Signal data config (carried as QuoteTicks on a separate instrument)
            data_configs.append(
                BacktestDataConfig(
                    catalog_path=catalog_path,
                    data_cls="nautilus_trader.model.data:QuoteTick",
                    instrument_id=f"{asset}.SIGNAL.{venue_id}",
                )
            )
        logger.info(f"[OK] Data configs: {len(data_configs)} sources (Ticks + Signals)")

        # Strategy configuration: honor the config handed in by the caller
        # instead of silently rebuilding the preset.
        nautilus_strategy_config = ImportableStrategyConfig(
            strategy_path="nautilus_dolphin.nautilus.strategy:DolphinExecutionStrategy",
            config_path="nautilus_dolphin.nautilus.strategy_config:DolphinStrategyConfig",
            config=self._strategy_config_as_dict(strategy_config),
        )

        # Engine configuration
        # Register SmartExecAlgorithm here since it's no longer allowed in on_start
        exec_algorithm_config = ImportableExecAlgorithmConfig(
            exec_algorithm_path="nautilus_dolphin.nautilus.smart_exec_algorithm:SmartExecAlgorithm",
            config_path="nautilus_trader.execution.config:ExecAlgorithmConfig",
            config={
                'exec_algorithm_id': "SMART_EXEC",
                'entry_timeout_sec': 25,
                'entry_abort_threshold_bps': 5.0,
                'exit_timeout_sec': 10,
                'maker_fee_rate': 0.0002,
                'taker_fee_rate': 0.0005
            }
        )

        engine_config = BacktestEngineConfig(
            strategies=[nautilus_strategy_config],
            exec_algorithms=[exec_algorithm_config],
            risk_engine=RiskEngineConfig(
                bypass=True,  # We handle risk in strategy
            ),
            cache=CacheConfig(
                tick_capacity=1_000_000,
                bar_capacity=100_000,
            ),
        )

        # Complete run configuration
        return BacktestRunConfig(
            venues=[venue_config],
            data=data_configs,
            engine=engine_config,
            chunk_size=None,  # One-shot for speed and reliability in this case
            raise_exception=True,
            dispose_on_completion=False,
        )

    def _collect_trades(self, engine, result) -> list:
        """Extract trades from the result object, falling back to the engine cache."""
        if result:
            logger.info(f"Processing result: {result}")
            # Extract trades from result directly
            try:
                return self._extract_trades_from_result(result)
            except Exception as e:
                logger.warning(f"Failed to extract trades from result: {e}. Falling back to old cache method...")
                return self._extract_trades(engine)

        # Fallback to engine cache if no result object
        logger.warning("No result object found, falling back to engine cache...")
        return self._extract_trades(engine)

    @staticmethod
    def _enrich_metrics(metrics: Dict[str, Any], result, trade_count: int) -> None:
        """Overlay Nautilus' own accounting statistics onto ``metrics`` in place."""
        if not result:
            return

        # Enrich from BacktestResult.stats_pnls (Nautilus internal accounting)
        pnl_by_currency = getattr(result, 'stats_pnls', None) or {}
        stats = pnl_by_currency.get('USDT', {}) or next(iter(pnl_by_currency.values()), {})
        if stats:
            metrics['total_pnl'] = float(stats.get('PnL (total)', metrics['total_pnl']))
            # Nautilus reports 'PnL% (total)' in percent points, while
            # metrics['roi'] is a fraction (it is formatted with ':.2%').
            pnl_pct = stats.get('PnL% (total)')
            if pnl_pct is not None:
                metrics['roi'] = float(pnl_pct) / 100.0
            metrics['win_rate'] = float(stats.get('Win Rate', metrics['win_rate']))
            metrics['max_winner'] = float(stats.get('Max Winner', 0))
            metrics['max_loser'] = float(stats.get('Max Loser', 0))
            metrics['expectancy'] = float(stats.get('Expectancy', 0))
            logger.info(f"Nautilus stats: PnL={metrics['total_pnl']:.2f}, "
                        f"ROI={metrics['roi']:.4%}, WR={metrics['win_rate']:.2%}")

        # Enrich from BacktestResult.stats_returns if available
        ret_stats = getattr(result, 'stats_returns', None) or {}
        for k in ('Sharpe Ratio (252 days)', 'Sortino Ratio (252 days)',
                  'Profit Factor', 'Returns Volatility (252 days)'):
            v = ret_stats.get(k)
            if v is None:
                continue
            try:
                metrics[k] = float(v)
            except (TypeError, ValueError):
                # Non-numeric statistic: deliberately skipped (best effort).
                pass

        # Pull trade count from Nautilus result (total_orders / 2 = round-trips)
        if hasattr(result, 'total_orders'):
            metrics['nautilus_total_orders'] = result.total_orders
            metrics['nautilus_total_events'] = getattr(result, 'total_events', None)
            metrics['nautilus_total_positions'] = getattr(result, 'total_positions', None)
            est_trades = result.total_orders // 2
            if est_trades > trade_count:
                metrics['estimated_trades'] = est_trades
                logger.info(f"Nautilus reports {result.total_orders} orders = ~{est_trades} round-trips")

    def run_backtest(
        self,
        catalog_path: str,
        strategy_config: Dict[str, Any],
        assets: list,
        start_date: str,
        end_date: str,
    ) -> Dict[str, Any]:
        """
        Execute backtest using Nautilus BacktestNode.

        Builds the run configuration, runs the node, extracts trades and
        metrics, and writes everything to a timestamped JSON file in
        ``self.output_dir``.

        Args:
            catalog_path: Path to Nautilus catalog
            strategy_config: Strategy configuration, either a dict or a config
                object exposing ``dict()``. ``venue`` and ``max_leverage``
                are read from it (defaults: "BINANCE_FUTURES", 2.5).
            assets: List of assets
            start_date: Start date
            end_date: End date

        Returns:
            Backtest results (timestamp, strategy_config, backtest_params,
            trades, metrics, trade_count)

        Raises:
            Exception: any error raised while running the backtest is logged
                with its traceback and re-raised unchanged.
        """
        logger.info("=" * 80)
        logger.info("CONFIGURING BACKTEST")
        logger.info("=" * 80)

        venue_id = self._config_value(strategy_config, 'venue', "BINANCE_FUTURES")
        run_config = self._build_run_config(catalog_path, strategy_config, assets, venue_id)

        logger.info("=" * 80)
        logger.info("RUNNING BACKTEST")
        logger.info("=" * 80)
        logger.info("This may take several minutes...")

        # Execute backtest
        try:
            logger.info("Initializing BacktestNode...")
            backtest_node = BacktestNode(configs=[run_config])
            logger.info("BacktestNode initialized. Starting run...")
            backtest_node.run()
            logger.info("[OK] Backtest completed!")

            # Get engine to extract metrics
            logger.info(f"Retrieving engine for ID: {run_config.id}")
            engine = backtest_node.get_engine(run_config.id)

            # Get results
            logger.info("Retrieving results from engine...")
            result = engine.get_result()

            trades = self._collect_trades(engine, result)
            logger.info(f"Extracted {len(trades)} trades from cache.")

            logger.info("Computing metrics...")
            metrics = self._compute_metrics(trades)
            self._enrich_metrics(metrics, result, len(trades))
            logger.info("Metrics computed.")
        except Exception as e:
            logger.error(f"[CRITICAL] Backtest failed: {e}")
            import traceback
            logger.error(traceback.format_exc())
            # Ensure we flush logs
            sys.stdout.flush()
            sys.stderr.flush()
            raise

        # One clock reading so the payload timestamp and the file name agree.
        now = datetime.now()
        result_data = {
            "timestamp": now.isoformat(),
            "strategy_config": strategy_config,
            "backtest_params": {
                "assets": assets,
                "start_date": start_date,
                "end_date": end_date,
                "venue": venue_id,
            },
            "trades": trades,
            "metrics": metrics,
            "trade_count": len(trades),
        }

        # Save results (default=str stringifies non-JSON values such as the config object)
        output_file = self.output_dir / f"nd_backtest_{now.strftime('%Y%m%d_%H%M%S')}.json"
        with open(output_file, 'w', encoding="utf-8") as f:
            json.dump(result_data, f, indent=2, default=str)

        logger.info("=" * 80)
        logger.info("BACKTEST RESULTS")
        logger.info("=" * 80)
        logger.info(f"[OK] Results saved: {output_file}")
        logger.info(f"[SUMMARY] Total trades: {len(trades)}")
        logger.info(f"[SUMMARY] Win rate: {metrics.get('win_rate', 0):.2%}")
        logger.info(f"[SUMMARY] Total P&L: ${metrics.get('total_pnl', 0):,.2f}")
        logger.info(f"[SUMMARY] ROI: {metrics.get('roi', 0):.2%}")

        return result_data

    @staticmethod
    def _position_to_trade(position) -> Dict[str, Any]:
        """Flatten one closed position into a JSON-friendly trade record."""
        return {
            "trade_id": str(position.id),
            "instrument_id": str(position.instrument_id),
            "entry_time": str(pd.to_datetime(position.ts_opened, unit='ns', utc=True)),
            "exit_time": str(pd.to_datetime(position.ts_closed, unit='ns', utc=True)),
            "entry_price": float(position.avg_px_open),
            "exit_price": float(position.avg_px_close),
            "direction": str(position.side),
            "quantity": float(position.quantity),
            "pnl": float(position.realized_pnl),
        }

    def _extract_trades(self, engine) -> list:
        """Extract closed positions from backtest engine."""
        # Get closed positions from cache
        return [
            self._position_to_trade(position)
            for position in engine.cache.positions_closed()
        ]

    def _extract_trades_from_result(self, result) -> list:
        """
        Extract closed positions from Nautilus BacktestResult.

        Relies on ``result.positions()``; if the result object does not
        provide it, the resulting exception is handled by the caller, which
        falls back to the engine cache.
        """
        return [
            self._position_to_trade(position)
            for position in result.positions()
        ]

    def _compute_metrics(self, trades: list) -> Dict[str, Any]:
        """
        Compute performance metrics from the extracted trade records.

        Args:
            trades: Trade dicts; only the ``pnl`` key is read (missing = 0).

        Returns:
            Dict with win_rate, total_pnl, roi (fraction of the starting
            balance), winning_trades and losing_trades.
        """
        if not trades:
            return {
                "win_rate": 0,
                "total_pnl": 0,
                "roi": 0,
                "winning_trades": 0,
                "losing_trades": 0,
            }

        winning_trades = [t for t in trades if t.get('pnl', 0) > 0]
        total_pnl = sum(t.get('pnl', 0) for t in trades)

        return {
            "win_rate": len(winning_trades) / len(trades),
            "total_pnl": round(total_pnl, 2),
            "roi": total_pnl / self.STARTING_BALANCE_USDT,  # Fraction of starting balance
            "winning_trades": len(winning_trades),
            "losing_trades": len(trades) - len(winning_trades),
        }


async def main():
    """
    Main entry point.

    Parses CLI arguments, builds the catalog, runs the backtest and, when the
    reference file exists, logs a comparison against it.

    Returns:
        The result dict produced by ``run_backtest``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Run Nautilus-Dolphin backtest with existing parquet data"
    )
    parser.add_argument(
        "--vbt-cache",
        type=str,
        default="vbt_cache",
        help="Path to vbt_cache directory",
    )
    parser.add_argument(
        "--assets",
        type=str,
        default="BTCUSDT",
        help="Comma-separated list of assets",
    )
    parser.add_argument(
        "--start-date",
        type=str,
        default="2026-01-01",
        help="Start date (YYYY-MM-DD)",
    )
    parser.add_argument(
        "--end-date",
        type=str,
        default="2026-01-07",
        help="End date (YYYY-MM-DD)",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="backtest_results",
        help="Output directory",
    )
    parser.add_argument(
        "--reference-file",
        type=str,
        default="itest_v7_results.json",
        help="Path to itest_v7 reference results for comparison",
    )

    args = parser.parse_args()

    # Parse assets; drop empty entries left by stray or trailing commas
    assets = [a.strip() for a in args.assets.split(",") if a.strip()]

    # Create strategy config (tight_3_3)
    strategy_config = create_tight_3_3_config()

    # Initialize backtest runner
    runner = NDBacktestWithExistingData(
        vbt_cache_path=args.vbt_cache,
        output_dir=args.output_dir,
    )

    # Prepare data catalog
    catalog_path = runner.prepare_data_catalog(
        assets=assets,
        start_date=args.start_date,
        end_date=args.end_date,
    )

    # Run backtest
    results = runner.run_backtest(
        catalog_path=catalog_path,
        strategy_config=strategy_config,
        assets=assets,
        start_date=args.start_date,
        end_date=args.end_date,
    )

    # Compare with reference if available
    if os.path.exists(args.reference_file):
        logger.info("=" * 80)
        logger.info("COMPARISON WITH ITEST_V7 REFERENCE")
        logger.info("=" * 80)

        with open(args.reference_file, 'r', encoding="utf-8") as f:
            ref_data = json.load(f)

        # Defaults are used when the reference file lacks a key.
        ref_trades = ref_data.get('total_trades', 4009)
        ref_win_rate = ref_data.get('win_rate', 0.3198)
        ref_roi = ref_data.get('roi', -0.7609)

        nd_trades = results['trade_count']
        nd_win_rate = results['metrics']['win_rate']
        nd_roi = results['metrics']['roi']

        logger.info(f"Trades: Ref={ref_trades}, ND={nd_trades}, Diff={abs(nd_trades - ref_trades)}")
        logger.info(f"Win Rate: Ref={ref_win_rate:.2%}, ND={nd_win_rate:.2%}, Diff={abs(nd_win_rate - ref_win_rate):.2%}")
        logger.info(f"ROI: Ref={ref_roi:.2%}, ND={nd_roi:.2%}, Diff={abs(nd_roi - ref_roi):.2%}")

    return results


if __name__ == "__main__":
    asyncio.run(main())