initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)
Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
510
nautilus_dolphin/run_nd_backtest_with_existing_data.py
Executable file
510
nautilus_dolphin/run_nd_backtest_with_existing_data.py
Executable file
@@ -0,0 +1,510 @@
|
||||
"""
|
||||
Nautilus-Dolphin Backtest with Existing Parquet Data
|
||||
======================================================
|
||||
Runs full backtest using existing vbt_cache parquet data.
|
||||
|
||||
Author: Claude
|
||||
Date: 2026-02-19
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any
|
||||
import logging
|
||||
import pandas as pd
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s | %(levelname)-8s | %(message)s',
|
||||
datefmt='%H:%M:%S'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Add project root to path
|
||||
project_root = Path(__file__).parent
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
from nautilus_dolphin.nautilus.parquet_data_adapter import ParquetDataAdapter
|
||||
|
||||
# Check Nautilus availability
|
||||
try:
    # All Nautilus Trader names used by this script are imported in one guarded
    # block so that a missing/broken installation produces a single clear error.
    from nautilus_trader.backtest.node import BacktestNode
    from nautilus_trader.backtest.config import (
        BacktestRunConfig,
        BacktestEngineConfig,
        BacktestVenueConfig,
        BacktestDataConfig,
    )
    from nautilus_trader.config import ImportableStrategyConfig
    from nautilus_trader.execution.config import ImportableExecAlgorithmConfig, ExecAlgorithmConfig
    from nautilus_trader.persistence.catalog import ParquetDataCatalog
    from nautilus_trader.model.identifiers import Venue, InstrumentId
    from nautilus_trader.model.data import QuoteTick
    from nautilus_trader.risk.config import RiskEngineConfig
    from nautilus_trader.cache.config import CacheConfig
    NAUTILUS_AVAILABLE = True
    logger.info("[OK] Nautilus Trader imports successful")
except ImportError as e:
    logger.error(f"[FAIL] Nautilus import error: {e}")
    NAUTILUS_AVAILABLE = False
    # Chain the ImportError so the traceback shows which module failed to load.
    raise RuntimeError("Nautilus Trader is required") from e
|
||||
|
||||
# Import Dolphin modules
|
||||
from nautilus_dolphin.nautilus.strategy_config import (
|
||||
create_tight_3_3_config,
|
||||
DolphinStrategyConfig,
|
||||
)
|
||||
|
||||
|
||||
class NDBacktestWithExistingData:
    """
    Runs Nautilus-Dolphin backtest using existing vbt_cache parquet data.

    Workflow: build a Nautilus catalog from the parquet cache
    (``prepare_data_catalog``), then configure and run a ``BacktestNode``
    (``run_backtest``), which also extracts trades, computes metrics and
    writes a JSON report into ``output_dir``.
    """

    # Starting account balance in USDT. Used both for the simulated venue
    # account and as the ROI denominator so the two cannot drift apart.
    STARTING_BALANCE = 100_000
    # Defaults applied when the strategy configuration does not provide them.
    DEFAULT_VENUE = "BINANCE_FUTURES"
    DEFAULT_MAX_LEVERAGE = 2.5
    # Keys copied verbatim from BacktestResult.stats_returns into the metrics.
    _RETURN_STAT_KEYS = (
        'Sharpe Ratio (252 days)',
        'Sortino Ratio (252 days)',
        'Profit Factor',
        'Returns Volatility (252 days)',
    )

    def __init__(
        self,
        vbt_cache_path: str = "vbt_cache",
        output_dir: str = "backtest_results",
    ):
        """
        Initialize backtest runner.

        Creates ``output_dir`` if it does not exist and builds the parquet
        data adapter, whose catalog is placed in ``<vbt_cache_path>/catalog``.

        Args:
            vbt_cache_path: Path to vbt_cache directory
            output_dir: Directory for results
        """
        self.vbt_cache_path = Path(vbt_cache_path)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Create data adapter
        self.adapter = ParquetDataAdapter(
            vbt_cache_path=vbt_cache_path,
            output_catalog_path=str(self.vbt_cache_path / "catalog"),
        )

        logger.info("[OK] NDBacktestWithExistingData initialized")

    def prepare_data_catalog(
        self,
        assets: list,
        start_date: str,
        end_date: str,
        venue: str = "BINANCE_FUTURES",
    ) -> str:
        """
        Prepare Nautilus data catalog from existing parquet data.

        Thin wrapper that delegates to the adapter's
        ``create_nautilus_catalog``.

        Args:
            assets: List of assets to include
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
            venue: Venue for the instruments

        Returns:
            Path to catalog
        """
        self._log_banner("PREPARING DATA CATALOG")

        return self.adapter.create_nautilus_catalog(
            assets=assets,
            start_date=start_date,
            end_date=end_date,
            venue=venue,
        )

    def run_backtest(
        self,
        catalog_path: str,
        strategy_config: Dict[str, Any],
        assets: list,
        start_date: str,
        end_date: str,
    ) -> Dict[str, Any]:
        """
        Execute backtest using Nautilus BacktestNode.

        Args:
            catalog_path: Path to Nautilus catalog
            strategy_config: Strategy configuration. Either a plain dict or a
                config object exposing ``.dict()``; ``None`` or an empty dict
                falls back to ``create_tight_3_3_config()``.
            assets: List of assets
            start_date: Start date (recorded in the report only; the data
                configs are not filtered by it)
            end_date: End date (recorded in the report only)

        Returns:
            Backtest results: a dict with ``timestamp``, ``strategy_config``,
            ``backtest_params``, ``trades``, ``metrics`` and ``trade_count``.
            The same dict is written as JSON into ``output_dir``.

        Raises:
            Exception: Any error raised while running the backtest or
                collecting results is logged with its traceback and re-raised.
        """
        self._log_banner("CONFIGURING BACKTEST")

        venue_id = self._config_value(strategy_config, 'venue', self.DEFAULT_VENUE)
        max_leverage = self._config_value(
            strategy_config, 'max_leverage', self.DEFAULT_MAX_LEVERAGE
        )
        run_config = self._build_run_config(
            catalog_path, strategy_config, assets, venue_id, max_leverage
        )

        self._log_banner("RUNNING BACKTEST")
        logger.info("This may take several minutes...")

        # Execute backtest
        try:
            logger.info("Initializing BacktestNode...")
            backtest_node = BacktestNode(configs=[run_config])
            logger.info("BacktestNode initialized. Starting run...")

            backtest_node.run()
            logger.info("[OK] Backtest completed!")

            # Get engine to extract metrics
            logger.info(f"Retrieving engine for ID: {run_config.id}")
            engine = backtest_node.get_engine(run_config.id)

            # Get results
            logger.info("Retrieving results from engine...")
            result = engine.get_result()

            trades = self._collect_trades(engine, result)
            logger.info(f"Extracted {len(trades)} trades from cache.")

            logger.info("Computing metrics...")
            metrics = self._compute_metrics(trades)

            # Enrich from BacktestResult.stats_pnls (Nautilus internal accounting)
            if self._apply_pnl_stats(metrics, result):
                logger.info(f"Nautilus stats: PnL={metrics['total_pnl']:.2f}, "
                            f"ROI={metrics['roi']:.4%}, WR={metrics['win_rate']:.2%}")

            # Enrich from BacktestResult.stats_returns if available
            self._apply_return_stats(metrics, result)

            # Pull trade count from Nautilus result (total_orders / 2 = round-trips)
            if self._apply_order_counts(metrics, result, len(trades)):
                est_trades = result.total_orders // 2
                logger.info(f"Nautilus reports {result.total_orders} orders = ~{est_trades} round-trips")

            logger.info("Metrics computed.")
        except Exception as e:
            # logger.exception records the message together with the traceback.
            logger.exception(f"[CRITICAL] Backtest failed: {e}")
            # Ensure we flush logs
            sys.stdout.flush()
            sys.stderr.flush()
            raise

        # Sample the clock once so the report timestamp and filename agree.
        finished_at = datetime.now()
        result_data = {
            "timestamp": finished_at.isoformat(),
            "strategy_config": strategy_config,
            "backtest_params": {
                "assets": assets,
                "start_date": start_date,
                "end_date": end_date,
                "venue": venue_id,
            },
            "trades": trades,
            "metrics": metrics,
            "trade_count": len(trades),
        }

        # Save results (default=str stringifies anything JSON cannot encode,
        # e.g. a strategy config object).
        output_file = self.output_dir / f"nd_backtest_{finished_at.strftime('%Y%m%d_%H%M%S')}.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(result_data, f, indent=2, default=str)

        self._log_banner("BACKTEST RESULTS")
        logger.info(f"[OK] Results saved: {output_file}")
        logger.info(f"[SUMMARY] Total trades: {len(trades)}")
        logger.info(f"[SUMMARY] Win rate: {metrics.get('win_rate', 0):.2%}")
        logger.info(f"[SUMMARY] Total P&L: ${metrics.get('total_pnl', 0):,.2f}")
        logger.info(f"[SUMMARY] ROI: {metrics.get('roi', 0):.2%}")

        return result_data

    @staticmethod
    def _log_banner(title: str) -> None:
        """Log ``title`` framed by two 80-character separator lines."""
        logger.info("=" * 80)
        logger.info(title)
        logger.info("=" * 80)

    @staticmethod
    def _config_value(config: Any, key: str, default: Any) -> Any:
        """Read ``key`` from a dict-style or attribute-style config, else ``default``."""
        if isinstance(config, dict):
            return config.get(key, default)
        return getattr(config, key, default)

    @staticmethod
    def _strategy_config_as_dict(strategy_config: Any) -> Dict[str, Any]:
        """Return the strategy settings as a plain dict for ImportableStrategyConfig.

        A non-empty dict is copied, an object exposing ``.dict()`` is
        converted, and anything else (``None``, empty dict) falls back to the
        tight_3_3 preset.
        """
        if isinstance(strategy_config, dict) and strategy_config:
            return dict(strategy_config)
        if strategy_config is not None and hasattr(strategy_config, 'dict'):
            return strategy_config.dict()
        return create_tight_3_3_config().dict()

    def _build_run_config(
        self,
        catalog_path: str,
        strategy_config: Any,
        assets: list,
        venue_id: str,
        max_leverage: Any,
    ):
        """Assemble the BacktestRunConfig (venue, data, strategy, engine)."""
        # Venue configuration
        venue_config = BacktestVenueConfig(
            name=venue_id,
            oms_type="NETTING",
            account_type="MARGIN",
            base_currency="USDT",
            starting_balances=[f"{self.STARTING_BALANCE} USDT"],
            default_leverage=str(max_leverage),
        )
        logger.info(f"[OK] Venue: {venue_id} (leverage: {max_leverage}x)")

        # Data configuration for each asset: the tick stream first, then the
        # signal stream (carried as QuoteTicks on a separate instrument).
        data_configs = []
        for asset in assets:
            for instrument_id in (f"{asset}.{venue_id}", f"{asset}.SIGNAL.{venue_id}"):
                data_configs.append(
                    BacktestDataConfig(
                        catalog_path=catalog_path,
                        data_cls="nautilus_trader.model.data:QuoteTick",
                        instrument_id=instrument_id,
                    )
                )
        logger.info(f"[OK] Data configs: {len(data_configs)} sources (Ticks + Signals)")

        # Strategy configuration: honour the configuration supplied by the caller.
        nautilus_strategy_config = ImportableStrategyConfig(
            strategy_path="nautilus_dolphin.nautilus.strategy:DolphinExecutionStrategy",
            config_path="nautilus_dolphin.nautilus.strategy_config:DolphinStrategyConfig",
            config=self._strategy_config_as_dict(strategy_config),
        )

        # Engine configuration
        # Register SmartExecAlgorithm here since it's no longer allowed in on_start
        exec_algorithm_config = ImportableExecAlgorithmConfig(
            exec_algorithm_path="nautilus_dolphin.nautilus.smart_exec_algorithm:SmartExecAlgorithm",
            config_path="nautilus_trader.execution.config:ExecAlgorithmConfig",
            config={
                'exec_algorithm_id': "SMART_EXEC",
                'entry_timeout_sec': 25,
                'entry_abort_threshold_bps': 5.0,
                'exit_timeout_sec': 10,
                'maker_fee_rate': 0.0002,
                'taker_fee_rate': 0.0005,
            },
        )

        engine_config = BacktestEngineConfig(
            strategies=[nautilus_strategy_config],
            exec_algorithms=[exec_algorithm_config],
            risk_engine=RiskEngineConfig(
                bypass=True,  # We handle risk in strategy
            ),
            cache=CacheConfig(
                tick_capacity=1_000_000,
                bar_capacity=100_000,
            ),
        )

        # Complete run configuration
        return BacktestRunConfig(
            venues=[venue_config],
            data=data_configs,
            engine=engine_config,
            chunk_size=None,  # One-shot for speed and reliability in this case
            raise_exception=True,
            dispose_on_completion=False,
        )

    def _collect_trades(self, engine, result) -> list:
        """Extract trades from the result object, falling back to the engine cache."""
        if not result:
            # Fallback to engine cache if no result object
            logger.warning("No result object found, falling back to engine cache...")
            return self._extract_trades(engine)

        logger.info(f"Processing result: {result}")
        # Extract trades from result directly. Deliberately broad: any failure
        # here (e.g. a result type without positions()) uses the cache instead.
        try:
            return self._extract_trades_from_result(result)
        except Exception as e:
            logger.warning(f"Failed to extract trades from result: {e}. Falling back to old cache method...")
            return self._extract_trades(engine)

    @staticmethod
    def _apply_pnl_stats(metrics: Dict[str, Any], result) -> bool:
        """Overlay PnL statistics from ``result.stats_pnls`` onto ``metrics``.

        Prefers the 'USDT' entry, otherwise the first currency available.
        Returns True when statistics were found and applied.
        """
        if not result or not hasattr(result, 'stats_pnls'):
            return False
        pnl_by_currency = result.stats_pnls or {}
        stats = pnl_by_currency.get('USDT', {}) or next(iter(pnl_by_currency.values()), {})
        if not stats:
            return False

        metrics['total_pnl'] = float(stats.get('PnL (total)', metrics['total_pnl']))
        # 'PnL% (total)' is expressed in percent; every other consumer of
        # metrics['roi'] treats it as a fraction (and formats it with '%'),
        # so convert to keep the unit consistent.
        roi_percent = stats.get('PnL% (total)')
        if roi_percent is not None:
            metrics['roi'] = float(roi_percent) / 100.0
        metrics['win_rate'] = float(stats.get('Win Rate', metrics['win_rate']))
        metrics['max_winner'] = float(stats.get('Max Winner', 0))
        metrics['max_loser'] = float(stats.get('Max Loser', 0))
        metrics['expectancy'] = float(stats.get('Expectancy', 0))
        return True

    @classmethod
    def _apply_return_stats(cls, metrics: Dict[str, Any], result) -> None:
        """Copy the known return statistics from ``result.stats_returns``."""
        if not result or not hasattr(result, 'stats_returns'):
            return
        ret_stats = result.stats_returns
        if not ret_stats:
            return
        for key in cls._RETURN_STAT_KEYS:
            value = ret_stats.get(key)
            if value is None:
                continue
            try:
                metrics[key] = float(value)
            except (TypeError, ValueError):
                # Best effort: a non-numeric statistic is simply left out.
                pass

    @staticmethod
    def _apply_order_counts(metrics: Dict[str, Any], result, trade_count: int) -> bool:
        """Record order/event/position totals reported by Nautilus.

        Adds ``estimated_trades`` (orders // 2) only when it exceeds the
        number of extracted trades. Returns True when totals were available.
        """
        if not result or not hasattr(result, 'total_orders'):
            return False
        metrics['nautilus_total_orders'] = result.total_orders
        metrics['nautilus_total_events'] = result.total_events
        metrics['nautilus_total_positions'] = result.total_positions
        est_trades = result.total_orders // 2
        if est_trades > trade_count:
            metrics['estimated_trades'] = est_trades
        return True

    @staticmethod
    def _position_to_trade(position) -> Dict[str, Any]:
        """Convert one closed position into a JSON-friendly trade record."""
        return {
            "trade_id": str(position.id),
            "instrument_id": str(position.instrument_id),
            # ts_opened / ts_closed are interpreted as nanoseconds since epoch (UTC).
            "entry_time": str(pd.to_datetime(position.ts_opened, unit='ns', utc=True)),
            "exit_time": str(pd.to_datetime(position.ts_closed, unit='ns', utc=True)),
            "entry_price": float(position.avg_px_open),
            "exit_price": float(position.avg_px_close),
            "direction": str(position.side),
            "quantity": float(position.quantity),
            "pnl": float(position.realized_pnl),
        }

    def _extract_trades(self, engine) -> list:
        """Extract closed positions from backtest engine."""
        # Get closed positions from cache
        return [
            self._position_to_trade(position)
            for position in engine.cache.positions_closed()
        ]

    def _extract_trades_from_result(self, result) -> list:
        """Extract closed positions from Nautilus BacktestResult.

        Relies on ``result.positions()``; the caller falls back to the engine
        cache when this raises.
        """
        return [self._position_to_trade(position) for position in result.positions()]

    def _compute_metrics(self, trades: list) -> Dict[str, Any]:
        """Compute performance metrics.

        Args:
            trades: Trade dicts; only the ``pnl`` key is read (missing -> 0).

        Returns:
            Dict with ``win_rate``, ``total_pnl`` (rounded to 2 decimals),
            ``roi`` (fraction of ``STARTING_BALANCE``), ``winning_trades`` and
            ``losing_trades``. Break-even trades count as losing.
        """
        if not trades:
            return {
                "win_rate": 0,
                "total_pnl": 0,
                "roi": 0,
                "winning_trades": 0,
                "losing_trades": 0,
            }

        winners = sum(1 for trade in trades if trade.get('pnl', 0) > 0)
        total_pnl = sum(trade.get('pnl', 0) for trade in trades)

        return {
            "win_rate": winners / len(trades),
            "total_pnl": round(total_pnl, 2),
            "roi": total_pnl / self.STARTING_BALANCE,  # Based on starting balance
            "winning_trades": winners,
            "losing_trades": len(trades) - winners,
        }
|
||||
|
||||
|
||||
async def main():
    """Main entry point.

    Parses the command line, builds the catalog, runs the backtest and, when
    the reference file exists, logs a comparison against the itest_v7 numbers.

    Returns:
        The result dict produced by ``NDBacktestWithExistingData.run_backtest``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Run Nautilus-Dolphin backtest with existing parquet data"
    )
    parser.add_argument(
        "--vbt-cache",
        type=str,
        default="vbt_cache",
        help="Path to vbt_cache directory",
    )
    parser.add_argument(
        "--assets",
        type=str,
        default="BTCUSDT",
        help="Comma-separated list of assets",
    )
    parser.add_argument(
        "--start-date",
        type=str,
        default="2026-01-01",
        help="Start date (YYYY-MM-DD)",
    )
    parser.add_argument(
        "--end-date",
        type=str,
        default="2026-01-07",
        help="End date (YYYY-MM-DD)",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="backtest_results",
        help="Output directory",
    )
    parser.add_argument(
        "--reference-file",
        type=str,
        default="itest_v7_results.json",
        help="Path to itest_v7 reference results for comparison",
    )

    args = parser.parse_args()

    # Parse assets, dropping empty entries left by stray commas ("BTCUSDT,")
    # which would otherwise become an instrument with an empty symbol.
    assets = [name.strip() for name in args.assets.split(",") if name.strip()]
    if not assets:
        parser.error("--assets must name at least one asset")

    # Create strategy config (tight_3_3)
    strategy_config = create_tight_3_3_config()

    # Initialize backtest runner
    runner = NDBacktestWithExistingData(
        vbt_cache_path=args.vbt_cache,
        output_dir=args.output_dir,
    )

    # Prepare data catalog
    catalog_path = runner.prepare_data_catalog(
        assets=assets,
        start_date=args.start_date,
        end_date=args.end_date,
    )

    # Run backtest
    results = runner.run_backtest(
        catalog_path=catalog_path,
        strategy_config=strategy_config,
        assets=assets,
        start_date=args.start_date,
        end_date=args.end_date,
    )

    # Compare with reference if available (a directory of that name is ignored)
    if os.path.isfile(args.reference_file):
        logger.info("=" * 80)
        logger.info("COMPARISON WITH ITEST_V7 REFERENCE")
        logger.info("=" * 80)

        with open(args.reference_file, 'r', encoding='utf-8') as f:
            ref_data = json.load(f)

        # Fallback values are used when the reference file lacks a key.
        ref_trades = ref_data.get('total_trades', 4009)
        ref_win_rate = ref_data.get('win_rate', 0.3198)
        ref_roi = ref_data.get('roi', -0.7609)

        nd_trades = results['trade_count']
        nd_win_rate = results['metrics']['win_rate']
        nd_roi = results['metrics']['roi']

        logger.info(f"Trades: Ref={ref_trades}, ND={nd_trades}, Diff={abs(nd_trades - ref_trades)}")
        logger.info(f"Win Rate: Ref={ref_win_rate:.2%}, ND={nd_win_rate:.2%}, Diff={abs(nd_win_rate - ref_win_rate):.2%}")
        logger.info(f"ROI: Ref={ref_roi:.2%}, ND={nd_roi:.2%}, Diff={abs(nd_roi - ref_roi):.2%}")

    return results
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
Reference in New Issue
Block a user