388 lines
14 KiB
Python
388 lines
14 KiB
Python
|
|
"""
|
||
|
|
Minimal Nautilus-Dolphin Backtest Runner
|
||
|
|
=========================================
|
||
|
|
Simplified version for testing integration without full data catalog.
|
||
|
|
Generates mock trades for validation framework testing.
|
||
|
|
|
||
|
|
Author: Claude
|
||
|
|
Date: 2026-02-19
|
||
|
|
"""
|
||
|
|
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
import json
|
||
|
|
import asyncio
|
||
|
|
from datetime import datetime, timedelta
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import Optional, List, Dict, Any
|
||
|
|
import logging
|
||
|
|
import random
|
||
|
|
|
||
|
|
# Logging setup: timestamped, level-tagged console output for the runner.
logging.basicConfig(
    format='%(asctime)s | %(levelname)-8s | %(message)s',
    datefmt='%H:%M:%S',
    level=logging.INFO,
)

# Module-level logger, named after this module per stdlib convention.
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
# Validation targets taken from the itest_v7 reference run; the mock
# backtest and the comparator both measure against these numbers.
REFERENCE_TRADES = 4009        # total closed trades in the reference run
REFERENCE_WIN_RATE = 0.3198    # fraction of winning trades (~32%)
REFERENCE_ROI = -0.7609        # return on initial capital (~-76%)
|
||
|
|
|
||
|
|
|
||
|
|
class MockBacktestRunner:
    """
    Mock backtest runner that generates synthetic trades
    matching itest_v7 reference statistics.

    This allows testing the validation framework without
    requiring full data catalog setup.
    """

    def __init__(
        self,
        output_dir: str = "backtest_results",
        random_seed: int = 42,
    ):
        """Initialize mock runner.

        Args:
            output_dir: Directory where result JSON files are written
                (created if missing).
            random_seed: Seed applied to the module-level ``random``
                generator so generated trades are reproducible.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.random_seed = random_seed
        random.seed(random_seed)

        logger.info("[OK] Mock backtest runner initialized")

    async def run_backtest(
        self,
        strategy_config: Optional[Dict[str, Any]] = None,
        target_trades: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        Generate mock trades matching reference statistics.

        Args:
            strategy_config: Strategy parameters; defaults to the
                tight_3_3 configuration.
            target_trades: Number of trades to generate; when omitted it
                defaults to REFERENCE_TRADES (resolved at call time so the
                module constant is read when the backtest actually runs).

        Returns:
            Mock backtest results (also written to a timestamped JSON
            file under ``self.output_dir``).
        """
        if strategy_config is None:
            strategy_config = self._create_tight_3_3_config()
        if target_trades is None:
            target_trades = REFERENCE_TRADES

        logger.info("=" * 80)
        logger.info("NAUTILUS-DOLPHIN MOCK BACKTEST (for validation)")
        logger.info("=" * 80)
        logger.info(f"Strategy: {strategy_config.get('strategy_id', 'tight_3_3')}")
        logger.info(f"Max Leverage: {strategy_config.get('max_leverage', 2.5)}x")
        logger.info(f"Target Trades: {target_trades}")
        logger.info("=" * 80)

        # Generate mock trades with statistics matching reference
        trades = self._generate_mock_trades(target_trades)

        # Compute metrics
        metrics = self._compute_metrics(trades)

        # Save results
        result_data = {
            "timestamp": datetime.now().isoformat(),
            "strategy_config": strategy_config,
            "trades": trades,
            "metrics": metrics,
            "trade_count": len(trades),
            "is_mock": True,
            "note": "Mock trades for validation framework testing",
        }

        output_file = self.output_dir / f"nd_mock_backtest_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(output_file, 'w') as f:
            json.dump(result_data, f, indent=2)

        logger.info(f"[OK] Results saved to: {output_file}")
        logger.info(f"[SUMMARY] Generated {len(trades)} trades")
        logger.info(f"[SUMMARY] Win Rate: {metrics.get('win_rate', 0):.2%}")
        logger.info(f"[SUMMARY] ROI: {metrics.get('roi', 0):.2%}")

        return result_data

    def _create_tight_3_3_config(self) -> Dict[str, Any]:
        """Create the tight_3_3 strategy config (default mock strategy)."""
        return {
            "strategy_id": "tight_3_3",
            "strategy_type": "impulse",
            "max_leverage": 2.5,
            "capital_fraction": 0.15,
            "profit_target": 0.018,
            "stop_loss": 0.015,
            "trailing_stop": 0.009,
            "max_hold_bars": 120,
            "min_confidence": 0.65,
            "impulse_threshold": 0.6,
            "reversal_threshold": 0.45,
            "irp_alignment": 0.45,
        }

    def _generate_mock_trades(
        self,
        count: int,
        win_rate: float = 0.3198,  # mirrors REFERENCE_WIN_RATE
    ) -> List[Dict[str, Any]]:
        """
        Generate mock trades matching reference statistics.

        Generates trades with:
        - ~32% win rate by default (matching reference); the rate is now a
          parameter so callers can synthesize other scenarios
        - Entry/exit prices in realistic BTC range
        - Various exit types (stop_loss, take_profit, timeout, etc.)
        - Bars held distribution centered around ~20-30 bars
        - Prices consistent with direction: a winning LONG exits above its
          entry, a winning SHORT exits below it

        Args:
            count: Number of trades to generate.
            win_rate: Probability that any given trade is a winner.

        Returns:
            List of JSON-serializable trade dicts.
        """
        trades: List[Dict[str, Any]] = []

        # Exit type distribution (approximate from reference)
        exit_types = [
            ("stop_loss", 0.35),
            ("take_profit", 0.32),
            ("timeout", 0.25),
            ("trailing_stop", 0.08),
        ]

        base_time = datetime(2024, 1, 1, 0, 0, 0)
        current_price = 45000.0

        for i in range(count):
            # Determine if winning trade (default ~32% win rate)
            is_winner = random.random() < win_rate

            # Bug fix: choose the direction BEFORE deriving prices.  The
            # original derived exit_price from pnl_pct and picked the
            # direction independently afterwards, so a SHORT trade could
            # report a positive pnl while exit_price > entry_price.
            direction = "LONG" if random.random() < 0.55 else "SHORT"

            # Signed return on the position (positive = profit)
            if is_winner:
                pnl_pct = random.uniform(0.005, 0.04)    # 0.5% to 4% profit
            else:
                pnl_pct = random.uniform(-0.03, -0.005)  # 0.5% to 3% loss

            # Entry and exit prices.  For a SHORT, a profit means the
            # price moved DOWN, so the price move is the negated return.
            entry_price = current_price * (1 + random.uniform(-0.02, 0.02))
            price_move = pnl_pct if direction == "LONG" else -pnl_pct
            exit_price = entry_price * (1 + price_move)

            # Position sizing (2.5x leverage on 15% of $100k capital)
            position_size = 100000 * 0.15 * 2.5  # $37,500 notional
            quantity = position_size / entry_price

            # P&L calculation (pnl_pct is already direction-adjusted)
            pnl = position_size * pnl_pct

            # Bars held (typical range 5-80 bars, centered around 25)
            bars_held = int(random.gauss(25, 15))
            bars_held = max(5, min(120, bars_held))  # Clamp to 5-120

            # Select exit type by sampling the cumulative distribution
            r = random.random()
            cum_prob = 0
            exit_type = exit_types[0][0]
            for etype, prob in exit_types:
                cum_prob += prob
                if r <= cum_prob:
                    exit_type = etype
                    break

            # Override exit type so it cannot contradict the P&L sign
            if is_winner and exit_type == "stop_loss":
                exit_type = "take_profit"
            elif not is_winner and exit_type == "take_profit":
                exit_type = "stop_loss"

            # Timestamps: one entry every 10 minutes, 10-minute bars
            entry_time = base_time + timedelta(minutes=10 * i)
            exit_time = entry_time + timedelta(minutes=10 * bars_held)

            trade = {
                "trade_id": f"ND_{i:06d}",
                "entry_time": entry_time.isoformat(),
                "exit_time": exit_time.isoformat(),
                "entry_price": round(entry_price, 2),
                "exit_price": round(exit_price, 2),
                "direction": direction,
                "quantity": round(quantity, 6),
                "pnl": round(pnl, 2),
                "pnl_pct": round(pnl_pct * 100, 3),
                "exit_reason": exit_type,
                "bars_held": bars_held,
                "commission": round(position_size * 0.0004, 2),  # 0.04% taker fee
            }
            trades.append(trade)

            # Random-walk the market: next entry is priced off this exit
            current_price = exit_price

        return trades

    def _compute_metrics(self, trades: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Compute performance metrics.

        Args:
            trades: Trade dicts as produced by ``_generate_mock_trades``.

        Returns:
            Aggregate statistics; for an empty list a minimal all-zero
            summary is returned so callers need no special-casing.
        """
        if not trades:
            return {
                "win_rate": 0,
                "roi": 0,
                "avg_pnl": 0,
                "total_pnl": 0,
            }

        winning_trades = [t for t in trades if t.get('pnl', 0) > 0]
        total_pnl = sum(t.get('pnl', 0) for t in trades)

        # Exit type breakdown (reason -> count)
        exit_breakdown: Dict[str, int] = {}
        for t in trades:
            reason = t.get('exit_reason', 'unknown')
            exit_breakdown[reason] = exit_breakdown.get(reason, 0) + 1

        return {
            "win_rate": len(winning_trades) / len(trades),
            # ROI against the assumed $100k capital used for position sizing
            "roi": total_pnl / 100000,
            "avg_pnl": total_pnl / len(trades),
            "total_pnl": round(total_pnl, 2),
            "total_trades": len(trades),
            "winning_trades": len(winning_trades),
            "losing_trades": len(trades) - len(winning_trades),
            "exit_breakdown": exit_breakdown,
            "avg_bars_held": sum(t.get('bars_held', 0) for t in trades) / len(trades),
        }
|
||
|
|
|
||
|
|
def _compute_metrics(self, trades: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||
|
|
"""Compute performance metrics."""
|
||
|
|
if not trades:
|
||
|
|
return {
|
||
|
|
"win_rate": 0,
|
||
|
|
"roi": 0,
|
||
|
|
"avg_pnl": 0,
|
||
|
|
"total_pnl": 0,
|
||
|
|
}
|
||
|
|
|
||
|
|
winning_trades = [t for t in trades if t.get('pnl', 0) > 0]
|
||
|
|
total_pnl = sum(t.get('pnl', 0) for t in trades)
|
||
|
|
|
||
|
|
# Exit type breakdown
|
||
|
|
exit_breakdown = {}
|
||
|
|
for t in trades:
|
||
|
|
reason = t.get('exit_reason', 'unknown')
|
||
|
|
exit_breakdown[reason] = exit_breakdown.get(reason, 0) + 1
|
||
|
|
|
||
|
|
return {
|
||
|
|
"win_rate": len(winning_trades) / len(trades) if trades else 0,
|
||
|
|
"roi": total_pnl / 100000,
|
||
|
|
"avg_pnl": total_pnl / len(trades) if trades else 0,
|
||
|
|
"total_pnl": round(total_pnl, 2),
|
||
|
|
"total_trades": len(trades),
|
||
|
|
"winning_trades": len(winning_trades),
|
||
|
|
"losing_trades": len(trades) - len(winning_trades),
|
||
|
|
"exit_breakdown": exit_breakdown,
|
||
|
|
"avg_bars_held": sum(t.get('bars_held', 0) for t in trades) / len(trades),
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
class TradeByTradeComparator:
    """
    Compares Nautilus-Dolphin trades with itest_v7 reference.

    Performs statistical validation to ensure ND implementation
    produces equivalent results.
    """

    def __init__(self, tolerance: float = 0.05):
        """
        Initialize comparator.

        Args:
            tolerance: Statistical tolerance (default 5%)
        """
        self.tolerance = tolerance
        self.reference_data = None

    def load_reference_data(self, ref_file: str) -> Dict[str, Any]:
        """Load itest_v7 reference data from a JSON file and cache it."""
        with open(ref_file, 'r') as fh:
            loaded = json.load(fh)
        self.reference_data = loaded
        return loaded

    def compare(self, nd_results: Dict[str, Any]) -> Dict[str, Any]:
        """
        Compare ND results with reference.

        Args:
            nd_results: Results from Nautilus-Dolphin backtest

        Returns:
            Comparison report

        Raises:
            ValueError: If ``load_reference_data`` was never called.
        """
        if self.reference_data is None:
            raise ValueError("Reference data not loaded. Call load_reference_data() first.")

        nd_metrics = nd_results.get('metrics', {})
        checks: Dict[str, Any] = {}

        # -- Trade count: relative difference vs. the reference count ----
        expected_count = self.reference_data.get('total_trades', REFERENCE_TRADES)
        observed_count = nd_results.get('trade_count', 0)
        count_gap = abs(observed_count - expected_count) / expected_count if expected_count > 0 else 0
        checks["trade_count"] = {
            "reference": expected_count,
            "nautilus": observed_count,
            "difference_pct": round(count_gap * 100, 2),
            "passed": count_gap <= self.tolerance,
        }

        # -- Win rate: absolute difference in rate (percentage points) --
        expected_wr = self.reference_data.get('win_rate', REFERENCE_WIN_RATE)
        observed_wr = nd_metrics.get('win_rate', 0)
        wr_gap = abs(observed_wr - expected_wr)
        checks["win_rate"] = {
            "reference": round(expected_wr * 100, 2),
            "nautilus": round(observed_wr * 100, 2),
            "difference_pct": round(wr_gap * 100, 2),
            "passed": wr_gap <= self.tolerance,
        }

        # -- ROI: tolerance scales with |reference ROI| when non-zero ---
        expected_roi = self.reference_data.get('roi', REFERENCE_ROI)
        observed_roi = nd_metrics.get('roi', 0)
        roi_gap = abs(observed_roi - expected_roi)
        if expected_roi != 0:
            roi_ok = roi_gap <= self.tolerance * abs(expected_roi)
        else:
            roi_ok = roi_gap <= 0.05
        checks["roi"] = {
            "reference": round(expected_roi * 100, 2),
            "nautilus": round(observed_roi * 100, 2),
            "difference_pct": round(roi_gap * 100, 2),
            "passed": roi_ok,
        }

        return {
            "timestamp": datetime.now().isoformat(),
            "tolerance": self.tolerance,
            "checks": checks,
            # Overall verdict: every individual check must have passed.
            "passed": all(c.get("passed", False) for c in checks.values()),
        }
|
||
|
|
|
||
|
|
|
||
|
|
async def main():
    """Main entry point: run the mock backtest and, when a reference
    file exists, log a check-by-check comparison report against it."""
    import argparse

    parser = argparse.ArgumentParser(description="Run minimal ND backtest for validation")
    parser.add_argument(
        "--output-dir",
        type=str,
        default="backtest_results",
        help="Output directory",
    )
    parser.add_argument(
        "--reference-file",
        type=str,
        default="../itest_v7_results.json",
        help="Path to itest_v7 reference results",
    )
    parser.add_argument(
        "--trades",
        type=int,
        default=REFERENCE_TRADES,
        help=f"Number of trades to generate (default: {REFERENCE_TRADES})",
    )
    args = parser.parse_args()

    # Run mock backtest
    runner = MockBacktestRunner(output_dir=args.output_dir)
    results = await runner.run_backtest(target_trades=args.trades)

    # Without a reference file there is nothing to compare against.
    if not os.path.exists(args.reference_file):
        logger.warning(f"[WARN] Reference file not found: {args.reference_file}")
        return results

    comparator = TradeByTradeComparator(tolerance=0.10)
    comparator.load_reference_data(args.reference_file)
    comparison = comparator.compare(results)

    banner = "=" * 80
    logger.info(banner)
    logger.info("COMPARISON WITH REFERENCE")
    logger.info(banner)
    for check_name, check_data in comparison["checks"].items():
        verdict = "[PASS]" if check_data["passed"] else "[FAIL]"
        logger.info(f"{verdict} {check_name}:")
        logger.info(f" Ref: {check_data['reference']}, ND: {check_data['nautilus']}")
        logger.info(f" Diff: {check_data['difference_pct']}%")

    logger.info(banner)
    overall = "[PASS]" if comparison["passed"] else "[FAIL]"
    logger.info(f"{overall} OVERALL COMPARISON")

    return results


if __name__ == "__main__":
    asyncio.run(main())
|