initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
327
nautilus_dolphin/mc/mc_store.py
Executable file
327
nautilus_dolphin/mc/mc_store.py
Executable file
@@ -0,0 +1,327 @@
|
||||
"""
|
||||
Monte Carlo Result Store
|
||||
========================
|
||||
|
||||
Persistence layer for MC trial results.
|
||||
|
||||
Supports:
|
||||
- Parquet files for bulk data storage
|
||||
- SQLite index for fast querying
|
||||
- Incremental/resumable runs
|
||||
- Batch organization
|
||||
|
||||
Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 8
|
||||
"""
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any, Union
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
|
||||
# Try to import pandas/pyarrow
|
||||
try:
|
||||
import pandas as pd
|
||||
PANDAS_AVAILABLE = True
|
||||
except ImportError:
|
||||
PANDAS_AVAILABLE = False
|
||||
print("[WARN] pandas not available - Parquet storage disabled")
|
||||
|
||||
from .mc_metrics import MCTrialResult
|
||||
from .mc_validator import ValidationResult
|
||||
|
||||
|
||||
class MCStore:
|
||||
"""
|
||||
Monte Carlo Result Store.
|
||||
|
||||
Manages persistence of trial configurations, results, and indices.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
output_dir: Union[str, Path] = "mc_results",
|
||||
batch_size: int = 1000
|
||||
):
|
||||
"""
|
||||
Initialize the store.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
output_dir : str or Path
|
||||
Directory for all MC results
|
||||
batch_size : int
|
||||
Number of trials per batch file
|
||||
"""
|
||||
self.output_dir = Path(output_dir)
|
||||
self.batch_size = batch_size
|
||||
|
||||
# Create directory structure
|
||||
self.manifests_dir = self.output_dir / "manifests"
|
||||
self.results_dir = self.output_dir / "results"
|
||||
self.models_dir = self.output_dir / "models"
|
||||
|
||||
self.manifests_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.results_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.models_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# SQLite index
|
||||
self.index_path = self.output_dir / "mc_index.sqlite"
|
||||
self._init_index()
|
||||
|
||||
self.current_batch = self._get_latest_batch() + 1
|
||||
|
||||
def _init_index(self):
|
||||
"""Initialize SQLite index."""
|
||||
conn = sqlite3.connect(self.index_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute('''
|
||||
CREATE TABLE IF NOT EXISTS mc_index (
|
||||
trial_id INTEGER PRIMARY KEY,
|
||||
batch_id INTEGER,
|
||||
status TEXT,
|
||||
roi_pct REAL,
|
||||
profit_factor REAL,
|
||||
win_rate REAL,
|
||||
max_dd_pct REAL,
|
||||
sharpe REAL,
|
||||
n_trades INTEGER,
|
||||
champion_region INTEGER,
|
||||
catastrophic INTEGER,
|
||||
created_at INTEGER
|
||||
)
|
||||
''')
|
||||
|
||||
# Create indices
|
||||
cursor.execute('CREATE INDEX IF NOT EXISTS idx_roi ON mc_index (roi_pct)')
|
||||
cursor.execute('CREATE INDEX IF NOT EXISTS idx_champion ON mc_index (champion_region)')
|
||||
cursor.execute('CREATE INDEX IF NOT EXISTS idx_catastrophic ON mc_index (catastrophic)')
|
||||
cursor.execute('CREATE INDEX IF NOT EXISTS idx_batch ON mc_index (batch_id)')
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
def _get_latest_batch(self) -> int:
|
||||
"""Get the highest batch ID in the index."""
|
||||
conn = sqlite3.connect(self.index_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute('SELECT MAX(batch_id) FROM mc_index')
|
||||
result = cursor.fetchone()
|
||||
conn.close()
|
||||
|
||||
return result[0] if result and result[0] else 0
|
||||
|
||||
def save_validation_results(self, results: List[ValidationResult], batch_id: int):
    """Write one batch's validation results to a JSON manifest file.

    The manifest is named ``batch_NNNN_validation.json`` under the
    manifests directory; each result is serialized via its ``to_dict()``.
    """
    manifest_path = self.manifests_dir / f"batch_{batch_id:04d}_validation.json"

    serialized = [result.to_dict() for result in results]
    with manifest_path.open('w') as handle:
        json.dump(serialized, handle, indent=2)

    print(f"[OK] Saved validation manifest: {manifest_path}")
|
||||
|
||||
def save_trial_results(
    self,
    results: List[MCTrialResult],
    batch_id: Optional[int] = None
):
    """
    Save trial results to Parquet and update the SQLite index.

    Parameters
    ----------
    results : List[MCTrialResult]
        Trial results to save.  An empty list is a no-op and — unlike
        before — no longer consumes an auto-incremented batch id.
    batch_id : int, optional
        Batch ID (auto-incremented from ``current_batch`` if not provided).
    """
    # Bail out before allocating a batch id: the previous order
    # incremented current_batch even when there was nothing to persist,
    # leaving gaps in the batch numbering.
    if not results:
        return

    if batch_id is None:
        batch_id = self.current_batch
        self.current_batch += 1

    # Bulk storage is optional: skipped when pandas/pyarrow are missing.
    if PANDAS_AVAILABLE:
        self._save_parquet(results, batch_id)

    # The SQLite summary index is always maintained.
    self._update_index(results, batch_id)

    print(f"[OK] Saved batch {batch_id}: {len(results)} trials")
|
||||
|
||||
def _save_parquet(self, results: List[MCTrialResult], batch_id: int):
    """Persist one batch of trial results as a zstd-compressed Parquet file.

    File name follows the ``batch_NNNN_results.parquet`` convention used
    by load_batch/load_corpus.
    """
    target = self.results_dir / f"batch_{batch_id:04d}_results.parquet"

    # One row per trial, columns taken from each result's to_dict().
    frame = pd.DataFrame([result.to_dict() for result in results])
    frame.to_parquet(target, index=False, compression='zstd')
|
||||
|
||||
def _update_index(self, results: List[MCTrialResult], batch_id: int):
|
||||
"""Update SQLite index with result summaries."""
|
||||
conn = sqlite3.connect(self.index_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
timestamp = int(datetime.now().timestamp())
|
||||
|
||||
for r in results:
|
||||
cursor.execute('''
|
||||
INSERT OR REPLACE INTO mc_index
|
||||
(trial_id, batch_id, status, roi_pct, profit_factor, win_rate,
|
||||
max_dd_pct, sharpe, n_trades, champion_region, catastrophic, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
''', (
|
||||
r.trial_id,
|
||||
batch_id,
|
||||
r.status,
|
||||
r.roi_pct,
|
||||
r.profit_factor,
|
||||
r.win_rate,
|
||||
r.max_drawdown_pct,
|
||||
r.sharpe_ratio,
|
||||
r.n_trades,
|
||||
int(r.champion_region),
|
||||
int(r.catastrophic),
|
||||
timestamp
|
||||
))
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
def query_index(
|
||||
self,
|
||||
status: Optional[str] = None,
|
||||
min_roi: Optional[float] = None,
|
||||
champion_only: bool = False,
|
||||
catastrophic_only: bool = False,
|
||||
limit: int = 1000
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Query the SQLite index.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
status : str, optional
|
||||
Filter by status
|
||||
min_roi : float, optional
|
||||
Minimum ROI percentage
|
||||
champion_only : bool
|
||||
Only champion region configs
|
||||
catastrophic_only : bool
|
||||
Only catastrophic configs
|
||||
limit : int
|
||||
Maximum results
|
||||
|
||||
Returns
|
||||
-------
|
||||
List[Dict]
|
||||
Matching index entries
|
||||
"""
|
||||
conn = sqlite3.connect(self.index_path)
|
||||
conn.row_factory = sqlite3.Row
|
||||
cursor = conn.cursor()
|
||||
|
||||
query = 'SELECT * FROM mc_index WHERE 1=1'
|
||||
params = []
|
||||
|
||||
if status:
|
||||
query += ' AND status = ?'
|
||||
params.append(status)
|
||||
|
||||
if min_roi is not None:
|
||||
query += ' AND roi_pct >= ?'
|
||||
params.append(min_roi)
|
||||
|
||||
if champion_only:
|
||||
query += ' AND champion_region = 1'
|
||||
|
||||
if catastrophic_only:
|
||||
query += ' AND catastrophic = 1'
|
||||
|
||||
query += ' ORDER BY roi_pct DESC LIMIT ?'
|
||||
params.append(limit)
|
||||
|
||||
cursor.execute(query, params)
|
||||
rows = cursor.fetchall()
|
||||
conn.close()
|
||||
|
||||
return [dict(row) for row in rows]
|
||||
|
||||
def get_corpus_stats(self) -> Dict[str, Any]:
|
||||
"""Get statistics about the stored corpus."""
|
||||
conn = sqlite3.connect(self.index_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Total trials
|
||||
cursor.execute('SELECT COUNT(*) FROM mc_index')
|
||||
total = cursor.fetchone()[0]
|
||||
|
||||
# By status
|
||||
cursor.execute('SELECT status, COUNT(*) FROM mc_index GROUP BY status')
|
||||
by_status = {row[0]: row[1] for row in cursor.fetchall()}
|
||||
|
||||
# Champion region
|
||||
cursor.execute('SELECT COUNT(*) FROM mc_index WHERE champion_region = 1')
|
||||
champion_count = cursor.fetchone()[0]
|
||||
|
||||
# Catastrophic
|
||||
cursor.execute('SELECT COUNT(*) FROM mc_index WHERE catastrophic = 1')
|
||||
catastrophic_count = cursor.fetchone()[0]
|
||||
|
||||
# ROI stats
|
||||
cursor.execute('''
|
||||
SELECT AVG(roi_pct), MIN(roi_pct), MAX(roi_pct),
|
||||
AVG(sharpe), AVG(max_dd_pct)
|
||||
FROM mc_index WHERE status = 'completed'
|
||||
''')
|
||||
roi_stats = cursor.fetchone()
|
||||
|
||||
conn.close()
|
||||
|
||||
return {
|
||||
'total_trials': total,
|
||||
'by_status': by_status,
|
||||
'champion_count': champion_count,
|
||||
'catastrophic_count': catastrophic_count,
|
||||
'avg_roi_pct': roi_stats[0] if roi_stats else 0,
|
||||
'min_roi_pct': roi_stats[1] if roi_stats else 0,
|
||||
'max_roi_pct': roi_stats[2] if roi_stats else 0,
|
||||
'avg_sharpe': roi_stats[3] if roi_stats else 0,
|
||||
'avg_max_dd_pct': roi_stats[4] if roi_stats else 0,
|
||||
}
|
||||
|
||||
def load_batch(self, batch_id: int) -> Optional["pd.DataFrame"]:
    """Load one batch of results from its Parquet file.

    Returns None when pandas is unavailable or the batch file is missing.

    The return annotation is a lazy string: the original bare
    ``pd.DataFrame`` was evaluated at class-creation time and raised
    NameError in exactly the environment (pandas missing) that the
    PANDAS_AVAILABLE guard is meant to support.
    """
    if not PANDAS_AVAILABLE:
        return None

    parquet_path = self.results_dir / f"batch_{batch_id:04d}_results.parquet"

    if not parquet_path.exists():
        return None

    return pd.read_parquet(parquet_path)
|
||||
|
||||
def load_corpus(self) -> Optional["pd.DataFrame"]:
    """Load the entire corpus by concatenating every batch Parquet file.

    Returns None when pandas is unavailable or no batch files exist.

    The return annotation is a lazy string: a bare ``pd.DataFrame`` would
    raise NameError at class-creation time when pandas is missing, which
    is the case the PANDAS_AVAILABLE guard exists for.
    """
    if not PANDAS_AVAILABLE:
        return None

    # Sorted glob keeps batches in batch-id order (zero-padded names).
    frames = [
        pd.read_parquet(parquet_file)
        for parquet_file in sorted(self.results_dir.glob("batch_*_results.parquet"))
    ]

    if not frames:
        return None

    return pd.concat(frames, ignore_index=True)
|
||||
Reference in New Issue
Block a user