"""
Monte Carlo Result Store
========================

Persistence layer for MC trial results.

Supports:
- Parquet files for bulk data storage
- SQLite index for fast querying
- Incremental/resumable runs
- Batch organization

Reference: MONTE_CARLO_SYSTEM_ENVELOPE_SPEC.md Section 8
"""

import json
import sqlite3
from pathlib import Path
from typing import Dict, List, Optional, Any, Union
from datetime import datetime
import numpy as np

# pandas is optional: when the import fails, Parquet storage is disabled
try:
    import pandas as pd
    PANDAS_AVAILABLE = True
except ImportError:
    PANDAS_AVAILABLE = False
    print("[WARN] pandas not available - Parquet storage disabled")

from .mc_metrics import MCTrialResult
from .mc_validator import ValidationResult


class MCStore:
    """
    Monte Carlo Result Store.
    
    Manages persistence of trial configurations, results, and indices.
    """
    
    def __init__(
        self,
        output_dir: Union[str, Path] = "mc_results",
        batch_size: int = 1000
    ):
        """
        Initialize the store.
        
        Parameters
        ----------
        output_dir : str or Path
            Directory for all MC results
        batch_size : int
            Number of trials per batch file
        """
        self.output_dir = Path(output_dir)
        self.batch_size = batch_size
        
        # Create directory structure
        self.manifests_dir = self.output_dir / "manifests"
        self.results_dir = self.output_dir / "results"
        self.models_dir = self.output_dir / "models"
        
        self.manifests_dir.mkdir(parents=True, exist_ok=True)
        self.results_dir.mkdir(parents=True, exist_ok=True)
        self.models_dir.mkdir(parents=True, exist_ok=True)
        
        # SQLite index
        self.index_path = self.output_dir / "mc_index.sqlite"
        self._init_index()
        
        self.current_batch = self._get_latest_batch() + 1
        
    def _init_index(self):
        """Initialize SQLite index."""
        conn = sqlite3.connect(self.index_path)
        cursor = conn.cursor()
        
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS mc_index (
                trial_id INTEGER PRIMARY KEY,
                batch_id INTEGER,
                status TEXT,
                roi_pct REAL,
                profit_factor REAL,
                win_rate REAL,
                max_dd_pct REAL,
                sharpe REAL,
                n_trades INTEGER,
                champion_region INTEGER,
                catastrophic INTEGER,
                created_at INTEGER
            )
        ''')
        
        # Create indices
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_roi ON mc_index (roi_pct)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_champion ON mc_index (champion_region)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_catastrophic ON mc_index (catastrophic)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_batch ON mc_index (batch_id)')
        
        conn.commit()
        conn.close()
    
    def _get_latest_batch(self) -> int:
        """Get the highest batch ID in the index."""
        conn = sqlite3.connect(self.index_path)
        cursor = conn.cursor()
        
        cursor.execute('SELECT MAX(batch_id) FROM mc_index')
        result = cursor.fetchone()
        conn.close()
        
        return result[0] if result and result[0] else 0
    
    def save_validation_results(self, results: List[ValidationResult], batch_id: int):
        """Save validation results to manifest."""
        manifest_path = self.manifests_dir / f"batch_{batch_id:04d}_validation.json"
        
        data = [r.to_dict() for r in results]
        with open(manifest_path, 'w') as f:
            json.dump(data, f, indent=2)
        
        print(f"[OK] Saved validation manifest: {manifest_path}")
    
    def save_trial_results(
        self,
        results: List[MCTrialResult],
        batch_id: Optional[int] = None
    ):
        """
        Save trial results to Parquet and update index.
        
        Parameters
        ----------
        results : List[MCTrialResult]
            Trial results to save
        batch_id : int, optional
            Batch ID (auto-incremented if not provided)
        """
        if batch_id is None:
            batch_id = self.current_batch
            self.current_batch += 1
        
        if not results:
            return
        
        # Save to Parquet
        if PANDAS_AVAILABLE:
            self._save_parquet(results, batch_id)
        
        # Update SQLite index
        self._update_index(results, batch_id)
        
        print(f"[OK] Saved batch {batch_id}: {len(results)} trials")
    
    def _save_parquet(self, results: List[MCTrialResult], batch_id: int):
        """Save results to Parquet file."""
        parquet_path = self.results_dir / f"batch_{batch_id:04d}_results.parquet"
        
        # Convert to DataFrame
        data = [r.to_dict() for r in results]
        df = pd.DataFrame(data)
        
        # Save
        df.to_parquet(parquet_path, index=False, compression='zstd')
    
    def _update_index(self, results: List[MCTrialResult], batch_id: int):
        """Update SQLite index with result summaries."""
        conn = sqlite3.connect(self.index_path)
        cursor = conn.cursor()
        
        timestamp = int(datetime.now().timestamp())
        
        for r in results:
            cursor.execute('''
                INSERT OR REPLACE INTO mc_index
                (trial_id, batch_id, status, roi_pct, profit_factor, win_rate,
                 max_dd_pct, sharpe, n_trades, champion_region, catastrophic, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                r.trial_id,
                batch_id,
                r.status,
                r.roi_pct,
                r.profit_factor,
                r.win_rate,
                r.max_drawdown_pct,
                r.sharpe_ratio,
                r.n_trades,
                int(r.champion_region),
                int(r.catastrophic),
                timestamp
            ))
        
        conn.commit()
        conn.close()
    
    def query_index(
        self,
        status: Optional[str] = None,
        min_roi: Optional[float] = None,
        champion_only: bool = False,
        catastrophic_only: bool = False,
        limit: int = 1000
    ) -> List[Dict[str, Any]]:
        """
        Query the SQLite index.
        
        Parameters
        ----------
        status : str, optional
            Filter by status
        min_roi : float, optional
            Minimum ROI percentage
        champion_only : bool
            Only champion region configs
        catastrophic_only : bool
            Only catastrophic configs
        limit : int
            Maximum results
            
        Returns
        -------
        List[Dict]
            Matching index entries
        """
        conn = sqlite3.connect(self.index_path)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        
        query = 'SELECT * FROM mc_index WHERE 1=1'
        params = []
        
        if status:
            query += ' AND status = ?'
            params.append(status)
        
        if min_roi is not None:
            query += ' AND roi_pct >= ?'
            params.append(min_roi)
        
        if champion_only:
            query += ' AND champion_region = 1'
        
        if catastrophic_only:
            query += ' AND catastrophic = 1'
        
        query += ' ORDER BY roi_pct DESC LIMIT ?'
        params.append(limit)
        
        cursor.execute(query, params)
        rows = cursor.fetchall()
        conn.close()
        
        return [dict(row) for row in rows]
    
    def get_corpus_stats(self) -> Dict[str, Any]:
        """Get statistics about the stored corpus."""
        conn = sqlite3.connect(self.index_path)
        cursor = conn.cursor()
        
        # Total trials
        cursor.execute('SELECT COUNT(*) FROM mc_index')
        total = cursor.fetchone()[0]
        
        # By status
        cursor.execute('SELECT status, COUNT(*) FROM mc_index GROUP BY status')
        by_status = {row[0]: row[1] for row in cursor.fetchall()}
        
        # Champion region
        cursor.execute('SELECT COUNT(*) FROM mc_index WHERE champion_region = 1')
        champion_count = cursor.fetchone()[0]
        
        # Catastrophic
        cursor.execute('SELECT COUNT(*) FROM mc_index WHERE catastrophic = 1')
        catastrophic_count = cursor.fetchone()[0]
        
        # ROI stats
        cursor.execute('''
            SELECT AVG(roi_pct), MIN(roi_pct), MAX(roi_pct), 
                   AVG(sharpe), AVG(max_dd_pct)
            FROM mc_index WHERE status = 'completed'
        ''')
        roi_stats = cursor.fetchone()
        
        conn.close()
        
        return {
            'total_trials': total,
            'by_status': by_status,
            'champion_count': champion_count,
            'catastrophic_count': catastrophic_count,
            'avg_roi_pct': roi_stats[0] if roi_stats else 0,
            'min_roi_pct': roi_stats[1] if roi_stats else 0,
            'max_roi_pct': roi_stats[2] if roi_stats else 0,
            'avg_sharpe': roi_stats[3] if roi_stats else 0,
            'avg_max_dd_pct': roi_stats[4] if roi_stats else 0,
        }
    
    def load_batch(self, batch_id: int) -> Optional[pd.DataFrame]:
        """Load a batch of results from Parquet."""
        if not PANDAS_AVAILABLE:
            return None
        
        parquet_path = self.results_dir / f"batch_{batch_id:04d}_results.parquet"
        
        if not parquet_path.exists():
            return None
        
        return pd.read_parquet(parquet_path)
    
    def load_corpus(self) -> Optional[pd.DataFrame]:
        """Load entire corpus from all batches."""
        if not PANDAS_AVAILABLE:
            return None
        
        batches = []
        for parquet_file in sorted(self.results_dir.glob("batch_*_results.parquet")):
            df = pd.read_parquet(parquet_file)
            batches.append(df)
        
        if not batches:
            return None
        
        return pd.concat(batches, ignore_index=True)