"""
Meta-Adaptive ExF Optimizer
===========================

Runs nightly (or on-demand) to calculate dynamic lag configurations and
active indicator thresholds for the Adaptive Circuit Breaker (ACB).

Implementation of the "Meta-Adaptive" Blueprint:

1. Pulls up to the last 90 days of market returns and indicator values.
2. Runs lag hypothesis testing (0..``max_lags`` days, default 6) on all
   tracked ExF indicators.
3. Uses strict Point-Biserial correlation (p-value gate, default 0.05)
   against market stress (< -1% daily drop).
4. Persists the active, statistically verified JSON configuration for
   realtime_exf_service.py.
"""

import sys
import json
import time
import logging
import threading
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

import numpy as np
import pandas as pd
from scipy import stats

PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
sys.path.insert(0, str(PROJECT_ROOT / 'nautilus_dolphin'))

try:
    from realtime_exf_service import INDICATORS, OPTIMAL_LAGS
    from dolphin_paper_trade_adaptive_cb_v2 import EIGENVALUES_BASE_PATH
    from dolphin_vbt_real import load_all_data, run_full_backtest, STRATEGIES, INIT_CAPITAL
except ImportError:
    # Optional project dependencies: outside the trading environment the
    # optimizer degrades gracefully (empty indicator list in __init__).
    pass

logger = logging.getLogger(__name__)

# Output location consumed by realtime_exf_service.py.
CONFIG_PATH = Path(__file__).parent / "meta_adaptive_config.json"


class MetaAdaptiveOptimizer:
    """Computes statistically verified lag/threshold config for the ACB.

    For each tracked ExF indicator, tests lags 0..``max_lags`` against
    daily proxy returns (Pearson) and binary stress events (Point-Biserial),
    keeping only indicators whose best lag passes the p-value gate.
    """

    def __init__(self, days_lookback: int = 90, max_lags: int = 6,
                 p_value_gate: float = 0.05):
        """
        Parameters
        ----------
        days_lookback : int
            Number of trailing days of history to analyze.
        max_lags : int
            Maximum lag (in days) tested; lags 0..max_lags inclusive.
        p_value_gate : float
            Maximum Pearson p-value for an indicator to be accepted.
        """
        self.days_lookback = days_lookback
        self.max_lags = max_lags
        self.p_value_gate = p_value_gate
        # INDICATORS comes from the optional realtime_exf_service import;
        # fall back to an empty list when running outside the project.
        self.indicators = list(INDICATORS.keys()) if 'INDICATORS' in globals() else []
        self._lock = threading.Lock()

    def _build_history_cache(self, dates, limit_days):
        """Build a per-day mean-feature cache from NPZ indicator scans.

        Returns ``(cache, target_dates)`` where ``cache`` maps date string
        -> {indicator_name: mean value} and ``target_dates`` is the trailing
        ``limit_days`` slice of ``dates``.
        """
        logger.info("Building cache for last %d days...", limit_days)
        cache = {}
        target_dates = dates[-limit_days:] if len(dates) > limit_days else dates
        for date_str in target_dates:
            date_path = EIGENVALUES_BASE_PATH / date_str
            if not date_path.exists():
                continue
            npz_files = list(date_path.glob('scan_*__Indicators.npz'))
            if not npz_files:
                continue
            accum = defaultdict(list)
            for f in npz_files:
                try:
                    data = dict(np.load(f, allow_pickle=True))
                    names = [str(n) for n in data.get('api_names', [])]
                    vals = data.get('api_indicators', [])
                    succ = data.get('api_success', [])
                    # Only keep values flagged successful and non-NaN.
                    for n, v, s in zip(names, vals, succ):
                        if s and not np.isnan(v):
                            accum[n].append(float(v))
                except Exception:
                    # Best-effort: a single corrupt scan file must not abort
                    # the nightly run — log it and continue with the rest.
                    logger.warning("Skipping unreadable NPZ file: %s", f)
            if accum:
                # Daily feature = mean over all scans that day.
                cache[date_str] = {k: np.mean(v) for k, v in accum.items()}
        return cache, target_dates

    def _get_daily_returns(self, df, target_dates):
        """Derive a daily returns proxy by replaying the champion strategy.

        Capital compounds day-over-day; days with fewer than 200 rows are
        recorded as NaN (capital carried forward unchanged).

        Returns ``(returns_array, valid_dates)`` aligned with target_dates.
        """
        logger.info("Computing proxy returns for the time window...")
        champion = STRATEGIES['champion_5x_f20']
        returns = []
        cap = INIT_CAPITAL
        valid_dates = []
        for d in target_dates:
            day_df = df[df['date_str'] == d]
            if len(day_df) < 200:
                # Too little data to backtest reliably; keep alignment with NaN.
                returns.append(np.nan)
                valid_dates.append(d)
                continue
            res = run_full_backtest(day_df, champion, init_cash=cap,
                                    seed=42, verbose=False)
            ret = (res['capital'] - cap) / cap
            returns.append(ret)
            cap = res['capital']
            valid_dates.append(d)
        return np.array(returns), valid_dates

    def run_optimization(self) -> dict:
        """Run the full meta-adaptive optimization routine and return new config.

        Atomically persists the result to CONFIG_PATH. Thread-safe via an
        internal lock (only one optimization runs at a time).
        """
        with self._lock:
            logger.info("Starting META-ADAPTIVE optimization loop.")
            t0 = time.time()

            df = load_all_data()
            if 'date_str' not in df.columns:
                df['date_str'] = df['timestamp'].dt.date.astype(str)
            all_dates = sorted(df['date_str'].unique())

            # Extra max_lags days of history so lagged slices keep
            # days_lookback usable observations.
            cache, target_dates = self._build_history_cache(
                all_dates, self.days_lookback + self.max_lags)
            daily_returns, target_dates = self._get_daily_returns(df, target_dates)

            # Binary stress event: market dropping by more than 1% in a day.
            # NaN returns compare False, i.e. are treated as non-stress.
            stress_arr = (daily_returns < -0.01).astype(float)

            candidate_lags = {}
            active_thresholds = {}
            candidate_count = 0

            for key in self.indicators:
                ind_arr = np.array(
                    [cache.get(d, {}).get(key, np.nan) for d in target_dates])
                corrs = []
                pvals = []
                sc_corrs = []
                for lag in range(self.max_lags + 1):
                    if lag == 0:
                        x, y, y_stress = ind_arr, daily_returns, stress_arr
                    else:
                        # Indicator at day t predicts return at day t+lag.
                        x, y, y_stress = ind_arr[:-lag], daily_returns[lag:], stress_arr[lag:]
                    mask = ~np.isnan(x) & ~np.isnan(y)
                    if mask.sum() < 20:  # Need at least 20 viable days
                        corrs.append(0)
                        pvals.append(1)
                        sc_corrs.append(0)
                        continue
                    # Pearson against continuous price returns.
                    r, p = stats.pearsonr(x[mask], y[mask])
                    corrs.append(r)
                    pvals.append(p)
                    # Point-Biserial against binary stress events — captures
                    # the relation's sign so we can pick a threshold direction.
                    if y_stress[mask].sum() > 2:  # Need a few stress days
                        sc = stats.pointbiserialr(y_stress[mask], x[mask])[0]
                    else:
                        sc = 0
                    sc_corrs.append(sc)

                # Lag with the highest absolute Pearson correlation wins.
                best_lag = int(np.argmax(np.abs(corrs)))
                best_p = pvals[best_lag]

                if best_p <= self.p_value_gate:
                    # Positive stress correlation -> high values signal stress.
                    direction = ">" if sc_corrs[best_lag] > 0 else "<"
                    valid_vals = ind_arr[~np.isnan(ind_arr)]
                    if valid_vals.size == 0:
                        # All-NaN series cannot yield a percentile threshold.
                        continue
                    # Stress threshold at the 85th/15th historical percentile,
                    # on the side implied by the correlation direction.
                    thresh = np.percentile(valid_vals, 85 if direction == '>' else 15)
                    candidate_lags[key] = best_lag
                    active_thresholds[key] = {
                        'threshold': float(thresh),
                        'direction': direction,
                        'p_value': float(best_p),
                        'r_value': float(corrs[best_lag]),
                    }
                    candidate_count += 1

            # Fallback checks mapping to V4 baseline if things drift too far
            logger.info("Optimization complete (%.1fs). %d indicators passed P < %s.",
                        time.time() - t0, candidate_count, self.p_value_gate)

            output_config = {
                'timestamp': datetime.now(timezone.utc).isoformat(),
                'days_lookback': self.days_lookback,
                'lags': candidate_lags,
                'thresholds': active_thresholds,
            }

            # Atomic save: write to a temp file, then rename over the target
            # so readers never observe a half-written config.
            temp_path = CONFIG_PATH.with_suffix('.tmp')
            with open(temp_path, 'w', encoding='utf-8') as f:
                json.dump(output_config, f, indent=2)
            temp_path.replace(CONFIG_PATH)

            return output_config


def get_current_meta_config() -> dict:
    """Read the latest meta-adaptive config, or return an empty dict.

    Returns {} when the config file is missing or unreadable; read
    failures are logged but never raised to the caller.
    """
    if not CONFIG_PATH.exists():
        return {}
    try:
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        logger.error("Failed to read meta-adaptive config: %s", e)
        return {}


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')
    optimizer = MetaAdaptiveOptimizer(days_lookback=90)
    config = optimizer.run_optimization()
    print(f"\nSaved config to: {CONFIG_PATH}")
    for k, v in config['lags'].items():
        print(f"  {k}: lag={v} days, dir={config['thresholds'][k]['direction']} thresh={config['thresholds'][k]['threshold']:.4g}")