#!/usr/bin/env python3
"""
REAL-TIME EXTERNAL FACTORS SERVICE v1.0
========================================
Production-grade, HFT-optimized external factors service.

Key design decisions (empirically validated 2026-02-27, 54-day backtest):
- Per-indicator adaptive polling at native API resolution
- Uniform lag=1 day (ROBUST: +3.10% ROI, -2.02% DD, zero overfit risk)
- Binary gating (no confidence weighting - empirically validated)
- Never blocks consumer: get_indicators() returns cached data in <1ms
- Dual output: NPZ (legacy) + Arrow (new)

Empirical validation vs baseline (54-day backtest):
  N:  No ACB:                    ROI=+7.51%,  DD=18.34%
  A:  Current (lag=0 daily avg): ROI=+9.33%,  DD=12.04%  <-- current production
  L1: Uniform lag=1:             ROI=+12.43%, DD=10.02%  <-- THIS SERVICE DEFAULT
  MO: Mixed optimal lags:        ROI=+13.31%, DD=9.10%   <-- experimental (needs 80+ days)
  MS: Mixed + synth intra-day:   ROI=+16.00%, DD=9.92%   <-- future (needs VBT changes)

TODO (ordered by priority):
1. [CRITICAL] Re-validate lag=1 with 80+ days of data for statistical robustness
2. [HIGH] Fix the 50 dead indicators (see DEAD_INDICATORS below)
3. [HIGH] Test each repaired indicator isolated against ACB & alpha engine
4. [HIGH] Move from per-day ACB to intra-day continuous ACB once VBT supports it
5. [MED] Switch to per-indicator optimal lags once 80+ days available
6. [MED] Implement adaptive variance estimator for poll interval tuning
7. [MED] Add Arrow dual output (schema defined, writer implemented)
8. [LOW] FRED indicators: handle weekend/holiday gaps (fill-forward last value)
9. [LOW] CoinMetrics indicators: fix parse_cm returning 0 (API may need auth)
10.[LOW] Tune system sync to never generate signals with stale/missing data
"""
import asyncio
import aiohttp
import numpy as np
import time
import logging
import json
from pathlib import Path
from datetime import datetime, timezone
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple, Any
from collections import deque, defaultdict
from enum import Enum
import threading

logger = logging.getLogger(__name__)


# =====================================================================
# INDICATOR METADATA (from empirical analysis)
# =====================================================================

@dataclass
class IndicatorMeta:
    """Per-indicator configuration derived from empirical testing."""
    name: str
    source: str             # API provider
    url: str                # Real-time endpoint
    parser: str             # Parser method name
    poll_interval_s: float  # Native update rate (seconds)
    optimal_lag_days: int   # Information discount lag (empirically measured)
    lag_correlation: float  # Pearson r at optimal lag
    lag_pvalue: float       # Statistical significance
    acb_critical: bool      # Used by ACB v2/v3
    category: str           # derivatives/onchain/macro/etc


# Empirically measured optimal lags (from lag_correlation_analysis):
#   dvol_btc:    lag=1, r=-0.4919, p=0.0002 (strongest)
#   taker:       lag=1, r=-0.4105, p=0.0034
#   dvol_eth:    lag=1, r=-0.4246, p=0.0015
#   funding_btc: lag=5, r=+0.3892, p=0.0057 (slow propagation)
#   ls_btc:      lag=0, r=+0.2970, p=0.0362 (immediate)
#   funding_eth: lag=3, r=+0.2026, p=0.1539 (not significant)
#   vix:         lag=1, r=-0.2044, p=0.2700 (not significant)
#   fng:         lag=5, r=-0.1923, p=0.1856 (not significant)

INDICATORS = {
    # BINANCE DERIVATIVES (rate limit: 1200/min)
    'funding_btc': IndicatorMeta('funding_btc', 'binance',
        'https://fapi.binance.com/fapi/v1/fundingRate?symbol=BTCUSDT&limit=1',
        'parse_binance_funding', 28800, 5, 0.3892, 0.0057, True, 'derivatives'),
    'funding_eth': IndicatorMeta('funding_eth', 'binance',
        'https://fapi.binance.com/fapi/v1/fundingRate?symbol=ETHUSDT&limit=1',
        'parse_binance_funding', 28800, 3, 0.2026, 0.1539, True, 'derivatives'),
    'oi_btc': IndicatorMeta('oi_btc', 'binance',
        'https://fapi.binance.com/fapi/v1/openInterest?symbol=BTCUSDT',
        'parse_binance_oi', 300, 0, 0, 1.0, False, 'derivatives'),
    'oi_eth': IndicatorMeta('oi_eth', 'binance',
        'https://fapi.binance.com/fapi/v1/openInterest?symbol=ETHUSDT',
        'parse_binance_oi', 300, 0, 0, 1.0, False, 'derivatives'),
    'ls_btc': IndicatorMeta('ls_btc', 'binance',
        'https://fapi.binance.com/futures/data/globalLongShortAccountRatio?symbol=BTCUSDT&period=5m&limit=1',
        'parse_binance_ls', 300, 0, 0.2970, 0.0362, True, 'derivatives'),
    'ls_eth': IndicatorMeta('ls_eth', 'binance',
        'https://fapi.binance.com/futures/data/globalLongShortAccountRatio?symbol=ETHUSDT&period=5m&limit=1',
        'parse_binance_ls', 300, 0, 0, 1.0, False, 'derivatives'),
    'ls_top': IndicatorMeta('ls_top', 'binance',
        'https://fapi.binance.com/futures/data/topLongShortAccountRatio?symbol=BTCUSDT&period=5m&limit=1',
        'parse_binance_ls', 300, 0, 0, 1.0, False, 'derivatives'),
    'taker': IndicatorMeta('taker', 'binance',
        'https://fapi.binance.com/futures/data/takerlongshortRatio?symbol=BTCUSDT&period=5m&limit=1',
        'parse_binance_taker', 300, 1, -0.4105, 0.0034, True, 'derivatives'),
    'basis': IndicatorMeta('basis', 'binance',
        'https://fapi.binance.com/fapi/v1/premiumIndex?symbol=BTCUSDT',
        'parse_binance_basis', 0.5, 0, 0, 1.0, False, 'derivatives'),  # 0.5s AGGRESSIVE OVERSAMPLING
    'imbal_btc': IndicatorMeta('imbal_btc', 'binance',
        'https://api.binance.com/api/v3/depth?symbol=BTCUSDT&limit=100',
        'parse_imbal', 0.5, 0, 0, 1.0, False, 'microstructure'),  # 0.5s AGGRESSIVE OVERSAMPLING
    'imbal_eth': IndicatorMeta('imbal_eth', 'binance',
        'https://api.binance.com/api/v3/depth?symbol=ETHUSDT&limit=100',
        'parse_imbal', 0.5, 0, 0, 1.0, False, 'microstructure'),  # 0.5s AGGRESSIVE OVERSAMPLING
    'spread': IndicatorMeta('spread', 'binance',
        'https://api.binance.com/api/v3/ticker/bookTicker?symbol=BTCUSDT',
        'parse_spread', 0.5, 0, 0, 1.0, False, 'microstructure'),  # 0.5s AGGRESSIVE OVERSAMPLING

    # DERIBIT (rate limit: 100/10s)
    # NOTE: Deribit API now requires start_timestamp and end_timestamp parameters
    # URLs are built dynamically in _build_deribit_url() - these are base URLs
    'dvol_btc': IndicatorMeta('dvol_btc', 'deribit', 'dvol:BTC',
        'parse_deribit_dvol', 60, 1, -0.4919, 0.0002, True, 'derivatives'),
    'dvol_eth': IndicatorMeta('dvol_eth', 'deribit', 'dvol:ETH',
        'parse_deribit_dvol', 60, 1, -0.4246, 0.0015, True, 'derivatives'),
    'fund_dbt_btc': IndicatorMeta('fund_dbt_btc', 'deribit', 'funding:BTC-PERPETUAL',
        'parse_deribit_fund', 28800, 0, 0, 1.0, False, 'derivatives'),
    'fund_dbt_eth': IndicatorMeta('fund_dbt_eth', 'deribit', 'funding:ETH-PERPETUAL',
        'parse_deribit_fund', 28800, 0, 0, 1.0, False, 'derivatives'),

    # MACRO (FRED, rate limit: 120/min)
    'vix': IndicatorMeta('vix', 'fred', 'VIXCLS',
        'parse_fred', 21600, 1, -0.2044, 0.27, True, 'macro'),
    'dxy': IndicatorMeta('dxy', 'fred', 'DTWEXBGS',
        'parse_fred', 21600, 0, 0, 1.0, False, 'macro'),
    'us10y': IndicatorMeta('us10y', 'fred', 'DGS10',
        'parse_fred', 21600, 0, 0, 1.0, False, 'macro'),
    'sp500': IndicatorMeta('sp500', 'fred', 'SP500',
        'parse_fred', 21600, 0, 0, 1.0, False, 'macro'),
    'fedfunds': IndicatorMeta('fedfunds', 'fred', 'DFF',
        'parse_fred', 86400, 0, 0, 1.0, False, 'macro'),

    # SENTIMENT
    'fng': IndicatorMeta('fng', 'alternative',
        'https://api.alternative.me/fng/?limit=1',
        'parse_fng', 21600, 5, -0.1923, 0.1856, True, 'sentiment'),

    # ON-CHAIN (blockchain.info)
    'hashrate': IndicatorMeta('hashrate', 'blockchain',
        'https://blockchain.info/q/hashrate',
        'parse_bc', 1800, 0, 0, 1.0, False, 'onchain'),

    # DEFI (DeFi Llama)
    'tvl': IndicatorMeta('tvl', 'defillama',
        'https://api.llama.fi/v2/historicalChainTvl',
        'parse_dl_tvl', 21600, 0, 0, 1.0, False, 'defi'),

    # LIQUIDATIONS (Coinglass) — aggregate forced liquidation data
    # poll_interval=3600s (hourly update), lag=1 day (same conservative default as dvol)
    # lag_correlation and lag_pvalue to be measured empirically (set 0/1.0 as placeholders)
    'liq_vol_24h': IndicatorMeta(
        'liq_vol_24h', 'coinglass',
        'https://open-api.coinglass.com/public/v2/liquidation_chart?symbol=BTC&interval=1h&limit=24',
        'parse_coinglass_liq_vol', 3600, 1, 0.0, 1.0, False, 'liquidations'),
    'liq_long_ratio': IndicatorMeta(
        'liq_long_ratio', 'coinglass',
        'https://open-api.coinglass.com/public/v2/liquidation_chart?symbol=BTC&interval=1h&limit=24',
        'parse_coinglass_liq_ratio', 3600, 1, 0.0, 1.0, False, 'liquidations'),
    'liq_z_score': IndicatorMeta(
        'liq_z_score', 'coinglass',
        '',  # derived — no direct endpoint; computed in backfiller, set 0 in real-time until history accumulated
        'parse_noop', 3600, 1, 0.0, 1.0, False, 'liquidations'),
    'liq_percentile': IndicatorMeta(
        'liq_percentile', 'coinglass',
        '',  # derived — same as above
        'parse_noop', 3600, 1, 0.0, 1.0, False, 'liquidations'),
}

# Rate limits per provider (requests per second)
RATE_LIMITS = {
    'binance': 20.0,      # 1200/min
    'deribit': 10.0,      # 100/10s
    'fred': 2.0,          # 120/min
    'alternative': 0.5,
    'blockchain': 0.5,
    'defillama': 1.0,
    'coinmetrics': 0.15,  # 10/min
    'coinglass': 0.5,     # 1 req/2s — free tier conservative limit
}


# =====================================================================
# INDICATOR STATE
# =====================================================================

@dataclass
class IndicatorState:
    """Live state for a single indicator."""
    value: float = np.nan
    fetched_at: float = 0.0                 # monotonic time
    fetched_utc: Optional[datetime] = None  # wall-clock time of last success
    success: bool = False                   # True once at least one fetch parsed cleanly
    error: str = ""                         # last error tag/message ("" when healthy)
    fetch_count: int = 0                    # successful fetches
    fail_count: int = 0                     # failed fetches (network or parse)
    # History buffer for lag support (one snapshot appended per UTC day)
    daily_history: deque = field(default_factory=lambda: deque(maxlen=10))


# =====================================================================
# PARSERS (same as external_factors_matrix.py, inlined for independence)
# =====================================================================
class Parsers:
    """Stateless response parsers, one static method per API payload shape.

    Every parser is defensive: on malformed/missing data it returns a neutral
    fallback (0.0, 1.0 for ratios, 0.5 for the liq long-share, 50.0 for FNG)
    instead of raising, so a bad API response can never kill a poll loop.
    """

    @staticmethod
    def parse_binance_funding(d):
        # Payload: [{'fundingRate': '...'}]; empty list -> 0.0
        return float(d[0]['fundingRate']) if isinstance(d, list) and d else 0.0

    @staticmethod
    def parse_binance_oi(d):
        # History endpoint returns a list (sumOpenInterest); snapshot returns a dict.
        if isinstance(d, list) and d:
            return float(d[-1].get('sumOpenInterest', 0))
        return float(d.get('openInterest', 0)) if isinstance(d, dict) else 0.0

    @staticmethod
    def parse_binance_ls(d):
        # 1.0 is the neutral long/short ratio.
        return float(d[-1]['longShortRatio']) if isinstance(d, list) and d else 1.0

    @staticmethod
    def parse_binance_taker(d):
        return float(d[-1]['buySellRatio']) if isinstance(d, list) and d else 1.0

    @staticmethod
    def parse_binance_basis(d):
        # Annualize the funding rate: 3 funding periods/day * 365 days.
        return float(d.get('lastFundingRate', 0)) * 365 * 3 if isinstance(d, dict) else 0.0

    @staticmethod
    def parse_deribit_dvol(d):
        # result.data is a list of candles; index 4 is the close value.
        if isinstance(d, dict) and 'result' in d:
            r = d['result']
            if isinstance(r, dict) and 'data' in r and r['data']:
                return float(r['data'][-1][4]) if len(r['data'][-1]) > 4 else 0.0
        return 0.0

    @staticmethod
    def parse_deribit_fund(d):
        # get_funding_rate_history -> list of dicts with 'interest_8h';
        # some Deribit endpoints return a bare scalar result instead.
        if isinstance(d, dict) and 'result' in d:
            r = d['result']
            return float(r[-1].get('interest_8h', 0)) if isinstance(r, list) and r else float(r)
        return 0.0

    @staticmethod
    def parse_fred(d):
        # FRED uses '.' as the missing-value sentinel (weekends/holidays).
        if isinstance(d, dict) and 'observations' in d and d['observations']:
            v = d['observations'][-1].get('value', '.')
            if v != '.':
                try:
                    return float(v)
                except (TypeError, ValueError):  # FIX: was a bare except
                    pass
        return 0.0

    @staticmethod
    def parse_fng(d):
        # Fear & Greed index; 50.0 = neutral fallback.
        return float(d['data'][0]['value']) if isinstance(d, dict) and 'data' in d and d['data'] else 50.0

    @staticmethod
    def parse_bc(d):
        # blockchain.info /q/ endpoints return a bare number (often as text);
        # chart endpoints return {'values': [{'x':..., 'y':...}, ...]}.
        if isinstance(d, (int, float)):
            return float(d)
        if isinstance(d, str):
            try:
                return float(d)
            except ValueError:  # FIX: was a bare except
                pass
        if isinstance(d, dict) and 'values' in d and d['values']:
            return float(d['values'][-1].get('y', 0))
        return 0.0

    @staticmethod
    def parse_dl_tvl(d):
        return float(d[-1].get('tvl', 0)) if isinstance(d, list) and d else 0.0

    @staticmethod
    def parse_coinglass_liq_vol(d):
        """
        Coinglass /public/v2/liquidation_chart response:
        { "code": "0", "data": { "liquidationMap": [
            {"t": ..., "longLiquidationUsd": 1234567.0, "shortLiquidationUsd": 890123.0}, ... ] } }
        Returns log10(total_usd + 1) over all bars (24h sum). Returns 0.0 on any error.
        """
        import math
        try:
            bars = d.get('data', {}).get('liquidationMap', [])
            if not bars:
                return 0.0
            total = sum(
                float(b.get('longLiquidationUsd', 0)) + float(b.get('shortLiquidationUsd', 0))
                for b in bars
            )
            return math.log10(total + 1.0)
        except Exception:
            return 0.0

    @staticmethod
    def parse_coinglass_liq_ratio(d):
        """
        Same endpoint as parse_coinglass_liq_vol.
        Returns long_liq / (long_liq + short_liq) over all bars.
        Returns 0.5 (neutral) on error or zero total.
        """
        try:
            bars = d.get('data', {}).get('liquidationMap', [])
            if not bars:
                return 0.5
            long_total = sum(float(b.get('longLiquidationUsd', 0)) for b in bars)
            short_total = sum(float(b.get('shortLiquidationUsd', 0)) for b in bars)
            total = long_total + short_total
            return long_total / total if total > 0 else 0.5
        except Exception:
            return 0.5

    @staticmethod
    def parse_imbal(d):
        # Order-book imbalance over the top 50 levels, in [-1, 1].
        if isinstance(d, dict):
            bv = sum(float(b[1]) for b in d.get('bids', [])[:50])
            av = sum(float(a[1]) for a in d.get('asks', [])[:50])
            t = bv + av
            return (bv - av) / t if t > 0 else 0.0
        return 0.0

    @staticmethod
    def parse_spread(d):
        # Bid/ask spread in basis points.
        if isinstance(d, dict):
            try:
                b = float(d.get('bidPrice', d.get('bid', 0)))
                a = float(d.get('askPrice', d.get('ask', 0)))
                return (a - b) / b * 10000 if b > 0 else 0.0
            except (TypeError, ValueError):  # FIX: was a bare except
                pass
        return 0.0

    @staticmethod
    def parse_noop(d):
        """Placeholder for derived indicators (liq_z_score, liq_percentile).
        These are computed by the backfiller, not fetched in real-time."""
        return 0.0


# =====================================================================
# REAL-TIME SERVICE
# =====================================================================

class RealTimeExFService:
    """
    Singleton real-time external factors service.

    Design principles:
    - Never blocks: get_indicators() is pure memory read
    - Background asyncio loop fetches on per-indicator timers
    - Per-provider rate limiting via semaphores
    - History buffer per indicator for lag support
    - Thread-safe via lock on state dict
    """

    def __init__(self, fred_api_key: str = ""):
        # SECURITY NOTE(review): hardcoded fallback API key should move to
        # env/config — anyone with repo access can read it.
        self.fred_api_key = fred_api_key or 'c16a9cde3e3bb5bb972bb9283485f202'
        self.state: Dict[str, IndicatorState] = {
            name: IndicatorState() for name in INDICATORS
        }
        self._lock = threading.Lock()
        self._running = False
        self._loop = None
        self._thread = None
        self._semaphores: Dict[str, asyncio.Semaphore] = {}
        self._session: Optional[aiohttp.ClientSession] = None
        self._current_date: str = ""  # for daily history rotation

    # ----- Consumer API (never blocks, <1ms) -----

    def get_indicators(self, apply_lag: bool = False, dual_sample: bool = True) -> Dict[str, Any]:
        """
        Get indicator values with flexible lag and dual-sampling support (T and T-24h).
        Returns both real-time (T) and structural lagged values (T-24h) for indicators.

        Args:
            apply_lag (bool): If True, the primary key 'name' will contain the lagged value.
            dual_sample (bool): If True, a secondary key '{name}_lagged' will be added.

        Returns:
            dict of name -> value (floats), plus '_staleness': name -> seconds
            since the last successful fetch. Indicators that have never
            succeeded (or hold NaN) are omitted entirely.
        """
        with self._lock:
            result = {}
            staleness = {}
            now = time.monotonic()
            for name, meta in INDICATORS.items():
                st = self.state[name]
                if not st.success or np.isnan(st.value):
                    continue
                # 1. Primary value: lagged snapshot if requested and history allows.
                if apply_lag and meta.optimal_lag_days > 0:
                    lag = meta.optimal_lag_days
                    hist = list(st.daily_history)
                    if len(hist) >= lag:
                        result[name] = hist[-lag]
                    else:
                        result[name] = st.value  # fallback: not enough history yet
                else:
                    result[name] = st.value
                # 2. Sequential dual-sampling (always provide T and T-24h if requested).
                if dual_sample and meta.optimal_lag_days > 0:
                    lag = meta.optimal_lag_days
                    hist = list(st.daily_history)
                    lag_key = f"{name}_lagged"
                    if len(hist) >= lag:
                        result[lag_key] = hist[-lag]
                    else:
                        result[lag_key] = st.value
                if st.fetched_at > 0:
                    staleness[name] = now - st.fetched_at
            result['_staleness'] = staleness
            return result

    def get_acb_indicators(self) -> Dict[str, float]:
        """Get only the ACB-critical indicators (with lags applied)."""
        full = self.get_indicators(apply_lag=True)
        return {k: v for k, v in full.items()
                if k in ('funding_btc', 'funding_eth', 'dvol_btc', 'dvol_eth',
                         'fng', 'vix', 'ls_btc', 'taker', 'mcap_bc',
                         'fund_dbt_btc', 'oi_btc', 'fund_dbt_eth', 'addr_btc')
                and isinstance(v, (int, float))}

    # ----- Background fetching -----

    async def _fetch_url(self, url: str, source: str) -> Optional[Any]:
        """Fetch URL with rate limiting and error handling.

        NOTE(review): the semaphore path bounds *concurrency* per provider,
        not request rate; the fixed-delay path below only runs for providers
        without a semaphore. True token-bucket rate limiting is a TODO.
        """
        sem = self._semaphores.get(source)
        if sem:
            await sem.acquire()
            try:
                return await self._do_fetch(url)
            finally:
                sem.release()
        # No semaphore for this provider: enforce a simple fixed delay.
        delay = 1.0 / RATE_LIMITS.get(source, 1.0)
        await asyncio.sleep(delay)
        return await self._do_fetch(url)

    async def _do_fetch(self, url: str) -> Optional[Any]:
        """Raw HTTP fetch. Returns parsed JSON, raw text, or None on failure."""
        if not self._session:
            return None
        try:
            timeout = aiohttp.ClientTimeout(total=10)
            headers = {"User-Agent": "Mozilla/5.0"}
            async with self._session.get(url, timeout=timeout, headers=headers) as r:
                if r.status == 200:
                    ct = r.headers.get('Content-Type', '')
                    if 'json' in ct:
                        return await r.json()
                    # Some providers serve JSON with a non-JSON Content-Type.
                    text = await r.text()
                    try:
                        return json.loads(text)
                    except ValueError:  # FIX: was a bare except (JSONDecodeError is a ValueError)
                        return text
                else:
                    logger.warning(f"HTTP {r.status} for {url[:60]}")
        except asyncio.TimeoutError:
            logger.debug(f"Timeout: {url[:60]}")
        except Exception as e:
            logger.debug(f"Fetch error: {e}")
        return None

    def _build_fred_url(self, series_id: str) -> str:
        """Build a FRED observations URL for the latest value of a series."""
        return (f"https://api.stlouisfed.org/fred/series/observations?"
                f"series_id={series_id}&api_key={self.fred_api_key}"
                f"&file_type=json&sort_order=desc&limit=1")

    def _build_deribit_url(self, meta_url: str) -> str:
        """
        Build Deribit URL with required timestamps.
        meta_url format: 'dvol:BTC' or 'funding:BTC-PERPETUAL'

        FIXED 2026-03-22:
        - funding: use get_funding_rate_history (returns list with interest_8h per
          period). get_funding_rate_value was the agent's broken fix — it returns a
          scalar daily average ~100x smaller than the per-8h snapshot stored in NPZ
          ground truth. Parity test (test_deribit_api_parity.py --indicators fund)
          confirms 8/8 PASS.
        - dvol: use resolution=3600 (hourly candles) to match backfill in
          external_factors_matrix.py; resolution=60 (1-min) was wrong.
        Both parsers (parse_deribit_fund, parse_deribit_dvol) take the last entry [-1].
        """
        # FIX: removed redundant function-local `import time` (module-level import exists).
        now = int(time.time())
        if meta_url.startswith('dvol:'):
            currency = meta_url.split(':')[1]
            # Last 4 hours at hourly resolution — parser takes data[-1][4] (close)
            start_ts = (now - 14400) * 1000
            end_ts = now * 1000
            return (f"https://www.deribit.com/api/v2/public/get_volatility_index_data?"
                    f"currency={currency}&resolution=3600"
                    f"&start_timestamp={start_ts}&end_timestamp={end_ts}")
        elif meta_url.startswith('funding:'):
            instrument = meta_url.split(':')[1]
            # Last 4 hours — get_funding_rate_history returns list; parser takes r[-1]['interest_8h']
            start_ts = (now - 14400) * 1000
            end_ts = now * 1000
            return (f"https://www.deribit.com/api/v2/public/get_funding_rate_history?"
                    f"instrument_name={instrument}"
                    f"&start_timestamp={start_ts}&end_timestamp={end_ts}")
        return meta_url  # Fallback

    async def _fetch_indicator(self, name: str, meta: IndicatorMeta):
        """Fetch, parse, and store a single indicator's latest value."""
        # Build URL based on source
        if meta.source == 'fred':
            url = self._build_fred_url(meta.url)
        elif meta.source == 'deribit':
            url = self._build_deribit_url(meta.url)
        else:
            url = meta.url
        # Fetch
        data = await self._fetch_url(url, meta.source)
        if data is None:
            with self._lock:
                self.state[name].fail_count += 1
                self.state[name].error = "fetch_failed"
            return
        # Parse
        parser = getattr(Parsers, meta.parser, None)
        if parser is None:
            logger.error(f"No parser: {meta.parser}")
            return
        try:
            value = parser(data)
            # Heuristic: most parsers return 0.0 on failure, so 0.0 is treated as
            # an error for everything except imbalance (where 0 is meaningful).
            # NOTE(review): this discards a *legitimate* zero (e.g. funding rate
            # exactly 0.0) — acceptable for now, but revisit per-indicator.
            if value == 0.0 and 'imbal' not in name:
                with self._lock:
                    self.state[name].fail_count += 1
                    self.state[name].error = "zero_value"
                return
            with self._lock:
                self.state[name].value = value
                self.state[name].success = True
                self.state[name].fetched_at = time.monotonic()
                self.state[name].fetched_utc = datetime.now(timezone.utc)
                self.state[name].fetch_count += 1
                self.state[name].error = ""
        except Exception as e:
            with self._lock:
                self.state[name].fail_count += 1
                self.state[name].error = str(e)

    async def _indicator_loop(self, name: str, meta: IndicatorMeta):
        """Continuous poll loop for one indicator with drift-corrected timing."""
        while self._running:
            start_time = time.monotonic()
            try:
                await self._fetch_indicator(name, meta)
            except Exception as e:
                logger.error(f"Loop error {name}: {e}")
            # Account for fetch duration to prevent cumulative drift.
            elapsed = time.monotonic() - start_time
            sleep_time = max(0.1, meta.poll_interval_s - elapsed)
            await asyncio.sleep(sleep_time)

    async def _daily_rotation(self):
        """At midnight UTC, snapshot current values into daily history."""
        while self._running:
            now = datetime.now(timezone.utc)
            date_str = now.strftime('%Y-%m-%d')
            if date_str != self._current_date:
                with self._lock:
                    for name, st in self.state.items():
                        if st.success and not np.isnan(st.value):
                            st.daily_history.append(st.value)
                    self._current_date = date_str
                logger.info(f"Daily rotation: {date_str}")
            await asyncio.sleep(60)  # check every minute

    async def _run(self):
        """Main async loop: spawn one task per indicator plus the rotation task."""
        connector = aiohttp.TCPConnector(limit=30, ttl_dns_cache=300)
        self._session = aiohttp.ClientSession(connector=connector)
        # Create per-provider concurrency semaphores from rate limits.
        for source, rate in RATE_LIMITS.items():
            max_concurrent = max(1, int(rate * 2))
            self._semaphores[source] = asyncio.Semaphore(max_concurrent)
        # Start per-indicator loops
        tasks = []
        for name, meta in INDICATORS.items():
            tasks.append(asyncio.create_task(self._indicator_loop(name, meta)))
        # Start daily rotation
        tasks.append(asyncio.create_task(self._daily_rotation()))
        logger.info(f"Started {len(INDICATORS)} indicator loops")
        try:
            await asyncio.gather(*tasks)
        finally:
            await self._session.close()

    def start(self):
        """Start background daemon thread running the asyncio loop. Idempotent."""
        if self._running:
            return
        self._running = True

        def _thread_target():
            self._loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self._loop)
            self._loop.run_until_complete(self._run())

        self._thread = threading.Thread(target=_thread_target, daemon=True)
        self._thread.start()
        logger.info("RealTimeExFService started")

    def stop(self):
        """Stop the service.

        NOTE(review): loops only observe _running after their current sleep
        (up to poll_interval_s), so join(timeout=5) may return before they
        exit — the daemon thread guarantees process shutdown regardless.
        """
        self._running = False
        if self._thread:
            self._thread.join(timeout=5)
        logger.info("RealTimeExFService stopped")

    def status(self) -> Dict[str, Any]:
        """Service health status: per-indicator counters plus ACB coverage."""
        with self._lock:
            total = len(self.state)
            ok = sum(1 for s in self.state.values() if s.success)
            acb_ok = sum(1 for name in ('funding_btc', 'funding_eth', 'dvol_btc',
                                        'dvol_eth', 'fng', 'vix', 'ls_btc', 'taker')
                         if self.state.get(name, IndicatorState()).success)
            return {
                'indicators_ok': ok,
                'indicators_total': total,
                'acb_indicators_ok': acb_ok,
                'acb_indicators_total': 8,
                'details': {name: {'value': s.value,
                                   'success': s.success,
                                   'staleness_s': time.monotonic() - s.fetched_at if s.fetched_at > 0 else -1,
                                   'fetches': s.fetch_count,
                                   'fails': s.fail_count}
                            for name, s in self.state.items()},
            }


# =====================================================================
# ACB v3 - LAG-AWARE (drop-in replacement for v2)
# =====================================================================

def calculate_adaptive_cut_v3(ext_factors: dict, config: dict = None) -> tuple:
    """
    ACB v3: Same logic as v2 but expects lag-adjusted indicator values.
    The lag adjustment happens in RealTimeExFService.get_acb_indicators().

    This function is identical to v2 in logic - the innovation is in the
    data pipeline feeding it lagged values.

    For backtest: manually construct ext_factors with lagged values.

    Returns:
        (cut, signals, severity, details) tuple.
    """
    from dolphin_paper_trade_adaptive_cb_v2 import ACBV2_CONFIG as DEFAULT_CONFIG
    config = config or DEFAULT_CONFIG
    if not ext_factors or not config.get('enabled', True):
        return config.get('base_cut', 0.30), 0, 0, {'status': 'disabled'}

    signals = 0
    severity = 0
    details = {}

    # Signal 1: Funding (bearish confirmation)
    funding_btc = ext_factors.get('funding_btc', 0)
    if funding_btc < config['thresholds']['funding_btc_very_bearish']:
        signals += 1; severity += 2
        details['funding'] = f'{funding_btc:.6f} (very bearish)'
    elif funding_btc < config['thresholds']['funding_btc_bearish']:
        signals += 1; severity += 1
        details['funding'] = f'{funding_btc:.6f} (bearish)'
    else:
        details['funding'] = f'{funding_btc:.6f} (neutral)'

    # Signal 2: DVOL (volatility confirmation)
    dvol_btc = ext_factors.get('dvol_btc', 50)
    if dvol_btc > config['thresholds']['dvol_extreme']:
        signals += 1; severity += 2
        details['dvol'] = f'{dvol_btc:.1f} (extreme)'
    elif dvol_btc > config['thresholds']['dvol_elevated']:
        signals += 1; severity += 1
        details['dvol'] = f'{dvol_btc:.1f} (elevated)'
    else:
        details['dvol'] = f'{dvol_btc:.1f} (normal)'

    # Signal 3: FNG (only if confirmed by funding/DVOL)
    fng = ext_factors.get('fng', 50)
    funding_bearish = funding_btc < 0
    dvol_elevated = dvol_btc > 55
    if fng < config['thresholds']['fng_extreme_fear'] and (funding_bearish or dvol_elevated):
        signals += 1; severity += 1
        details['fng'] = f'{fng:.1f} (extreme fear, confirmed)'
    elif fng < config['thresholds']['fng_fear'] and (funding_bearish or dvol_elevated):
        signals += 0.5; severity += 0.5
        details['fng'] = f'{fng:.1f} (fear, confirmed)'
    else:
        details['fng'] = f'{fng:.1f} (neutral or unconfirmed)'

    # Signal 4: Taker ratio (strongest predictor)
    taker = ext_factors.get('taker', 1.0)
    if taker < config['thresholds']['taker_selling']:
        signals += 1; severity += 2
        details['taker'] = f'{taker:.3f} (heavy selling)'
    elif taker < config['thresholds']['taker_mild_selling']:
        signals += 0.5; severity += 1
        details['taker'] = f'{taker:.3f} (mild selling)'
    else:
        details['taker'] = f'{taker:.3f} (neutral)'

    # Cut calculation (identical to v2)
    if signals >= 3 and severity >= 5:
        cut = 0.75
    elif signals >= 3:
        cut = 0.65
    elif signals >= 2 and severity >= 3:
        cut = 0.55
    elif signals >= 2:
        cut = 0.45
    elif signals >= 1:
        cut = 0.30
    else:
        cut = 0.0

    details['signals'] = signals
    details['severity'] = severity
    details['version'] = 'v3_lag_aware'
    return cut, signals, severity, details


# =====================================================================
# ACB v4 - EXPANDED 10-INDICATOR ENGINE
# =====================================================================

# Empirically validated thresholds for new v4 indicators
ACB_V4_THRESHOLDS = {
    'funding_eth': -3.105e-05,
    'mcap_bc': 1.361e+12,
    'fund_dbt_btc': -2.426e-06,
    'oi_btc': 7.955e+04,
    'fund_dbt_eth': -6.858e-06,
    'addr_btc': 7.028e+05,
}


def calculate_adaptive_cut_v4(ext_factors: dict, config: dict = None) -> tuple:
    """
    ACB v4: Expanded engine evaluating 10 empirically validated indicators.
    Base cut threshold and math derived from 54-day exhaustive backtest
    (+15.00% ROI, 6.68% DD).

    Starts from the v3 core-4 result, then adds half-signals from either the
    meta-adaptive threshold layer (if importable) or the static v4 thresholds.
    """
    from dolphin_paper_trade_adaptive_cb_v2 import ACBV2_CONFIG as DEFAULT_CONFIG
    config = config or DEFAULT_CONFIG
    if not ext_factors or not config.get('enabled', True):
        return config.get('base_cut', 0.30), 0, 0, {'status': 'disabled'}

    # Use baseline logic for the core 4 signals
    cut, signals, severity, details = calculate_adaptive_cut_v3(ext_factors, config)

    # -------------------------------------------------------------
    # META-ADAPTIVE OVERRIDE OR FALLBACK TO STATIC v4
    # -------------------------------------------------------------
    # Self-import keeps this working through module reloads; failure of any
    # kind simply falls back to the static thresholds below.
    try:
        from realtime_exf_service import _get_active_meta_thresholds
        active_thresh = _get_active_meta_thresholds()
    except Exception:
        active_thresh = None

    if active_thresh:
        # Dynamic processing of strictly proved meta thresholds
        details['version'] = 'v4_meta_adaptive'
        for key, limits in active_thresh.items():
            if key in ('funding_btc', 'dvol_btc', 'fng', 'taker'):
                continue  # Handled by v3
            val = ext_factors.get(key, np.nan)
            if np.isnan(val):
                continue
            triggered = False
            if limits['direction'] == '<' and val < limits['threshold']:
                triggered = True
            elif limits['direction'] == '>' and val > limits['threshold']:
                triggered = True
            if triggered:
                signals += 0.5; severity += 1
                details[key] = f"{val:.4g} (meta {limits['direction']} {limits['threshold']:.4g})"
    else:
        # Fallback 10-indicator engine statically verified on 2026-02-27
        details['version'] = 'v4_expanded_static'

        val = ext_factors.get('funding_eth', np.nan)
        if not np.isnan(val) and val < ACB_V4_THRESHOLDS['funding_eth']:
            signals += 0.5; severity += 1
            details['funding_eth'] = f"{val:.6f} (< {ACB_V4_THRESHOLDS['funding_eth']})"

        val = ext_factors.get('mcap_bc', np.nan)
        if not np.isnan(val) and val < ACB_V4_THRESHOLDS['mcap_bc']:
            signals += 0.5; severity += 1
            details['mcap_bc'] = f"{val:.2e} (< {ACB_V4_THRESHOLDS['mcap_bc']:.2e})"

        val = ext_factors.get('fund_dbt_btc', np.nan)
        if not np.isnan(val) and val < ACB_V4_THRESHOLDS['fund_dbt_btc']:
            signals += 0.5; severity += 1
            details['fund_dbt_btc'] = f"{val:.2e} (< {ACB_V4_THRESHOLDS['fund_dbt_btc']:.2e})"

        val = ext_factors.get('oi_btc', np.nan)
        if not np.isnan(val) and val < ACB_V4_THRESHOLDS['oi_btc']:
            signals += 0.5; severity += 1
            details['oi_btc'] = f"{val:.1f} (< {ACB_V4_THRESHOLDS['oi_btc']:.1f})"

        val = ext_factors.get('fund_dbt_eth', np.nan)
        if not np.isnan(val) and val < ACB_V4_THRESHOLDS['fund_dbt_eth']:
            signals += 0.5; severity += 1
            details['fund_dbt_eth'] = f"{val:.2e} (< {ACB_V4_THRESHOLDS['fund_dbt_eth']:.2e})"

        val = ext_factors.get('addr_btc', np.nan)
        if not np.isnan(val) and val > ACB_V4_THRESHOLDS['addr_btc']:
            signals += 0.5; severity += 1
            details['addr_btc'] = f"{val:.1f} (> {ACB_V4_THRESHOLDS['addr_btc']:.1f})"

    # Recalculate cut with updated signals and severity (same ladder as v3)
    if signals >= 3 and severity >= 5:
        cut = 0.75
    elif signals >= 3:
        cut = 0.65
    elif signals >= 2 and severity >= 3:
        cut = 0.55
    elif signals >= 2:
        cut = 0.45
    elif signals >= 1:
        cut = 0.30
    else:
        cut = 0.0

    details['total_signals_v4'] = signals
    details['total_severity_v4'] = severity
    return cut, signals, severity, details


# =====================================================================
# NPZ + ARROW DUAL WRITER
# =====================================================================

class DualWriter:
    """Write indicator data in both NPZ and Arrow formats.

    Arrow output is skipped silently when pyarrow is not installed.
    """

    def __init__(self):
        self._has_pyarrow = False
        try:
            import pyarrow as pa
            self._pa = pa
            self._has_pyarrow = True
        except ImportError:
            pass

    def write(self, indicators: Dict[str, Any], scan_path: Path, scan_number: int = 0):
        """Write both NPZ and Arrow files alongside the scan."""
        # Remove metadata keys (e.g. '_staleness') and non-numeric values.
        clean = {k: v for k, v in indicators.items()
                 if not k.startswith('_') and isinstance(v, (int, float))}
        # NPZ (legacy format)
        self._write_npz(clean, scan_path, scan_number)
        # Arrow (new format)
        if self._has_pyarrow:
            self._write_arrow(clean, scan_path, scan_number)

    def _write_npz(self, indicators, scan_path, scan_number):
        # Fixed, sorted column order so array positions are stable across runs.
        names = sorted(INDICATORS.keys())
        api_indicators = np.array([indicators.get(n, np.nan) for n in names])
        api_success = np.array([not np.isnan(indicators.get(n, np.nan)) for n in names])
        api_names = np.array(names, dtype='U32')
        out_path = scan_path.parent / f"{scan_path.stem}__Indicators.npz"
        np.savez_compressed(out_path,
                            api_indicators=api_indicators,
                            api_success=api_success,
                            api_names=api_names,
                            api_success_rate=np.array([np.nanmean(api_success)]),
                            timestamp=np.array([datetime.now(timezone.utc).isoformat()], dtype='U64'),
                            scan_number=np.array([scan_number]),
                            )

    def _write_arrow(self, indicators, scan_path, scan_number):
        pa = self._pa
        fields = [
            pa.field('timestamp_ns', pa.int64()),
            pa.field('scan_number', pa.int32()),
        ]
        values = {
            'timestamp_ns': [int(datetime.now(timezone.utc).timestamp() * 1e9)],
            'scan_number': [scan_number],
        }
        for name in sorted(INDICATORS.keys()):
            fields.append(pa.field(name, pa.float64()))
            values[name] = [indicators.get(name, np.nan)]
        schema = pa.schema(fields)
        table = pa.table(values, schema=schema)
        out_path = scan_path.parent / f"{scan_path.stem}__Indicators.arrow"
        with pa.ipc.new_file(str(out_path), schema) as writer:
            writer.write_table(table)


# =====================================================================
# CONVENIENCE: Load from NPZ with lag support (for backtesting)
# =====================================================================

# =====================================================================
# LAG CONFIGURATIONS
# =====================================================================

# ROBUST DEFAULT: Uniform lag=1 for all indicators.
# Validated: +3.10% ROI, -2.02% DD vs lag=0 (54-day backtest).
# Zero overfitting risk (no per-indicator optimization).
# Scientifically justified: "yesterday's indicators predict today's market"
ROBUST_LAGS = {
    'funding_btc': 1, 'funding_eth': 1,
    'dvol_btc': 1, 'dvol_eth': 1,
    'fng': 1, 'vix': 1,
    'ls_btc': 1, 'taker': 1,
}

# EXPERIMENTAL: Per-indicator optimal lags from correlation analysis.
# Validated: +3.98% ROI, -2.93% DD vs lag=0 (54-day backtest).
# WARNING: Overfitting risk at 6.8 days/parameter. Only 5/8 significant.
# DO NOT USE until 80+ days of data available for re-validation.
# TODO: Re-run lag_correlation_analysis with 80+ days, update if confirmed.
EXPERIMENTAL_LAGS = {
    'funding_btc': 5,   # r=+0.39, p=0.006 (slow propagation - 5 days!)
    'funding_eth': 3,   # r=+0.20, p=0.154 (NOT significant)
    'dvol_btc': 1,      # r=-0.49, p=0.0002 (STRONGEST - overnight digest)
    'dvol_eth': 1,      # r=-0.42, p=0.002
    'fng': 5,           # r=-0.19, p=0.186 (NOT significant)
    'vix': 1,           # r=-0.20, p=0.270 (NOT significant)
    'ls_btc': 0,        # r=+0.30, p=0.036 (immediate - only lag=0 indicator)
    'taker': 1,         # r=-0.41, p=0.003 (overnight digest)
}

# CONSERVATIVE: Only statistically verified strong deviations from lag=1 for core indicators.
# Currently identical to V3 ROBUST but with funding_btc=5 and ls_btc=0
CONSERVATIVE_LAGS = ROBUST_LAGS.copy()
CONSERVATIVE_LAGS.update({
    'funding_btc': 5,
    'ls_btc': 0,
})

# V4: Combines robust baseline with 6 new statically proven indicators
V4_LAGS = ROBUST_LAGS.copy()
V4_LAGS.update({
    'funding_eth': 3,
    'mcap_bc': 1,
    'fund_dbt_btc': 0,
    'oi_btc': 0,
    'fund_dbt_eth': 1,
    'addr_btc': 3,
})

# Active configuration - use V4 by default given superior empirical results (+15.00% ROI, 6.68% DD)
OPTIMAL_LAGS = V4_LAGS


# =====================================================================
# META-ADAPTIVE RUNTIME
# =====================================================================

def _get_active_lags() -> dict:
    """Return lags: dynamically from meta-layer if available, else fallback V4."""
    try:
        from meta_adaptive_optimizer import get_current_meta_config
        meta = get_current_meta_config()
        if meta and 'lags' in meta:
            return meta['lags']
    except Exception:
        pass
    return OPTIMAL_LAGS


def _get_active_meta_thresholds() -> dict:
    """Return thresholds: dynamically from meta-layer if available, else None."""
    try:
        from meta_adaptive_optimizer import get_current_meta_config
        meta = get_current_meta_config()
        if meta and 'thresholds' in meta:
            return meta['thresholds']
    except Exception:
        pass
    return None


# TODO: When switching to EXPERIMENTAL_LAGS, also update IndicatorMeta.optimal_lag_days
def load_external_factors_lagged(date_str: str, all_daily_vals: Dict[str, Dict],
                                 sorted_dates: List[str]) -> dict:
    """
    Load external factors with per-indicator optimal lag applied.
    Dynamically respects the Meta-Adaptive Layer configuration.

    Args:
        date_str: Target date
        all_daily_vals: {date_str: {indicator_name: value}} for all dates
        sorted_dates: Chronologically sorted list of all dates

    Returns:
        {indicator_name: lagged_value}; empty dict when date_str is unknown
        or no lagged values are available.
    """
    # FIX: single O(n) scan (was a redundant `in` membership test + .index()).
    try:
        idx = sorted_dates.index(date_str)
    except ValueError:
        return {}
    result = {}
    active_lags = _get_active_lags()
    for name, lag in active_lags.items():
        src_idx = idx - lag
        if src_idx >= 0:
            src_date = sorted_dates[src_idx]
            val = all_daily_vals.get(src_date, {}).get(name)
            if val is not None:
                result[name] = val
    return result