Files
siloqy/external_factors/realtime_exf_service.py

887 lines
35 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
REAL-TIME EXTERNAL FACTORS SERVICE v1.0
========================================
Production-grade, HFT-optimized external factors service.
Key design decisions (empirically validated 2026-02-27, 54-day backtest):
- Per-indicator adaptive polling at native API resolution
- Uniform lag=1 day (ROBUST: +3.10% ROI, -2.02% DD, zero overfit risk)
- Binary gating (no confidence weighting - empirically validated)
- Never blocks consumer: get_indicators() returns cached data in <1ms
- Dual output: NPZ (legacy) + Arrow (new)
Empirical validation vs baseline (54-day backtest):
N: No ACB: ROI=+7.51%, DD=18.34%
A: Current (lag=0 daily avg): ROI=+9.33%, DD=12.04% <-- current production
L1: Uniform lag=1: ROI=+12.43%, DD=10.02% <-- THIS SERVICE DEFAULT
MO: Mixed optimal lags: ROI=+13.31%, DD=9.10% <-- experimental (needs 80+ days)
MS: Mixed + synth intra-day: ROI=+16.00%, DD=9.92% <-- future (needs VBT changes)
TODO (ordered by priority):
1. [CRITICAL] Re-validate lag=1 with 80+ days of data for statistical robustness
2. [HIGH] Fix the 50 dead indicators (see DEAD_INDICATORS below)
3. [HIGH] Test each repaired indicator isolated against ACB & alpha engine
4. [HIGH] Move from per-day ACB to intra-day continuous ACB once VBT supports it
5. [MED] Switch to per-indicator optimal lags once 80+ days available
6. [MED] Implement adaptive variance estimator for poll interval tuning
7. [MED] Add Arrow dual output (schema defined, writer implemented)
8. [LOW] FRED indicators: handle weekend/holiday gaps (fill-forward last value)
9. [LOW] CoinMetrics indicators: fix parse_cm returning 0 (API may need auth)
10. [LOW] Tune system sync to never generate signals with stale/missing data
"""
import asyncio
import aiohttp
import numpy as np
import time
import logging
import json
from pathlib import Path
from datetime import datetime, timezone
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple, Any
from collections import deque, defaultdict
from enum import Enum
import threading
logger = logging.getLogger(__name__)
# =====================================================================
# INDICATOR METADATA (from empirical analysis)
# =====================================================================
@dataclass
class IndicatorMeta:
    """Static configuration for one indicator, derived from empirical testing.

    Field order is significant: the INDICATORS table builds instances
    positionally.
    """

    # Indicator key (matches its key in the INDICATORS table).
    name: str
    # API provider; also selects the rate limit / semaphore bucket.
    source: str
    # Real-time endpoint. For 'fred' entries this holds the series id instead.
    url: str
    # Name of the Parsers method used to decode the response.
    parser: str
    # Native update rate of the upstream feed, in seconds.
    poll_interval_s: float
    # Information discount lag, in days (empirically measured).
    optimal_lag_days: int
    # Pearson r at the optimal lag.
    lag_correlation: float
    # Statistical significance of that correlation.
    lag_pvalue: float
    # True when the indicator is used by ACB v2/v3.
    acb_critical: bool
    # Grouping label: derivatives / onchain / macro / etc.
    category: str
# Empirically measured optimal lags (from lag_correlation_analysis):
# dvol_btc: lag=1, r=-0.4919, p=0.0002 (strongest)
# taker: lag=1, r=-0.4105, p=0.0034
# dvol_eth: lag=1, r=-0.4246, p=0.0015
# funding_btc: lag=5, r=+0.3892, p=0.0057 (slow propagation)
# ls_btc: lag=0, r=+0.2970, p=0.0362 (immediate)
# funding_eth: lag=3, r=+0.2026, p=0.1539 (not significant)
# vix: lag=1, r=-0.2044, p=0.2700 (not significant)
# fng: lag=5, r=-0.1923, p=0.1856 (not significant)
# Registry of every polled indicator, keyed by indicator name.
# Positional IndicatorMeta arguments are:
#   name, source, url (or FRED series id), parser, poll_interval_s,
#   optimal_lag_days, lag_correlation, lag_pvalue, acb_critical, category
# NOTE(review): optimal_lag_days below carries per-indicator lags (e.g.
# funding_btc=5, fng=5, ls_btc=0) while the module docstring names uniform
# lag=1 as the service default - confirm which is intended for live use.
INDICATORS = {
    # BINANCE DERIVATIVES (rate limit: 1200/min)
    'funding_btc': IndicatorMeta(
        'funding_btc', 'binance',
        'https://fapi.binance.com/fapi/v1/fundingRate?symbol=BTCUSDT&limit=1',
        'parse_binance_funding', 28800, 5, 0.3892, 0.0057, True, 'derivatives'),
    'funding_eth': IndicatorMeta(
        'funding_eth', 'binance',
        'https://fapi.binance.com/fapi/v1/fundingRate?symbol=ETHUSDT&limit=1',
        'parse_binance_funding', 28800, 3, 0.2026, 0.1539, True, 'derivatives'),
    'oi_btc': IndicatorMeta(
        'oi_btc', 'binance',
        'https://fapi.binance.com/fapi/v1/openInterest?symbol=BTCUSDT',
        'parse_binance_oi', 300, 0, 0, 1.0, False, 'derivatives'),
    'oi_eth': IndicatorMeta(
        'oi_eth', 'binance',
        'https://fapi.binance.com/fapi/v1/openInterest?symbol=ETHUSDT',
        'parse_binance_oi', 300, 0, 0, 1.0, False, 'derivatives'),
    'ls_btc': IndicatorMeta(
        'ls_btc', 'binance',
        'https://fapi.binance.com/futures/data/globalLongShortAccountRatio?symbol=BTCUSDT&period=5m&limit=1',
        'parse_binance_ls', 300, 0, 0.2970, 0.0362, True, 'derivatives'),
    'ls_eth': IndicatorMeta(
        'ls_eth', 'binance',
        'https://fapi.binance.com/futures/data/globalLongShortAccountRatio?symbol=ETHUSDT&period=5m&limit=1',
        'parse_binance_ls', 300, 0, 0, 1.0, False, 'derivatives'),
    'ls_top': IndicatorMeta(
        'ls_top', 'binance',
        'https://fapi.binance.com/futures/data/topLongShortAccountRatio?symbol=BTCUSDT&period=5m&limit=1',
        'parse_binance_ls', 300, 0, 0, 1.0, False, 'derivatives'),
    'taker': IndicatorMeta(
        'taker', 'binance',
        'https://fapi.binance.com/futures/data/takerlongshortRatio?symbol=BTCUSDT&period=5m&limit=1',
        'parse_binance_taker', 300, 1, -0.4105, 0.0034, True, 'derivatives'),
    'basis': IndicatorMeta(
        'basis', 'binance',
        'https://fapi.binance.com/fapi/v1/premiumIndex?symbol=BTCUSDT',
        'parse_binance_basis', 30, 0, 0, 1.0, False, 'derivatives'),

    # DERIBIT (rate limit: 100/10s)
    'dvol_btc': IndicatorMeta(
        'dvol_btc', 'deribit',
        'https://www.deribit.com/api/v2/public/get_volatility_index_data?currency=BTC&resolution=3600&count=1',
        'parse_deribit_dvol', 60, 1, -0.4919, 0.0002, True, 'derivatives'),
    'dvol_eth': IndicatorMeta(
        'dvol_eth', 'deribit',
        'https://www.deribit.com/api/v2/public/get_volatility_index_data?currency=ETH&resolution=3600&count=1',
        'parse_deribit_dvol', 60, 1, -0.4246, 0.0015, True, 'derivatives'),
    'fund_dbt_btc': IndicatorMeta(
        'fund_dbt_btc', 'deribit',
        'https://www.deribit.com/api/v2/public/get_funding_rate_value?instrument_name=BTC-PERPETUAL',
        'parse_deribit_fund', 28800, 0, 0, 1.0, False, 'derivatives'),
    'fund_dbt_eth': IndicatorMeta(
        'fund_dbt_eth', 'deribit',
        'https://www.deribit.com/api/v2/public/get_funding_rate_value?instrument_name=ETH-PERPETUAL',
        'parse_deribit_fund', 28800, 0, 0, 1.0, False, 'derivatives'),

    # MACRO (FRED, rate limit: 120/min) - the url slot holds the FRED series id
    'vix': IndicatorMeta(
        'vix', 'fred', 'VIXCLS', 'parse_fred', 21600, 1, -0.2044, 0.27, True, 'macro'),
    'dxy': IndicatorMeta(
        'dxy', 'fred', 'DTWEXBGS', 'parse_fred', 21600, 0, 0, 1.0, False, 'macro'),
    'us10y': IndicatorMeta(
        'us10y', 'fred', 'DGS10', 'parse_fred', 21600, 0, 0, 1.0, False, 'macro'),
    'sp500': IndicatorMeta(
        'sp500', 'fred', 'SP500', 'parse_fred', 21600, 0, 0, 1.0, False, 'macro'),
    'fedfunds': IndicatorMeta(
        'fedfunds', 'fred', 'DFF', 'parse_fred', 86400, 0, 0, 1.0, False, 'macro'),

    # SENTIMENT
    'fng': IndicatorMeta(
        'fng', 'alternative', 'https://api.alternative.me/fng/?limit=1',
        'parse_fng', 21600, 5, -0.1923, 0.1856, True, 'sentiment'),

    # ON-CHAIN (blockchain.info)
    'hashrate': IndicatorMeta(
        'hashrate', 'blockchain', 'https://blockchain.info/q/hashrate',
        'parse_bc', 1800, 0, 0, 1.0, False, 'onchain'),

    # DEFI (DeFi Llama)
    'tvl': IndicatorMeta(
        'tvl', 'defillama', 'https://api.llama.fi/v2/historicalChainTvl',
        'parse_dl_tvl', 21600, 0, 0, 1.0, False, 'defi'),
}
# Per-provider request budget, expressed in requests per second.
# The trailing comment on each row is the provider's limit as quoted upstream.
RATE_LIMITS = {
    'binance': 20.0,       # 1200/min
    'deribit': 10.0,       # 100/10s
    'fred': 2.0,           # 120/min
    'alternative': 0.5,
    'blockchain': 0.5,
    'defillama': 1.0,
    'coinmetrics': 0.15,   # 10/min
}
# =====================================================================
# INDICATOR STATE
# =====================================================================
@dataclass
class IndicatorState:
    """Mutable live snapshot of one indicator, updated by the fetch loops."""

    # Most recent successfully parsed value; NaN until the first success.
    value: float = np.nan
    # time.monotonic() stamp of the last successful fetch (0.0 = never).
    fetched_at: float = 0.0
    # Wall-clock UTC time of the last successful fetch.
    fetched_utc: Optional[datetime] = None
    # Set to True on a successful fetch.
    success: bool = False
    # Reason for the most recent failure; cleared on success.
    error: str = ""
    # Running totals of successful and failed fetch attempts.
    fetch_count: int = 0
    fail_count: int = 0
    # History buffer for lag support: one snapshot per daily rotation,
    # newest last, capped at 10 entries.
    daily_history: deque = field(default_factory=lambda: deque(maxlen=10))
# =====================================================================
# PARSERS (same as external_factors_matrix.py, inlined for independence)
# =====================================================================
class Parsers:
    """Stateless decoders that turn a decoded API response into one float.

    Each parser accepts whatever the HTTP layer produced (list, dict, number
    or string) and returns a float. Unrecognised payload shapes yield a
    fallback value instead of raising: 0.0 for most parsers, 1.0 for the
    ratio parsers and 50.0 for ``parse_fng``.

    NOTE(review): the fetch path treats a parsed value of exactly 0.0 as a
    failure, so the 1.0 / 50.0 fallbacks are indistinguishable from real
    data downstream - confirm that this is intended.
    """

    @staticmethod
    def parse_binance_funding(d):
        """Return ``fundingRate`` of the first row of a non-empty list, else 0.0."""
        if isinstance(d, list) and d:
            return float(d[0]['fundingRate'])
        return 0.0

    @staticmethod
    def parse_binance_oi(d):
        """Return open interest from a history list (last row) or a single dict."""
        if isinstance(d, list) and d:
            return float(d[-1].get('sumOpenInterest', 0))
        if isinstance(d, dict):
            return float(d.get('openInterest', 0))
        return 0.0

    @staticmethod
    def parse_binance_ls(d):
        """Return ``longShortRatio`` of the last row, or the neutral 1.0."""
        if isinstance(d, list) and d:
            return float(d[-1]['longShortRatio'])
        return 1.0

    @staticmethod
    def parse_binance_taker(d):
        """Return ``buySellRatio`` of the last row, or the neutral 1.0."""
        if isinstance(d, list) and d:
            return float(d[-1]['buySellRatio'])
        return 1.0

    @staticmethod
    def parse_binance_basis(d):
        """Return ``lastFundingRate`` scaled by 365 * 3, or 0.0 for non-dicts."""
        if isinstance(d, dict):
            return float(d.get('lastFundingRate', 0)) * 365 * 3
        return 0.0

    @staticmethod
    def parse_deribit_dvol(d):
        """Return element 4 of the last row in ``result.data``, else 0.0."""
        if isinstance(d, dict) and 'result' in d:
            r = d['result']
            if isinstance(r, dict) and 'data' in r and r['data']:
                last_row = r['data'][-1]
                # Rows shorter than 5 elements carry no usable value.
                return float(last_row[4]) if len(last_row) > 4 else 0.0
        return 0.0

    @staticmethod
    def parse_deribit_fund(d):
        """Return the funding value from ``result`` (scalar or list of rows).

        A list result yields ``interest_8h`` of its last row; an empty list
        yields 0.0 (previously this raised TypeError via ``float([])``).
        """
        if not (isinstance(d, dict) and 'result' in d):
            return 0.0
        r = d['result']
        if isinstance(r, list):
            return float(r[-1].get('interest_8h', 0)) if r else 0.0
        return float(r)

    @staticmethod
    def parse_fred(d):
        """Return the last observation's value, or 0.0 if missing/unparseable.

        A value of '.' is skipped explicitly and mapped to 0.0.
        """
        if isinstance(d, dict) and 'observations' in d and d['observations']:
            v = d['observations'][-1].get('value', '.')
            if v != '.':
                try:
                    return float(v)
                except (TypeError, ValueError):
                    # Non-numeric observation: fall through to the 0.0 default.
                    pass
        return 0.0

    @staticmethod
    def parse_fng(d):
        """Return ``data[0].value``, or the neutral 50.0 when absent."""
        if isinstance(d, dict) and 'data' in d and d['data']:
            return float(d['data'][0]['value'])
        return 50.0

    @staticmethod
    def parse_bc(d):
        """Return a float from a bare number, numeric string, or ``values`` series."""
        if isinstance(d, (int, float)):
            return float(d)
        if isinstance(d, str):
            try:
                return float(d)
            except ValueError:
                # Non-numeric text: fall through to the remaining shapes.
                pass
        if isinstance(d, dict) and 'values' in d and d['values']:
            return float(d['values'][-1].get('y', 0))
        return 0.0

    @staticmethod
    def parse_dl_tvl(d):
        """Return ``tvl`` of the last row of a non-empty list, else 0.0."""
        if isinstance(d, list) and d:
            return float(d[-1].get('tvl', 0))
        return 0.0
# =====================================================================
# REAL-TIME SERVICE
# =====================================================================
class RealTimeExFService:
    """
    Real-time external factors service (intended to be used as one shared
    instance; nothing in this class enforces that).

    Design principles:
    - Never blocks on I/O: get_indicators() only reads memory under a lock
    - A background thread runs an asyncio loop with one poll task per indicator
    - Per-provider concurrency caps via semaphores, plus a pacing delay
    - History buffer per indicator for lag support
    - Thread-safe via a lock around the state dict
    """

    # Core ACB indicators counted by status().
    _ACB_CORE = ('funding_btc', 'funding_eth', 'dvol_btc', 'dvol_eth',
                 'fng', 'vix', 'ls_btc', 'taker')
    # Keys exposed by get_acb_indicators(): the core set plus the v4 additions.
    _ACB_KEYS = _ACB_CORE + ('mcap_bc', 'fund_dbt_btc', 'oi_btc',
                             'fund_dbt_eth', 'addr_btc')

    def __init__(self, fred_api_key: str = ""):
        """Create an idle service; call start() to begin polling.

        Args:
            fred_api_key: FRED API key. When empty, the FRED_API_KEY
                environment variable is tried, then the built-in key.
        """
        import os  # function-scope import: `os` is not imported at module level

        # SECURITY NOTE(review): an API key is embedded in source as the
        # last-resort fallback. It is kept so existing deployments keep
        # working; prefer the argument or the FRED_API_KEY env variable and
        # consider rotating/removing the embedded key.
        self.fred_api_key = (
            fred_api_key
            or os.environ.get('FRED_API_KEY', '')
            or 'c16a9cde3e3bb5bb972bb9283485f202'
        )
        self.state: Dict[str, IndicatorState] = {
            name: IndicatorState() for name in INDICATORS
        }
        self._lock = threading.Lock()
        self._running = False
        self._loop = None
        self._thread = None
        self._tasks: List[asyncio.Task] = []  # live poll tasks, owned by the loop thread
        self._semaphores: Dict[str, asyncio.Semaphore] = {}
        self._session: Optional[aiohttp.ClientSession] = None
        self._current_date: str = ""  # for daily history rotation

    # ----- Consumer API (never blocks on I/O) -----
    def get_indicators(self, apply_lag: bool = True) -> Dict[str, Any]:
        """
        Get current indicator values with optional lag application.

        With ``apply_lag`` and a positive ``optimal_lag_days``, the value is
        taken ``lag`` entries back from the end of the daily history. When the
        history is still too short, the current value is used instead.
        Indicators with no usable (non-NaN) value are omitted.

        Returns dict compatible with calculate_adaptive_cut_v2/v3:
            {'funding_btc': float, 'dvol_btc': float, ...}
        Plus metadata:
            {'_staleness': {name: seconds}, '_fetched_at': {name: iso}}
        """
        result: Dict[str, Any] = {}
        staleness: Dict[str, float] = {}
        fetched_iso: Dict[str, str] = {}
        with self._lock:
            now = time.monotonic()
            for name, meta in INDICATORS.items():
                st = self.state[name]
                lag = meta.optimal_lag_days if apply_lag else 0
                value = None
                if lag > 0 and len(st.daily_history) >= lag:
                    value = st.daily_history[-lag]  # `lag` snapshots back
                elif st.success:
                    # No lag requested, or not enough history yet:
                    # the current value is better than nothing.
                    value = st.value
                # NaN must never reach consumers, on either path.
                if value is not None and not np.isnan(value):
                    result[name] = value
                if st.fetched_at > 0:
                    staleness[name] = now - st.fetched_at
                if st.fetched_utc is not None:
                    fetched_iso[name] = st.fetched_utc.isoformat()
        result['_staleness'] = staleness
        result['_fetched_at'] = fetched_iso
        return result

    def get_acb_indicators(self) -> Dict[str, float]:
        """Get only the ACB-relevant indicators (with lags applied)."""
        full = self.get_indicators(apply_lag=True)
        return {k: v for k, v in full.items()
                if k in self._ACB_KEYS and isinstance(v, (int, float))}

    # ----- Background fetching -----
    async def _fetch_url(self, url: str, source: str) -> Optional[Any]:
        """Fetch ``url`` under the provider's concurrency cap and pacing delay.

        The provider slot is held for ``1 / rate`` seconds after the request,
        so each slot completes at most ``rate`` requests per second. Sources
        without a semaphore are fetched and then paced the same way.
        """
        delay = 1.0 / RATE_LIMITS.get(source, 1.0)
        sem = self._semaphores.get(source)
        if sem is None:
            data = await self._do_fetch(url)
            await asyncio.sleep(delay)
            return data
        async with sem:
            data = await self._do_fetch(url)
            # Enforce rate limit delay while still occupying the slot.
            await asyncio.sleep(delay)
        return data

    async def _do_fetch(self, url: str) -> Optional[Any]:
        """Raw HTTP GET. Returns decoded JSON, raw text, or None on any failure."""
        if not self._session:
            return None
        try:
            timeout = aiohttp.ClientTimeout(total=10)
            headers = {"User-Agent": "Mozilla/5.0"}
            async with self._session.get(url, timeout=timeout, headers=headers) as r:
                if r.status == 200:
                    ct = r.headers.get('Content-Type', '')
                    if 'json' in ct:
                        return await r.json()
                    # Some endpoints return JSON (or a bare number) as text.
                    text = await r.text()
                    try:
                        return json.loads(text)
                    except ValueError:
                        return text
                else:
                    logger.warning(f"HTTP {r.status} for {url[:60]}")
        except asyncio.TimeoutError:
            logger.debug(f"Timeout: {url[:60]}")
        except Exception as e:
            logger.debug(f"Fetch error: {e}")
        return None

    def _build_fred_url(self, series_id: str) -> str:
        """Build the FRED observations URL for the newest value of ``series_id``."""
        return (f"https://api.stlouisfed.org/fred/series/observations?"
                f"series_id={series_id}&api_key={self.fred_api_key}"
                f"&file_type=json&sort_order=desc&limit=1")

    def _record_failure(self, name: str, reason: str) -> None:
        """Count one failed attempt for ``name`` and remember why it failed."""
        with self._lock:
            st = self.state[name]
            st.fail_count += 1
            st.error = reason

    async def _fetch_indicator(self, name: str, meta: IndicatorMeta):
        """Fetch and parse a single indicator, updating its state in place."""
        # For FRED entries the url field holds the series id.
        url = self._build_fred_url(meta.url) if meta.source == 'fred' else meta.url

        data = await self._fetch_url(url, meta.source)
        if data is None:
            self._record_failure(name, "fetch_failed")
            return

        parser = getattr(Parsers, meta.parser, None)
        if parser is None:
            logger.error(f"No parser: {meta.parser}")
            self._record_failure(name, "no_parser")
            return

        try:
            value = parser(data)
        except Exception as e:
            self._record_failure(name, str(e))
            return

        if value == 0.0 and 'imbal' not in name:
            # Most parsers return 0.0 on failure
            self._record_failure(name, "zero_value")
            return

        with self._lock:
            st = self.state[name]
            st.value = value
            st.success = True
            st.fetched_at = time.monotonic()
            st.fetched_utc = datetime.now(timezone.utc)
            st.fetch_count += 1
            st.error = ""

    async def _indicator_loop(self, name: str, meta: IndicatorMeta):
        """Continuous poll loop for one indicator."""
        while self._running:
            try:
                await self._fetch_indicator(name, meta)
            except Exception as e:
                logger.error(f"Loop error {name}: {e}")
            if not self._running:
                break  # do not start a long sleep once stop() was requested
            await asyncio.sleep(meta.poll_interval_s)

    async def _daily_rotation(self):
        """When the UTC date changes, snapshot current values into daily history.

        The date is checked once a minute. Indicators without a successful,
        non-NaN value are skipped for that day.
        """
        while self._running:
            date_str = datetime.now(timezone.utc).strftime('%Y-%m-%d')
            if date_str != self._current_date:
                with self._lock:
                    for st in self.state.values():
                        if st.success and not np.isnan(st.value):
                            st.daily_history.append(st.value)
                    self._current_date = date_str
                logger.info(f"Daily rotation: {date_str}")
            await asyncio.sleep(60)  # check every minute

    async def _run(self):
        """Main coroutine: open the session, run every loop, close on exit."""
        connector = aiohttp.TCPConnector(limit=30, ttl_dns_cache=300)
        self._session = aiohttp.ClientSession(connector=connector)
        # Create the semaphores inside the running loop that will use them.
        self._semaphores = {
            source: asyncio.Semaphore(max(1, int(rate * 2)))
            for source, rate in RATE_LIMITS.items()
        }
        # One poll task per indicator, plus the daily rotation task.
        tasks = [asyncio.create_task(self._indicator_loop(name, meta))
                 for name, meta in INDICATORS.items()]
        tasks.append(asyncio.create_task(self._daily_rotation()))
        self._tasks = tasks
        logger.info(f"Started {len(INDICATORS)} indicator loops")
        try:
            # return_exceptions=True: wait for every task to finish, so that
            # cancellation from stop() ends in an orderly shutdown.
            await asyncio.gather(*tasks, return_exceptions=True)
        finally:
            self._tasks = []
            await self._session.close()
            self._session = None

    def _cancel_tasks(self) -> None:
        """Cancel all poll tasks. Must run on the event-loop thread."""
        for task in self._tasks:
            task.cancel()

    def start(self):
        """Start background thread with asyncio loop. No-op if already running."""
        if self._running:
            return
        self._running = True

        def _thread_target():
            loop = asyncio.new_event_loop()
            self._loop = loop
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(self._run())
            finally:
                loop.close()

        self._thread = threading.Thread(target=_thread_target, daemon=True)
        self._thread.start()
        logger.info("RealTimeExFService started")

    def stop(self):
        """Stop the service and wait up to 5 seconds for the thread to exit.

        Clearing the running flag alone is not enough, because poll tasks
        sleep for up to a full poll interval. The tasks are therefore
        cancelled on the loop thread so the session closes promptly.
        """
        self._running = False
        loop = self._loop
        if loop is not None and loop.is_running():
            try:
                loop.call_soon_threadsafe(self._cancel_tasks)
            except RuntimeError:
                # The loop closed between the check and the call: already stopped.
                pass
        if self._thread:
            self._thread.join(timeout=5)
        logger.info("RealTimeExFService stopped")

    def status(self) -> Dict[str, Any]:
        """Service health status: success counts plus per-indicator details."""
        with self._lock:
            now = time.monotonic()
            ok = sum(1 for s in self.state.values() if s.success)
            acb_ok = sum(1 for name in self._ACB_CORE
                         if self.state.get(name, IndicatorState()).success)
            details = {
                name: {
                    'value': s.value,
                    'success': s.success,
                    # -1 marks an indicator that has never been fetched.
                    'staleness_s': now - s.fetched_at if s.fetched_at > 0 else -1,
                    'fetches': s.fetch_count,
                    'fails': s.fail_count,
                }
                for name, s in self.state.items()
            }
            return {
                'indicators_ok': ok,
                'indicators_total': len(self.state),
                'acb_indicators_ok': acb_ok,
                'acb_indicators_total': len(self._ACB_CORE),
                'details': details,
            }
# =====================================================================
# ACB v3 - LAG-AWARE (drop-in replacement for v2)
# =====================================================================
def calculate_adaptive_cut_v3(ext_factors: dict, config: dict = None) -> tuple:
    """
    ACB v3: Same logic as v2 but expects lag-adjusted indicator values.

    The lag adjustment happens in RealTimeExFService.get_acb_indicators().
    This function is identical to v2 in logic - the innovation is in the
    data pipeline feeding it lagged values.
    For backtest: manually construct ext_factors with lagged values.

    Args:
        ext_factors: {indicator_name: value}. Missing keys fall back to
            neutral defaults (funding 0, dvol 50, fng 50, taker 1.0).
        config: ACB configuration with a 'thresholds' dict. When omitted or
            empty, ACBV2_CONFIG is loaded from
            dolphin_paper_trade_adaptive_cb_v2 (imported only in that case).

    Returns:
        (cut, signals, severity, details). When ext_factors is empty or the
        config is disabled: (base_cut, 0, 0, {'status': 'disabled'}).
    """
    if not config:
        # Lazy import: callers that pass their own config do not need the
        # paper-trade module to be importable.
        from dolphin_paper_trade_adaptive_cb_v2 import ACBV2_CONFIG as DEFAULT_CONFIG
        config = DEFAULT_CONFIG
    if not ext_factors or not config.get('enabled', True):
        return config.get('base_cut', 0.30), 0, 0, {'status': 'disabled'}

    thresholds = config['thresholds']
    signals = 0
    severity = 0
    details = {}

    # Signal 1: Funding (bearish confirmation)
    funding_btc = ext_factors.get('funding_btc', 0)
    if funding_btc < thresholds['funding_btc_very_bearish']:
        signals += 1
        severity += 2
        details['funding'] = f'{funding_btc:.6f} (very bearish)'
    elif funding_btc < thresholds['funding_btc_bearish']:
        signals += 1
        severity += 1
        details['funding'] = f'{funding_btc:.6f} (bearish)'
    else:
        details['funding'] = f'{funding_btc:.6f} (neutral)'

    # Signal 2: DVOL (volatility confirmation)
    dvol_btc = ext_factors.get('dvol_btc', 50)
    if dvol_btc > thresholds['dvol_extreme']:
        signals += 1
        severity += 2
        details['dvol'] = f'{dvol_btc:.1f} (extreme)'
    elif dvol_btc > thresholds['dvol_elevated']:
        signals += 1
        severity += 1
        details['dvol'] = f'{dvol_btc:.1f} (elevated)'
    else:
        details['dvol'] = f'{dvol_btc:.1f} (normal)'

    # Signal 3: FNG (only if confirmed by funding/DVOL)
    # The confirmation gates use fixed cut-offs (0 and 55), not the config.
    fng = ext_factors.get('fng', 50)
    confirmed = funding_btc < 0 or dvol_btc > 55
    if fng < thresholds['fng_extreme_fear'] and confirmed:
        signals += 1
        severity += 1
        details['fng'] = f'{fng:.1f} (extreme fear, confirmed)'
    elif fng < thresholds['fng_fear'] and confirmed:
        signals += 0.5
        severity += 0.5
        details['fng'] = f'{fng:.1f} (fear, confirmed)'
    else:
        details['fng'] = f'{fng:.1f} (neutral or unconfirmed)'

    # Signal 4: Taker ratio (strongest predictor)
    taker = ext_factors.get('taker', 1.0)
    if taker < thresholds['taker_selling']:
        signals += 1
        severity += 2
        details['taker'] = f'{taker:.3f} (heavy selling)'
    elif taker < thresholds['taker_mild_selling']:
        signals += 0.5
        severity += 1
        details['taker'] = f'{taker:.3f} (mild selling)'
    else:
        details['taker'] = f'{taker:.3f} (neutral)'

    # Cut calculation (identical to v2)
    if signals >= 3 and severity >= 5:
        cut = 0.75
    elif signals >= 3:
        cut = 0.65
    elif signals >= 2 and severity >= 3:
        cut = 0.55
    elif signals >= 2:
        cut = 0.45
    elif signals >= 1:
        cut = 0.30
    else:
        cut = 0.0

    details['signals'] = signals
    details['severity'] = severity
    details['version'] = 'v3_lag_aware'
    return cut, signals, severity, details
# =====================================================================
# ACB v4 - EXPANDED 10-INDICATOR ENGINE
# =====================================================================
# Static trigger levels for the indicators added in ACB v4 (empirically
# validated). The static v4 engine fires when the value is below the level,
# except addr_btc, which fires above it.
ACB_V4_THRESHOLDS = {
    'funding_eth': -3.105e-05,
    'mcap_bc': 1.361e+12,
    'fund_dbt_btc': -2.426e-06,
    'oi_btc': 7.955e+04,
    'fund_dbt_eth': -6.858e-06,
    'addr_btc': 7.028e+05,
}
def calculate_adaptive_cut_v4(ext_factors: dict, config: dict = None) -> tuple:
    """
    ACB v4: Expanded engine evaluating 10 empirically validated indicators.

    Base cut threshold and math derived from 54-day exhaustive backtest
    (+15.00% ROI, 6.68% DD).

    The four core signals come from calculate_adaptive_cut_v3. Each extra
    indicator that triggers adds 0.5 signal and 1 severity, then the cut
    ladder is re-evaluated. The extra indicators use the meta-adaptive
    thresholds when available, otherwise the static ACB_V4_THRESHOLDS.

    Args:
        ext_factors: {indicator_name: value}; missing, None or NaN values
            are skipped for the extra indicators.
        config: ACB configuration. When omitted or empty, ACBV2_CONFIG is
            loaded from dolphin_paper_trade_adaptive_cb_v2.

    Returns:
        (cut, signals, severity, details). When ext_factors is empty or the
        config is disabled: (base_cut, 0, 0, {'status': 'disabled'}).
    """
    if not config:
        # Lazy import: only needed when the caller supplies no config.
        from dolphin_paper_trade_adaptive_cb_v2 import ACBV2_CONFIG as DEFAULT_CONFIG
        config = DEFAULT_CONFIG
    if not ext_factors or not config.get('enabled', True):
        return config.get('base_cut', 0.30), 0, 0, {'status': 'disabled'}

    # Use baseline logic for the core 4 signals
    cut, signals, severity, details = calculate_adaptive_cut_v3(ext_factors, config)

    # -------------------------------------------------------------
    # META-ADAPTIVE OVERRIDE OR FALLBACK TO STATIC v4
    # -------------------------------------------------------------
    # Call the module's own function directly. Importing this module by name
    # silently fell back to static whenever it was loaded under another name.
    active_thresh = _get_active_meta_thresholds()

    if active_thresh:
        # Dynamic processing of strictly proved meta thresholds
        details['version'] = 'v4_meta_adaptive'
        for key, limits in active_thresh.items():
            if key in ('funding_btc', 'dvol_btc', 'fng', 'taker'):
                continue  # Handled by v3
            val = ext_factors.get(key, np.nan)
            if val is None or np.isnan(val):
                continue
            direction = limits['direction']
            threshold = limits['threshold']
            triggered = ((direction == '<' and val < threshold)
                         or (direction == '>' and val > threshold))
            if triggered:
                signals += 0.5
                severity += 1
                details[key] = f"{val:.4g} (meta {direction} {threshold:.4g})"
    else:
        # Fallback 10-indicator engine statically verified on 2026-02-27
        details['version'] = 'v4_expanded_static'
        # (indicator, trigger direction, value format, threshold format);
        # an empty threshold format renders the threshold with str().
        static_rules = (
            ('funding_eth', '<', '.6f', ''),
            ('mcap_bc', '<', '.2e', '.2e'),
            ('fund_dbt_btc', '<', '.2e', '.2e'),
            ('oi_btc', '<', '.1f', '.1f'),
            ('fund_dbt_eth', '<', '.2e', '.2e'),
            ('addr_btc', '>', '.1f', '.1f'),
        )
        for key, direction, val_fmt, thr_fmt in static_rules:
            val = ext_factors.get(key, np.nan)
            if val is None or np.isnan(val):
                continue
            threshold = ACB_V4_THRESHOLDS[key]
            triggered = val < threshold if direction == '<' else val > threshold
            if triggered:
                signals += 0.5
                severity += 1
                details[key] = (f"{format(val, val_fmt)} "
                                f"({direction} {format(threshold, thr_fmt)})")

    # Recalculate cut with updated signals and severity
    if signals >= 3 and severity >= 5:
        cut = 0.75
    elif signals >= 3:
        cut = 0.65
    elif signals >= 2 and severity >= 3:
        cut = 0.55
    elif signals >= 2:
        cut = 0.45
    elif signals >= 1:
        cut = 0.30
    else:
        cut = 0.0

    details['total_signals_v4'] = signals
    details['total_severity_v4'] = severity
    return cut, signals, severity, details
# =====================================================================
# NPZ + ARROW DUAL WRITER
# =====================================================================
class DualWriter:
    """Persist one indicator snapshot as NPZ (legacy) and, if possible, Arrow."""

    def __init__(self):
        # pyarrow is optional: without it only the NPZ file is produced.
        try:
            import pyarrow as pa
        except ImportError:
            self._has_pyarrow = False
        else:
            self._pa = pa
            self._has_pyarrow = True

    def write(self, indicators: Dict[str, Any], scan_path: Path,
              scan_number: int = 0):
        """Write both NPZ and Arrow files alongside the scan.

        Keys starting with '_' (metadata) and non-numeric values are dropped
        before writing.
        """
        clean = {}
        for key, val in indicators.items():
            if key.startswith('_'):
                continue
            if isinstance(val, (int, float)):
                clean[key] = val

        # NPZ (legacy format) is always written.
        self._write_npz(clean, scan_path, scan_number)
        # Arrow (new format) only when pyarrow imported successfully.
        if self._has_pyarrow:
            self._write_arrow(clean, scan_path, scan_number)

    def _write_npz(self, indicators, scan_path, scan_number):
        """Write ``<scan stem>__Indicators.npz`` next to the scan file."""
        names = sorted(INDICATORS.keys())
        # Indicators missing from the snapshot are stored as NaN / not-success.
        raw = [indicators.get(n, np.nan) for n in names]
        api_indicators = np.array(raw)
        api_success = np.array([not np.isnan(v) for v in raw])
        api_names = np.array(names, dtype='U32')
        stamp = datetime.now(timezone.utc).isoformat()
        out_path = scan_path.parent / f"{scan_path.stem}__Indicators.npz"
        np.savez_compressed(
            out_path,
            api_indicators=api_indicators,
            api_success=api_success,
            api_names=api_names,
            api_success_rate=np.array([np.nanmean(api_success)]),
            timestamp=np.array([stamp], dtype='U64'),
            scan_number=np.array([scan_number]),
        )

    def _write_arrow(self, indicators, scan_path, scan_number):
        """Write ``<scan stem>__Indicators.arrow``: one row, one column per indicator."""
        pa = self._pa
        now_ns = int(datetime.now(timezone.utc).timestamp() * 1e9)
        fields = [
            pa.field('timestamp_ns', pa.int64()),
            pa.field('scan_number', pa.int32()),
        ]
        values = {
            'timestamp_ns': [now_ns],
            'scan_number': [scan_number],
        }
        for name in sorted(INDICATORS.keys()):
            fields.append(pa.field(name, pa.float64()))
            values[name] = [indicators.get(name, np.nan)]
        schema = pa.schema(fields)
        table = pa.table(values, schema=schema)
        out_path = scan_path.parent / f"{scan_path.stem}__Indicators.arrow"
        with pa.ipc.new_file(str(out_path), schema) as writer:
            writer.write_table(table)
# =====================================================================
# CONVENIENCE: Load from NPZ with lag support (for backtesting)
# =====================================================================
# =====================================================================
# LAG CONFIGURATIONS
# =====================================================================
# ROBUST DEFAULT: every core indicator uses the same lag of 1 day.
# Validated: +3.10% ROI, -2.02% DD vs lag=0 (54-day backtest).
# Zero overfitting risk (no per-indicator optimization).
# Scientifically justified: "yesterday's indicators predict today's market"
ROBUST_LAGS = dict.fromkeys(
    ('funding_btc', 'funding_eth', 'dvol_btc', 'dvol_eth',
     'fng', 'vix', 'ls_btc', 'taker'),
    1,
)

# EXPERIMENTAL: Per-indicator optimal lags from correlation analysis.
# Validated: +3.98% ROI, -2.93% DD vs lag=0 (54-day backtest).
# WARNING: Overfitting risk at 6.8 days/parameter. Only 5/8 significant.
# DO NOT USE until 80+ days of data available for re-validation.
# TODO: Re-run lag_correlation_analysis with 80+ days, update if confirmed.
EXPERIMENTAL_LAGS = {
    'funding_btc': 5,   # r=+0.39, p=0.006 (slow propagation - 5 days!)
    'funding_eth': 3,   # r=+0.20, p=0.154 (NOT significant)
    'dvol_btc': 1,      # r=-0.49, p=0.0002 (STRONGEST - overnight digest)
    'dvol_eth': 1,      # r=-0.42, p=0.002
    'fng': 5,           # r=-0.19, p=0.186 (NOT significant)
    'vix': 1,           # r=-0.20, p=0.270 (NOT significant)
    'ls_btc': 0,        # r=+0.30, p=0.036 (immediate - only lag=0 indicator)
    'taker': 1,         # r=-0.41, p=0.003 (overnight digest)
}

# CONSERVATIVE: Only statistically verified strong deviations from lag=1 for
# core indicators. Same as ROBUST except funding_btc=5 and ls_btc=0.
CONSERVATIVE_LAGS = {**ROBUST_LAGS, 'funding_btc': 5, 'ls_btc': 0}

# V4: robust baseline, funding_eth moved to lag 3, plus the indicators
# introduced by the v4 engine.
V4_LAGS = {
    **ROBUST_LAGS,
    'funding_eth': 3,
    'mcap_bc': 1,
    'fund_dbt_btc': 0,
    'oi_btc': 0,
    'fund_dbt_eth': 1,
    'addr_btc': 3,
}

# Active configuration - use V4 by default given superior empirical results (+15.00% ROI, 6.68% DD)
OPTIMAL_LAGS = V4_LAGS
# =====================================================================
# META-ADAPTIVE RUNTIME
# =====================================================================
def _get_active_lags() -> dict:
    """Return the lag table currently in force.

    Prefers the 'lags' entry of the meta-adaptive layer's current config.
    Falls back to OPTIMAL_LAGS when that layer is missing, fails, or provides
    no 'lags' entry. Best-effort by design: errors are swallowed on purpose.
    """
    try:
        from meta_adaptive_optimizer import get_current_meta_config
        meta_cfg = get_current_meta_config()
        has_lags = bool(meta_cfg) and 'lags' in meta_cfg
        if has_lags:
            return meta_cfg['lags']
    except Exception:
        # Any failure in the optional meta layer means "use the static table".
        pass
    return OPTIMAL_LAGS
def _get_active_meta_thresholds() -> dict:
"""Return thresholds: dynamically from meta-layer if available, else None."""
try:
from meta_adaptive_optimizer import get_current_meta_config
meta = get_current_meta_config()
if meta and 'thresholds' in meta:
return meta['thresholds']
except Exception:
pass
return None
# TODO: When switching to EXPERIMENTAL_LAGS, also update IndicatorMeta.optimal_lag_days
def load_external_factors_lagged(date_str: str, all_daily_vals: Dict[str, Dict],
                                 sorted_dates: List[str]) -> dict:
    """
    Build the external-factor dict for one date with per-indicator lags applied.

    Lags come from the active configuration (meta-adaptive layer when
    available). A lag is counted in positions within ``sorted_dates``, not in
    calendar days, so gaps in the date list stretch the effective lag.

    Args:
        date_str: Target date
        all_daily_vals: {date_str: {indicator_name: value}} for all dates
        sorted_dates: Chronologically sorted list of all dates

    Returns:
        {indicator_name: value} for every indicator whose lagged source date
        exists and has a non-None value. An empty dict when ``date_str`` is
        not in ``sorted_dates``.
    """
    try:
        target_pos = sorted_dates.index(date_str)
    except ValueError:
        return {}

    lagged = {}
    for indicator, lag_days in _get_active_lags().items():
        source_pos = target_pos - lag_days
        if source_pos < 0:
            continue  # not enough history before the target date
        source_date = sorted_dates[source_pos]
        value = all_daily_vals.get(source_date, {}).get(indicator)
        if value is not None:
            lagged[indicator] = value
    return lagged