1207 lines
41 KiB
Python
1207 lines
41 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
test_exf_correlation.py
|
|||
|
|
=======================
|
|||
|
|
Fetch historical data for all ExF indicators with FULL/PARTIAL history,
|
|||
|
|
align to 55 trading days (2025-12-31 → 2026-02-25), and test each for
|
|||
|
|
predictiveness against daily trading returns.
|
|||
|
|
|
|||
|
|
Usage:
|
|||
|
|
python test_exf_correlation.py [--no-cache]
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import sys
|
|||
|
|
import os
|
|||
|
|
import time
|
|||
|
|
import json
|
|||
|
|
import csv
|
|||
|
|
import math
|
|||
|
|
import argparse
|
|||
|
|
import requests
|
|||
|
|
from pathlib import Path
|
|||
|
|
from datetime import datetime, timezone, timedelta, date
|
|||
|
|
from collections import defaultdict
|
|||
|
|
|
|||
|
|
import numpy as np
|
|||
|
|
import pandas as pd
|
|||
|
|
from scipy import stats
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Windows-specific project root; everything below hangs off it.
BASE_DIR = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict")
VBT_DIR = BASE_DIR / "vbt_cache"          # per-day parquet files; stems define the trading-day list
EXF_DIR = BASE_DIR / "external_factors"
NAUTILUS_DIR = BASE_DIR / "nautilus_dolphin"
RUN_LOGS = NAUTILUS_DIR / "run_logs"      # daily_*.csv returns + exf_raw_cache_*.json caches
RUN_LOGS.mkdir(parents=True, exist_ok=True)

# SECURITY NOTE(review): API key hard-coded in source — should be loaded from
# an environment variable or config file, not committed.
FRED_KEY = "c16a9cde3e3bb5bb972bb9283485f202"

# ---------------------------------------------------------------------------
# Date range
# ---------------------------------------------------------------------------
# 55-trading-day analysis window, UTC (see module docstring).
START_DT = datetime(2025, 12, 31, tzinfo=timezone.utc)
END_DT = datetime(2026, 2, 25, 23, 59, 59, tzinfo=timezone.utc)
START_MS = int(START_DT.timestamp() * 1000)  # epoch milliseconds for Binance/Deribit params
END_MS = int(END_DT.timestamp() * 1000)
START_STR = "2025-12-31"
END_STR = "2026-02-26"  # exclusive for CoinMetrics end_time

# Run timestamp used to stamp output filenames.
TS_NOW = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Helpers
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def _date_range_str(start: datetime, end: datetime):
|
|||
|
|
"""Yield YYYY-MM-DD strings for every calendar day from start to end inclusive."""
|
|||
|
|
cur = start.date()
|
|||
|
|
stop = end.date()
|
|||
|
|
while cur <= stop:
|
|||
|
|
yield cur.strftime("%Y-%m-%d")
|
|||
|
|
cur += timedelta(days=1)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _ms_to_date(ms: int) -> str:
|
|||
|
|
return datetime.fromtimestamp(ms / 1000, tz=timezone.utc).strftime("%Y-%m-%d")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _unix_to_date(ts: int) -> str:
|
|||
|
|
return datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%d")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _safe_float(v):
|
|||
|
|
if v is None:
|
|||
|
|
return float("nan")
|
|||
|
|
try:
|
|||
|
|
f = float(v)
|
|||
|
|
if math.isfinite(f):
|
|||
|
|
return f
|
|||
|
|
return float("nan")
|
|||
|
|
except (TypeError, ValueError):
|
|||
|
|
return float("nan")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _fill_forward(series: dict, date_list: list) -> dict:
|
|||
|
|
"""Fill-forward a {date_str: float} dict over date_list."""
|
|||
|
|
result = {}
|
|||
|
|
last = float("nan")
|
|||
|
|
for d in date_list:
|
|||
|
|
v = series.get(d, float("nan"))
|
|||
|
|
if not math.isnan(v):
|
|||
|
|
last = v
|
|||
|
|
result[d] = last
|
|||
|
|
return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _cache_path(source: str) -> Path:
    """Path of the raw-response JSON cache file for one data source."""
    filename = "exf_raw_cache_{}.json".format(source)
    return RUN_LOGS / filename
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _load_cache(source: str):
    """Return the cached JSON payload for source, or None if absent/unreadable.

    Any read/parse failure is deliberately swallowed: a bad cache just
    triggers a fresh fetch.
    """
    path = _cache_path(source)
    if not path.exists():
        return None
    try:
        with open(path, "r") as fh:
            return json.load(fh)
    except Exception:
        return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _save_cache(source: str, data):
    """Serialize data as JSON into the source's cache file (overwrites)."""
    with open(_cache_path(source), "w") as fh:
        json.dump(data, fh)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _get(url: str, params=None, timeout=20, headers=None) -> dict | list | None:
    """GET url and return the parsed JSON body; print and return None on any failure.

    A browser-like User-Agent is always sent; caller headers are merged on top.
    """
    try:
        merged = {"User-Agent": "Mozilla/5.0"}
        if headers:
            merged.update(headers)
        resp = requests.get(url, params=params, timeout=timeout, headers=merged)
        resp.raise_for_status()
        return resp.json()
    except Exception as e:
        # Best-effort fetcher: log (truncated URL) and let the caller cope with None.
        print(f" [HTTP ERROR] {url[:80]} -> {e}")
        return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Step 1: Get trading day list from VBT parquet filenames
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def get_trading_days() -> list:
    """Collect sorted 'YYYY-MM-DD' trading-day strings from vbt_cache parquet filenames.

    Only stems that parse as dates inside the 2025-12-31 .. 2026-02-25 window
    are kept; non-date filenames are ignored.

    Returns:
        Sorted list of date strings (possibly empty).
    """
    days = []
    for f in sorted(VBT_DIR.glob("*.parquet")):
        stem = f.stem  # expected YYYY-MM-DD
        try:
            dt = datetime.strptime(stem, "%Y-%m-%d").date()
        except ValueError:
            continue  # not a date-named file — skip
        if date(2025, 12, 31) <= dt <= date(2026, 2, 25):
            days.append(stem)
    if days:
        print(f"[TRADING DAYS] Found {len(days)} days: {days[0]} → {days[-1]}")
    else:
        # BUG FIX: the original indexed days[0]/days[-1] unconditionally and
        # raised IndexError when no parquet file fell inside the window.
        print("[TRADING DAYS] Found 0 days in range — check vbt_cache contents")
    return days
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Step 2: Load daily returns
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def load_daily_returns(trading_days: list) -> dict:
    """
    Returns {date_str: pnl_normalized} where pnl_normalized = pnl / capital_start_of_day.
    Tries to load from most recent run_logs/daily_*.csv first.

    Args:
        trading_days: date strings to keep; rows for other dates are dropped.

    Raises:
        RuntimeError: when no run_logs/daily_*.csv exists.
    """
    csvs = sorted(RUN_LOGS.glob("daily_*.csv"), reverse=True)
    if not csvs:
        raise RuntimeError(
            "No run_logs/daily_*.csv found. Run test_pf_dynamic_beta_validate.py first."
        )

    latest = csvs[0]
    print(f"[RETURNS] Loading from {latest.name}")
    df = pd.read_csv(latest)
    df["date"] = df["date"].astype(str)

    # CSV header: date,pnl,capital,dd_pct,... — 'capital' appears to be the
    # end-of-day balance, so start capital is reconstructed as capital - pnl
    # and pnl_normalized = pnl / start_capital.
    wanted = set(trading_days)  # PERF FIX: O(1) membership instead of list scan per row
    returns = {}
    for _, row in df.iterrows():
        d = str(row["date"])
        if d not in wanted:
            continue
        pnl = float(row["pnl"])
        cap = float(row["capital"])
        start_cap = cap - pnl  # end_cap = start_cap + pnl ← reconstruct
        returns[d] = pnl / start_cap if start_cap > 0 else float("nan")

    print(f"[RETURNS] Loaded {len(returns)} dates. Sample: {list(returns.items())[:3]}")
    return returns
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Step 3: Fetch functions — one call per source
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
# ----- FRED ----------------------------------------------------------------
|
|||
|
|
|
|||
|
|
# FRED series IDs keyed by the short indicator names used in the output tables.
FRED_SERIES = {
    "dxy": "DTWEXBGS",       # broad trade-weighted USD index
    "us10y": "DGS10",        # 10-year Treasury yield
    "us2y": "DGS2",          # 2-year Treasury yield
    "ycurve": "T10Y2Y",      # 10y-2y curve spread
    "vix": "VIXCLS",         # VIX close
    "fedfunds": "DFF",       # effective fed funds rate
    "m2": "WM2NS",           # M2 money stock (weekly)
    "cpi": "CPIAUCSL",       # CPI (monthly)
    "sp500": "SP500",
    # gold: KO — GOLDAMGBD228NLBM/GOLDPMGBD228NLBM/GOLD all return HTTP 400 (series licensing/discontinued)
    "hy_spread":"BAMLH0A0HYM2",  # high-yield OAS
    "be5y": "T5YIE",             # 5-year breakeven inflation
    "nfci": "NFCI",              # Chicago Fed financial conditions (weekly)
    "claims": "ICSA",            # initial jobless claims (weekly)
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
def fetch_fred(no_cache=False) -> dict:
    """Returns {indicator_name: {date_str: float}} for all FRED indicators.

    Each series is fetched over a padded window (2025-12-01 .. 2026-03-01),
    missing observations ('.') are dropped, and the result is fill-forwarded
    over every calendar day of the analysis range.
    """
    cached = None if no_cache else _load_cache("fred")
    if cached is not None:
        print("[FRED] Using cache")
        return cached

    all_dates = list(_date_range_str(START_DT, END_DT))
    result = {}

    for name, series_id in FRED_SERIES.items():
        url = (
            "https://api.stlouisfed.org/fred/series/observations"
            f"?series_id={series_id}"
            f"&api_key={FRED_KEY}"
            "&file_type=json"
            "&observation_start=2025-12-01"
            "&observation_end=2026-03-01"
            "&sort_order=asc"
        )
        payload = _get(url)

        raw = {}
        if payload and "observations" in payload:
            for obs in payload["observations"]:
                when = obs.get("date", "")
                val = obs.get("value", ".")
                if val == ".":
                    continue  # FRED marks missing observations with "."
                num = _safe_float(val)
                if not math.isnan(num):
                    raw[when] = num

        filled = _fill_forward(raw, all_dates)
        result[name] = filled
        n_ok = sum(1 for v in filled.values() if not math.isnan(v))
        print(f" [FRED] {name:12s} ({series_id:20s}): {n_ok}/{len(all_dates)} obs")
        time.sleep(0.15)

    _save_cache("fred", result)
    return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ----- BINANCE DERIVATIVES -------------------------------------------------
|
|||
|
|
|
|||
|
|
def _bin_hourly_to_daily(records: list, key: str, agg="mean") -> dict:
    """Group hourly Binance records to daily. agg='mean' or 'last'.

    Timestamps come from 'timestamp' (falling back to 'fundingTime'), in
    epoch ms; non-finite values of records[key] are discarded.
    """
    buckets = defaultdict(list)
    for rec in records:
        stamp = int(rec.get("timestamp", rec.get("fundingTime", 0)))
        val = _safe_float(rec.get(key))
        if math.isnan(val):
            continue
        buckets[_ms_to_date(stamp)].append(val)

    if agg == "last":
        return {day: vals[-1] for day, vals in buckets.items() if vals}
    return {day: float(np.mean(vals)) for day, vals in buckets.items() if vals}
|
|||
|
|
|
|||
|
|
|
|||
|
|
def fetch_binance(no_cache=False) -> dict:
    """Fetch Binance derivatives/spot series.

    Returns {indicator_name: {date_str: float}}. Funding rates and spot
    quote-volume are fetched live; the OI / long-short / taker series are
    stubbed to empty dicts (see KO note below).
    """
    cache = None if no_cache else _load_cache("binance")
    if cache is not None:
        print("[BINANCE] Using cache")
        return cache

    result = {}

    # --- Funding BTC
    def _funding(symbol):
        # Forward pagination: 500 funding events per page, 8h funding interval,
        # so each chunk window is 500 * 8h of calendar time.
        all_recs = []
        cur_start = START_MS
        while cur_start < END_MS:
            chunk_end = min(cur_start + 500 * 8 * 3600 * 1000, END_MS)
            url = f"https://fapi.binance.com/fapi/v1/fundingRate?symbol={symbol}&startTime={cur_start}&endTime={chunk_end}&limit=500"
            d = _get(url)
            if d and isinstance(d, list):
                all_recs.extend(d)
                if len(d) < 500:
                    break  # short page → no more records in range
                # Resume just after the last record to avoid duplicates.
                cur_start = int(d[-1]["fundingTime"]) + 1
            else:
                break  # HTTP error or unexpected payload — keep what we have
        # Daily value = last funding rate observed each day.
        return _bin_hourly_to_daily(all_recs, "fundingRate", agg="last")

    # --- OI / LS / Taker: KO — Binance Futures stats endpoints retain only ~30 days of history.
    # Requesting data from 2025-12-31 (63 days ago) returns HTTP 400 -1130 "startTime invalid".
    # These require live collection; cannot be retrofitted historically.
    def _oi(symbol):
        print(f" [KO] openInterestHist({symbol}): 30-day retention wall — returning empty")
        return {}

    def _ls(endpoint, symbol):
        print(f" [KO] {endpoint}({symbol}): 30-day retention wall — returning empty")
        return {}

    def _taker():
        print(f" [KO] takerlongshortRatio: 30-day retention wall — returning empty")
        return {}

    # --- Volume (spot klines daily)
    def _vol24():
        url = f"https://api.binance.com/api/v3/klines?symbol=BTCUSDT&interval=1d&startTime={START_MS}&endTime={END_MS}&limit=100"
        d = _get(url)
        result_v = {}
        if d and isinstance(d, list):
            for bar in d:
                ts = int(bar[0])  # kline open time (ms)
                date_s = _ms_to_date(ts)
                v = _safe_float(bar[7])  # quote volume
                if not math.isnan(v):
                    result_v[date_s] = v
        return result_v

    print("[BINANCE] Fetching funding_btc ...")
    result["funding_btc"] = _funding("BTCUSDT")

    print("[BINANCE] Fetching funding_eth ...")
    result["funding_eth"] = _funding("ETHUSDT")

    print("[BINANCE] Fetching oi_btc ...")
    result["oi_btc"] = _oi("BTCUSDT")

    print("[BINANCE] Fetching oi_eth ...")
    result["oi_eth"] = _oi("ETHUSDT")

    print("[BINANCE] Fetching ls_btc ...")
    result["ls_btc"] = _ls("globalLongShortAccountRatio", "BTCUSDT")

    print("[BINANCE] Fetching ls_eth ...")
    result["ls_eth"] = _ls("globalLongShortAccountRatio", "ETHUSDT")

    print("[BINANCE] Fetching ls_top ...")
    result["ls_top"] = _ls("topLongShortAccountRatio", "BTCUSDT")

    print("[BINANCE] Fetching taker ...")
    result["taker"] = _taker()

    print("[BINANCE] Fetching vol24 ...")
    result["vol24"] = _vol24()

    for k, v in result.items():
        print(f" [BINANCE] {k}: {len(v)} days")

    _save_cache("binance", result)
    return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ----- DERIBIT -------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def fetch_deribit(no_cache=False) -> dict:
    """Fetch Deribit DVOL indices and perpetual funding history.

    Returns {indicator_name: {date_str: float}} with daily means of the
    hourly observations.
    """
    cache = None if no_cache else _load_cache("deribit")
    if cache is not None:
        print("[DERIBIT] Using cache")
        return cache

    result = {}

    def _dvol(currency):
        # Hourly DVOL candles over the window; daily value = mean of closes.
        url = (
            f"https://www.deribit.com/api/v2/public/get_volatility_index_data"
            f"?currency={currency}&resolution=3600&start_timestamp={START_MS}&end_timestamp={END_MS}"
        )
        d = _get(url)
        by_day = defaultdict(list)
        if d and "result" in d and isinstance(d["result"], dict):
            for row in d["result"].get("data", []):
                # row = [timestamp_ms, open, high, low, close]
                ts = int(row[0])
                close = _safe_float(row[4]) if len(row) > 4 else float("nan")
                if not math.isnan(close):
                    by_day[_ms_to_date(ts)].append(close)
        return {d: float(np.mean(v)) for d, v in by_day.items()}

    def _deribit_funding(instrument):
        # Paginate backward: Deribit returns at most ~744 records per call (count=1000 cap).
        # With hourly data, 744 records ≈ 31 days. Need two pages to cover 55 days.
        by_day = defaultdict(list)
        cur_end = END_MS
        pages = 0  # hard cap of 10 pages guards against runaway loops
        while cur_end > START_MS and pages < 10:
            url = (
                f"https://www.deribit.com/api/v2/public/get_funding_rate_history"
                f"?instrument_name={instrument}&start_timestamp={START_MS}&end_timestamp={cur_end}&count=1000"
            )
            d = _get(url)
            if not (d and "result" in d and isinstance(d["result"], list)):
                break  # HTTP error or unexpected payload
            rows = d["result"]
            if not rows:
                break  # no more history
            for row in rows:
                ts = int(row.get("timestamp", 0))
                v = _safe_float(row.get("interest_8h"))
                if not math.isnan(v):
                    by_day[_ms_to_date(ts)].append(v)
            # Step the request window back past the oldest record on this page.
            oldest_ts = min(int(r.get("timestamp", cur_end)) for r in rows)
            if oldest_ts >= cur_end:
                break  # no progress — avoid an infinite loop
            cur_end = oldest_ts - 1
            pages += 1
            time.sleep(0.3)
        # Daily value = mean of the hourly 8h-interest readings.
        return {d: float(np.mean(v)) for d, v in by_day.items()}

    print("[DERIBIT] Fetching dvol_btc ...")
    result["dvol_btc"] = _dvol("BTC")
    time.sleep(0.5)

    print("[DERIBIT] Fetching dvol_eth ...")
    result["dvol_eth"] = _dvol("ETH")
    time.sleep(0.5)

    print("[DERIBIT] Fetching fund_dbt_btc ...")
    result["fund_dbt_btc"] = _deribit_funding("BTC-PERPETUAL")
    time.sleep(0.5)

    print("[DERIBIT] Fetching fund_dbt_eth ...")
    result["fund_dbt_eth"] = _deribit_funding("ETH-PERPETUAL")

    for k, v in result.items():
        print(f" [DERIBIT] {k}: {len(v)} days")

    _save_cache("deribit", result)
    return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ----- COINMETRICS ---------------------------------------------------------
|
|||
|
|
|
|||
|
|
def fetch_coinmetrics(no_cache=False) -> dict:
    """Fetch CoinMetrics on-chain metrics; returns {indicator: {date_str: float}}.

    NOTE: the community API returns HTTP 403 (paywalled) as of 2026-03, so in
    practice every series comes back empty; the code path is kept so cached
    data and the output schema remain intact.
    """
    cached = None if no_cache else _load_cache("coinmetrics")
    if cached is not None:
        print("[COINMETRICS] Using cache")
        return cached

    result = {}
    # CoinMetrics community API: KO permanently — HTTP 403 Forbidden as of 2026-03.
    # Community endpoint is now paywalled. All 12 CoinMetrics indicators are dead.
    BASE = "https://community-api.coinmetrics.io/v4/timeseries/asset-metrics"

    def _cm_fetch(asset, metrics_str):
        # One batched request per asset; returns the raw row list (or []).
        url = (
            f"{BASE}?assets={asset}&metrics={metrics_str}"
            f"&frequency=1d&start_time={START_STR}&end_time={END_STR}&page_size=100"
        )
        payload = _get(url, timeout=30)
        if payload and "data" in payload:
            return payload["data"]
        return []

    def _rows_to_series(rows, metric_key):
        # Pull one metric column out of the batch rows as {date: float}.
        series = {}
        for rec in rows:
            day = rec.get("time", "")[:10]  # ISO timestamp → YYYY-MM-DD
            val = _safe_float(rec.get(metric_key))
            if not math.isnan(val):
                series[day] = val
        return series

    # BTC batch
    print("[COINMETRICS] Fetching BTC batch ...")
    btc_metrics = "CapRealUSD,CapMrktCurUSD,AdrActCnt,TxCnt,FeeTotUSD,NVTAdj,VelCur1yr,SplyAct1yr"
    btc_rows = _cm_fetch("btc", btc_metrics)
    time.sleep(7)

    for out_name, metric in (
        ("rcap_btc", "CapRealUSD"),
        ("addr_btc", "AdrActCnt"),
        ("txcnt", "TxCnt"),
        ("fees_btc", "FeeTotUSD"),
        ("nvt", "NVTAdj"),
        ("velocity", "VelCur1yr"),
        ("sply_act", "SplyAct1yr"),
    ):
        result[out_name] = _rows_to_series(btc_rows, metric)

    # MVRV and NUPL derived from CapMrktCurUSD + CapRealUSD
    mvrv_d, nupl_d = {}, {}
    for rec in btc_rows:
        day = rec.get("time", "")[:10]
        mcap = _safe_float(rec.get("CapMrktCurUSD"))
        rcap = _safe_float(rec.get("CapRealUSD"))
        if math.isnan(mcap) or math.isnan(rcap) or not rcap > 0:
            continue
        mvrv_d[day] = mcap / rcap
        nupl_d[day] = (mcap - rcap) / mcap if mcap > 0 else float("nan")
    result["mvrv"] = mvrv_d
    result["nupl"] = nupl_d

    # ETH batch
    print("[COINMETRICS] Fetching ETH batch ...")
    eth_rows = _cm_fetch("eth", "CapRealUSD,AdrActCnt,FeeTotUSD")
    time.sleep(7)

    result["rcap_eth"] = _rows_to_series(eth_rows, "CapRealUSD")
    result["addr_eth"] = _rows_to_series(eth_rows, "AdrActCnt")
    result["fees_eth"] = _rows_to_series(eth_rows, "FeeTotUSD")

    for k, v in result.items():
        print(f" [COINMETRICS] {k}: {len(v)} days")

    _save_cache("coinmetrics", result)
    return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ----- DEFILLAMA -----------------------------------------------------------
|
|||
|
|
|
|||
|
|
def fetch_defillama(no_cache=False) -> dict:
    """Fetch DefiLlama TVL and stablecoin-supply series.

    Returns {indicator_name: {date_str: float}}, restricted to the analysis
    date window.
    """
    cached = None if no_cache else _load_cache("defillama")
    if cached is not None:
        print("[DEFILLAMA] Using cache")
        return cached

    wanted_dates = set(_date_range_str(START_DT, END_DT))
    result = {}

    def _dl_tvl(url):
        # historicalChainTvl returns [{date: unix_sec, tvl: float}, ...]
        payload = _get(url)
        series = {}
        if payload and isinstance(payload, list):
            for rec in payload:
                day = _unix_to_date(int(rec.get("date", 0)))
                val = _safe_float(rec.get("tvl"))
                if day in wanted_dates and not math.isnan(val):
                    series[day] = val
        return series

    def _dl_stables_chart(url):
        """stablecoincharts/all returns array of {date, totalCirculatingUSD: {peggedUSD: N}}"""
        payload = _get(url)
        series = {}
        if payload and isinstance(payload, list):
            for rec in payload:
                day = _unix_to_date(int(rec.get("date", 0)))
                if day not in wanted_dates:
                    continue
                circ = rec.get("totalCirculatingUSD")
                if isinstance(circ, dict):
                    val = _safe_float(circ.get("peggedUSD"))
                else:
                    val = _safe_float(circ)
                if not math.isnan(val):
                    series[day] = val
        return series

    print("[DEFILLAMA] Fetching TVL (all chains) ...")
    result["tvl"] = _dl_tvl("https://api.llama.fi/v2/historicalChainTvl")
    time.sleep(1)

    print("[DEFILLAMA] Fetching TVL Ethereum ...")
    result["tvl_eth"] = _dl_tvl("https://api.llama.fi/v2/historicalChainTvl/Ethereum")
    time.sleep(1)

    print("[DEFILLAMA] Fetching stables total (all) ...")
    result["stables"] = _dl_stables_chart("https://stablecoins.llama.fi/stablecoincharts/all")
    time.sleep(1)

    print("[DEFILLAMA] Fetching USDT ...")
    result["usdt"] = _dl_stables_chart("https://stablecoins.llama.fi/stablecoincharts/all?stablecoin=1")
    time.sleep(1)

    print("[DEFILLAMA] Fetching USDC ...")
    result["usdc"] = _dl_stables_chart("https://stablecoins.llama.fi/stablecoincharts/all?stablecoin=2")

    for k, v in result.items():
        print(f" [DEFILLAMA] {k}: {len(v)} days")

    _save_cache("defillama", result)
    return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ----- ALTERNATIVE.ME (Fear & Greed) ---------------------------------------
|
|||
|
|
|
|||
|
|
def fetch_fng(no_cache=False) -> dict:
    """Fetch the Alternative.me Fear & Greed index plus two derived series.

    Returns {"fng": daily index (fill-forwarded over the window),
             "fng_prev": lag-1 value,
             "fng_week": trailing 7-day mean}.
    """
    cached = None if no_cache else _load_cache("fng")
    if cached is not None:
        print("[FNG] Using cache")
        return cached

    print("[FNG] Fetching F&G history ...")
    # Do NOT pass date_format=us: that turns `timestamp` into MM/DD/YYYY
    # strings that int() cannot parse. The default response carries Unix
    # seconds (as a string), which parses correctly below.
    d = _get("https://api.alternative.me/fng/?limit=1000")

    raw = {}
    if d and "data" in d:
        for entry in d["data"]:
            ts = entry.get("timestamp")  # Unix seconds, usually a string
            val = _safe_float(entry.get("value"))
            if ts is None:
                continue
            try:
                day = _unix_to_date(int(ts))
            except (TypeError, ValueError):
                continue  # unparsable timestamp — skip entry
            if not math.isnan(val):
                raw[day] = val

    all_dates = list(_date_range_str(START_DT, END_DT))
    fng_ff = _fill_forward(raw, all_dates)

    # Derived: fng_prev (lag-1), fng_week (7-day rolling mean)
    ordered = sorted(fng_ff)
    fng_prev = {}
    fng_week = {}
    for idx, day in enumerate(ordered):
        fng_prev[day] = fng_ff.get(ordered[idx - 1], float("nan")) if idx >= 1 else float("nan")
        tail = [fng_ff.get(ordered[j], float("nan")) for j in range(max(0, idx - 6), idx + 1)]
        finite = [x for x in tail if not math.isnan(x)]
        fng_week[day] = float(np.mean(finite)) if finite else float("nan")

    result = {
        "fng": fng_ff,
        "fng_prev": fng_prev,
        "fng_week": fng_week,
    }

    for k, v in result.items():
        n = sum(1 for x in v.values() if not math.isnan(x))
        print(f" [FNG] {k}: {n} valid days")

    _save_cache("fng", result)
    return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ----- BLOCKCHAIN.INFO -----------------------------------------------------
|
|||
|
|
|
|||
|
|
def fetch_blockchain(no_cache=False) -> dict:
    """Fetch blockchain.info chart series; returns {indicator: {date_str: float}}."""
    cached = None if no_cache else _load_cache("blockchain")
    if cached is not None:
        print("[BLOCKCHAIN] Using cache")
        return cached

    wanted_dates = set(_date_range_str(START_DT, END_DT))
    result = {}

    # name -> (chart URL, scale factor applied multiplicatively to y values)
    CHARTS = {
        "hashrate": ("https://api.blockchain.info/charts/hash-rate", 1.0),
        "difficulty": ("https://api.blockchain.info/charts/difficulty", 1.0),
        "mcap_bc": ("https://api.blockchain.info/charts/market-cap", 1.0),
        "tx_blk": ("https://api.blockchain.info/charts/n-transactions-per-block", 1.0),
        "total_btc": ("https://api.blockchain.info/charts/total-bitcoins", 1e-8),
    }

    for name, (base_url, divisor) in CHARTS.items():
        print(f"[BLOCKCHAIN] Fetching {name} ...")
        payload = _get(f"{base_url}?timespan=90days&start=2025-12-01&format=json", timeout=30)
        series = {}
        if payload and "values" in payload:
            for entry in payload["values"]:
                day = _unix_to_date(int(entry.get("x", 0)))
                val = _safe_float(entry.get("y"))
                if day in wanted_dates and not math.isnan(val):
                    # NOTE: despite the name, 'divisor' is used as a multiplier.
                    series[day] = val * divisor
        result[name] = series
        print(f" [BLOCKCHAIN] {name}: {len(series)} days")
        time.sleep(1)

    _save_cache("blockchain", result)
    return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ----- COINGECKO -----------------------------------------------------------
|
|||
|
|
|
|||
|
|
def fetch_coingecko(no_cache=False) -> dict:
    """Fetch CoinGecko daily last prices for BTC and ETH.

    Returns {"btc_price": {date_str: float}, "eth_price": {date_str: float}}.
    """
    cached = None if no_cache else _load_cache("coingecko")
    if cached is not None:
        print("[COINGECKO] Using cache")
        return cached

    wanted_dates = set(_date_range_str(START_DT, END_DT))
    result = {"btc_price": {}, "eth_price": {}}

    def _market_chart(coin_id):
        # Single bulk range fetch — avoids per-day calls and rate limits.
        start_unix = int(START_DT.timestamp())
        end_unix = int(END_DT.timestamp())
        url = f"https://api.coingecko.com/api/v3/coins/{coin_id}/market_chart/range?vs_currency=usd&from={start_unix}&to={end_unix}"
        payload = _get(url, timeout=30)

        per_day = defaultdict(list)
        if payload and "prices" in payload:
            for ts_ms, price in payload["prices"]:
                day = _ms_to_date(int(ts_ms))
                if day in wanted_dates:
                    per_day[day].append(_safe_float(price))

        closes = {}
        for day, samples in per_day.items():
            finite = [x for x in samples if not math.isnan(x)]
            if finite:
                closes[day] = finite[-1]  # last price of the day
        return closes

    print("[COINGECKO] Fetching BTC price ...")
    result["btc_price"] = _market_chart("bitcoin")
    time.sleep(2)

    print("[COINGECKO] Fetching ETH price ...")
    result["eth_price"] = _market_chart("ethereum")

    for k, v in result.items():
        print(f" [COINGECKO] {k}: {len(v)} days")

    _save_cache("coingecko", result)
    return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Step 4: Assemble all indicator data
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def assemble_indicators(
    fred_data: dict,
    binance_data: dict,
    deribit_data: dict,
    cm_data: dict,
    dl_data: dict,
    fng_data: dict,
    bc_data: dict,
    cg_data: dict,
) -> dict:
    """
    Merge all source dicts into a single {indicator_name: {date_str: float}} dict.

    On a name collision the series with strictly more data points wins;
    ties keep the earlier source.
    """
    merged = {}
    source_dicts = (
        fred_data, binance_data, deribit_data, cm_data,
        dl_data, fng_data, bc_data, cg_data,
    )
    for src in source_dicts:
        for name, daily in src.items():
            existing = merged.get(name)
            if existing is None or len(daily) > len(existing):
                merged[name] = daily
    return merged
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Step 5: Correlation analysis
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
# Default lags (in trading days) tested for each indicator's predictiveness.
LAGS = [0, 1, 2, 3, 5, 7]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def compute_correlations(
    trading_days: list,
    returns: dict,
    indicators: dict,
    lags=None,
) -> list:
    """
    Test each indicator against daily returns at multiple lags.

    Args:
        trading_days: sorted 'YYYY-MM-DD' trading-day strings.
        returns: {date_str: normalized daily return}.
        indicators: {indicator_name: {date_str: float}}.
        lags: iterable of integer lags (in trading days) to test; defaults to
            the module-level LAGS. (Generalized — previously hard-coded.)

    Returns a flat list of row dicts for the correlation table; rows with
    fewer than 10 valid pairs carry NaN statistics and note='insufficient data'.
    """
    if lags is None:
        lags = LAGS

    # Build aligned return vector
    ret_vec = np.array([returns.get(d, float("nan")) for d in trading_days])

    rows = []

    for ind_name, daily in indicators.items():
        # Resolve the indicator value for each trading day, looking back up to
        # 5 calendar days (fill-forward) when the exact date is missing.
        ind_series = {}
        for d in trading_days:
            v = daily.get(d)
            if v is None:
                dt = datetime.strptime(d, "%Y-%m-%d")
                for back in range(1, 6):
                    candidate = (dt - timedelta(days=back)).strftime("%Y-%m-%d")
                    if candidate in daily:
                        v = daily[candidate]
                        break
            ind_series[d] = v if v is not None else float("nan")

        for lag in lags:
            # indicator at trading_day[i - lag] predicts return at trading_day[i]
            x_vals = []
            y_vals = []
            for i in range(len(trading_days)):
                y = ret_vec[i]
                if lag == 0:
                    src_day = trading_days[i]
                else:
                    src_idx = i - lag
                    if src_idx < 0:
                        # Not enough history for this lag — NaN pair, dropped below.
                        x_vals.append(float("nan"))
                        y_vals.append(y)
                        continue
                    src_day = trading_days[src_idx]

                x_vals.append(ind_series.get(src_day, float("nan")))
                y_vals.append(y)

            x_arr = np.array(x_vals, dtype=float)
            y_arr = np.array(y_vals, dtype=float)

            # Drop NaN pairs
            valid = ~(np.isnan(x_arr) | np.isnan(y_arr))
            xv = x_arr[valid]
            yv = y_arr[valid]
            n = int(valid.sum())

            if n < 10:
                rows.append({
                    "name": ind_name,
                    "lag": lag,
                    "n_days": n,
                    "pearson_r": float("nan"),
                    "pearson_p": float("nan"),
                    "spearman_r": float("nan"),
                    "spearman_p": float("nan"),
                    "stress_r": float("nan"),
                    "stress_p": float("nan"),
                    "note": "insufficient data",
                })
                continue

            # Pearson
            try:
                pr, pp = stats.pearsonr(xv, yv)
            except Exception:
                pr, pp = float("nan"), float("nan")

            # Spearman
            try:
                sr, sp = stats.spearmanr(xv, yv)
            except Exception:
                sr, sp = float("nan"), float("nan")

            # Point-biserial: stress day flag (return < -0.01)
            stress_flag = (yv < -0.01).astype(float)
            try:
                if stress_flag.std() > 0:
                    pbr, pbp = stats.pointbiserialr(stress_flag.astype(bool), xv)
                else:
                    # Every day in the same class — correlation undefined.
                    pbr, pbp = float("nan"), float("nan")
            except Exception:
                pbr, pbp = float("nan"), float("nan")

            rows.append({
                "name": ind_name,
                "lag": lag,
                "n_days": n,
                "pearson_r": float(pr),
                "pearson_p": float(pp),
                "spearman_r": float(sr),
                "spearman_p": float(sp),
                "stress_r": float(pbr),
                "stress_p": float(pbp),
                "note": "ok",
            })

    return rows
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Step 6: Build summary (best lag per indicator)
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def build_summary(corr_rows: list, indicators: dict, trading_days: list) -> list:
    """Pick the best lag per indicator and rank indicators by |pearson_r|.

    Args:
        corr_rows: per-(indicator, lag) correlation result dicts, as produced
            by the correlation step (keys: name, lag, n_days, pearson_r, ...).
        indicators: {indicator_name: {date_str: value}} daily series.
        trading_days: ordered list of trading-day date strings.

    Returns:
        One summary dict per indicator, sorted by absolute Pearson r at the
        indicator's best lag (descending). Indicators that never produced a
        valid correlation are still included, with NaN stats and lag = -1,
        so coverage problems remain visible in the report.
    """
    # Keep, per indicator, the row with the largest |pearson_r| across lags.
    best = {}
    for row in corr_rows:
        name = row["name"]
        r = row["pearson_r"]
        if math.isnan(r):
            continue
        if name not in best or abs(r) > abs(best[name]["pearson_r"]):
            best[name] = row

    # Add indicators that had no valid correlation at any lag.
    for name in indicators:
        if name not in best:
            # Count non-NaN observations over the trading-day grid.
            n_ok = sum(1 for d in trading_days if not math.isnan(
                _safe_float(indicators[name].get(d))
            ))
            best[name] = {
                "name": name,
                "lag": -1,
                "n_days": n_ok,
                "pearson_r": float("nan"),
                "pearson_p": float("nan"),
                "spearman_r": float("nan"),
                "spearman_p": float("nan"),
                "stress_r": float("nan"),
                "stress_p": float("nan"),
                "note": "no valid correlation",
            }

    # Map each indicator name to the API it came from (display only).
    SOURCE_MAP = {
        **{k: "fred" for k in FRED_SERIES},
        "funding_btc": "binance", "funding_eth": "binance",
        "oi_btc": "binance", "oi_eth": "binance",
        "ls_btc": "binance", "ls_eth": "binance",
        "ls_top": "binance", "taker": "binance",
        "vol24": "binance",
        "dvol_btc": "deribit", "dvol_eth": "deribit",
        "fund_dbt_btc": "deribit", "fund_dbt_eth": "deribit",
        "rcap_btc": "coinmetrics", "mvrv": "coinmetrics",
        "nupl": "coinmetrics", "addr_btc": "coinmetrics",
        "addr_eth": "coinmetrics", "txcnt": "coinmetrics",
        "fees_btc": "coinmetrics", "fees_eth": "coinmetrics",
        "nvt": "coinmetrics", "velocity": "coinmetrics",
        "sply_act": "coinmetrics", "rcap_eth": "coinmetrics",
        "tvl": "defillama", "tvl_eth": "defillama",
        "stables": "defillama", "usdt": "defillama",
        "usdc": "defillama",
        "fng": "alternative", "fng_prev": "alternative",
        "fng_week": "alternative",
        "hashrate": "blockchain", "difficulty": "blockchain",
        "mcap_bc": "blockchain", "tx_blk": "blockchain",
        "total_btc": "blockchain",
        "btc_price": "coingecko", "eth_price": "coingecko",
    }

    # NaN-only rows get key -1, which sorts below any real |r|.
    rows_sorted = sorted(
        best.values(),
        key=lambda x: abs(x["pearson_r"]) if not math.isnan(x["pearson_r"]) else -1,
        reverse=True,
    )

    summary = []
    for rank, row in enumerate(rows_sorted, 1):
        name = row["name"]
        # Significance stars: "*" for p < 0.05, "**" for p < 0.01.
        sig = ""
        if not math.isnan(row.get("pearson_p", float("nan"))):
            sig = "*" if row["pearson_p"] < 0.05 else ""
            sig += "*" if row["pearson_p"] < 0.01 else ""

        summary.append({
            "rank": rank,
            "name": name,
            "source": SOURCE_MAP.get(name, "?"),
            "best_lag": row["lag"],
            "best_r": row["pearson_r"],
            "best_p": row["pearson_p"],
            "spearman": row["spearman_r"],
            "stress_r": row["stress_r"],
            "n_days": row["n_days"],
            "sig": sig,
            "note": row.get("note", ""),
        })

    return summary
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
# Step 7: Save outputs
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def save_daily_csv(trading_days: list, indicators: dict, returns: dict):
    """Write one CSV row per trading day: daily return plus every indicator.

    Missing, NaN, or non-numeric indicator values are written as empty cells
    so the CSV loads cleanly into pandas/Excel.

    Args:
        trading_days: ordered list of trading-day date strings.
        indicators: {indicator_name: {date_str: value}} daily series.
        returns: {date_str: daily_return} mapping.
    """
    path = RUN_LOGS / f"exf_daily_{TS_NOW}.csv"
    cols = ["date", "daily_return"] + sorted(indicators.keys())
    with open(path, "w", newline="") as f:
        w = csv.DictWriter(f, fieldnames=cols)
        w.writeheader()
        for d in trading_days:
            row = {"date": d, "daily_return": returns.get(d, "")}
            for name, series in indicators.items():
                v = series.get(d, float("nan"))
                # Series values may be None or non-numeric (fetchers vary);
                # math.isnan would raise TypeError on those, so blank them
                # out along with NaN floats.
                if isinstance(v, (int, float)) and not math.isnan(v):
                    row[name] = v
                else:
                    row[name] = ""
            w.writerow(row)
    print(f"[SAVE] Daily values: {path}")
|||
|
|
|
|||
|
|
|
|||
|
|
def save_corr_csv(corr_rows: list):
    """Persist the full (indicator x lag) correlation table as CSV.

    Column order follows the key order of the first row dict. A no-op when
    corr_rows is empty (guard checked before any path/global access, so an
    empty run does no work and writes nothing).

    Args:
        corr_rows: list of per-(indicator, lag) result dicts sharing keys.
    """
    if not corr_rows:
        return
    path = RUN_LOGS / f"exf_correlation_{TS_NOW}.csv"
    cols = list(corr_rows[0].keys())
    with open(path, "w", newline="") as f:
        w = csv.DictWriter(f, fieldnames=cols)
        w.writeheader()
        w.writerows(corr_rows)
    print(f"[SAVE] Correlation table: {path}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def print_summary(summary: list):
    """Pretty-print the ranked indicator summary to stdout.

    Renders a fixed-width table (rank, name, source, best lag, Pearson r/p,
    Spearman r, stress r, sample size, significance stars), then lists the
    indicators significant at p < 0.05 as "TOP PREDICTORS" (first 15 shown).

    Args:
        summary: ranked rows from build_summary (assumed already sorted by
            |pearson_r| descending, so slicing the significant rows keeps
            the strongest predictors first).
    """
    print()
    print("=" * 110)
    print(f"{'rank':>4} {'name':<14} {'source':<12} {'best_lag':>8} {'best_r':>8} {'best_p':>8} "
          f"{'spearman':>8} {'stress_r':>8} {'n_days':>6} {'sig':>4}")
    print("=" * 110)
    for row in summary:
        # NaN statistics render as a padded "n/a" cell instead of "nan".
        r_str = f"{row['best_r']:+.4f}" if not math.isnan(row.get("best_r", float("nan"))) else "  n/a  "
        p_str = f"{row['best_p']:.4f}" if not math.isnan(row.get("best_p", float("nan"))) else "  n/a  "
        sp_str = f"{row['spearman']:+.4f}" if not math.isnan(row.get("spearman", float("nan"))) else "  n/a  "
        sr_str = f"{row['stress_r']:+.4f}" if not math.isnan(row.get("stress_r", float("nan"))) else "  n/a  "
        # lag == -1 marks "no valid correlation" rows (see build_summary).
        lag_str = f"lag={row['best_lag']}" if row['best_lag'] >= 0 else "   -  "
        print(
            f"{row['rank']:>4} {row['name']:<14} {row['source']:<12} {lag_str:>8} "
            f"{r_str:>8} {p_str:>8} {sp_str:>8} {sr_str:>8} {row['n_days']:>6} {row['sig']:>4}"
        )
    print("=" * 110)

    # Top 10 highlight
    sig_rows = [r for r in summary if not math.isnan(r.get("best_p", float("nan"))) and r["best_p"] < 0.05]
    print(f"\n[RESULT] {len(sig_rows)} indicators significant at p<0.05 (any lag)")
    if sig_rows:
        print("TOP PREDICTORS:")
        for r in sig_rows[:15]:
            print(f"  {r['rank']:>3}. {r['name']:<14} lag={r['best_lag']} "
                  f"r={r['best_r']:+.4f} p={r['best_p']:.4f} spearman={r['spearman']:+.4f} "
                  f"n={r['n_days']}")
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
def _fetch_source(banner: str, tag: str, fetch_fn, no_cache: bool) -> dict:
    """Run one data-source fetcher with uniform banner/status printing.

    Prints "--- {banner} ---", calls fetch_fn(no_cache), and reports either
    the indicator count or the failure. Any exception is caught so one bad
    source never aborts the whole run.

    Args:
        banner: section header label (e.g. "BINANCE DERIVATIVES").
        tag: short name used in the OK/FAILED status lines (e.g. "BINANCE").
        fetch_fn: fetcher taking a single no_cache bool, returning a dict.
        no_cache: when True, fetchers ignore and overwrite their caches.

    Returns:
        The fetcher's dict of indicator series, or {} on failure.
    """
    print(f"\n--- {banner} ---")
    try:
        data = fetch_fn(no_cache)
        print(f"  {tag}: {len(data)} indicators OK")
        return data
    except Exception as e:
        print(f"  {tag} FAILED: {e}")
        return {}


def main():
    """CLI entry point: fetch all ExF sources, correlate, save, and report."""
    parser = argparse.ArgumentParser(description="ExF historical correlation tester")
    parser.add_argument("--no-cache", action="store_true", help="Ignore and overwrite all caches")
    args = parser.parse_args()
    no_cache = args.no_cache

    print("=" * 70)
    print(" DOLPHIN ExF Correlation Tester")
    print(f" Date range: {START_STR} → 2026-02-25")
    print(f" Cache: {'DISABLED (--no-cache)' if no_cache else 'ENABLED'}")
    print("=" * 70)

    # --- Trading days (derived from the vbt_cache parquet files)
    trading_days = get_trading_days()
    if len(trading_days) == 0:
        print("[ERROR] No parquet files found in vbt_cache. Abort.")
        sys.exit(1)

    # --- Daily returns (the dependent variable for every correlation)
    print("\n[STEP 1] Loading daily returns ...")
    try:
        returns = load_daily_returns(trading_days)
    except RuntimeError as e:
        print(f"[ERROR] {e}")
        sys.exit(1)

    valid_ret = sum(1 for v in returns.values() if not math.isnan(v))
    print(f"  Returns: {valid_ret}/{len(trading_days)} valid")

    # --- Fetch all sources; each failure is isolated inside _fetch_source.
    print("\n[STEP 2] Fetching external factor data ...")
    fred_data = _fetch_source("FRED", "FRED", fetch_fred, no_cache)
    binance_data = _fetch_source("BINANCE DERIVATIVES", "BINANCE", fetch_binance, no_cache)
    deribit_data = _fetch_source("DERIBIT", "DERIBIT", fetch_deribit, no_cache)
    cm_data = _fetch_source("COINMETRICS", "COINMETRICS", fetch_coinmetrics, no_cache)
    dl_data = _fetch_source("DEFILLAMA", "DEFILLAMA", fetch_defillama, no_cache)
    fng_data = _fetch_source("FEAR & GREED (alternative.me)", "FNG", fetch_fng, no_cache)
    bc_data = _fetch_source("BLOCKCHAIN.INFO", "BLOCKCHAIN", fetch_blockchain, no_cache)
    cg_data = _fetch_source("COINGECKO", "COINGECKO", fetch_coingecko, no_cache)

    # --- Assemble all sources into one {name: {date: value}} mapping
    print("\n[STEP 3] Assembling indicators ...")
    indicators = assemble_indicators(
        fred_data, binance_data, deribit_data,
        cm_data, dl_data, fng_data, bc_data, cg_data,
    )
    print(f"  Total indicators assembled: {len(indicators)}")

    # Coverage report: how many trading days each indicator actually covers.
    print("\n  Coverage per indicator over trading days:")
    for name in sorted(indicators.keys()):
        series = indicators[name]
        n_ok = sum(
            1 for d in trading_days
            if not math.isnan(_safe_float(series.get(d)))
        )
        pct = 100 * n_ok / len(trading_days)
        # Thresholds: >=40 days OK, >=10 PARTIAL, otherwise FAIL.
        status = "OK" if n_ok >= 40 else ("PARTIAL" if n_ok >= 10 else "FAIL")
        print(f"    {name:<18} {n_ok:>3}/{len(trading_days)} ({pct:5.1f}%) [{status}]")

    # --- Correlation analysis across all configured lags
    print("\n[STEP 4] Computing correlations (lags 0-7) ...")
    corr_rows = compute_correlations(trading_days, returns, indicators)
    print(f"  Computed {len(corr_rows)} rows ({len(indicators)} indicators × {len(LAGS)} lags)")

    # --- Best-lag summary per indicator
    summary = build_summary(corr_rows, indicators, trading_days)

    # --- Save outputs
    print("\n[STEP 5] Saving outputs ...")
    save_daily_csv(trading_days, indicators, returns)
    save_corr_csv(corr_rows)

    # --- Print result table
    print("\n[STEP 6] Results")
    print_summary(summary)

    # Also print per-source fetch status for quick triage.
    print("\n[SOURCE STATUS]")
    source_ok = {
        "fred": "OK" if fred_data else "FAIL",
        "binance": "OK" if binance_data else "FAIL",
        "deribit": "OK" if deribit_data else "FAIL",
        "coinmetrics": "OK" if cm_data else "FAIL",
        "defillama": "OK" if dl_data else "FAIL",
        "alternative": "OK" if fng_data else "FAIL",
        "blockchain": "OK" if bc_data else "FAIL",
        "coingecko": "OK" if cg_data else "FAIL",
    }
    for src, status in source_ok.items():
        print(f"  {src:<14} {status}")

    print("\n[DONE] ExF correlation analysis complete.")
|||
|
|
|
|||
|
|
# Script entry point: run the full fetch → correlate → report pipeline.
if __name__ == "__main__":
    main()
|