initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
342
external_factors/backfill_liquidations_exf.py
Executable file
342
external_factors/backfill_liquidations_exf.py
Executable file
@@ -0,0 +1,342 @@
|
||||
"""
|
||||
backfill_liquidations_exf.py — Backfill liquidation ExF channels for 5y klines dates.
|
||||
|
||||
Fetches aggregate BTC liquidation data from Coinglass historical API and appends
|
||||
4 new channels (liq_vol_24h, liq_long_ratio, liq_z_score, liq_percentile) to the
|
||||
existing scan_000001__Indicators.npz files under EIGENVALUES_PATH.
|
||||
|
||||
Usage (from external_factors/ dir):
|
||||
python backfill_liquidations_exf.py
|
||||
python backfill_liquidations_exf.py --dry-run
|
||||
python backfill_liquidations_exf.py --start 2023-01-01 --end 2023-12-31
|
||||
python backfill_liquidations_exf.py --mode standalone
|
||||
|
||||
Output: each NPZ gains 4 new channels. Log → ../backfill_liquidations.log
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
import argparse
|
||||
import asyncio
|
||||
import math
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import numpy as np
|
||||
import aiohttp
|
||||
|
||||
# --- Paths (same as backfill_klines_exf.py) ---
# HCM_DIR: two levels up from this file, i.e. the parent of external_factors/.
HCM_DIR = Path(__file__).parent.parent
KLINES_DIR = HCM_DIR / "vbt_cache_klines"  # daily parquet klines; file stems supply the date list
# Absolute per-machine path to the per-date eigenvalues tree (one subdir per YYYY-MM-DD).
EIGENVALUES_PATH = Path(
    r"C:\Users\Lenovo\Documents\- Dolphin NG HD (NG3)\correlation_arb512\eigenvalues"
)
NPZ_FILENAME = "scan_000001__Indicators.npz"  # target for --mode append
LIQ_NPZ_FILENAME = "scan_000001__Liq_Indicators.npz"  # for --mode standalone
LOG_PATH = HCM_DIR / "backfill_liquidations.log"

# The 4 channels this script adds to each NPZ, in write order.
LIQ_KEYS = ["liq_vol_24h", "liq_long_ratio", "liq_z_score", "liq_percentile"]

# --- Coinglass endpoint ---
# Coinglass API v4 requires CG-API-KEY header
CG_URL_V4 = "https://open-api-v4.coinglass.com/api/futures/liquidation/aggregated-history"
RATE_DELAY = 2.0  # seconds between requests
# NOTE(review): RATE_DELAY is not referenced anywhere below — the effective delay is
# the --delay CLI option (same 2.0 s default); confirm whether this constant is stale.

# Configure logging
# Logs go both to LOG_PATH (UTF-8 file) and to stdout.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    handlers=[
        logging.FileHandler(str(LOG_PATH), encoding="utf-8"),
        logging.StreamHandler(sys.stdout),
    ],
)
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def parse_args():
    """Parse and return the CLI options for the liquidation backfill."""
    parser = argparse.ArgumentParser(description="Backfill liquidation ExF channels")
    parser.add_argument("--start", default=None, help="Start date YYYY-MM-DD (inclusive)")
    parser.add_argument("--end", default=None, help="End date YYYY-MM-DD (inclusive)")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--delay", type=float, default=2.0)
    parser.add_argument("--overwrite", action="store_true")
    parser.add_argument("--mode", default="append", choices=["append", "standalone"])
    parser.add_argument(
        "--api-key",
        default=None,
        help="Coinglass API key (or set COINGLASS_API_KEY env var)",
    )
    return parser.parse_args()
|
||||
|
||||
|
||||
def get_api_key(args) -> str:
    """Return the Coinglass API key: CLI flag wins, else COINGLASS_API_KEY, else ''."""
    import os

    if args.api_key:
        return args.api_key
    return os.environ.get("COINGLASS_API_KEY", "")
|
||||
|
||||
|
||||
async def fetch_coinglass_day(
    session: aiohttp.ClientSession, ds: str, api_key: str
) -> tuple:
    """
    Fetch liquidation bars for date string 'YYYY-MM-DD'.
    Returns (liq_vol_log, liq_long_ratio, success: bool).

    liq_vol_log is log10(total USD liquidated + 1); liq_long_ratio is the
    long share of the total (0.5 when nothing was liquidated). On any
    failure the neutral sentinel (0.0, 0.5, False) is returned.

    Uses Coinglass API v4 which requires CG-API-KEY header.
    Retries up to 3 times on timeout/exception; a 429 also consumes an attempt.
    """
    # Fail fast without a key — the v4 endpoint rejects unauthenticated calls.
    if not api_key:
        log.error(f" {ds}: No Coinglass API key provided")
        return (0.0, 0.5, False)

    # Coinglass v4 uses different time format (Unix seconds, not ms)
    yr, mo, dy = int(ds[:4]), int(ds[5:7]), int(ds[8:10])
    # Full UTC day: 00:00:00 through 23:59:59 inclusive.
    start_ts = int(datetime(yr, mo, dy, 0, 0, 0, tzinfo=timezone.utc).timestamp())
    end_ts = int(datetime(yr, mo, dy, 23, 59, 59, tzinfo=timezone.utc).timestamp())

    # v4 API params - uses 'startTime' and 'endTime' in seconds
    params = {
        "symbol": "BTC",
        "interval": "1h",
        "startTime": start_ts,
        "endTime": end_ts,
    }

    headers = {
        "CG-API-KEY": api_key,
        "Accept": "application/json",
    }

    for attempt in range(3):
        try:
            async with session.get(
                CG_URL_V4,
                params=params,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=15),
            ) as resp:
                # Rate limited: back off and burn one of the 3 attempts.
                if resp.status == 429:
                    log.warning(f" {ds}: rate limited (429) — sleeping 30s")
                    await asyncio.sleep(30)
                    continue
                # Auth failure is not retryable — bail out immediately.
                if resp.status == 403:
                    log.error(f" {ds}: HTTP 403 - Invalid or missing API key")
                    return (0.0, 0.5, False)
                if resp.status != 200:
                    log.warning(f" {ds}: HTTP {resp.status}")
                    return (0.0, 0.5, False)
                # content_type=None: accept whatever mime type Coinglass sends.
                data = await resp.json(content_type=None)

                # Parse v4 response
                # Response: {"code":"0","msg":"success","data": [{"t":1234567890, "longLiquidationUsd":123.0, "shortLiquidationUsd":456.0}, ...]}
                if data.get("code") != "0":
                    log.warning(f" {ds}: API error: {data.get('msg', 'unknown')}")
                    return (0.0, 0.5, False)

                bars = data.get("data", [])
                if not bars:
                    log.warning(f" {ds}: empty liquidation data")
                    return (0.0, 0.5, False)

                # Sum the hourly long/short USD liquidations over the day.
                long_total = sum(float(b.get("longLiquidationUsd", 0)) for b in bars)
                short_total = sum(float(b.get("shortLiquidationUsd", 0)) for b in bars)
                total = long_total + short_total

                # +1 keeps the log defined when total == 0.
                liq_vol_log = math.log10(total + 1.0)
                liq_long_ratio = (long_total / total) if total > 0 else 0.5

                return (liq_vol_log, liq_long_ratio, True)

        except asyncio.TimeoutError:
            log.warning(f" {ds}: timeout (attempt {attempt+1}/3)")
            await asyncio.sleep(10)
        except Exception as e:
            # Broad catch is deliberate: any transport/parse error gets a retry.
            log.warning(f" {ds}: error {e} (attempt {attempt+1}/3)")
            await asyncio.sleep(10)

    # All attempts exhausted — neutral sentinel.
    return (0.0, 0.5, False)
|
||||
|
||||
|
||||
def compute_derived_metrics(dates, raw_vols, raw_success):
    """Compute z_score and percentile across full series.

    For each date (processed in sorted order):
      - z-score of that day's volume vs. up to the 30 prior days' successful
        values (needs >= 5 such values, else (0.0, False));
      - percentile of that day's volume vs. all successful history up to and
        including that day (needs >= 10 values, else (0.5, False)).
    Failed days get the neutral pair for both metrics.
    Returns two dicts mapping date -> (value, success_flag).
    """
    ordered = sorted(dates)
    vol_arr = np.array([raw_vols.get(day, 0.0) for day in ordered])
    ok_arr = np.array([raw_success.get(day, False) for day in ordered])

    WINDOW = 30
    z_out, pct_out = {}, {}

    for idx, day in enumerate(ordered):
        if not ok_arr[idx]:
            # No data fetched for this day — neutral placeholders.
            z_out[day] = (0.0, False)
            pct_out[day] = (0.5, False)
            continue

        # z-score against successful values in the trailing 30-day window.
        lo = idx - WINDOW if idx > WINDOW else 0
        window_vals = vol_arr[lo:idx][ok_arr[lo:idx]]
        if window_vals.size >= 5:
            spread = window_vals.std() + 1e-8  # epsilon guards a zero-variance window
            z_out[day] = (float((vol_arr[idx] - window_vals.mean()) / spread), True)
        else:
            z_out[day] = (0.0, False)

        # Percentile against all successful history up to and including today.
        history = vol_arr[: idx + 1][ok_arr[: idx + 1]]
        if history.size >= 10:
            pct_out[day] = (float((history < vol_arr[idx]).sum()) / history.size, True)
        else:
            pct_out[day] = (0.5, False)

    return z_out, pct_out
|
||||
|
||||
|
||||
def append_liq_to_npz(npz_path, liq_values, overwrite, dry_run):
    """Append the 4 liq channels to the NPZ at *npz_path*.

    Args:
        npz_path: Path of the target NPZ (created minimally if missing).
        liq_values: {channel_name: (float value, bool success)} for LIQ_KEYS.
        overwrite: replace any existing liq_* channels instead of skipping.
        dry_run: compute everything but skip the final write.

    Returns:
        True if the file was (or would have been, under dry_run) written;
        False when skipped because liq channels already exist and overwrite
        is not set (idempotent skip).

    Fix vs. previous version: np.load() returns an NpzFile holding an open
    file handle; it is now closed (context manager) *before* savez_compressed
    rewrites the same path — rewriting while the handle is open can fail on
    Windows, which is where EIGENVALUES_PATH lives.
    """
    new_inds = np.array([liq_values[k][0] for k in LIQ_KEYS], dtype=np.float64)
    new_succ = np.array([liq_values[k][1] for k in LIQ_KEYS], dtype=np.bool_)

    if not npz_path.exists():
        # Create minimal NPZ (rare case): only the liq channels.
        names = np.array(LIQ_KEYS, dtype=object)
        inds = new_inds
        succ = new_succ
    else:
        # Read everything we need, then close the NpzFile before any rewrite.
        with np.load(str(npz_path), allow_pickle=True) as data:
            existing_names = [str(n) for n in data["api_names"]]
            ex_inds = np.asarray(data["api_indicators"])
            ex_succ = np.asarray(data["api_success"])

        if "liq_vol_24h" in existing_names and not overwrite:
            return False  # idempotent skip

        # Strip old liq channels if overwriting.
        if overwrite and "liq_vol_24h" in existing_names:
            keep = [i for i, n in enumerate(existing_names) if not n.startswith("liq_")]
            existing_names = [existing_names[i] for i in keep]
            ex_inds = ex_inds[keep]
            ex_succ = ex_succ[keep]

        names = np.array(existing_names + LIQ_KEYS, dtype=object)
        inds = np.concatenate([ex_inds.astype(np.float64), new_inds])
        succ = np.concatenate([ex_succ.astype(np.bool_), new_succ])

    if not dry_run:
        np.savez_compressed(
            str(npz_path), api_names=names, api_indicators=inds, api_success=succ
        )
    return True
|
||||
|
||||
|
||||
async def main_async(args):
    """Run the full backfill.

    Phase 1: fetch daily aggregate liquidation data from Coinglass.
    Phase 2: derive z-score and percentile across the fetched series.
    Phase 3: append (or write standalone) the 4 channels per date
             under EIGENVALUES_PATH/<date>/.
    """
    # Enumerate klines dates — one parquet per day; the file stem is the date string.
    parquet_files = sorted(KLINES_DIR.glob("*.parquet"))
    parquet_files = [p for p in parquet_files if "catalog" not in str(p)]
    dates = [p.stem for p in parquet_files]

    # Inclusive range filter; lexicographic compare works for YYYY-MM-DD stems.
    if args.start:
        dates = [d for d in dates if d >= args.start]
    if args.end:
        dates = [d for d in dates if d <= args.end]
    total = len(dates)

    log.info(f"Dates to process: {total}")
    log.info(f"Mode: {args.mode} Dry-run: {args.dry_run} Overwrite: {args.overwrite}")

    # Per-date raw results keyed by date string.
    raw_vols = {}
    raw_ratios = {}
    raw_success = {}

    # Get API key
    api_key = get_api_key(args)
    if not api_key:
        # Warn but continue — every fetch will then fail and record success=False.
        log.warning("No Coinglass API key provided! Use --api-key or set COINGLASS_API_KEY env var.")
        log.warning("Get a free API key at: https://www.coinglass.com/pricing")

    # Phase 1: Fetch raw data from Coinglass
    log.info("=== PHASE 1: Fetching Coinglass liquidation data ===")
    t0 = time.time()
    async with aiohttp.ClientSession() as session:
        for i, ds in enumerate(sorted(dates)):
            vol, ratio, ok = await fetch_coinglass_day(session, ds, api_key)
            raw_vols[ds] = vol
            raw_ratios[ds] = ratio
            raw_success[ds] = ok

            # Every 10th date, log progress with elapsed time and ETA.
            if (i + 1) % 10 == 0:
                elapsed = time.time() - t0
                # NOTE(review): ETA counts only the inter-request delay, not
                # request latency or retry sleeps — it underestimates.
                eta = (total - i - 1) * args.delay
                log.info(
                    f" [{i+1}/{total}] {ds} vol={vol:.3f} ratio={ratio:.3f} ok={ok}"
                    f" elapsed={elapsed/60:.1f}m eta={eta/60:.1f}m"
                )
            else:
                log.info(f" {ds} vol={vol:.3f} ratio={ratio:.3f} ok={ok}")

            # Rate-limit pacing between requests.
            await asyncio.sleep(args.delay)

    # Phase 2: Compute derived metrics
    log.info("=== PHASE 2: Computing z_score and percentile ===")
    z_scores, percentiles = compute_derived_metrics(dates, raw_vols, raw_success)

    # Phase 3: Append to NPZ files
    log.info(f"=== PHASE 3: Appending to NPZ files (mode={args.mode}) ===")
    written = skipped = errors = 0
    for ds in sorted(dates):
        # (value, success) pairs for the 4 channels of this date.
        liq_values = {
            "liq_vol_24h": (raw_vols.get(ds, 0.0), raw_success.get(ds, False)),
            "liq_long_ratio": (raw_ratios.get(ds, 0.5), raw_success.get(ds, False)),
            "liq_z_score": z_scores.get(ds, (0.0, False)),
            "liq_percentile": percentiles.get(ds, (0.5, False)),
        }

        out_dir = EIGENVALUES_PATH / ds
        if args.mode == "append":
            npz_path = out_dir / NPZ_FILENAME
        else:  # standalone
            npz_path = out_dir / LIQ_NPZ_FILENAME

        # NOTE(review): the directory is created even under --dry-run — confirm intended.
        out_dir.mkdir(parents=True, exist_ok=True)
        try:
            did_write = append_liq_to_npz(npz_path, liq_values, args.overwrite, args.dry_run)
            if did_write:
                written += 1
                log.debug(f" {ds}: written")
            else:
                skipped += 1
        except Exception as e:
            # Keep going on per-file failures; they are tallied in the summary.
            log.error(f" {ds}: NPZ write error — {e}")
            errors += 1

    # Summary (elapsed measured from the start of Phase 1).
    elapsed_total = time.time() - t0
    log.info(f"{'='*60}")
    log.info(f"Liquidation ExF Backfill COMPLETE")
    log.info(f"Written: {written}")
    log.info(f"Skipped: {skipped} (already had liq channels)")
    log.info(f"Errors: {errors}")
    log.info(f"Runtime: {elapsed_total/60:.1f}m")
    log.info(f"{'='*60}")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments and drive the async backfill to completion."""
    cli_args = parse_args()
    asyncio.run(main_async(cli_args))
|
||||
|
||||
|
||||
# Script entry point: python backfill_liquidations_exf.py [options]
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user