343 lines
12 KiB
Python
343 lines
12 KiB
Python
|
|
"""
|
||
|
|
backfill_liquidations_exf.py — Backfill liquidation ExF channels for 5y klines dates.
|
||
|
|
|
||
|
|
Fetches aggregate BTC liquidation data from Coinglass historical API and appends
|
||
|
|
4 new channels (liq_vol_24h, liq_long_ratio, liq_z_score, liq_percentile) to the
|
||
|
|
existing scan_000001__Indicators.npz files under EIGENVALUES_PATH.
|
||
|
|
|
||
|
|
Usage (from external_factors/ dir):
|
||
|
|
python backfill_liquidations_exf.py
|
||
|
|
python backfill_liquidations_exf.py --dry-run
|
||
|
|
python backfill_liquidations_exf.py --start 2023-01-01 --end 2023-12-31
|
||
|
|
python backfill_liquidations_exf.py --mode standalone
|
||
|
|
|
||
|
|
Output: each NPZ gains 4 new channels. Log → ../../backfill_liquidations.log
|
||
|
|
"""
|
||
|
|
|
||
|
|
import sys
|
||
|
|
import time
|
||
|
|
import argparse
|
||
|
|
import asyncio
|
||
|
|
import math
|
||
|
|
import logging
|
||
|
|
from pathlib import Path
|
||
|
|
from datetime import datetime, timezone
|
||
|
|
|
||
|
|
import numpy as np
|
||
|
|
import aiohttp
|
||
|
|
|
||
|
|
# --- Paths (same as backfill_klines_exf.py) ---
HCM_DIR = Path(__file__).parent.parent
KLINES_DIR = HCM_DIR / "vbt_cache_klines"
EIGENVALUES_PATH = Path(
    r"C:\Users\Lenovo\Documents\- Dolphin NG HD (NG3)\correlation_arb512\eigenvalues"
)
# Target NPZ inside each per-date directory under EIGENVALUES_PATH.
NPZ_FILENAME = "scan_000001__Indicators.npz"
LIQ_NPZ_FILENAME = "scan_000001__Liq_Indicators.npz"  # for --mode standalone
LOG_PATH = HCM_DIR / "backfill_liquidations.log"

# Channel names appended to each NPZ; order here is the append order.
LIQ_KEYS = ["liq_vol_24h", "liq_long_ratio", "liq_z_score", "liq_percentile"]

# --- Coinglass endpoint ---
# Coinglass API v4 requires CG-API-KEY header
CG_URL_V4 = "https://open-api-v4.coinglass.com/api/futures/liquidation/aggregated-history"
RATE_DELAY = 2.0  # seconds between requests
|
||
|
|
|
||
|
|
# Configure logging: mirror every message to both the log file and stdout.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    handlers=[
        logging.FileHandler(str(LOG_PATH), encoding="utf-8"),
        logging.StreamHandler(sys.stdout),
    ],
)
log = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
|
||
|
|
def parse_args():
    """Parse command-line options for the liquidation backfill run."""
    parser = argparse.ArgumentParser(description="Backfill liquidation ExF channels")
    parser.add_argument("--start", default=None, help="Start date YYYY-MM-DD (inclusive)")
    parser.add_argument("--end", default=None, help="End date YYYY-MM-DD (inclusive)")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--delay", type=float, default=2.0)
    parser.add_argument("--overwrite", action="store_true")
    parser.add_argument("--mode", default="append", choices=["append", "standalone"])
    parser.add_argument(
        "--api-key",
        default=None,
        help="Coinglass API key (or set COINGLASS_API_KEY env var)",
    )
    return parser.parse_args()
|
||
|
|
|
||
|
|
|
||
|
|
def get_api_key(args) -> str:
    """Return the Coinglass API key: CLI flag wins, else the env var, else ''."""
    import os

    if args.api_key:
        return args.api_key
    return os.environ.get("COINGLASS_API_KEY", "")
|
||
|
|
|
||
|
|
|
||
|
|
async def fetch_coinglass_day(
    session: aiohttp.ClientSession, ds: str, api_key: str
) -> tuple:
    """
    Fetch liquidation bars for date string 'YYYY-MM-DD'.
    Returns (liq_vol_log, liq_long_ratio, success: bool).

    Uses Coinglass API v4 which requires CG-API-KEY header.

    liq_vol_log is log10(total USD liquidated that day + 1); liq_long_ratio is
    the long share of that total (0.5 when the total is zero).  On any failure
    the neutral fallback (0.0, 0.5, False) is returned.
    """
    # Fail fast: without a key the v4 API would reject the request anyway.
    if not api_key:
        log.error(f" {ds}: No Coinglass API key provided")
        return (0.0, 0.5, False)

    # Coinglass v4 uses different time format (Unix seconds, not ms)
    yr, mo, dy = int(ds[:4]), int(ds[5:7]), int(ds[8:10])
    start_ts = int(datetime(yr, mo, dy, 0, 0, 0, tzinfo=timezone.utc).timestamp())
    end_ts = int(datetime(yr, mo, dy, 23, 59, 59, tzinfo=timezone.utc).timestamp())

    # v4 API params - uses 'startTime' and 'endTime' in seconds
    params = {
        "symbol": "BTC",
        "interval": "1h",
        "startTime": start_ts,
        "endTime": end_ts,
    }

    headers = {
        "CG-API-KEY": api_key,
        "Accept": "application/json",
    }

    # Up to 3 attempts: 429 sleeps 30s then retries; timeouts/errors sleep 10s.
    for attempt in range(3):
        try:
            async with session.get(
                CG_URL_V4,
                params=params,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=15),
            ) as resp:
                if resp.status == 429:
                    log.warning(f" {ds}: rate limited (429) — sleeping 30s")
                    await asyncio.sleep(30)
                    continue
                if resp.status == 403:
                    # Auth failure will not heal on retry — bail immediately.
                    log.error(f" {ds}: HTTP 403 - Invalid or missing API key")
                    return (0.0, 0.5, False)
                if resp.status != 200:
                    # NOTE(review): other non-200 statuses (e.g. transient 5xx)
                    # are NOT retried — presumably intentional; confirm.
                    log.warning(f" {ds}: HTTP {resp.status}")
                    return (0.0, 0.5, False)
                data = await resp.json(content_type=None)

                # Parse v4 response
                # Response: {"code":"0","msg":"success","data": [{"t":1234567890, "longLiquidationUsd":123.0, "shortLiquidationUsd":456.0}, ...]}
                if data.get("code") != "0":
                    log.warning(f" {ds}: API error: {data.get('msg', 'unknown')}")
                    return (0.0, 0.5, False)

                bars = data.get("data", [])
                if not bars:
                    log.warning(f" {ds}: empty liquidation data")
                    return (0.0, 0.5, False)

                # Sum long/short USD liquidations over all hourly bars for the day.
                long_total = sum(float(b.get("longLiquidationUsd", 0)) for b in bars)
                short_total = sum(float(b.get("shortLiquidationUsd", 0)) for b in bars)
                total = long_total + short_total

                # log10(+1) compresses the large dynamic range of daily volumes.
                liq_vol_log = math.log10(total + 1.0)
                liq_long_ratio = (long_total / total) if total > 0 else 0.5

                return (liq_vol_log, liq_long_ratio, True)

        except asyncio.TimeoutError:
            log.warning(f" {ds}: timeout (attempt {attempt+1}/3)")
            await asyncio.sleep(10)
        except Exception as e:
            log.warning(f" {ds}: error {e} (attempt {attempt+1}/3)")
            await asyncio.sleep(10)

    # All attempts exhausted.
    return (0.0, 0.5, False)
|
||
|
|
|
||
|
|
|
||
|
|
def compute_derived_metrics(dates, raw_vols, raw_success):
    """Derive rolling z-scores and expanding percentiles for liquidation volume.

    Returns two dicts keyed by date string, each mapping to a ``(value, ok)``
    tuple.  A metric is flagged ``ok=False`` when the day itself failed to
    fetch, or when too little successful history exists (fewer than 5 window
    samples for the z-score; fewer than 10 total samples for the percentile).
    """
    WINDOW = 30  # trailing days used as the z-score baseline

    ordered = sorted(dates)
    vols = np.array([raw_vols.get(d, 0.0) for d in ordered])
    success = np.array([raw_success.get(d, False) for d in ordered])

    z_scores, percentiles = {}, {}

    for idx, day in enumerate(ordered):
        if not success[idx]:
            # No usable fetch for this day — emit neutral placeholders.
            z_scores[day] = (0.0, False)
            percentiles[day] = (0.5, False)
            continue

        # z-score vs the trailing (up to) 30-day window, excluding today,
        # restricted to successfully-fetched days.
        lo = max(0, idx - WINDOW)
        window = vols[lo:idx][success[lo:idx]]
        if len(window) < 5:
            z_scores[day] = (0.0, False)
        else:
            z_scores[day] = (
                float((vols[idx] - window.mean()) / (window.std() + 1e-8)),
                True,
            )

        # Percentile rank vs all successful history up to and including today.
        hist = vols[: idx + 1][success[: idx + 1]]
        if len(hist) < 10:
            percentiles[day] = (0.5, False)
        else:
            percentiles[day] = (float((hist < vols[idx]).sum()) / len(hist), True)

    return z_scores, percentiles
|
||
|
|
|
||
|
|
|
||
|
|
def append_liq_to_npz(npz_path, liq_values, overwrite, dry_run):
    """Append the 4 liq channels to an existing NPZ (or create a minimal one).

    Parameters
    ----------
    npz_path : Path
        Target NPZ file inside the per-date eigenvalues directory.
    liq_values : dict
        Maps each name in ``LIQ_KEYS`` to a ``(value, success)`` tuple.
    overwrite : bool
        Strip and re-append pre-existing ``liq_*`` channels instead of skipping.
    dry_run : bool
        Compute everything but skip the actual write.

    Returns
    -------
    bool
        True if the file was (or would be) written, False on idempotent skip.
    """
    if not npz_path.exists():
        # Create minimal NPZ (rare case): only the liq channels.
        names = np.array(LIQ_KEYS, dtype=object)
        inds = np.array([liq_values[k][0] for k in LIQ_KEYS], dtype=np.float64)
        succ = np.array([liq_values[k][1] for k in LIQ_KEYS], dtype=np.bool_)
    else:
        # Use a context manager so the NpzFile handle is closed before we
        # re-open the same path for writing below (the original leaked the
        # handle; an open handle can block the rewrite on Windows).
        with np.load(str(npz_path), allow_pickle=True) as data:
            existing_names = [str(n) for n in data["api_names"]]

            if "liq_vol_24h" in existing_names and not overwrite:
                return False  # idempotent skip

            # Strip old liq channels if overwriting
            if overwrite and "liq_vol_24h" in existing_names:
                keep = [
                    i
                    for i, n in enumerate(existing_names)
                    if not n.startswith("liq_")
                ]
                existing_names = [existing_names[i] for i in keep]
                ex_inds = data["api_indicators"][keep]
                ex_succ = data["api_success"][keep]
            else:
                ex_inds = data["api_indicators"]
                ex_succ = data["api_success"]

        names = np.array(existing_names + LIQ_KEYS, dtype=object)
        inds = np.concatenate(
            [
                ex_inds.astype(np.float64),
                np.array([liq_values[k][0] for k in LIQ_KEYS], dtype=np.float64),
            ]
        )
        succ = np.concatenate(
            [
                ex_succ.astype(np.bool_),
                np.array([liq_values[k][1] for k in LIQ_KEYS], dtype=np.bool_),
            ]
        )

    if not dry_run:
        np.savez_compressed(
            str(npz_path), api_names=names, api_indicators=inds, api_success=succ
        )
    return True
|
||
|
|
|
||
|
|
|
||
|
|
async def main_async(args):
    """Run the three backfill phases: fetch raw data, derive metrics, write NPZs.

    Dates are enumerated from the parquet files in KLINES_DIR (one file per
    day, stem assumed to be 'YYYY-MM-DD'), optionally narrowed by --start/--end.
    """
    # Enumerate klines dates
    parquet_files = sorted(KLINES_DIR.glob("*.parquet"))
    parquet_files = [p for p in parquet_files if "catalog" not in str(p)]
    dates = [p.stem for p in parquet_files]  # stems are date strings — TODO confirm

    # Lexicographic comparison works because the stems are ISO dates.
    if args.start:
        dates = [d for d in dates if d >= args.start]
    if args.end:
        dates = [d for d in dates if d <= args.end]
    total = len(dates)

    log.info(f"Dates to process: {total}")
    log.info(f"Mode: {args.mode} Dry-run: {args.dry_run} Overwrite: {args.overwrite}")

    # Per-date raw results keyed by date string.
    raw_vols = {}
    raw_ratios = {}
    raw_success = {}

    # Get API key
    api_key = get_api_key(args)
    if not api_key:
        # Continue anyway: every fetch fails soft and is flagged unsuccessful.
        log.warning("No Coinglass API key provided! Use --api-key or set COINGLASS_API_KEY env var.")
        log.warning("Get a free API key at: https://www.coinglass.com/pricing")

    # Phase 1: Fetch raw data from Coinglass
    log.info("=== PHASE 1: Fetching Coinglass liquidation data ===")
    t0 = time.time()
    async with aiohttp.ClientSession() as session:
        for i, ds in enumerate(sorted(dates)):
            vol, ratio, ok = await fetch_coinglass_day(session, ds, api_key)
            raw_vols[ds] = vol
            raw_ratios[ds] = ratio
            raw_success[ds] = ok

            # Every 10th date, log progress with an elapsed/ETA estimate.
            if (i + 1) % 10 == 0:
                elapsed = time.time() - t0
                # ETA counts only the remaining inter-request delays, not
                # request time itself — an optimistic lower bound.
                eta = (total - i - 1) * args.delay
                log.info(
                    f" [{i+1}/{total}] {ds} vol={vol:.3f} ratio={ratio:.3f} ok={ok}"
                    f" elapsed={elapsed/60:.1f}m eta={eta/60:.1f}m"
                )
            else:
                log.info(f" {ds} vol={vol:.3f} ratio={ratio:.3f} ok={ok}")

            # Courtesy delay between requests to avoid rate limiting.
            await asyncio.sleep(args.delay)

    # Phase 2: Compute derived metrics
    log.info("=== PHASE 2: Computing z_score and percentile ===")
    z_scores, percentiles = compute_derived_metrics(dates, raw_vols, raw_success)

    # Phase 3: Append to NPZ files
    log.info(f"=== PHASE 3: Appending to NPZ files (mode={args.mode}) ===")
    written = skipped = errors = 0
    for ds in sorted(dates):
        # (value, success) per channel; failed days carry neutral values.
        liq_values = {
            "liq_vol_24h": (raw_vols.get(ds, 0.0), raw_success.get(ds, False)),
            "liq_long_ratio": (raw_ratios.get(ds, 0.5), raw_success.get(ds, False)),
            "liq_z_score": z_scores.get(ds, (0.0, False)),
            "liq_percentile": percentiles.get(ds, (0.5, False)),
        }

        out_dir = EIGENVALUES_PATH / ds
        if args.mode == "append":
            npz_path = out_dir / NPZ_FILENAME
        else:  # standalone
            npz_path = out_dir / LIQ_NPZ_FILENAME

        out_dir.mkdir(parents=True, exist_ok=True)
        try:
            did_write = append_liq_to_npz(npz_path, liq_values, args.overwrite, args.dry_run)
            if did_write:
                written += 1
                log.debug(f" {ds}: written")
            else:
                skipped += 1
        except Exception as e:
            log.error(f" {ds}: NPZ write error — {e}")
            errors += 1

    # Final summary (runtime measured from the start of phase 1).
    elapsed_total = time.time() - t0
    log.info(f"{'='*60}")
    log.info(f"Liquidation ExF Backfill COMPLETE")
    log.info(f"Written: {written}")
    log.info(f"Skipped: {skipped} (already had liq channels)")
    log.info(f"Errors: {errors}")
    log.info(f"Runtime: {elapsed_total/60:.1f}m")
    log.info(f"{'='*60}")
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Entry point: parse CLI arguments and drive the async backfill."""
    asyncio.run(main_async(parse_args()))
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
|
||
|
|
main()
|