"""
test_deribit_api_parity.py
==========================
Validates that the current Deribit API call format + parser returns values
that match pre-existing ExtF data stored in the NG3 eigenvalue NPZ cache.

BACKGROUND
----------
The Deribit API changed (date unknown) and an agent added start_timestamp /
end_timestamp parameters to make requests work again.  The user asked for
explicit parity validation BEFORE locking in those params.

WHAT THIS TEST DOES
-------------------
For each "known" date that has ground-truth data in the NPZ cache:
  1. Load the stored value (ground truth, pre-API-change)
  2. Re-query Deribit with several candidate endpoint+param combinations
  3. Compare each result to ground truth (absolute tolerance decides
     PASS / FAIL; the relative difference is reported for information only)
  4. PASS / FAIL per candidate, per indicator

The candidate that produces the best parity across all known dates is the one
that should be locked in as the production Deribit URL scheme.

INDICATORS COVERED (ACBv6 minimum requirement)
----------------------------------------------
  fund_dbt_btc — BTC-PERPETUAL 8h funding rate   ← ACBv6 primary signal
  fund_dbt_eth — ETH-PERPETUAL 8h funding rate
  dvol_btc     — BTC Deribit volatility index (hourly close)
  dvol_eth     — ETH Deribit volatility index (hourly close)

USAGE
-----
  python external_factors/test_deribit_api_parity.py

  # Quick run — funding only (fastest, most critical for ACBv6):
  python external_factors/test_deribit_api_parity.py --indicators fund

  # Verbose — show raw responses:
  python external_factors/test_deribit_api_parity.py --verbose

INTERPRETING RESULTS
--------------------
  LOCKED IN: All parity checks PASS → endpoint confirmed
  MISMATCH:  Values differ > tolerance → endpoint is wrong / format changed
  NO DATA:   Every request for that candidate/indicator failed to fetch/parse
  SKIP:      No NPZ data for that date (not a failure)

TOLERANCES
----------
  fund_dbt_btc / fund_dbt_eth : abs ≤ 1e-7 (funding rates are tiny)
  dvol_btc / dvol_eth         : abs ≤ 0.5  (DVOL in vol-points)
"""

import argparse
import asyncio
import json
import math
import sys
import time
import traceback
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np

try:
    import aiohttp
except ImportError:  # pragma: no cover - depends on the environment
    # The parsers and URL builders are pure functions; keep them importable
    # without the HTTP stack.  run_parity_check() reports the missing package.
    aiohttp = None

# Force UTF-8 output so the arrows / bullets in the report survive on consoles
# with a legacy code page.  Some stdout replacements (e.g. StringIO-like
# objects) have no reconfigure(); in that case keep whatever encoding is set.
try:
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
except (AttributeError, ValueError, OSError):
    pass

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
_HERE = Path(__file__).resolve().parent
_EIGENVALUES_PATH = Path(r"C:\Users\Lenovo\Documents\- Dolphin NG HD (NG3)\correlation_arb512\eigenvalues")

# Known dates with confirmed NPZ data in the gold window (2025-12-31→2026-02-26).
# Add more as the cache grows. Values were stored by the NG5 scanner.
KNOWN_DATES = [
    "2026-01-02",
    "2026-01-03",
    "2026-01-04",
    "2026-01-05",
    "2026-01-06",
    "2026-01-07",
    "2026-01-08",
    "2026-01-21",
]

# ---------------------------------------------------------------------------
# Tolerances (per indicator)
# ---------------------------------------------------------------------------
TOLERANCES = {
    "fund_dbt_btc": 1e-7,
    "fund_dbt_eth": 1e-7,
    "dvol_btc": 0.5,
    "dvol_eth": 0.5,
}

# Indicators extracted from the NPZ cache (same set as the TOLERANCES keys).
_TARGET_INDICATORS = frozenset({"fund_dbt_btc", "fund_dbt_eth", "dvol_btc", "dvol_eth"})

# Candidate / indicator that decide the process exit code in main().
# PREFERRED_FUNDING_CANDIDATE must match a label built by build_candidate_urls().
PREFERRED_FUNDING_CANDIDATE = "A_history_23utc"
CRITICAL_INDICATOR = "fund_dbt_btc"

_DERIBIT_PUBLIC_API = "https://www.deribit.com/api/v2/public"

# Statuses that count as "skip" (nothing could be compared), not as a mismatch.
_SKIP_STATUSES = frozenset({"FETCH_FAIL", "PARSE_FAIL"})


# ---------------------------------------------------------------------------
# Ground-truth loader
# ---------------------------------------------------------------------------
def load_npz_ground_truth(date_str: str, base_path: Optional[Path] = None) -> Optional[dict]:
    """
    Load Deribit indicator values stored in an NG3 scan NPZ for *date_str*.

    The first file (in sorted name order) matching ``scan_*__Indicators.npz``
    inside ``<base_path>/<date_str>`` is read.  Only the four Deribit
    indicators are returned, and only entries whose ``api_success`` flag is
    truthy (all entries when the archive has no ``api_success`` array).
    Non-finite stored values are dropped because they cannot be compared.

    Args:
        date_str:  Directory name of the day, e.g. ``"2026-01-02"``.
        base_path: Root of the eigenvalue cache; defaults to the module-level
                   ``_EIGENVALUES_PATH``.

    Returns:
        dict {indicator_name: float}, or None if there is no usable data.
    """
    root = _EIGENVALUES_PATH if base_path is None else Path(base_path)
    date_path = root / date_str
    if not date_path.exists():
        return None

    files = sorted(date_path.glob("scan_*__Indicators.npz"))
    if not files:
        return None

    # NOTE: allow_pickle=True executes pickled payloads; only point this at
    # trusted, locally produced cache files.
    # The context manager closes the underlying zip handle (np.load on an
    # .npz otherwise keeps the file open until garbage collection).
    with np.load(files[0], allow_pickle=True) as npz:
        if "api_names" not in npz.files or "api_indicators" not in npz.files:
            return None
        names = list(npz["api_names"])
        vals = npz["api_indicators"]
        if "api_success" in npz.files:
            succ = npz["api_success"]
        else:
            succ = np.ones(len(names), dtype=bool)

    result = {}
    for raw_name, value, ok in zip(names, vals, succ):
        if not ok:
            continue
        name = raw_name.decode() if isinstance(raw_name, bytes) else str(raw_name)
        if name not in _TARGET_INDICATORS:
            continue
        number = float(value)
        if math.isfinite(number):
            result[name] = number
    return result if result else None


# ---------------------------------------------------------------------------
# Endpoint candidates
# ---------------------------------------------------------------------------
def _day_epoch_ms(date_str: str, hour: int = 0) -> int:
    """Return Unix milliseconds for a given date + hour (UTC).

    *date_str* is read positionally as ``YYYY?MM?DD``; a ValueError is raised
    by ``int()`` / ``datetime`` if the fields are not a valid date.
    """
    dt = datetime(
        int(date_str[:4]),
        int(date_str[5:7]),
        int(date_str[8:10]),
        hour,
        0,
        0,
        tzinfo=timezone.utc,
    )
    return int(dt.timestamp() * 1000)


def ts23utc(date_str: str) -> int:
    """Return Unix ms for 23:00 UTC on date_str — canonical NG5 scanner capture time."""
    return _day_epoch_ms(date_str, hour=23)


def _funding_url(method: str, instrument: str, start_ms: int, end_ms: int) -> str:
    """Build a funding-rate URL for *method* on *instrument* over [start_ms, end_ms]."""
    return (
        f"{_DERIBIT_PUBLIC_API}/{method}"
        f"?instrument_name={instrument}"
        f"&start_timestamp={start_ms}&end_timestamp={end_ms}"
    )


def _dvol_url(currency: str, resolution: str, start_ms: int, end_ms: int) -> str:
    """Build a get_volatility_index_data URL; *resolution* may carry extra query params."""
    return (
        f"{_DERIBIT_PUBLIC_API}/get_volatility_index_data"
        f"?currency={currency}&resolution={resolution}"
        f"&start_timestamp={start_ms}&end_timestamp={end_ms}"
    )


def build_candidate_urls(date_str: str) -> dict:
    """
    Build all candidate URL variants for a historical date.

    Returns dict: { candidate_label: {indicator: (url, parser_name, target_ts_ms), ...} }
    where *parser_name* is a key of PARSERS and *target_ts_ms* is the timestamp
    handed to that parser (0 means "no target, use the parser's default").
    """
    day_start = _day_epoch_ms(date_str, hour=0)
    next_start = day_start + 86400_000  # +24h
    ts23 = ts23utc(date_str)            # 23:00 UTC — canonical NG5 capture time

    perpetuals = (("fund_dbt_btc", "BTC-PERPETUAL"), ("fund_dbt_eth", "ETH-PERPETUAL"))
    currencies = (("dvol_btc", "BTC"), ("dvol_eth", "ETH"))

    # Funding: NG5 scanner confirmed to run at 23:00 UTC.
    # We use get_funding_rate_history (full day) then extract the 23:00 UTC entry.
    # Candidate variants test different windows and parsers.
    fund_urls = {
        # CANDIDATE A (EXPECTED CORRECT): get_funding_rate_history full day → 23:00 UTC entry
        "A_history_23utc": {
            ind: (
                _funding_url("get_funding_rate_history", inst, day_start, next_start),
                "parse_history_at_23utc",
                ts23,
            )
            for ind, inst in perpetuals
        },
        # CANDIDATE B (AGENT FIX — expected wrong): get_funding_rate_value over full day
        "B_value_fullday_agentfix": {
            ind: (
                _funding_url("get_funding_rate_value", inst, day_start, next_start),
                "parse_scalar_result",
                0,
            )
            for ind, inst in perpetuals
        },
        # CANDIDATE C: get_funding_rate_history narrow window (-2h / +1h around
        # 23:00) → entry closest to 23:00
        "C_history_narrow23": {
            ind: (
                _funding_url(
                    "get_funding_rate_history", inst, ts23 - 7200_000, ts23 + 3600_000
                ),
                "parse_history_at_23utc",
                ts23,
            )
            for ind, inst in perpetuals
        },
    }

    # DVOL: hourly resolution; scanner at 23:00 UTC → take candle closest to 23:00
    dvol_urls = {
        # CANDIDATE D: get_volatility_index_data, 1h resolution, full day
        "D_dvol_1h_fullday": {
            ind: (
                _dvol_url(cur, "3600", day_start, next_start),
                "parse_dvol_at_23utc",
                ts23,
            )
            for ind, cur in currencies
        },
        # CANDIDATE E: agent's variant — resolution=60 + count=10, last row.
        # NOTE(review): Deribit documents `resolution` in seconds, so 60 would
        # be 1-minute candles rather than 60-minute ones — confirm.
        "E_dvol_60min_count10": {
            ind: (
                _dvol_url(cur, "60&count=10", day_start, next_start),
                "parse_dvol_last",
                0,
            )
            for ind, cur in currencies
        },
    }

    # Merge
    all_candidates = {}
    all_candidates.update(fund_urls)
    all_candidates.update(dvol_urls)
    return all_candidates


# ---------------------------------------------------------------------------
# Parsers
# ---------------------------------------------------------------------------
def parse_history_at_23utc(d: dict, target_ts_ms: int = 0) -> Optional[float]:
    """
    Parse get_funding_rate_history response.

    Returns interest_8h from the entry whose ``timestamp`` is CLOSEST to
    *target_ts_ms* (23:00 UTC on the target date — the canonical NG5 capture
    time).  With ``target_ts_ms <= 0`` (e.g. live/realtime call) the entry with
    the largest timestamp is used instead.

    Returns None when the payload is not the expected list of dicts or when
    the selected entry has no ``interest_8h`` — a missing field must not be
    reported as a 0.0 funding rate, which could pass the tolerance check.
    """
    if not isinstance(d, dict) or "result" not in d:
        return None
    entries = d["result"]
    if not isinstance(entries, list) or not entries:
        return None
    try:
        ordered = sorted(entries, key=lambda x: x.get("timestamp", 0))
        if target_ts_ms > 0:
            # Find entry closest to 23:00 UTC for the target date
            best = min(ordered, key=lambda x: abs(x.get("timestamp", 0) - target_ts_ms))
        else:
            # Live call: take last entry (most recent)
            best = ordered[-1]
        rate = best.get("interest_8h")
        return None if rate is None else float(rate)
    except (AttributeError, TypeError, KeyError, ValueError):
        return None


def parse_scalar_result(d: dict) -> Optional[float]:
    """Parse get_funding_rate_value response — result is a scalar.

    If the API returned a non-empty history list anyway, the ``interest_8h``
    of the entry with the largest timestamp is used.  Returns None when no
    numeric value can be extracted.
    """
    if not isinstance(d, dict) or "result" not in d:
        return None
    r = d["result"]
    if isinstance(r, list) and r:
        # Fallback: if API returned list anyway, take last interest_8h
        try:
            latest = sorted(r, key=lambda x: x.get("timestamp", 0))[-1]
            rate = latest.get("interest_8h")
            return None if rate is None else float(rate)
        except (AttributeError, TypeError, KeyError, ValueError):
            return None
    try:
        return float(r)
    except (TypeError, ValueError):
        return None


def parse_dvol_last(d: dict, target_ts_ms: int = 0) -> Optional[float]:
    """Parse get_volatility_index_data — returns close from entry closest to target_ts_ms (or last).

    Each data row is expected as ``[timestamp_ms, open, high, low, close]``;
    for shorter rows the last element is used.  Returns None on any
    structural mismatch.
    """
    if not isinstance(d, dict) or "result" not in d:
        return None
    r = d["result"]
    if not isinstance(r, dict) or "data" not in r:
        return None
    data = r["data"]
    if not data:
        return None
    # data row format: [timestamp_ms, open, high, low, close]
    try:
        rows = sorted(data, key=lambda x: x[0])
        if target_ts_ms > 0:
            best = min(rows, key=lambda x: abs(x[0] - target_ts_ms))
        else:
            best = rows[-1]
        return float(best[4]) if len(best) > 4 else float(best[-1])
    except (TypeError, IndexError, KeyError, ValueError):
        return None


def parse_dvol_at_23utc(d: dict, target_ts_ms: int = 0) -> Optional[float]:
    """Alias for parse_dvol_last — explicit 23:00 UTC variant."""
    return parse_dvol_last(d, target_ts_ms)


# Parser registry: every entry is callable as parser(response, target_ts_ms).
PARSERS = {
    "parse_history_at_23utc": parse_history_at_23utc,
    "parse_history_last": lambda d, ts=0: parse_history_at_23utc(d, 0),
    "parse_scalar_result": lambda d, ts=0: parse_scalar_result(d),
    "parse_dvol_last": parse_dvol_last,
    "parse_dvol_at_23utc": parse_dvol_at_23utc,
}


# ---------------------------------------------------------------------------
# HTTP fetcher
# ---------------------------------------------------------------------------
async def fetch_json(session: "aiohttp.ClientSession", url: str, verbose: bool = False) -> Optional[dict]:
    """GET *url* and return the decoded JSON body, or None on any failure.

    Best-effort by design: a non-200 status, a timeout (15 s total), a
    transport error or an undecodable body all yield None so one bad request
    does not abort the whole parity run.  With *verbose* the failure reason
    and a 200-character preview of the payload are printed.
    """
    try:
        async with session.get(url, timeout=aiohttp.ClientTimeout(total=15)) as resp:
            if resp.status != 200:
                if verbose:
                    print(f" HTTP {resp.status} for {url[:80]}...")
                return None
            text = await resp.text()
            d = json.loads(text)
            if verbose:
                preview = str(d)[:200]
                print(f" RAW: {preview}")
            return d
    except Exception as e:  # deliberate catch-all at the network boundary
        if verbose:
            print(f" FETCH ERROR: {e} — {url[:80]}")
        return None


# ---------------------------------------------------------------------------
# Main parity checker
# ---------------------------------------------------------------------------
def _score_response(raw, parser_name: str, target_ts: int, gt_val: float, tol: float):
    """Parse *raw* and compare with ground truth; return (got_val, abs_diff, status)."""
    if raw is None:
        return None, None, "FETCH_FAIL"
    got_val = PARSERS[parser_name](raw, target_ts)
    if got_val is None:
        return None, None, "PARSE_FAIL"
    abs_diff = abs(got_val - gt_val)
    if abs_diff <= tol:
        return got_val, abs_diff, "PASS"
    # Relative difference is informational only; the floor avoids dividing by 0.
    rel_diff = abs_diff / max(abs(gt_val), 1e-12)
    return got_val, abs_diff, f"FAIL (abs={abs_diff:.2e}, rel={rel_diff:.1%})"


def _status_icon(status: str) -> str:
    """Map a status string to its report icon: OK = pass, ~~ = skipped, XX = mismatch."""
    if status == "PASS":
        return "OK"
    if status in _SKIP_STATUSES:
        return "~~"
    return "XX"


async def run_parity_check(dates: list, indicators_filter: Optional[set], verbose: bool) -> dict:
    """
    Run parity check for all dates × candidates.

    Args:
        dates:             Date strings (``YYYY-MM-DD``) to validate.
        indicators_filter: Indicator names to test, or None/empty for all.
        verbose:           Print URLs and raw responses.

    Returns nested dict:
        results[candidate][indicator] = {pass: int, fail: int, skip: int, abs_diffs: [...]}
    ``skip`` counts requests that could not be fetched or parsed; ``abs_diffs``
    holds the absolute differences of the passing comparisons.

    Raises:
        RuntimeError: if the aiohttp package is not installed.
    """
    if aiohttp is None:
        raise RuntimeError("aiohttp is required to query the Deribit API (pip install aiohttp)")

    results = {}  # candidate → indicator → {pass, fail, skip, abs_diffs}

    async with aiohttp.ClientSession(
        headers={"User-Agent": "DOLPHIN-ExtF-Parity-Test/1.0"}
    ) as session:
        for date_str in dates:
            print(f"\n{'='*60}")
            print(f"DATE: {date_str}")
            print(f"{'='*60}")

            # Ground truth
            gt = load_npz_ground_truth(date_str)
            if gt is None:
                print(" [SKIP] No NPZ data available for this date.")
                continue
            print(f" Ground truth (NPZ): {gt}")

            # Build candidates
            candidates = build_candidate_urls(date_str)

            for cand_label, indicator_urls in candidates.items():
                for ind_name, url_spec in indicator_urls.items():
                    # Unpack 3-tuple (url, parser_name, target_ts_ms)
                    url, parser_name, target_ts = url_spec

                    # Filter
                    if indicators_filter and ind_name not in indicators_filter:
                        continue
                    if ind_name not in gt:
                        continue  # no ground truth for this indicator on this date

                    gt_val = gt[ind_name]
                    tol = TOLERANCES.get(ind_name, 1e-6)

                    if verbose:
                        print(f"\n [{cand_label}] {ind_name}")
                        print(f" URL: {url[:100]}...")

                    # Fetch + parse
                    raw = await fetch_json(session, url, verbose=verbose)
                    got_val, abs_diff, status = _score_response(
                        raw, parser_name, target_ts, gt_val, tol
                    )

                    # Store
                    rec = results.setdefault(cand_label, {}).setdefault(
                        ind_name, {"pass": 0, "fail": 0, "skip": 0, "abs_diffs": []}
                    )
                    if status == "PASS":
                        rec["pass"] += 1
                        rec["abs_diffs"].append(abs_diff)
                    elif status in _SKIP_STATUSES:
                        rec["skip"] += 1
                    else:
                        rec["fail"] += 1

                    icon = _status_icon(status)
                    got_str = f"{got_val:.6e}" if got_val is not None else "None"
                    print(f" {icon} [{cand_label}] {ind_name:16s} gt={gt_val:.6e} got={got_str} {status}")

                    # Rate-limit courtesy
                    await asyncio.sleep(0.15)

    return results


def print_summary(results: dict):
    """Print pass/fail summary table and recommend endpoint.

    A candidate is "LOCKED IN" for an indicator when it has at least one pass
    and no failures; among those, the one with the smallest maximum absolute
    difference is recommended (the first one seen wins ties).
    """
    print(f"\n{'='*70}")
    print("PARITY SUMMARY")
    print(f"{'='*70}")
    print(f"{'Candidate':<30} {'Indicator':<16} {'PASS':>5} {'FAIL':>5} {'SKIP':>5} {'Verdict'}")
    print("-" * 70)

    winner = {}  # indicator → (best candidate, its max abs diff)

    for cand_label, ind_results in results.items():
        for ind_name, rec in sorted(ind_results.items()):
            p, f, s = rec["pass"], rec["fail"], rec["skip"]
            if p + f == 0:
                verdict = "NO DATA"
            elif f == 0:
                max_abs = max(rec["abs_diffs"]) if rec["abs_diffs"] else 0
                verdict = f"LOCKED IN OK (max_abs={max_abs:.2e})"
                if ind_name not in winner or max_abs < winner[ind_name][1]:
                    winner[ind_name] = (cand_label, max_abs)
            else:
                verdict = f"MISMATCH XX ({f} failures)"

            print(f"{cand_label:<30} {ind_name:<16} {p:>5} {f:>5} {s:>5} {verdict}")

    print(f"\n{'='*70}")
    print("RECOMMENDED ENDPOINT PER INDICATOR")
    print(f"{'='*70}")
    if winner:
        for ind_name, (cand, max_abs) in sorted(winner.items()):
            print(f" {ind_name:<16} → {cand} (max abs diff = {max_abs:.2e})")
    else:
        print(" WARNING: No candidate passed parity for any indicator.")
        print(" Possible causes:")
        print(" • Deribit API response format changed (check raw output with --verbose)")
        print(" • parser needs updating for new response structure")
        print(" • timestamps or window size wrong — try different KNOWN_DATES")
    print()


def _is_clean_pass(rec: dict) -> bool:
    """True when a result record has at least one pass and no failures."""
    return rec.get("pass", 0) > 0 and rec.get("fail", 0) == 0


def evaluate_critical_indicator(results: dict) -> Tuple[int, List[str]]:
    """Decide the process exit code from the fund_dbt_btc results.

    Args:
        results: Nested dict as returned by run_parity_check().

    Returns:
        (exit_code, messages):
          * (0, [])      — the preferred candidate passed cleanly;
          * (0, [info])  — the preferred candidate did not, but another
                           candidate passed cleanly;
          * (1, [error]) — no candidate passed cleanly for fund_dbt_btc
                           (also the case when it was not tested at all).
    """
    preferred = results.get(PREFERRED_FUNDING_CANDIDATE, {}).get(CRITICAL_INDICATOR, {})
    if _is_clean_pass(preferred):
        return 0, []

    # Try to find ANY passing candidate for fund_dbt_btc
    any_pass = any(
        _is_clean_pass(per_indicator.get(CRITICAL_INDICATOR, {}))
        for per_indicator in results.values()
    )
    if not any_pass:
        return 1, ["CRITICAL: No valid endpoint found for fund_dbt_btc (ACBv6 dependency)"]
    return 0, [
        f"ℹ️ fund_dbt_btc: preferred candidate ({PREFERRED_FUNDING_CANDIDATE}) failed but another passed.",
        " Update _build_deribit_url() in realtime_exf_service.py to use the passing candidate.",
    ]


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
def main():
    """Parse CLI arguments, run the parity check and print the report.

    Exits with status 1 when no candidate validates fund_dbt_btc.
    """
    parser = argparse.ArgumentParser(description="Deribit ExtF API parity test")
    parser.add_argument("--indicators", choices=["fund", "dvol", "all"], default="all",
                        help="Which indicator groups to test (default: all)")
    parser.add_argument("--dates", nargs="*", default=None,
                        help="Override KNOWN_DATES list (e.g. 2026-01-02 2026-01-05)")
    parser.add_argument("--verbose", action="store_true",
                        help="Print raw API responses for debugging")
    args = parser.parse_args()

    dates = args.dates if args.dates else KNOWN_DATES

    ind_filter = None
    if args.indicators == "fund":
        ind_filter = {"fund_dbt_btc", "fund_dbt_eth"}
    elif args.indicators == "dvol":
        ind_filter = {"dvol_btc", "dvol_eth"}

    print("DOLPHIN — Deribit ExtF API Parity Test")
    print(f"Testing {len(dates)} known dates × {args.indicators} indicators")
    print(f"Ground truth: {_EIGENVALUES_PATH}")
    print()

    results = asyncio.run(run_parity_check(dates, ind_filter, args.verbose))
    print_summary(results)

    # Exit non-zero if the critical indicator (fund_dbt_btc) has no valid endpoint
    exit_code, messages = evaluate_critical_indicator(results)
    for line in messages:
        print(line)
    if exit_code:
        sys.exit(exit_code)


if __name__ == "__main__":
    main()