initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
181
external_factors/backfill_klines_exf.py
Executable file
181
external_factors/backfill_klines_exf.py
Executable file
@@ -0,0 +1,181 @@
|
||||
"""DOLPHIN ExF Backfill for Klines Dates
|
||||
=========================================
|
||||
Writes ExF Indicators NPZ files for all 1,710 klines parquet dates so that
|
||||
ACBv6 can read funding_btc, dvol_btc, fng, and taker for those dates.
|
||||
|
||||
Problem:
|
||||
backfill_runner.py reads NG3 JSON scan directories to get timestamps.
|
||||
Klines dates (2021-2026) have no NG3 JSON scans → ACBv6 _load_external_factors()
|
||||
returns neutral defaults → boost=1.0 always → inverse-boost component is dead.
|
||||
|
||||
Solution:
|
||||
For each klines date, call ExternalFactorsFetcher.fetch_sync(target_date=noon_UTC)
|
||||
and write a minimal NPZ to EIGENVALUES_PATH/YYYY-MM-DD/scan_000001__Indicators.npz
|
||||
in the exact format ACBv6 expects: api_names + api_indicators + api_success.
|
||||
|
||||
Output format (ACBv6 compatible):
|
||||
data['api_names'] : np.array of indicator name strings (N_INDICATORS)
|
||||
data['api_indicators'] : np.float64 array of values (N_INDICATORS)
|
||||
data['api_success'] : np.bool_ array (N_INDICATORS)
|
||||
|
||||
Idempotent: skips dates where the NPZ already exists.
|
||||
Rate-limited: configurable delay between dates (default 1.0s).
|
||||
|
||||
Usage:
|
||||
cd "C:\\Users\\Lenovo\\Documents\\- DOLPHIN NG HD HCM TSF Predict\\external_factors"
|
||||
"C:\\Users\\Lenovo\\Documents\\- Siloqy\\Scripts\\python.exe" backfill_klines_exf.py
|
||||
"C:\\Users\\Lenovo\\Documents\\- Siloqy\\Scripts\\python.exe" backfill_klines_exf.py --dry-run
|
||||
"C:\\Users\\Lenovo\\Documents\\- Siloqy\\Scripts\\python.exe" backfill_klines_exf.py --start 2022-01-01 --end 2022-12-31
|
||||
|
||||
Expected runtime: 2-5 hours for all 1710 dates (network-dependent).
|
||||
Most of the value (funding_btc, dvol_btc, fng, taker) comes from a few API calls
|
||||
per date. CURRENT-only indicators will fail gracefully (api_success=False, value=0).
|
||||
"""
|
||||
import sys, time, argparse, asyncio
|
||||
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import numpy as np
|
||||
|
||||
# -- Paths --
# One platform switch selects both roots: on Windows HCM_DIR is derived from
# this script's location and EIGENVALUES_PATH is a hard-coded user-profile
# path; on every other platform both are fixed /mnt mount points.
import sys as _sys

if _sys.platform == 'win32':
    HCM_DIR = Path(__file__).parent.parent
    EIGENVALUES_PATH = Path(r"C:\Users\Lenovo\Documents\- Dolphin NG HD (NG3)\correlation_arb512\eigenvalues")
else:
    HCM_DIR = Path('/mnt/dolphin')
    EIGENVALUES_PATH = Path('/mnt/ng6_data/eigenvalues')

# Directory globbed for per-date klines parquet files.
KLINES_DIR = HCM_DIR / "vbt_cache_klines"

# File name written inside each EIGENVALUES_PATH/<date>/ directory.
NPZ_FILENAME = "scan_000001__Indicators.npz"  # single synthetic scan per date

# Put this script's own directory first on the import path so the deferred
# `external_factors_matrix` import in main() can be resolved.
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
def parse_args():
    """Parse and validate the command-line options of the backfill script.

    Returns:
        argparse.Namespace with the attributes
          start, end : str or None -- inclusive date bounds, normalised to
                       zero-padded ``YYYY-MM-DD`` (None when not given)
          dry_run    : bool  -- report what would be done, write nothing
          delay      : float -- seconds to sleep between dates (finite, >= 0)
          overwrite  : bool  -- re-fetch dates whose NPZ already exists

    Invalid values terminate the process through argparse's usual usage
    error (SystemExit with code 2) instead of surfacing later in the run.
    """

    def _iso_date(text):
        # The caller filters dates by plain string comparison, which is only
        # correct for zero-padded ISO dates -- so validate and normalise here
        # (e.g. "2022-1-5" becomes "2022-01-05") and keep returning a string.
        try:
            return datetime.strptime(text, "%Y-%m-%d").date().isoformat()
        except ValueError:
            raise argparse.ArgumentTypeError(
                f"invalid date {text!r}, expected YYYY-MM-DD"
            )

    def _delay_seconds(text):
        # time.sleep() rejects negative, NaN and infinite values; fail at
        # parse time rather than after the first date has been fetched.
        try:
            seconds = float(text)
        except ValueError:
            raise argparse.ArgumentTypeError(f"invalid float value: {text!r}")
        if not (0 <= seconds < float("inf")):
            raise argparse.ArgumentTypeError(
                f"delay must be a finite, non-negative number of seconds, got {text!r}"
            )
        return seconds

    p = argparse.ArgumentParser(description="Backfill ExF NPZ files for klines dates")
    p.add_argument("--start", type=_iso_date, default=None,
                   help="Start date YYYY-MM-DD (inclusive)")
    p.add_argument("--end", type=_iso_date, default=None,
                   help="End date YYYY-MM-DD (inclusive)")
    p.add_argument("--dry-run", action="store_true",
                   help="Print what would be done, skip writes")
    p.add_argument("--delay", type=_delay_seconds, default=1.0,
                   help="Seconds between date fetches (default 1.0)")
    p.add_argument("--overwrite", action="store_true",
                   help="Re-fetch and overwrite existing NPZ files")
    return p.parse_args()
|
||||
|
||||
|
||||
def main():
    """Backfill one synthetic ExF Indicators NPZ per klines date.

    Steps:
      1. Parse CLI options and import the ExF fetcher infrastructure.
      2. Derive the list of date strings from the ``*.parquet`` file stems in
         KLINES_DIR (files whose name contains 'catalog' are ignored), then
         apply the optional --start / --end bounds by string comparison.
      3. For each date: skip it if its NPZ already exists (unless
         --overwrite), otherwise fetch indicators for noon UTC of that date
         and write ``api_names`` / ``api_indicators`` / ``api_success`` to
         EIGENVALUES_PATH/<date>/NPZ_FILENAME.
      4. Print per-date progress and a final summary.

    Fetch failures and unparsable date stems are counted as errors and do not
    stop the run. Errors raised while writing a file are not caught here.
    """
    args = parse_args()

    # Import ExF infrastructure (deferred until after argument parsing; the
    # module-level sys.path.insert makes this script's directory importable).
    from external_factors_matrix import ExternalFactorsFetcher, Config, INDICATORS, N_INDICATORS

    # Build ordered name list (matches matrix index: names[i] = INDICATORS[i].name)
    ind_names = np.array([ind.name for ind in INDICATORS], dtype=object)

    fetcher = ExternalFactorsFetcher(Config())

    # Enumerate klines dates.
    # The 'catalog' exclusion is applied to the file name only: testing the
    # whole path would silently drop every date whenever a parent directory
    # happened to contain the word "catalog".
    parquet_files = sorted(KLINES_DIR.glob("*.parquet"))
    parquet_files = [p for p in parquet_files if 'catalog' not in p.name]
    date_strings = [p.stem for p in parquet_files]

    # Filter by --start / --end (lexicographic compare on YYYY-MM-DD strings)
    if args.start:
        date_strings = [d for d in date_strings if d >= args.start]
    if args.end:
        date_strings = [d for d in date_strings if d <= args.end]

    total = len(date_strings)
    print(f"Klines dates to process: {total}")
    print(f"EIGENVALUES_PATH: {EIGENVALUES_PATH}")
    print(f"Dry run: {args.dry_run} Overwrite: {args.overwrite} Delay: {args.delay}s\n")

    if args.dry_run:
        print("DRY RUN — no files will be written.\n")

    skipped = 0
    written = 0
    errors = 0
    t0 = time.time()

    for i, ds in enumerate(date_strings):
        out_dir = EIGENVALUES_PATH / ds
        out_npz = out_dir / NPZ_FILENAME

        # Skip if exists and not overwriting
        if out_npz.exists() and not args.overwrite:
            skipped += 1
            continue

        # Fetch at noon UTC for this date
        try:
            yr, mo, dy = int(ds[:4]), int(ds[5:7]), int(ds[8:10])
            target_dt = datetime(yr, mo, dy, 12, 0, 0, tzinfo=timezone.utc)
        except ValueError:
            print(f" [{i+1}/{total}] {ds}: BAD DATE FORMAT — skip")
            errors += 1
            continue

        if args.dry_run:
            print(f" [{i+1}/{total}] {ds}: would fetch {target_dt.isoformat()} → {out_npz}")
            written += 1
            continue

        try:
            result = fetcher.fetch_sync(target_date=target_dt)
        except Exception as e:
            # Best-effort backfill: record the failure, keep the rate limit,
            # and move on to the next date.
            print(f" [{i+1}/{total}] {ds}: FETCH ERROR — {e}")
            errors += 1
            time.sleep(args.delay)
            continue

        # Build NPZ arrays in ACBv6-compatible format
        matrix = result['matrix']    # np.float64 array, 0-indexed (matrix[id-1])
        details = result['details']  # {id: {'name': ..., 'value': ..., 'success': bool}}

        api_indicators = matrix.astype(np.float64)
        api_success = np.array(
            [details.get(ind_id, {}).get('success', False)
             for ind_id in range(1, N_INDICATORS + 1)],
            dtype=np.bool_,
        )
        success_count = result.get('success_count', int(api_success.sum()))

        # Write NPZ atomically: a later run skips any date whose NPZ exists,
        # so a file truncated by a crash or Ctrl-C must never appear under
        # the final name. Write to a sibling temp file, then rename over it.
        out_dir.mkdir(parents=True, exist_ok=True)
        tmp_npz = out_dir / (NPZ_FILENAME + ".tmp")
        try:
            # Passing an open file object stops numpy from appending ".npz"
            # to the temporary name.
            with open(tmp_npz, "wb") as fh:
                np.savez_compressed(
                    fh,
                    api_names=ind_names,
                    api_indicators=api_indicators,
                    api_success=api_success,
                )
            tmp_npz.replace(out_npz)
        except BaseException:
            # Also covers KeyboardInterrupt: remove the partial temp file and
            # let the original exception propagate unchanged.
            if tmp_npz.exists():
                tmp_npz.unlink()
            raise
        written += 1

        # Progress every 10 dates
        if (i + 1) % 10 == 0:
            elapsed = time.time() - t0
            rate = written / elapsed if elapsed > 0 else 1
            eta = (total - i - 1) / rate if rate > 0 else 0
            print(f" [{i+1}/{total}] {ds} ok={success_count}/{N_INDICATORS}"
                  f" elapsed={elapsed/60:.1f}m eta={eta/60:.1f}m"
                  f" written={written} skipped={skipped} errors={errors}")
        else:
            # Brief per-date confirmation
            key_vals = {
                'funding': round(float(api_indicators[0]), 6),  # id=1 → idx 0
                'dvol': round(float(api_indicators[10]), 2),    # id=11 → idx 10
            }
            print(f" {ds} ok={success_count} funding={key_vals['funding']:+.4f} dvol={key_vals['dvol']:.1f}")

        time.sleep(args.delay)

    elapsed_total = time.time() - t0
    print(f"\n{'='*60}")
    print(f" ExF Klines Backfill COMPLETE")
    print(f" Written: {written}")
    print(f" Skipped: {skipped} (already existed)")
    print(f" Errors: {errors}")
    print(f" Runtime: {elapsed_total/60:.1f}m")
    print(f"{'='*60}")

    if written > 0 and not args.dry_run:
        print(f"\n ACBv6 will now find ExF data for klines dates.")
        print(f" Re-run test_pf_5y_klines.py to get the full-boost ACBv6 results.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user