initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions
--- a/prod/run_5y_klines_backfill.bat
+++ b/prod/run_5y_klines_backfill.bat
@@ -0,0 +1,153 @@
+@echo off
+REM -----------------------------------------------------------------------------
+REM DOLPHIN NG5 :: 5 Year Klines Dataset Backfill
+REM
+REM Drives the backfill of 1-minute Binance klines for the window configured
+REM below as START_DATE and END_DATE.
+REM Expected runtime is 12-24 hours, depending on network and disk speed.
+REM
+REM The script is meant to be idempotent: re-running it after an interruption
+REM is expected to be safe.
+REM -----------------------------------------------------------------------------
+
+setlocal enabledelayedexpansion
+
+REM --- Settings ---
+REM Date window handed to the backfiller in the fetch and compute phases.
+set "START_DATE=2021-07-01"
+set "END_DATE=2023-12-31"
+
+REM Working directories: the backfiller scripts and the Arrow-to-Parquet converter.
+set "BACKFILL_DIR=C:\Users\Lenovo\Documents\- Dolphin NG Backfill"
+set "HCM_DIR=C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict"
+
+REM Run log, kept inside the backfill directory.
+set "LOG_FILE=%BACKFILL_DIR%\backfill_2021_2023_run.log"
+
+REM Startup banner: shows the configured date window and the log location.
+set "BANNER_RULE=============================================================================="
+echo %BANNER_RULE%
+echo DOLPHIN NG5 - 5 Year Klines Backfill
+echo %BANNER_RULE%
+echo Start date: %START_DATE%
+echo End date: %END_DATE%
+echo Log file: %LOG_FILE%
+echo %BANNER_RULE%
+echo.
+
+REM Preflight: verify the external tools this script relies on before starting
+REM a run that can take many hours.
+
+REM Check Python
+python --version >nul 2>&1
+if errorlevel 1 (
+    echo ERROR: Python not found in PATH
+    exit /b 1
+)
+
+REM Check tee. Every phase pipes its output through tee to reach both the
+REM console and the log file. tee does not ship with Windows, so it has to be
+REM provided by a separately installed toolset that is on PATH.
+where tee >nul 2>&1
+if errorlevel 1 (
+    echo ERROR: tee not found in PATH
+    exit /b 1
+)
+
+REM Phase 1: Fetch klines
+REM Runs the backfiller in --fetch mode from BACKFILL_DIR and mirrors its output
+REM to the console and to LOG_FILE through tee.
+echo.
+echo ============================================================================
+echo PHASE 1: Fetching klines from Binance API
+echo ============================================================================
+echo This will download 1-minute OHLCV for 50 symbols x 914 days
+echo Rate limited to ~1100 req/min to stay under Binance limits
+echo Estimated time: 6-12 hours
+echo.
+
+REM Stop right away if the working directory is missing instead of running the
+REM backfiller from the wrong place.
+cd /d "%BACKFILL_DIR%" || (
+    echo ERROR: Cannot change to directory "%BACKFILL_DIR%"
+    exit /b 1
+)
+echo [%date% %time%] Starting Phase 1: Fetch >> "%LOG_FILE%"
+
+REM After a pipeline, errorlevel reflects the last command, which is tee, so a
+REM Python failure would go unnoticed. The left side of the pipe therefore drops
+REM a marker file when Python exits with a non-zero code, and the marker is
+REM checked once the pipeline has finished.
+set "PHASE_FAIL_FLAG=%TEMP%\dolphin_backfill_phase1.fail"
+if exist "%PHASE_FAIL_FLAG%" del "%PHASE_FAIL_FLAG%"
+set "PHASE_FAILED="
+(python historical_klines_backfiller.py --fetch --start %START_DATE% --end %END_DATE% 2>&1 || echo failed>"%PHASE_FAIL_FLAG%") | tee -a "%LOG_FILE%"
+REM A failure of tee itself also counts as a failed phase.
+if errorlevel 1 set "PHASE_FAILED=1"
+if exist "%PHASE_FAIL_FLAG%" (
+    del "%PHASE_FAIL_FLAG%"
+    set "PHASE_FAILED=1"
+)
+
+REM Closing parentheses inside a parenthesised block must be escaped with a
+REM caret, otherwise they terminate the block early.
+if defined PHASE_FAILED (
+    echo [%date% %time%] ERROR: Phase 1 failed >> "%LOG_FILE%"
+    echo ERROR: Phase 1 ^(fetch^) failed. Check log: %LOG_FILE%
+    exit /b 1
+)
+
+echo [%date% %time%] Phase 1 complete >> "%LOG_FILE%"
+
+REM Phase 2: Compute eigenvalues and write Arrow files
+REM Runs the backfiller in --compute mode from the current directory and mirrors
+REM its output to the console and to LOG_FILE through tee.
+echo.
+echo ============================================================================
+echo PHASE 2: Computing eigenvalues and writing Arrow files
+echo ============================================================================
+echo This processes the cached klines into Arrow format with eigenvalues
+echo Estimated time: 2-4 hours
+echo.
+
+echo [%date% %time%] Starting Phase 2: Compute >> "%LOG_FILE%"
+
+REM After a pipeline, errorlevel reflects the last command, which is tee, so a
+REM Python failure would go unnoticed. The left side of the pipe therefore drops
+REM a marker file when Python exits with a non-zero code, and the marker is
+REM checked once the pipeline has finished.
+set "PHASE_FAIL_FLAG=%TEMP%\dolphin_backfill_phase2.fail"
+if exist "%PHASE_FAIL_FLAG%" del "%PHASE_FAIL_FLAG%"
+set "PHASE_FAILED="
+(python historical_klines_backfiller.py --compute --start %START_DATE% --end %END_DATE% 2>&1 || echo failed>"%PHASE_FAIL_FLAG%") | tee -a "%LOG_FILE%"
+REM A failure of tee itself also counts as a failed phase.
+if errorlevel 1 set "PHASE_FAILED=1"
+if exist "%PHASE_FAIL_FLAG%" (
+    del "%PHASE_FAIL_FLAG%"
+    set "PHASE_FAILED=1"
+)
+
+REM Closing parentheses inside a parenthesised block must be escaped with a
+REM caret, otherwise they terminate the block early.
+if defined PHASE_FAILED (
+    echo [%date% %time%] ERROR: Phase 2 failed >> "%LOG_FILE%"
+    echo ERROR: Phase 2 ^(compute^) failed. Check log: %LOG_FILE%
+    exit /b 1
+)
+
+echo [%date% %time%] Phase 2 complete >> "%LOG_FILE%"
+
+REM Phase 3: Convert Arrow to Parquet
+REM Runs the converter from HCM_DIR and mirrors its output to the console and to
+REM LOG_FILE through tee.
+echo.
+echo ============================================================================
+echo PHASE 3: Converting Arrow files to VBT Parquet cache
+echo ============================================================================
+echo This converts the Arrow files to the final Parquet format
+echo Estimated time: 30-60 minutes
+echo.
+
+REM Stop right away if the working directory is missing instead of running the
+REM converter from the wrong place.
+cd /d "%HCM_DIR%" || (
+    echo [%date% %time%] ERROR: Phase 3 failed >> "%LOG_FILE%"
+    echo ERROR: Cannot change to directory "%HCM_DIR%"
+    exit /b 1
+)
+echo [%date% %time%] Starting Phase 3: Convert >> "%LOG_FILE%"
+
+REM After a pipeline, errorlevel reflects the last command, which is tee, so a
+REM Python failure would go unnoticed. The left side of the pipe therefore drops
+REM a marker file when Python exits with a non-zero code, and the marker is
+REM checked once the pipeline has finished.
+set "PHASE_FAIL_FLAG=%TEMP%\dolphin_backfill_phase3.fail"
+if exist "%PHASE_FAIL_FLAG%" del "%PHASE_FAIL_FLAG%"
+set "PHASE_FAILED="
+(python ng5_arrow_to_vbt_cache.py --all 2>&1 || echo failed>"%PHASE_FAIL_FLAG%") | tee -a "%LOG_FILE%"
+REM A failure of tee itself also counts as a failed phase.
+if errorlevel 1 set "PHASE_FAILED=1"
+if exist "%PHASE_FAIL_FLAG%" (
+    del "%PHASE_FAIL_FLAG%"
+    set "PHASE_FAILED=1"
+)
+
+REM Closing parentheses inside a parenthesised block must be escaped with a
+REM caret, otherwise they terminate the block early.
+if defined PHASE_FAILED (
+    echo [%date% %time%] ERROR: Phase 3 failed >> "%LOG_FILE%"
+    echo ERROR: Phase 3 ^(convert^) failed. Check log: %LOG_FILE%
+    exit /b 1
+)
+
+echo [%date% %time%] Phase 3 complete >> "%LOG_FILE%"
+
+REM Phase 4: Validation
+REM Lists the parquet files in HCM_DIR\vbt_cache_klines, prints per-year counts
+REM and reports which days between START_DATE and END_DATE have no file.
+REM The report is informational only and never aborts the script.
+echo.
+echo ============================================================================
+echo PHASE 4: Validating output
+echo ============================================================================
+echo.
+
+echo [%date% %time%] Starting Phase 4: Validate >> "%LOG_FILE%"
+
+REM cmd.exe cannot hand a multi-line quoted string to python -c, so the validator
+REM is written line by line to a temporary file and run from there.
+REM The Python text deliberately contains no percent signs, double quotes or
+REM exclamation marks, and its only angle bracket is escaped with a caret.
+set "VALIDATE_PY=%TEMP%\dolphin_backfill_validate.py"
+>"%VALIDATE_PY%" echo import sys
+>>"%VALIDATE_PY%" echo from collections import Counter
+>>"%VALIDATE_PY%" echo from datetime import date, timedelta
+>>"%VALIDATE_PY%" echo from pathlib import Path
+>>"%VALIDATE_PY%" echo.
+>>"%VALIDATE_PY%" echo cache_dir = Path(sys.argv[1])
+>>"%VALIDATE_PY%" echo start = date.fromisoformat(sys.argv[2])
+>>"%VALIDATE_PY%" echo end = date.fromisoformat(sys.argv[3])
+>>"%VALIDATE_PY%" echo stems = sorted(f.stem for f in cache_dir.glob('*.parquet'))
+>>"%VALIDATE_PY%" echo years = Counter(s[:4] for s in stems)
+>>"%VALIDATE_PY%" echo.
+>>"%VALIDATE_PY%" echo print('=' * 70)
+>>"%VALIDATE_PY%" echo print('VALIDATION RESULTS')
+>>"%VALIDATE_PY%" echo print('=' * 70)
+>>"%VALIDATE_PY%" echo print(f'Total parquets: {len(stems)}')
+>>"%VALIDATE_PY%" echo if stems:
+>>"%VALIDATE_PY%" echo     print(f'Date range: {stems[0]} to {stems[-1]}')
+>>"%VALIDATE_PY%" echo else:
+>>"%VALIDATE_PY%" echo     print('Date range: no parquet files found')
+>>"%VALIDATE_PY%" echo print('By year:')
+>>"%VALIDATE_PY%" echo for y in sorted(years):
+>>"%VALIDATE_PY%" echo     print(f'  {y}: {years[y]} days')
+>>"%VALIDATE_PY%" echo.
+>>"%VALIDATE_PY%" echo # Check for gaps between the configured start and end dates, inclusive
+>>"%VALIDATE_PY%" echo span = (end - start).days + 1
+>>"%VALIDATE_PY%" echo target = {(start + timedelta(days=i)).isoformat() for i in range(span)}
+>>"%VALIDATE_PY%" echo missing = sorted(target - set(stems))
+>>"%VALIDATE_PY%" echo print()
+>>"%VALIDATE_PY%" echo print(f'Missing in target range ({start} to {end}): {len(missing)} days')
+>>"%VALIDATE_PY%" echo if len(missing) ^<= 20:
+>>"%VALIDATE_PY%" echo     for d in missing:
+>>"%VALIDATE_PY%" echo         print(f'  {d}')
+>>"%VALIDATE_PY%" echo print('=' * 70)
+
+REM The cache directory and the date window are passed as arguments so the
+REM check always matches the configured START_DATE and END_DATE.
+python "%VALIDATE_PY%" "%HCM_DIR%\vbt_cache_klines" %START_DATE% %END_DATE% 2>&1 | tee -a "%LOG_FILE%"
+del "%VALIDATE_PY%" >nul 2>&1
+
+REM Record completion in the log, print the closing banner, then wait for a key
+REM press so the console window stays open.
+echo [%date% %time%] All phases complete >> "%LOG_FILE%"
+
+set "BANNER_RULE=============================================================================="
+echo.
+echo %BANNER_RULE%
+echo BACKFILL COMPLETE
+echo %BANNER_RULE%
+echo Log file: %LOG_FILE%
+echo %BANNER_RULE%
+
+pause