154 lines
5.3 KiB
Batchfile
154 lines
5.3 KiB
Batchfile
|
|
@echo off
|
||
|
|
REM =============================================================================
|
||
|
|
REM DOLPHIN NG5 - 5 Year Klines Dataset Backfill Script
|
||
|
|
REM =============================================================================
|
||
|
|
REM This script backfills 1-minute klines data from Binance for 2021-07-01 through 2023-12-31
|
||
|
|
REM Expected runtime: 12-24 hours (depending on network and disk speed)
|
||
|
|
REM
|
||
|
|
REM IMPORTANT: This script is idempotent - you can safely re-run if interrupted
|
||
|
|
REM =============================================================================
|
||
|
|
|
||
|
|
setlocal EnableDelayedExpansion

REM ---------------------------------------------------------------------------
REM Settings
REM   BACKFILL_DIR / HCM_DIR : folders the later phases change into
REM   START_DATE / END_DATE  : inclusive date window passed to the backfiller
REM   LOG_FILE               : run log, kept inside BACKFILL_DIR
REM The quoted form  set "NAME=value"  keeps stray trailing blanks out of the
REM values; the values themselves are unchanged.
REM ---------------------------------------------------------------------------
set "BACKFILL_DIR=C:\Users\Lenovo\Documents\- Dolphin NG Backfill"
set "HCM_DIR=C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict"
set "START_DATE=2021-07-01"
set "END_DATE=2023-12-31"
set "LOG_FILE=%BACKFILL_DIR%\backfill_2021_2023_run.log"

REM ---------------------------------------------------------------------------
REM Startup banner: show the effective date window and log location.
REM ---------------------------------------------------------------------------
echo ============================================================================
echo DOLPHIN NG5 - 5 Year Klines Backfill
echo ============================================================================
echo Start date: %START_DATE%
echo End date: %END_DATE%
echo Log file: %LOG_FILE%
echo ============================================================================
echo.
|
||
|
|
|
||
|
|
REM Pre-flight: make sure a Python interpreter can be launched from PATH.
REM The probe's output is discarded; only its exit status is inspected.
python --version >nul 2>&1
if not errorlevel 1 goto :python_found

REM Probe returned errorlevel 1 or higher: report and stop the script.
echo ERROR: Python not found in PATH
exit /b 1

:python_found
|
||
|
|
|
||
|
|
REM ---------------------------------------------------------------------------
REM Phase 1: Fetch klines
REM   Runs historical_klines_backfiller.py --fetch from BACKFILL_DIR for the
REM   configured START_DATE..END_DATE window and appends its output to LOG_FILE.
REM   Exits the script with code 1 if the directory change or the fetch fails.
REM
REM Implementation notes:
REM   * Output is redirected straight into the log instead of being piped
REM     through "tee". After a pipeline, ERRORLEVEL reflects the LAST stage,
REM     so a pipe would hide a Python failure (and "tee" is not a stock
REM     Windows command).
REM   * The failure path uses goto instead of an "if ( ... )" block: an
REM     unescaped ")" inside a block -- as in "(fetch)" -- terminates the block
REM     early and makes cmd abort with a syntax error.
REM   * %errorlevel% is compared against 0 so that negative exit codes
REM     (e.g. an interpreter crash) are treated as failures as well.
REM ---------------------------------------------------------------------------
echo.
echo ============================================================================
echo PHASE 1: Fetching klines from Binance API
echo ============================================================================
echo This will download 1-minute OHLCV for 50 symbols x 914 days
echo Rate limited to ~1100 req/min to stay under Binance limits
echo Estimated time: 6-12 hours
echo Output of this phase is appended to the log file, not shown here.
echo.

REM "||" reacts to the exit status of cd itself, independent of any earlier
REM ERRORLEVEL value. The path is echoed in quotes so parentheses in it cannot
REM close this block.
cd /d "%BACKFILL_DIR%" || (
    echo ERROR: Cannot change directory to "%BACKFILL_DIR%"
    exit /b 1
)
echo [%date% %time%] Starting Phase 1: Fetch >> "%LOG_FILE%"

python historical_klines_backfiller.py --fetch --start %START_DATE% --end %END_DATE% >> "%LOG_FILE%" 2>&1
if %errorlevel% equ 0 goto :phase1_ok

echo [%date% %time%] ERROR: Phase 1 failed >> "%LOG_FILE%"
echo ERROR: Phase 1 (fetch) failed. Check log: %LOG_FILE%
exit /b 1

:phase1_ok
echo [%date% %time%] Phase 1 complete >> "%LOG_FILE%"
|
||
|
|
|
||
|
|
REM ---------------------------------------------------------------------------
REM Phase 2: Compute eigenvalues and write Arrow files
REM   Runs historical_klines_backfiller.py --compute for the configured
REM   START_DATE..END_DATE window and appends its output to LOG_FILE.
REM   The script is launched from the current directory (Phase 1 changes into
REM   BACKFILL_DIR). Exits the script with code 1 if the compute step fails.
REM
REM Implementation notes:
REM   * Output is redirected straight into the log instead of being piped
REM     through "tee". After a pipeline, ERRORLEVEL reflects the LAST stage,
REM     so a pipe would hide a Python failure (and "tee" is not a stock
REM     Windows command).
REM   * The failure path uses goto instead of an "if ( ... )" block: an
REM     unescaped ")" inside a block -- as in "(compute)" -- terminates the
REM     block early and makes cmd abort with a syntax error.
REM   * %errorlevel% is compared against 0 so that negative exit codes
REM     (e.g. an interpreter crash) are treated as failures as well.
REM ---------------------------------------------------------------------------
echo.
echo ============================================================================
echo PHASE 2: Computing eigenvalues and writing Arrow files
echo ============================================================================
echo This processes the cached klines into Arrow format with eigenvalues
echo Estimated time: 2-4 hours
echo Output of this phase is appended to the log file, not shown here.
echo.

echo [%date% %time%] Starting Phase 2: Compute >> "%LOG_FILE%"

python historical_klines_backfiller.py --compute --start %START_DATE% --end %END_DATE% >> "%LOG_FILE%" 2>&1
if %errorlevel% equ 0 goto :phase2_ok

echo [%date% %time%] ERROR: Phase 2 failed >> "%LOG_FILE%"
echo ERROR: Phase 2 (compute) failed. Check log: %LOG_FILE%
exit /b 1

:phase2_ok
echo [%date% %time%] Phase 2 complete >> "%LOG_FILE%"
|
||
|
|
|
||
|
|
REM ---------------------------------------------------------------------------
REM Phase 3: Convert Arrow to Parquet
REM   Runs ng5_arrow_to_vbt_cache.py --all from HCM_DIR and appends its output
REM   to LOG_FILE. Exits the script with code 1 if the directory change or the
REM   conversion fails.
REM
REM Implementation notes:
REM   * Output is redirected straight into the log instead of being piped
REM     through "tee". After a pipeline, ERRORLEVEL reflects the LAST stage,
REM     so a pipe would hide a Python failure (and "tee" is not a stock
REM     Windows command).
REM   * The failure path uses goto instead of an "if ( ... )" block: an
REM     unescaped ")" inside a block -- as in "(convert)" -- terminates the
REM     block early and makes cmd abort with a syntax error.
REM   * %errorlevel% is compared against 0 so that negative exit codes
REM     (e.g. an interpreter crash) are treated as failures as well.
REM ---------------------------------------------------------------------------
echo.
echo ============================================================================
echo PHASE 3: Converting Arrow files to VBT Parquet cache
echo ============================================================================
echo This converts the Arrow files to the final Parquet format
echo Estimated time: 30-60 minutes
echo Output of this phase is appended to the log file, not shown here.
echo.

REM "||" reacts to the exit status of cd itself, independent of any earlier
REM ERRORLEVEL value. The path is echoed in quotes so parentheses in it cannot
REM close this block.
cd /d "%HCM_DIR%" || (
    echo ERROR: Cannot change directory to "%HCM_DIR%"
    exit /b 1
)
echo [%date% %time%] Starting Phase 3: Convert >> "%LOG_FILE%"

python ng5_arrow_to_vbt_cache.py --all >> "%LOG_FILE%" 2>&1
if %errorlevel% equ 0 goto :phase3_ok

echo [%date% %time%] ERROR: Phase 3 failed >> "%LOG_FILE%"
echo ERROR: Phase 3 (convert) failed. Check log: %LOG_FILE%
exit /b 1

:phase3_ok
echo [%date% %time%] Phase 3 complete >> "%LOG_FILE%"
|
||
|
|
|
||
|
|
REM ---------------------------------------------------------------------------
REM Phase 4: Validation
REM   Lists the *.parquet files in %HCM_DIR%\vbt_cache_klines, prints totals
REM   per year, and reports which days of START_DATE..END_DATE have no file.
REM   The report is shown on the console and appended to LOG_FILE.
REM   Exits the script with code 1 if the validation script itself fails.
REM
REM Implementation notes:
REM   * cmd does not continue a quoted argument across lines, so a multi-line
REM     python -c "..." cannot work in a batch file. The Python code is written
REM     to a temporary .py file, one echo per line, and run from there.
REM   * The Python code reads HCM_DIR / START_DATE / END_DATE from the
REM     environment (variables set in this script are inherited by child
REM     processes). That keeps "%" out of the Python text, where batch
REM     percent-expansion would otherwise mangle it, and keeps the checked
REM     range in sync with the configured one.
REM   * Each Python line is a single logical line with no indentation; the only
REM     character that needs cmd escaping is "<" (written as ^<).
REM   * An empty cache directory is reported instead of raising IndexError.
REM ---------------------------------------------------------------------------
echo.
echo ============================================================================
echo PHASE 4: Validating output
echo ============================================================================
echo.

echo [%date% %time%] Starting Phase 4: Validate >> "%LOG_FILE%"

REM Base name shared by the temporary script (.py) and its captured output (.txt).
set "VALIDATE_TMP=%TEMP%\ng5_validate_%RANDOM%"

>  "%VALIDATE_TMP%.py" echo import os
>> "%VALIDATE_TMP%.py" echo from collections import Counter
>> "%VALIDATE_TMP%.py" echo from pathlib import Path
>> "%VALIDATE_TMP%.py" echo import pandas as pd
>> "%VALIDATE_TMP%.py" echo cache = Path(os.environ['HCM_DIR']) / 'vbt_cache_klines'
>> "%VALIDATE_TMP%.py" echo start, end = os.environ['START_DATE'], os.environ['END_DATE']
>> "%VALIDATE_TMP%.py" echo stems = sorted(f.stem for f in cache.glob('*.parquet'))
>> "%VALIDATE_TMP%.py" echo years = Counter(s[:4] for s in stems)
>> "%VALIDATE_TMP%.py" echo print('='*70)
>> "%VALIDATE_TMP%.py" echo print('VALIDATION RESULTS')
>> "%VALIDATE_TMP%.py" echo print('='*70)
>> "%VALIDATE_TMP%.py" echo print(f'Total parquets: {len(stems)}')
>> "%VALIDATE_TMP%.py" echo print(f'Date range: {stems[0]} to {stems[-1]}' if stems else 'Date range: n/a - no parquet files found')
>> "%VALIDATE_TMP%.py" echo print('By year:')
>> "%VALIDATE_TMP%.py" echo for y in sorted(years): print(f' {y}: {years[y]} days')
>> "%VALIDATE_TMP%.py" echo expected = {str(d.date()) for d in pd.date_range(start, end, freq='D')}
>> "%VALIDATE_TMP%.py" echo missing = sorted(expected - set(stems))
>> "%VALIDATE_TMP%.py" echo print()
>> "%VALIDATE_TMP%.py" echo print(f'Missing in target range ({start} to {end}): {len(missing)} days')
>> "%VALIDATE_TMP%.py" echo for d in (missing if len(missing) ^<= 20 else []): print(f' {d}')
>> "%VALIDATE_TMP%.py" echo print('='*70)

REM Capture the report first so Python's own exit code can be read, then show
REM it on the console and append it to the log.
python "%VALIDATE_TMP%.py" > "%VALIDATE_TMP%.txt" 2>&1
set "VALIDATE_RC=%errorlevel%"
type "%VALIDATE_TMP%.txt"
type "%VALIDATE_TMP%.txt" >> "%LOG_FILE%"
del "%VALIDATE_TMP%.py" "%VALIDATE_TMP%.txt" >nul 2>&1

if %VALIDATE_RC% equ 0 goto :phase4_ok

echo [%date% %time%] ERROR: Phase 4 failed >> "%LOG_FILE%"
echo ERROR: Phase 4 (validate) failed. Check log: %LOG_FILE%
exit /b 1

:phase4_ok
echo [%date% %time%] All phases complete >> "%LOG_FILE%"

REM Closing banner; pause keeps the window open when started by double-click.
echo.
echo ============================================================================
echo BACKFILL COMPLETE
echo ============================================================================
echo Log file: %LOG_FILE%
echo ============================================================================

pause
|