initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree

Includes core prod + GREEN/BLUE subsystems:
- prod/ (BLUE harness, configs, scripts, docs)
- nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved)
- adaptive_exit/ (AEM engine + models/bucket_assignments.pkl)
- Observability/ (EsoF advisor, TUI, dashboards)
- external_factors/ (EsoF producer)
- mc_forewarning_qlabs_fork/ (MC regime/envelope)

Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
hjnormey
2026-04-21 16:58:38 +02:00
commit 01c19662cb
643 changed files with 260241 additions and 0 deletions

View File

@@ -0,0 +1,117 @@
#!/usr/bin/env python3
"""
Batch converter for Arrow to Parquet with progress tracking.
Processes in chunks to avoid timeout issues.
"""
import sys
import time
from pathlib import Path
from datetime import datetime
import json
# Add paths
# Make the prediction project importable (provides ng5_arrow_to_vbt_cache,
# imported lazily inside convert_date below).
sys.path.insert(0, str(Path(r'C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict')))
import pandas as pd
import numpy as np
import pyarrow as pa
import pyarrow.ipc as ipc
# Configuration
ARROW_BASE = Path(r'C:\Users\Lenovo\Documents\- Dolphin NG Backfill\backfilled_data\arrow_klines')  # source: one YYYY-MM-DD folder of *.arrow files per day
OUTPUT_DIR = Path(r'C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\vbt_cache_klines')  # destination: one <date>.parquet per day
LOG_FILE = Path(r'C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict\prod\conversion_batch.log')  # append-only progress log
def log(msg):
    """Print *msg* prefixed with a timestamp and append the same line to LOG_FILE.

    Args:
        msg: Message text to record.
    """
    ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    line = f'[{ts}] {msg}'
    print(line)
    # Explicit UTF-8: the Windows default locale codec (e.g. cp1252) would
    # raise UnicodeEncodeError on non-ASCII symbol names or error messages.
    with open(LOG_FILE, 'a', encoding='utf-8') as f:
        f.write(line + '\n')
def get_arrow_dates(base=None):
    """Return sorted ISO-date directory names that contain Arrow data.

    A subdirectory counts only if its name looks like YYYY-MM-DD
    (10 characters with '-' at index 4) and it holds at least one
    ``*.arrow`` file.

    Args:
        base: Directory to scan. Defaults to the module-level ARROW_BASE,
            so existing callers are unaffected.

    Returns:
        Sorted list of date strings; empty list if the directory is missing.
    """
    root = ARROW_BASE if base is None else Path(base)
    if not root.exists():
        return []
    return sorted(
        d.name
        for d in root.iterdir()
        if d.is_dir()
        and len(d.name) == 10
        and d.name[4] == '-'
        and any(d.glob('*.arrow'))
    )
def get_parquet_dates(out_dir=None):
    """Return sorted stems (date strings) of ``*.parquet`` files.

    Args:
        out_dir: Directory to scan. Defaults to the module-level OUTPUT_DIR,
            so existing callers are unaffected.

    Returns:
        Sorted list of file stems; empty list if the directory is missing.
    """
    root = OUTPUT_DIR if out_dir is None else Path(out_dir)
    if not root.exists():
        return []
    return sorted(f.stem for f in root.glob('*.parquet'))
def convert_date(date_str, force=False):
    """Convert one date's Arrow data to a Parquet file.

    Args:
        date_str: Date in YYYY-MM-DD form.
        force: Re-convert even if the Parquet file already exists.

    Returns:
        A ``(success, status)`` tuple where status is a short diagnostic
        string suitable for logging.
    """
    # Deferred import: the module lives on the sys.path entry added at the
    # top of this script, and we only pay the import cost when converting.
    from ng5_arrow_to_vbt_cache import convert_arrow_date

    target = OUTPUT_DIR / f"{date_str}.parquet"
    if not force and target.exists():
        return True, "already_exists"
    try:
        frame = convert_arrow_date(date_str, arrow_base=ARROW_BASE, out_dir=OUTPUT_DIR, force=force)
    except Exception as e:
        # Best-effort batch job: report the failure, never abort the run.
        return False, f"error_{e}"
    if frame is None:
        return False, "no_data"
    return True, f"converted_{len(frame)}_rows"
def main():
    """Convert every Arrow date that lacks a Parquet file, logging progress.

    Compares the set of available Arrow dates against already-converted
    Parquet dates, converts the difference one date at a time, and logs a
    progress line every 10 files plus a final summary (first 10 failures).
    """
    log('='*70)
    log('BATCH CONVERSION START')
    log('='*70)
    arrow_dates = get_arrow_dates()
    parquet_dates = get_parquet_dates()
    log(f'Arrow dates: {len(arrow_dates)}')
    log(f'Parquet dates: {len(parquet_dates)}')
    # Work queue = dates present as Arrow but missing as Parquet.
    to_convert = sorted(set(arrow_dates) - set(parquet_dates))
    log(f'Dates to convert: {len(to_convert)}')
    if not to_convert:
        log('Nothing to convert - all up to date!')
        return
    log(f'Range: {to_convert[0]} to {to_convert[-1]}')
    # NOTE: the original declared an unused batch_size=50; conversion is
    # sequential per date, so the dead variable has been removed.
    total = len(to_convert)
    converted = 0
    failed = []  # (date_str, status) pairs for the summary below
    for i, date_str in enumerate(to_convert):
        success, status = convert_date(date_str)
        if success:
            converted += 1
        else:
            failed.append((date_str, status))
        # Progress report every 10 files, and always on the final file.
        if (i + 1) % 10 == 0 or i == total - 1:
            pct = (i + 1) / total * 100
            log(f'Progress: {i+1}/{total} ({pct:.1f}%) - Converted: {converted}, Failed: {len(failed)}')
    log('='*70)
    log(f'BATCH COMPLETE: {converted}/{total} converted')
    if failed:
        log(f'Failed dates: {len(failed)}')
        for d, e in failed[:10]:  # cap log spam at 10 failures
            log(f' {d}: {e}')
    log('='*70)
# Script entry point: run the batch conversion when executed directly.
if __name__ == '__main__':
    main()