# Source: DOLPHIN/nautilus_dolphin/klines_pipeline_orchestrator.py
# (file-viewer listing header removed: 374 lines, 14 KiB, Python)
"""
Klines Pipeline Orchestrator
=============================
Watches the backfill job completion, validates files, then chains:
[1] historical_klines_backfiller.py (already running as babejerbg)
-> wait for: "Arrow files written:" in output log
[2] Validate arrow files (random sample of 2026 dates)
[3] ng5_arrow_to_vbt_cache.py --all (converts 2026 arrow_klines -> vbt_cache_klines)
[4] Validate parquets (2026 dates in vbt_cache_klines)
[5] test_pf_klines_2y_experiment.py (full ~795-day run)
Run: python klines_pipeline_orchestrator.py
"""
import sys, time, subprocess, shutil, json, random
from pathlib import Path
from datetime import datetime
# Replace undecodable characters rather than crashing on Windows code-page stdout.
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
# ── Machine-specific paths & settings (edit here when relocating) ──────────────
PYTHON = r"C:\Users\Lenovo\Documents\- Siloqy\Scripts\python.exe"  # interpreter used for child processes
HCM = Path(r"C:\Users\Lenovo\Documents\- DOLPHIN NG HD HCM TSF Predict")  # HCM project root
BACKFILL = Path(r"C:\Users\Lenovo\Documents\- Dolphin NG Backfill")  # backfill project root
ARROW_KL = BACKFILL / "backfilled_data" / "arrow_klines"  # per-date dirs of scan_*.arrow files (step 1 output)
VBT_KL = HCM / "vbt_cache_klines"  # daily parquets written by step 3
EXPERIMENT = HCM / "nautilus_dolphin" / "test_pf_klines_2y_experiment.py"  # step 5 script
CONVERTER = HCM / "ng5_arrow_to_vbt_cache.py"  # step 3 converter script
BACKFILL_LOG = Path(r"C:\Users\Lenovo\AppData\Local\Temp\claude\C--Users-Lenovo-Documents---Dolphin-NG-HD--NG3-\tasks\babejerbg.output")  # step 1 tails this
POLL_INTERVAL = 30 # seconds between polls
# ── Helpers ────────────────────────────────────────────────────────────────────
def ts():
    """Return the current local time formatted as HH:MM:SS."""
    now = datetime.now()
    return now.strftime('%H:%M:%S')


def log(msg):
    """Print a timestamped line and flush immediately (keeps live tailing responsive)."""
    stamp = ts()
    print(f"[{stamp}] {msg}", flush=True)
def disk_free_gb():
    """Return free space on drive C: in gibibytes."""
    return shutil.disk_usage(r"C:\\").free / (1024 ** 3)
def check_disk():
    """Log the current free space on C: and warn below the 10 GB threshold.

    Returns:
        float: free space in GB (so callers can make their own decisions).
    """
    free = disk_free_gb()
    log(f"Disk free: {free:.1f} GB")
    low_water_gb = 10
    if free < low_water_gb:
        log("WARNING: < 10 GB free — monitor closely")
    return free
def dir_size_mb(path: Path) -> float:
    """Return the total size of all regular files under *path*, in MiB.

    A missing path counts as 0.0 rather than raising.
    """
    if not path.exists():
        return 0.0
    n_bytes = 0
    for entry in path.rglob('*'):
        if entry.is_file():
            n_bytes += entry.stat().st_size
    return n_bytes / (1024 ** 2)
# ── Step 1: Poll backfill log for completion ───────────────────────────────────
def wait_for_backfill():
    """Step 1: block until the external backfill job reports completion.

    Tails BACKFILL_LOG every POLL_INTERVAL seconds, echoing any new lines.
    Returns True once the literal marker 'Arrow files written:' appears in
    the log. Errors/tracebacks seen in the log are reported but NOT fatal —
    the loop keeps watching (they may be transient). Roughly every 5 minutes
    it also logs disk headroom and 2026-date-dir progress under ARROW_KL.

    NOTE(review): this function only ever returns True; if the backfill job
    dies without emitting the marker, the orchestrator polls forever and
    must be stopped manually.
    """
    log("=" * 60)
    log("STEP 1: Waiting for klines backfill to complete...")
    log(f" Polling: {BACKFILL_LOG}")
    log(f" Poll interval: {POLL_INTERVAL}s")
    last_line_count = 0  # number of log lines already echoed on previous polls
    last_disk_check = time.time()
    while True:
        time.sleep(POLL_INTERVAL)
        # Read log tail
        if BACKFILL_LOG.exists():
            try:
                lines = BACKFILL_LOG.read_text(encoding='utf-8', errors='replace').splitlines()
            except Exception:
                # Transient read failure (e.g. writer holds the file) — retry next poll.
                lines = []
            # Echo only lines we have not printed before.
            new_lines = lines[last_line_count:]
            last_line_count = len(lines)
            for line in new_lines:
                if line.strip():
                    print(f" >> {line}", flush=True)
            # Completion markers
            full_text = '\n'.join(lines)
            if 'Arrow files written:' in full_text:
                log("Backfill COMPLETE — 'Arrow files written:' detected")
                # print final summary lines
                for line in lines[-10:]:
                    if line.strip():
                        print(f" SUMMARY: {line}", flush=True)
                return True
            # Scan only the text before the completion marker for 'Error'
            # (split()[0] is the whole text here, since the marker branch
            # above already returned when the marker is present).
            if 'Traceback' in full_text or 'Error' in full_text.split('Arrow files written:')[0]:
                last_error = [l for l in lines if 'Error' in l or 'Traceback' in l][-3:]
                log(f"ERROR detected in backfill log:")
                for l in last_error:
                    print(f" ERROR: {l}", flush=True)
                # Continue watching — might be a non-fatal error
        else:
            log(f" Log file not found yet: {BACKFILL_LOG}")
        # Periodic disk + progress check
        now = time.time()
        if now - last_disk_check > 300: # every 5 min
            check_disk()
            n_2026 = len([d for d in ARROW_KL.iterdir() if d.is_dir() and d.name.startswith('2026')]) if ARROW_KL.exists() else 0
            kl_mb = dir_size_mb(ARROW_KL)
            log(f" Progress: {n_2026}/64 2026 date dirs in arrow_klines | total dir size: {kl_mb:.0f} MB")
            last_disk_check = now
# ── Step 2: Validate arrow files ───────────────────────────────────────────────
def validate_arrow_files():
    """Step 2: spot-check the 2026 arrow_klines output for structural sanity.

    Samples up to 5 date dirs spread across the range; within each, reads 3
    single-row Arrow files (start / middle / end of day) and verifies:
    required fields present, plausible timestamp_ns, a sane BTC price, and
    non-zero w50/w150 velocities past warmup.

    Returns:
        bool: True when every sampled file passes, False otherwise.

    Fixes vs. previous version:
      * Guards against a missing ARROW_KL dir (iterdir() used to raise
        FileNotFoundError instead of failing cleanly).
      * Uses the module-level `json` import instead of re-importing json
        inside the inner file loop.
      * The per-date OK log line no longer relies on loop-leaked `prices`/
        `row` variables; the last clean file's data is captured explicitly.
    """
    import pyarrow as pa
    import pyarrow.ipc as ipc
    log("=" * 60)
    log("STEP 2: Validating 2026 arrow_klines files...")
    REQUIRED_FIELDS = [
        'timestamp_ns', 'w50_velocity', 'w150_velocity', 'w300_velocity',
        'w750_velocity', 'vel_div', 'w50_instability', 'w150_instability',
        'assets_json', 'asset_prices_json',
    ]
    if not ARROW_KL.exists():
        log(f"ERROR: arrow_klines base dir not found: {ARROW_KL}")
        return False
    dates_2026 = sorted([d.name for d in ARROW_KL.iterdir()
                         if d.is_dir() and d.name.startswith('2026')])
    if not dates_2026:
        log("ERROR: No 2026 date dirs found in arrow_klines!")
        return False
    log(f" Found {len(dates_2026)} 2026 date dirs: {dates_2026[0]} .. {dates_2026[-1]}")
    # Sample 5 dates spread across the range
    sample_dates = [dates_2026[i] for i in
                    sorted(random.sample(range(len(dates_2026)), min(5, len(dates_2026))))]
    errors = []
    for date_str in sample_dates:
        date_dir = ARROW_KL / date_str
        arrow_files = sorted(date_dir.glob('scan_*.arrow'))
        n_files = len(arrow_files)
        if n_files < 1400:  # ~1440 one-minute scans expected per full day
            errors.append(f"{date_str}: only {n_files} arrow files (expected ~1440)")
            continue
        # Sample 3 files from beginning, middle, end of day
        indices = [0, n_files // 2, n_files - 1]
        file_errors = []
        last_ok = None  # (prices, row) of the most recent fully-clean file, for the OK summary
        for idx in indices:
            af = arrow_files[idx]
            try:
                with pa.memory_map(str(af), 'r') as src:
                    table = ipc.open_file(src).read_all()
                    if len(table) != 1:
                        file_errors.append(f"{af.name}: {len(table)} rows (expected 1)")
                        continue
                    row = {col: table.column(col)[0].as_py() for col in table.column_names}
                    missing = [f for f in REQUIRED_FIELDS if f not in row]
                    if missing:
                        file_errors.append(f"{af.name}: missing fields {missing}")
                        continue
                    ts_ns = row.get('timestamp_ns', 0)
                    # ns-epoch for any modern date is far above 1e15
                    if not ts_ns or ts_ns < 1e15:
                        file_errors.append(f"{af.name}: bad timestamp_ns={ts_ns}")
                        continue
                    # `json` is imported at module level; no per-file re-import.
                    prices = json.loads(row.get('asset_prices_json', '[]') or '[]')
                    if not prices or prices[0] is None or float(prices[0]) < 1000:
                        file_errors.append(f"{af.name}: BTC price suspicious: {prices[:3]}")
                        continue
                    v50 = row.get('w50_velocity', 0)
                    v150 = row.get('w150_velocity', 0)
                    if v50 == 0.0 and v150 == 0.0 and idx > 0:
                        file_errors.append(f"{af.name}: both w50/w150 velocity = 0.0 (warmup issue?)")
                        continue
                    last_ok = (prices, row)
            except Exception as e:
                file_errors.append(f"{af.name}: read error: {e}")
        if file_errors or last_ok is None:
            errors.append(f"{date_str} ({n_files} files): " + "; ".join(file_errors))
        else:
            prices, row = last_ok
            log(f" {date_str}: OK ({n_files} files, BTC=${float(prices[0]):.0f}, "
                f"vel_div={float(row.get('vel_div',0)):.4f})")
    if errors:
        log(f"VALIDATION ERRORS ({len(errors)}):")
        for e in errors:
            log(f" ERR: {e}")
        return False
    log(f"Arrow validation PASSED — {len(sample_dates)} sampled dates all OK")
    return True
# ── Step 3: VBT conversion ─────────────────────────────────────────────────────
def run_vbt_conversion():
    """Step 3: run the arrow→vbt converter (`--all`) as a child process.

    Captures the converter's stdout/stderr, then reports how many new
    parquets appeared in VBT_KL and how much disk the conversion consumed.

    Returns:
        bool: True on exit code 0, False otherwise.
    """
    log("=" * 60)
    log("STEP 3: Converting 2026 arrow_klines -> vbt_cache_klines...")
    log(f" arrow_base : {ARROW_KL}")
    log(f" out_dir : {VBT_KL}")
    free_before = disk_free_gb()
    n_before = len(list(VBT_KL.glob('*.parquet')))
    log(f" Parquets before: {n_before} | Disk free: {free_before:.1f} GB")
    cmd = [
        PYTHON, str(CONVERTER),
        '--all',
        '--ng5-arrow-base', str(ARROW_KL),
        '--out-dir', str(VBT_KL),
    ]
    log(f" Running: {' '.join(cmd)}")
    proc = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8', errors='replace')
    print(proc.stdout, flush=True)
    if proc.stderr:
        # Cap stderr echo to keep the orchestrator log readable.
        print("STDERR:", proc.stderr[:2000], flush=True)
    n_after = len(list(VBT_KL.glob('*.parquet')))
    free_after = disk_free_gb()
    log(f" Parquets after: {n_after} (+{n_after - n_before} new) | Disk free: {free_after:.1f} GB")
    log(f" Disk used by conversion: {free_before - free_after:.2f} GB")
    if proc.returncode != 0:
        log(f"ERROR: converter exited with code {proc.returncode}")
        return False
    return True
# ── Step 4: Validate parquets ──────────────────────────────────────────────────
def validate_parquets():
    """Step 4: spot-check the 2026 vbt_cache_klines parquets.

    Samples up to 5 parquet files spread across the 2026 range and checks:
    required columns, row count (~1439/day), BTCUSDT null count and price
    range, the zero-velocity fraction, and the 5th/95th vel_div percentiles.

    Returns:
        bool: True when all sampled parquets pass, False otherwise.

    Fix vs. previous version: the vel_div null count was computed but never
    used (dead code) and has been removed.
    """
    import pandas as pd
    log("=" * 60)
    log("STEP 4: Validating 2026 vbt_cache_klines parquets...")
    REQUIRED_COLS = [
        'timestamp', 'v50_lambda_max_velocity', 'v150_lambda_max_velocity',
        'v300_lambda_max_velocity', 'v750_lambda_max_velocity',
        'vel_div', 'instability_50', 'instability_150', 'BTCUSDT',
    ]
    pqs_2026 = sorted(VBT_KL.glob('2026*.parquet'))
    if not pqs_2026:
        log("ERROR: No 2026 parquets found in vbt_cache_klines!")
        return False
    log(f" Found {len(pqs_2026)} 2026 parquets: {pqs_2026[0].stem} .. {pqs_2026[-1].stem}")
    # Sample up to 5 files spread evenly across the date range.
    sample = [pqs_2026[i] for i in
              sorted(random.sample(range(len(pqs_2026)), min(5, len(pqs_2026))))]
    errors = []
    for pf in sample:
        try:
            df = pd.read_parquet(pf)
        except Exception as e:
            errors.append(f"{pf.name}: read error: {e}")
            continue
        missing_cols = [c for c in REQUIRED_COLS if c not in df.columns]
        if missing_cols:
            errors.append(f"{pf.name}: missing columns {missing_cols}")
            continue
        n_rows = len(df)
        if n_rows < 1400:
            errors.append(f"{pf.name}: only {n_rows} rows (expected ~1439)")
            continue
        # Per-file quality metrics.
        null_btc = df['BTCUSDT'].isna().sum()
        null_v50 = df['v50_lambda_max_velocity'].isna().sum()
        zero_vel = (df['v50_lambda_max_velocity'] == 0.0).sum()
        btc_min = df['BTCUSDT'].min()
        btc_max = df['BTCUSDT'].max()
        vd_p5 = df['vel_div'].quantile(0.05)
        vd_p95 = df['vel_div'].quantile(0.95)
        row_errors = []
        if null_btc > 5:
            row_errors.append(f"BTCUSDT null={null_btc}")
        if btc_min < 1000:
            row_errors.append(f"BTC min={btc_min:.0f} (suspicious)")
        if zero_vel > n_rows * 0.10: # >10% zero velocities beyond warmup
            row_errors.append(f"w50 velocity zero={zero_vel}/{n_rows}")
        if abs(vd_p5) > 50 or abs(vd_p95) > 50:
            row_errors.append(f"vel_div range suspicious: p5={vd_p5:.3f}, p95={vd_p95:.3f}")
        if row_errors:
            errors.append(f"{pf.name}: " + "; ".join(row_errors))
        else:
            log(f" {pf.stem}: OK {n_rows} rows | BTC=[{btc_min:.0f},{btc_max:.0f}] "
                f"| vd=[{vd_p5:.3f},{vd_p95:.3f}] | null_v50={null_v50}")
    if errors:
        log(f"VALIDATION ERRORS ({len(errors)}):")
        for e in errors:
            log(f" ERR: {e}")
        return False
    log(f"Parquet validation PASSED — {len(sample)} sampled dates all OK")
    return True
# ── Step 5: Run experiment ─────────────────────────────────────────────────────
def run_experiment():
    """Step 5: launch the full ~795-day klines experiment as a child process.

    The child's output streams straight to this console (not captured).

    Returns:
        bool: True on exit code 0, False otherwise.
    """
    log("=" * 60)
    log("STEP 5: Running klines fractal experiment (~795 days)...")
    log(f" Script: {EXPERIMENT}")
    free_before = disk_free_gb()
    started = time.time()
    proc = subprocess.run(
        [PYTHON, str(EXPERIMENT)],
        cwd=str(HCM / "nautilus_dolphin"),
        capture_output=False, # stream to stdout live
        text=True,
    )
    elapsed = time.time() - started
    free_after = disk_free_gb()
    log(f"Experiment finished in {elapsed/60:.1f} min | exit code: {proc.returncode}")
    log(f"Disk free: {free_after:.1f} GB (used {free_before - free_after:.2f} GB)")
    if proc.returncode != 0:
        log("ERROR: experiment exited non-zero")
        return False
    return True
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
    """Run the five pipeline steps in order; exit(1) on the first failed step."""
    log("Klines Pipeline Orchestrator started")
    check_disk()
    # Step 1: wait for backfill
    if not wait_for_backfill():
        log("ABORT: backfill did not complete cleanly")
        sys.exit(1)
    check_disk()
    # Step 2: validate arrow files
    if not validate_arrow_files():
        log("ABORT: arrow file validation failed — inspect errors before proceeding")
        sys.exit(1)
    # Step 3: VBT conversion
    if not run_vbt_conversion():
        log("ABORT: VBT conversion failed")
        sys.exit(1)
    check_disk()
    # Step 4: validate parquets
    if not validate_parquets():
        log("ABORT: parquet validation failed — inspect errors before proceeding")
        sys.exit(1)
    # Step 5: run experiment
    if not run_experiment():
        log("ABORT: experiment failed")
        sys.exit(1)
    log("=" * 60)
    log("PIPELINE COMPLETE. Check run_logs/ for klines_2y_*.json results.")
if __name__ == '__main__':
    main()