""" Offline training script. Run once to build bucket assignments and train continuation models: cd /mnt/dolphinng5_predict siloqy-env python adaptive_exit/train.py Artifacts written: adaptive_exit/models/bucket_assignments.pkl adaptive_exit/models/continuation_models.pkl adaptive_exit/models/training_data.parquet (optional, for audit) """ import argparse import os import sys sys.path.insert(0, "/mnt/dolphinng5_predict") from adaptive_exit.bucket_engine import build_buckets from adaptive_exit.continuation_model import ContinuationModelBank from adaptive_exit.data_pipeline import build_training_data _MODELS_DIR = os.path.join(os.path.dirname(__file__), "models") _TRAIN_DATA_PATH = os.path.join(_MODELS_DIR, "training_data.parquet") def main(): parser = argparse.ArgumentParser(description="Train adaptive exit models") parser.add_argument("--k", type=int, default=None, help="Force bucket count (default: auto)") parser.add_argument("--save-data", action="store_true", help="Save training parquet for audit") parser.add_argument("--force-rebuild", action="store_true", help="Rebuild buckets even if cached") parser.add_argument("--vbt-dir", default="/mnt/dolphinng5_predict/vbt_cache", help="VBT parquet dir for training data generation") parser.add_argument("--klines-dir", default="/mnt/dolphin_training/data/vbt_cache_klines", help="1m klines dir for asset bucketing") args = parser.parse_args() os.makedirs(_MODELS_DIR, exist_ok=True) # ── Step 1: Build asset buckets ────────────────────────────────────────── print("\n=== STEP 1: Asset Bucketing ===") bucket_data = build_buckets( klines_dir=args.klines_dir, k_override=args.k, force_rebuild=args.force_rebuild, ) print(f"Buckets: {bucket_data['n_buckets']} | Assets: {len(bucket_data['assignments'])}") # ── Step 2: Build training data from price series ──────────────────────── print("\n=== STEP 2: Generate MAE/MFE Training Data ===") df = build_training_data( bucket_assignments=bucket_data["assignments"], vbt_dir=args.vbt_dir, use_obf_ch=False, # OBF is live-only (13 days); zero-fill training, bolt on at Phase 2 ) print(f"Training data shape: {df.shape}") print(f"Bucket distribution:\n{df.groupby('bucket_id').size().describe()}") print(f"Continuation rate: {df['continuation'].mean():.3f}") if args.save_data: df.to_parquet(_TRAIN_DATA_PATH) print(f"Training data saved → {_TRAIN_DATA_PATH}") # ── Step 3: Train continuation models ──────────────────────────────────── print("\n=== STEP 3: Train Continuation Models ===") bank = ContinuationModelBank() bank.train(df) bank.save() print(f"\nModel summary: {bank.summary()}") print("\nDone.") if __name__ == "__main__": main()