# DOLPHIN/nautilus_dolphin/test_forewarner_reality.py
#
# Forewarner reality check: compares the ML model's predictions for
# extreme RED/GREEN parameter configurations against the results of
# actually running those configurations through the full backtest.
import os
import sys
import json
import numpy as np
from pathlib import Path
# Adjust paths so the local 'mc' package and the sibling
# 'external_factors' directory are importable when this file is run
# directly as a script (rather than installed as a package).
PROJECT_ROOT = Path(__file__).resolve().parent
sys.path.insert(0, str(PROJECT_ROOT))
sys.path.insert(0, str(PROJECT_ROOT.parent / 'external_factors'))
from mc.mc_ml import DolphinForewarner
from mc.mc_sampler import MCSampler
from mc.mc_executor import MCExecutor
# Directory holding the trained Forewarner model artifacts.
MODELS_DIR = PROJECT_ROOT / "mc_results" / "models"
def run_reality_check():
    """Compare Forewarner ML predictions against real backtest results.

    Samples random trial configurations, selects the predicted-worst
    (RED) and predicted-best (GREEN) extremes, executes both through the
    full vectorized backtest, and prints a verdict on whether the ML
    model's assessment matched reality.
    """
    print("======================================================================")
    print("FOREWARNER REALITY CHECK: PREDICTION VS. ACTUAL PAPER TRADE")
    print("======================================================================")
    forewarner = DolphinForewarner(models_dir=str(MODELS_DIR))
    sampler = MCSampler(base_seed=4242)
    executor = MCExecutor(verbose=False)

    print("[1/3] Generating random parameter gamuts to find extremums...")
    trials = sampler.generate_trials(n_samples_per_switch=20, max_trials=1000)
    # Assess every trial exactly once and keep the (trial, report) pairs.
    # The previous version re-assessed the whole sample inside each
    # fallback branch, running ML inference up to three times per trial.
    assessed = [(trial, forewarner.assess(trial)) for trial in trials]

    worst_red = None
    best_green = None
    for trial, report in assessed:
        # Candidate for a really bad RED configuration.
        if report.catastrophic_probability >= 0.25 or report.envelope_score < -2.0:
            if worst_red is None or report.envelope_score < worst_red['report'].envelope_score:
                worst_red = {'trial': trial, 'report': report}
        # Candidate for a really good GREEN configuration.
        if report.champion_probability >= 0.8 and report.envelope_score > 3.0:
            if best_green is None or report.envelope_score > best_green['report'].envelope_score:
                best_green = {'trial': trial, 'report': report}

    # Fallbacks: if no trial crossed the hard thresholds, take the
    # sample extremes by envelope score instead.
    if worst_red is None:
        print("Couldn't find a deep RED config in this sample, taking the lowest envelope score.")
        trial, report = min(assessed, key=lambda pair: pair[1].envelope_score)
        worst_red = {'trial': trial, 'report': report}
    if best_green is None:
        print("Couldn't find a deep GREEN config in this sample, taking the highest envelope score.")
        trial, report = max(assessed, key=lambda pair: pair[1].envelope_score)
        best_green = {'trial': trial, 'report': report}

    print("\n[2/3] Found Extremums. Running Full Vectorized Backtests (Reality)...")

    # Run RED: the model predicts this configuration blows up.
    print("\n--- RUNNING 'WORST RED' CONFIGURATION ---")
    print(f"Prediction: Envelope Score: {worst_red['report'].envelope_score:.3f} | Catastrophic Prob: {worst_red['report'].catastrophic_probability:.1%} | ROI: {worst_red['report'].predicted_roi:.1f}%")
    _print_key_triggers(worst_red['trial'])
    red_actual = executor.execute_trial(worst_red['trial'])
    _print_actual_result("RED", red_actual)
    if red_actual.max_drawdown_pct > 30.0 or red_actual.roi_pct < 0:
        print(" VERDICT: ML Model correctly identified a CATASTROPHIC blowout run.")
    else:
        print(" VERDICT: ML Model was overly pessimistic.")

    # Run GREEN: the model predicts this configuration is a safe winner.
    print("\n--- RUNNING 'BEST GREEN' CONFIGURATION ---")
    print(f"Prediction: Envelope Score: {best_green['report'].envelope_score:.3f} | Champion Prob: {best_green['report'].champion_probability:.1%} | ROI: {best_green['report'].predicted_roi:.1f}%")
    _print_key_triggers(best_green['trial'])
    green_actual = executor.execute_trial(best_green['trial'])
    _print_actual_result("GREEN", green_actual)
    if green_actual.roi_pct > 30.0 and green_actual.max_drawdown_pct < 20.0:
        print(" VERDICT: ML Model correctly identified a SAFE, HIGH-YIELD champion run.")
    else:
        print(" VERDICT: ML Model was overly optimistic.")


def _print_key_triggers(trial):
    """Print the trial parameters most responsible for the assessment."""
    print(f"Key Triggers: max_leverage={trial.max_leverage:.2f}, fraction={trial.fraction:.2f}, tp={trial.fixed_tp_pct:.4f}")


def _print_actual_result(tag, result):
    """Print the realized backtest metrics for one configuration."""
    print(f"\nACTUAL PIPELINE RESULT ({tag}):")
    print(f" Final ROI: {result.roi_pct:.2f}%")
    print(f" Max Drawdown: {result.max_drawdown_pct:.2f}%")
    print(f" Sharpe Ratio: {result.sharpe_ratio:.2f}")
    print(f" Win Rate: {result.win_rate*100:.1f}%")
    print(f" Trades: {result.n_trades}")


if __name__ == "__main__":
    run_reality_check()