# Source: DOLPHIN/prod/continuous_test_flow.py (372 lines, 13 KiB, Python)
"""
continuous_test_flow.py
=======================
Prefect flow: runs all integrity test suites on a staggered, continuous
schedule and publishes results to run_logs/test_results_latest.json
(picked up by the TUI footer and MHS M6 sensor).
Schedules (Nyquist-optimal run at least 2× per expected detection window):
data_integrity every 7 min HZ schema + Arrow file freshness
finance_fuzz every 20 min Financial invariants, capital bounds
signal_fill every 10 min Signal path, latency, dedup
degradation every 60 min Kill/revive tests (destructive, slow)
actor every 15 min MHS, ACB, scan-bridge integration
Stagger offset (minutes):
data_integrity +0
finance_fuzz +2
signal_fill +4
degradation +6 (only on full-hour runs)
actor +8
Register:
python3 continuous_test_flow.py --register
Run once (manual):
python3 continuous_test_flow.py
Run single suite:
python3 continuous_test_flow.py --suite data_integrity
"""
import argparse
import json
import re
import subprocess
import sys
import tempfile
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

from prefect import flow, task, get_run_logger
from prefect.client.schemas.schedules import CronSchedule as CS
# ── Paths ───────────────────────────────────────────────────────────────────
_ROOT = Path(__file__).parent.parent  # dolphinng5_predict
_TESTS_DIR = Path(__file__).parent / "tests"  # pytest suites live next to this file
_TUI_DIR = _ROOT / "Observability" / "TUI"
_RESULTS = _ROOT / "run_logs" / "test_results_latest.json"  # consumed by TUI footer / MHS M6
_PYTHON = sys.executable  # siloqy_env python
# Make the TUI package importable so results can be pushed straight to its footer.
sys.path.insert(0, str(_TUI_DIR))
try:
    from dolphin_tui_v3 import write_test_results
    _WTR_OK = True
except Exception:
    # TUI module unavailable — _push() falls back to writing the JSON file directly.
    _WTR_OK = False
# ── Suite definitions ────────────────────────────────────────────────────────
# Each suite: (test_file, category, timeout_s, extra_pytest_args)
# The category string is forwarded to pytest as --category=<name> and the
# timeout as --timeout=<s> (see _run_suite).
SUITES = {
    "data_integrity": (
        _TESTS_DIR / "test_data_integrity.py",
        "data_integrity",
        120,
        ["-x", "--tb=short", "-q"],  # fail-fast: first failure is enough
    ),
    "finance_fuzz": (
        _TESTS_DIR / "test_finance_fuzz.py",
        "finance_fuzz",
        180,
        ["--tb=short", "-q"],
    ),
    "signal_fill": (
        _TESTS_DIR / "test_signal_to_fill.py",
        "signal_fill",
        150,
        ["--tb=short", "-q"],
    ),
    "degradation": (
        _TESTS_DIR / "test_degradational.py",
        "degradation",
        300,
        ["--tb=short", "-q", "-m", "not slow"],  # skip marked-slow E2E kills in light run
    ),
    "actor": (
        _TESTS_DIR / "test_mhs_v3.py",
        "actor",
        180,
        ["--tb=short", "-q", "-m", "not live_integration"],
    ),
}
# ── Helpers ──────────────────────────────────────────────────────────────────
def _run_suite(name: str, test_file: Path, category: str,
               timeout: int, extra_args: list) -> dict:
    """
    Run a pytest suite as a subprocess; return a result dict for write_test_results.

    Parameters:
        name: suite key (used for the temp json-report filename).
        test_file: pytest file to run.
        category: forwarded as --category=<category>.
        timeout: per-suite pytest timeout in seconds (subprocess gets +30 s grace).
        extra_args: additional pytest CLI arguments.

    Returns a dict with keys "passed", "total", "status" ("PASS"/"FAIL"/"N/A"),
    and either "elapsed_s" or a "note" explaining an abnormal termination.

    Precise counts come from pytest-json-report. If that plugin is missing,
    pytest aborts with a usage error (exit code 4, "unrecognized arguments")
    before running any test — in that case the suite is re-run WITHOUT the
    json-report flags and counts are parsed from stdout instead.
    """
    # Portable temp location (the previous hard-coded /tmp broke on Windows).
    json_out = Path(tempfile.gettempdir()) / f"dolphin_pytest_{name}.json"
    base_cmd = [
        _PYTHON, "-m", "pytest",
        str(test_file),
        f"--category={category}",
        "--no-header",
        f"--timeout={timeout}",
    ] + extra_args

    def _invoke(cmd: list) -> subprocess.CompletedProcess:
        # One place for the subprocess settings shared by both attempts.
        return subprocess.run(
            cmd,
            capture_output=True, text=True,
            timeout=timeout + 30,
            cwd=str(_TESTS_DIR.parent),
        )

    start = time.monotonic()
    try:
        proc = _invoke(base_cmd + ["--json-report", f"--json-report-file={json_out}"])
        if proc.returncode == 4 and "unrecognized arguments" in (proc.stderr + proc.stdout):
            # pytest-json-report not installed: retry without the plugin flags
            # so the suite still actually runs (stdout parsing below).
            proc = _invoke(base_cmd)
        exit_code = proc.returncode
    except subprocess.TimeoutExpired:
        return {"passed": None, "total": None, "status": "FAIL",
                "note": f"timeout after {timeout}s"}
    except Exception as e:
        return {"passed": None, "total": None, "status": "FAIL", "note": str(e)}
    elapsed = time.monotonic() - start

    # Parse json-report if available
    passed = failed = total = None
    if json_out.exists():
        try:
            summary = json.loads(json_out.read_text()).get("summary", {})
            passed = summary.get("passed", 0)
            failed = summary.get("failed", 0) + summary.get("error", 0)
            total = passed + failed + summary.get("skipped", 0)
        except Exception:
            pass  # corrupt report — fall through to stdout parsing
        finally:
            try:
                json_out.unlink()
            except Exception:
                pass
    if passed is None:
        # Fallback: parse stdout/stderr for "X passed" / "Y failed"
        out = proc.stdout + proc.stderr
        m = re.search(r"(\d+) passed", out)
        if m:
            passed = int(m.group(1))
        m = re.search(r"(\d+) failed", out)
        if m:
            failed = int(m.group(1))
        total = (passed or 0) + (failed or 0)
        if total == 0 and exit_code == 0:
            passed, failed, total = 0, 0, 0  # no tests collected

    status = "PASS" if exit_code == 0 and (failed or 0) == 0 else "FAIL"
    if total == 0:
        status = "N/A"  # nothing ran — neither a pass nor a fail
    return {
        "passed": passed,
        "total": total,
        "status": status,
        "elapsed_s": round(elapsed, 1),
    }
def _push(results: dict):
    """Publish *results* to the TUI footer, or merge them into the JSON file directly."""
    if _WTR_OK:
        try:
            write_test_results(results)
        except Exception:
            pass  # TUI writer broke at runtime — use the direct-write path below
        else:
            return
    # Direct write fallback: merge into whatever is already on disk.
    merged = {}
    try:
        if _RESULTS.exists():
            merged = json.loads(_RESULTS.read_text())
    except Exception:
        merged = {}
    merged["_run_at"] = datetime.now(timezone.utc).isoformat()
    merged.update(results)
    _RESULTS.parent.mkdir(parents=True, exist_ok=True)
    _RESULTS.write_text(json.dumps(merged, indent=2))
# ── Prefect tasks ─────────────────────────────────────────────────────────────
@task(name="run_data_integrity", retries=1, retry_delay_seconds=30, timeout_seconds=150)
def task_data_integrity():
log = get_run_logger()
name, (f, cat, t, args) = "data_integrity", SUITES["data_integrity"]
log.info(f"{name}")
r = _run_suite(name, f, cat, t, args)
_push({name: r})
log.info(f" {name}: {r['status']} {r.get('passed')}/{r.get('total')} ({r.get('elapsed_s')}s)")
return r
@task(name="run_finance_fuzz", retries=1, retry_delay_seconds=30, timeout_seconds=210)
def task_finance_fuzz():
log = get_run_logger()
name, (f, cat, t, args) = "finance_fuzz", SUITES["finance_fuzz"]
log.info(f"{name}")
r = _run_suite(name, f, cat, t, args)
_push({name: r})
log.info(f" {name}: {r['status']} {r.get('passed')}/{r.get('total')} ({r.get('elapsed_s')}s)")
return r
@task(name="run_signal_fill", retries=1, retry_delay_seconds=30, timeout_seconds=180)
def task_signal_fill():
log = get_run_logger()
name, (f, cat, t, args) = "signal_fill", SUITES["signal_fill"]
log.info(f"{name}")
r = _run_suite(name, f, cat, t, args)
_push({name: r})
log.info(f" {name}: {r['status']} {r.get('passed')}/{r.get('total')} ({r.get('elapsed_s')}s)")
return r
@task(name="run_degradation", retries=0, timeout_seconds=360)
def task_degradation():
log = get_run_logger()
name, (f, cat, t, args) = "degradation", SUITES["degradation"]
log.info(f"{name}")
r = _run_suite(name, f, cat, t, args)
_push({name: r})
log.info(f" {name}: {r['status']} {r.get('passed')}/{r.get('total')} ({r.get('elapsed_s')}s)")
return r
@task(name="run_actor", retries=1, retry_delay_seconds=30, timeout_seconds=210)
def task_actor():
log = get_run_logger()
name, (f, cat, t, args) = "actor", SUITES["actor"]
log.info(f"{name}")
r = _run_suite(name, f, cat, t, args)
_push({name: r})
log.info(f" {name}: {r['status']} {r.get('passed')}/{r.get('total')} ({r.get('elapsed_s')}s)")
return r
# ── Light flow: runs every 7 minutes (data + signal stagger) ─────────────────
@flow(name="dolphin-tests-light", log_prints=True)
def light_test_flow(suite: Optional[str] = None):
"""
Fast/frequent suites data_integrity (0s) and signal_fill (+30s stagger).
Scheduled every 7 minutes.
"""
log = get_run_logger()
log.info("=== Light test flow ===")
if suite == "data_integrity" or suite is None:
task_data_integrity()
if suite == "signal_fill" or suite is None:
time.sleep(30) # stagger to avoid bursting HZ simultaneously
task_signal_fill()
# ── Medium flow: runs every 20 minutes (finance_fuzz + actor) ────────────────
@flow(name="dolphin-tests-medium", log_prints=True)
def medium_test_flow(suite: Optional[str] = None):
"""
Medium-cadence suites finance_fuzz (0s) and actor (+60s stagger).
Scheduled every 20 minutes.
"""
log = get_run_logger()
log.info("=== Medium test flow ===")
if suite == "finance_fuzz" or suite is None:
task_finance_fuzz()
if suite == "actor" or suite is None:
time.sleep(60)
task_actor()
# ── Heavy flow: runs every 60 minutes (degradation only) ─────────────────────
@flow(name="dolphin-tests-heavy", log_prints=True)
def heavy_test_flow():
"""
Destructive/slow suites degradation (kill/revive E2E).
Scheduled every 60 minutes.
"""
log = get_run_logger()
log.info("=== Heavy test flow ===")
task_degradation()
# ── Full suite flow: runs every 60 minutes at offset +8 min ──────────────────
@flow(name="dolphin-tests-full", log_prints=True)
def full_test_flow():
"""All suites sequentially — used as nightly or on-demand full sweep."""
log = get_run_logger()
log.info("=== Full test flow ===")
task_data_integrity()
time.sleep(15)
task_finance_fuzz()
time.sleep(15)
task_signal_fill()
time.sleep(15)
task_actor()
time.sleep(15)
task_degradation()
# ── CLI ───────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import os
    # Point at the local Prefect server unless the caller already set an API URL.
    os.environ.setdefault("PREFECT_API_URL", "http://localhost:4200/api")
    parser = argparse.ArgumentParser()
    parser.add_argument("--register", action="store_true",
                        help="Register all deployments with Prefect")
    parser.add_argument("--suite", default=None,
                        choices=list(SUITES.keys()) + ["full"],
                        help="Run a single suite locally without Prefect")
    args = parser.parse_args()
    if args.register:
        # Light: every 7 minutes
        light_test_flow.to_deployment(
            name="dolphin-tests-light",
            schedule=CS(cron="*/7 * * * *", timezone="UTC"),
            work_pool_name="dolphin",
            tags=["integrity", "light"],
        ).apply()
        # Medium: every 20 minutes, offset +2 min (fires at minutes 2, 22, 42)
        medium_test_flow.to_deployment(
            name="dolphin-tests-medium",
            schedule=CS(cron="2-59/20 * * * *", timezone="UTC"),
            work_pool_name="dolphin",
            tags=["integrity", "medium"],
        ).apply()
        # Heavy: every 60 minutes, offset +6 min
        heavy_test_flow.to_deployment(
            name="dolphin-tests-heavy",
            schedule=CS(cron="6 * * * *", timezone="UTC"),
            work_pool_name="dolphin",
            tags=["integrity", "heavy"],
        ).apply()
        print("Registered: dolphin-tests-light (*/7), dolphin-tests-medium (2,22,42), dolphin-tests-heavy (:06)")
    elif args.suite == "full":
        full_test_flow()
    elif args.suite:
        # Run single suite directly (subprocess pytest, no Prefect orchestration)
        name = args.suite
        f, cat, t, extra = SUITES[name]
        result = _run_suite(name, f, cat, t, extra)
        _push({name: result})
        print(f"{name}: {result}")
    else:
        # Default: run light + medium inline (manual check)
        light_test_flow()
        medium_test_flow()