# Source: DOLPHIN/prod/continuous_test_flow.py (372 lines, 13 KiB, Python)
"""
continuous_test_flow.py
=======================
Prefect flow: runs all integrity test suites on a staggered, continuous
schedule and publishes results to run_logs/test_results_latest.json
(picked up by the TUI footer and MHS M6 sensor).
Schedules (Nyquist-optimal run at least 2× per expected detection window):
data_integrity every 7 min HZ schema + Arrow file freshness
finance_fuzz every 20 min Financial invariants, capital bounds
signal_fill every 10 min Signal path, latency, dedup
degradation every 60 min Kill/revive tests (destructive, slow)
actor every 15 min MHS, ACB, scan-bridge integration
Stagger offset (minutes):
data_integrity +0
finance_fuzz +2
signal_fill +4
degradation +6 (only on full-hour runs)
actor +8
Register:
python3 continuous_test_flow.py --register
Run once (manual):
python3 continuous_test_flow.py
Run single suite:
python3 continuous_test_flow.py --suite data_integrity
"""
import argparse
import json
import re
import subprocess
import sys
import tempfile
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

from prefect import flow, task, get_run_logger
from prefect.client.schemas.schedules import CronSchedule as CS
# ── Paths ───────────────────────────────────────────────────────────────────
_ROOT = Path(__file__).parent.parent  # dolphinng5_predict
_TESTS_DIR = Path(__file__).parent / "tests"  # pytest suites live next to this file
_TUI_DIR = _ROOT / "Observability" / "TUI"
_RESULTS = _ROOT / "run_logs" / "test_results_latest.json"  # consumed by TUI footer / MHS M6
_PYTHON = sys.executable  # siloqy_env python
# Make the TUI package importable so results can be pushed straight to its footer.
sys.path.insert(0, str(_TUI_DIR))
try:
    from dolphin_tui_v3 import write_test_results
    _WTR_OK = True
except Exception:
    # TUI module unavailable — _push() falls back to writing the JSON file directly.
    _WTR_OK = False
# ── Suite definitions ────────────────────────────────────────────────────────
# Each suite: (test_file, category, timeout_s, extra_pytest_args)
# The category string is forwarded to pytest as --category=<name> and the
# timeout as --timeout=<s> (see _run_suite).
SUITES = {
    "data_integrity": (
        _TESTS_DIR / "test_data_integrity.py",
        "data_integrity",
        120,
        ["-x", "--tb=short", "-q"],  # fail-fast: first failure is enough
    ),
    "finance_fuzz": (
        _TESTS_DIR / "test_finance_fuzz.py",
        "finance_fuzz",
        180,
        ["--tb=short", "-q"],
    ),
    "signal_fill": (
        _TESTS_DIR / "test_signal_to_fill.py",
        "signal_fill",
        150,
        ["--tb=short", "-q"],
    ),
    "degradation": (
        _TESTS_DIR / "test_degradational.py",
        "degradation",
        300,
        ["--tb=short", "-q", "-m", "not slow"],  # skip marked-slow E2E kills in light run
    ),
    "actor": (
        _TESTS_DIR / "test_mhs_v3.py",
        "actor",
        180,
        ["--tb=short", "-q", "-m", "not live_integration"],
    ),
}
# ── Helpers ──────────────────────────────────────────────────────────────────
def _run_suite(name: str, test_file: Path, category: str,
               timeout: int, extra_args: list) -> dict:
    """
    Run a pytest suite as a subprocess; return a result dict for write_test_results.

    Parameters:
        name: suite key (used for the temp json-report filename).
        test_file: pytest file to run.
        category: forwarded as --category=<category>.
        timeout: per-suite pytest timeout in seconds (subprocess gets +30 s grace).
        extra_args: additional pytest CLI arguments.

    Returns a dict with keys "passed", "total", "status" ("PASS"/"FAIL"/"N/A"),
    and either "elapsed_s" or a "note" explaining an abnormal termination.

    Precise counts come from pytest-json-report. If that plugin is missing,
    pytest aborts with a usage error (exit code 4, "unrecognized arguments")
    before running any test — in that case the suite is re-run WITHOUT the
    json-report flags and counts are parsed from stdout instead.
    """
    # Portable temp location (the previous hard-coded /tmp broke on Windows).
    json_out = Path(tempfile.gettempdir()) / f"dolphin_pytest_{name}.json"
    base_cmd = [
        _PYTHON, "-m", "pytest",
        str(test_file),
        f"--category={category}",
        "--no-header",
        f"--timeout={timeout}",
    ] + extra_args

    def _invoke(cmd: list) -> subprocess.CompletedProcess:
        # One place for the subprocess settings shared by both attempts.
        return subprocess.run(
            cmd,
            capture_output=True, text=True,
            timeout=timeout + 30,
            cwd=str(_TESTS_DIR.parent),
        )

    start = time.monotonic()
    try:
        proc = _invoke(base_cmd + ["--json-report", f"--json-report-file={json_out}"])
        if proc.returncode == 4 and "unrecognized arguments" in (proc.stderr + proc.stdout):
            # pytest-json-report not installed: retry without the plugin flags
            # so the suite still actually runs (stdout parsing below).
            proc = _invoke(base_cmd)
        exit_code = proc.returncode
    except subprocess.TimeoutExpired:
        return {"passed": None, "total": None, "status": "FAIL",
                "note": f"timeout after {timeout}s"}
    except Exception as e:
        return {"passed": None, "total": None, "status": "FAIL", "note": str(e)}
    elapsed = time.monotonic() - start

    # Parse json-report if available
    passed = failed = total = None
    if json_out.exists():
        try:
            summary = json.loads(json_out.read_text()).get("summary", {})
            passed = summary.get("passed", 0)
            failed = summary.get("failed", 0) + summary.get("error", 0)
            total = passed + failed + summary.get("skipped", 0)
        except Exception:
            pass  # corrupt report — fall through to stdout parsing
        finally:
            try:
                json_out.unlink()
            except Exception:
                pass
    if passed is None:
        # Fallback: parse stdout/stderr for "X passed" / "Y failed"
        out = proc.stdout + proc.stderr
        m = re.search(r"(\d+) passed", out)
        if m:
            passed = int(m.group(1))
        m = re.search(r"(\d+) failed", out)
        if m:
            failed = int(m.group(1))
        total = (passed or 0) + (failed or 0)
        if total == 0 and exit_code == 0:
            passed, failed, total = 0, 0, 0  # no tests collected

    status = "PASS" if exit_code == 0 and (failed or 0) == 0 else "FAIL"
    if total == 0:
        status = "N/A"  # nothing ran — neither a pass nor a fail
    return {
        "passed": passed,
        "total": total,
        "status": status,
        "elapsed_s": round(elapsed, 1),
    }
def _push(results: dict):
    """Publish *results* to the TUI footer, or merge them into the JSON file directly."""
    if _WTR_OK:
        try:
            write_test_results(results)
        except Exception:
            pass  # TUI writer broke at runtime — use the direct-write path below
        else:
            return
    # Direct write fallback: merge into whatever is already on disk.
    merged = {}
    try:
        if _RESULTS.exists():
            merged = json.loads(_RESULTS.read_text())
    except Exception:
        merged = {}
    merged["_run_at"] = datetime.now(timezone.utc).isoformat()
    merged.update(results)
    _RESULTS.parent.mkdir(parents=True, exist_ok=True)
    _RESULTS.write_text(json.dumps(merged, indent=2))
# ── Prefect tasks ─────────────────────────────────────────────────────────────
@task(name="run_data_integrity", retries=1, retry_delay_seconds=30, timeout_seconds=150)
def task_data_integrity():
log = get_run_logger()
name, (f, cat, t, args) = "data_integrity", SUITES["data_integrity"]
log.info(f"{name}")
r = _run_suite(name, f, cat, t, args)
_push({name: r})
log.info(f" {name}: {r['status']} {r.get('passed')}/{r.get('total')} ({r.get('elapsed_s')}s)")
return r
@task(name="run_finance_fuzz", retries=1, retry_delay_seconds=30, timeout_seconds=210)
def task_finance_fuzz():
log = get_run_logger()
name, (f, cat, t, args) = "finance_fuzz", SUITES["finance_fuzz"]
log.info(f"{name}")
r = _run_suite(name, f, cat, t, args)
_push({name: r})
log.info(f" {name}: {r['status']} {r.get('passed')}/{r.get('total')} ({r.get('elapsed_s')}s)")
return r
@task(name="run_signal_fill", retries=1, retry_delay_seconds=30, timeout_seconds=180)
def task_signal_fill():
log = get_run_logger()
name, (f, cat, t, args) = "signal_fill", SUITES["signal_fill"]
log.info(f"{name}")
r = _run_suite(name, f, cat, t, args)
_push({name: r})
log.info(f" {name}: {r['status']} {r.get('passed')}/{r.get('total')} ({r.get('elapsed_s')}s)")
return r
@task(name="run_degradation", retries=0, timeout_seconds=360)
def task_degradation():
log = get_run_logger()
name, (f, cat, t, args) = "degradation", SUITES["degradation"]
log.info(f"{name}")
r = _run_suite(name, f, cat, t, args)
_push({name: r})
log.info(f" {name}: {r['status']} {r.get('passed')}/{r.get('total')} ({r.get('elapsed_s')}s)")
return r
@task(name="run_actor", retries=1, retry_delay_seconds=30, timeout_seconds=210)
def task_actor():
log = get_run_logger()
name, (f, cat, t, args) = "actor", SUITES["actor"]
log.info(f"{name}")
r = _run_suite(name, f, cat, t, args)
_push({name: r})
log.info(f" {name}: {r['status']} {r.get('passed')}/{r.get('total')} ({r.get('elapsed_s')}s)")
return r
# ── Light flow: runs every 7 minutes (data + signal stagger) ─────────────────
@flow(name="dolphin-tests-light", log_prints=True)
def light_test_flow(suite: Optional[str] = None):
"""
Fast/frequent suites data_integrity (0s) and signal_fill (+30s stagger).
Scheduled every 7 minutes.
"""
log = get_run_logger()
log.info("=== Light test flow ===")
if suite == "data_integrity" or suite is None:
task_data_integrity()
if suite == "signal_fill" or suite is None:
time.sleep(30) # stagger to avoid bursting HZ simultaneously
task_signal_fill()
# ── Medium flow: runs every 20 minutes (finance_fuzz + actor) ────────────────
@flow(name="dolphin-tests-medium", log_prints=True)
def medium_test_flow(suite: Optional[str] = None):
"""
Medium-cadence suites finance_fuzz (0s) and actor (+60s stagger).
Scheduled every 20 minutes.
"""
log = get_run_logger()
log.info("=== Medium test flow ===")
if suite == "finance_fuzz" or suite is None:
task_finance_fuzz()
if suite == "actor" or suite is None:
time.sleep(60)
task_actor()
# ── Heavy flow: runs every 60 minutes (degradation only) ─────────────────────
@flow(name="dolphin-tests-heavy", log_prints=True)
def heavy_test_flow():
"""
Destructive/slow suites degradation (kill/revive E2E).
Scheduled every 60 minutes.
"""
log = get_run_logger()
log.info("=== Heavy test flow ===")
task_degradation()
# ── Full suite flow: runs every 60 minutes at offset +8 min ──────────────────
@flow(name="dolphin-tests-full", log_prints=True)
def full_test_flow():
"""All suites sequentially — used as nightly or on-demand full sweep."""
log = get_run_logger()
log.info("=== Full test flow ===")
task_data_integrity()
time.sleep(15)
task_finance_fuzz()
time.sleep(15)
task_signal_fill()
time.sleep(15)
task_actor()
time.sleep(15)
task_degradation()
# ── CLI ───────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import os
    # Point at the local Prefect server unless the caller already set an API URL.
    os.environ.setdefault("PREFECT_API_URL", "http://localhost:4200/api")
    parser = argparse.ArgumentParser()
    parser.add_argument("--register", action="store_true",
                        help="Register all deployments with Prefect")
    parser.add_argument("--suite", default=None,
                        choices=list(SUITES.keys()) + ["full"],
                        help="Run a single suite locally without Prefect")
    args = parser.parse_args()
    if args.register:
        # Light: every 7 minutes
        light_test_flow.to_deployment(
            name="dolphin-tests-light",
            schedule=CS(cron="*/7 * * * *", timezone="UTC"),
            work_pool_name="dolphin",
            tags=["integrity", "light"],
        ).apply()
        # Medium: every 20 minutes, offset +2 min (fires at minutes 2, 22, 42)
        medium_test_flow.to_deployment(
            name="dolphin-tests-medium",
            schedule=CS(cron="2-59/20 * * * *", timezone="UTC"),
            work_pool_name="dolphin",
            tags=["integrity", "medium"],
        ).apply()
        # Heavy: every 60 minutes, offset +6 min
        heavy_test_flow.to_deployment(
            name="dolphin-tests-heavy",
            schedule=CS(cron="6 * * * *", timezone="UTC"),
            work_pool_name="dolphin",
            tags=["integrity", "heavy"],
        ).apply()
        print("Registered: dolphin-tests-light (*/7), dolphin-tests-medium (2,22,42), dolphin-tests-heavy (:06)")
    elif args.suite == "full":
        full_test_flow()
    elif args.suite:
        # Run single suite directly (subprocess pytest, no Prefect orchestration)
        name = args.suite
        f, cat, t, extra = SUITES[name]
        result = _run_suite(name, f, cat, t, extra)
        _push({name: result})
        print(f"{name}: {result}")
    else:
        # Default: run light + medium inline (manual check)
        light_test_flow()
        medium_test_flow()