initial: import DOLPHIN baseline 2026-04-21 from dolphinng5_predict working tree
Includes core prod + GREEN/BLUE subsystems: - prod/ (BLUE harness, configs, scripts, docs) - nautilus_dolphin/ (GREEN Nautilus-native impl + dvae/ preserved) - adaptive_exit/ (AEM engine + models/bucket_assignments.pkl) - Observability/ (EsoF advisor, TUI, dashboards) - external_factors/ (EsoF producer) - mc_forewarning_qlabs_fork/ (MC regime/envelope) Excludes runtime caches, logs, backups, and reproducible artifacts per .gitignore.
This commit is contained in:
371
prod/continuous_test_flow.py
Executable file
371
prod/continuous_test_flow.py
Executable file
@@ -0,0 +1,371 @@
|
||||
"""
|
||||
continuous_test_flow.py
|
||||
=======================
|
||||
Prefect flow: runs all integrity test suites on a staggered, continuous
|
||||
schedule and publishes results to run_logs/test_results_latest.json
|
||||
(picked up by the TUI footer and MHS M6 sensor).
|
||||
|
||||
Schedules (Nyquist-optimal — run at least 2× per expected detection window):
    data_integrity  every 7 min   — HZ schema + Arrow file freshness
    signal_fill     every 7 min   — Signal path, latency, dedup (light flow, +30 s in-flow stagger)
    finance_fuzz    every 20 min  — Financial invariants, capital bounds
    actor           every 20 min  — MHS, ACB, scan-bridge integration (medium flow, +60 s in-flow stagger)
    degradation     every 60 min  — Kill/revive tests (destructive, slow)

Flow stagger offsets (minutes):
    light  (data_integrity + signal_fill)  +0
    medium (finance_fuzz + actor)          +2
    heavy  (degradation)                   +6 (full-hour runs only)
|
||||
|
||||
Register:
|
||||
python3 continuous_test_flow.py --register
|
||||
|
||||
Run once (manual):
|
||||
python3 continuous_test_flow.py
|
||||
|
||||
Run single suite:
|
||||
python3 continuous_test_flow.py --suite data_integrity
|
||||
"""
|
||||
|
||||
import argparse
import json
import re
import subprocess
import sys
import tempfile
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

from prefect import flow, task, get_run_logger
from prefect.client.schemas.schedules import CronSchedule as CS
|
||||
|
||||
# ── Paths ───────────────────────────────────────────────────────────────────
_ROOT = Path(__file__).parent.parent  # dolphinng5_predict
_TESTS_DIR = Path(__file__).parent / "tests"   # pytest suites live next to this flow
_TUI_DIR = _ROOT / "Observability" / "TUI"     # holds dolphin_tui_v3 (footer writer)
_RESULTS = _ROOT / "run_logs" / "test_results_latest.json"  # fallback output file
_PYTHON = sys.executable  # siloqy_env python

# Make the TUI writer importable; if the module (or any of its dependencies)
# is unavailable, _push() falls back to writing _RESULTS directly, so a broken
# TUI install must never take the test flows down with it.
sys.path.insert(0, str(_TUI_DIR))
try:
    from dolphin_tui_v3 import write_test_results
    _WTR_OK = True
except Exception:
    _WTR_OK = False
|
||||
|
||||
# ── Suite definitions ────────────────────────────────────────────────────────
# Spec table: (suite name == pytest --category value, test module, timeout_s,
# extra pytest args). SUITES is derived below in the shape _run_suite expects:
# name -> (test_file, category, timeout_s, extra_pytest_args).
_SUITE_SPECS = (
    ("data_integrity", "test_data_integrity.py", 120,
     ["-x", "--tb=short", "-q"]),  # fail-fast: first failure is enough
    ("finance_fuzz", "test_finance_fuzz.py", 180,
     ["--tb=short", "-q"]),
    ("signal_fill", "test_signal_to_fill.py", 150,
     ["--tb=short", "-q"]),
    ("degradation", "test_degradational.py", 300,
     ["--tb=short", "-q", "-m", "not slow"]),  # skip marked-slow E2E kills in light run
    ("actor", "test_mhs_v3.py", 180,
     ["--tb=short", "-q", "-m", "not live_integration"]),
)

SUITES = {
    suite_name: (_TESTS_DIR / module_file, suite_name, timeout_s, pytest_args)
    for suite_name, module_file, timeout_s, pytest_args in _SUITE_SPECS
}
|
||||
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def _run_suite(name: str, test_file: Path, category: str,
               timeout: int, extra_args: list) -> dict:
    """
    Run one pytest suite as a subprocess; return a result dict for _push /
    write_test_results.

    Parameters
    ----------
    name : str
        Suite key; used to name the temporary json-report file.
    test_file : Path
        Pytest file to execute.
    category : str
        Passed through as ``--category=`` (project-specific pytest option).
    timeout : int
        Per-test pytest timeout (seconds); the subprocess itself gets an
        extra 30 s grace before being killed.
    extra_args : list
        Additional pytest CLI arguments.

    Returns
    -------
    dict
        ``{"passed", "total", "status"}`` plus ``"elapsed_s"`` on the normal
        path or ``"note"`` on timeout / launch failure. ``status`` is
        "PASS", "FAIL", or "N/A" (nothing collected/ran).

    Counts come from pytest's ``--json-report`` output when the plugin is
    installed, falling back to parsing the terminal summary line, and finally
    to the exit code alone.
    """
    # Portable temp location (was hardcoded /tmp, which breaks on Windows).
    json_out = Path(tempfile.gettempdir()) / f"dolphin_pytest_{name}.json"
    # Drop any stale report left by a previous run that died before cleanup
    # (e.g. a subprocess timeout), so we never parse outdated counts.
    try:
        json_out.unlink()
    except OSError:
        pass

    cmd = [
        _PYTHON, "-m", "pytest",
        str(test_file),
        f"--category={category}",
        "--no-header",
        f"--timeout={timeout}",
    ] + extra_args

    # Request the machine-readable report for precise counts; if the plugin
    # is missing pytest fails fast and we land in the fallback parsing below.
    cmd_jreport = cmd + ["--json-report", f"--json-report-file={json_out}"]

    start = time.monotonic()
    try:
        proc = subprocess.run(
            cmd_jreport,
            capture_output=True, text=True,
            timeout=timeout + 30,
            cwd=str(_TESTS_DIR.parent),
        )
        exit_code = proc.returncode
    except subprocess.TimeoutExpired:
        return {"passed": None, "total": None, "status": "FAIL",
                "note": f"timeout after {timeout}s"}
    except Exception as e:
        return {"passed": None, "total": None, "status": "FAIL", "note": str(e)}

    elapsed = time.monotonic() - start

    # Preferred source of truth: the json-report summary.
    passed = failed = total = None
    if json_out.exists():
        try:
            jr = json.loads(json_out.read_text())
            summary = jr.get("summary", {})
            passed = summary.get("passed", 0)
            failed = summary.get("failed", 0) + summary.get("error", 0)
            total = passed + failed + summary.get("skipped", 0)
        except Exception:
            pass  # malformed report — fall through to stdout parsing
        finally:
            try:
                json_out.unlink()
            except OSError:
                pass

    if passed is None:
        # Fallback: parse pytest's terminal summary ("X passed", "Y failed").
        out = proc.stdout + proc.stderr
        m = re.search(r"(\d+) passed", out)
        if m:
            passed = int(m.group(1))
        m = re.search(r"(\d+) failed", out)
        if m:
            failed = int(m.group(1))
        total = (passed or 0) + (failed or 0)
        if total == 0 and exit_code == 0:
            passed, failed, total = 0, 0, 0  # no tests collected

    status = "PASS" if exit_code == 0 and (failed or 0) == 0 else "FAIL"
    if total == 0:
        status = "N/A"  # nothing ran — neither a pass nor a fail

    return {
        "passed": passed,
        "total": total,
        "status": status,
        "elapsed_s": round(elapsed, 1),
    }
|
||||
|
||||
|
||||
def _push(results: dict):
    """Write results dict to run_logs + TUI footer."""
    if _WTR_OK:
        try:
            write_test_results(results)
            return
        except Exception:
            pass  # TUI writer failed — fall back to the direct file write
    # Direct write fallback: merge the new results into whatever is on disk.
    existing = {}
    if _RESULTS.exists():
        try:
            existing = json.loads(_RESULTS.read_text())
        except Exception:
            existing = {}
    existing["_run_at"] = datetime.now(timezone.utc).isoformat()
    existing.update(results)
    _RESULTS.parent.mkdir(parents=True, exist_ok=True)
    _RESULTS.write_text(json.dumps(existing, indent=2))
|
||||
|
||||
|
||||
# ── Prefect tasks ─────────────────────────────────────────────────────────────
|
||||
|
||||
@task(name="run_data_integrity", retries=1, retry_delay_seconds=30, timeout_seconds=150)
def task_data_integrity():
    """Run the data_integrity suite once and publish its result."""
    logger = get_run_logger()
    suite = "data_integrity"
    test_file, category, suite_timeout, pytest_args = SUITES[suite]
    logger.info(f"▶ {suite}")
    result = _run_suite(suite, test_file, category, suite_timeout, pytest_args)
    _push({suite: result})
    logger.info(f" {suite}: {result['status']} {result.get('passed')}/{result.get('total')} ({result.get('elapsed_s')}s)")
    return result
|
||||
|
||||
|
||||
@task(name="run_finance_fuzz", retries=1, retry_delay_seconds=30, timeout_seconds=210)
def task_finance_fuzz():
    """Run the finance_fuzz suite once and publish its result."""
    logger = get_run_logger()
    suite = "finance_fuzz"
    test_file, category, suite_timeout, pytest_args = SUITES[suite]
    logger.info(f"▶ {suite}")
    result = _run_suite(suite, test_file, category, suite_timeout, pytest_args)
    _push({suite: result})
    logger.info(f" {suite}: {result['status']} {result.get('passed')}/{result.get('total')} ({result.get('elapsed_s')}s)")
    return result
|
||||
|
||||
|
||||
@task(name="run_signal_fill", retries=1, retry_delay_seconds=30, timeout_seconds=180)
def task_signal_fill():
    """Run the signal_fill suite once and publish its result."""
    logger = get_run_logger()
    suite = "signal_fill"
    test_file, category, suite_timeout, pytest_args = SUITES[suite]
    logger.info(f"▶ {suite}")
    result = _run_suite(suite, test_file, category, suite_timeout, pytest_args)
    _push({suite: result})
    logger.info(f" {suite}: {result['status']} {result.get('passed')}/{result.get('total')} ({result.get('elapsed_s')}s)")
    return result
|
||||
|
||||
|
||||
@task(name="run_degradation", retries=0, timeout_seconds=360)
def task_degradation():
    """Run the degradation suite once (destructive; no retries) and publish."""
    logger = get_run_logger()
    suite = "degradation"
    test_file, category, suite_timeout, pytest_args = SUITES[suite]
    logger.info(f"▶ {suite}")
    result = _run_suite(suite, test_file, category, suite_timeout, pytest_args)
    _push({suite: result})
    logger.info(f" {suite}: {result['status']} {result.get('passed')}/{result.get('total')} ({result.get('elapsed_s')}s)")
    return result
|
||||
|
||||
|
||||
@task(name="run_actor", retries=1, retry_delay_seconds=30, timeout_seconds=210)
def task_actor():
    """Run the actor suite once and publish its result."""
    logger = get_run_logger()
    suite = "actor"
    test_file, category, suite_timeout, pytest_args = SUITES[suite]
    logger.info(f"▶ {suite}")
    result = _run_suite(suite, test_file, category, suite_timeout, pytest_args)
    _push({suite: result})
    logger.info(f" {suite}: {result['status']} {result.get('passed')}/{result.get('total')} ({result.get('elapsed_s')}s)")
    return result
|
||||
|
||||
|
||||
# ── Light flow: runs every 7 minutes (data + signal stagger) ─────────────────
|
||||
|
||||
@flow(name="dolphin-tests-light", log_prints=True)
def light_test_flow(suite: Optional[str] = None):
    """
    Fast/frequent suites — data_integrity first, then signal_fill after a
    30-second stagger. Scheduled every 7 minutes; pass ``suite`` to run only
    one of the two.
    """
    logger = get_run_logger()
    logger.info("=== Light test flow ===")

    run_all = suite is None
    if run_all or suite == "data_integrity":
        task_data_integrity()

    if run_all or suite == "signal_fill":
        # Stagger so both suites don't hit HZ at the same instant.
        time.sleep(30)
        task_signal_fill()
|
||||
|
||||
|
||||
# ── Medium flow: runs every 20 minutes (finance_fuzz + actor) ────────────────
|
||||
|
||||
@flow(name="dolphin-tests-medium", log_prints=True)
def medium_test_flow(suite: Optional[str] = None):
    """
    Medium-cadence suites — finance_fuzz first, then actor after a 60-second
    stagger. Scheduled every 20 minutes; pass ``suite`` to run only one.
    """
    logger = get_run_logger()
    logger.info("=== Medium test flow ===")

    run_all = suite is None
    if run_all or suite == "finance_fuzz":
        task_finance_fuzz()

    if run_all or suite == "actor":
        # Stagger the second suite to spread load.
        time.sleep(60)
        task_actor()
|
||||
|
||||
|
||||
# ── Heavy flow: runs every 60 minutes (degradation only) ─────────────────────
|
||||
|
||||
@flow(name="dolphin-tests-heavy", log_prints=True)
def heavy_test_flow():
    """Destructive/slow suites — degradation (kill/revive E2E), hourly."""
    logger = get_run_logger()
    logger.info("=== Heavy test flow ===")
    task_degradation()
|
||||
|
||||
|
||||
# ── Full suite flow: runs every 60 minutes at offset +8 min ──────────────────
|
||||
|
||||
@flow(name="dolphin-tests-full", log_prints=True)
def full_test_flow():
    """All suites sequentially — used as nightly or on-demand full sweep."""
    logger = get_run_logger()
    logger.info("=== Full test flow ===")
    # Fixed order, 15 s pause between consecutive suites.
    sequence = (
        task_data_integrity,
        task_finance_fuzz,
        task_signal_fill,
        task_actor,
        task_degradation,
    )
    for position, run_suite_task in enumerate(sequence):
        if position:
            time.sleep(15)
        run_suite_task()
|
||||
|
||||
|
||||
# ── CLI ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    import os
    os.environ.setdefault("PREFECT_API_URL", "http://localhost:4200/api")

    parser = argparse.ArgumentParser()
    parser.add_argument("--register", action="store_true",
                        help="Register all deployments with Prefect")
    parser.add_argument("--suite", default=None,
                        choices=list(SUITES.keys()) + ["full"],
                        help="Run a single suite locally without Prefect")
    cli = parser.parse_args()

    if cli.register:
        # (flow, deployment name, cron, tags) — all on the "dolphin" work pool.
        # Crons stagger the flows: light */7, medium +2 every 20, heavy :06.
        deployments = (
            (light_test_flow, "dolphin-tests-light", "*/7 * * * *",
             ["integrity", "light"]),
            (medium_test_flow, "dolphin-tests-medium", "2-59/20 * * * *",
             ["integrity", "medium"]),
            (heavy_test_flow, "dolphin-tests-heavy", "6 * * * *",
             ["integrity", "heavy"]),
        )
        for flow_fn, dep_name, cron_expr, dep_tags in deployments:
            flow_fn.to_deployment(
                name=dep_name,
                schedule=CS(cron=cron_expr, timezone="UTC"),
                work_pool_name="dolphin",
                tags=dep_tags,
            ).apply()

        print("Registered: dolphin-tests-light (*/7), dolphin-tests-medium (2,22,42), dolphin-tests-heavy (:06)")

    elif cli.suite == "full":
        full_test_flow()

    elif cli.suite:
        # Run one suite directly, bypassing Prefect orchestration.
        f, cat, t, extra = SUITES[cli.suite]
        result = _run_suite(cli.suite, f, cat, t, extra)
        _push({cli.suite: result})
        print(f"{cli.suite}: {result}")

    else:
        # Default: run light + medium inline (manual check)
        light_test_flow()
        medium_test_flow()
|
||||
Reference in New Issue
Block a user