#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
DOLPHIN Scan Bridge Prefect Daemon
|
||
|
|
===================================
|
||
|
|
Phase 2 Implementation: Prefect-managed long-running daemon for scan bridge.
|
||
|
|
|
||
|
|
This daemon supervises the scan bridge service, providing:
|
||
|
|
- Automatic restart on crash
|
||
|
|
- Health monitoring (data freshness)
|
||
|
|
- Centralized logging via Prefect
|
||
|
|
- Integration with DOLPHIN orchestration
|
||
|
|
|
||
|
|
Usage:
|
||
|
|
# Deploy to Prefect
|
||
|
|
prefect deployment build scan_bridge_prefect_daemon.py:scan_bridge_daemon_flow \
|
||
|
|
--name "scan-bridge-daemon" --pool dolphin-daemon-pool
|
||
|
|
|
||
|
|
# Start worker
|
||
|
|
prefect worker start --pool dolphin-daemon-pool
|
||
|
|
"""
|
||
|
|
|
||
|
|
import sys
|
||
|
|
import time
|
||
|
|
import json
|
||
|
|
import signal
|
||
|
|
import subprocess
|
||
|
|
import threading
|
||
|
|
from datetime import datetime, timezone
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import Optional, Dict, Any
|
||
|
|
|
||
|
|
# Add paths
|
||
|
|
sys.path.insert(0, '/mnt/dolphinng5_predict')
|
||
|
|
sys.path.insert(0, '/mnt/dolphinng5_predict/nautilus_dolphin')
|
||
|
|
|
||
|
|
# Prefect imports
|
||
|
|
from prefect import flow, task, get_run_logger
|
||
|
|
from prefect.runtime import flow_run
|
||
|
|
from prefect.states import Completed, Failed
|
||
|
|
|
||
|
|
# Hazelcast for health checks
|
||
|
|
try:
|
||
|
|
import hazelcast
|
||
|
|
HAZELCAST_AVAILABLE = True
|
||
|
|
except ImportError:
|
||
|
|
HAZELCAST_AVAILABLE = False
|
||
|
|
|
||
|
|
|
||
|
|
# Constants
# Path to the scan bridge service this daemon supervises.
DAEMON_SCRIPT = "/mnt/dolphinng5_predict/prod/scan_bridge_service.py"
HEALTH_CHECK_INTERVAL = 30  # seconds between health checks in the daemon loop
DATA_STALE_THRESHOLD = 60  # seconds (critical: data this old triggers a restart)
DATA_WARNING_THRESHOLD = 30  # seconds (warning only; no action taken)
RESTART_DELAY = 5  # seconds between restart attempts
MAX_RESTART_ATTEMPTS = 3  # give up restarting after this many consecutive tries
|
||
|
|
|
||
|
|
|
||
|
|
class ScanBridgeProcess:
    """Manages the scan bridge subprocess: start, stop, and log forwarding.

    Not safe for concurrent start()/stop() from multiple threads; it is
    intended to be driven by a single supervising Prefect flow.
    """

    def __init__(self):
        # Handle to the running subprocess (None when not started/stopped).
        self.process: Optional[subprocess.Popen] = None
        # UTC timestamp of the last successful start, for uptime reporting.
        self.start_time: Optional[datetime] = None
        # Consecutive restart attempts; reset to 0 on a successful start.
        self.restart_count = 0
        # Signals the log-monitor thread to stop forwarding output.
        self._stop_event = threading.Event()
        self._monitor_thread: Optional[threading.Thread] = None

    def start(self) -> bool:
        """Start the scan bridge subprocess.

        Returns:
            True if the process is still alive ~2s after launch (or was
            already running); False if it exited immediately or failed
            to spawn.
        """
        logger = get_run_logger()

        if self.process and self.process.poll() is None:
            logger.warning("Process already running")
            return True

        # BUGFIX: clear the stop flag so the log monitor started below keeps
        # running after a stop()/start() cycle (the restart path). Previously
        # the event stayed set and the new monitor thread exited on its first
        # line, silently dropping all bridge output after a restart.
        self._stop_event.clear()

        logger.info(f"🚀 Starting scan bridge (attempt {self.restart_count + 1})...")

        try:
            self.process = subprocess.Popen(
                [sys.executable, DAEMON_SCRIPT],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
                bufsize=1,  # Line buffered
                cwd="/mnt/dolphinng5_predict/prod"
            )

            # Give the service a moment to fail fast on bad config/imports.
            time.sleep(2)

            if self.process.poll() is None:
                self.start_time = datetime.now(timezone.utc)
                self.restart_count = 0
                logger.info(f"✅ Scan bridge started (PID: {self.process.pid})")

                # Start log monitor thread
                self._start_log_monitor()
                return True
            else:
                logger.error(f"❌ Process exited immediately with code {self.process.poll()}")
                return False

        except Exception as e:
            logger.error(f"❌ Failed to start: {e}")
            return False

    def stop(self, timeout: int = 10) -> None:
        """Stop the scan bridge subprocess gracefully (SIGTERM, then SIGKILL).

        Args:
            timeout: seconds to wait for graceful termination before killing.
        """
        logger = get_run_logger()

        self._stop_event.set()

        if not self.process:
            return

        if self.process.poll() is not None:
            logger.debug("Process already stopped")
            return

        logger.info(f"🛑 Stopping scan bridge (PID: {self.process.pid})...")

        try:
            # Try graceful shutdown
            self.process.send_signal(signal.SIGTERM)

            # Wait for process to terminate
            try:
                self.process.wait(timeout=timeout)
                logger.info("✅ Process stopped gracefully")
            except subprocess.TimeoutExpired:
                logger.warning("⚠️ Process didn't stop in time, forcing...")
                self.process.kill()
                self.process.wait()
                logger.info("✅ Process killed")

        except Exception as e:
            logger.error(f"Error stopping process: {e}")
        finally:
            self.process = None
            if self._monitor_thread and self._monitor_thread.is_alive():
                self._monitor_thread.join(timeout=2)

    def is_running(self) -> bool:
        """Check if process is running."""
        return self.process is not None and self.process.poll() is None

    def get_exit_code(self) -> Optional[int]:
        """Get process exit code if terminated (None if running or never started)."""
        if self.process is None:
            return None
        return self.process.poll()

    def _start_log_monitor(self):
        """Start a daemon thread that forwards subprocess stdout to Prefect logs."""
        if not self.process:
            return

        # BUGFIX: capture the pipe locally. stop() sets self.process = None
        # while this thread may still be reading, which previously raised
        # AttributeError inside the monitor instead of exiting cleanly.
        stdout = self.process.stdout

        def monitor():
            logger = get_run_logger()
            try:
                for line in iter(stdout.readline, ''):
                    if self._stop_event.is_set():
                        break
                    line = line.strip()
                    if line:
                        # Forward to Prefect logs with prefix
                        logger.info(f"[Bridge] {line}")
            except Exception as e:
                logger.debug(f"Log monitor ended: {e}")

        self._monitor_thread = threading.Thread(target=monitor, daemon=True)
        self._monitor_thread.start()
|
||
|
|
|
||
|
|
|
||
|
|
def check_hazelcast_data_freshness() -> Dict[str, Any]:
    """Check freshness of the latest scan data stored in Hazelcast.

    Connects to the local "dolphin" cluster and inspects the
    'latest_eigen_scan' entry of the DOLPHIN_FEATURES map.

    Returns:
        A dict with at least "available". When data is present it also
        includes "scan_number", "asset_count", "data_age_sec", "is_fresh"
        and "is_warning". Never raises: connection/parse errors are
        reported in the "error" key instead.
    """
    if not HAZELCAST_AVAILABLE:
        return {"available": False, "error": "Hazelcast not installed"}

    client = None
    try:
        client = hazelcast.HazelcastClient(
            cluster_name="dolphin",
            cluster_members=["127.0.0.1:5701"],
        )

        features_map = client.get_map('DOLPHIN_FEATURES').blocking()
        val = features_map.get('latest_eigen_scan')

        if not val:
            return {
                "available": True,
                "has_data": False,
                "error": "No latest_eigen_scan in Hazelcast"
            }

        data = json.loads(val)
        mtime = data.get('file_mtime', 0)
        scan_number = data.get('scan_number', 0)
        asset_count = len(data.get('assets', []))

        # Age relative to when the bridge last wrote the scan file; a
        # missing mtime is treated as infinitely stale.
        age_sec = time.time() - mtime if mtime else float('inf')

        return {
            "available": True,
            "has_data": True,
            "scan_number": scan_number,
            "asset_count": asset_count,
            "data_age_sec": age_sec,
            "is_fresh": age_sec < DATA_STALE_THRESHOLD,
            "is_warning": age_sec >= DATA_WARNING_THRESHOLD,
        }

    except Exception as e:
        return {
            "available": True,
            "has_data": False,
            "error": str(e)
        }
    finally:
        # BUGFIX: always release the client. Previously shutdown() was only
        # called on the success paths, so any exception between connect and
        # shutdown leaked the Hazelcast connection (and its threads).
        if client is not None:
            try:
                client.shutdown()
            except Exception:
                pass
|
||
|
|
|
||
|
|
|
||
|
|
@task(name="health-check", retries=2, retry_delay_seconds=5)
def perform_health_check() -> Dict[str, Any]:
    """Run the full health check: process liveness, then data freshness.

    Returns:
        A report dict. "action_required" is None when healthy, otherwise
        one of "restart", "investigate", or "wait".
    """
    logger = get_run_logger()

    report: Dict[str, Any] = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "process_running": False,
        "hazelcast": {},
        "healthy": False,
        "action_required": None,
    }

    # Check 1: is the supervised subprocess alive?
    global bridge_process
    if not (bridge_process and bridge_process.is_running()):
        logger.error("❌ Health check: Process not running")
        report["action_required"] = "restart"
        return report

    report["process_running"] = True
    report["process_pid"] = bridge_process.process.pid
    started = bridge_process.start_time
    report["uptime_sec"] = (
        (datetime.now(timezone.utc) - started).total_seconds() if started else 0
    )

    # Check 2: freshness of the data the bridge pushes into Hazelcast.
    hz = check_hazelcast_data_freshness()
    report["hazelcast"] = hz

    if not hz.get("available"):
        logger.warning("⚠️ Hazelcast unavailable for health check")
        report["action_required"] = "investigate"
        return report

    if not hz.get("has_data"):
        logger.warning("⚠️ No data in Hazelcast yet")
        report["action_required"] = "wait"
        return report

    data_age = hz.get("data_age_sec", float('inf'))

    if data_age > DATA_STALE_THRESHOLD:
        logger.error(f"❌ Data stale: {data_age:.0f}s old (threshold: {DATA_STALE_THRESHOLD}s)")
        report["action_required"] = "restart"
        return report

    if data_age > DATA_WARNING_THRESHOLD:
        logger.warning(f"⚠️ Data warning: {data_age:.0f}s old")
    else:
        logger.info(f"✅ Healthy: data age {data_age:.0f}s, scan #{hz.get('scan_number')}")

    report["healthy"] = True
    return report
|
||
|
|
|
||
|
|
|
||
|
|
@task(name="restart-bridge")
def restart_bridge() -> bool:
    """Restart the scan bridge service.

    Stops the current subprocess (if any), waits RESTART_DELAY seconds,
    and starts a fresh one. Gives up once MAX_RESTART_ATTEMPTS consecutive
    attempts have been made (the counter is reset by a successful start).

    Returns:
        True if the bridge restarted successfully, False otherwise.
    """
    logger = get_run_logger()
    global bridge_process

    # BUGFIX: guard against being invoked before the daemon flow has
    # initialized the process manager — previously this raised an
    # AttributeError on None instead of reporting a failed restart.
    if bridge_process is None:
        logger.error("❌ Bridge process manager not initialized")
        return False

    bridge_process.restart_count += 1

    if bridge_process.restart_count > MAX_RESTART_ATTEMPTS:
        logger.error(f"❌ Max restart attempts ({MAX_RESTART_ATTEMPTS}) exceeded")
        return False

    logger.warning(f"🔄 Restarting bridge (attempt {bridge_process.restart_count}/{MAX_RESTART_ATTEMPTS})...")

    # Stop existing
    bridge_process.stop()
    time.sleep(RESTART_DELAY)

    # Start new
    if bridge_process.start():
        logger.info("✅ Bridge restarted successfully")
        return True
    else:
        logger.error("❌ Bridge restart failed")
        return False
|
||
|
|
|
||
|
|
|
||
|
|
# Global process manager, created by scan_bridge_daemon_flow() and shared
# with the health-check / restart tasks via module-level state.
bridge_process: Optional[ScanBridgeProcess] = None
|
||
|
|
|
||
|
|
|
||
|
|
@flow(
    name="scan-bridge-daemon",
    description="Long-running daemon that supervises the scan bridge service",
    log_prints=True,
)
def scan_bridge_daemon_flow():
    """
    Main daemon flow that runs indefinitely, managing the scan bridge.

    This flow:
    1. Starts the scan bridge subprocess
    2. Monitors health every 30 seconds
    3. Restarts on failure or stale data
    4. Logs all output to Prefect

    Raises:
        RuntimeError: if the initial start fails, a restart fails, or the
            health check fails max_consecutive_failures times in a row.
            The subprocess is always stopped on exit (see finally block).
    """
    global bridge_process

    logger = get_run_logger()
    logger.info("=" * 70)
    logger.info("🐬 DOLPHIN Scan Bridge Daemon (Prefect)")
    logger.info("=" * 70)
    logger.info(f"Health check interval: {HEALTH_CHECK_INTERVAL}s")
    logger.info(f"Data stale threshold: {DATA_STALE_THRESHOLD}s")
    logger.info(f"Script: {DAEMON_SCRIPT}")
    logger.info("=" * 70)

    # Initialize process manager (module-level so the tasks can see it)
    bridge_process = ScanBridgeProcess()

    # Start initial instance
    if not bridge_process.start():
        logger.error("❌ Failed to start scan bridge")
        raise RuntimeError("Initial start failed")

    # Unhealthy checks in a row; any healthy check resets the streak.
    consecutive_failures = 0
    max_consecutive_failures = 5

    try:
        while True:
            # Wait between health checks
            time.sleep(HEALTH_CHECK_INTERVAL)

            # Perform health check
            health = perform_health_check()

            if health["healthy"]:
                consecutive_failures = 0
                continue

            # Not healthy - determine action
            consecutive_failures += 1
            action = health.get("action_required")

            # Escalate: too many bad checks in a row fails the whole flow
            # so Prefect can alert / reschedule it.
            if consecutive_failures >= max_consecutive_failures:
                logger.error(f"❌ Too many consecutive failures ({consecutive_failures})")
                raise RuntimeError("Max failures exceeded")

            if action == "restart":
                if not restart_bridge():
                    logger.error("❌ Restart failed")
                    raise RuntimeError("Restart failed")

            elif action == "investigate":
                logger.warning("⚠️ Manual investigation required")
                # Don't restart, just wait and check again

            elif action == "wait":
                logger.info("⏳ Waiting for data...")
                # Normal for startup

    except KeyboardInterrupt:
        logger.info("\n🛑 Interrupted by user")
    except Exception as e:
        logger.error(f"❌ Daemon error: {e}")
        raise
    finally:
        # Always stop the subprocess on the way out so it isn't orphaned.
        logger.info("🧹 Cleaning up...")
        bridge_process.stop()
        logger.info("✅ Daemon stopped")
|
||
|
|
|
||
|
|
|
||
|
|
@flow(name="scan-bridge-health-check")
def quick_health_check() -> Dict[str, Any]:
    """
    Standalone health check flow for external monitoring.
    Can be scheduled independently for alerting.

    Returns:
        A dict with "status" in {"error", "no_data", "stale", "warning",
        "healthy"} plus the raw Hazelcast probe result.
    """
    logger = get_run_logger()

    # Probe Hazelcast for the latest scan payload.
    hz = check_hazelcast_data_freshness()

    if not hz.get("available"):
        logger.error("❌ Hazelcast unavailable")
        return {"status": "error", "hazelcast": hz}

    if not hz.get("has_data"):
        logger.error("❌ No scan data in Hazelcast")
        return {"status": "no_data", "hazelcast": hz}

    age_sec = hz.get("data_age_sec", 0)

    # Map data age onto a status, logging at matching severity.
    if age_sec > DATA_STALE_THRESHOLD:
        logger.error(f"❌ STALE DATA: {age_sec:.0f}s old")
        status = "stale"
    elif age_sec > DATA_WARNING_THRESHOLD:
        logger.warning(f"⚠️ Data warning: {age_sec:.0f}s old")
        status = "warning"
    else:
        logger.info(f"✅ Healthy: {age_sec:.0f}s old, scan #{hz.get('scan_number')}")
        status = "healthy"

    return {"status": status, "age_sec": age_sec, "hazelcast": hz}
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Run the daemon directly (without a Prefect deployment/worker);
    # blocks until interrupted or the failure limits are hit.
    scan_bridge_daemon_flow()
|