#!/usr/bin/env python3
"""
Dolphin Service Base Class - Boilerplate for reliable userland services

Features:
- Automatic retries with exponential backoff
- Structured logging to journal
- Health check endpoints
- Graceful shutdown on signals
- Systemd notify support (Type=notify)
- Memory/CPU monitoring
"""

import abc
import asyncio
import inspect
import logging
import signal
import sys
import os
import time
import json
from typing import Optional, Callable, Any
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from functools import wraps

# Optional imports - graceful degradation if not available
try:
    from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
    TENACITY_AVAILABLE = True
except ImportError:
    TENACITY_AVAILABLE = False

try:
    # NOTE(review): `Notification` may not be exported by pystemd.daemon; if it
    # is not, this import fails and systemd notification is silently disabled.
    # Confirm against the installed library.
    from pystemd.daemon import notify, Notification
    SYSTEMD_AVAILABLE = True
except ImportError:
    SYSTEMD_AVAILABLE = False

    def notify(*args, **kwargs):
        """No-op stand-in used when systemd bindings are not installed."""
        pass

try:
    import psutil
    PSUTIL_AVAILABLE = True
except ImportError:
    # Health reporting still works without psutil; metrics are reported as 0.
    psutil = None
    PSUTIL_AVAILABLE = False


# Configure logging for systemd journal
class JournalHandler(logging.Handler):
    """Log handler that outputs JSON for systemd journal"""

    def emit(self, record):
        """Print *record* to stdout as a single-line JSON object."""
        try:
            # Use the record's own creation time (not "now") and keep the
            # naive-UTC ISO format the handler has always produced.
            created = datetime.fromtimestamp(record.created, tz=timezone.utc)
            msg = {
                'timestamp': created.replace(tzinfo=None).isoformat(),
                'level': record.levelname,
                'logger': record.name,
                'message': self.format(record),
                'source': getattr(record, 'source', 'unknown'),
                'service': getattr(record, 'service', 'unknown'),
            }
            print(json.dumps(msg), flush=True)
        except Exception:
            self.handleError(record)


class _LogContextFilter(logging.Filter):
    """Copy a mutable context dict onto every record that passes through.

    JournalHandler reads fields such as ``service`` and ``source`` from the
    LogRecord, so the context has to be attached to records rather than to the
    Logger object.
    """

    def __init__(self, context: dict):
        super().__init__()
        self.context = dict(context)

    def filter(self, record):
        for key, value in self.context.items():
            # Never clobber built-in record fields or a per-call ``extra=``.
            if not hasattr(record, key):
                setattr(record, key, value)
        return True


def get_logger(name: str) -> logging.Logger:
    """Get configured logger for services

    The JSON journal handler is attached only once per logger name, so calling
    this repeatedly does not duplicate output.
    """
    logger = logging.getLogger(name)
    if not logger.handlers:
        handler = JournalHandler()
        handler.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
    return logger


@dataclass
class ServiceHealth:
    """Health check status"""
    status: str  # 'healthy', 'degraded', 'unhealthy'
    last_check: float
    uptime: float
    memory_mb: float
    cpu_percent: float
    error_count: int
    message: str

    def to_json(self) -> str:
        """Serialize the health snapshot to a JSON string."""
        return json.dumps(asdict(self))


class ServiceBase(abc.ABC):
    """
    Base class for reliable Dolphin services

    Usage:
        class MyService(ServiceBase):
            def __init__(self):
                super().__init__("my-service", check_interval=30)

            async def run_cycle(self):
                # Your service logic here
                pass

        if __name__ == '__main__':
            service = MyService()
            service.run()
    """

    def __init__(
        self,
        name: str,
        check_interval: float = 30.0,
        max_retries: int = 3,
        notify_systemd: bool = True
    ):
        self.name = name
        self.check_interval = check_interval
        self.max_retries = max_retries
        self.notify_systemd = notify_systemd and SYSTEMD_AVAILABLE

        self.logger = get_logger(f'dolphin.{name}')
        # Kept for backward compatibility with code reading logger.service.
        self.logger.service = name
        # The filter is what actually gets the context into emitted records.
        self._log_context = _LogContextFilter({'service': name})
        self.logger.addFilter(self._log_context)

        self._shutdown_event = asyncio.Event()
        self._shutdown_task: Optional[asyncio.Task] = None
        self._start_time = time.time()
        self._health = ServiceHealth(
            status='starting',
            last_check=time.time(),
            uptime=0.0,
            memory_mb=0.0,
            cpu_percent=0.0,
            error_count=0,
            message='Initializing'
        )
        self._tasks = []

        # One long-lived process handle: psutil's cpu_percent() measures usage
        # since the previous call on the same object, and the first call only
        # establishes the baseline.
        self._process = None
        if PSUTIL_AVAILABLE:
            try:
                self._process = psutil.Process()
                self._process.cpu_percent()
            except psutil.Error:
                self._process = None

        # Signal handlers are installed by run(), once the event loop that
        # will actually serve them is running.

    def _setup_signals(self, loop=None):
        """Setup graceful shutdown handlers

        Args:
            loop: Event loop to register on; defaults to the running loop.
        """
        if loop is None:
            loop = asyncio.get_running_loop()
        for sig in (signal.SIGTERM, signal.SIGINT):
            try:
                loop.add_signal_handler(sig, self._on_signal)
            except (NotImplementedError, RuntimeError, ValueError):
                # Unsupported platform or not the main thread: fall back to
                # the default handling (KeyboardInterrupt for SIGINT).
                self.logger.warning(
                    f"{self.name}: Cannot install handler for signal {sig}"
                )

    def _on_signal(self):
        """Schedule a single shutdown task from the signal callback."""
        if self._shutdown_task is None:
            # Keep a reference so the task is not garbage-collected mid-run.
            self._shutdown_task = asyncio.ensure_future(self._shutdown())

    async def _shutdown(self):
        """Graceful shutdown"""
        self.logger.warning(f"{self.name}: Shutdown signal received")
        self._shutdown_event.set()

        # Cancel all tasks (never the task executing this coroutine)
        current = asyncio.current_task()
        pending = [
            task for task in self._tasks
            if task is not current and not task.done()
        ]
        for task in pending:
            task.cancel()

        # Give tasks time to cleanup, but return as soon as they finish
        if pending:
            await asyncio.wait(pending, timeout=0.5)

    def _update_health(self, status: str, message: str = ''):
        """Update health status

        Memory and CPU figures are best-effort: they are reported as 0.0 when
        psutil is unavailable or sampling fails.
        """
        memory_mb = 0.0
        cpu_percent = 0.0
        process = getattr(self, '_process', None)
        if process is not None:
            try:
                memory_mb = process.memory_info().rss / 1024 / 1024
                cpu_percent = process.cpu_percent()
            except psutil.Error as exc:
                self.logger.debug(f"Process metrics unavailable: {exc}")

        now = time.time()
        self._health = ServiceHealth(
            status=status,
            last_check=now,
            uptime=now - self._start_time,
            memory_mb=memory_mb,
            cpu_percent=cpu_percent,
            error_count=self._health.error_count,
            message=message
        )

    def _log_extra(self, **kwargs):
        """Add extra context to logs

        Values are attached to every subsequent record from this service's
        logger (for example ``source=...``).
        """
        self._log_context.context.update(kwargs)

    def retry_with_backoff(self, func: Callable, **kwargs):
        """Decorator/wrapper for retry logic

        Args:
            func: Callable to wrap.
            **kwargs: ``max_retries`` overrides the instance default.

        Returns:
            The wrapped callable, or *func* unchanged when tenacity is not
            installed.
        """
        if not TENACITY_AVAILABLE:
            return func

        retry_kwargs = {
            'stop': stop_after_attempt(kwargs.get('max_retries', self.max_retries)),
            'wait': wait_exponential(multiplier=1, min=4, max=60),
            'retry': retry_if_exception_type((Exception,)),
            'before_sleep': lambda retry_state: self.logger.warning(
                f"Retry {retry_state.attempt_number}: {retry_state.outcome.exception()}"
            )
        }

        return retry(**retry_kwargs)(func)

    @abc.abstractmethod
    async def run_cycle(self):
        """
        Main service logic - implement this!
        Called repeatedly in the main loop.
        Should be non-blocking or use asyncio.
        """
        pass

    async def health_check(self) -> bool:
        """
        Optional: Implement custom health check
        Return True if healthy, False otherwise
        """
        return True

    async def _health_loop(self):
        """Background health check loop"""
        while not self._shutdown_event.is_set():
            try:
                healthy = await self.health_check()
                if healthy:
                    self._update_health('healthy', 'Service operating normally')
                else:
                    self._update_health('degraded', 'Health check failed')

                # Notify systemd we're still alive
                if self.notify_systemd:
                    notify(Notification.WATCHDOG)

            except Exception as e:
                self._health.error_count += 1
                self._update_health('unhealthy', str(e))
                self.logger.error(f"Health check error: {e}")

            # Sleep for check_interval, waking early if shutdown is requested
            try:
                await asyncio.wait_for(
                    self._shutdown_event.wait(),
                    timeout=self.check_interval
                )
            except asyncio.TimeoutError:
                pass  # Normal - continue loop

    async def _main_loop(self):
        """Main service loop"""
        self.logger.info(f"{self.name}: Starting main loop")

        while not self._shutdown_event.is_set():
            try:
                await self.run_cycle()
                # Guarantee one yield per cycle so a run_cycle() that never
                # awaits cannot starve signal handling and health checks.
                await asyncio.sleep(0)
            except asyncio.CancelledError:
                break
            except Exception as e:
                self._health.error_count += 1
                self.logger.error(f"Cycle error: {e}", exc_info=True)
                # Brief pause before retry
                await asyncio.sleep(1)

    async def _serve(self):
        """Run the health and main loops until shutdown is requested."""
        if sys.version_info < (3, 10):
            # Before 3.10 asyncio primitives bind to the loop that was current
            # at construction time, which is not the loop asyncio.run() made.
            self._shutdown_event = asyncio.Event()

        self._setup_signals(asyncio.get_running_loop())

        # Notify systemd we're ready
        if self.notify_systemd:
            notify(Notification.READY)
            self.logger.info("Notified systemd: READY")

        # Start health check loop, then main loop
        self._tasks.append(asyncio.create_task(self._health_loop()))
        self._tasks.append(asyncio.create_task(self._main_loop()))

        try:
            # Run until shutdown
            await self._shutdown_event.wait()
        finally:
            self.logger.info(f"{self.name}: Service stopping")

            # Cleanup
            for task in self._tasks:
                if not task.done():
                    task.cancel()

            # Wait for cleanup (including an in-flight shutdown task)
            waiting = list(self._tasks)
            if self._shutdown_task is not None:
                waiting.append(self._shutdown_task)
            if waiting:
                await asyncio.gather(*waiting, return_exceptions=True)

            self.logger.info(f"{self.name}: Service stopped")

    def run(self):
        """Run the service (blocking)"""
        self.logger.info(f"{self.name}: Service starting")

        try:
            # Tasks can only be created inside a running loop, so the whole
            # lifecycle lives in one coroutine driven by asyncio.run().
            asyncio.run(self._serve())
        except KeyboardInterrupt:
            pass


def run_scheduled(
    func: Callable,
    interval_seconds: float,
    name: str = 'scheduled-task'
):
    """
    Run a function on a schedule (simple alternative to full service)

    *func* may be a plain callable or return an awaitable (for example an
    ``async def``); awaitables are awaited. Runs until interrupted.

    Usage:
        def my_task():
            print("Running...")

        run_scheduled(my_task, interval_seconds=60, name='my-task')
    """
    logger = get_logger(f'dolphin.scheduled.{name}')
    logger.info(f"Starting scheduled task: {name} (interval: {interval_seconds}s)")

    async def loop():
        while True:
            try:
                # Monotonic clock: elapsed time is immune to wall-clock jumps.
                start = time.monotonic()

                result = func()
                if inspect.isawaitable(result):
                    await result

                elapsed = time.monotonic() - start
                logger.info(f"Task completed in {elapsed:.2f}s")

                # Sleep remaining time
                sleep_time = max(0, interval_seconds - elapsed)
                await asyncio.sleep(sleep_time)

            except Exception as e:
                logger.error(f"Task error: {e}", exc_info=True)
                await asyncio.sleep(interval_seconds)

    try:
        asyncio.run(loop())
    except KeyboardInterrupt:
        logger.info("Stopped by user")


__all__ = [
    'ServiceBase',
    'ServiceHealth',
    'get_logger',
    'JournalHandler',
    'run_scheduled',
    'notify',
    'SYSTEMD_AVAILABLE',
    'TENACITY_AVAILABLE',
    'PSUTIL_AVAILABLE',
]