From 07782da7b68502db4c6079de95ae3dfd26d4d496 Mon Sep 17 00:00:00 2001 From: Jared Vititoe Date: Tue, 10 Feb 2026 13:15:15 -0500 Subject: [PATCH] Add HTTP health check endpoint on port 9102 Lightweight /health endpoint returns JSON with status, hostname, and last check timestamp. Runs as daemon thread, activated via --health-server flag or HEALTH_SERVER_ENABLED=true in .env config. Fixes: https://code.lotusguild.org/LotusGuild/hwmonDaemon/issues/21 Co-Authored-By: Claude Opus 4.6 --- hwmonDaemon.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/hwmonDaemon.py b/hwmonDaemon.py index 84a2960..afdcb02 100644 --- a/hwmonDaemon.py +++ b/hwmonDaemon.py @@ -130,7 +130,10 @@ class SystemHealthMonitor: 'NEW_DRIVE_HOURS_THRESHOLD': 720, # Hours to consider a drive "new" (~30 days) 'SMART_ERROR_RECENT_HOURS': 168, # Hours window for recent SMART errors (~1 week) # Storage limits - 'HISTORY_MAX_BYTES': 52428800 # 50MB max storage for history files + 'HISTORY_MAX_BYTES': 52428800, # 50MB max storage for history files + # Health check endpoint + 'HEALTH_SERVER_ENABLED': False, # Enable HTTP health check endpoint + 'HEALTH_SERVER_PORT': 9102 # Port for health check endpoint } @classmethod @@ -212,6 +215,15 @@ class SystemHealthMonitor: cls.CONFIG['HISTORY_MAX_BYTES'] = int(value) except ValueError: logger.warning(f"Invalid HISTORY_MAX_BYTES value: {value}") + # Health server settings + elif key == 'HEALTH_SERVER_ENABLED': + cls.CONFIG['HEALTH_SERVER_ENABLED'] = value.lower() in ('true', '1', 'yes') + logger.info(f"✓ Loaded HEALTH_SERVER_ENABLED: {cls.CONFIG['HEALTH_SERVER_ENABLED']}") + elif key == 'HEALTH_SERVER_PORT': + try: + cls.CONFIG['HEALTH_SERVER_PORT'] = int(value) + except ValueError: + logger.warning(f"Invalid HEALTH_SERVER_PORT value: {value}") except Exception as e: logger.error(f"Failed to load .env file: {e}") @@ -669,6 +681,10 @@ class SystemHealthMonitor: # Drive details cache (per-run, cleared 
def _start_health_server(self):
    """Start a lightweight HTTP health check endpoint as a daemon thread.

    Serves ``GET /health`` with a JSON document containing:

    * ``status``     - current value of ``self._last_check_status``
    * ``hostname``   - result of ``socket.gethostname()``
    * ``last_check`` - current value of ``self._last_check_timestamp``
    * ``uptime``     - seconds elapsed since this health server was started

    Any other path is answered with 404. The port is read from
    ``self.CONFIG['HEALTH_SERVER_PORT']`` (default 9102).

    Startup is best-effort: if the listening socket cannot be created
    (port in use, insufficient privileges, port number out of range) a
    warning is logged and the method returns without raising.
    """
    from http.server import HTTPServer, BaseHTTPRequestHandler
    import threading
    import time

    monitor = self
    # Monotonic clock: the reported uptime must not jump when the wall
    # clock is adjusted (NTP step, manual change).
    started_at = time.monotonic()

    class HealthHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            # Compare only the path component so probes that append a
            # query string (e.g. /health?probe=1) are still answered.
            if self.path.split('?', 1)[0] != '/health':
                self.send_response(404)
                self.end_headers()
                return

            response = {
                'status': monitor._last_check_status,
                'hostname': socket.gethostname(),
                'last_check': monitor._last_check_timestamp,
                'uptime': round(time.monotonic() - started_at, 3),
            }
            body = json.dumps(response).encode()
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def log_message(self, format, *args):
            # Route the handler's per-request output through the daemon
            # logger at DEBUG level instead of the base class default.
            logger.debug(f"Health server: {format % args}")

    port = self.CONFIG.get('HEALTH_SERVER_PORT', 9102)
    try:
        # An empty host string binds the listener on all interfaces.
        server = HTTPServer(('', port), HealthHandler)
    except (OSError, OverflowError) as exc:
        # OverflowError is what bind() raises for a port outside 0-65535;
        # treat it like any other bind failure instead of crashing.
        logger.warning(f"Could not start health server on port {port}: {exc}")
        return

    # Keep a handle so the server can be shut down explicitly later.
    self._health_server = server
    thread = threading.Thread(
        target=server.serve_forever,
        name='health-server',
        daemon=True,
    )
    thread.start()
    logger.info(f"Health check endpoint started on port {port}")
health_report = self.perform_health_checks() + # Track last check for health endpoint + self._last_check_timestamp = datetime.datetime.now().isoformat() + self._last_check_status = health_report.get('drives_health', {}).get('overall_status', 'unknown') + # Create tickets for any detected critical issues self._create_tickets_for_issues(health_report) @@ -3625,6 +3684,11 @@ def main(): action="store_true", help="Enable verbose (DEBUG) logging output." ) + parser.add_argument( + "--health-server", + action="store_true", + help="Start HTTP health check endpoint (default port 9102)." + ) args = parser.parse_args() monitor = SystemHealthMonitor( @@ -3633,6 +3697,10 @@ def main(): verbose=args.verbose ) + # Start health server if requested via CLI or .env + if args.health_server or monitor.CONFIG.get('HEALTH_SERVER_ENABLED', False): + monitor._start_health_server() + if args.metrics: # Just output metrics to stdout health_report = monitor.perform_health_checks()