hwmonDaemon/hwmonDaemon.py

#!/usr/bin/env python3
import os
import sys
import json
import datetime
import requests
import psutil
import socket
import subprocess
from typing import Dict, Any, List

class SystemHealthMonitor:
    def __init__(self, 
                 ticket_api_url: str = 'http://10.10.10.45/create_ticket_api.php',
                 state_file: str = '/tmp/last_health_check.json'):
        """
        Initialize the system health monitor
        
        :param ticket_api_url: URL for ticket creation API
        :param state_file: File to track last health check
        """
        self.ticket_api_url = ticket_api_url
        self.state_file = state_file
    
    def run(self):
        """
        Perform a one-shot health check
        """
        try:
            # Perform health checks
            health_report = self.perform_health_checks()
            
            # Create tickets for critical issues
            self._create_tickets_for_issues(health_report)
        except Exception as e:
            print(f"Unexpected error during health check: {e}")
    
    def perform_health_checks(self) -> Dict[str, Any]:
        """
        Perform comprehensive system health checks
        
        :return: Dictionary with health check results
        """
        health_report = {
            'disk_health': self._check_disk_health(),
            'memory_health': self._check_memory_usage(),
            'cpu_health': self._check_cpu_usage(),
            'network_health': self._check_network_status(),
            'drive_smart_status': self._check_drive_smart_status(),
            'temperature_health': self._check_system_temperatures()
        }
        return health_report
    
    def _create_tickets_for_issues(self, health_report: Dict[str, Any]):
        """
        Create tickets for critical issues with dynamic parameters.
        """
        critical_issues = self._detect_critical_issues(health_report)
        if not critical_issues:
            print("No critical issues detected.")
            return

        # Initialize default ticket fields
        priority = "P4"  # Default to low priority
        categories = set()  # To accumulate unique categories
        issue_types = set()  # To accumulate unique issue types
        hostname = socket.gethostname()
        action_type = "[auto]"
        scope = "[cluster-wide]"
        environment = "[production]"
        ticket_type = "[maintenance]"
        
        # Analyze critical issues to determine ticket parameters
        for issue in critical_issues:
            if "disk" in issue.lower():
                priority = "P2"  # High priority for disk issues
                categories.add("Hardware")
                issue_types.add("Incident")
            elif "memory" in issue.lower():
                priority = "P2"  # High priority for memory issues
                categories.add("Hardware")
                issue_types.add("Incident")
            elif "cpu" in issue.lower():
                priority = "P2"  # High priority for CPU issues
                categories.add("Hardware")
                issue_types.add("Incident")
            elif "internet connectivity" in issue.lower():
                priority = "P3"  # Medium priority for network issues
                categories.add("Network")
                issue_types.add("Problem")
            elif "health issues" in issue.lower():
                priority = "P1"  # Critical priority for health issues
                categories.add("Hardware")
                issue_types.add("Problem")

        # Create a list from the set to get unique values
        category = list(categories)[0] if categories else "Other"
        issue_type = list(issue_types)[0] if issue_types else "Task"

        ticket_title = f"[{hostname}]{action_type}[{issue_type}] System Health Issues Detected {scope}{environment}{ticket_type}"
        ticket_description = "Multiple system health issues detected:\n\n" + "\n".join(critical_issues)

        ticket_payload = {
            "title": ticket_title,
            "description": ticket_description,
            "priority": priority,
            "status": "Open",
            "category": category,
            "type": issue_type
        }

        try:
            response = requests.post(
                self.ticket_api_url, 
                json=ticket_payload, 
                headers={'Content-Type': 'application/json'}
            )
            if response.status_code in [200, 201]:
                print(f"Ticket created successfully: {ticket_title}")
            else:
                print(f"Failed to create ticket. Status code: {response.status_code}")
                print(f"Response: {response.text}")
        except Exception as e:
            print(f"Error creating ticket: {e}")


    def _detect_critical_issues(self, health_report: Dict[str, Any]) -> List[str]:
        """
        Detect critical issues in the health report
        
        :param health_report: Comprehensive health report
        :return: List of critical issue descriptions
        """
        critical_issues = []
        for partition in health_report.get('disk_health', {}).get('partitions', []):
            if partition.get('status') == 'CRITICAL_HIGH_USAGE':
                critical_issues.append(
                    f"Disk {partition['mountpoint']} is {partition['usage_percent']}% full"
                )
        memory_health = health_report.get('memory_health', {})
        if memory_health.get('memory_percent', 0) > 90:
            critical_issues.append(
                f"Memory usage at {memory_health['memory_percent']}%"
            )
        cpu_health = health_report.get('cpu_health', {})
        if cpu_health.get('cpu_usage_percent', 0) > 90:
            critical_issues.append(
                f"CPU usage at {cpu_health['cpu_usage_percent']}%"
            )
        network_health = health_report.get('network_health', {})
        if not network_health.get('internet_connectivity', True):
            critical_issues.append("No internet connectivity")
        for drive in health_report.get('drive_smart_status', []):
            if drive.get('status') != 'HEALTHY':
                critical_issues.append(
                    f"Drive {drive['drive']} may have health issues"
                )
        return critical_issues
        
    def _check_disk_health(self) -> Dict[str, Any]:
        """
        Check disk usage and health
        
        :return: Disk health metrics
        """
        disk_health = {'partitions': []}
        
        try:
            # Get all mounted partitions
            partitions = psutil.disk_partitions()
            
            for partition in partitions:
                try:
                    usage = psutil.disk_usage(partition.mountpoint)
                    partition_info = {
                        'mountpoint': partition.mountpoint,
                        'total_space': self._convert_bytes(usage.total),
                        'used_space': self._convert_bytes(usage.used),
                        'free_space': self._convert_bytes(usage.free),
                        'usage_percent': usage.percent
                    }
                    
                    # Flag high usage
                    if usage.percent > 90:
                        partition_info['status'] = 'CRITICAL_HIGH_USAGE'
                    elif usage.percent > 80:
                        partition_info['status'] = 'WARNING_HIGH_USAGE'
                    else:
                        partition_info['status'] = 'NORMAL'
                    
                    disk_health['partitions'].append(partition_info)
                except Exception as e:
                    print(f"Could not check partition {partition.mountpoint}: {e}")
            
            return disk_health
        
        except Exception as e:
            print(f"Disk health check failed: {e}")
            return {'error': str(e)}
        
    def _convert_bytes(self, bytes_value: int, suffix: str = 'B') -> str:
        """
        Convert bytes to human-readable format
        
        :param bytes_value: Number of bytes
        :param suffix: Suffix to append (default 'B' for bytes)
        :return: Formatted string with size
        """
        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
            if abs(bytes_value) < 1024.0:
                return f"{bytes_value:.1f}{unit}{suffix}"
            bytes_value /= 1024.0
        return f"{bytes_value:.1f}Y{suffix}"

    def _check_memory_usage(self) -> Dict[str, Any]:
        """
        Check memory usage and return health metrics
        
        :return: Memory health metrics
        """
        try:
            memory = psutil.virtual_memory()
            return {
                'total_memory': self._convert_bytes(memory.total),
                'used_memory': self._convert_bytes(memory.used),
                'free_memory': self._convert_bytes(memory.available),
                'memory_percent': memory.percent
            }
        except Exception as e:
            print(f"Memory health check failed: {e}")
            return {'error': str(e)}

    def _check_cpu_usage(self) -> Dict[str, Any]:
        """
        Check CPU usage and return health metrics
        
        :return: CPU health metrics
        """
        try:
            cpu_usage = psutil.cpu_percent(interval=1)
            return {
                'cpu_usage_percent': cpu_usage
            }
        except Exception as e:
            print(f"CPU health check failed: {e}")
            return {'error': str(e)}

    def _check_drive_smart_status(self) -> List[Dict[str, Any]]:
        """
        Check SMART status of drives using smartctl.
        
        :return: List of SMART status for drives
        """
        drives = []
        try:
            for disk in psutil.disk_partitions():
                drive = disk.device
                try:
                    # Use smartctl to check the drive's SMART status
                    result = subprocess.run(
                        ['smartctl', '-H', drive],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        text=True
                    )
                    output = result.stdout + result.stderr
                    status = 'HEALTHY' if 'PASSED' in output else 'UNHEALTHY'
                    drives.append({'drive': drive, 'status': status})
                except Exception as e:
                    drives.append({'drive': drive, 'status': 'ERROR', 'error': str(e)})
        except Exception as e:
            print(f"SMART status check failed: {e}")
            return [{'error': str(e)}]
        return drives

    def _check_network_status(self) -> Dict[str, Any]:
        """
        Check which cluster nodes this host can reach on each network.

        Every address in the management and Ceph tables is pinged exactly
        once from the local machine. Only reachability from this host is
        tested, so issues are attributed to the local hostname.

        :return: {'management_network': {...}, 'ceph_network': {...}} where
                 each section has 'status' ('HEALTHY' or 'ISSUES_DETECTED')
                 and 'issues' (list of human-readable messages)
        """
        network_health = {
            'management_network': {'status': 'UNKNOWN', 'issues': []},
            'ceph_network': {'status': 'UNKNOWN', 'issues': []}
        }

        # IP-to-hostname mapping
        management_mapping = {
            '10.10.10.2': 'large1',
            '10.10.10.10': 'medium1',
            '10.10.10.4': 'medium2',
            '10.10.10.8': 'micro1',
            '10.10.10.9': 'micro2'
        }
        ceph_mapping = {
            '10.10.90.10': 'large1',
            '10.10.90.4': 'medium1',
            '10.10.90.3': 'medium2',
            '10.10.90.2': 'micro1',
            '10.10.90.6': 'micro2'
        }

        # Seconds to wait for a single echo reply (passed to ping as -W)
        ping_timeout_seconds = 2

        def _ping_device(ip: str) -> bool:
            """Return True when one ICMP echo to *ip* is answered."""
            try:
                result = subprocess.run(
                    ['ping', '-c', '1', '-W', str(ping_timeout_seconds), ip],
                    # The output is never inspected, so discard it
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    # Safety net in case ping itself does not return in time
                    timeout=ping_timeout_seconds + 3,
                )
                return result.returncode == 0
            except Exception as e:
                print(f"Error pinging {ip}: {e}")
                return False

        local_host = socket.gethostname()

        networks = (
            ('management_network', 'Management Network', management_mapping),
            ('ceph_network', 'Ceph Network', ceph_mapping),
        )
        for section, label, mapping in networks:
            issues = network_health[section]['issues']
            for target_ip, target_host in mapping.items():
                if not _ping_device(target_ip):
                    issues.append(
                        f"{local_host} cannot reach {target_host} ({target_ip}) in {label}"
                    )
            # Update status
            network_health[section]['status'] = 'ISSUES_DETECTED' if issues else 'HEALTHY'

        return network_health


def main():
    """
    Build the health monitor and run one health-check pass.
    """
    # SystemHealthMonitor takes only ticket_api_url and state_file. Its
    # run() performs a single pass, so repetition (e.g. once per day) has to
    # come from whatever schedules this script, not from a constructor
    # argument.
    monitor = SystemHealthMonitor(
        ticket_api_url='http://10.10.10.45/create_ticket_api.php'
    )

    # Run the monitor
    monitor.run()

if __name__ == '__main__':
    # Only the superuser (effective UID 0) may proceed; anyone else is told
    # why and the process exits with status 1.
    running_as_root = os.geteuid() == 0
    if not running_as_root:
        print("This script must be run with sudo/root privileges")
        sys.exit(1)

    main()
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00			`#!/usr/bin/env python3`
			`import os`
			`import sys`
			`import json`
			`import datetime`
			`import requests`
			`import psutil`
Adjusted network logic, made map of both subnet ip 2024-12-04 21:14:47 -05:00			`import socket`
			`import subprocess`
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00			`from typing import Dict, Any, List`

			`class SystemHealthMonitor:`
			`def __init__(self,`
			`ticket_api_url: str = 'http://10.10.10.45/create_ticket_api.php',`
			`state_file: str = '/tmp/last_health_check.json'):`
			`"""`
			`Initialize the system health monitor`

			`:param ticket_api_url: URL for ticket creation API`
			`:param state_file: File to track last health check`
			`"""`
			`self.ticket_api_url = ticket_api_url`
			`self.state_file = state_file`

			`def run(self):`
			`"""`
			`Perform a one-shot health check`
			`"""`
			`try:`
			`# Perform health checks`
			`health_report = self.perform_health_checks()`

			`# Create tickets for critical issues`
			`self._create_tickets_for_issues(health_report)`
			`except Exception as e:`
			`print(f"Unexpected error during health check: {e}")`

			`def perform_health_checks(self) -> Dict[str, Any]:`
			`"""`
			`Perform comprehensive system health checks`

			`:return: Dictionary with health check results`
			`"""`
			`health_report = {`
			`'disk_health': self._check_disk_health(),`
			`'memory_health': self._check_memory_usage(),`
			`'cpu_health': self._check_cpu_usage(),`
			`'network_health': self._check_network_status(),`
			`'drive_smart_status': self._check_drive_smart_status(),`
			`'temperature_health': self._check_system_temperatures()`
			`}`
			`return health_report`

			`def _create_tickets_for_issues(self, health_report: Dict[str, Any]):`
			`"""`
			`Create tickets for critical issues with dynamic parameters.`
			`"""`
			`critical_issues = self._detect_critical_issues(health_report)`
			`if not critical_issues:`
			`print("No critical issues detected.")`
			`return`

			`# Initialize default ticket fields`
			`priority = "P4" # Default to low priority`
			`categories = set() # To accumulate unique categories`
			`issue_types = set() # To accumulate unique issue types`
Adjusted network logic, made map of both subnet ip 2024-12-04 21:14:47 -05:00			`hostname = socket.gethostname()`
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00			`action_type = "[auto]"`
			`scope = "[cluster-wide]"`
			`environment = "[production]"`
			`ticket_type = "[maintenance]"`

			`# Analyze critical issues to determine ticket parameters`
			`for issue in critical_issues:`
			`if "disk" in issue.lower():`
			`priority = "P2" # High priority for disk issues`
			`categories.add("Hardware")`
			`issue_types.add("Incident")`
			`elif "memory" in issue.lower():`
			`priority = "P2" # High priority for memory issues`
			`categories.add("Hardware")`
			`issue_types.add("Incident")`
			`elif "cpu" in issue.lower():`
			`priority = "P2" # High priority for CPU issues`
			`categories.add("Hardware")`
			`issue_types.add("Incident")`
			`elif "internet connectivity" in issue.lower():`
			`priority = "P3" # Medium priority for network issues`
			`categories.add("Network")`
			`issue_types.add("Problem")`
			`elif "health issues" in issue.lower():`
			`priority = "P1" # Critical priority for health issues`
			`categories.add("Hardware")`
			`issue_types.add("Problem")`

			`# Create a list from the set to get unique values`
			`category = list(categories)[0] if categories else "Other"`
			`issue_type = list(issue_types)[0] if issue_types else "Task"`

			`ticket_title = f"[{hostname}]{action_type}[{issue_type}] System Health Issues Detected {scope}{environment}{ticket_type}"`
			`ticket_description = "Multiple system health issues detected:\n\n" + "\n".join(critical_issues)`

			`ticket_payload = {`
			`"title": ticket_title,`
			`"description": ticket_description,`
			`"priority": priority,`
			`"status": "Open",`
			`"category": category,`
			`"type": issue_type`
			`}`

			`try:`
			`response = requests.post(`
			`self.ticket_api_url,`
			`json=ticket_payload,`
			`headers={'Content-Type': 'application/json'}`
			`)`
			`if response.status_code in [200, 201]:`
			`print(f"Ticket created successfully: {ticket_title}")`
			`else:`
			`print(f"Failed to create ticket. Status code: {response.status_code}")`
			`print(f"Response: {response.text}")`
			`except Exception as e:`
			`print(f"Error creating ticket: {e}")`



			`def _detect_critical_issues(self, health_report: Dict[str, Any]) -> List[str]:`
			`"""`
			`Detect critical issues in the health report`

			`:param health_report: Comprehensive health report`
			`:return: List of critical issue descriptions`
			`"""`
			`critical_issues = []`
			`for partition in health_report.get('disk_health', {}).get('partitions', []):`
			`if partition.get('status') == 'CRITICAL_HIGH_USAGE':`
			`critical_issues.append(`
			`f"Disk {partition['mountpoint']} is {partition['usage_percent']}% full"`
			`)`
			`memory_health = health_report.get('memory_health', {})`
			`if memory_health.get('memory_percent', 0) > 90:`
			`critical_issues.append(`
			`f"Memory usage at {memory_health['memory_percent']}%"`
			`)`
			`cpu_health = health_report.get('cpu_health', {})`
			`if cpu_health.get('cpu_usage_percent', 0) > 90:`
			`critical_issues.append(`
			`f"CPU usage at {cpu_health['cpu_usage_percent']}%"`
			`)`
			`network_health = health_report.get('network_health', {})`
			`if not network_health.get('internet_connectivity', True):`
			`critical_issues.append("No internet connectivity")`
			`for drive in health_report.get('drive_smart_status', []):`
			`if drive.get('status') != 'HEALTHY':`
			`critical_issues.append(`
			`f"Drive {drive['drive']} may have health issues"`
			`)`
			`return critical_issues`

			`def _check_disk_health(self) -> Dict[str, Any]:`
			`"""`
			`Check disk usage and health`

			`:return: Disk health metrics`
			`"""`
			`disk_health = {'partitions': []}`

			`try:`
			`# Get all mounted partitions`
			`partitions = psutil.disk_partitions()`

			`for partition in partitions:`
			`try:`
			`usage = psutil.disk_usage(partition.mountpoint)`
			`partition_info = {`
			`'mountpoint': partition.mountpoint,`
			`'total_space': self._convert_bytes(usage.total),`
			`'used_space': self._convert_bytes(usage.used),`
			`'free_space': self._convert_bytes(usage.free),`
			`'usage_percent': usage.percent`
			`}`

			`# Flag high usage`
			`if usage.percent > 90:`
			`partition_info['status'] = 'CRITICAL_HIGH_USAGE'`
			`elif usage.percent > 80:`
			`partition_info['status'] = 'WARNING_HIGH_USAGE'`
			`else:`
			`partition_info['status'] = 'NORMAL'`

			`disk_health['partitions'].append(partition_info)`
			`except Exception as e:`
			`print(f"Could not check partition {partition.mountpoint}: {e}")`

			`return disk_health`

			`except Exception as e:`
			`print(f"Disk health check failed: {e}")`
			`return {'error': str(e)}`

			`def _convert_bytes(self, bytes_value: int, suffix: str = 'B') -> str:`
			`"""`
			`Convert bytes to human-readable format`

			`:param bytes_value: Number of bytes`
			`:param suffix: Suffix to append (default 'B' for bytes)`
			`:return: Formatted string with size`
			`"""`
			`for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:`
			`if abs(bytes_value) < 1024.0:`
			`return f"{bytes_value:.1f}{unit}{suffix}"`
			`bytes_value /= 1024.0`
			`return f"{bytes_value:.1f}Y{suffix}"`

			`def _check_memory_usage(self) -> Dict[str, Any]:`
			`"""`
			`Check memory usage and return health metrics`

			`:return: Memory health metrics`
			`"""`
			`try:`
			`memory = psutil.virtual_memory()`
			`return {`
			`'total_memory': self._convert_bytes(memory.total),`
			`'used_memory': self._convert_bytes(memory.used),`
			`'free_memory': self._convert_bytes(memory.available),`
			`'memory_percent': memory.percent`
			`}`
			`except Exception as e:`
			`print(f"Memory health check failed: {e}")`
			`return {'error': str(e)}`

			`def _check_cpu_usage(self) -> Dict[str, Any]:`
			`"""`
			`Check CPU usage and return health metrics`

			`:return: CPU health metrics`
			`"""`
			`try:`
			`cpu_usage = psutil.cpu_percent(interval=1)`
			`return {`
			`'cpu_usage_percent': cpu_usage`
			`}`
			`except Exception as e:`
			`print(f"CPU health check failed: {e}")`
			`return {'error': str(e)}`

			`def _check_drive_smart_status(self) -> List[Dict[str, Any]]:`
			`"""`
Adjusted network logic, made map of both subnet ip 2024-12-04 21:14:47 -05:00			`Check SMART status of drives using smartctl.`
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00
			`:return: List of SMART status for drives`
			`"""`
			`drives = []`
			`try:`
			`for disk in psutil.disk_partitions():`
			`drive = disk.device`
Adjusted network logic, made map of both subnet ip 2024-12-04 21:14:47 -05:00			`try:`
			`# Use smartctl to check the drive's SMART status`
			`result = subprocess.run(`
			`['smartctl', '-H', drive],`
			`stdout=subprocess.PIPE,`
			`stderr=subprocess.PIPE,`
			`text=True`
			`)`
			`output = result.stdout + result.stderr`
			`status = 'HEALTHY' if 'PASSED' in output else 'UNHEALTHY'`
			`drives.append({'drive': drive, 'status': status})`
			`except Exception as e:`
			`drives.append({'drive': drive, 'status': 'ERROR', 'error': str(e)})`
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00			`except Exception as e:`
			`print(f"SMART status check failed: {e}")`
			`return [{'error': str(e)}]`
Adjusted network logic, made map of both subnet ip 2024-12-04 21:14:47 -05:00			`return drives`
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00
			`def _check_network_status(self) -> Dict[str, Any]:`
			`"""`
Adjusted network logic, made map of both subnet ip 2024-12-04 21:14:47 -05:00			`Check network connectivity between nodes and include detailed identifiers.`
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00
			`:return: Network health report`
			`"""`
			`network_health = {`
			`'management_network': {'status': 'UNKNOWN', 'issues': []},`
			`'ceph_network': {'status': 'UNKNOWN', 'issues': []}`
			`}`

Adjusted network logic, made map of both subnet ip 2024-12-04 21:14:47 -05:00			`# IP-to-hostname mapping`
			`management_mapping = {`
			`'10.10.10.2': 'large1',`
			`'10.10.10.10': 'medium1',`
			`'10.10.10.4': 'medium2',`
			`'10.10.10.8': 'micro1',`
			`'10.10.10.9': 'micro2'`
			`}`
			`ceph_mapping = {`
			`'10.10.90.10': 'large1',`
			`'10.10.90.4': 'medium1',`
			`'10.10.90.3': 'medium2',`
			`'10.10.90.2': 'micro1',`
			`'10.10.90.6': 'micro2'`
			`}`
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00
			`def _ping_device(ip: str) -> bool:`
			`try:`
			`result = subprocess.run(['ping', '-c', '1', ip], stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`return result.returncode == 0`
			`except Exception as e:`
			`print(f"Error pinging {ip}: {e}")`
			`return False`

			`# Check management network`
Adjusted network logic, made map of both subnet ip 2024-12-04 21:14:47 -05:00			`management_ips = list(management_mapping.keys())`
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00			`for source_ip in management_ips:`
			`for target_ip in management_ips:`
			`if source_ip != target_ip and not _ping_device(target_ip):`
Adjusted network logic, made map of both subnet ip 2024-12-04 21:14:47 -05:00			`source_host = management_mapping[source_ip]`
			`target_host = management_mapping[target_ip]`
			`issue = f"{source_host} ({source_ip}) cannot reach {target_host} ({target_ip}) in Management Network"`
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00			`network_health['management_network']['issues'].append(issue)`

			`# Check Ceph network`
Adjusted network logic, made map of both subnet ip 2024-12-04 21:14:47 -05:00			`ceph_ips = list(ceph_mapping.keys())`
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00			`for source_ip in ceph_ips:`
			`for target_ip in ceph_ips:`
			`if source_ip != target_ip and not _ping_device(target_ip):`
Adjusted network logic, made map of both subnet ip 2024-12-04 21:14:47 -05:00			`source_host = ceph_mapping[source_ip]`
			`target_host = ceph_mapping[target_ip]`
			`issue = f"{source_host} ({source_ip}) cannot reach {target_host} ({target_ip}) in Ceph Network"`
Creation of hwmonDaemon and service files 2024-12-04 20:46:35 -05:00			`network_health['ceph_network']['issues'].append(issue)`

			`# Update statuses`
			`network_health['management_network']['status'] = 'HEALTHY' if not network_health['management_network']['issues'] else 'ISSUES_DETECTED'`
			`network_health['ceph_network']['status'] = 'HEALTHY' if not network_health['ceph_network']['issues'] else 'ISSUES_DETECTED'`

			`return network_health`


			`def main():`
			`# Initialize the monitor`
			`monitor = SystemHealthMonitor(`
			`check_interval=86400, # Check once per day`
			`ticket_api_url='http://10.10.10.45/create_ticket_api.php'`
			`)`

			`# Run the monitor`
			`monitor.run()`

			`if __name__ == '__main__':`
			`# Require root/sudo for full system access`
			`if os.geteuid() != 0:`
			`print("This script must be run with sudo/root privileges")`
			`sys.exit(1)`

			`main()`