def _generate_ticket_hash(self, issue: str, health_report: Dict[str, Any]) -> str:
    """
    Generate a stable hash for ticket deduplication.

    The hash is the SHA-256 of a JSON document (keys sorted) that always
    contains the local hostname and the issue text, plus a few fields
    selected by keywords found in the issue text:

    - "Disk": device, mountpoint and an UNHEALTHY SMART status of the first
      drive whose mountpoint appears in the issue text.
    - "Memory": whether the issue mentions uncorrectable or correctable ECC.
    - "Network": whether the issue mentions the management or ceph network.

    Args:
        issue: Issue description text.
        health_report: Health report dictionary; only
            health_report['drives_health']['drives'] is read, and only for
            disk issues.

    Returns:
        Hex-encoded SHA-256 digest.
    """
    import hashlib

    # Extract stable components for hashing
    stable_components = {
        'hostname': socket.gethostname(),
        'issue_type': issue
    }

    # Add specific stable data based on issue type
    if "Disk" in issue:
        # Tolerate a missing or None 'drives_health' / 'drives' entry.
        drives = (health_report.get('drives_health') or {}).get('drives') or []
        for partition in drives:
            mountpoint = partition.get('mountpoint')
            # Skip drives without a usable mountpoint: `None in issue` raises
            # TypeError, and an empty string is a substring of every issue,
            # so it would match unrelated drives.
            if not isinstance(mountpoint, str) or not mountpoint:
                continue
            if mountpoint in issue:
                # .get() so a drive entry without 'device' cannot abort hashing.
                stable_components['device'] = partition.get('device')
                stable_components['mountpoint'] = mountpoint
                # Exclude variable data like usage percentages
                if partition.get('smart_status') == 'UNHEALTHY':
                    stable_components['smart_status'] = 'UNHEALTHY'
                break

    elif "Memory" in issue:
        if "Uncorrectable ECC" in issue:
            stable_components['error_type'] = 'Uncorrectable_ECC'
        elif "Correctable ECC" in issue:
            stable_components['error_type'] = 'Correctable_ECC'

    elif "Network" in issue:
        lowered_issue = issue.lower()
        if "management" in lowered_issue:
            stable_components['network_type'] = 'management'
        elif "ceph" in lowered_issue:
            stable_components['network_type'] = 'ceph'

    # Create a stable string representation and hash it
    stable_string = json.dumps(stable_components, sort_keys=True)
    return hashlib.sha256(stable_string.encode()).hexdigest()
class TicketDatabase:
    """
    SQLite-backed store of ticket hashes.

    Each operation opens its own short-lived connection to ``db_path`` and
    closes it again before returning.
    """

    def __init__(self, db_path="/var/lib/hwmon/tickets.db"):
        """
        Create the parent directory and the ``tickets`` table if missing.

        Args:
            db_path: Path of the SQLite database file.
        """
        self.db_path = db_path
        parent_dir = os.path.dirname(db_path)
        # A bare filename has an empty dirname, and os.makedirs('') raises
        # FileNotFoundError, so only create the directory when one is named.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        self._init_db()

    def _execute(self, sql, params=()):
        """
        Run one statement in its own transaction and return the fetched rows.

        The sqlite3 connection context manager only commits or rolls back;
        it does not close the connection, hence the explicit closing().
        """
        import sqlite3
        from contextlib import closing

        with closing(sqlite3.connect(self.db_path)) as conn:
            with conn:
                return conn.execute(sql, params).fetchall()

    def _init_db(self):
        """Create the ``tickets`` table if it does not exist yet."""
        self._execute('''
            CREATE TABLE IF NOT EXISTS tickets (
                hash TEXT PRIMARY KEY,
                title TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

    def ticket_exists(self, ticket_hash: str) -> bool:
        """Return True if ``ticket_hash`` has already been recorded."""
        rows = self._execute(
            'SELECT 1 FROM tickets WHERE hash = ? LIMIT 1', (ticket_hash,)
        )
        return bool(rows)

    def add_ticket(self, ticket_hash: str, title: str):
        """
        Record ``ticket_hash`` with its ``title``.

        Recording a hash that is already present is a no-op (the first row
        is kept) instead of raising sqlite3.IntegrityError on the primary key.
        """
        self._execute(
            'INSERT OR IGNORE INTO tickets (hash, title) VALUES (?, ?)',
            (ticket_hash, title),
        )