Updated function scoping

2025-03-06 11:26:55 -05:00
parent e3e0c73630
commit b2cae0b6aa
1 changed files with 218 additions and 210 deletions
--- a/hwmonDaemon.py
+++ b/hwmonDaemon.py
@@ -76,6 +76,182 @@ class SystemHealthMonitor:
            'WD141KRYZ': ['02.01A02']
        }
    }
+    SEVERITY_INDICATORS = {
+        'CRITICAL': '🔴',
+        'WARNING': '🟡',
+        'HEALTHY': '🟢',
+        'UNKNOWN': '⚪'
+    }
+    SMART_DESCRIPTIONS = {
+        'Reported_Uncorrect': """
+        Number of errors that could not be recovered using hardware ECC.
+        Impact:
+        - Indicates permanent data loss in affected sectors
+        - High correlation with drive hardware failure
+        - Critical reliability indicator
+        
+        Recommended Actions:
+        1. Backup critical data immediately
+        2. Check drive logs for related errors
+        3. Plan for drive replacement
+        4. Monitor for error count increases
+        """,
+        
+        'Reallocated_Sector_Ct': """
+        Number of sectors that have been reallocated due to errors.
+        Impact:
+        - High counts indicate degrading media
+        - Each reallocation uses one of the drive's limited spare sectors
+        - Rapid increases suggest accelerating drive wear
+        
+        Recommended Actions:
+        1. Monitor rate of increase
+        2. Check drive temperature
+        3. Plan replacement if count grows rapidly
+        """,
+        
+        'Current_Pending_Sector': """
+        Sectors waiting to be reallocated due to read/write errors.
+        Impact:
+        - Indicates potentially unstable sectors
+        - May result in data loss if unrecoverable
+        - Should be monitored for increases
+        
+        Recommended Actions:
+        1. Backup affected files
+        2. Run extended SMART tests
+        3. Monitor for conversion to reallocated sectors
+        """,
+        
+        'Offline_Uncorrectable': """
+        Count of uncorrectable errors detected during offline data collection.
+        Impact:
+        - Direct indicator of media reliability issues
+        - May affect data integrity
+        - High values suggest drive replacement needed
+        
+        Recommended Actions:
+        1. Run extended SMART tests
+        2. Check drive logs
+        3. Plan replacement if count is increasing
+        """,
+        
+        'Spin_Retry_Count': """
+        Number of spin start retry attempts.
+        Impact:
+        - Indicates potential motor or bearing issues
+        - May predict imminent mechanical failure
+        - Increasing values suggest degrading drive health
+        
+        Recommended Actions:
+        1. Monitor for rapid increases
+        2. Check drive temperature
+        3. Plan replacement if count grows rapidly
+        """,
+        
+        'Power_On_Hours': """
+        Total number of hours the device has been powered on.
+        Impact:
+        - Normal aging metric
+        - Used to gauge overall drive lifetime
+        - Compare against manufacturer's MTBF rating
+        
+        Recommended Actions:
+        1. Compare to warranty period
+        2. Plan replacement if approaching rated lifetime
+        """,
+        
+        'Media_Wearout_Indicator': """
+        Percentage of drive's rated life remaining (SSDs).
+        Impact:
+        - 100 indicates new drive
+        - 0 indicates exceeded rated writes
+        - Critical for SSD lifecycle management
+        
+        Recommended Actions:
+        1. Plan replacement below 20%
+        2. Monitor write workload
+        3. Consider workload redistribution
+        """,
+        
+        'Temperature_Celsius': """
+        Current drive temperature.
+        Impact:
+        - High temperatures accelerate wear
+        - Optimal range: 20-45°C
+        - Sustained high temps reduce lifespan
+        
+        Recommended Actions:
+        1. Check system cooling
+        2. Verify airflow
+        3. Monitor for sustained high temperatures
+        """,
+        
+        'Available_Spare': """
+        Percentage of spare blocks remaining (SSDs).
+        Impact:
+        - Critical for SSD endurance
+        - Low values indicate approaching end-of-life
+        - Rapid decreases suggest excessive writes
+        
+        Recommended Actions:
+        1. Plan replacement if below 20%
+        2. Monitor write patterns
+        3. Consider workload changes
+        """,
+        
+        'Program_Fail_Count': """
+        Number of flash program operation failures.
+        Impact:
+        - Indicates NAND cell reliability
+        - Important for SSD health assessment
+        - Increasing values suggest flash degradation
+        
+        Recommended Actions:
+        1. Monitor rate of increase
+        2. Check firmware updates
+        3. Plan replacement if rapidly increasing
+        """,
+        
+        'Erase_Fail_Count': """
+        Number of flash erase operation failures.
+        Impact:
+        - Related to NAND block health
+        - Critical for SSD reliability
+        - High counts suggest failing flash blocks
+        
+        Recommended Actions:
+        1. Monitor count increases
+        2. Check firmware version
+        3. Plan replacement if count is high
+        """,
+        
+        'Load_Cycle_Count': """
+        Number of power cycles and head load/unload events.
+        Impact:
+        - Normal operation metric
+        - High counts may indicate power management issues
+        - Compare against rated cycles (typically 600k-1M)
+        
+        Recommended Actions:
+        1. Review power management settings
+        2. Monitor rate of increase
+        3. Plan replacement near rated limit
+        """,
+        
+        'Wear_Leveling_Count': """
+        SSD block erase distribution metric.
+        Impact:
+        - Indicates wear pattern uniformity
+        - Higher values show more balanced wear
+        - Critical for SSD longevity
+        
+        Recommended Actions:
+        1. Monitor trend over time
+        2. Compare with similar drives
+        3. Check workload distribution
+        """
+    }

    def __init__(self, 
                 ticket_api_url: str = 'http://10.10.10.45/create_ticket_api.php',
@@ -186,19 +362,52 @@ class SystemHealthMonitor:
            
        return drive_details

+    STANDARD_WIDTH = 80
+
+    @staticmethod
+    def make_box(title: str, content: str, content_width: int = STANDARD_WIDTH - 2) -> str:
+        """Return ``content`` wrapped in a box-drawing frame titled ``title``.
+
+        Args:
+            title: Text embedded in the top border.
+            content: Pre-formatted body, inserted verbatim; to align with the
+                frame, pad each line as ``f"┃ {line:<{content_width - 1}}┃"``.
+            content_width: Number of columns between the corner glyphs;
+                defaults to ``STANDARD_WIDTH - 2``.
+
+        Returns:
+            The box as a string with a leading newline and no trailing one.
+        """
+        # '━' * n is '' for n <= 0, so an over-long title only widens the top.
+        rule = '━' * (content_width - len(title) - 3)
+        return f"\n┏━ {title} {rule}┓\n{content}\n┗{'━' * content_width}┛"
+
+    def _get_issue_type(self, issue: str) -> str:
+        """Classify ``issue`` by the first known keyword it contains."""
+        # Order matters: earlier keywords win when several are present.
+        keyword_labels = (
+            ("SMART", "SMART Health Issue"),
+            ("Drive", "Storage Issue"),
+            ("ECC", "Memory Issue"),
+            ("CPU", "Performance Issue"),
+            ("Network", "Network Issue"),
+        )
+        matches = (label for keyword, label in keyword_labels if keyword in issue)
+        return next(matches, "Hardware Issue")
+
+    def _get_impact_level(self, issue: str) -> str:
+        """Map severity keywords found in ``issue`` to an impact label."""
+        if any(flag in issue for flag in ("CRITICAL", "UNHEALTHY")):
+            return "🔴 Critical - Immediate Action Required"
+        warned = "WARNING" in issue
+        return "🟡 Warning - Action Needed Soon" if warned else "🟢 Low - Monitor Only"
+
    def _generate_detailed_description(self, issue: str, health_report: Dict[str, Any]) -> str:
        hostname = socket.gethostname()
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        priority = "⚠ HIGH" if "CRITICAL" in issue else "● MEDIUM"
        
-        # Calculate maximum width based on content
-        content_width = max(
-            len(hostname),
-            len(timestamp),
-            len(priority),
-            len("HARDWARE MONITORING ALERT TICKET")
-        ) + 10  # Add padding
-
+        content_width = STANDARD_WIDTH - 2
        banner = f"""
 ┏{'━' * content_width}┓
 ┃{' HARDWARE MONITORING ALERT TICKET '.center(content_width)}┃
@@ -206,208 +415,7 @@ class SystemHealthMonitor:
 ┃  Host      : {hostname:<{content_width-13}}┃
 ┃  Generated : {timestamp:<{content_width-13}}┃
 ┃  Priority  : {priority:<{content_width-13}}┃
-┗{'━' * content_width}┛
-            """
-
-        description = banner + "\n" + "┏━ ISSUE SUMMARY " + "━" * 50 + "\n" + issue + "\n\n"
-        
-        # Add SMART attribute explanations
-        SMART_DESCRIPTIONS = {
-            'Reported_Uncorrect': """
-            Number of errors that could not be recovered using hardware ECC.
-            Impact:
-            - Indicates permanent data loss in affected sectors
-            - High correlation with drive hardware failure
-            - Critical reliability indicator
-            
-            Recommended Actions:
-            1. Backup critical data immediately
-            2. Check drive logs for related errors
-            3. Plan for drive replacement
-            4. Monitor for error count increases
-            """,
-            
-            'Reallocated_Sector_Ct': """
-            Number of sectors that have been reallocated due to errors.
-            Impact:
-            - High counts indicate degrading media
-            - Each reallocation uses one of the drive's limited spare sectors
-            - Rapid increases suggest accelerating drive wear
-            
-            Recommended Actions:
-            1. Monitor rate of increase
-            2. Check drive temperature
-            3. Plan replacement if count grows rapidly
-            """,
-            
-            'Current_Pending_Sector': """
-            Sectors waiting to be reallocated due to read/write errors.
-            Impact:
-            - Indicates potentially unstable sectors
-            - May result in data loss if unrecoverable
-            - Should be monitored for increases
-            
-            Recommended Actions:
-            1. Backup affected files
-            2. Run extended SMART tests
-            3. Monitor for conversion to reallocated sectors
-            """,
-            
-            'Offline_Uncorrectable': """
-            Count of uncorrectable errors detected during offline data collection.
-            Impact:
-            - Direct indicator of media reliability issues
-            - May affect data integrity
-            - High values suggest drive replacement needed
-            
-            Recommended Actions:
-            1. Run extended SMART tests
-            2. Check drive logs
-            3. Plan replacement if count is increasing
-            """,
-            
-            'Spin_Retry_Count': """
-            Number of spin start retry attempts.
-            Impact:
-            - Indicates potential motor or bearing issues
-            - May predict imminent mechanical failure
-            - Increasing values suggest degrading drive health
-            
-            Recommended Actions:
-            1. Monitor for rapid increases
-            2. Check drive temperature
-            3. Plan replacement if count grows rapidly
-            """,
-            
-            'Power_On_Hours': """
-            Total number of hours the device has been powered on.
-            Impact:
-            - Normal aging metric
-            - Used to gauge overall drive lifetime
-            - Compare against manufacturer's MTBF rating
-            
-            Recommended Actions:
-            1. Compare to warranty period
-            2. Plan replacement if approaching rated lifetime
-            """,
-            
-            'Media_Wearout_Indicator': """
-            Percentage of drive's rated life remaining (SSDs).
-            Impact:
-            - 100 indicates new drive
-            - 0 indicates exceeded rated writes
-            - Critical for SSD lifecycle management
-            
-            Recommended Actions:
-            1. Plan replacement below 20%
-            2. Monitor write workload
-            3. Consider workload redistribution
-            """,
-            
-            'Temperature_Celsius': """
-            Current drive temperature.
-            Impact:
-            - High temperatures accelerate wear
-            - Optimal range: 20-45°C
-            - Sustained high temps reduce lifespan
-            
-            Recommended Actions:
-            1. Check system cooling
-            2. Verify airflow
-            3. Monitor for sustained high temperatures
-            """,
-            
-            'Available_Spare': """
-            Percentage of spare blocks remaining (SSDs).
-            Impact:
-            - Critical for SSD endurance
-            - Low values indicate approaching end-of-life
-            - Rapid decreases suggest excessive writes
-            
-            Recommended Actions:
-            1. Plan replacement if below 20%
-            2. Monitor write patterns
-            3. Consider workload changes
-            """,
-            
-            'Program_Fail_Count': """
-            Number of flash program operation failures.
-            Impact:
-            - Indicates NAND cell reliability
-            - Important for SSD health assessment
-            - Increasing values suggest flash degradation
-            
-            Recommended Actions:
-            1. Monitor rate of increase
-            2. Check firmware updates
-            3. Plan replacement if rapidly increasing
-            """,
-            
-            'Erase_Fail_Count': """
-            Number of flash erase operation failures.
-            Impact:
-            - Related to NAND block health
-            - Critical for SSD reliability
-            - High counts suggest failing flash blocks
-            
-            Recommended Actions:
-            1. Monitor count increases
-            2. Check firmware version
-            3. Plan replacement if count is high
-            """,
-            
-            'Load_Cycle_Count': """
-            Number of power cycles and head load/unload events.
-            Impact:
-            - Normal operation metric
-            - High counts may indicate power management issues
-            - Compare against rated cycles (typically 600k-1M)
-            
-            Recommended Actions:
-            1. Review power management settings
-            2. Monitor rate of increase
-            3. Plan replacement near rated limit
-            """,
-            
-            'Wear_Leveling_Count': """
-            SSD block erase distribution metric.
-            Impact:
-            - Indicates wear pattern uniformity
-            - Higher values show more balanced wear
-            - Critical for SSD longevity
-            
-            Recommended Actions:
-            1. Monitor trend over time
-            2. Compare with similar drives
-            3. Check workload distribution
-            """
-        }
-        SEVERITY_INDICATORS = {
-    'CRITICAL': '🔴',
-    'WARNING': '🟡',
-    'HEALTHY': '🟢',
-    'UNKNOWN': '⚪'
-}
-
-        def _get_issue_type(self, issue: str) -> str:
-            if "SMART" in issue:
-                return "SMART Health Issue"
-            elif "Drive" in issue:
-                return "Storage Issue"
-            elif "ECC" in issue:
-                return "Memory Issue" 
-            elif "CPU" in issue:
-                return "Performance Issue"
-            elif "Network" in issue:
-                return "Network Issue"
-            return "Hardware Issue"
-
-        def _get_impact_level(self, issue: str) -> str:
-            if "CRITICAL" in issue or "UNHEALTHY" in issue:
-                return "🔴 Critical - Immediate Action Required"
-            elif "WARNING" in issue:
-                return "🟡 Warning - Action Needed Soon"
-            return "🟢 Low - Monitor Only"
+┗{'━' * content_width}┛"""
        
        executive_summary = f"""
 ┏━ EXECUTIVE SUMMARY ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓