From 2be4f9072c0bdda121bc248009d936e653cbd66b Mon Sep 17 00:00:00 2001
From: Jared Vititoe <jjvititoe1@gmail.com>
Date: Mon, 3 Mar 2025 19:14:29 -0500
Subject: [PATCH] Variable descriptions for drive tickets

---
 hwmonDaemon.py | 115 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 105 insertions(+), 10 deletions(-)

diff --git a/hwmonDaemon.py b/hwmonDaemon.py
index 0aeb36d..142edbd 100644
--- a/hwmonDaemon.py
+++ b/hwmonDaemon.py
@@ -150,90 +150,185 @@ class SystemHealthMonitor:
         
         # Add SMART attribute explanations
         SMART_DESCRIPTIONS = {
+            'Reported_Uncorrect': """
+            Number of errors that could not be recovered using hardware ECC.
+            Impact:
+            - Indicates permanent data loss in affected sectors
+            - High correlation with drive hardware failure
+            - Critical reliability indicator
+            
+            Recommended Actions:
+            1. Backup critical data immediately
+            2. Check drive logs for related errors
+            3. Plan for drive replacement
+            4. Monitor for error count increases
+            """,
+            
             'Reallocated_Sector_Ct': """
             Number of sectors that have been reallocated due to errors.
+            Impact:
             - High counts indicate degrading media
             - Each reallocation uses one of the drive's limited spare sectors
             - Rapid increases suggest accelerating drive wear
+            
+            Recommended Actions:
+            1. Monitor rate of increase
+            2. Check drive temperature
+            3. Plan replacement if count grows rapidly
             """,
             
             'Current_Pending_Sector': """
             Sectors waiting to be reallocated due to read/write errors.
+            Impact:
             - Indicates potentially unstable sectors
             - May result in data loss if unrecoverable
             - Should be monitored for increases
+            
+            Recommended Actions:
+            1. Backup affected files
+            2. Run extended SMART tests
+            3. Monitor for conversion to reallocated sectors
             """,
             
             'Offline_Uncorrectable': """
             Count of uncorrectable errors detected during offline data collection.
+            Impact:
             - Direct indicator of media reliability issues
             - May affect data integrity
             - High values suggest drive replacement needed
-            """,
             
-            'Reported_Uncorrect': """
-            Number of errors that could not be recovered using hardware ECC.
-            - Critical indicator of drive health
-            - Directly impacts data reliability
-            - Any non-zero value requires attention
+            Recommended Actions:
+            1. Run extended SMART tests
+            2. Check drive logs
+            3. Plan replacement if count is increasing
             """,
             
             'Spin_Retry_Count': """
             Number of spin start retry attempts.
+            Impact:
             - Indicates potential motor or bearing issues
             - May predict imminent mechanical failure
             - Increasing values suggest degrading drive health
+            
+            Recommended Actions:
+            1. Monitor for rapid increases
+            2. Check drive temperature
+            3. Plan replacement if count grows rapidly
             """,
             
             'Power_On_Hours': """
             Total number of hours the device has been powered on.
+            Impact:
             - Normal aging metric
             - Used to gauge overall drive lifetime
             - Compare against manufacturer's MTBF rating
+            
+            Recommended Actions:
+            1. Compare to warranty period
+            2. Plan replacement if approaching rated lifetime
             """,
             
             'Media_Wearout_Indicator': """
             Percentage of drive's rated life remaining (SSDs).
+            Impact:
             - 100 indicates new drive
             - 0 indicates exceeded rated writes
             - Critical for SSD lifecycle management
+            
+            Recommended Actions:
+            1. Plan replacement below 20%
+            2. Monitor write workload
+            3. Consider workload redistribution
             """,
             
             'Temperature_Celsius': """
             Current drive temperature.
+            Impact:
             - High temperatures accelerate wear
             - Optimal range: 20-45°C
             - Sustained high temps reduce lifespan
+            
+            Recommended Actions:
+            1. Check system cooling
+            2. Verify airflow
+            3. Monitor for sustained high temperatures
             """,
             
             'Available_Spare': """
             Percentage of spare blocks remaining (SSDs).
+            Impact:
             - Critical for SSD endurance
             - Low values indicate approaching end-of-life
             - Rapid decreases suggest excessive writes
+            
+            Recommended Actions:
+            1. Plan replacement if below 20%
+            2. Monitor write patterns
+            3. Consider workload changes
             """,
             
             'Program_Fail_Count': """
             Number of flash program operation failures.
+            Impact:
             - Indicates NAND cell reliability
             - Important for SSD health assessment
             - Increasing values suggest flash degradation
+            
+            Recommended Actions:
+            1. Monitor rate of increase
+            2. Check firmware updates
+            3. Plan replacement if rapidly increasing
             """,
             
             'Erase_Fail_Count': """
             Number of flash erase operation failures.
+            Impact:
             - Related to NAND block health
             - Critical for SSD reliability
             - High counts suggest failing flash blocks
+            
+            Recommended Actions:
+            1. Monitor count increases
+            2. Check firmware version
+            3. Plan replacement if count is high
+            """,
+            
+            'Load_Cycle_Count': """
+            Number of head load/unload cycles (heads parked off the platters and loaded back).
+            Impact:
+            - Normal operation metric
+            - High counts may indicate power management issues
+            - Compare against rated cycles (typically 600k-1M)
+            
+            Recommended Actions:
+            1. Review power management settings
+            2. Monitor rate of increase
+            3. Plan replacement near rated limit
+            """,
+            
+            'Wear_Leveling_Count': """
+            SSD block erase distribution metric.
+            Impact:
+            - Indicates wear pattern uniformity
+            - Higher values show more balanced wear
+            - Critical for SSD longevity
+            
+            Recommended Actions:
+            1. Monitor trend over time
+            2. Compare with similar drives
+            3. Check workload distribution
             """
         }
 
+        # Add relevant SMART descriptions
+        for attr in SMART_DESCRIPTIONS:
+            if attr in issue:
+                description += f"\n{attr}:\n{SMART_DESCRIPTIONS[attr]}\n"
+
         if "SMART" in issue:
             description += """
-            SMART (Self-Monitoring, Analysis, and Reporting Technology) issues indicate potential drive reliability problems.
-            - Reallocated sectors indicate bad blocks that have been remapped
-            - Pending sectors are potentially failing blocks waiting to be remapped
-            - Uncorrectable errors indicate data that could not be read
+            SMART (Self-Monitoring, Analysis, and Reporting Technology) Attribute Details:
+            - Possible drive failure!
             """
         
         if "Temperature" in issue: