Attempted fix for LXC storage

2025-05-29 20:23:21 -04:00
parent 1371592b9e
commit 9a700e9853
1 changed file with 63 additions and 48 deletions
@@ -1297,12 +1297,9 @@ class SystemHealthMonitor:
                # Handle all other standard SMART attributes
                for attr, thresholds in BASE_SMART_THRESHOLDS.items():
-                    if attr in line and attr != 'Wear_Leveling_Count':
+                    if attr in line and attr != 'Wear_Leveling_Count':  # Skip wear leveling as it's handled above
                        parts = line.split()
                        if len(parts) >= 10:
                            if attr == 'Erase_Fail_Count':
                                raw_value = int(parts[9]) if parts[9].isdigit() else 0
                            else:
                            raw_value = self._parse_smart_value(parts[9])
                            smart_health['attributes'][attr] = raw_value
@@ -1316,6 +1313,8 @@ class SystemHealthMonitor:
                                        smart_health['severity'] = 'WARNING'
                                    smart_health['issues'].append(f"High temperature: {raw_value}°C")
                            else:
                                # Fix: Only trigger alerts if the raw value actually exceeds thresholds
                                if raw_value > 0:  # Only check non-zero values
                                    if raw_value >= thresholds['critical']:
                                        smart_health['severity'] = 'CRITICAL'
                                        smart_health['issues'].append(f"Critical {attr}: {raw_value}")
@@ -1748,12 +1747,13 @@ class SystemHealthMonitor:
            )
            logger.debug(f"pct list output:\n{result.stdout}")
-            for line in result.stdout.split('\n')[1:]:  # Skip header
+            for line in result.stdout.split('\n')[1:]:
                if not line.strip():
                    continue
                parts = line.split()
                if len(parts) < 2:
                    logger.debug(f"Skipping invalid line: {line}")
                    continue
                vmid, status = parts[0], parts[1]
@@ -1772,68 +1772,83 @@ class SystemHealthMonitor:
                        'filesystems': []
                    }
-                    # Parse df output correctly
+                    for fs_line in disk_info.stdout.split('\n')[1:]:
                    for fs_line in disk_info.stdout.split('\n')[1:]:  # Skip header
                        if not fs_line.strip() or 'MP' in fs_line:
                            continue
-                        # Split the df line properly
+                        # Fix: Use fs_line instead of line, and columns consistently
-                        fs_parts = fs_line.split()
+                        columns = fs_line.split()
-                        logger.debug(f"Split parts: {fs_parts}")
+                        logger.debug(f"Processing df line: {fs_line}")
-                        if len(fs_parts) >= 6:
+                        logger.debug(f"Split columns: {columns}")
                            try:
                                filesystem = fs_parts[0]
-                                # Skip excluded mounts by filesystem name
+                        if len(columns) >= 6:
-                                if filesystem.startswith('appPool:') or '/mnt/pve/mediafs' in filesystem:
+                            try:
-                                    logger.debug(f"Skipping excluded filesystem: {filesystem}")
+                                # Skip excluded mounts by checking the first column
                                if columns[0].startswith('appPool:') or '/mnt/pve/mediaf' in columns[1]:
                                    continue
-                                # Parse the values - pct df output format:
+                                # Get the mountpoint (last column)
-                                # Filesystem     1K-blocks    Used Available Use% Mounted on
+                                mountpoint = columns[-1]
                                total_kb = int(fs_parts[1])
                                used_kb = int(fs_parts[2]) 
                                avail_kb = int(fs_parts[3])
                                usage_pct = int(fs_parts[4].rstrip('%'))
                                mountpoint = fs_parts[5]
-                                # Skip excluded mounts by mountpoint
+                                # Skip excluded mountpoints
                                if self._is_excluded_mount(mountpoint):
                                    logger.debug(f"Skipping excluded mount: {mountpoint}")
                                    continue
-                                filesystem_info = {
+                                # Parse size values safely - use correct column indices
                                total_space = self._parse_size(columns[2])  # 3rd column
                                used_space = self._parse_size(columns[3])   # 4th column  
                                available_space = self._parse_size(columns[4])  # 5th column
                                # Parse percentage safely
                                try:
                                    usage_percent = float(columns[5].rstrip('%'))  # 6th column
                                except (ValueError, IndexError):
                                    # Calculate percentage if parsing fails
                                    usage_percent = (used_space / total_space * 100) if total_space > 0 else 0
                                filesystem = {
                                    'mountpoint': mountpoint,
-                                    'filesystem': filesystem,
+                                    'total_space': total_space,
-                                    'total_space': total_kb * 1024,  # Convert to bytes
+                                    'used_space': used_space,
-                                    'used_space': used_kb * 1024,
+                                    'available': available_space,
-                                    'available': avail_kb * 1024,
+                                    'usage_percent': usage_percent
                                    'usage_percent': usage_pct
                                }
-                                container_info['filesystems'].append(filesystem_info)
+                                container_info['filesystems'].append(filesystem)
                                # Check thresholds
-                                if usage_pct >= self.CONFIG['THRESHOLDS']['LXC_CRITICAL']:
+                                if usage_percent >= self.CONFIG['THRESHOLDS']['LXC_CRITICAL']:
                                    lxc_health['status'] = 'CRITICAL'
-                                    issue = f"LXC {vmid} critical storage usage: {usage_pct}% on {mountpoint}"
+                                    issue = f"LXC {vmid} critical storage usage: {usage_percent:.1f}% on {mountpoint}"
                                    lxc_health['issues'].append(issue)
-                                elif usage_pct >= self.CONFIG['THRESHOLDS']['LXC_WARNING']:
+                                elif usage_percent >= self.CONFIG['THRESHOLDS']['LXC_WARNING']:
                                    if lxc_health['status'] != 'CRITICAL':
                                        lxc_health['status'] = 'WARNING'
-                                    issue = f"LXC {vmid} high storage usage: {usage_pct}% on {mountpoint}"
+                                    issue = f"LXC {vmid} high storage usage: {usage_percent:.1f}% on {mountpoint}"
                                    lxc_health['issues'].append(issue)
-                            except (ValueError, IndexError) as e:
+                                logger.debug(f"Filesystem details: {filesystem}")
-                                logger.debug(f"Error parsing df line '{fs_line}': {e}")
+                            except Exception as e:
                                logger.debug(f"Error processing line: {str(e)}")
                                logger.debug(f"Full exception: {repr(e)}")
                                continue
                    # Only add container info if we have filesystem data
                    if container_info['filesystems']:
                        lxc_health['containers'].append(container_info)
                        logger.debug(f"Added container info for VMID {vmid}")
            logger.debug("=== LXC Storage Check Summary ===")
            logger.debug(f"Status: {lxc_health['status']}")
            logger.debug(f"Total containers checked: {len(lxc_health['containers'])}")
            logger.debug(f"Issues found: {len(lxc_health['issues'])}")
            logger.debug("=== End LXC Storage Check ===")
        except Exception as e:
-            logger.error(f"Error in LXC storage check: {e}")
+            logger.debug(f"Critical error during LXC storage check: {str(e)}")
            lxc_health['status'] = 'ERROR'
-            lxc_health['issues'].append(f"Error checking LXC storage: {str(e)}")
+            error_msg = f"Error checking LXC storage: {str(e)}"
            lxc_health['issues'].append(error_msg)
        return lxc_health