"""Tests for SystemHealthMonitor pure methods — no external processes or filesystem."""
import os
import sys
from unittest.mock import patch

import pytest

# Make the project root importable so hwmonDaemon resolves when the tests are
# run from inside the tests directory.
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))

from hwmonDaemon import SystemHealthMonitor  # noqa: E402


@pytest.fixture(scope='module')
def monitor():
    """Create a minimal monitor instance with all external side-effects patched out.

    The patches are only active while the constructor runs; the instance is
    built once per module and shared by every test below.
    """
    with patch.object(SystemHealthMonitor, 'load_env_config'), \
         patch.object(SystemHealthMonitor, '_check_tool_availability', return_value={}), \
         patch('os.makedirs'):
        return SystemHealthMonitor(dry_run=True)


# ── _format_bytes_human ──────────────────────────────────────────────────────

class TestFormatBytesHuman:
    """_format_bytes_human renders byte counts with 1024-based units, one decimal."""

    def test_bytes(self, monitor):
        assert monitor._format_bytes_human(512) == '512.0 B'

    def test_kilobytes(self, monitor):
        assert monitor._format_bytes_human(1024) == '1.0 KB'

    def test_megabytes(self, monitor):
        assert monitor._format_bytes_human(1024 ** 2) == '1.0 MB'

    def test_gigabytes(self, monitor):
        assert monitor._format_bytes_human(1024 ** 3) == '1.0 GB'

    def test_terabytes(self, monitor):
        assert monitor._format_bytes_human(1024 ** 4) == '1.0 TB'

    def test_fractional(self, monitor):
        assert monitor._format_bytes_human(1536) == '1.5 KB'

    def test_zero(self, monitor):
        assert monitor._format_bytes_human(0) == '0.0 B'


# ── _parse_size ───────────────────────────────────────────────────────────────

class TestParseSize:
    """_parse_size converts suffixed size strings to bytes; bad input yields 0.0."""

    def test_gigabytes(self, monitor):
        result = monitor._parse_size('15.7G')
        # Fractional sizes go through float math, so allow sub-byte rounding error.
        assert abs(result - 15.7 * 1024**3) < 1

    def test_terabytes(self, monitor):
        result = monitor._parse_size('21.8T')
        assert abs(result - 21.8 * 1024**4) < 1

    def test_megabytes(self, monitor):
        result = monitor._parse_size('512M')
        assert result == 512 * 1024**2

    def test_kilobytes(self, monitor):
        result = monitor._parse_size('100K')
        assert result == 100 * 1024

    def test_bytes(self, monitor):
        result = monitor._parse_size('100B')
        assert result == 100

    def test_invalid_returns_zero(self, monitor):
        assert monitor._parse_size('notasize') == 0.0

    def test_non_string_returns_zero(self, monitor):
        assert monitor._parse_size(None) == 0.0
        assert monitor._parse_size(42) == 0.0


# ── _parse_smart_value ────────────────────────────────────────────────────────

class TestParseSmartValue:
    """_parse_smart_value extracts an integer from raw SMART value strings."""

    def test_plain_integer(self, monitor):
        assert monitor._parse_smart_value('42') == 42

    def test_temperature_with_celsius(self, monitor):
        assert monitor._parse_smart_value('38 °C') == 38

    def test_time_format(self, monitor):
        # Only the leading hours component is expected back.
        assert monitor._parse_smart_value('15589h+17m+33.939s') == 15589

    def test_hex_value(self, monitor):
        assert monitor._parse_smart_value('0x0a') == 10

    def test_invalid_returns_zero(self, monitor):
        assert monitor._parse_smart_value('not_a_number') == 0


# ── _detect_manufacturer ──────────────────────────────────────────────────────

class TestDetectManufacturer:
    """_detect_manufacturer maps a drive model string to a vendor name."""

    def test_western_digital(self, monitor):
        assert monitor._detect_manufacturer('WDC WD40EFRX') == 'Western Digital'

    def test_hgst(self, monitor):
        # HGST models are expected to be reported as Western Digital.
        assert monitor._detect_manufacturer('HGST HUH728080ALE604') == 'Western Digital'

    def test_seagate(self, monitor):
        assert monitor._detect_manufacturer('ST4000DM004') == 'Seagate'

    def test_samsung(self, monitor):
        assert monitor._detect_manufacturer('Samsung SSD 870 EVO') == 'Samsung'

    def test_intel(self, monitor):
        assert monitor._detect_manufacturer('INTEL SSDSC2KB480G8') == 'Intel'

    def test_micron(self, monitor):
        # Crucial-branded models are expected to be reported as Micron.
        assert monitor._detect_manufacturer('Crucial CT500MX500SSD1') == 'Micron'

    def test_toshiba(self, monitor):
        assert monitor._detect_manufacturer('TOSHIBA MG06ACA10TE') == 'Toshiba'

    def test_unknown(self, monitor):
        assert monitor._detect_manufacturer('GENERICDRIVE XYZ') == 'Unknown'

    def test_empty_model(self, monitor):
        assert monitor._detect_manufacturer('') == 'Unknown'

    def test_none_model(self, monitor):
        assert monitor._detect_manufacturer(None) == 'Unknown'


# ── _check_thermal_health ─────────────────────────────────────────────────────

class TestCheckThermalHealth:
    """_check_thermal_health returns a list of issue strings tagged by severity."""

    def test_hdd_ok_temperature(self, monitor):
        issues = monitor._check_thermal_health('sda', 45, 'HDD')
        assert issues == []

    def test_hdd_info_temperature(self, monitor):
        issues = monitor._check_thermal_health('sda', 62, 'HDD')
        assert len(issues) == 1
        assert 'INFO' in issues[0]

    def test_hdd_warning_temperature(self, monitor):
        issues = monitor._check_thermal_health('sda', 66, 'HDD')
        assert len(issues) == 1
        assert 'WARNING' in issues[0]

    def test_hdd_critical_temperature(self, monitor):
        issues = monitor._check_thermal_health('sda', 76, 'HDD')
        assert len(issues) == 1
        assert 'CRITICAL' in issues[0]

    def test_ssd_has_higher_warning_threshold(self, monitor):
        # HDD warning=65°C, SSD warning=70°C; at 67°C:
        # HDD → WARNING, SSD → INFO (above optimal_max=65 but below warning=70)
        issues_hdd = monitor._check_thermal_health('sda', 67, 'HDD')
        issues_ssd = monitor._check_thermal_health('sda', 67, 'SSD')
        assert any('WARNING' in i for i in issues_hdd)
        assert not any('WARNING' in i for i in issues_ssd)
        assert any('INFO' in i for i in issues_ssd)

    def test_none_temperature_returns_empty(self, monitor):
        issues = monitor._check_thermal_health('sda', None, 'HDD')
        assert issues == []


# ── _is_excluded_mount ────────────────────────────────────────────────────────

class TestIsExcludedMount:
    """_is_excluded_mount reports whether a mount point is skipped by the monitor."""

    def test_exact_excluded_mount(self, monitor):
        assert monitor._is_excluded_mount('/media') is True

    def test_pattern_excluded(self, monitor):
        assert monitor._is_excluded_mount('/media/external') is True

    def test_downloads_excluded(self, monitor):
        assert monitor._is_excluded_mount('/mnt/data/downloads') is True

    def test_normal_mount_not_excluded(self, monitor):
        assert monitor._is_excluded_mount('/') is False
        assert monitor._is_excluded_mount('/var') is False
        assert monitor._is_excluded_mount('/mnt/ceph') is False


# ── _is_new_drive ─────────────────────────────────────────────────────────────

class TestIsNewDrive:
    """_is_new_drive is True strictly below 720 power-on hours."""

    def test_brand_new_drive(self, monitor):
        assert monitor._is_new_drive(0) is True

    def test_one_hour_drive(self, monitor):
        assert monitor._is_new_drive(1) is True

    def test_under_threshold(self, monitor):
        assert monitor._is_new_drive(719) is True

    def test_at_threshold_is_not_new(self, monitor):
        assert monitor._is_new_drive(720) is False

    def test_old_drive(self, monitor):
        assert monitor._is_new_drive(50000) is False


# ── _is_physical_disk ────────────────────────────────────────────────────────

class TestIsPhysicalDisk:
    """_is_physical_disk rejects virtual, mapped and partition device paths.

    Only exclusion cases are covered here: _is_physical_disk also checks
    os.path.exists and reads sysfs, but the exclusion logic runs first and can
    return False early, which keeps these cases pure. A positive case such as
    /dev/sda would depend on the host and is deliberately not asserted.
    """

    def test_mapper_path_excluded(self, monitor):
        # Previously named test_real_sata_disk, which did not describe what is
        # asserted: a /dev/mapper path must be rejected.
        assert monitor._is_physical_disk('/dev/mapper/data') is False

    def test_device_mapper_excluded(self, monitor):
        assert monitor._is_physical_disk('/dev/dm-0') is False

    def test_loop_device_excluded(self, monitor):
        assert monitor._is_physical_disk('/dev/loop0') is False

    def test_partition_excluded(self, monitor):
        assert monitor._is_physical_disk('/dev/sda1') is False

    def test_rbd_excluded(self, monitor):
        assert monitor._is_physical_disk('/dev/rbd0') is False


# ── _get_manufacturer_profile ────────────────────────────────────────────────

class TestGetManufacturerProfile:
    """_get_manufacturer_profile selects a SMART profile by model (or firmware)."""

    def test_seagate_model_matched(self, monitor):
        profile = monitor._get_manufacturer_profile('ST4000DM004')
        assert 'High_Fly_Writes' in profile['attributes']

    def test_seagate_command_timeout_disabled(self, monitor):
        profile = monitor._get_manufacturer_profile('ST4000DM004')
        assert profile['attributes']['Command_Timeout']['monitor'] is False

    def test_wd_model_matched(self, monitor):
        profile = monitor._get_manufacturer_profile('WDC WD40EFRX')
        assert profile['attributes']['Command_Timeout']['monitor'] is False

    def test_samsung_model_matched(self, monitor):
        profile = monitor._get_manufacturer_profile('Samsung SSD 870 EVO')
        assert 'Program_Fail_Cnt_Total' in profile['attributes']
        assert profile['attributes']['Program_Fail_Cnt_Total']['monitor'] is False

    def test_samsung_erase_fail_chip_disabled(self, monitor):
        profile = monitor._get_manufacturer_profile('Samsung SSD 870 EVO')
        assert profile['attributes']['Erase_Fail_Count_Chip']['monitor'] is False

    def test_toshiba_mg08_model_matched_by_prefix(self, monitor):
        # MG08 prefix — model string without "TOSHIBA" word
        profile = monitor._get_manufacturer_profile('MG08ACP16TE')
        assert profile['attributes']['Command_Timeout']['monitor'] is True
        assert profile['attributes']['Command_Timeout']['warning_threshold'] == 1000

    def test_toshiba_command_timeout_raised_threshold(self, monitor):
        profile = monitor._get_manufacturer_profile('TOSHIBA MG06ACA10TE')
        assert profile['attributes']['Command_Timeout']['critical_threshold'] == 5000

    def test_oos_model_matched(self, monitor):
        profile = monitor._get_manufacturer_profile('OOS14000G')
        assert profile['attributes']['Command_Timeout']['monitor'] is False
        assert profile['attributes']['Seek_Error_Rate']['monitor'] is False

    def test_ridata_firmware_match(self, monitor):
        # The model string is generic here; the firmware argument drives the match.
        profile = monitor._get_manufacturer_profile('SSD 512GB', firmware='HT3618B7')
        assert profile['attributes']['Erase_Fail_Count_Chip']['monitor'] is False

    def test_unknown_model_returns_generic(self, monitor):
        profile = monitor._get_manufacturer_profile('GENERICDRIVE XYZ')
        # Identity check: the shared Generic profile object itself is returned.
        assert profile is monitor.MANUFACTURER_SMART_PROFILES['Generic']


# ── _should_monitor_attribute ────────────────────────────────────────────────

class TestShouldMonitorAttribute:
    """_should_monitor_attribute honours per-profile 'monitor' flags, default True."""

    def test_disabled_attribute_returns_false(self, monitor):
        seagate = monitor._get_manufacturer_profile('ST4000DM004')
        assert monitor._should_monitor_attribute('Command_Timeout', seagate) is False

    def test_enabled_attribute_returns_true(self, monitor):
        seagate = monitor._get_manufacturer_profile('ST4000DM004')
        assert monitor._should_monitor_attribute('High_Fly_Writes', seagate) is True

    def test_unknown_attribute_defaults_to_true(self, monitor):
        seagate = monitor._get_manufacturer_profile('ST4000DM004')
        assert monitor._should_monitor_attribute('Some_Unknown_Attr', seagate) is True

    def test_none_profile_defaults_to_true(self, monitor):
        assert monitor._should_monitor_attribute('Anything', None) is True

    def test_samsung_program_fail_total_disabled(self, monitor):
        samsung = monitor._get_manufacturer_profile('Samsung SSD 870 EVO')
        assert monitor._should_monitor_attribute('Program_Fail_Cnt_Total', samsung) is False


# ── _get_attribute_thresholds ────────────────────────────────────────────────

class TestGetAttributeThresholds:
    """_get_attribute_thresholds returns warning/critical/behavior for an attribute."""

    def test_seagate_high_fly_writes_thresholds(self, monitor):
        seagate = monitor._get_manufacturer_profile('ST4000DM004')
        t = monitor._get_attribute_thresholds('High_Fly_Writes', seagate)
        assert t['warning'] == 100
        assert t['critical'] == 500

    def test_toshiba_command_timeout_thresholds(self, monitor):
        toshiba = monitor._get_manufacturer_profile('MG08ACP16TE')
        t = monitor._get_attribute_thresholds('Command_Timeout', toshiba)
        assert t['warning'] == 1000
        assert t['critical'] == 5000

    def test_base_threshold_reallocated_sector(self, monitor):
        generic = monitor.MANUFACTURER_SMART_PROFILES['Generic']
        t = monitor._get_attribute_thresholds('Reallocated_Sector_Ct', generic)
        assert t['warning'] == 5
        assert t['critical'] == 10

    def test_base_threshold_high_fly_writes_raised(self, monitor):
        # Default (non-Seagate) High_Fly_Writes threshold is now 100/500
        generic = monitor.MANUFACTURER_SMART_PROFILES['Generic']
        t = monitor._get_attribute_thresholds('High_Fly_Writes', generic)
        assert t['warning'] == 100
        assert t['critical'] == 500

    def test_unknown_attribute_returns_none(self, monitor):
        generic = monitor.MANUFACTURER_SMART_PROFILES['Generic']
        assert monitor._get_attribute_thresholds('Made_Up_Attribute', generic) is None

    def test_behavior_defaults_to_countup(self, monitor):
        generic = monitor.MANUFACTURER_SMART_PROFILES['Generic']
        t = monitor._get_attribute_thresholds('Reallocated_Sector_Ct', generic)
        assert t['behavior'] == 'countup'


# ── _get_issue_type ───────────────────────────────────────────────────────────

class TestGetIssueType:
    """_get_issue_type classifies an issue message into a human-readable type."""

    def test_smart_issue(self, monitor):
        assert monitor._get_issue_type('SMART attribute warning on /dev/sda') == 'SMART Health Issue'

    def test_drive_issue(self, monitor):
        assert monitor._get_issue_type('Drive /dev/sdb has reallocated sectors') == 'Storage Issue'

    def test_ceph_issue(self, monitor):
        assert monitor._get_issue_type('Ceph cluster is HEALTH_WARN') == 'Ceph Cluster Issue'

    def test_ecc_issue(self, monitor):
        assert monitor._get_issue_type('ECC memory errors detected') == 'Memory Issue'

    def test_cpu_issue(self, monitor):
        assert monitor._get_issue_type('CPU usage at 95%') == 'Performance Issue'

    def test_network_issue(self, monitor):
        assert monitor._get_issue_type('Network interface eth0 down') == 'Network Issue'

    def test_lxc_issue(self, monitor):
        assert monitor._get_issue_type('LXC container storage usage at 90%') == 'Container Storage Issue'

    def test_unknown_defaults_to_hardware(self, monitor):
        assert monitor._get_issue_type('Something completely unknown') == 'Hardware Issue'


# ── _get_impact_level ─────────────────────────────────────────────────────────

class TestGetImpactLevel:
    """_get_impact_level tags an issue message as [CRIT], [WARN] or [LOW]."""

    def test_critical_issue(self, monitor):
        level = monitor._get_impact_level('CRITICAL: drive failure imminent')
        assert '[CRIT]' in level

    def test_unhealthy_is_critical(self, monitor):
        level = monitor._get_impact_level('Ceph is UNHEALTHY')
        assert '[CRIT]' in level

    def test_warning_issue(self, monitor):
        level = monitor._get_impact_level('WARNING: temperature elevated')
        assert '[WARN]' in level

    def test_storage_usage_is_warn_not_crit(self, monitor):
        # "STORAGE USAGE" keyword takes priority over "CRITICAL" substring check
        level = monitor._get_impact_level('CRITICAL storage usage at 95%')
        assert '[WARN]' in level

    def test_cpu_usage_is_warn(self, monitor):
        level = monitor._get_impact_level('CPU usage at 80% threshold exceeded')
        assert '[WARN]' in level

    def test_low_priority(self, monitor):
        level = monitor._get_impact_level('Informational: drive age notification')
        assert '[LOW]' in level

    def test_health_err_is_critical(self, monitor):
        level = monitor._get_impact_level('Ceph status: HEALTH_ERR')
        assert '[CRIT]' in level

    def test_down_is_warning(self, monitor):
        level = monitor._get_impact_level('OSD.3 is DOWN')
        assert '[WARN]' in level


# ── _categorize_issue ─────────────────────────────────────────────────────────

class TestCategorizeIssue:
    """_categorize_issue returns a 4-tuple; the first two items are category and type."""

    def test_smart_critical_is_hardware_issue(self, monitor):
        cat, ttype, _, _ = monitor._categorize_issue('SMART critical error on /dev/sda')
        assert cat == monitor.TICKET_CATEGORIES['HARDWARE']
        assert ttype == monitor.TICKET_TYPES['ISSUE']

    def test_smart_warning_is_hardware_problem(self, monitor):
        cat, ttype, _, _ = monitor._categorize_issue('SMART warning: High_Fly_Writes elevated')
        assert cat == monitor.TICKET_CATEGORIES['HARDWARE']
        assert ttype == monitor.TICKET_TYPES['PROBLEM']

    def test_lxc_critical_is_software_issue(self, monitor):
        cat, ttype, _, _ = monitor._categorize_issue('LXC container storage critical')
        assert cat == monitor.TICKET_CATEGORIES['SOFTWARE']
        assert ttype == monitor.TICKET_TYPES['ISSUE']

    def test_temperature_is_hardware(self, monitor):
        cat, _, _, _ = monitor._categorize_issue('temperature warning on /dev/sdb')
        assert cat == monitor.TICKET_CATEGORIES['HARDWARE']

    def test_nvme_is_hardware(self, monitor):
        cat, _, _, _ = monitor._categorize_issue('NVMe drive warning on /dev/nvme0')
        assert cat == monitor.TICKET_CATEGORIES['HARDWARE']