Files
jared 823ff18890
Lint / Python (flake8) (push) Successful in 1m14s
Test / Python Tests (pytest) (push) Successful in 1m29s
Lint / Notify on failure (push) Has been skipped
Security / Python Security (bandit) (push) Failing after 11m40s
test: expand test coverage for manufacturer profiles and ticket classification
Add 42 new tests covering _get_manufacturer_profile (Seagate/WD/Samsung/Toshiba/
OOS/Ridata/unknown), _should_monitor_attribute, _get_attribute_thresholds,
_get_issue_type, _get_impact_level, and _categorize_issue.

Toshiba MG08 prefix matching and the raised High_Fly_Writes/Command_Timeout
thresholds introduced in the previous commit are now covered by tests.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 19:52:59 -04:00

426 lines
18 KiB
Python

"""Tests for SystemHealthMonitor pure methods — no external processes or filesystem."""
import sys
import os
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from unittest.mock import patch # noqa: E402
from hwmonDaemon import SystemHealthMonitor # noqa: E402
@pytest.fixture(scope='module')
def monitor():
    """Build one SystemHealthMonitor per test module in dry-run mode.

    While the instance is constructed, ``load_env_config`` and
    ``_check_tool_availability`` (forced to return ``{}``) are patched on the
    class and ``os.makedirs`` is replaced with a mock. The patches are
    released again before the instance is handed to the tests.
    """
    env_patch = patch.object(SystemHealthMonitor, 'load_env_config')
    tools_patch = patch.object(
        SystemHealthMonitor, '_check_tool_availability', return_value={}
    )
    makedirs_patch = patch('os.makedirs')
    with env_patch, tools_patch, makedirs_patch:
        instance = SystemHealthMonitor(dry_run=True)
    return instance
# ── _format_bytes_human ──────────────────────────────────────────────────────
class TestFormatBytesHuman:
    """Expected strings from ``_format_bytes_human`` for each unit step."""

    def test_bytes(self, monitor):
        """A value below 1024 keeps the B unit."""
        formatted = monitor._format_bytes_human(512)
        assert formatted == '512.0 B'

    def test_kilobytes(self, monitor):
        """Exactly 1024 rolls over to KB."""
        formatted = monitor._format_bytes_human(1024)
        assert formatted == '1.0 KB'

    def test_megabytes(self, monitor):
        """1024 squared is reported as one MB."""
        formatted = monitor._format_bytes_human(1024 ** 2)
        assert formatted == '1.0 MB'

    def test_gigabytes(self, monitor):
        """1024 cubed is reported as one GB."""
        formatted = monitor._format_bytes_human(1024 ** 3)
        assert formatted == '1.0 GB'

    def test_terabytes(self, monitor):
        """1024 to the fourth power is reported as one TB."""
        formatted = monitor._format_bytes_human(1024 ** 4)
        assert formatted == '1.0 TB'

    def test_fractional(self, monitor):
        """1536 bytes shows one decimal place in KB."""
        formatted = monitor._format_bytes_human(1536)
        assert formatted == '1.5 KB'

    def test_zero(self, monitor):
        """Zero is formatted like any other byte count."""
        formatted = monitor._format_bytes_human(0)
        assert formatted == '0.0 B'
# ── _parse_size ───────────────────────────────────────────────────────────────
class TestParseSize:
    """Expected byte counts from ``_parse_size`` for suffixed size strings."""

    def test_gigabytes(self, monitor):
        """'15.7G' lands within one byte of 15.7 * 1024**3."""
        parsed = monitor._parse_size('15.7G')
        delta = abs(parsed - 15.7 * 1024**3)
        assert delta < 1

    def test_terabytes(self, monitor):
        """'21.8T' lands within one byte of 21.8 * 1024**4."""
        parsed = monitor._parse_size('21.8T')
        delta = abs(parsed - 21.8 * 1024**4)
        assert delta < 1

    def test_megabytes(self, monitor):
        """A whole-number M value must match exactly."""
        assert monitor._parse_size('512M') == 512 * 1024**2

    def test_kilobytes(self, monitor):
        """A whole-number K value must match exactly."""
        assert monitor._parse_size('100K') == 100 * 1024

    def test_bytes(self, monitor):
        """The B suffix applies no multiplier."""
        assert monitor._parse_size('100B') == 100

    def test_invalid_returns_zero(self, monitor):
        """An unparseable string yields 0.0."""
        parsed = monitor._parse_size('notasize')
        assert parsed == 0.0

    def test_non_string_returns_zero(self, monitor):
        """Inputs that are not strings yield 0.0."""
        for bad_input in (None, 42):
            assert monitor._parse_size(bad_input) == 0.0
# ── _parse_smart_value ────────────────────────────────────────────────────────
class TestParseSmartValue:
    """Expected integers from ``_parse_smart_value`` for raw SMART strings."""

    def test_plain_integer(self, monitor):
        """A bare decimal string converts directly."""
        parsed = monitor._parse_smart_value('42')
        assert parsed == 42

    def test_temperature_with_celsius(self, monitor):
        """A trailing degree-Celsius unit does not affect the number."""
        parsed = monitor._parse_smart_value('38 °C')
        assert parsed == 38

    def test_time_format(self, monitor):
        """For an h+m+s duration, the expected result is the hour count."""
        parsed = monitor._parse_smart_value('15589h+17m+33.939s')
        assert parsed == 15589

    def test_hex_value(self, monitor):
        """A 0x-prefixed string is read as hexadecimal."""
        parsed = monitor._parse_smart_value('0x0a')
        assert parsed == 10

    def test_invalid_returns_zero(self, monitor):
        """Text without a usable number yields 0."""
        parsed = monitor._parse_smart_value('not_a_number')
        assert parsed == 0
# ── _detect_manufacturer ──────────────────────────────────────────────────────
class TestDetectManufacturer:
    """Expected vendor names from ``_detect_manufacturer`` per model string."""

    def test_western_digital(self, monitor):
        """A WDC-prefixed model maps to Western Digital."""
        vendor = monitor._detect_manufacturer('WDC WD40EFRX')
        assert vendor == 'Western Digital'

    def test_hgst(self, monitor):
        """An HGST model is expected to be reported as Western Digital."""
        vendor = monitor._detect_manufacturer('HGST HUH728080ALE604')
        assert vendor == 'Western Digital'

    def test_seagate(self, monitor):
        """An ST-prefixed model maps to Seagate."""
        vendor = monitor._detect_manufacturer('ST4000DM004')
        assert vendor == 'Seagate'

    def test_samsung(self, monitor):
        """A Samsung SSD model maps to Samsung."""
        vendor = monitor._detect_manufacturer('Samsung SSD 870 EVO')
        assert vendor == 'Samsung'

    def test_intel(self, monitor):
        """An INTEL-prefixed model maps to Intel."""
        vendor = monitor._detect_manufacturer('INTEL SSDSC2KB480G8')
        assert vendor == 'Intel'

    def test_micron(self, monitor):
        """A Crucial model is expected to be reported as Micron."""
        vendor = monitor._detect_manufacturer('Crucial CT500MX500SSD1')
        assert vendor == 'Micron'

    def test_toshiba(self, monitor):
        """A TOSHIBA-prefixed model maps to Toshiba."""
        vendor = monitor._detect_manufacturer('TOSHIBA MG06ACA10TE')
        assert vendor == 'Toshiba'

    def test_unknown(self, monitor):
        """An unrecognised model string yields 'Unknown'."""
        vendor = monitor._detect_manufacturer('GENERICDRIVE XYZ')
        assert vendor == 'Unknown'

    def test_empty_model(self, monitor):
        """An empty model string yields 'Unknown'."""
        vendor = monitor._detect_manufacturer('')
        assert vendor == 'Unknown'

    def test_none_model(self, monitor):
        """A missing (None) model yields 'Unknown'."""
        vendor = monitor._detect_manufacturer(None)
        assert vendor == 'Unknown'
# ── _check_thermal_health ─────────────────────────────────────────────────────
class TestCheckThermalHealth:
    """Expected issue lists from ``_check_thermal_health`` by temperature."""

    def test_hdd_ok_temperature(self, monitor):
        """45 on an HDD produces no issues."""
        assert monitor._check_thermal_health('sda', 45, 'HDD') == []

    def test_hdd_info_temperature(self, monitor):
        """62 on an HDD produces exactly one INFO entry."""
        reported = monitor._check_thermal_health('sda', 62, 'HDD')
        assert len(reported) == 1
        first = reported[0]
        assert 'INFO' in first

    def test_hdd_warning_temperature(self, monitor):
        """66 on an HDD produces exactly one WARNING entry."""
        reported = monitor._check_thermal_health('sda', 66, 'HDD')
        assert len(reported) == 1
        first = reported[0]
        assert 'WARNING' in first

    def test_hdd_critical_temperature(self, monitor):
        """76 on an HDD produces exactly one CRITICAL entry."""
        reported = monitor._check_thermal_health('sda', 76, 'HDD')
        assert len(reported) == 1
        first = reported[0]
        assert 'CRITICAL' in first

    def test_ssd_has_higher_warning_threshold(self, monitor):
        """The same reading is graded more leniently for an SSD than an HDD."""
        # Thresholds as noted by the test author: HDD warning=65°C,
        # SSD warning=70°C, SSD optimal_max=65. At 67°C the HDD should warn
        # while the SSD should only report INFO.
        reading = 67
        hdd_reported = monitor._check_thermal_health('sda', reading, 'HDD')
        ssd_reported = monitor._check_thermal_health('sda', reading, 'SSD')
        assert any('WARNING' in entry for entry in hdd_reported)
        assert not any('WARNING' in entry for entry in ssd_reported)
        assert any('INFO' in entry for entry in ssd_reported)

    def test_none_temperature_returns_empty(self, monitor):
        """A missing temperature reading produces no issues."""
        assert monitor._check_thermal_health('sda', None, 'HDD') == []
# ── _is_excluded_mount ────────────────────────────────────────────────────────
class TestIsExcludedMount:
    """Expected True/False answers from ``_is_excluded_mount``."""

    def test_exact_excluded_mount(self, monitor):
        """'/media' itself is excluded."""
        excluded = monitor._is_excluded_mount('/media')
        assert excluded is True

    def test_pattern_excluded(self, monitor):
        """A path beneath '/media' is excluded as well."""
        excluded = monitor._is_excluded_mount('/media/external')
        assert excluded is True

    def test_downloads_excluded(self, monitor):
        """A downloads directory under /mnt is excluded."""
        excluded = monitor._is_excluded_mount('/mnt/data/downloads')
        assert excluded is True

    def test_normal_mount_not_excluded(self, monitor):
        """Ordinary system and storage mounts are not excluded."""
        for mount_point in ('/', '/var', '/mnt/ceph'):
            assert monitor._is_excluded_mount(mount_point) is False
# ── _is_new_drive ─────────────────────────────────────────────────────────────
class TestIsNewDrive:
    """Expected answers from ``_is_new_drive`` around the 720 boundary."""

    def test_brand_new_drive(self, monitor):
        """Zero counts as new."""
        is_new = monitor._is_new_drive(0)
        assert is_new is True

    def test_one_hour_drive(self, monitor):
        """One counts as new."""
        is_new = monitor._is_new_drive(1)
        assert is_new is True

    def test_under_threshold(self, monitor):
        """719, just under the boundary, still counts as new."""
        is_new = monitor._is_new_drive(719)
        assert is_new is True

    def test_at_threshold_is_not_new(self, monitor):
        """Exactly 720 is no longer new."""
        is_new = monitor._is_new_drive(720)
        assert is_new is False

    def test_old_drive(self, monitor):
        """A value far beyond the boundary is not new."""
        is_new = monitor._is_new_drive(50000)
        assert is_new is False
# ── _is_physical_disk ────────────────────────────────────────────────────────
class TestIsPhysicalDisk:
    """Exclusion-path checks for ``_is_physical_disk``.

    Only device paths expected to be rejected are exercised here. Per the
    original author's note, ``_is_physical_disk`` also consults
    ``os.path.exists`` and sysfs for paths that are not excluded, so a
    positive case (for example ``/dev/sda``) is deliberately not asserted:
    it would depend on the host the tests run on.
    """

    def test_mapper_path_excluded(self, monitor):
        """A /dev/mapper/* path is not treated as a physical disk."""
        # Previously named test_real_sata_disk, although it never checked a
        # real SATA device; the name now matches what is asserted.
        assert monitor._is_physical_disk('/dev/mapper/data') is False

    def test_device_mapper_excluded(self, monitor):
        """A dm-N device node is not treated as a physical disk."""
        assert monitor._is_physical_disk('/dev/dm-0') is False

    def test_loop_device_excluded(self, monitor):
        """A loop device is not treated as a physical disk."""
        assert monitor._is_physical_disk('/dev/loop0') is False

    def test_partition_excluded(self, monitor):
        """A partition (sda1) is not treated as a physical disk."""
        assert monitor._is_physical_disk('/dev/sda1') is False

    def test_rbd_excluded(self, monitor):
        """An rbd device is not treated as a physical disk."""
        assert monitor._is_physical_disk('/dev/rbd0') is False
# ── _get_manufacturer_profile ────────────────────────────────────────────────
class TestGetManufacturerProfile:
    """Expected profile contents from ``_get_manufacturer_profile``."""

    def test_seagate_model_matched(self, monitor):
        """The profile for an ST model has a High_Fly_Writes entry."""
        attrs = monitor._get_manufacturer_profile('ST4000DM004')['attributes']
        assert 'High_Fly_Writes' in attrs

    def test_seagate_command_timeout_disabled(self, monitor):
        """The profile for an ST model turns Command_Timeout monitoring off."""
        attrs = monitor._get_manufacturer_profile('ST4000DM004')['attributes']
        assert attrs['Command_Timeout']['monitor'] is False

    def test_wd_model_matched(self, monitor):
        """The profile for a WDC model turns Command_Timeout monitoring off."""
        attrs = monitor._get_manufacturer_profile('WDC WD40EFRX')['attributes']
        assert attrs['Command_Timeout']['monitor'] is False

    def test_samsung_model_matched(self, monitor):
        """The Samsung profile lists Program_Fail_Cnt_Total as unmonitored."""
        attrs = monitor._get_manufacturer_profile('Samsung SSD 870 EVO')['attributes']
        assert 'Program_Fail_Cnt_Total' in attrs
        assert attrs['Program_Fail_Cnt_Total']['monitor'] is False

    def test_samsung_erase_fail_chip_disabled(self, monitor):
        """The Samsung profile turns Erase_Fail_Count_Chip monitoring off."""
        attrs = monitor._get_manufacturer_profile('Samsung SSD 870 EVO')['attributes']
        assert attrs['Erase_Fail_Count_Chip']['monitor'] is False

    def test_toshiba_mg08_model_matched_by_prefix(self, monitor):
        """An MG08 model string lacking the word TOSHIBA still matches."""
        attrs = monitor._get_manufacturer_profile('MG08ACP16TE')['attributes']
        command_timeout = attrs['Command_Timeout']
        assert command_timeout['monitor'] is True
        assert command_timeout['warning_threshold'] == 1000

    def test_toshiba_command_timeout_raised_threshold(self, monitor):
        """The Toshiba profile's Command_Timeout critical threshold is 5000."""
        attrs = monitor._get_manufacturer_profile('TOSHIBA MG06ACA10TE')['attributes']
        assert attrs['Command_Timeout']['critical_threshold'] == 5000

    def test_oos_model_matched(self, monitor):
        """The OOS profile disables Command_Timeout and Seek_Error_Rate."""
        attrs = monitor._get_manufacturer_profile('OOS14000G')['attributes']
        assert attrs['Command_Timeout']['monitor'] is False
        assert attrs['Seek_Error_Rate']['monitor'] is False

    def test_ridata_firmware_match(self, monitor):
        """With a generic model name, the firmware string selects the profile."""
        matched = monitor._get_manufacturer_profile('SSD 512GB', firmware='HT3618B7')
        attrs = matched['attributes']
        assert attrs['Erase_Fail_Count_Chip']['monitor'] is False

    def test_unknown_model_returns_generic(self, monitor):
        """An unrecognised model returns the very same Generic profile object."""
        generic = monitor.MANUFACTURER_SMART_PROFILES['Generic']
        assert monitor._get_manufacturer_profile('GENERICDRIVE XYZ') is generic
# ── _should_monitor_attribute ────────────────────────────────────────────────
class TestShouldMonitorAttribute:
    """Expected answers from ``_should_monitor_attribute`` per profile."""

    def test_disabled_attribute_returns_false(self, monitor):
        """Command_Timeout is not monitored under the ST-model profile."""
        profile = monitor._get_manufacturer_profile('ST4000DM004')
        decision = monitor._should_monitor_attribute('Command_Timeout', profile)
        assert decision is False

    def test_enabled_attribute_returns_true(self, monitor):
        """High_Fly_Writes is monitored under the ST-model profile."""
        profile = monitor._get_manufacturer_profile('ST4000DM004')
        decision = monitor._should_monitor_attribute('High_Fly_Writes', profile)
        assert decision is True

    def test_unknown_attribute_defaults_to_true(self, monitor):
        """An attribute name absent from the test data is monitored."""
        profile = monitor._get_manufacturer_profile('ST4000DM004')
        decision = monitor._should_monitor_attribute('Some_Unknown_Attr', profile)
        assert decision is True

    def test_none_profile_defaults_to_true(self, monitor):
        """With no profile at all, the attribute is monitored."""
        decision = monitor._should_monitor_attribute('Anything', None)
        assert decision is True

    def test_samsung_program_fail_total_disabled(self, monitor):
        """Program_Fail_Cnt_Total is not monitored under the Samsung profile."""
        profile = monitor._get_manufacturer_profile('Samsung SSD 870 EVO')
        decision = monitor._should_monitor_attribute('Program_Fail_Cnt_Total', profile)
        assert decision is False
# ── _get_attribute_thresholds ────────────────────────────────────────────────
class TestGetAttributeThresholds:
    """Expected threshold dicts from ``_get_attribute_thresholds``."""

    def test_seagate_high_fly_writes_thresholds(self, monitor):
        """High_Fly_Writes under the ST-model profile: warning 100, critical 500."""
        profile = monitor._get_manufacturer_profile('ST4000DM004')
        limits = monitor._get_attribute_thresholds('High_Fly_Writes', profile)
        assert limits['warning'] == 100
        assert limits['critical'] == 500

    def test_toshiba_command_timeout_thresholds(self, monitor):
        """Command_Timeout under the MG08 profile: warning 1000, critical 5000."""
        profile = monitor._get_manufacturer_profile('MG08ACP16TE')
        limits = monitor._get_attribute_thresholds('Command_Timeout', profile)
        assert limits['warning'] == 1000
        assert limits['critical'] == 5000

    def test_base_threshold_reallocated_sector(self, monitor):
        """Reallocated_Sector_Ct under Generic: warning 5, critical 10."""
        profile = monitor.MANUFACTURER_SMART_PROFILES['Generic']
        limits = monitor._get_attribute_thresholds('Reallocated_Sector_Ct', profile)
        assert limits['warning'] == 5
        assert limits['critical'] == 10

    def test_base_threshold_high_fly_writes_raised(self, monitor):
        """High_Fly_Writes under Generic: warning 100, critical 500."""
        # Pins the raised default (non-Seagate) thresholds of 100/500.
        profile = monitor.MANUFACTURER_SMART_PROFILES['Generic']
        limits = monitor._get_attribute_thresholds('High_Fly_Writes', profile)
        assert limits['warning'] == 100
        assert limits['critical'] == 500

    def test_unknown_attribute_returns_none(self, monitor):
        """An attribute with no thresholds yields None."""
        profile = monitor.MANUFACTURER_SMART_PROFILES['Generic']
        limits = monitor._get_attribute_thresholds('Made_Up_Attribute', profile)
        assert limits is None

    def test_behavior_defaults_to_countup(self, monitor):
        """The returned dict carries behavior 'countup' for this attribute."""
        profile = monitor.MANUFACTURER_SMART_PROFILES['Generic']
        limits = monitor._get_attribute_thresholds('Reallocated_Sector_Ct', profile)
        assert limits['behavior'] == 'countup'
# ── _get_issue_type ───────────────────────────────────────────────────────────
class TestGetIssueType:
    """Expected issue-type labels from ``_get_issue_type`` per message."""

    def test_smart_issue(self, monitor):
        """A SMART message is labelled as a SMART health issue."""
        message = 'SMART attribute warning on /dev/sda'
        assert monitor._get_issue_type(message) == 'SMART Health Issue'

    def test_drive_issue(self, monitor):
        """A drive message is labelled as a storage issue."""
        message = 'Drive /dev/sdb has reallocated sectors'
        assert monitor._get_issue_type(message) == 'Storage Issue'

    def test_ceph_issue(self, monitor):
        """A Ceph message is labelled as a Ceph cluster issue."""
        message = 'Ceph cluster is HEALTH_WARN'
        assert monitor._get_issue_type(message) == 'Ceph Cluster Issue'

    def test_ecc_issue(self, monitor):
        """An ECC message is labelled as a memory issue."""
        message = 'ECC memory errors detected'
        assert monitor._get_issue_type(message) == 'Memory Issue'

    def test_cpu_issue(self, monitor):
        """A CPU usage message is labelled as a performance issue."""
        message = 'CPU usage at 95%'
        assert monitor._get_issue_type(message) == 'Performance Issue'

    def test_network_issue(self, monitor):
        """A network interface message is labelled as a network issue."""
        message = 'Network interface eth0 down'
        assert monitor._get_issue_type(message) == 'Network Issue'

    def test_lxc_issue(self, monitor):
        """An LXC storage message is labelled as a container storage issue."""
        message = 'LXC container storage usage at 90%'
        assert monitor._get_issue_type(message) == 'Container Storage Issue'

    def test_unknown_defaults_to_hardware(self, monitor):
        """A message matching nothing specific is labelled as a hardware issue."""
        message = 'Something completely unknown'
        assert monitor._get_issue_type(message) == 'Hardware Issue'
# ── _get_impact_level ─────────────────────────────────────────────────────────
class TestGetImpactLevel:
    """Expected severity tags in the string from ``_get_impact_level``."""

    def test_critical_issue(self, monitor):
        """A message containing CRITICAL is tagged [CRIT]."""
        message = 'CRITICAL: drive failure imminent'
        assert '[CRIT]' in monitor._get_impact_level(message)

    def test_unhealthy_is_critical(self, monitor):
        """A message containing UNHEALTHY is tagged [CRIT]."""
        message = 'Ceph is UNHEALTHY'
        assert '[CRIT]' in monitor._get_impact_level(message)

    def test_warning_issue(self, monitor):
        """A message containing WARNING is tagged [WARN]."""
        message = 'WARNING: temperature elevated'
        assert '[WARN]' in monitor._get_impact_level(message)

    def test_storage_usage_is_warn_not_crit(self, monitor):
        """A storage-usage message is tagged [WARN] even when it says CRITICAL."""
        # Expected precedence: the "STORAGE USAGE" keyword is meant to win
        # over the "CRITICAL" substring check.
        message = 'CRITICAL storage usage at 95%'
        assert '[WARN]' in monitor._get_impact_level(message)

    def test_cpu_usage_is_warn(self, monitor):
        """A CPU usage message is tagged [WARN]."""
        message = 'CPU usage at 80% threshold exceeded'
        assert '[WARN]' in monitor._get_impact_level(message)

    def test_low_priority(self, monitor):
        """An informational message is tagged [LOW]."""
        message = 'Informational: drive age notification'
        assert '[LOW]' in monitor._get_impact_level(message)

    def test_health_err_is_critical(self, monitor):
        """A message containing HEALTH_ERR is tagged [CRIT]."""
        message = 'Ceph status: HEALTH_ERR'
        assert '[CRIT]' in monitor._get_impact_level(message)

    def test_down_is_warning(self, monitor):
        """A message reporting something DOWN is tagged [WARN]."""
        message = 'OSD.3 is DOWN'
        assert '[WARN]' in monitor._get_impact_level(message)
# ── _categorize_issue ─────────────────────────────────────────────────────────
class TestCategorizeIssue:
    """Expected category and ticket type from ``_categorize_issue``.

    The method is unpacked as a four-element result; only the first two
    elements (category and ticket type) are checked here.
    """

    def test_smart_critical_is_hardware_issue(self, monitor):
        """A critical SMART message maps to HARDWARE / ISSUE."""
        outcome = monitor._categorize_issue('SMART critical error on /dev/sda')
        category, ticket_type, _, _ = outcome
        assert category == monitor.TICKET_CATEGORIES['HARDWARE']
        assert ticket_type == monitor.TICKET_TYPES['ISSUE']

    def test_smart_warning_is_hardware_problem(self, monitor):
        """A SMART warning message maps to HARDWARE / PROBLEM."""
        outcome = monitor._categorize_issue('SMART warning: High_Fly_Writes elevated')
        category, ticket_type, _, _ = outcome
        assert category == monitor.TICKET_CATEGORIES['HARDWARE']
        assert ticket_type == monitor.TICKET_TYPES['PROBLEM']

    def test_lxc_critical_is_software_issue(self, monitor):
        """A critical LXC storage message maps to SOFTWARE / ISSUE."""
        outcome = monitor._categorize_issue('LXC container storage critical')
        category, ticket_type, _, _ = outcome
        assert category == monitor.TICKET_CATEGORIES['SOFTWARE']
        assert ticket_type == monitor.TICKET_TYPES['ISSUE']

    def test_temperature_is_hardware(self, monitor):
        """A temperature message maps to the HARDWARE category."""
        outcome = monitor._categorize_issue('temperature warning on /dev/sdb')
        category, _, _, _ = outcome
        assert category == monitor.TICKET_CATEGORIES['HARDWARE']

    def test_nvme_is_hardware(self, monitor):
        """An NVMe message maps to the HARDWARE category."""
        outcome = monitor._categorize_issue('NVMe drive warning on /dev/nvme0')
        category, _, _, _ = outcome
        assert category == monitor.TICKET_CATEGORIES['HARDWARE']