From c252dbcdc435aa40dbb0be7991e6414417fce350 Mon Sep 17 00:00:00 2001 From: Jared Vititoe Date: Mon, 22 Dec 2025 18:05:32 -0500 Subject: [PATCH] resolves /dev/dm-* now --- ceph_osd_analyzer.py | 62 ++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/ceph_osd_analyzer.py b/ceph_osd_analyzer.py index 4b90d39..909ab9b 100644 --- a/ceph_osd_analyzer.py +++ b/ceph_osd_analyzer.py @@ -82,54 +82,66 @@ def get_osd_host_mapping(osd_tree): return osd_to_host def get_device_path_for_osd(osd_id, hostname): - """Get the device path for an OSD on a specific host""" - # Method 1: Try ceph metadata + """Get the physical device path for an OSD on a host (resolve dm devices).""" metadata = get_osd_metadata(osd_id) if metadata: - devices = metadata.get('devices', '') - if devices: - device = devices.split(',')[0] if ',' in devices else devices - if device and not device.startswith('/dev/'): - device = f"/dev/{device}" - if device and device != '/dev/': - if DEBUG: - print(f"{Colors.GREEN}DEBUG: Found device from metadata: {device}{Colors.END}") - return device + # Try 'bluestore_bdev_devices' first + phys_dev = metadata.get('bluestore_bdev_devices') + if phys_dev: + device = f"/dev/{phys_dev.strip()}" + if DEBUG: + print(f"{Colors.GREEN}DEBUG: Found physical device from metadata: {device}{Colors.END}") + return device - # Method 2: Query symlink on remote host - result = run_command(f"readlink -f /var/lib/ceph/osd/ceph-{osd_id}/block 2>/dev/null", host=hostname) + # Fallback: follow the symlink + result = run_command(f"readlink -f /var/lib/ceph/osd/ceph-{osd_id}/block", host=hostname) if result and result.startswith('/dev/'): - if DEBUG: - print(f"{Colors.GREEN}DEBUG: Found device from symlink: {result}{Colors.END}") - return result + # Check if it is a dm device, try to find underlying + if '/dev/dm-' in result: + base = run_command(f"lsblk -no pkname {result}", host=hostname) + if base: + device = f"/dev/{base.strip()}" + if DEBUG: + print(f"{Colors.GREEN}DEBUG: Resolved dm device {result} -> {device}{Colors.END}") + return device + else: + if DEBUG: + print(f"{Colors.GREEN}DEBUG: Using device symlink {result}{Colors.END}") + return result - # Method 3: Try lsblk - result = run_command(f"lsblk -no pkname /var/lib/ceph/osd/ceph-{osd_id}/block 2>/dev/null", host=hostname) + # Last fallback: lsblk from block path + result = run_command(f"lsblk -no pkname /var/lib/ceph/osd/ceph-{osd_id}/block", host=hostname) if result: device = f"/dev/{result.strip()}" if DEBUG: print(f"{Colors.GREEN}DEBUG: Found device from lsblk: {device}{Colors.END}") return device - + if DEBUG: - print(f"{Colors.RED}DEBUG: Could not find device for osd.{osd_id}{Colors.END}") - + print(f"{Colors.RED}DEBUG: Could not determine device for osd.{osd_id}{Colors.END}") return None def get_smart_data_remote(device_path, hostname): - """Get SMART data from a remote host""" + """Get SMART data from a remote host with proper device type detection.""" if not device_path: return None # Strip partition suffix base_device = re.sub(r'p?\d+$', '', device_path) - # Use sudo for smartctl - cmd = f"sudo smartctl -a -j {base_device} 2>/dev/null" - result = run_command(cmd, host=hostname, parse_json=True) + # Detect type: NVMe or SATA + if 'nvme' in base_device: + dev_type = 'nvme' + else: + dev_type = 'sat' # sata/ata, compatible with SSD/HDD + cmd = f"sudo smartctl -a -j -d {dev_type} {base_device} 2>/dev/null" + result = run_command(cmd, host=hostname, parse_json=True) + if DEBUG and result is None: + print(f"{Colors.YELLOW}DEBUG: SMART data failed for {base_device} on {hostname}{Colors.END}") return result + def get_device_health(osd_id, hostname): """Get device SMART health metrics from the appropriate host""" if DEBUG: