Compare commits

...

2 Commits

View File

@@ -82,54 +82,66 @@ def get_osd_host_mapping(osd_tree):
return osd_to_host return osd_to_host
def _first_device_name(raw):
    """Return the first non-empty device name found in *raw*, or None.

    *raw* may hold several names separated by commas or newlines (a
    multi-device metadata value, or multi-line ``lsblk`` output).  A leading
    ``/dev/`` on a name is removed so callers can prepend it exactly once.
    """
    for token in raw.replace(',', '\n').splitlines():
        name = token.strip()
        if name.startswith('/dev/'):
            name = name[len('/dev/'):]
        if name:
            return name
    return None


def get_device_path_for_osd(osd_id, hostname):
    """Get the physical device path for an OSD on a host (resolve dm devices).

    Lookup order:
      1. The ``bluestore_bdev_devices`` entry of the OSD metadata.
      2. The target of the OSD's ``block`` symlink on ``hostname``; a
         ``/dev/dm-*`` target is resolved to its parent via ``lsblk``.
      3. ``lsblk -no pkname`` run directly on the ``block`` path.

    Args:
        osd_id: OSD identifier used for the metadata lookup and to build the
            ``/var/lib/ceph/osd/ceph-<id>/block`` path.
        hostname: Host handed to ``run_command`` for the remote queries.

    Returns:
        A ``/dev/...`` path string, or ``None`` when no method succeeds.
    """
    block_link = f"/var/lib/ceph/osd/ceph-{osd_id}/block"

    metadata = get_osd_metadata(osd_id)
    if metadata:
        # Try 'bluestore_bdev_devices' first.  The value may list more than
        # one device or already carry a /dev/ prefix, so normalise it rather
        # than pasting it into the path verbatim.
        name = _first_device_name(metadata.get('bluestore_bdev_devices') or '')
        if name:
            device = f"/dev/{name}"
            if DEBUG:
                print(f"{Colors.GREEN}DEBUG: Found physical device from metadata: {device}{Colors.END}")
            return device

    # Fallback: follow the symlink.
    # NOTE(review): assumes run_command returns a string (or a falsy value on
    # failure) when parse_json is not requested -- confirm against run_command.
    result = run_command(f"readlink -f {block_link}", host=hostname)
    target = result.strip() if result else ''
    if target.startswith('/dev/'):
        if '/dev/dm-' not in target:
            if DEBUG:
                print(f"{Colors.GREEN}DEBUG: Using device symlink {target}{Colors.END}")
            return target

        # Device-mapper node: ask lsblk for the parent kernel device.
        base = run_command(f"lsblk -no pkname {target}", host=hostname)
        name = _first_device_name(base or '')
        if name:
            device = f"/dev/{name}"
            if DEBUG:
                print(f"{Colors.GREEN}DEBUG: Resolved dm device {target} -> {device}{Colors.END}")
            return device
        # An unresolved dm device falls through to the last fallback below.

    # Last fallback: lsblk from block path.
    result = run_command(f"lsblk -no pkname {block_link}", host=hostname)
    name = _first_device_name(result or '')
    if name:
        device = f"/dev/{name}"
        if DEBUG:
            print(f"{Colors.GREEN}DEBUG: Found device from lsblk: {device}{Colors.END}")
        return device

    if DEBUG:
        print(f"{Colors.RED}DEBUG: Could not determine device for osd.{osd_id}{Colors.END}")
    return None
def get_smart_data_remote(device_path, hostname):
    """Get SMART data from a remote host with proper device type detection.

    Args:
        device_path: Block device path such as ``/dev/sda1`` or
            ``/dev/nvme0n1p1``.  A partition suffix is removed so the whole
            device is queried.
        hostname: Host handed to ``run_command``.

    Returns:
        The value ``run_command`` produces for the JSON ``smartctl`` call, or
        ``None`` when ``device_path`` is empty.
    """
    if not device_path:
        return None

    # Strip partition suffix.  Disks whose name ends in a digit use a
    # "p<N>" separator (nvme0n1p2, mmcblk0p1); the "p" only counts as a
    # separator when a digit precedes it, so /dev/sdp1 is not cut to /dev/sd.
    base_device = re.sub(r'(?<=\d)p\d+$', '', device_path)
    if base_device == device_path:
        # Letter-named disks (sda1, sdp1, vdb2, xvda1) append the number
        # directly.  Any other name ending in a digit (nvme0n1, md0, dm-3)
        # is a whole device and is left untouched.
        base_device = re.sub(r'(?<=/)((?:s|h|v|xv)d[a-z]+)\d+$', r'\1', device_path)

    # Detect type: NVMe or SATA
    if 'nvme' in base_device:
        dev_type = 'nvme'
    else:
        dev_type = 'sat'  # sata/ata, compatible with SSD/HDD

    cmd = f"sudo smartctl -a -j -d {dev_type} {base_device} 2>/dev/null"
    result = run_command(cmd, host=hostname, parse_json=True)
    if DEBUG and result is None:
        print(f"{Colors.YELLOW}DEBUG: SMART data failed for {base_device} on {hostname}{Colors.END}")
    return result
def get_device_health(osd_id, hostname): def get_device_health(osd_id, hostname):
"""Get device SMART health metrics from the appropriate host""" """Get device SMART health metrics from the appropriate host"""
if DEBUG: if DEBUG: