Compare commits

..

2 Commits

View File

@@ -82,54 +82,66 @@ def get_osd_host_mapping(osd_tree):
return osd_to_host
def _first_dev_path(raw):
    """Return ``/dev/<name>`` for the first non-empty entry in *raw*, else None.

    *raw* may be a single device name, a comma-separated list (as found in
    OSD metadata) or multi-line command output (``lsblk``). Entries may or
    may not already carry the ``/dev/`` prefix.
    """
    if not raw:
        return None
    # Treat commas like whitespace so one split handles both list styles.
    for name in str(raw).replace(',', ' ').split():
        if name.startswith('/dev/'):
            name = name[len('/dev/'):]
        if name:
            return f"/dev/{name}"
    return None


def get_device_path_for_osd(osd_id, hostname):
    """Get the physical device path for an OSD on a host (resolve dm devices).

    Resolution order:
      1. OSD metadata ('bluestore_bdev_devices', then 'devices').
      2. ``readlink -f`` on the OSD's block symlink on *hostname*; a
         ``/dev/dm-*`` target is resolved to its parent via ``lsblk``.
      3. ``lsblk -no pkname`` directly on the block symlink.

    Args:
        osd_id: Numeric id of the OSD (the N in ``osd.N``).
        hostname: Host on which the OSD runs; passed to ``run_command``.

    Returns:
        A ``/dev/...`` path string, or None if no device could be determined.
    """
    metadata = get_osd_metadata(osd_id)
    if metadata:
        # Metadata values can list several devices separated by commas;
        # use the first one instead of building an invalid "/dev/a,b" path.
        for key in ('bluestore_bdev_devices', 'devices'):
            device = _first_dev_path(metadata.get(key))
            if device:
                if DEBUG:
                    print(f"{Colors.GREEN}DEBUG: Found physical device from metadata: {device}{Colors.END}")
                return device

    block_link = f"/var/lib/ceph/osd/ceph-{osd_id}/block"

    # Fallback: follow the symlink. Strip the output so a trailing newline
    # neither ends up in the returned path nor in the follow-up command.
    resolved = (run_command(f"readlink -f {block_link}", host=hostname) or "").strip()
    if resolved.startswith('/dev/'):
        if '/dev/dm-' not in resolved:
            if DEBUG:
                print(f"{Colors.GREEN}DEBUG: Using device symlink {resolved}{Colors.END}")
            return resolved
        # Device-mapper node: ask lsblk for the underlying parent device.
        device = _first_dev_path(run_command(f"lsblk -no pkname {resolved}", host=hostname))
        if device:
            if DEBUG:
                print(f"{Colors.GREEN}DEBUG: Resolved dm device {resolved} -> {device}{Colors.END}")
            return device
        # Parent lookup failed: fall through to the last fallback below.

    # Last fallback: lsblk from block path
    device = _first_dev_path(run_command(f"lsblk -no pkname {block_link}", host=hostname))
    if device:
        if DEBUG:
            print(f"{Colors.GREEN}DEBUG: Found device from lsblk: {device}{Colors.END}")
        return device

    if DEBUG:
        print(f"{Colors.RED}DEBUG: Could not determine device for osd.{osd_id}{Colors.END}")
    return None
def _whole_disk_path(device_path):
    """Return the whole-disk device for *device_path*, dropping a partition suffix.

    Only recognised partition naming schemes are stripped, so whole-disk names
    that legitimately end in a digit (``/dev/nvme0n1``, ``/dev/dm-0``,
    ``/dev/md0``) are returned unchanged.
    """
    # nvme0n1p2, mmcblk0p1, loop0p1: the disk name ends in a digit and the
    # partition is appended as "p<N>".
    match = re.match(r'^(.*\d)p\d+$', device_path)
    if match:
        return match.group(1)
    # sda1, hdb2, vdc3, xvda1: the disk name ends in letters and the partition
    # number is appended directly.
    match = re.match(r'^((?:.*/)?(?:sd|hd|vd|xvd)[a-z]+)\d+$', device_path)
    if match:
        return match.group(1)
    return device_path


def get_smart_data_remote(device_path, hostname):
    """Get SMART data from a remote host with proper device type detection.

    Args:
        device_path: Device (or partition) path, e.g. ``/dev/sda1``.
        hostname: Host on which to run ``smartctl``; passed to ``run_command``.

    Returns:
        Whatever ``run_command(..., parse_json=True)`` yields for the smartctl
        JSON output, or None if *device_path* is empty or no query succeeded.
    """
    if not device_path:
        return None

    # Strip partition suffix (SMART is queried on the whole disk).
    base_device = _whole_disk_path(device_path)

    # Detect type: NVMe or SATA
    if 'nvme' in base_device:
        dev_type = 'nvme'
    else:
        dev_type = 'sat'  # sata/ata, compatible with SSD/HDD

    # Use sudo for smartctl
    cmd = f"sudo smartctl -a -j -d {dev_type} {base_device} 2>/dev/null"
    result = run_command(cmd, host=hostname, parse_json=True)

    if result is None:
        # The forced type may not fit the device (e.g. SAS/SCSI disks);
        # retry once and let smartctl auto-detect the device type.
        cmd = f"sudo smartctl -a -j {base_device} 2>/dev/null"
        result = run_command(cmd, host=hostname, parse_json=True)

    if DEBUG and result is None:
        print(f"{Colors.YELLOW}DEBUG: SMART data failed for {base_device} on {hostname}{Colors.END}")
    return result
def get_device_health(osd_id, hostname):
"""Get device SMART health metrics from the appropriate host"""
if DEBUG: