#!/bin/bash
#
# Proxmox fresh-installation script.
#
# Installs a set of base/monitoring packages, the Prometheus Node Exporter
# (as a systemd service running under a dedicated node_exporter user) and the
# hwmon service/timer units, then verifies that both are active.
#
# If any step fails before the installation is marked complete, everything
# this script installs is rolled back (see cleanup()).
#
# Expected to run as root: it calls apt-get, useradd, systemctl and writes
# under /etc/systemd/system and /usr/local/bin.

set -euo pipefail

readonly NODE_EXPORTER_VERSION="1.8.2"
readonly NODE_EXPORTER_RELEASE="node_exporter-${NODE_EXPORTER_VERSION}.linux-amd64"
readonly NODE_EXPORTER_URL="https://github.com/prometheus/node_exporter/releases/download/v${NODE_EXPORTER_VERSION}/${NODE_EXPORTER_RELEASE}.tar.gz"
readonly HWMON_BASE_URL="http://10.10.10.63:3000/LotusGuild/hwmonDaemon/raw/branch/main"
readonly SYSTEMD_DIR="/etc/systemd/system"

# Private scratch directory for the Node Exporter download (set by
# install_node_exporter, removed as soon as the binary is installed).
work_dir=""
# Flipped to 1 once every service has been verified; disables the rollback.
install_ok=0

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message text (without the "ERROR: " prefix)
#######################################
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

#######################################
# Roll back everything this script installs: stops/disables the services,
# removes the unit files, the node_exporter binary and the node_exporter user.
# Every step is best-effort so that one failure does not stop the rollback.
# NOTE(review): this also removes a node_exporter user/binary that existed
# before the script ran — same as the original behaviour.
#######################################
cleanup() {
  echo "Cleaning up on error..."
  systemctl stop node_exporter 2>/dev/null || true
  systemctl stop hwmon.timer 2>/dev/null || true
  systemctl disable node_exporter 2>/dev/null || true
  systemctl disable hwmon.timer 2>/dev/null || true
  rm -f "${SYSTEMD_DIR}/node_exporter.service"
  rm -f "${SYSTEMD_DIR}/hwmon.service"
  rm -f "${SYSTEMD_DIR}/hwmon.timer"
  rm -f /usr/local/bin/node_exporter
  userdel node_exporter 2>/dev/null || true
  systemctl daemon-reload || true
  echo "Cleanup completed."
}

#######################################
# EXIT handler. Always removes the scratch directory; runs cleanup() when the
# script is exiting with a non-zero status before installation completed.
# An EXIT trap is used instead of an ERR trap because ERR does not fire for
# explicit `exit 1` calls or for commands tested in an `if` condition, which
# is how most failures in this script are reported.
#######################################
on_exit() {
  local status=$?
  set +e # never let a failing rollback step abort the handler itself
  if [[ -n "${work_dir}" ]]; then
    rm -rf -- "${work_dir}"
  fi
  if ((status != 0 && install_ok == 0)); then
    cleanup
  fi
  exit "${status}"
}

#######################################
# Install the apt packages required by the monitoring tooling.
#######################################
install_packages() {
  echo "Installing required packages..."
  apt-get update
  apt-get install -y python3-pip smartmontools iperf3 python3-psutil \
    python3-requests lm-sensors fastfetch rsync fio
}

#######################################
# Download, install and start the Prometheus Node Exporter.
# Globals: work_dir (written), NODE_EXPORTER_* (read), SYSTEMD_DIR (read)
#######################################
install_node_exporter() {
  local archive

  echo "Installing Prometheus Node Exporter..."

  # Download and unpack in a private directory rather than the caller's cwd,
  # so no glob-based removal can touch unrelated files.
  work_dir=$(mktemp -d) || die "Failed to create temporary directory"
  archive="${work_dir}/${NODE_EXPORTER_RELEASE}.tar.gz"

  echo "Downloading Node Exporter..."
  if ! wget --timeout=30 --tries=3 -O "${archive}" "${NODE_EXPORTER_URL}"; then
    die "Failed to download Node Exporter"
  fi

  if ! tar -xvzf "${archive}" -C "${work_dir}"; then
    die "Failed to extract Node Exporter archive"
  fi

  # Create the service account only if it is not already present.
  if ! id "node_exporter" &>/dev/null; then
    echo "Creating node_exporter user..."
    useradd -rs /bin/false node_exporter
  else
    echo "node_exporter user already exists, skipping creation..."
  fi

  if ! mv -- "${work_dir}/${NODE_EXPORTER_RELEASE}/node_exporter" /usr/local/bin/; then
    die "Failed to move Node Exporter binary"
  fi

  # Downloaded files are no longer needed.
  rm -rf -- "${work_dir}"
  work_dir=""

  chown node_exporter:node_exporter /usr/local/bin/node_exporter
  chmod +x /usr/local/bin/node_exporter

  # Quoted delimiter: the unit file is written literally, with no expansion.
  cat > "${SYSTEMD_DIR}/node_exporter.service" << 'EOL'
[Unit]
Description=Node Exporter
After=network.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
ExecStart=/usr/local/bin/node_exporter

[Install]
WantedBy=multi-user.target
EOL

  systemctl daemon-reload
  systemctl enable node_exporter
  systemctl start node_exporter

  if ! systemctl is-active --quiet node_exporter; then
    echo "ERROR: Node Exporter failed to start" >&2
    systemctl status node_exporter --no-pager || true
    exit 1
  fi
}

#######################################
# Download one hwmon unit file into the systemd directory.
# --fail makes curl return non-zero on HTTP 4xx/5xx instead of saving the
# server's error page as the unit file.
# Arguments: $1 - unit file name (e.g. hwmon.service)
#######################################
fetch_hwmon_unit() {
  local unit=$1
  if ! curl --fail --max-time 30 --retry 3 \
    -o "${SYSTEMD_DIR}/${unit}" "${HWMON_BASE_URL}/${unit}"; then
    die "Failed to download ${unit}"
  fi
}

#######################################
# Install the hwmon service and timer units and start the timer.
# Globals: SYSTEMD_DIR (read), HWMON_BASE_URL (read)
#######################################
install_hwmon() {
  local unit

  echo "Installing hwmon daemon..."
  echo "Downloading hwmon service files..."
  fetch_hwmon_unit hwmon.service
  fetch_hwmon_unit hwmon.timer

  # Verify downloaded files exist and are not empty.
  for unit in hwmon.service hwmon.timer; do
    if [[ ! -s "${SYSTEMD_DIR}/${unit}" ]]; then
      die "${unit} file is empty or missing"
    fi
  done

  mkdir -p /var/log/hwmonDaemon

  systemctl daemon-reload
  systemctl enable hwmon.timer
  systemctl start hwmon.timer

  if ! systemctl is-active --quiet hwmon.timer; then
    echo "ERROR: hwmon timer failed to start" >&2
    systemctl status hwmon.timer --no-pager || true
    exit 1
  fi
}

#######################################
# Report service state, check the Node Exporter port and run the hwmon
# dry-run. Problems found here are warnings only; they do not fail the install.
#######################################
verify_installation() {
  echo "Verifying installation..."
  echo "Node Exporter status: $(systemctl is-active node_exporter)"
  echo "hwmon timer status: $(systemctl is-active hwmon.timer)"

  echo "Node Exporter port check:"
  if ss -tlnp | grep ':9100'; then
    echo "✓ Node Exporter is listening on port 9100"
  else
    echo "WARNING: Node Exporter not listening on port 9100" >&2
  fi

  echo "Testing hwmon dry-run..."
  # NOTE(review): this fetches Python source over plain HTTP and exec()s it
  # as the current user (root). Acceptable only on a trusted network; consider
  # downloading to a file and verifying it before running.
  if ! /usr/bin/env python3 -c "import urllib.request; exec(urllib.request.urlopen('${HWMON_BASE_URL}/hwmonDaemon.py').read().decode('utf-8'))" --dry-run; then
    echo "WARNING: hwmon dry-run test failed, but services are installed" >&2
  fi
}

#######################################
# Print the post-install summary.
#######################################
print_summary() {
  echo "✓ Installation complete! All services are running correctly."
  echo ""
  echo "Services installed:"
  echo "  - Node Exporter: http://$(hostname -I | awk '{print $1}'):9100/metrics"
  echo "  - hwmon daemon: Monitoring system health every 15 minutes"
  echo ""
  echo "Log locations:"
  echo "  - Node Exporter: journalctl -u node_exporter"
  echo "  - hwmon: journalctl -u hwmon.service"
  echo "  - hwmon logs: /var/log/hwmonDaemon/"
}

main() {
  echo "Starting Proxmox fresh installation script..."
  trap on_exit EXIT

  install_packages
  install_node_exporter
  install_hwmon
  verify_installation

  # From here on a failure must not roll back the finished installation.
  install_ok=1
  print_summary
}

main "$@"