Updated error handling and fixed a bug
This commit is contained in:
134
freshStart.sh
134
freshStart.sh
@ -1,28 +1,71 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# Exit on any error
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
echo "Starting Proxmox fresh installation script..."
|
echo "Starting Proxmox fresh installation script..."
|
||||||
|
|
||||||
|
# Cleanup function for failed installations
|
||||||
|
# Roll back a partially completed installation.
#
# Stops and disables the node_exporter service and the hwmon timer, removes
# their systemd unit files and the node_exporter binary, deletes the
# downloaded archive / extracted directory from the current working
# directory, removes the node_exporter user, and reloads systemd.
#
# Every step is best-effort: on a host where the install failed early, most
# of these units/files/users do not exist yet, so individual failures must
# not stop the remaining rollback steps.
#
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0
cleanup() {
    echo "Cleaning up on error..."

    # Units may not be installed yet; ignore "not loaded" errors.
    systemctl stop node_exporter 2>/dev/null || true
    systemctl stop hwmon.timer 2>/dev/null || true
    systemctl disable node_exporter 2>/dev/null || true
    systemctl disable hwmon.timer 2>/dev/null || true

    # rm -f is silent for paths that do not exist.
    rm -f /etc/systemd/system/node_exporter.service
    rm -f /etc/systemd/system/hwmon.service
    rm -f /etc/systemd/system/hwmon.timer
    rm -f /usr/local/bin/node_exporter

    # Download artifacts are relative to the directory the script runs from.
    rm -rf node_exporter-*.linux-amd64.tar.gz node_exporter-*.linux-amd64

    userdel node_exporter 2>/dev/null || true

    # Guarded like the steps above: with `set -e` active, an unguarded
    # failure here would abort the handler before it reports completion and
    # would replace the original failure status with this one.
    systemctl daemon-reload || true

    echo "Cleanup completed."
}
|
||||||
|
|
||||||
|
# Set trap for cleanup on error
|
||||||
|
# Run cleanup on ANY non-zero exit. An ERR trap alone is not enough here:
# bash does not fire ERR for a command tested by `if`, nor for an explicit
# `exit 1`, which is how the handled failures in this script terminate.
# A successful run exits 0 and therefore skips cleanup.
trap 'rc=$?; if [[ $rc -ne 0 ]]; then cleanup; fi; exit "$rc"' EXIT
|
||||||
|
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
echo "Installing required packages..."
|
echo "Installing required packages..."
|
||||||
apt-get update
|
apt-get update
|
||||||
apt-get install -y python3-pip smartmontools iperf3 python3-psutil python3-requests, lm-sensors, neofetch
|
apt-get install -y python3-pip smartmontools iperf3 python3-psutil python3-requests lm-sensors neofetch
|
||||||
|
|
||||||
# Install Node Exporter
|
# Install Node Exporter
|
||||||
echo "Installing Prometheus Node Exporter..."
|
echo "Installing Prometheus Node Exporter..."
|
||||||
NODE_EXPORTER_VERSION="1.8.2"
|
NODE_EXPORTER_VERSION="1.8.2"
|
||||||
wget "https://github.com/prometheus/node_exporter/releases/download/v${NODE_EXPORTER_VERSION}/node_exporter-${NODE_EXPORTER_VERSION}.linux-amd64.tar.gz"
|
|
||||||
tar xvfz node_exporter-*.linux-amd64.tar.gz
|
|
||||||
|
|
||||||
# Create node_exporter user and group
|
# Download Node Exporter with error handling
|
||||||
|
echo "Downloading Node Exporter..."
|
||||||
|
if ! wget --timeout=30 --tries=3 "https://github.com/prometheus/node_exporter/releases/download/v${NODE_EXPORTER_VERSION}/node_exporter-${NODE_EXPORTER_VERSION}.linux-amd64.tar.gz"; then
|
||||||
|
echo "ERROR: Failed to download Node Exporter"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract with error checking
|
||||||
|
if ! tar xvfz node_exporter-*.linux-amd64.tar.gz; then
|
||||||
|
echo "ERROR: Failed to extract Node Exporter archive"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if user already exists
|
||||||
|
if ! id "node_exporter" &>/dev/null; then
|
||||||
|
echo "Creating node_exporter user..."
|
||||||
useradd -rs /bin/false node_exporter
|
useradd -rs /bin/false node_exporter
|
||||||
|
else
|
||||||
|
echo "node_exporter user already exists, skipping creation..."
|
||||||
|
fi
|
||||||
|
|
||||||
# Move binary to proper location
|
# Move binary to proper location
|
||||||
mv node_exporter-${NODE_EXPORTER_VERSION}.linux-amd64/node_exporter /usr/local/bin/
|
if ! mv node_exporter-${NODE_EXPORTER_VERSION}.linux-amd64/node_exporter /usr/local/bin/; then
|
||||||
|
echo "ERROR: Failed to move Node Exporter binary"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Cleanup downloaded files
|
||||||
rm -rf node_exporter-*.linux-amd64.tar.gz node_exporter-*.linux-amd64
|
rm -rf node_exporter-*.linux-amd64.tar.gz node_exporter-*.linux-amd64
|
||||||
|
|
||||||
|
# Set proper permissions
|
||||||
|
chown node_exporter:node_exporter /usr/local/bin/node_exporter
|
||||||
|
chmod +x /usr/local/bin/node_exporter
|
||||||
|
|
||||||
# Create node_exporter service file
|
# Create node_exporter service file
|
||||||
cat > /etc/systemd/system/node_exporter.service << 'EOL'
|
cat > /etc/systemd/system/node_exporter.service << 'EOL'
|
||||||
[Unit]
|
[Unit]
|
||||||
@ -44,16 +87,81 @@ systemctl daemon-reload
|
|||||||
systemctl enable node_exporter
|
systemctl enable node_exporter
|
||||||
systemctl start node_exporter
|
systemctl start node_exporter
|
||||||
|
|
||||||
|
# Check if Node Exporter started successfully
|
||||||
|
if ! systemctl is-active --quiet node_exporter; then
|
||||||
|
echo "ERROR: Node Exporter failed to start"
|
||||||
|
systemctl status node_exporter --no-pager || true
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
# Install hwmon daemon
|
# Install hwmon daemon
|
||||||
echo "Installing hwmon daemon..."
|
echo "Installing hwmon daemon..."
|
||||||
curl -o /etc/systemd/system/hwmon.service http://10.10.10.110:3000/JWS/hwmonDaemon/raw/branch/main/hwmon.service
|
|
||||||
curl -o /etc/systemd/system/hwmon.timer http://10.10.10.110:3000/JWS/hwmonDaemon/raw/branch/main/hwmon.timer
|
# Download hwmon service files with error handling
|
||||||
|
echo "Downloading hwmon service files..."
|
||||||
|
if ! curl --max-time 30 --retry 3 -o /etc/systemd/system/hwmon.service http://10.10.10.110:3000/JWS/hwmonDaemon/raw/branch/main/hwmon.service; then
|
||||||
|
echo "ERROR: Failed to download hwmon.service"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! curl --max-time 30 --retry 3 -o /etc/systemd/system/hwmon.timer http://10.10.10.110:3000/JWS/hwmonDaemon/raw/branch/main/hwmon.timer; then
|
||||||
|
echo "ERROR: Failed to download hwmon.timer"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Verify downloaded files exist and are not empty
|
||||||
|
if [[ ! -s /etc/systemd/system/hwmon.service ]]; then
|
||||||
|
echo "ERROR: hwmon.service file is empty or missing"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ ! -s /etc/systemd/system/hwmon.timer ]]; then
|
||||||
|
echo "ERROR: hwmon.timer file is empty or missing"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create log directory for hwmon
|
||||||
|
mkdir -p /var/log/hwmonDaemon
|
||||||
|
|
||||||
|
# Start the hwmon daemon
|
||||||
systemctl daemon-reload
|
systemctl daemon-reload
|
||||||
systemctl enable hwmon.timer
|
systemctl enable hwmon.timer
|
||||||
systemctl start hwmon.timer
|
systemctl start hwmon.timer
|
||||||
|
|
||||||
# Test hwmon
|
# Check if hwmon timer started successfully
|
||||||
echo "Testing hwmon dry-run..."
|
if ! systemctl is-active --quiet hwmon.timer; then
|
||||||
/usr/bin/env python3 -c "import urllib.request; exec(urllib.request.urlopen('http://10.10.10.110:3000/JWS/hwmonDaemon/raw/branch/main/hwmonDaemon.py').read().decode('utf-8'))" --dry-run
|
echo "ERROR: hwmon timer failed to start"
|
||||||
|
systemctl status hwmon.timer --no-pager || true
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
echo "Installation complete! Please verify all services are running correctly."
|
# Final verification
|
||||||
|
echo "Verifying installation..."
|
||||||
|
echo "Node Exporter status: $(systemctl is-active node_exporter)"
|
||||||
|
echo "hwmon timer status: $(systemctl is-active hwmon.timer)"
|
||||||
|
echo "Node Exporter port check:"
|
||||||
|
if ss -tlnp | grep :9100; then
|
||||||
|
echo "✓ Node Exporter is listening on port 9100"
|
||||||
|
else
|
||||||
|
echo "WARNING: Node Exporter not listening on port 9100"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Test hwmon with error handling
|
||||||
|
echo "Testing hwmon dry-run..."
|
||||||
|
if ! /usr/bin/env python3 -c "import urllib.request; exec(urllib.request.urlopen('http://10.10.10.110:3000/JWS/hwmonDaemon/raw/branch/main/hwmonDaemon.py').read().decode('utf-8'))" --dry-run; then
|
||||||
|
echo "WARNING: hwmon dry-run test failed, but services are installed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Disable cleanup trap on successful completion
|
||||||
|
trap - ERR
|
||||||
|
|
||||||
|
echo "✓ Installation complete! All services are running correctly."
|
||||||
|
echo ""
|
||||||
|
echo "Services installed:"
|
||||||
|
echo " - Node Exporter: http://$(hostname -I | awk '{print $1}'):9100/metrics"
|
||||||
|
echo " - hwmon daemon: Monitoring system health every 15 minutes"
|
||||||
|
echo ""
|
||||||
|
echo "Log locations:"
|
||||||
|
echo " - Node Exporter: journalctl -u node_exporter"
|
||||||
|
echo " - hwmon: journalctl -u hwmon.service"
|
||||||
|
echo " - hwmon logs: /var/log/hwmonDaemon/"
|
||||||
Reference in New Issue
Block a user