From b80fda7cb2bb3a13ac87953c05af6cfbaf598008 Mon Sep 17 00:00:00 2001
From: Jared Vititoe <jjvititoe1@gmail.com>
Date: Tue, 17 Mar 2026 17:17:40 -0400
Subject: [PATCH] Fix host filtering: only show/monitor configured hosts; add
 PBS

- _collect_snapshot() and _process_interfaces() now skip any Prometheus
  instance not explicitly listed in config.json hosts[]. LXC app servers
  (postgresql, matrix, etc.) report node_exporter metrics but are not
  infrastructure hosts Gandalf should display or alert on.
- Add PBS (10.10.10.3) to config hosts[] with a prometheus_instance and
  remove it from ping_hosts: node_exporter is already running on PBS and
  is now in the Prometheus scrape config as job pbs-node.
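- The _instance_map membership check is now consistent across snapshot,
  alerting, and ethtool SSH collection.
- config.json also replaces the "ssh" block with a "pulse" block (url,
  api_key, worker_id, timeout) and raises monitor.poll_interval from
  120 to 300.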

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 config.json | 21 ++++++++++++---------
 monitor.py  |  4 ++++
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/config.json b/config.json
index d8578fa..5481549 100644
--- a/config.json
+++ b/config.json
@@ -1,9 +1,9 @@
 {
-  "ssh": {
-    "user": "root",
-    "password": "Server#980000Panda",
-    "connect_timeout": 5,
-    "timeout": 20
+  "pulse": {
+    "url": "http://10.10.10.65:8080",
+    "api_key": "012b303a324152c509bf5ade6f942cfc21404f68662f01a17001cba9e4486049",
+    "worker_id": "1b11d1b5-4ed0-42df-a6af-8d57fffe1343",
+    "timeout": 45
   },
   "unifi": {
     "controller": "https://10.10.10.1",
@@ -28,14 +28,17 @@
     "allowed_groups": ["admin"]
   },
   "monitor": {
-    "poll_interval": 120,
+    "poll_interval": 300,
     "failure_threshold": 2,
     "cluster_threshold": 3,
-    "ping_hosts": [
-      {"name": "pbs", "ip": "10.10.10.3"}
-    ]
+    "ping_hosts": []
   },
   "hosts": [
+    {
+      "name": "pbs",
+      "ip": "10.10.10.3",
+      "prometheus_instance": "10.10.10.3:9100"
+    },
     {
       "name": "large1",
       "ip": "10.10.10.2",
diff --git a/monitor.py b/monitor.py
index 9effb14..8f97934 100644
--- a/monitor.py
+++ b/monitor.py
@@ -694,6 +694,8 @@ class NetworkMonitor:
         hosts_with_regression: List[str] = []
 
         for instance, ifaces in states.items():
+            if instance not in self._instance_map:
+                continue  # skip unconfigured Prometheus instances
             host = self._hostname(instance)
             new_baseline.setdefault(host, {})
             host_has_regression = False
@@ -877,6 +879,8 @@ class NetworkMonitor:
 
         hosts = {}
         for instance, ifaces in iface_states.items():
+            if instance not in self._instance_map:
+                continue  # skip Prometheus instances not in config (e.g. LXC app servers)
             host = self._hostname(instance)
             phys = {k: v for k, v in ifaces.items()}
             up_count = sum(1 for v in phys.values() if v)