diff --git a/.gitignore b/.gitignore index 89547ba..b77e98c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ -log.txt \ No newline at end of file +log.txt +config.json +__pycache__/ +*.pyc \ No newline at end of file diff --git a/README.md b/README.md index 1bd09a8..e286f2d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # GANDALF (Global Advanced Network Detection And Link Facilitator) -> Because it shall not let problems pass! +> Because it shall not let problems pass. Network monitoring dashboard for the LotusGuild Proxmox cluster. Deployed on **LXC 157** (monitor-02 / 10.10.10.9), reachable at `gandalf.lotusguild.org`. @@ -9,7 +9,7 @@ Deployed on **LXC 157** (monitor-02 / 10.10.10.9), reachable at `gandalf.lotusgu ## Architecture -Gandalf is two processes that share a MariaDB database: +Two processes share a MariaDB database: | Process | Service | Role | |---|---|---| @@ -17,22 +17,24 @@ Gandalf is two processes that share a MariaDB database: | `monitor.py` | `gandalf-monitor.service` | Background polling daemon | ``` -[Prometheus :9090] ──▶ - monitor.py ──▶ MariaDB ◀── app.py ──▶ nginx ──▶ Authelia ──▶ Browser -[UniFi Controller] ──▶ +[Prometheus :9090] ──▶ +[UniFi Controller] ──▶ monitor.py ──▶ MariaDB ◀── app.py ──▶ nginx ──▶ Authelia ──▶ Browser +[Pulse Worker] ──▶ +[SSH / ethtool] ──▶ ``` ### Data Sources -| Source | What it monitors | +| Source | What it provides | |---|---| -| **Prometheus** (`10.10.10.48:9090`) | Physical NIC link state (`node_network_up`) for 6 Proxmox hosts | -| **UniFi API** (`https://10.10.10.1`) | Switch, AP, and gateway device status | -| **Ping** | pbs (10.10.10.3) — no node_exporter | +| **Prometheus** (`10.10.10.48:9090`) | Physical NIC link state + traffic/error rates via `node_exporter` | +| **UniFi API** (`https://10.10.10.1`) | Switch port stats, device status, LLDP neighbor table, PoE data | +| **Pulse Worker** | SSH relay — runs `ethtool` + SFP DOM queries on each Proxmox host | +| **Ping** | Reachability for 
hosts without `node_exporter` (e.g. PBS) | ### Monitored Hosts (Prometheus / node_exporter) -| Host | Instance | +| Host | Prometheus Instance | |---|---| | large1 | 10.10.10.2:9100 | | compute-storage-01 | 10.10.10.4:9100 | @@ -41,18 +43,86 @@ Gandalf is two processes that share a MariaDB database: | compute-storage-gpu-01 | 10.10.10.10:9100 | | storage-01 | 10.10.10.11:9100 | +Ping-only (no node_exporter): **pbs** (10.10.10.3) + --- -## Features +## Pages -- **Interface monitoring** – tracks link state for all physical NICs via Prometheus -- **UniFi device monitoring** – detects offline switches, APs, and gateways -- **Ping reachability** – covers hosts without node_exporter -- **Cluster-wide detection** – creates a separate P1 ticket when 3+ hosts have simultaneous interface failures (likely a switch failure) -- **Smart baseline tracking** – interfaces that are down on first observation (unused ports) are never alerted on; only regressions from UP→DOWN trigger tickets -- **Ticket creation** – integrates with Tinker Tickets (`t.lotusguild.org`) with 24-hour deduplication -- **Alert suppression** – manual toggle or timed windows (30min / 1hr / 4hr / 8hr / manual) -- **Authelia SSO** – restricted to `admin` group via forward-auth headers +### Dashboard (`/`) +- Real-time host status grid with per-NIC link state (UP / DOWN / degraded) +- Network topology diagram (Internet → Gateway → Switches → Hosts) +- UniFi device table (switches, APs, gateway) +- Active alerts table with severity, target, consecutive failures, ticket link +- Quick-suppress modal: apply timed or manual suppression from any alert row +- Auto-refreshes every 30 seconds via `/api/status` + `/api/network` + +### Link Debug (`/links`) +Per-interface statistics collected every poll cycle. All panels are collapsible +(click header or use Collapse All / Expand All). Collapse state persists across +page refreshes via `sessionStorage`. 
+ +**Server NICs** (via Prometheus + SSH/ethtool): +- Speed, duplex, auto-negotiation, link detected +- TX/RX rate bars (bandwidth utilisation % of link capacity) +- TX/RX error and drop rates per second +- Carrier changes (cumulative since boot — watch for flapping) +- **SFP / Optical panel** (when SFP module present): vendor/PN, temp, voltage, + bias current, TX power (dBm), RX power (dBm), RX−TX delta, per-stat bars + +**UniFi Switch Ports** (via UniFi API): +- Port number badge (`#N`), UPLINK badge, PoE draw badge +- LLDP neighbor line: `→ system_name (port_id)` when neighbor is detected +- PoE class and max wattage line +- Speed, duplex, auto-neg, TX/RX rates, errors, drops + +### Inspector (`/inspector`) +Visual switch chassis diagrams. Each switch is rendered model-accurately using +layout config in the template (`SWITCH_LAYOUTS`). + +**Port block colours:** +| Colour | State | +|---|---| +| Green | Up, no active PoE | +| Amber | Up with active PoE draw | +| Cyan | Uplink port (up) | +| Grey | Down | +| White outline | Currently selected | + +**Clicking a port** opens the right-side detail panel showing: +- Link stats (status, speed, duplex, auto-neg, media type) +- PoE (class, max wattage, current draw, mode) +- Traffic (TX/RX rates) +- Errors/drops per second +- **LLDP Neighbor** section (system name, port ID, chassis ID, management IPs) +- **Path Debug** (auto-appears when LLDP `system_name` matches a known server): + two-column comparison of the switch port stats vs. the server NIC stats, + including SFP DOM data if the server side has an SFP module + +**LLDP path debug requirements:** +1. Server must run `lldpd`: `apt install lldpd && systemctl enable --now lldpd` +2. `lldpd` hostname must match the key in `data.hosts` (set via `config.json → hosts`) +3. 
Switch has LLDP enabled (UniFi default: on) + +**Supported switch models** (set `SWITCH_LAYOUTS` keys to your UniFi model codes): + +| Key | Model | Layout | +|---|---|---| +| `USF5P` | UniFi Switch Flex 5 PoE | 4×RJ45 + 1×SFP uplink | +| `USL8A` | UniFi Switch Lite 8 PoE | 8×SFP (2 rows of 4) | +| `US24PRO` | UniFi Switch Pro 24 | 24×RJ45 staggered + 2×SFP | +| `USPPDUP` | Custom/other | Single-port fallback | +| `USMINI` | UniFi Switch Mini | 5-port row | + +Add new layouts by adding a key to `SWITCH_LAYOUTS` matching the `model` field +returned by the UniFi API for that device. + +### Suppressions (`/suppressions`) +- Create timed (30 min / 1 hr / 4 hr / 8 hr) or manual suppressions +- Target types: host, interface, UniFi device, or global +- Active suppressions table with one-click removal +- Suppression history (last 50) +- Available targets reference grid (all known hosts + interfaces) --- @@ -62,10 +132,16 @@ Gandalf is two processes that share a MariaDB database: | Condition | Priority | |---|---| -| UniFi device offline (2+ consecutive checks) | P2 High | -| Proxmox host NIC link-down regression (2+ consecutive checks) | P2 High | -| Host unreachable via ping (2+ consecutive checks) | P2 High | -| 3+ hosts simultaneously reporting interface failures | P1 Critical | +| UniFi device offline (≥2 consecutive checks) | P2 High | +| Proxmox host NIC link-down regression (≥2 consecutive checks) | P2 High | +| Host unreachable via ping (≥2 consecutive checks) | P2 High | +| ≥3 hosts simultaneously reporting interface failures | P1 Critical | + +### Baseline Tracking + +Interfaces that are **down on first observation** (unused ports, unplugged cables) +are recorded as `initial_down` and never alerted. Only **UP→DOWN regressions** +generate tickets. Baseline is stored in MariaDB and survives daemon restarts. 
### Suppression Targets @@ -77,30 +153,88 @@ Gandalf is two processes that share a MariaDB database: | `all` | Everything (global maintenance mode) | Suppressions can be manual (persist until removed) or timed (auto-expire). +Expired suppressions are checked at evaluation time — no background cleanup needed. --- -## Configuration +## Configuration (`config.json`) -**`config.json`** – shared by both processes: +Shared by both processes. Located in the working directory (`/var/www/html/prod/`). + +```json +{ + "database": { + "host": "10.10.10.50", + "port": 3306, + "user": "gandalf", + "password": "...", + "name": "gandalf" + }, + "prometheus": { + "url": "http://10.10.10.48:9090" + }, + "unifi": { + "controller": "https://10.10.10.1", + "api_key": "...", + "site_id": "default" + }, + "ticket_api": { + "url": "https://t.lotusguild.org/api/tickets", + "api_key": "..." + }, + "pulse": { + "url": "http://:", + "api_key": "...", + "worker_id": "...", + "timeout": 45 + }, + "auth": { + "allowed_groups": ["admin"] + }, + "hosts": [ + { "name": "large1", "prometheus_instance": "10.10.10.2:9100" }, + { "name": "compute-storage-01", "prometheus_instance": "10.10.10.4:9100" }, + { "name": "micro1", "prometheus_instance": "10.10.10.8:9100" }, + { "name": "monitor-02", "prometheus_instance": "10.10.10.9:9100" }, + { "name": "compute-storage-gpu-01", "prometheus_instance": "10.10.10.10:9100" }, + { "name": "storage-01", "prometheus_instance": "10.10.10.11:9100" } + ], + "monitor": { + "poll_interval": 120, + "failure_threshold": 2, + "cluster_threshold": 3, + "ping_hosts": [ + { "name": "pbs", "ip": "10.10.10.3" } + ] + } +} +``` + +### Key Config Fields | Key | Description | |---|---| -| `unifi.api_key` | UniFi API key from controller | +| `database.*` | MariaDB credentials (LXC 149 at 10.10.10.50) | | `prometheus.url` | Prometheus base URL | -| `database.*` | MariaDB credentials | -| `ticket_api.api_key` | Tinker Tickets Bearer token | -| `monitor.poll_interval` | Seconds 
between checks (default: 120) | -| `monitor.failure_threshold` | Consecutive failures before ticketing (default: 2) | -| `monitor.cluster_threshold` | Hosts with failures to trigger cluster alert (default: 3) | -| `monitor.ping_hosts` | Hosts checked via ping (no node_exporter) | -| `hosts` | Maps Prometheus instance labels to hostnames | +| `unifi.controller` | UniFi controller base URL (HTTPS, self-signed cert ignored) | +| `unifi.api_key` | UniFi API key from controller Settings → API | +| `unifi.site_id` | UniFi site ID (default: `default`) | +| `ticket_api.api_key` | Tinker Tickets bearer token | +| `pulse.url` | Pulse worker API base URL (for SSH relay) | +| `pulse.worker_id` | Which Pulse worker runs ethtool collection | +| `pulse.timeout` | Max seconds to wait for SSH collection per host | +| `auth.allowed_groups` | Authelia groups that may access Gandalf | +| `hosts` | Maps Prometheus instance labels → display hostnames | +| `monitor.poll_interval` | Seconds between full check cycles (default: 120) | +| `monitor.failure_threshold` | Consecutive failures before creating ticket (default: 2) | +| `monitor.cluster_threshold` | Hosts with failures to trigger cluster-wide P1 (default: 3) | +| `monitor.ping_hosts` | Hosts checked only by ping (no node_exporter) | --- ## Deployment (LXC 157) -### 1. Database (MariaDB LXC 149 at 10.10.10.50) +### 1. Database — MariaDB LXC 149 (`10.10.10.50`) ```sql CREATE DATABASE gandalf CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; @@ -109,93 +243,234 @@ GRANT ALL PRIVILEGES ON gandalf.* TO 'gandalf'@'10.10.10.61'; FLUSH PRIVILEGES; ``` -Then import the schema: +Import schema: ```bash mysql -h 10.10.10.50 -u gandalf -p gandalf < schema.sql ``` -### 2. LXC 157 – Install dependencies +### 2. LXC 157 — Install dependencies ```bash pip3 install -r requirements.txt +# Ensure sshpass is available (used by deploy scripts) +apt install sshpass ``` ### 3. 
Deploy files ```bash -cp app.py db.py monitor.py config.json templates/ static/ /var/www/html/prod/ +# From dev machine / root/code/gandalf: +for f in app.py db.py monitor.py config.json schema.sql \ + static/style.css static/app.js \ + templates/*.html; do + sshpass -p 'yourpass' scp -o StrictHostKeyChecking=no \ + "$f" "root@10.10.10.61:/var/www/html/prod/$f" +done +systemctl restart gandalf gandalf-monitor ``` -### 4. Configure secrets in `config.json` +### 4. systemd services -- `database.password` – set the gandalf DB password -- `ticket_api.api_key` – copy from tinker tickets admin panel +**`gandalf.service`** (Flask/gunicorn web app): +```ini +[Unit] +Description=Gandalf Web Dashboard +After=network.target -### 5. Install the monitor service +[Service] +Type=simple +WorkingDirectory=/var/www/html/prod +ExecStart=/usr/bin/python3 -m gunicorn --workers 1 --bind 127.0.0.1:8000 app:app +Restart=always + +[Install] +WantedBy=multi-user.target +``` + +**`gandalf-monitor.service`** (background polling daemon): +```ini +[Unit] +Description=Gandalf Network Monitor Daemon +After=network.target + +[Service] +Type=simple +WorkingDirectory=/var/www/html/prod +ExecStart=/usr/bin/python3 monitor.py +Restart=always + +[Install] +WantedBy=multi-user.target +``` + +### 5. Authelia rule (LXC 167) + +```yaml +access_control: + rules: + - domain: gandalf.lotusguild.org + policy: one_factor + subject: + - group:admin +``` ```bash -cp gandalf-monitor.service /etc/systemd/system/ -systemctl daemon-reload -systemctl enable gandalf-monitor -systemctl start gandalf-monitor +systemctl restart authelia ``` -Update existing `gandalf.service` to use a single worker: -``` -ExecStart=/usr/bin/python3 -m gunicorn --workers 1 --bind 127.0.0.1:8000 app:app -``` +### 6. NPM reverse proxy -### 6. 
Authelia rule - -Add to `/etc/authelia/configuration.yml` access_control rules: -```yaml -- domain: gandalf.lotusguild.org - policy: one_factor - subject: - - group:admin -``` - -Reload Authelia: `systemctl restart authelia` - -### 7. NPM proxy host - -- Domain: `gandalf.lotusguild.org` -- Forward to: `http://10.10.10.61:80` (nginx on LXC 157) -- Enable Authelia forward auth -- WebSockets: **not required** +- **Domain:** `gandalf.lotusguild.org` +- **Forward to:** `http://10.10.10.61:8000` (gunicorn direct, no nginx needed on LXC) +- **Forward Auth:** Authelia at `http://10.10.10.167:9091` +- **WebSockets:** Not required --- ## Service Management ```bash -# Monitor daemon -systemctl status gandalf-monitor +# Status +systemctl status gandalf gandalf-monitor + +# Logs (live) +journalctl -u gandalf -f journalctl -u gandalf-monitor -f -# Web server -systemctl status gandalf -journalctl -u gandalf -f - -# Restart both after config/code changes -systemctl restart gandalf-monitor gandalf +# Restart after code or config changes +systemctl restart gandalf gandalf-monitor ``` --- ## Troubleshooting -**Monitor not creating tickets** -- Check `config.json` → `ticket_api.api_key` is set -- Check `journalctl -u gandalf-monitor` for errors +### Monitor not creating tickets +- Verify `config.json → ticket_api.api_key` is set and valid +- Check `journalctl -u gandalf-monitor` for `Ticket creation failed` lines +- Confirm the Tinker Tickets API is reachable from LXC 157 -**Baseline re-initializing on every restart** -- `interface_baseline` is stored in the `monitor_state` DB table; it persists across restarts +### Link Debug shows no data / "Loading…" forever +- Check `gandalf-monitor.service` is running and has completed at least one cycle +- Check `journalctl -u gandalf-monitor` for Prometheus or UniFi errors +- Verify Prometheus is reachable: `curl http://10.10.10.48:9090/api/v1/query?query=up` -**Interface always showing as "initial_down"** -- That interface was down on the 
first poll after the monitor started -- It will begin tracking once it comes up; or manually update the baseline in DB if needed +### Link Debug: SFP DOM panel missing +- SFP data requires Pulse worker + SSH access to hosts +- Verify `config.json → pulse.*` is configured and the Pulse worker is running +- Confirm `sshpass` + SSH access from the Pulse worker to each Proxmox host +- Only interfaces with physical SFP modules return DOM data (`ethtool -m`) -**Prometheus data missing for a host** -- Verify node_exporter is running: `systemctl status prometheus-node-exporter` -- Check Prometheus targets: `http://10.10.10.48:9090/targets` +### Inspector: path debug section not appearing +- Requires LLDP: run `apt install lldpd && systemctl enable --now lldpd` on each server +- The LLDP `system_name` broadcast by `lldpd` must match the hostname in `config.json → hosts[].name` + - Override: `echo 'configure system hostname large1' > /etc/lldpd.d/hostname.conf && systemctl restart lldpd` +- Allow up to 2 poll cycles (240s) after installing lldpd for LLDP table to populate + +### Inspector: switch chassis shows as flat list (no layout) +- The switch's `model` field from UniFi doesn't match any key in `SWITCH_LAYOUTS` in `inspector.html` +- Check the UniFi API: the model appears in the `link_stats` API response under `unifi_switches..model` +- Add the model key to `SWITCH_LAYOUTS` in `inspector.html` with the correct row/SFP layout + +### Baseline re-initializing on every restart +- `interface_baseline` is stored in the `monitor_state` DB table; survives restarts +- If it appears to reset: check DB connectivity from the monitor daemon + +### Interface stuck at "initial_down" forever +- This means the interface was down when the monitor first saw it +- It will begin tracking once it comes up; or manually clear it: + ```sql + -- In MariaDB on 10.10.10.50: + UPDATE monitor_state SET value='{}' WHERE key_name='interface_baseline'; + ``` + Then restart the monitor: `systemctl 
restart gandalf-monitor` + +### Prometheus data missing for a host +```bash +# On the affected host: +systemctl status prometheus-node-exporter +# Verify it's scraped: +curl http://10.10.10.48:9090/api/v1/query?query=up | jq '.data.result[] | select(.metric.job=="node")' +``` + +--- + +## Development Notes + +### File Layout + +``` +gandalf/ +├── app.py # Flask web app (routes, auth, API endpoints) +├── monitor.py # Background daemon (Prometheus, UniFi, Pulse, alert logic) +├── db.py # Database operations (MariaDB via pymysql, thread-local conn reuse) +├── schema.sql # Database schema (network_events, suppression_rules, monitor_state) +├── config.json # Runtime configuration (not committed with secrets) +├── requirements.txt # Python dependencies +├── static/ +│ ├── style.css # Terminal aesthetic CSS (CRT scanlines, green-on-black) +│ └── app.js # Dashboard JS (auto-refresh, host grid, events, suppress modal) +└── templates/ + ├── base.html # Shared layout (header, nav, footer) + ├── index.html # Dashboard page + ├── links.html # Link Debug page (server NICs + UniFi switch ports) + ├── inspector.html # Visual switch inspector + LLDP path debug + └── suppressions.html # Suppression management page +``` + +### Adding a New Monitored Host + +1. Install `prometheus-node-exporter` on the host +2. Add a scrape target to Prometheus config +3. Add an entry to `config.json → hosts`: + ```json + { "name": "newhost", "prometheus_instance": "10.10.10.X:9100" } + ``` +4. Restart monitor: `systemctl restart gandalf-monitor` +5. 
For SFP DOM / ethtool: ensure the host is SSH-accessible from the Pulse worker + +### Adding a New Switch Layout (Inspector) + +Find the UniFi model code for the switch (it appears in the `/api/links` JSON response +under `unifi_switches..model`), then add to `SWITCH_LAYOUTS` in +`templates/inspector.html`: + +```javascript +'MYNEWMODEL': { + rows: [[1,2,3,4,5,6,7,8], [9,10,11,12,13,14,15,16]], // port_idx by row + sfp_section: [17, 18], // separate SFP cage ports (rendered below rows) + sfp_ports: [], // port_idx values that are SFP-type within rows +}, +``` + +### Database Schema Notes + +- `network_events`: one row per active event; `resolved_at` is set when recovered +- `suppression_rules`: `active=FALSE` when removed; `expires_at` checked at query time +- `monitor_state`: key/value store; `interface_baseline` and `link_stats` are JSON blobs + +### Security Notes + +- **XSS prevention**: all user-controlled data in dynamically generated HTML uses + `escHtml()` (JS) or Jinja2 auto-escaping (Python). Suppress buttons use `data-*` + attributes + a single delegated click listener rather than inline `onclick` with + interpolated strings. +- **Interface name validation**: `monitor.py` validates SSH interface names against + `^[a-zA-Z0-9_.@-]+$` before use, and additionally wraps them with `shlex.quote()` + for defense-in-depth. +- **DB parameters**: all SQL uses parameterised queries via pymysql — no string + concatenation into SQL. +- **Auth**: Authelia enforces admin-only access at the nginx/LXC 167 layer; the Flask + app additionally checks the `Remote-User` header via `@require_auth`. + +### Known Limitations + +- Single gunicorn worker (`--workers 1`) — required because `db.py` uses thread-local + connection reuse (one connection per thread). Multiple workers would each have their + own connection, which is fine, but the thread-local optimisation only helps within + one worker. 
+- No CSRF tokens on API endpoints — mitigated by Authelia session cookies being + `SameSite=Strict` and the site being admin-only. +- SSH collection via Pulse is synchronous — if Pulse is slow, the entire monitor cycle + is delayed. The `pulse.timeout` config controls the max wait. +- UniFi LLDP data is only as fresh as the last monitor poll (120s default). diff --git a/app.py b/app.py index 11fca38..12fd7d0 100644 --- a/app.py +++ b/app.py @@ -103,6 +103,13 @@ def links_page(): return render_template('links.html', user=user) +@app.route('/inspector') +@require_auth +def inspector(): + user = _get_user() + return render_template('inspector.html', user=user) + + @app.route('/suppressions') @require_auth def suppressions_page(): diff --git a/db.py b/db.py index 5cd638b..03a32bd 100644 --- a/db.py +++ b/db.py @@ -1,6 +1,7 @@ """Database operations for Gandalf network monitor.""" import json import logging +import threading from contextlib import contextmanager from datetime import datetime, timedelta from typing import Optional @@ -11,6 +12,7 @@ import pymysql.cursors logger = logging.getLogger(__name__) _config_cache = None +_local = threading.local() def _config() -> dict: @@ -23,22 +25,25 @@ def _config() -> dict: @contextmanager def get_conn(): + """Yield a per-thread cached database connection, reconnecting as needed.""" cfg = _config() - conn = pymysql.connect( - host=cfg['host'], - port=cfg.get('port', 3306), - user=cfg['user'], - password=cfg['password'], - database=cfg['name'], - autocommit=True, - cursorclass=pymysql.cursors.DictCursor, - connect_timeout=10, - charset='utf8mb4', - ) - try: - yield conn - finally: - conn.close() + conn = getattr(_local, 'conn', None) + if conn is None: + conn = pymysql.connect( + host=cfg['host'], + port=cfg.get('port', 3306), + user=cfg['user'], + password=cfg['password'], + database=cfg['name'], + autocommit=True, + cursorclass=pymysql.cursors.DictCursor, + connect_timeout=10, + charset='utf8mb4', + ) + _local.conn = conn 
+ else: + conn.ping(reconnect=True) + yield conn # --------------------------------------------------------------------------- diff --git a/monitor.py b/monitor.py index 521bf95..5f600c2 100644 --- a/monitor.py +++ b/monitor.py @@ -10,6 +10,7 @@ Run as a separate systemd service alongside the Flask web app. import json import logging import re +import shlex import subprocess import time from datetime import datetime @@ -120,6 +121,70 @@ class UnifiClient: logger.error(f'UniFi API error: {e}') return None + def get_switch_ports(self) -> Optional[Dict[str, dict]]: + """Return per-port stats for all UniFi switches, keyed by switch name. + + Uses the v1 stat API which includes full port_table data. + Returns {switch_name: {'ip': str, 'model': str, 'ports': {port_name: {...}}}}. + """ + try: + url = f'{self.base_url}/proxy/network/api/s/{self.site_id}/stat/device' + resp = self.session.get(url, headers=self.headers, timeout=15) + resp.raise_for_status() + devices = resp.json().get('data', []) + result: Dict[str, dict] = {} + for dev in devices: + if dev.get('type', '').lower() != 'usw': + continue + sw_name = dev.get('name') or dev.get('mac', 'unknown') + sw_ip = dev.get('ip', '') + sw_model = dev.get('model', '') + ports: Dict[str, dict] = {} + # Build LLDP neighbor map (keyed by port_idx) + lldp_map: Dict[int, dict] = {} + for entry in dev.get('lldp_table', []): + pidx = entry.get('lldp_port_idx') + if pidx is not None: + lldp_map[int(pidx)] = { + 'chassis_id': entry.get('chassis_id', ''), + 'system_name': entry.get('system_name', ''), + 'port_id': entry.get('port_id', ''), + 'port_desc': entry.get('port_desc', ''), + 'mgmt_ips': entry.get('management_ips', []), + } + for port in dev.get('port_table', []): + idx = port.get('port_idx', 0) + pname = port.get('name') or f'Port {idx}' + raw_poe = port.get('poe_power') + raw_poe_max = port.get('poe_max_power') + ports[pname] = { + 'port_idx': idx, + 'switch_ip': sw_ip, + 'up': port.get('up', False), + 'speed_mbps': 
port.get('speed', 0), + 'full_duplex': port.get('full_duplex', False), + 'autoneg': port.get('autoneg', False), + 'is_uplink': port.get('is_uplink', False), + 'media': port.get('media', ''), + 'poe_power': float(raw_poe) if raw_poe is not None else None, + 'poe_class': port.get('poe_class'), + 'poe_max_power': float(raw_poe_max) if raw_poe_max is not None else None, + 'poe_mode': port.get('poe_mode', ''), + 'lldp': lldp_map.get(idx), + 'tx_bytes': port.get('tx_bytes', 0), + 'rx_bytes': port.get('rx_bytes', 0), + 'tx_errors': port.get('tx_errors', 0), + 'rx_errors': port.get('rx_errors', 0), + 'tx_dropped': port.get('tx_dropped', 0), + 'rx_dropped': port.get('rx_dropped', 0), + } + if ports: + result[sw_name] = {'ip': sw_ip, 'model': sw_model, 'ports': ports} + return result + except Exception as e: + logger.error(f'UniFi switch port stats error: {e}') + return None + # -------------------------------------------------------------------------- # Ticket client @@ -162,29 +227,90 @@ class TicketClient: return None +# -------------------------------------------------------------------------- +# Pulse HTTP client (delegates SSH commands to Pulse worker) +# -------------------------------------------------------------------------- +class PulseClient: + """Submit a command to a Pulse worker via the internal M2M API and poll for result.""" + + def __init__(self, cfg: dict): + p = cfg.get('pulse', {}) + self.url = p.get('url', '').rstrip('/') + self.api_key = p.get('api_key', '') + self.worker_id = p.get('worker_id', '') + self.timeout = p.get('timeout', 45) + self.session = requests.Session() + self.session.headers.update({ + 'X-Gandalf-API-Key': self.api_key, + 'Content-Type': 'application/json', + }) + + def run_command(self, command: str) -> Optional[str]: + """Submit *command* to Pulse, poll until done, return stdout or None.""" + if not self.url or not self.api_key or not self.worker_id: + return None + try: + resp = self.session.post( + 
f'{self.url}/api/internal/command', + json={'worker_id': self.worker_id, 'command': command}, + timeout=10, + ) + resp.raise_for_status() + execution_id = resp.json()['execution_id'] + except Exception as e: + logger.debug(f'Pulse command submit failed: {e}') + return None + + deadline = time.time() + self.timeout + while time.time() < deadline: + time.sleep(1) + try: + r = self.session.get( + f'{self.url}/api/internal/executions/{execution_id}', + timeout=10, + ) + r.raise_for_status() + data = r.json() + status = data.get('status') + if status == 'completed': + logs = data.get('logs', []) + for entry in logs: + if entry.get('action') == 'command_result': + return entry.get('stdout', '') + return '' + if status == 'failed': + return None + except Exception as e: + logger.debug(f'Pulse poll failed: {e}') + logger.warning(f'Pulse command timed out after {self.timeout}s') + return None + + # -------------------------------------------------------------------------- # Link stats collector (ethtool + Prometheus traffic metrics) # -------------------------------------------------------------------------- class LinkStatsCollector: - """Collects detailed per-interface statistics via SSH (ethtool) and Prometheus.""" + """Collects detailed per-interface statistics via SSH (ethtool) and Prometheus, + plus per-port stats from UniFi switches.""" - def __init__(self, cfg: dict, prom: 'PrometheusClient'): - self.prom = prom - ssh = cfg.get('ssh', {}) - self.ssh_user = ssh.get('user', 'root') - self.ssh_pass = ssh.get('password', '') - self.ssh_connect_timeout = ssh.get('connect_timeout', 5) - self.ssh_timeout = ssh.get('timeout', 20) + def __init__(self, cfg: dict, prom: 'PrometheusClient', + unifi: Optional['UnifiClient'] = None): + self.prom = prom + self.pulse = PulseClient(cfg) + self.unifi = unifi + # State for UniFi rate calculation (previous snapshot + timestamp) + self._prev_unifi: Dict[str, dict] = {} + self._prev_unifi_time: float = 0.0 # 
------------------------------------------------------------------ - # SSH collection + # SSH collection (via Pulse worker) # ------------------------------------------------------------------ def _ssh_batch(self, ip: str, ifaces: List[str]) -> Dict[str, dict]: """ - Open one SSH session to *ip* and collect ethtool + SFP DOM data for - all *ifaces*. Returns {iface: {speed_mbps, duplex, ..., sfp: {...}}}. + Delegate one SSH session to the Pulse worker to collect ethtool + SFP DOM + data for all *ifaces*. Returns {iface: {speed_mbps, duplex, ..., sfp: {...}}}. """ - if not ifaces or not self.ssh_pass: + if not ifaces or not self.pulse.url: return {} # Validate interface names (kernel names only contain [a-zA-Z0-9_.-]) @@ -195,37 +321,23 @@ class LinkStatsCollector: # Build a single shell command: for each iface output ethtool + -m with sentinels parts = [] for iface in safe_ifaces: + q = shlex.quote(iface) parts.append( f'echo "___IFACE:{iface}___";' - f' ethtool "{iface}" 2>/dev/null;' + f' ethtool {q} 2>/dev/null;' f' echo "___DOM:{iface}___";' - f' ethtool -m "{iface}" 2>/dev/null;' + f' ethtool -m {q} 2>/dev/null;' f' echo "___END___"' ) shell_cmd = ' '.join(parts) - try: - result = subprocess.run( - [ - 'sshpass', '-p', self.ssh_pass, - 'ssh', - '-o', 'StrictHostKeyChecking=no', - '-o', f'ConnectTimeout={self.ssh_connect_timeout}', - '-o', 'LogLevel=ERROR', - '-o', 'BatchMode=no', - f'{self.ssh_user}@{ip}', - shell_cmd, - ], - capture_output=True, - text=True, - timeout=self.ssh_timeout, - ) - output = result.stdout - except FileNotFoundError: - logger.debug('sshpass not found – skipping ethtool collection') - return {} - except Exception as e: - logger.debug(f'SSH ethtool {ip}: {e}') + ssh_cmd = ( + f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 ' + f'-o LogLevel=ERROR root@{ip} "{shell_cmd}"' + ) + output = self.pulse.run_command(ssh_cmd) + if output is None: + logger.debug(f'Pulse ethtool collection returned None for {ip}') return {} return 
self._parse_ssh_output(output) @@ -415,9 +527,9 @@ class LinkStatsCollector: host_ip = instance.split(':')[0] ifaces = list(iface_metrics.keys()) - # SSH ethtool collection (one connection per host, all ifaces) + # SSH ethtool collection via Pulse worker (one connection per host, all ifaces) ethtool_data: Dict[str, dict] = {} - if self.ssh_pass and ifaces: + if self.pulse.url and ifaces: try: ethtool_data = self._ssh_batch(host_ip, ifaces) except Exception as e: @@ -438,11 +550,52 @@ class LinkStatsCollector: result_hosts[host] = merged + # Collect UniFi switch port stats + unifi_switches: dict = {} + if self.unifi: + try: + raw = self.unifi.get_switch_ports() + if raw is not None: + now = time.time() + unifi_switches = self._compute_unifi_rates(raw, now) + self._prev_unifi = raw + self._prev_unifi_time = now + except Exception as e: + logger.warning(f'UniFi switch port collection failed: {e}') + return { - 'hosts': result_hosts, - 'updated': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC'), + 'hosts': result_hosts, + 'unifi_switches': unifi_switches, + 'updated': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC'), } + def _compute_unifi_rates(self, raw: Dict[str, dict], now: float) -> Dict[str, dict]: + """Compute per-port byte/error rates from delta against previous snapshot.""" + dt = now - self._prev_unifi_time if self._prev_unifi_time > 0 else 0 + + def rate(new_val: int, old_val: int) -> Optional[float]: + if dt <= 0: + return None + return max(0.0, (new_val - old_val) / dt) + + result: Dict[str, dict] = {} + for sw_name, sw_data in raw.items(): + prev_ports = self._prev_unifi.get(sw_name, {}).get('ports', {}) + merged_ports: Dict[str, dict] = {} + for pname, d in sw_data['ports'].items(): + entry = dict(d) + prev = prev_ports.get(pname, {}) + entry['tx_bytes_rate'] = rate(d['tx_bytes'], prev.get('tx_bytes', 0)) + entry['rx_bytes_rate'] = rate(d['rx_bytes'], prev.get('rx_bytes', 0)) + entry['tx_errs_rate'] = rate(d['tx_errors'], prev.get('tx_errors', 0)) 
+ entry['rx_errs_rate'] = rate(d['rx_errors'], prev.get('rx_errors', 0)) + entry['tx_drops_rate'] = rate(d['tx_dropped'], prev.get('tx_dropped', 0)) + entry['rx_drops_rate'] = rate(d['rx_dropped'], prev.get('rx_dropped', 0)) + merged_ports[pname] = entry + result[sw_name] = {'ip': sw_data['ip'], 'model': sw_data['model'], + 'ports': merged_ports} + return result + # -------------------------------------------------------------------------- # Helpers @@ -479,7 +632,7 @@ class NetworkMonitor: self.prom = PrometheusClient(prom_url) self.unifi = UnifiClient(self.cfg['unifi']) self.tickets = TicketClient(self.cfg.get('ticket_api', {})) - self.link_stats = LinkStatsCollector(self.cfg, self.prom) + self.link_stats = LinkStatsCollector(self.cfg, self.prom, self.unifi) mon = self.cfg.get('monitor', {}) self.poll_interval = mon.get('poll_interval', 120) diff --git a/static/app.js b/static/app.js index 12d8c29..8b61380 100644 --- a/static/app.js +++ b/static/app.js @@ -105,7 +105,9 @@ function updateUnifiTable(devices) { const statusText = d.connected ? 'Online' : 'Offline'; const suppressBtn = !d.connected ? `` + data-sup-type="unifi_device" + data-sup-name="${escHtml(d.name)}" + data-sup-detail="">🔕 Suppress` : ''; return ` @@ -149,9 +151,9 @@ function updateEventsTable(events) { ${ticket} + data-sup-type="${escHtml(supType)}" + data-sup-name="${escHtml(e.target_name)}" + data-sup-detail="${escHtml(e.target_detail||'')}">🔕 `; }).join(''); @@ -204,12 +206,10 @@ function updateSuppressForm() { if (detailGrp) detailGrp.style.display = (type === 'interface') ? 
'' : 'none';
 }
 
-function setDuration(mins) {
+function setDuration(mins, el) {
   document.getElementById('sup-expires').value = mins || '';
-  document.querySelectorAll('#suppress-modal .pill').forEach(p => p.classList.remove('active'));
-  event.currentTarget.classList.add('active');
-
+  if (el) { document.querySelectorAll('#suppress-modal .pill').forEach(p => p.classList.remove('active')); el.classList.add('active'); }
   const hint = document.getElementById('duration-hint');
   if (hint) {
     if (mins) {
@@ -257,10 +257,21 @@ async function submitSuppress(e) {
   }
 }
 
-// ── Close modal on backdrop click ─────────────────────────────────────
+// ── Global click handler: modal backdrop + suppress button delegation ─
 document.addEventListener('click', e => {
+  // Close modal when clicking backdrop
   const modal = document.getElementById('suppress-modal');
-  if (modal && e.target === modal) closeSuppressModal();
+  if (modal && e.target === modal) { closeSuppressModal(); return; }
+
+  // Suppress button via data attributes (avoids inline onclick XSS)
+  const btn = e.target.closest('.btn-suppress[data-sup-type]');
+  if (btn) {
+    openSuppressModal(
+      btn.dataset.supType || '',
+      btn.dataset.supName || '',
+      btn.dataset.supDetail || '',
+    );
+  }
 });
 
 // ── Utility ───────────────────────────────────────────────────────────
diff --git a/static/style.css b/static/style.css
index a93457e..5ae4852 100644
--- a/static/style.css
+++ b/static/style.css
@@ -31,7 +31,7 @@
   --text: #00ff41;
   --text-dim: #00cc33;
-  --text-muted: #008822;
+  --text-muted: #00bb33;
 
   --font: 'Courier New','Consolas','Monaco','Menlo',monospace;
 
@@ -56,7 +56,7 @@ body {
   font-family: var(--font);
   background: var(--bg);
   color: var(--text);
-  font-size: 13px;
+  font-size: 14px;
   line-height: 1.5;
   min-height: 100vh;
   position: relative;
@@ -788,6 +788,31 @@ a:hover { text-decoration: underline; text-shadow: var(--glow-amber); }
 .power-warn { background:var(--orange); }
 .power-crit { background:var(--red); box-shadow:0 0 3px var(--red); }
 
+/* Collapsible link panels */
+.link-host-title {
+  cursor: pointer;
+  user-select: none;
+} +.link-host-title:hover { background: rgba(0,255,65,.04); } + +.panel-toggle { + font-size: .65em; + color: var(--text-muted); + letter-spacing: .04em; + flex-shrink: 0; + margin-left: 6px; + padding: 0 4px; + border: 1px solid rgba(0,255,65,.2); +} +.link-host-panel.collapsed > .link-ifaces-grid { display: none; } + +/* Collapse all / Expand all bar */ +.link-collapse-bar { + display: flex; + gap: 8px; + margin-bottom: 10px; +} + /* Link panel states */ .link-no-data { padding:14px; color:var(--text-muted); font-size:.78em; text-align:center; } .link-loading { padding:20px; text-align:center; color:var(--text-muted); font-size:.8em; } @@ -797,6 +822,317 @@ a:hover { text-decoration: underline; text-shadow: var(--glow-amber); } .counter-zero { color:var(--green); } .counter-nonzero { color:var(--red); text-shadow:var(--glow-red); } +/* UniFi switch section divider */ +.unifi-section-header { + display: flex; + align-items: center; + gap: 12px; + margin: 24px 0 12px; + color: var(--cyan); + font-size: .75em; + letter-spacing: .1em; + text-shadow: var(--glow-cyan); +} +.unifi-section-header::before, +.unifi-section-header::after { + content: ''; + flex: 1; + height: 1px; + background: linear-gradient(90deg, transparent, var(--cyan), transparent); +} + +/* Port badges (UPLINK, PoE, #N) */ +.port-badge { + font-size: .58em; + padding: 1px 5px; + border: 1px solid; + letter-spacing: .05em; + font-weight: bold; + vertical-align: middle; +} +.port-badge-uplink { color:var(--amber); border-color:var(--amber-dim); } +.port-badge-poe { color:var(--cyan); border-color:var(--cyan-dim); } +.port-badge-num { color:var(--text-muted); border-color:rgba(0,255,65,.2); } + +/* LLDP neighbor + PoE info lines on link debug cards */ +.port-lldp { + font-size: .68em; + color: var(--cyan); + text-shadow: var(--glow-cyan); + margin: -4px 0 6px; + letter-spacing: .02em; +} +.port-poe-info { + font-size: .68em; + color: var(--amber); + margin: -4px 0 6px; + letter-spacing: .02em; +} + +/* 
Amber value colour used in inspector */ +.val-amber { color:var(--amber); text-shadow:var(--glow-amber); } + +/* Down port card — dim everything */ +.link-iface-card.port-down { + opacity: .42; + filter: saturate(.3); +} + +/* ── Inspector page ───────────────────────────────────────────────── */ + +/* Layout: main chassis area + collapsible right panel */ +.inspector-layout { + display: flex; + gap: 16px; + align-items: flex-start; + min-height: 300px; +} + +.inspector-main { + flex: 1; + min-width: 0; + display: flex; + flex-direction: column; + gap: 14px; +} + +/* Switch chassis card */ +.inspector-chassis { + background: var(--bg2); + border: 1px solid var(--border); + position: relative; +} +.inspector-chassis::before { content:'╔'; position:absolute; top:-1px; left:-1px; color:var(--green); text-shadow:var(--glow); font-size:1rem; line-height:1; } +.inspector-chassis::after { content:'╗'; position:absolute; top:-1px; right:-1px; color:var(--green); text-shadow:var(--glow); font-size:1rem; line-height:1; } + +.chassis-header { + display: flex; + align-items: center; + gap: 12px; + padding: 8px 16px; + background: var(--bg3); + border-bottom: 1px solid var(--border); +} +.chassis-name { font-weight:bold; font-size:.88em; color:var(--amber); text-shadow:var(--glow-amber); letter-spacing:.05em; } +.chassis-name::before { content:'>> '; color:var(--green); } +.chassis-ip { font-size:.72em; color:var(--text-muted); } +.chassis-meta { font-size:.65em; color:var(--text-muted); margin-left:auto; } + +.chassis-body { + padding: 12px 16px 14px; +} + +/* Port rows */ +.chassis-rows { display:flex; flex-direction:column; gap:5px; margin-bottom:8px; } +.chassis-row { display:flex; flex-wrap:wrap; gap:4px; } + +/* SFP section below main rows */ +.chassis-sfp-section { + display: flex; + gap: 6px; + padding-top: 8px; + border-top: 1px solid rgba(0,255,255,.15); + margin-top: 4px; +} + +/* Individual port block */ +.switch-port-block { + width: 34px; + height: 34px; + 
display: flex; + align-items: center; + justify-content: center; + font-size: .6em; + font-weight: bold; + border: 1px solid; + cursor: pointer; + transition: box-shadow .1s, border-color .1s, background .1s; + user-select: none; + flex-shrink: 0; + letter-spacing: 0; +} + +/* SFP port (in rows — slightly narrower to suggest cage) */ +.switch-port-block.sfp-port { + width: 28px; + height: 38px; + font-size: .55em; +} + +/* SFP section block (standalone cage) */ +.switch-port-block.sfp-block { + width: 44px; + height: 30px; + font-size: .55em; + letter-spacing: .04em; +} + +/* State colours */ +.switch-port-block.down { + background: var(--bg3); + border-color: rgba(0,255,65,.15); + color: rgba(0,255,65,.25); +} +.switch-port-block.up { + background: rgba(0,255,65,.06); + border-color: var(--green-muted); + color: var(--green); + text-shadow: 0 0 4px rgba(0,255,65,.5); +} +.switch-port-block.up:hover { + background: rgba(0,255,65,.13); + border-color: var(--green); + box-shadow: var(--glow); +} +.switch-port-block.poe-active { + background: var(--amber-dim); + border-color: var(--amber); + color: var(--amber); + text-shadow: 0 0 4px rgba(255,176,0,.5); +} +.switch-port-block.poe-active:hover { + box-shadow: var(--glow-amber); +} +.switch-port-block.uplink { + background: var(--cyan-dim); + border-color: var(--cyan); + color: var(--cyan); + text-shadow: 0 0 4px rgba(0,255,255,.5); +} +.switch-port-block.uplink:hover { + box-shadow: var(--glow-cyan); +} +.switch-port-block.selected { + outline: 2px solid #fff; + outline-offset: 1px; +} + +/* Right-side detail panel */ +.inspector-panel { + width: 0; + overflow: hidden; + flex-shrink: 0; + transition: width .2s ease; + display: flex; + flex-direction: column; +} +.inspector-panel.open { + width: 310px; +} + +.inspector-panel-inner { + width: 310px; + background: var(--bg2); + border: 1px solid var(--border); + padding: 14px 14px 18px; + position: relative; + overflow-y: auto; + max-height: calc(100vh - 120px); +} 
+.inspector-panel-inner::before { content:'╔'; position:absolute; top:-1px; left:-1px; color:var(--green); text-shadow:var(--glow); font-size:1rem; line-height:1; } +.inspector-panel-inner::after { content:'╗'; position:absolute; top:-1px; right:-1px; color:var(--green); text-shadow:var(--glow); font-size:1rem; line-height:1; } + +.panel-header { + display: flex; + justify-content: space-between; + align-items: flex-start; + margin-bottom: 12px; + padding-bottom: 10px; + border-bottom: 1px solid var(--border); +} +.panel-port-name { font-weight:bold; font-size:.92em; color:var(--amber); text-shadow:var(--glow-amber); } +.panel-meta { font-size:.68em; color:var(--text-muted); margin-top:2px; } +.panel-close { + background: none; + border: 1px solid var(--border); + color: var(--text-muted); + cursor: pointer; + font-size: .8em; + padding: 1px 7px; + font-family: var(--font); + flex-shrink: 0; + transition: all .15s; +} +.panel-close:hover { color:var(--red); border-color:var(--red); } + +.panel-section-title { + font-size: .62em; + font-weight: bold; + color: var(--amber); + text-shadow: var(--glow-amber); + text-transform: uppercase; + letter-spacing: .1em; + margin: 10px 0 5px; + padding-bottom: 3px; + border-bottom: 1px solid rgba(0,255,65,.12); +} +.panel-section-title:first-of-type { margin-top: 0; } + +.panel-row { + display: flex; + justify-content: space-between; + align-items: baseline; + padding: 2px 0; +} +.panel-label { font-size:.68em; color:var(--text-muted); text-transform:uppercase; letter-spacing:.05em; flex-shrink:0; } +.panel-val { font-size:.75em; font-weight:bold; color:var(--text-dim); text-align:right; word-break:break-all; } + +/* Path debug two-column layout */ +.path-conn-type { + font-size: .68em; + color: var(--cyan); + font-weight: normal; + margin-left: 6px; + text-shadow: none; + text-transform: none; + letter-spacing: normal; +} + +.path-debug-cols { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 8px; + margin-top: 6px; +} 
+ +.path-col { + background: var(--bg3); + border: 1px solid rgba(0,255,65,.18); + padding: 7px 8px; +} +.path-col-header { + font-size: .62em; + font-weight: bold; + color: var(--amber); + margin-bottom: 5px; + padding-bottom: 3px; + border-bottom: 1px solid rgba(0,255,65,.15); + letter-spacing: .04em; +} +.path-row { + display: flex; + justify-content: space-between; + gap: 4px; + font-size: .65em; + padding: 1px 0; +} +.path-row span:first-child { color:var(--text-muted); flex-shrink:0; } +.path-row span:last-child { color:var(--text-dim); font-weight:bold; text-align:right; word-break:break-all; } + +.path-dom { + margin-top: 5px; + padding-top: 5px; + border-top: 1px solid rgba(0,255,255,.15); +} +.path-dom-row { + display: flex; + justify-content: space-between; + font-size: .65em; + padding: 1px 0; + color: var(--cyan); +} +.path-dom-row span:first-child { color:var(--text-muted); } + /* ── Responsive ───────────────────────────────────────────────────── */ @media (max-width: 768px) { .host-grid { grid-template-columns:1fr; } @@ -806,4 +1142,7 @@ a:hover { text-decoration: underline; text-shadow: var(--glow-amber); } .link-ifaces-grid { grid-template-columns:1fr; } .sfp-grid { grid-template-columns:1fr 1fr; } .header-nav { display:none; } + .inspector-layout { flex-direction:column; } + .inspector-panel.open { width:100%; } + .inspector-panel-inner { width:100%; } } diff --git a/templates/base.html b/templates/base.html index 5fd1ca8..0b6bf43 100644 --- a/templates/base.html +++ b/templates/base.html @@ -22,6 +22,10 @@ class="nav-link {% if request.endpoint == 'links_page' %}active{% endif %}"> Link Debug + + Inspector + Suppressions diff --git a/templates/index.html b/templates/index.html index e14eac1..9a7b35a 100644 --- a/templates/index.html +++ b/templates/index.html @@ -116,7 +116,9 @@
@@ -164,7 +166,9 @@ {% if not d.connected %} {% endif %} @@ -221,7 +225,9 @@ @@ -271,11 +277,11 @@
- - - - - + + + + +
Persists until manually removed.
diff --git a/templates/inspector.html b/templates/inspector.html new file mode 100644 index 0000000..441e4df --- /dev/null +++ b/templates/inspector.html @@ -0,0 +1,391 @@ +{% extends "base.html" %} +{% block title %}Inspector – GANDALF{% endblock %} + +{% block content %} + + + +
+
+ +
+
+
+
+
+ +{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/links.html b/templates/links.html index ad5f087..853d3c7 100644 --- a/templates/links.html +++ b/templates/links.html @@ -7,7 +7,7 @@

Link Debug

Per-interface stats: speed, duplex, SFP optical levels, TX/RX rates, errors, and carrier changes. - Data collected via Prometheus node_exporter + SSH ethtool every poll cycle. + Data collected via Prometheus node_exporter + SSH ethtool (servers) and UniFi API (switches) every poll cycle.

@@ -262,10 +262,177 @@ function renderIfaceCard(ifaceName, d) {
`; } +// ── Render a single UniFi switch port card ──────────────────────── +function renderPortCard(portName, d) { + const up = d.up; + const speed = up ? fmtSpeed(d.speed_mbps) : 'DOWN'; + const duplex = d.full_duplex ? 'Full' : (up ? 'Half' : '–'); + const media = d.media || ''; + + const uplinkBadge = d.is_uplink + ? 'UPLINK' : ''; + const poeBadge = (d.poe_power != null && d.poe_power > 0) + ? `PoE ${d.poe_power.toFixed(1)}W` : ''; + const numBadge = d.port_idx + ? `#${d.port_idx}` : ''; + + const lldpHtml = (d.lldp && d.lldp.system_name) + ? `
→ ${escHtml(d.lldp.system_name)}${d.lldp.port_id ? ' (' + escHtml(d.lldp.port_id) + ')' : ''}
` : ''; + const poeMaxHtml = (d.poe_class != null) + ? `
PoE class ${d.poe_class}${d.poe_max_power ? ' / max ' + d.poe_max_power.toFixed(1) + 'W' : ''}
` : ''; + + const txRate = d.tx_bytes_rate; + const rxRate = d.rx_bytes_rate; + const txPct = fmtRateBar(txRate, d.speed_mbps); + const rxPct = fmtRateBar(rxRate, d.speed_mbps); + const txStr = fmtRate(txRate); + const rxStr = fmtRate(rxRate); + + return ` + `; +} + +// ── Render UniFi switches section ───────────────────────────────── +function renderUnifiSwitches(unifiSwitches) { + if (!unifiSwitches || !Object.keys(unifiSwitches).length) return ''; + + const panels = Object.entries(unifiSwitches).map(([swName, sw]) => { + const ports = sw.ports || {}; + const allPorts= Object.entries(ports) + .sort(([,a],[,b]) => (a.port_idx||0) - (b.port_idx||0)); + const upCount = allPorts.filter(([,d]) => d.up).length; + const downCount = allPorts.length - upCount; + + const portCards = allPorts + .map(([pname, d]) => renderPortCard(pname, d)) + .join(''); + + const meta = [ + sw.model, + `${upCount} up`, + downCount ? `${downCount} down` : '', + ].filter(Boolean).join(' · '); + + return ` + `; + }).join(''); + + return ` +
UniFi Switches
+ `; +} + +// ── Collapse / expand panels ─────────────────────────────────────── +function togglePanel(panel) { + panel.classList.toggle('collapsed'); + const btn = panel.querySelector('.panel-toggle'); + if (btn) btn.textContent = panel.classList.contains('collapsed') ? '[+]' : '[–]'; + const id = panel.id; + if (id) { + const saved = JSON.parse(sessionStorage.getItem('gandalfCollapsed') || '{}'); + saved[id] = panel.classList.contains('collapsed'); + sessionStorage.setItem('gandalfCollapsed', JSON.stringify(saved)); + } +} + +function restoreCollapseState() { + const saved = JSON.parse(sessionStorage.getItem('gandalfCollapsed') || '{}'); + for (const [id, collapsed] of Object.entries(saved)) { + if (!collapsed) continue; + const panel = document.getElementById(id); + if (panel) { + panel.classList.add('collapsed'); + const btn = panel.querySelector('.panel-toggle'); + if (btn) btn.textContent = '[+]'; + } + } +} + +function collapseAll() { + document.querySelectorAll('.link-host-panel').forEach(panel => { + panel.classList.add('collapsed'); + const btn = panel.querySelector('.panel-toggle'); + if (btn) btn.textContent = '[+]'; + }); + sessionStorage.setItem('gandalfCollapsed', '{}'); // let restore pick it up next time +} + +function expandAll() { + document.querySelectorAll('.link-host-panel').forEach(panel => { + panel.classList.remove('collapsed'); + const btn = panel.querySelector('.panel-toggle'); + if (btn) btn.textContent = '[–]'; + }); + sessionStorage.setItem('gandalfCollapsed', '{}'); +} + // ── Render all hosts ────────────────────────────────────────────── function renderLinks(data) { - const hosts = data.hosts || {}; - if (!Object.keys(hosts).length) { + const hosts = data.hosts || {}; + const unifi = data.unifi_switches || {}; + + if (!Object.keys(hosts).length && !Object.keys(unifi).length) { document.getElementById('links-container').innerHTML = '

No link data collected yet. Monitor may still be initialising.

'; return; @@ -275,7 +442,7 @@ function renderLinks(data) { const updEl = document.getElementById('links-updated'); if (updEl) updEl.textContent = upd; - const html = Object.entries(hosts).map(([hostName, ifaces]) => { + const serverHtml = Object.entries(hosts).map(([hostName, ifaces]) => { const ifaceCards = Object.entries(ifaces) .sort(([a],[b]) => a.localeCompare(b)) .map(([ifaceName, d]) => renderIfaceCard(ifaceName, d)) @@ -284,9 +451,10 @@ function renderLinks(data) { const hostIp = ifaces[Object.keys(ifaces)[0]]?.host_ip || ''; return `