Complete rewrite: full-featured network monitoring dashboard
- Two-service architecture: Flask web app (gandalf.service) + background polling daemon (gandalf-monitor.service)
- Monitor polls Prometheus node_network_up for physical NIC states on all 6 hypervisors (added storage-01 at 10.10.10.11:9100)
- UniFi API monitoring for switches, APs, and gateway device status
- Ping reachability for hosts without node_exporter (pbs only now)
- Smart baseline: interfaces first seen as down are never alerted on; only UP→DOWN regressions trigger tickets
- Cluster-wide P1 ticket when 3+ hosts have genuine simultaneous interface regressions (guards against false positives on startup)
- Tinker Tickets integration with 24-hour hash-based deduplication
- Alert suppression: manual toggle or timed windows (30m/1h/4h/8h)
- Authelia SSO via forward-auth headers, admin group required
- Network topology: Internet → UDM-Pro → Agg Switch (10G DAC) → PoE Switch (10G DAC) → Hosts
- MariaDB schema, suppression management UI, host/interface cards

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
50
schema.sql
Normal file
50
schema.sql
Normal file
@@ -0,0 +1,50 @@
|
||||
-- Gandalf Network Monitor – Database Schema
|
||||
-- Run on MariaDB LXC 149 (10.10.10.50)
|
||||
|
||||
-- Create the application database when it does not exist yet, using the
-- utf8mb4 character set with the utf8mb4_unicode_ci collation, then select
-- it as the default database for every statement that follows.
CREATE DATABASE IF NOT EXISTS gandalf
    DEFAULT CHARACTER SET = utf8mb4
    DEFAULT COLLATE = utf8mb4_unicode_ci;

USE gandalf;
|
||||
|
||||
-- ── Network events (open and resolved alerts) ─────────────────────────
|
||||
-- network_events: one row per detected network event (open and resolved
-- alerts share this table).
--
-- The TIMESTAMP columns that must always hold a value are declared NOT NULL
-- explicitly: without it, their nullability is decided by the server's
-- explicit_defaults_for_timestamp setting and can differ between installs.
CREATE TABLE IF NOT EXISTS network_events (
    id                   INT AUTO_INCREMENT PRIMARY KEY,
    event_type           VARCHAR(60)  NOT NULL,
    severity             ENUM('critical','warning','info') NOT NULL DEFAULT 'warning',
    source_type          VARCHAR(20)  NOT NULL,              -- 'prometheus', 'unifi', 'ping'
    target_name          VARCHAR(255) NOT NULL,              -- hostname or device name
    target_detail        VARCHAR(255) NOT NULL DEFAULT '',   -- interface name, device type, IP
    description          TEXT,
    -- Set once, when the row is inserted.
    first_seen           TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- Refreshed automatically by the server whenever an UPDATE changes the
    -- row, including updates that only touch other columns.
    last_seen            TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    -- NOTE(review): presumably NULL while the event is still open (the index
    -- on this column is named idx_active) -- confirm against the monitor code.
    resolved_at          TIMESTAMP NULL,
    consecutive_failures INT NOT NULL DEFAULT 1,
    -- NOTE(review): looks like the external ticket reference; NULL when no
    -- ticket is attached -- confirm against the ticketing integration.
    ticket_id            VARCHAR(20) NULL,

    INDEX idx_active (resolved_at),
    INDEX idx_target (target_name, target_detail),
    INDEX idx_type   (event_type)
) ENGINE=InnoDB;
|
||||
|
||||
-- ── Suppression rules ─────────────────────────────────────────────────
|
||||
-- suppression_rules: alert suppression entries. A rule names a target
-- (type / name / detail), the reason, and who created it.
--
-- created_at is declared NOT NULL explicitly: without it, the column's
-- nullability is decided by the server's explicit_defaults_for_timestamp
-- setting and can differ between installs.
CREATE TABLE IF NOT EXISTS suppression_rules (
    id            INT AUTO_INCREMENT PRIMARY KEY,
    target_type   VARCHAR(50)  NOT NULL,             -- 'host', 'interface', 'unifi_device', 'all'
    target_name   VARCHAR(255) NOT NULL DEFAULT '',
    target_detail VARCHAR(255) NOT NULL DEFAULT '',
    reason        TEXT         NOT NULL,
    suppressed_by VARCHAR(255) NOT NULL,
    created_at    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    expires_at    TIMESTAMP    NULL,                 -- NULL = manual (never auto-expires)
    active        BOOLEAN      NOT NULL DEFAULT TRUE,

    -- Composite index with active first, then expires_at.
    INDEX idx_active_exp (active, expires_at)
) ENGINE=InnoDB;
|
||||
|
||||
-- ── Monitor state (key/value store for snapshot + baseline) ───────────
|
||||
-- monitor_state: key/value store (one row per key_name) used for the
-- monitor's snapshot and baseline data.
--
-- updated_at is declared NOT NULL explicitly: without it, the column's
-- nullability is decided by the server's explicit_defaults_for_timestamp
-- setting and can differ between installs.
CREATE TABLE IF NOT EXISTS monitor_state (
    key_name   VARCHAR(100) PRIMARY KEY,
    value      MEDIUMTEXT   NOT NULL,
    -- Refreshed automatically by the server whenever an UPDATE changes the row.
    updated_at TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
) ENGINE=InnoDB;
|
||||
Reference in New Issue
Block a user