Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 9c5a88fbce | |||
| 0975dd007a | |||
| a34898b8e8 | |||
| 31747c4bd3 | |||
| faa0707f79 | |||
| 9c52e4ad1a | |||
| 156ef97667 | |||
| 2f74266bd9 | |||
| 222bdb08ab | |||
| 8dd744b039 | |||
| 9e2be150b5 | |||
| ed5ba5c59e | |||
| 2be44d8b24 | |||
| 2d6dcd782f | |||
| a1a3a52dd8 | |||
| bcc2ad7f5c | |||
| d4f159ee7c | |||
| 61019418d3 | |||
| 1a53718cc5 | |||
| afaeb64636 | |||
| b6ee45a842 |
@@ -0,0 +1,21 @@
|
|||||||
|
{
|
||||||
|
"env": {
|
||||||
|
"browser": true,
|
||||||
|
"es2021": true
|
||||||
|
},
|
||||||
|
"globals": {
|
||||||
|
"lt": "readonly",
|
||||||
|
"GANDALF_CONFIG": "readonly",
|
||||||
|
"CSS": "readonly"
|
||||||
|
},
|
||||||
|
"rules": {
|
||||||
|
"no-undef": "error",
|
||||||
|
"no-unused-vars": ["warn", { "argsIgnorePattern": "^_", "varsIgnorePattern": "^_" }],
|
||||||
|
"no-console": "off",
|
||||||
|
"eqeqeq": ["error", "always", { "null": "ignore" }]
|
||||||
|
},
|
||||||
|
"parserOptions": {
|
||||||
|
"ecmaVersion": 2021,
|
||||||
|
"sourceType": "script"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -64,7 +64,7 @@ _diag_rate: dict = {}
|
|||||||
|
|
||||||
|
|
||||||
def _purge_old_jobs_loop():
|
def _purge_old_jobs_loop():
|
||||||
"""Background thread: remove stale diag jobs and run daily event purge."""
|
"""Background thread: remove stale diagnostic jobs and mark stuck ones done."""
|
||||||
while True:
|
while True:
|
||||||
time.sleep(120)
|
time.sleep(120)
|
||||||
cutoff = time.time() - 600
|
cutoff = time.time() - 600
|
||||||
@@ -174,17 +174,26 @@ _PAGE_LIMIT = 200 # max events returned per request
|
|||||||
|
|
||||||
|
|
||||||
def _annotate_suppressions(events: list, suppressions: list) -> None:
|
def _annotate_suppressions(events: list, suppressions: list) -> None:
|
||||||
"""Annotate each event dict in-place with an is_suppressed bool."""
|
"""Annotate each event dict in-place with an is_suppressed bool.
|
||||||
|
|
||||||
|
Mirrors the suppression check order in monitor.py exactly:
|
||||||
|
interface_down → interface OR host
|
||||||
|
unifi_device_* → unifi_device
|
||||||
|
everything else → host
|
||||||
|
"""
|
||||||
for ev in events:
|
for ev in events:
|
||||||
sup_type = (
|
etype = ev.get('event_type', '')
|
||||||
'unifi_device' if ev.get('event_type') == 'unifi_device_offline'
|
name = ev.get('target_name', '')
|
||||||
else 'interface' if ev.get('event_type') == 'interface_down'
|
detail = ev.get('target_detail', '') or ''
|
||||||
else 'host'
|
if etype == 'interface_down':
|
||||||
)
|
ev['is_suppressed'] = (
|
||||||
ev['is_suppressed'] = db.check_suppressed(
|
db.check_suppressed(suppressions, 'interface', name, detail) or
|
||||||
suppressions, sup_type,
|
db.check_suppressed(suppressions, 'host', name)
|
||||||
ev.get('target_name', ''), ev.get('target_detail', '') or '',
|
)
|
||||||
)
|
elif etype == 'unifi_device_offline':
|
||||||
|
ev['is_suppressed'] = db.check_suppressed(suppressions, 'unifi_device', name, detail)
|
||||||
|
else:
|
||||||
|
ev['is_suppressed'] = db.check_suppressed(suppressions, 'host', name, detail)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta, timezone
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import pymysql
|
import pymysql
|
||||||
@@ -114,12 +114,12 @@ def upsert_event(
|
|||||||
target_detail: str,
|
target_detail: str,
|
||||||
description: str,
|
description: str,
|
||||||
) -> tuple:
|
) -> tuple:
|
||||||
"""Insert or update a network event. Returns (id, is_new, consecutive_failures)."""
|
"""Insert or update a network event. Returns (id, is_new, consecutive_failures, ticket_id)."""
|
||||||
detail = target_detail or ''
|
detail = target_detail or ''
|
||||||
with get_conn() as conn:
|
with get_conn() as conn:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"""SELECT id, consecutive_failures FROM network_events
|
"""SELECT id, consecutive_failures, ticket_id FROM network_events
|
||||||
WHERE event_type=%s AND target_name=%s AND target_detail=%s
|
WHERE event_type=%s AND target_name=%s AND target_detail=%s
|
||||||
AND resolved_at IS NULL LIMIT 1""",
|
AND resolved_at IS NULL LIMIT 1""",
|
||||||
(event_type, target_name, detail),
|
(event_type, target_name, detail),
|
||||||
@@ -134,7 +134,7 @@ def upsert_event(
|
|||||||
WHERE id=%s""",
|
WHERE id=%s""",
|
||||||
(new_count, description, existing['id']),
|
(new_count, description, existing['id']),
|
||||||
)
|
)
|
||||||
return existing['id'], False, new_count
|
return existing['id'], False, new_count, existing.get('ticket_id')
|
||||||
else:
|
else:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"""INSERT INTO network_events
|
"""INSERT INTO network_events
|
||||||
@@ -142,7 +142,7 @@ def upsert_event(
|
|||||||
VALUES (%s, %s, %s, %s, %s, %s)""",
|
VALUES (%s, %s, %s, %s, %s, %s)""",
|
||||||
(event_type, severity, source_type, target_name, detail, description),
|
(event_type, severity, source_type, target_name, detail, description),
|
||||||
)
|
)
|
||||||
return cur.lastrowid, True, 1
|
return cur.lastrowid, True, 1, None
|
||||||
|
|
||||||
|
|
||||||
def resolve_event(event_type: str, target_name: str, target_detail: str = '') -> None:
|
def resolve_event(event_type: str, target_name: str, target_detail: str = '') -> None:
|
||||||
@@ -182,7 +182,7 @@ def get_active_events(limit: int = 200, offset: int = 0) -> list:
|
|||||||
for r in rows:
|
for r in rows:
|
||||||
for k in ('first_seen', 'last_seen'):
|
for k in ('first_seen', 'last_seen'):
|
||||||
if r.get(k) and hasattr(r[k], 'isoformat'):
|
if r.get(k) and hasattr(r[k], 'isoformat'):
|
||||||
r[k] = r[k].isoformat()
|
r[k] = r[k].isoformat() + 'Z'
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
|
|
||||||
@@ -210,7 +210,7 @@ def get_recent_resolved(hours: int = 24, limit: int = 50) -> list:
|
|||||||
for r in rows:
|
for r in rows:
|
||||||
for k in ('first_seen', 'last_seen', 'resolved_at'):
|
for k in ('first_seen', 'last_seen', 'resolved_at'):
|
||||||
if r.get(k) and hasattr(r[k], 'isoformat'):
|
if r.get(k) and hasattr(r[k], 'isoformat'):
|
||||||
r[k] = r[k].isoformat()
|
r[k] = r[k].isoformat() + 'Z'
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
|
|
||||||
@@ -252,7 +252,7 @@ def get_active_suppressions() -> list:
|
|||||||
for r in rows:
|
for r in rows:
|
||||||
for k in ('created_at', 'expires_at'):
|
for k in ('created_at', 'expires_at'):
|
||||||
if r.get(k) and hasattr(r[k], 'isoformat'):
|
if r.get(k) and hasattr(r[k], 'isoformat'):
|
||||||
r[k] = r[k].isoformat()
|
r[k] = r[k].isoformat() + 'Z'
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
|
|
||||||
@@ -267,7 +267,7 @@ def get_suppression_history(limit: int = 50) -> list:
|
|||||||
for r in rows:
|
for r in rows:
|
||||||
for k in ('created_at', 'expires_at'):
|
for k in ('created_at', 'expires_at'):
|
||||||
if r.get(k) and hasattr(r[k], 'isoformat'):
|
if r.get(k) and hasattr(r[k], 'isoformat'):
|
||||||
r[k] = r[k].isoformat()
|
r[k] = r[k].isoformat() + 'Z'
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
|
|
||||||
@@ -281,7 +281,7 @@ def create_suppression(
|
|||||||
) -> int:
|
) -> int:
|
||||||
expires_at = None
|
expires_at = None
|
||||||
if expires_minutes:
|
if expires_minutes:
|
||||||
expires_at = datetime.utcnow() + timedelta(minutes=int(expires_minutes))
|
expires_at = datetime.now(timezone.utc) + timedelta(minutes=int(expires_minutes))
|
||||||
with get_conn() as conn:
|
with get_conn() as conn:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
|
|||||||
+2
-2
@@ -68,7 +68,7 @@ class DiagnosticsRunner:
|
|||||||
f' echo "=== ip_route ===";'
|
f' echo "=== ip_route ===";'
|
||||||
f' ip route show dev {q} 2>/dev/null;'
|
f' ip route show dev {q} 2>/dev/null;'
|
||||||
f' echo "=== dmesg ===";'
|
f' echo "=== dmesg ===";'
|
||||||
f' dmesg 2>/dev/null | grep {q} | tail -50;'
|
f' dmesg 2>/dev/null | grep -F -- {q} | tail -50;'
|
||||||
f' echo "=== lldpctl ===";'
|
f' echo "=== lldpctl ===";'
|
||||||
f' lldpctl 2>/dev/null || echo "lldpd not running";'
|
f' lldpctl 2>/dev/null || echo "lldpd not running";'
|
||||||
f' echo "=== end ==="'
|
f' echo "=== end ==="'
|
||||||
@@ -78,7 +78,7 @@ class DiagnosticsRunner:
|
|||||||
f'ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 '
|
f'ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 '
|
||||||
f'-o BatchMode=yes -o LogLevel=ERROR '
|
f'-o BatchMode=yes -o LogLevel=ERROR '
|
||||||
f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 '
|
f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 '
|
||||||
f'root@{ip_q} \'{remote_cmd}\''
|
f'root@{ip_q} {shlex.quote(remote_cmd)}'
|
||||||
)
|
)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|||||||
+40
-29
@@ -12,7 +12,7 @@ import logging
|
|||||||
import re
|
import re
|
||||||
import shlex
|
import shlex
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@@ -215,7 +215,10 @@ class TicketClient:
|
|||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
if data.get('success'):
|
if data.get('success'):
|
||||||
tid = data['ticket_id']
|
tid = data.get('ticket_id')
|
||||||
|
if not tid:
|
||||||
|
logger.warning(f'Ticket API success but no ticket_id in response: {data}')
|
||||||
|
return None
|
||||||
logger.info(f'Created ticket #{tid}: {title}')
|
logger.info(f'Created ticket #{tid}: {title}')
|
||||||
return tid
|
return tid
|
||||||
if data.get('existing_ticket_id'):
|
if data.get('existing_ticket_id'):
|
||||||
@@ -377,7 +380,7 @@ class LinkStatsCollector:
|
|||||||
f'ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 '
|
f'ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 '
|
||||||
f'-o BatchMode=yes -o LogLevel=ERROR '
|
f'-o BatchMode=yes -o LogLevel=ERROR '
|
||||||
f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 '
|
f'-o ServerAliveInterval=10 -o ServerAliveCountMax=2 '
|
||||||
f'root@{ip} "{shell_cmd}"'
|
f'root@{ip} {shlex.quote(shell_cmd)}'
|
||||||
)
|
)
|
||||||
output = self.pulse.run_command(ssh_cmd)
|
output = self.pulse.run_command(ssh_cmd)
|
||||||
if output is None:
|
if output is None:
|
||||||
@@ -615,7 +618,7 @@ class LinkStatsCollector:
|
|||||||
return {
|
return {
|
||||||
'hosts': result_hosts,
|
'hosts': result_hosts,
|
||||||
'unifi_switches': unifi_switches,
|
'unifi_switches': unifi_switches,
|
||||||
'updated': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC'),
|
'updated': datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _compute_unifi_rates(self, raw: Dict[str, dict], now: float) -> Dict[str, dict]:
|
def _compute_unifi_rates(self, raw: Dict[str, dict], now: float) -> Dict[str, dict]:
|
||||||
@@ -650,7 +653,7 @@ class LinkStatsCollector:
|
|||||||
# Helpers
|
# Helpers
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
def _now_utc() -> str:
|
def _now_utc() -> str:
|
||||||
return datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')
|
return datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
@@ -725,13 +728,13 @@ class NetworkMonitor:
|
|||||||
db.check_suppressed(suppressions, 'interface', host, iface) or
|
db.check_suppressed(suppressions, 'interface', host, iface) or
|
||||||
db.check_suppressed(suppressions, 'host', host)
|
db.check_suppressed(suppressions, 'host', host)
|
||||||
)
|
)
|
||||||
event_id, is_new, consec = db.upsert_event(
|
event_id, is_new, consec, ticket_id = db.upsert_event(
|
||||||
'interface_down', 'critical', 'prometheus',
|
'interface_down', 'critical', 'prometheus',
|
||||||
host, iface,
|
host, iface,
|
||||||
f'Interface {iface} on {host} went link-down ({_now_utc()})',
|
f'Interface {iface} on {host} went link-down ({_now_utc()})',
|
||||||
)
|
)
|
||||||
if not sup and consec >= self.fail_thresh:
|
if not sup and consec >= self.fail_thresh and not ticket_id:
|
||||||
self._ticket_interface(event_id, is_new, host, iface, consec)
|
self._ticket_interface(event_id, host, iface, consec)
|
||||||
|
|
||||||
if host_has_regression:
|
if host_has_regression:
|
||||||
hosts_with_regression.append(host)
|
hosts_with_regression.append(host)
|
||||||
@@ -741,13 +744,13 @@ class NetworkMonitor:
|
|||||||
# Cluster-wide check – only genuine regressions count
|
# Cluster-wide check – only genuine regressions count
|
||||||
if len(hosts_with_regression) >= self.cluster_thresh:
|
if len(hosts_with_regression) >= self.cluster_thresh:
|
||||||
sup = db.check_suppressed(suppressions, 'all', '')
|
sup = db.check_suppressed(suppressions, 'all', '')
|
||||||
event_id, is_new, consec = db.upsert_event(
|
event_id, is_new, consec, ticket_id = db.upsert_event(
|
||||||
'cluster_network_issue', 'critical', 'prometheus',
|
'cluster_network_issue', 'critical', 'prometheus',
|
||||||
self.cluster_name, '',
|
self.cluster_name, '',
|
||||||
f'{len(hosts_with_regression)} hosts reporting simultaneous interface failures: '
|
f'{len(hosts_with_regression)} hosts reporting simultaneous interface failures: '
|
||||||
f'{", ".join(hosts_with_regression)}',
|
f'{", ".join(hosts_with_regression)}',
|
||||||
)
|
)
|
||||||
if not sup and is_new:
|
if not sup and (is_new or not ticket_id):
|
||||||
title = (
|
title = (
|
||||||
f'[{self.cluster_name}][auto][production][issue][network][cluster-wide] '
|
f'[{self.cluster_name}][auto][production][issue][network][cluster-wide] '
|
||||||
f'Multiple hosts reporting interface failures'
|
f'Multiple hosts reporting interface failures'
|
||||||
@@ -768,7 +771,7 @@ class NetworkMonitor:
|
|||||||
db.resolve_event('cluster_network_issue', self.cluster_name, '')
|
db.resolve_event('cluster_network_issue', self.cluster_name, '')
|
||||||
|
|
||||||
def _ticket_interface(
|
def _ticket_interface(
|
||||||
self, event_id: int, is_new: bool, host: str, iface: str, consec: int
|
self, event_id: int, host: str, iface: str, consec: int
|
||||||
) -> None:
|
) -> None:
|
||||||
title = (
|
title = (
|
||||||
f'[{host}][auto][production][issue][network][single-node] '
|
f'[{host}][auto][production][issue][network][single-node] '
|
||||||
@@ -786,7 +789,7 @@ class NetworkMonitor:
|
|||||||
f'Please inspect the cable/SFP/switch port for {host}/{iface}.'
|
f'Please inspect the cable/SFP/switch port for {host}/{iface}.'
|
||||||
)
|
)
|
||||||
tid = self.tickets.create(title, desc, priority='2')
|
tid = self.tickets.create(title, desc, priority='2')
|
||||||
if tid and is_new:
|
if tid:
|
||||||
db.set_ticket_id(event_id, tid)
|
db.set_ticket_id(event_id, tid)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -801,17 +804,17 @@ class NetworkMonitor:
|
|||||||
name = d['name']
|
name = d['name']
|
||||||
if not d['connected']:
|
if not d['connected']:
|
||||||
sup = db.check_suppressed(suppressions, 'unifi_device', name)
|
sup = db.check_suppressed(suppressions, 'unifi_device', name)
|
||||||
event_id, is_new, consec = db.upsert_event(
|
event_id, is_new, consec, ticket_id = db.upsert_event(
|
||||||
'unifi_device_offline', 'critical', 'unifi',
|
'unifi_device_offline', 'critical', 'unifi',
|
||||||
name, d.get('type', ''),
|
name, d.get('type', ''),
|
||||||
f'UniFi {name} ({d.get("ip","")}) offline ({_now_utc()})',
|
f'UniFi {name} ({d.get("ip","")}) offline ({_now_utc()})',
|
||||||
)
|
)
|
||||||
if not sup and consec >= self.fail_thresh:
|
if not sup and consec >= self.fail_thresh and not ticket_id:
|
||||||
self._ticket_unifi(event_id, is_new, d)
|
self._ticket_unifi(event_id, d)
|
||||||
else:
|
else:
|
||||||
db.resolve_event('unifi_device_offline', name, d.get('type', ''))
|
db.resolve_event('unifi_device_offline', name, d.get('type', ''))
|
||||||
|
|
||||||
def _ticket_unifi(self, event_id: int, is_new: bool, device: dict) -> None:
|
def _ticket_unifi(self, event_id: int, device: dict) -> None:
|
||||||
name = device['name']
|
name = device['name']
|
||||||
title = (
|
title = (
|
||||||
f'[{name}][auto][production][issue][network][single-node] '
|
f'[{name}][auto][production][issue][network][single-node] '
|
||||||
@@ -828,31 +831,31 @@ class NetworkMonitor:
|
|||||||
f'Please check power and cable connectivity.'
|
f'Please check power and cable connectivity.'
|
||||||
)
|
)
|
||||||
tid = self.tickets.create(title, desc, priority='2')
|
tid = self.tickets.create(title, desc, priority='2')
|
||||||
if tid and is_new:
|
if tid:
|
||||||
db.set_ticket_id(event_id, tid)
|
db.set_ticket_id(event_id, tid)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Ping-only hosts (no node_exporter)
|
# Ping-only hosts (no node_exporter)
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def _process_ping_hosts(self, suppressions: list) -> None:
|
def _process_ping_hosts(self, suppressions: list, ping_states: Dict[str, bool]) -> None:
|
||||||
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
||||||
name, ip = h['name'], h['ip']
|
name, ip = h['name'], h['ip']
|
||||||
reachable = self.pulse.ping(ip)
|
reachable = ping_states.get(name, False)
|
||||||
|
|
||||||
if not reachable:
|
if not reachable:
|
||||||
sup = db.check_suppressed(suppressions, 'host', name)
|
sup = db.check_suppressed(suppressions, 'host', name)
|
||||||
event_id, is_new, consec = db.upsert_event(
|
event_id, is_new, consec, ticket_id = db.upsert_event(
|
||||||
'host_unreachable', 'critical', 'ping',
|
'host_unreachable', 'critical', 'ping',
|
||||||
name, ip,
|
name, ip,
|
||||||
f'Host {name} ({ip}) unreachable via ping ({_now_utc()})',
|
f'Host {name} ({ip}) unreachable via ping ({_now_utc()})',
|
||||||
)
|
)
|
||||||
if not sup and consec >= self.fail_thresh:
|
if not sup and consec >= self.fail_thresh and not ticket_id:
|
||||||
self._ticket_unreachable(event_id, is_new, name, ip, consec)
|
self._ticket_unreachable(event_id, name, ip, consec)
|
||||||
else:
|
else:
|
||||||
db.resolve_event('host_unreachable', name, ip)
|
db.resolve_event('host_unreachable', name, ip)
|
||||||
|
|
||||||
def _ticket_unreachable(
|
def _ticket_unreachable(
|
||||||
self, event_id: int, is_new: bool, name: str, ip: str, consec: int
|
self, event_id: int, name: str, ip: str, consec: int
|
||||||
) -> None:
|
) -> None:
|
||||||
title = (
|
title = (
|
||||||
f'[{name}][auto][production][issue][network][single-node] '
|
f'[{name}][auto][production][issue][network][single-node] '
|
||||||
@@ -870,7 +873,7 @@ class NetworkMonitor:
|
|||||||
f'Please check the host power, management interface, and network connectivity.'
|
f'Please check the host power, management interface, and network connectivity.'
|
||||||
)
|
)
|
||||||
tid = self.tickets.create(title, desc, priority='2')
|
tid = self.tickets.create(title, desc, priority='2')
|
||||||
if tid and is_new:
|
if tid:
|
||||||
db.set_ticket_id(event_id, tid)
|
db.set_ticket_id(event_id, tid)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -879,6 +882,7 @@ class NetworkMonitor:
|
|||||||
def _collect_snapshot(
|
def _collect_snapshot(
|
||||||
self, iface_states: Dict[str, Dict[str, bool]],
|
self, iface_states: Dict[str, Dict[str, bool]],
|
||||||
unifi_devices: Optional[List[dict]] = None,
|
unifi_devices: Optional[List[dict]] = None,
|
||||||
|
ping_states: Optional[Dict[str, bool]] = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
# Accept pre-fetched devices; fall back to empty list if unavailable
|
# Accept pre-fetched devices; fall back to empty list if unavailable
|
||||||
display_unifi = unifi_devices if unifi_devices is not None else []
|
display_unifi = unifi_devices if unifi_devices is not None else []
|
||||||
@@ -907,7 +911,7 @@ class NetworkMonitor:
|
|||||||
|
|
||||||
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
for h in self.cfg.get('monitor', {}).get('ping_hosts', []):
|
||||||
name, ip = h['name'], h['ip']
|
name, ip = h['name'], h['ip']
|
||||||
reachable = self.pulse.ping(ip, count=1, timeout=2)
|
reachable = (ping_states or {}).get(name, False)
|
||||||
hosts[name] = {
|
hosts[name] = {
|
||||||
'ip': ip,
|
'ip': ip,
|
||||||
'interfaces': {},
|
'interfaces': {},
|
||||||
@@ -918,7 +922,7 @@ class NetworkMonitor:
|
|||||||
return {
|
return {
|
||||||
'hosts': hosts,
|
'hosts': hosts,
|
||||||
'unifi': display_unifi,
|
'unifi': display_unifi,
|
||||||
'updated': datetime.utcnow().isoformat(),
|
'updated': datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
|
||||||
}
|
}
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -939,8 +943,14 @@ class NetworkMonitor:
|
|||||||
# 2. Fetch UniFi devices once — used by both snapshot and alert processing
|
# 2. Fetch UniFi devices once — used by both snapshot and alert processing
|
||||||
unifi_devices = self.unifi.get_devices()
|
unifi_devices = self.unifi.get_devices()
|
||||||
|
|
||||||
# 3. Collect and store snapshot for dashboard
|
# 3a. Ping-only hosts once — shared by snapshot and alert processing
|
||||||
snapshot = self._collect_snapshot(iface_states, unifi_devices)
|
ping_states: Dict[str, bool] = {
|
||||||
|
h['name']: self.pulse.ping(h['ip'])
|
||||||
|
for h in self.cfg.get('monitor', {}).get('ping_hosts', [])
|
||||||
|
}
|
||||||
|
|
||||||
|
# 3b. Collect and store snapshot for dashboard
|
||||||
|
snapshot = self._collect_snapshot(iface_states, unifi_devices, ping_states)
|
||||||
db.set_state('network_snapshot', snapshot)
|
db.set_state('network_snapshot', snapshot)
|
||||||
db.set_state('last_check', _now_utc())
|
db.set_state('last_check', _now_utc())
|
||||||
|
|
||||||
@@ -956,7 +966,7 @@ class NetworkMonitor:
|
|||||||
self._process_interfaces(iface_states, suppressions)
|
self._process_interfaces(iface_states, suppressions)
|
||||||
self._process_unifi(unifi_devices, suppressions)
|
self._process_unifi(unifi_devices, suppressions)
|
||||||
|
|
||||||
self._process_ping_hosts(suppressions)
|
self._process_ping_hosts(suppressions, ping_states)
|
||||||
|
|
||||||
# Housekeeping: deactivate expired suppressions and purge old resolved events
|
# Housekeeping: deactivate expired suppressions and purge old resolved events
|
||||||
db.cleanup_expired_suppressions()
|
db.cleanup_expired_suppressions()
|
||||||
@@ -967,6 +977,7 @@ class NetworkMonitor:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f'Monitor loop error: {e}', exc_info=True)
|
logger.error(f'Monitor loop error: {e}', exc_info=True)
|
||||||
time.sleep(30)
|
time.sleep(30)
|
||||||
|
continue
|
||||||
|
|
||||||
time.sleep(self.poll_interval)
|
time.sleep(self.poll_interval)
|
||||||
|
|
||||||
|
|||||||
+1
-1
@@ -220,7 +220,7 @@ function updateEventsTable(events, totalActive) {
|
|||||||
? GANDALF_CONFIG.ticket_web_url : 'http://t.lotusguild.org/ticket/';
|
? GANDALF_CONFIG.ticket_web_url : 'http://t.lotusguild.org/ticket/';
|
||||||
const ticket = e.ticket_id
|
const ticket = e.ticket_id
|
||||||
? `<a href="${lt.escHtml(ticketBase)}${lt.escHtml(String(e.ticket_id))}" target="_blank"
|
? `<a href="${lt.escHtml(ticketBase)}${lt.escHtml(String(e.ticket_id))}" target="_blank"
|
||||||
class="ticket-link">#${e.ticket_id}</a>`
|
class="ticket-link">#${lt.escHtml(String(e.ticket_id))}</a>`
|
||||||
: '–';
|
: '–';
|
||||||
const supBadge = e.is_suppressed
|
const supBadge = e.is_suppressed
|
||||||
? `<span class="lt-badge badge-suppressed" title="Alert suppressed">🔕 sup</span>`
|
? `<span class="lt-badge badge-suppressed" title="Alert suppressed">🔕 sup</span>`
|
||||||
|
|||||||
@@ -324,6 +324,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="host-grid" id="host-grid">
|
<div class="host-grid" id="host-grid">
|
||||||
|
{%- set has_global_sup = suppressions | selectattr('target_type', 'equalto', 'all') | list | length > 0 -%}
|
||||||
{% for name, host in snapshot.hosts.items() %}
|
{% for name, host in snapshot.hosts.items() %}
|
||||||
{% set suppressed = suppressions | selectattr('target_name', 'equalto', name) | list %}
|
{% set suppressed = suppressions | selectattr('target_name', 'equalto', name) | list %}
|
||||||
<div class="host-card host-card-{{ host.status }}" data-host="{{ name }}">
|
<div class="host-card host-card-{{ host.status }}" data-host="{{ name }}">
|
||||||
@@ -331,7 +332,7 @@
|
|||||||
<div class="host-name-row">
|
<div class="host-name-row">
|
||||||
<span class="host-status-dot dot-{{ host.status }}"></span>
|
<span class="host-status-dot dot-{{ host.status }}"></span>
|
||||||
<span class="host-name">{{ name }}</span>
|
<span class="host-name">{{ name }}</span>
|
||||||
{% if suppressed %}
|
{% if suppressed or has_global_sup %}
|
||||||
<span class="badge-suppressed" title="Suppressed">🔕</span>
|
<span class="badge-suppressed" title="Suppressed">🔕</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
@@ -468,7 +469,7 @@
|
|||||||
{% block scripts %}
|
{% block scripts %}
|
||||||
<script>
|
<script>
|
||||||
// Start auto-refresh using saved settings interval (default 30 s)
|
// Start auto-refresh using saved settings interval (default 30 s)
|
||||||
const _savedInterval = (window.gandalfSettings && window.gandalfSettings.refreshInterval) || 30;
|
const _savedInterval = window.gandalfSettings?.refreshInterval ?? 30;
|
||||||
if (_savedInterval > 0) lt.autoRefresh.start(refreshAll, _savedInterval * 1000);
|
if (_savedInterval > 0) lt.autoRefresh.start(refreshAll, _savedInterval * 1000);
|
||||||
|
|
||||||
// When settings change, restart auto-refresh with new interval
|
// When settings change, restart auto-refresh with new interval
|
||||||
|
|||||||
@@ -218,6 +218,7 @@ let _apiData = null;
|
|||||||
function selectPort(el) {
|
function selectPort(el) {
|
||||||
const swName = el.dataset.switch;
|
const swName = el.dataset.switch;
|
||||||
const idx = parseInt(el.dataset.portIdx, 10);
|
const idx = parseInt(el.dataset.portIdx, 10);
|
||||||
|
if (_diagPollTimer) { clearInterval(_diagPollTimer); _diagPollTimer = null; }
|
||||||
document.querySelectorAll('.switch-port-block.selected')
|
document.querySelectorAll('.switch-port-block.selected')
|
||||||
.forEach(e => e.classList.remove('selected'));
|
.forEach(e => e.classList.remove('selected'));
|
||||||
el.classList.add('selected');
|
el.classList.add('selected');
|
||||||
@@ -428,7 +429,7 @@ function renderInspector(data) {
|
|||||||
|
|
||||||
const updEl = document.getElementById('inspector-updated');
|
const updEl = document.getElementById('inspector-updated');
|
||||||
if (updEl && data.updated) {
|
if (updEl && data.updated) {
|
||||||
const updMs = new Date(data.updated + (data.updated.includes('Z') ? '' : 'Z'));
|
const updMs = new Date(_toIso(data.updated));
|
||||||
const ageMin = (Date.now() - updMs) / 60000;
|
const ageMin = (Date.now() - updMs) / 60000;
|
||||||
const timeStr = updMs.toLocaleTimeString();
|
const timeStr = updMs.toLocaleTimeString();
|
||||||
if (ageMin > 15) {
|
if (ageMin > 15) {
|
||||||
@@ -472,7 +473,7 @@ async function loadInspector() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
loadInspector();
|
loadInspector();
|
||||||
const _inspInterval = (window.gandalfSettings && window.gandalfSettings.refreshInterval) || 60;
|
const _inspInterval = window.gandalfSettings?.refreshInterval ?? 60;
|
||||||
if (_inspInterval > 0) lt.autoRefresh.start(loadInspector, Math.max(_inspInterval, 15) * 1000);
|
if (_inspInterval > 0) lt.autoRefresh.start(loadInspector, Math.max(_inspInterval, 15) * 1000);
|
||||||
|
|
||||||
window.onGandalfSettingsChanged = function(s) {
|
window.onGandalfSettingsChanged = function(s) {
|
||||||
|
|||||||
+13
-9
@@ -372,14 +372,16 @@ function togglePanel(panel) {
|
|||||||
if (title) title.setAttribute('aria-expanded', isCollapsed ? 'false' : 'true');
|
if (title) title.setAttribute('aria-expanded', isCollapsed ? 'false' : 'true');
|
||||||
const id = panel.id;
|
const id = panel.id;
|
||||||
if (id) {
|
if (id) {
|
||||||
const collapsed = JSON.parse(sessionStorage.getItem('linksCollapsed') || '{}');
|
let collapsed = {};
|
||||||
|
try { collapsed = JSON.parse(sessionStorage.getItem('linksCollapsed') || '{}'); } catch(_) {}
|
||||||
collapsed[id] = panel.classList.contains('collapsed');
|
collapsed[id] = panel.classList.contains('collapsed');
|
||||||
sessionStorage.setItem('linksCollapsed', JSON.stringify(collapsed));
|
try { sessionStorage.setItem('linksCollapsed', JSON.stringify(collapsed)); } catch(_) {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function restoreCollapseState() {
|
function restoreCollapseState() {
|
||||||
const collapsed = JSON.parse(sessionStorage.getItem('linksCollapsed') || '{}');
|
let collapsed = {};
|
||||||
|
try { collapsed = JSON.parse(sessionStorage.getItem('linksCollapsed') || '{}'); } catch(_) {}
|
||||||
for (const [id, isCollapsed] of Object.entries(collapsed)) {
|
for (const [id, isCollapsed] of Object.entries(collapsed)) {
|
||||||
const panel = document.getElementById(id);
|
const panel = document.getElementById(id);
|
||||||
if (!panel) continue;
|
if (!panel) continue;
|
||||||
@@ -507,9 +509,11 @@ function collapseAll() {
|
|||||||
if (btn) btn.textContent = '[+]';
|
if (btn) btn.textContent = '[+]';
|
||||||
if (title) title.setAttribute('aria-expanded', 'false');
|
if (title) title.setAttribute('aria-expanded', 'false');
|
||||||
});
|
});
|
||||||
sessionStorage.setItem('linksCollapsed', JSON.stringify(
|
try {
|
||||||
Object.fromEntries([...document.querySelectorAll('.link-host-panel')].map(p => [p.id, true]))
|
sessionStorage.setItem('linksCollapsed', JSON.stringify(
|
||||||
));
|
Object.fromEntries([...document.querySelectorAll('.link-host-panel')].map(p => [p.id, true]))
|
||||||
|
));
|
||||||
|
} catch(_) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
function expandAll() {
|
function expandAll() {
|
||||||
@@ -520,7 +524,7 @@ function expandAll() {
|
|||||||
if (btn) btn.textContent = '[–]';
|
if (btn) btn.textContent = '[–]';
|
||||||
if (title) title.setAttribute('aria-expanded', 'true');
|
if (title) title.setAttribute('aria-expanded', 'true');
|
||||||
});
|
});
|
||||||
sessionStorage.setItem('linksCollapsed', '{}');
|
try { sessionStorage.setItem('linksCollapsed', '{}'); } catch(_) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Stale data warning ────────────────────────────────────────────
|
// ── Stale data warning ────────────────────────────────────────────
|
||||||
@@ -548,7 +552,7 @@ function checkLinksStale(updatedStr) {
|
|||||||
async function loadLinks() {
|
async function loadLinks() {
|
||||||
try {
|
try {
|
||||||
const data = await lt.api.get('/api/links');
|
const data = await lt.api.get('/api/links');
|
||||||
if (!data.hosts && !data.unifi_switches) {
|
if ((!data.hosts || !Object.keys(data.hosts).length) && (!data.unifi_switches || !Object.keys(data.unifi_switches).length)) {
|
||||||
document.getElementById('links-container').innerHTML =
|
document.getElementById('links-container').innerHTML =
|
||||||
'<div class="link-no-data">No link data yet — monitor has not completed a full cycle.</div>';
|
'<div class="link-no-data">No link data yet — monitor has not completed a full cycle.</div>';
|
||||||
return;
|
return;
|
||||||
@@ -567,7 +571,7 @@ async function loadLinks() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
loadLinks();
|
loadLinks();
|
||||||
const _linksInterval = (window.gandalfSettings && window.gandalfSettings.refreshInterval) || 60;
|
const _linksInterval = window.gandalfSettings?.refreshInterval ?? 60;
|
||||||
if (_linksInterval > 0) lt.autoRefresh.start(loadLinks, Math.max(_linksInterval, 15) * 1000);
|
if (_linksInterval > 0) lt.autoRefresh.start(loadLinks, Math.max(_linksInterval, 15) * 1000);
|
||||||
|
|
||||||
window.onGandalfSettingsChanged = function(s) {
|
window.onGandalfSettingsChanged = function(s) {
|
||||||
|
|||||||
@@ -51,7 +51,7 @@
|
|||||||
<label class="lt-label" for="s-reason">Reason <span class="required">*</span></label>
|
<label class="lt-label" for="s-reason">Reason <span class="required">*</span></label>
|
||||||
<input type="text" class="lt-input" id="s-reason" name="reason"
|
<input type="text" class="lt-input" id="s-reason" name="reason"
|
||||||
placeholder="e.g. Planned switch maintenance, replacing SFP on large1/enp43s0"
|
placeholder="e.g. Planned switch maintenance, replacing SFP on large1/enp43s0"
|
||||||
required>
|
required aria-required="true">
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -36,6 +36,12 @@ class TestBuildSshCommand:
|
|||||||
cmd = DiagnosticsRunner.build_ssh_command('10.0.0.1', 'eth0')
|
cmd = DiagnosticsRunner.build_ssh_command('10.0.0.1', 'eth0')
|
||||||
assert 'ethtool' in cmd
|
assert 'ethtool' in cmd
|
||||||
|
|
||||||
|
def test_dmesg_uses_fixed_string_grep(self):
|
||||||
|
# grep -F prevents iface names with dots (e.g. eth0.1) being treated as
|
||||||
|
# regex wildcards; -- prevents leading - from being parsed as a flag
|
||||||
|
cmd = DiagnosticsRunner.build_ssh_command('10.0.0.1', 'eth0')
|
||||||
|
assert 'grep -F --' in cmd
|
||||||
|
|
||||||
|
|
||||||
# ── parse_output ─────────────────────────────────────────────────────────────
|
# ── parse_output ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user