De-hardcode ticket URL and cluster name; improve diagnostic polling UX

app.py:
- Context processor injects config.ticket_api.web_url into all templates
  (falls back to 'http://t.lotusguild.org/ticket/' if not set in config)

templates/base.html:
- Inject GANDALF_CONFIG JS global with ticket_web_url before app.js loads

static/app.js:
- Use GANDALF_CONFIG.ticket_web_url instead of hardcoded domain

templates/index.html:
- Use {{ config.ticket_api.web_url }} Jinja var instead of hardcoded domain

monitor.py:
- CLUSTER_NAME constant kept as default; NetworkMonitor now reads cluster_name
  from the monitor.cluster_name config key, falling back to the constant
- All CLUSTER_NAME references inside class methods replaced with self.cluster_name

templates/inspector.html:
- pollDiagnostic() .catch() now clears interval and shows error message instead
  of silently ignoring network failures during active polling

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-14 14:31:57 -04:00
parent 8f852ed830
commit 14eaa6a8c9
6 changed files with 31 additions and 6 deletions

13
app.py
View File

@@ -29,6 +29,19 @@ app = Flask(__name__)
_cfg = None _cfg = None
@app.context_processor
def inject_config():
    """Expose a whitelisted subset of the configuration to every template.

    Only the ticket web URL is exposed. The mapping is published under the
    template name ``config``, so templates read ``config.ticket_api.web_url``.

    NOTE(review): the key ``config`` presumably shadows the ``config`` object
    Flask provides to templates by default -- confirm no template relies on
    the Flask application config.

    Returns:
        dict: ``{'config': {'ticket_api': {'web_url': <str>}}}``. ``web_url``
        is the configured value, or the built-in default when the
        ``ticket_api`` section or its ``web_url`` entry is missing, null,
        or empty.
    """
    default_url = 'http://t.lotusguild.org/ticket/'
    cfg = _config()
    # `or {}` guards against a section that is present but null: in that case
    # .get(key, {}) returns None and the chained .get() would raise.
    ticket_cfg = cfg.get('ticket_api') or {}
    # An empty URL would turn every ticket link into a relative link, so it
    # is treated the same as "not set".
    web_url = ticket_cfg.get('web_url') or default_url
    return {
        'config': {
            'ticket_api': {
                'web_url': web_url,
            },
        },
    }
# In-memory diagnostic job store { job_id: { status, result, created_at } } # In-memory diagnostic job store { job_id: { status, result, created_at } }
_diag_jobs: dict = {} _diag_jobs: dict = {}
_diag_lock = threading.Lock() _diag_lock = threading.Lock()

View File

@@ -641,6 +641,7 @@ class NetworkMonitor:
self.poll_interval = mon.get('poll_interval', 120) self.poll_interval = mon.get('poll_interval', 120)
self.fail_thresh = mon.get('failure_threshold', 2) self.fail_thresh = mon.get('failure_threshold', 2)
self.cluster_thresh = mon.get('cluster_threshold', 3) self.cluster_thresh = mon.get('cluster_threshold', 3)
self.cluster_name = mon.get('cluster_name', CLUSTER_NAME)
# Build Prometheus instance → hostname lookup # Build Prometheus instance → hostname lookup
self._instance_map: Dict[str, str] = { self._instance_map: Dict[str, str] = {
@@ -706,13 +707,13 @@ class NetworkMonitor:
sup = db.check_suppressed(suppressions, 'all', '') sup = db.check_suppressed(suppressions, 'all', '')
event_id, is_new, consec = db.upsert_event( event_id, is_new, consec = db.upsert_event(
'cluster_network_issue', 'critical', 'prometheus', 'cluster_network_issue', 'critical', 'prometheus',
CLUSTER_NAME, '', self.cluster_name, '',
f'{len(hosts_with_regression)} hosts reporting simultaneous interface failures: ' f'{len(hosts_with_regression)} hosts reporting simultaneous interface failures: '
f'{", ".join(hosts_with_regression)}', f'{", ".join(hosts_with_regression)}',
) )
if not sup and is_new: if not sup and is_new:
title = ( title = (
f'[{CLUSTER_NAME}][auto][production][issue][network][cluster-wide] ' f'[{self.cluster_name}][auto][production][issue][network][cluster-wide] '
f'Multiple hosts reporting interface failures' f'Multiple hosts reporting interface failures'
) )
desc = ( desc = (
@@ -728,7 +729,7 @@ class NetworkMonitor:
if tid: if tid:
db.set_ticket_id(event_id, tid) db.set_ticket_id(event_id, tid)
else: else:
db.resolve_event('cluster_network_issue', CLUSTER_NAME, '') db.resolve_event('cluster_network_issue', self.cluster_name, '')
def _ticket_interface( def _ticket_interface(
self, event_id: int, is_new: bool, host: str, iface: str, consec: int self, event_id: int, is_new: bool, host: str, iface: str, consec: int

View File

@@ -135,8 +135,10 @@ function updateEventsTable(events) {
const supType = e.event_type === 'unifi_device_offline' ? 'unifi_device' const supType = e.event_type === 'unifi_device_offline' ? 'unifi_device'
: e.event_type === 'interface_down' ? 'interface' : e.event_type === 'interface_down' ? 'interface'
: 'host'; : 'host';
const ticketBase = (typeof GANDALF_CONFIG !== 'undefined' && GANDALF_CONFIG.ticket_web_url)
? GANDALF_CONFIG.ticket_web_url : 'http://t.lotusguild.org/ticket/';
const ticket = e.ticket_id const ticket = e.ticket_id
? `<a href="http://t.lotusguild.org/ticket/${e.ticket_id}" target="_blank" ? `<a href="${ticketBase}${e.ticket_id}" target="_blank"
class="ticket-link">#${e.ticket_id}</a>` class="ticket-link">#${e.ticket_id}</a>`
: ''; : '';
return ` return `

View File

@@ -41,6 +41,11 @@
{% block content %}{% endblock %} {% block content %}{% endblock %}
</main> </main>
<script>
    {# Server-provided settings for static/app.js, which reads
       GANDALF_CONFIG.ticket_web_url when it builds ticket links. #}
    {# tojson renders a quoted, script-safe JS string literal. Plain
       autoescaped interpolation would HTML-escape the value (for example
       "&" becomes "&amp;"), which corrupts the URL inside a script element. #}
    const GANDALF_CONFIG = {
        ticket_web_url: {{ config.get('ticket_api', {}).get('web_url', 'http://t.lotusguild.org/ticket/') | tojson }}
    };
</script>
<script src="{{ url_for('static', filename='app.js') }}"></script> <script src="{{ url_for('static', filename='app.js') }}"></script>
{% block scripts %}{% endblock %} {% block scripts %}{% endblock %}
</body> </body>

View File

@@ -219,7 +219,7 @@
<td>{{ e.consecutive_failures }}</td> <td>{{ e.consecutive_failures }}</td>
<td> <td>
{% if e.ticket_id %} {% if e.ticket_id %}
<a href="http://t.lotusguild.org/ticket/{{ e.ticket_id }}" target="_blank" <a href="{{ config.ticket_api.web_url }}{{ e.ticket_id }}" target="_blank"
class="ticket-link">#{{ e.ticket_id }}</a> class="ticket-link">#{{ e.ticket_id }}</a>
{% else %}{% endif %} {% else %}{% endif %}
</td> </td>

View File

@@ -451,7 +451,11 @@ function pollDiagnostic(jobId, statusEl, resultsEl) {
renderDiagnosticResults(resp.result, resultsEl); renderDiagnosticResults(resp.result, resultsEl);
} }
}) })
.catch(() => {}); .catch(() => {
clearInterval(_diagPollTimer);
_diagPollTimer = null;
statusEl.textContent = 'Error: lost connection while collecting diagnostics.';
});
}, 2000); }, 2000);
} }