mirror of
https://github.com/kmein/niveum
synced 2026-03-16 10:11:08 +01:00
feat(monitoring): probe alew.hu-berlin.de
This commit is contained in:
17
configs/monitoring/blackbox.nix
Normal file
17
configs/monitoring/blackbox.nix
Normal file
@@ -0,0 +1,17 @@
|
||||
# https://github.com/Fluepke/nix-files/blob/2be70b76a198afaa7763132fed645a3c19d5af6e/configuration/common/blackbox-exporter.yml
|
||||
# https://github.com/xHain-hackspace/xhain-nixfiles/blob/0d6e3b87a07317c2d54cccabf4f90da589319e2c/common/prometheus/blackbox-exporter.yml
|
||||
# Blackbox exporter module definitions. The monitoring module imports this
# attrset and renders it to YAML for the exporter's config file.
{
|
||||
# Module selected by the Prometheus scrape job via `params.module = [ "http_2xx" ]`.
modules.http_2xx = {
|
||||
http = {
|
||||
# Fail the probe when the response was not served over TLS.
fail_if_not_ssl = true;
|
||||
# Together with preferred_ip_protocol = "ip4" below: probe over IPv4 only,
# without falling back to the other IP protocol.
ip_protocol_fallback = false;
|
||||
method = "GET";
|
||||
# Redirects are followed (the "no follow" switch is off).
no_follow_redirects = false;
|
||||
preferred_ip_protocol = "ip4";
|
||||
# HTTP versions accepted in the response.
valid_http_versions = [ "HTTP/1.1" "HTTP/2.0" ];
|
||||
# NOTE(review): certificate verification is disabled, so an invalid or
# expired certificate does not fail the probe by itself; expiry is presumably
# meant to be caught by the alert rules on probe_ssl_earliest_cert_expiry — confirm.
tls_config.insecure_skip_verify = true;
|
||||
};
|
||||
prober = "http";
|
||||
# Upper bound for a single probe.
timeout = "15s";
|
||||
};
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
{ lib, config, pkgs, ... }:
|
||||
let
|
||||
lokiConfig = import ./loki.nix;
|
||||
blackboxConfig = import ./blackbox.nix;
|
||||
in
|
||||
{
|
||||
services.grafana = {
|
||||
@@ -78,6 +79,36 @@ in
|
||||
expr = "time() - node_boot_time_seconds < 300";
|
||||
annotations.summary = "{{$labels.job}}: Reboot";
|
||||
}
|
||||
# Alert when a probe has reported probe_success == 0 continuously for 5 minutes.
{
|
||||
alert = "ProbeFailed";
|
||||
expr = "probe_success == 0";
|
||||
for = "5m";
|
||||
annotations.summary = "{{$labels.instance}}: probe failed";
|
||||
}
|
||||
# Alert when the 1-minute average of probe_http_duration_seconds stays above 1
# for 5 minutes.
# NOTE(review): this metric is presumably reported per HTTP phase, in which case
# the threshold applies to each phase separately rather than to the total
# request time — confirm.
{
|
||||
alert = "SlowProbe";
|
||||
expr = "avg_over_time(probe_http_duration_seconds[1m]) > 1";
|
||||
for = "5m";
|
||||
annotations.summary = "{{$labels.instance}}: HTTP probe slow";
|
||||
}
|
||||
# Alert when the probed status code has been outside the 200–399 range for
# 5 minutes; the summary reports the offending code via {{$value}}.
{
|
||||
alert = "HttpStatusCode";
|
||||
expr = "probe_http_status_code <= 199 OR probe_http_status_code >= 400";
|
||||
for = "5m";
|
||||
annotations.summary = "{{$labels.instance}}: returns {{$value}}";
|
||||
}
|
||||
# Warn while the certificate is still valid but has fewer than 30 days left.
# The lower bound (0 <) keeps this rule from firing alongside SslExpiry once
# the certificate has actually expired. PromQL comparisons are left-associative
# filters, so this reads as (0 < remaining) < 30 days.
{
|
||||
alert = "SslExpirySoon";
|
||||
expr = "0 < probe_ssl_earliest_cert_expiry - time() < 86400 * 30";
|
||||
for = "5m";
|
||||
annotations.summary = "{{$labels.instance}}: SSL certificate expires in less than 30 days";
|
||||
}
|
||||
# Alert when the earliest certificate expiry timestamp has been at or before
# the current time for 5 minutes, i.e. the certificate has expired.
{
|
||||
alert = "SslExpiry";
|
||||
expr = "probe_ssl_earliest_cert_expiry - time() <= 0";
|
||||
for = "5m";
|
||||
annotations.summary = "{{$labels.instance}}: SSL certificate has expired";
|
||||
}
|
||||
];
|
||||
}];
|
||||
})];
|
||||
@@ -157,15 +188,35 @@ in
|
||||
job_name = "makanek";
|
||||
static_configs = [ { targets = [
|
||||
"127.0.0.1:${toString config.services.prometheus.exporters.node.port}"
|
||||
# "127.0.0.1:${toString config.services.prometheus.exporters.nginx.port}"
|
||||
]; } ];
|
||||
}
|
||||
# Scrape job that probes external targets through the blackbox exporter's
# /probe endpoint, using the http_2xx module.
{
|
||||
job_name = "blackbox";
|
||||
metrics_path = "/probe";
|
||||
params.module = [ "http_2xx" ];
|
||||
relabel_configs = [
|
||||
# 1. Hand the listed target to the exporter as the `target` URL parameter.
{ source_labels = ["__address__"]; target_label = "__param_target"; }
|
||||
# 2. Label the resulting series with the probed target instead of the exporter address.
{ source_labels = ["__param_target"]; target_label = "instance"; }
|
||||
# 3. Point the actual scrape at the local blackbox exporter.
{ replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"; target_label = "__address__"; }
|
||||
];
|
||||
static_configs = [{
|
||||
targets = [
|
||||
# NOTE(review): no URL scheme is given although the http_2xx module sets
# fail_if_not_ssl; this presumably relies on the exporter defaulting to
# http:// and the site redirecting to HTTPS — confirm, or use an explicit
# https:// URL.
"alew.hu-berlin.de"
|
||||
];
|
||||
}];
|
||||
}
|
||||
# Scrape host zaatar.r on the port configured for this machine's node exporter
# (assumes zaatar's node exporter listens on the same port — TODO confirm).
{
|
||||
job_name = "zaatar";
|
||||
static_configs = [ { targets = [ "zaatar.r:${toString config.services.prometheus.exporters.node.port}" ]; } ];
|
||||
}
|
||||
];
|
||||
|
||||
|
||||
# Run the blackbox exporter locally; its config file is produced by rendering
# blackboxConfig (imported from ./blackbox.nix) as YAML.
services.prometheus.exporters.blackbox = {
|
||||
enable = true;
|
||||
configFile = (pkgs.formats.yaml {}).generate "blackbox.yaml" blackboxConfig;
|
||||
};
|
||||
|
||||
# Open the Loki HTTP listen port (taken from the imported Loki config) in the firewall.
networking.firewall.allowedTCPPorts = [
|
||||
lokiConfig.server.http_listen_port
|
||||
];
|
||||
|
||||
Reference in New Issue
Block a user