1
0
mirror of https://github.com/kmein/niveum synced 2026-03-16 10:11:08 +01:00

feat(monitoring): probe alew.hu-berlin.de

This commit is contained in:
2021-12-13 17:29:09 +01:00
parent 7d00a2e896
commit 08c04462a7
2 changed files with 69 additions and 1 deletions

View File

@@ -0,0 +1,17 @@
# https://github.com/Fluepke/nix-files/blob/2be70b76a198afaa7763132fed645a3c19d5af6e/configuration/common/blackbox-exporter.yml
# https://github.com/xHain-hackspace/xhain-nixfiles/blob/0d6e3b87a07317c2d54cccabf4f90da589319e2c/common/prometheus/blackbox-exporter.yml
# Blackbox exporter configuration expressed as a Nix attribute set.
# It declares a single probe module, "http_2xx".
{
  modules.http_2xx = {
    prober = "http";
    timeout = "15s";

    http = {
      method = "GET";
      valid_http_versions = [ "HTTP/1.1" "HTTP/2.0" ];

      # The "do not follow" switch is off, i.e. redirects are followed.
      no_follow_redirects = false;

      # Probe over IPv4 only; fallback to the other IP protocol is disabled.
      preferred_ip_protocol = "ip4";
      ip_protocol_fallback = false;

      # The probe is marked failed unless SSL/TLS is in use.
      fail_if_not_ssl = true;

      # NOTE(review): certificate verification is skipped here, so an invalid
      # certificate chain will presumably not fail the probe by itself --
      # confirm that this is intended.
      tls_config = {
        insecure_skip_verify = true;
      };
    };
  };
}

View File

@@ -1,6 +1,7 @@
{ lib, config, pkgs, ... }: { lib, config, pkgs, ... }:
let let
lokiConfig = import ./loki.nix; lokiConfig = import ./loki.nix;
blackboxConfig = import ./blackbox.nix;
in in
{ {
services.grafana = { services.grafana = {
@@ -78,6 +79,36 @@ in
expr = "time() - node_boot_time_seconds < 300"; expr = "time() - node_boot_time_seconds < 300";
annotations.summary = "{{$labels.job}}: Reboot"; annotations.summary = "{{$labels.job}}: Reboot";
} }
# Alert rules evaluated on blackbox probe metrics. Every rule must hold for
# five minutes ("for") before it fires.

# The probe itself reported failure.
{
  alert = "ProbeFailed";
  expr = "probe_success == 0";
  for = "5m";
  annotations.summary = "{{$labels.instance}}: probe failed";
}
# The one-minute average of the HTTP probe duration exceeds one second.
{
  alert = "SlowProbe";
  expr = "avg_over_time(probe_http_duration_seconds[1m]) > 1";
  for = "5m";
  annotations.summary = "{{$labels.instance}}: HTTP probe slow";
}
# The returned status code is outside the 200-399 range; the summary shows
# the offending code via $value.
{
  alert = "HttpStatusCode";
  expr = "probe_http_status_code <= 199 OR probe_http_status_code >= 400";
  for = "5m";
  annotations.summary = "{{$labels.instance}}: returns {{$value}}";
}
# The earliest certificate expiry is less than 30 days (86400 s * 30) away.
# $value is the left-hand side of the comparison, i.e. the remaining lifetime
# in seconds, so the summary reports the actual time left rather than a fixed
# "30 days" (the rule matches any remaining lifetime below that threshold).
{
  alert = "SslExpirySoon";
  expr = "probe_ssl_earliest_cert_expiry - time() < 86400 * 30";
  for = "5m";
  annotations.summary = "{{$labels.instance}}: SSL certificate expires in {{ $value | humanizeDuration }}";
}
# The earliest certificate expiry lies in the past.
{
  alert = "SslExpiry";
  expr = "probe_ssl_earliest_cert_expiry - time() <= 0";
  for = "5m";
  annotations.summary = "{{$labels.instance}}: SSL certificate has expired";
}
]; ];
}]; }];
})]; })];
@@ -157,15 +188,35 @@ in
job_name = "makanek"; job_name = "makanek";
static_configs = [ { targets = [ static_configs = [ { targets = [
"127.0.0.1:${toString config.services.prometheus.exporters.node.port}" "127.0.0.1:${toString config.services.prometheus.exporters.node.port}"
# "127.0.0.1:${toString config.services.prometheus.exporters.nginx.port}"
]; } ]; ]; } ];
} }
# Scrape job "blackbox": each listed target is handed to the blackbox
# exporter's /probe endpoint and checked with the "http_2xx" module.
{
  job_name = "blackbox";
  metrics_path = "/probe";
  params = { module = [ "http_2xx" ]; };

  # NOTE(review): the target carries no URL scheme -- confirm that the
  # exporter's default scheme handling matches what should be probed.
  static_configs = [
    { targets = [ "alew.hu-berlin.de" ]; }
  ];

  # The order of these three steps matters:
  relabel_configs = [
    # 1. pass the configured target on as the "target" URL parameter,
    {
      source_labels = [ "__address__" ];
      target_label = "__param_target";
    }
    # 2. keep that target as the "instance" label,
    {
      source_labels = [ "__param_target" ];
      target_label = "instance";
    }
    # 3. and point the scrape address at the exporter on localhost.
    {
      target_label = "__address__";
      replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}";
    }
  ];
}
{ {
job_name = "zaatar"; job_name = "zaatar";
static_configs = [ { targets = [ "zaatar.r:${toString config.services.prometheus.exporters.node.port}" ]; } ]; static_configs = [ { targets = [ "zaatar.r:${toString config.services.prometheus.exporters.node.port}" ]; } ];
} }
]; ];
# Enable the blackbox exporter and hand it a YAML file named "blackbox.yaml"
# generated from the blackboxConfig attribute set.
services.prometheus.exporters.blackbox =
  let
    yamlFormat = pkgs.formats.yaml { };
  in
  {
    configFile = yamlFormat.generate "blackbox.yaml" blackboxConfig;
    enable = true;
  };
networking.firewall.allowedTCPPorts = [ networking.firewall.allowedTCPPorts = [
lokiConfig.server.http_listen_port lokiConfig.server.http_listen_port
]; ];