1
0
mirror of https://github.com/kmein/niveum synced 2026-03-21 12:21:08 +01:00

chore: move configs to system directories

This commit is contained in:
2021-12-29 18:49:51 +01:00
parent 383fb35837
commit 58d39aa41a
43 changed files with 234 additions and 689 deletions

View File

@@ -0,0 +1,17 @@
# https://github.com/Fluepke/nix-files/blob/2be70b76a198afaa7763132fed645a3c19d5af6e/configuration/common/blackbox-exporter.yml
# https://github.com/xHain-hackspace/xhain-nixfiles/blob/0d6e3b87a07317c2d54cccabf4f90da589319e2c/common/prometheus/blackbox-exporter.yml
{
modules.http_2xx = {
http = {
fail_if_not_ssl = true;
ip_protocol_fallback = false;
method = "GET";
no_follow_redirects = false;
preferred_ip_protocol = "ip4";
valid_http_versions = [ "HTTP/1.1" "HTTP/2.0" ];
tls_config.insecure_skip_verify = true;
};
prober = "http";
timeout = "15s";
};
}

View File

@@ -0,0 +1,229 @@
{ lib, config, pkgs, ... }:
let
lokiConfig = import ./loki.nix;
blackboxConfig = import ./blackbox.nix;
in
{
services.grafana = {
enable = true;
domain = "grafana.kmein.r";
port = 9444;
addr = "127.0.0.1";
};
services.nginx.virtualHosts.${config.services.grafana.domain} = {
locations."/" = {
proxyPass = "http://127.0.0.1:${toString config.services.grafana.port}";
proxyWebsockets = true;
};
};
services.prometheus.rules = let diskFreeThreshold = 10; in [(builtins.toJSON {
groups = [{
name = "niveum";
rules = [
{
alert = "ServiceDown";
expr = ''node_systemd_unit_state{state="failed"} == 1'';
annotations = {
summary = "{{$labels.job}}: Service {{$labels.name}} failed to start.";
};
}
{
alert = "RootPartitionFull";
for = "10m";
expr = ''(node_filesystem_free_bytes{mountpoint="/"} * 100) / node_filesystem_size_bytes{mountpoint="/"} < ${toString diskFreeThreshold}'';
annotations = {
summary = "{{ $labels.job }}: Filesystem is running out of space soon.";
description = ''The root disk of {{ $labels.job }} has {{ $value | printf "%.2f" }}% free disk space (threshold at ${toString diskFreeThreshold}%).'';
};
}
{
alert = "RootPartitionFullWeek";
for = "1h";
expr = ''node_filesystem_free_bytes{mountpoint="/"} ''
+ ''and predict_linear(node_filesystem_free_bytes{mountpoint="/"}[2d], 7*24*3600) <= 0'';
annotations = {
summary = "{{$labels.job}}: Filesystem is running out of space in 7 days.";
};
}
{
alert = "HighLoad";
expr = ''node_load15 / on(job) count(node_cpu_seconds_total{mode="system"}) by (job) >= 1.0'';
for = "10m";
annotations = {
summary = "{{$labels.job}}: Running on high load: {{$value}}";
};
}
{
alert = "HighRAM";
expr = "node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes < node_memory_MemTotal_bytes * 0.1";
for = "1h";
annotations.summary = "{{$labels.job}}: Using lots of RAM.";
}
{
alert = "UptimeMonster";
expr = "time() - node_boot_time_seconds > 2592000";
annotations.summary = "{{$labels.job}}: up for more than 30 days.";
}
{
alert = "HostDown";
expr = ''up == 0'';
for = "5m";
annotations = {
summary = "Host {{ $labels.job }} down for 5 minutes.";
};
}
{
alert = "Reboot";
expr = "time() - node_boot_time_seconds < 300";
annotations.summary = "{{$labels.job}}: Reboot";
}
{
alert = "ProbeFailed";
expr = "probe_success == 0";
for = "5m";
annotations.summary = "{{$labels.instance}}: probe failed";
}
{
alert = "SlowProbe";
expr = "avg_over_time(probe_http_duration_seconds[1m]) > 1";
for = "5m";
annotations.summary = "{{$labels.instance}}: HTTP probe slow";
}
{
alert = "HttpStatusCode";
expr = "probe_http_status_code != 0 AND (probe_http_status_code <= 199 OR probe_http_status_code >= 400)";
for = "5m";
annotations.summary = "{{$labels.instance}}: status code {{$value}}";
}
{
alert = "SslExpirySoon";
expr = "probe_ssl_earliest_cert_expiry - time() < 86400 * 30";
for = "5m";
annotations.summary = "{{$labels.instance}}: SSL certificate expires in 30 days";
}
{
alert = "SslExpiry";
expr = "probe_ssl_earliest_cert_expiry - time() <= 0";
for = "5m";
annotations.summary = "{{$labels.instance}}: SSL certificate has expired";
}
];
}];
})];
systemd.services.alertmanager-bot-telegram =
let
alertmanager-bot-telegram = pkgs.buildGoModule rec {
pname = "alertmanager-bot";
version = "2020-07-13";
src = pkgs.fetchFromGitHub {
owner = "metalmatze";
repo = "alertmanager-bot";
rev = "5efc0bbbf8023d4324e9da98562f064a714a7206";
sha256 = "09cciml1j8x76jpm2v5v6h2q6j1fkhsz1kswslmx8wl4wk40xgp4";
};
vendorSha256 = "1v0fgin8dn81b559zz4lqmrl7hikr46g4gb18sci4riql5qs1isj";
postInstall = ''
install -D ./default.tmpl $out/templates/default.tmpl
'';
};
in {
wantedBy = [ "multi-user.target" ];
after = [ "ip-up.target" ];
environment.TELEGRAM_ADMIN = "18980945";
environment.TELEGRAM_TOKEN = lib.strings.fileContents <system-secrets/telegram/prometheus.token>;
serviceConfig = {
DynamicUser = true;
StateDirectory = "alertbot";
ExecStart = ''${alertmanager-bot-telegram}/bin/alertmanager-bot \
--alertmanager.url=http://localhost:9093 --log.level=info \
--store=bolt --bolt.path=/var/lib/alertbot/bot.db \
--listen.addr="0.0.0.0:16320" \
--template.paths=${pkgs.writeText "template.tmpl" ''
{{ define "telegram.default" }}
{{range .Alerts -}}
{{ if eq .Status "firing" }}
<b>{{ index .Annotations "summary"}}</b>
{{ index .Annotations "description" }}
See on Grafana: http://${config.services.grafana.domain}/d/alpUteInz/niveum
{{ else -}}
RESOLVED 😌 <del>{{ index .Annotations "summary"}}</del>
{{- end }}
{{end -}}
{{end}}
''}'';
};
};
services.prometheus.alertmanager = {
enable = true;
listenAddress = "localhost";
configuration = {
route = {
group_wait = "30s";
repeat_interval = "4h";
receiver = "me";
};
receivers = [{
name = "me";
webhook_configs = [{
url = "http://localhost:16320";
send_resolved = true;
}];
}];
};
};
services.prometheus.alertmanagers = [{
scheme = "http";
path_prefix = "/";
static_configs = [ { targets = [ "localhost:9093" ]; } ];
}];
services.prometheus.scrapeConfigs = [
{
job_name = "makanek";
static_configs = [ { targets = [
"127.0.0.1:${toString config.services.prometheus.exporters.node.port}"
]; } ];
}
{
scrape_interval = "5m";
job_name = "blackbox";
metrics_path = "/probe";
params.module = [ "http_2xx" ];
relabel_configs = [
{ source_labels = ["__address__"]; target_label = "__param_target"; }
{ source_labels = ["__param_target"]; target_label = "instance"; }
{ replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"; target_label = "__address__"; }
];
static_configs = [{
targets = [
"alew.hu-berlin.de"
];
}];
}
{
job_name = "zaatar";
static_configs = [ { targets = [ "zaatar.r:${toString config.services.prometheus.exporters.node.port}" ]; } ];
}
];
services.prometheus.exporters.blackbox = {
enable = true;
configFile = (pkgs.formats.yaml {}).generate "blackbox.yaml" blackboxConfig;
};
networking.firewall.allowedTCPPorts = [
lokiConfig.server.http_listen_port
];
services.loki = {
enable = true;
configFile = (pkgs.formats.yaml {}).generate "loki.yaml" lokiConfig;
};
}

View File

@@ -0,0 +1,60 @@
{
auth_enabled = false;
server = {
http_listen_port = 3100;
grpc_listen_port = 9096;
};
ingester = {
wal = {
enabled = true;
dir = "/tmp/wal";
};
lifecycler = {
address = "127.0.0.1";
ring = {
kvstore.store = "inmemory";
replication_factor = 1;
};
final_sleep = "0s";
};
chunk_idle_period = "1h"; # Any chunk not receiving new logs in this time will be flushed
max_chunk_age = "1h"; # All chunks will be flushed when they hit this age, default is 1h
chunk_target_size = 1048576; # Loki will attempt to build chunks up to 1.5MB, flushing first if chunk_idle_period or max_chunk_age is reached first
chunk_retain_period = "30s"; # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m)
max_transfer_retries = 0; # Chunk transfers disabled
};
schema_config.configs = [
{
from = "2020-10-24";
store = "boltdb-shipper";
object_store = "filesystem";
schema = "v11";
index = {
prefix = "index_";
period = "24h";
};
}
];
storage_config = {
boltdb_shipper = {
active_index_directory = "/tmp/loki/boltdb-shipper-active";
cache_location = "/tmp/loki/boltdb-shipper-cache";
cache_ttl = "24h"; # Can be increased for faster performance over longer query periods, uses more disk space
shared_store = "filesystem";
};
filesystem.directory = "/tmp/loki/chunks";
};
compactor = {
working_directory = "/tmp/loki/boltdb-shipper-compactor";
shared_store = "filesystem";
};
limits_config = {
reject_old_samples = true;
reject_old_samples_max_age = "168h";
};
chunk_store_config.max_look_back_period = "0s";
table_manager = {
retention_deletes_enabled = false;
retention_period = "0s";
};
}