mirror of
https://github.com/kmein/niveum
synced 2026-03-20 20:01:08 +01:00
Compare commits
6 Commits
b8ff81344d
...
cc378bf109
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cc378bf109 | ||
| 1c299cafef | |||
| cbce724ade | |||
| 00dfe27738 | |||
| 6e0026ed5c | |||
| d92f382b9a |
@@ -1,5 +1,7 @@
|
||||
# niveum
|
||||
|
||||
> I must Create a System, or be enslav'd by another Man's. —William Blake
|
||||
|
||||
> [nĭvĕus](https://logeion.uchicago.edu/niveus), a, um, adj. [nix], _of_ or _from snow, snowy, snow-_ (poet.)
|
||||
>
|
||||
> 1. Lit.: aggeribus niveis informis, Verg. G. 3, 354: aqua, _cooled with snow_, Mart. 12, 17, 6; cf. id. 14, 104 and 117: mons, _covered with snow_, Cat. 64, 240.—
|
||||
|
||||
@@ -17,11 +17,7 @@ in {
|
||||
chmod o+rx ${stateDir}
|
||||
cd ${stateDir}
|
||||
(${pkgs.curl}/bin/curl -s -o wallpaper.tmp -z wallpaper.tmp ${lib.escapeShellArg url} && cp wallpaper.tmp wallpaper) || :
|
||||
if [ -z $SWAYSOCK ]; then
|
||||
${pkgs.feh}/bin/feh --no-fehbg --bg-scale wallpaper
|
||||
else
|
||||
${pkgs.sway}/bin/swaymsg -s $SWAYSOCK 'output * bg ${stateDir}/wallpaper fill'
|
||||
fi
|
||||
${pkgs.feh}/bin/feh --no-fehbg --bg-scale wallpaper
|
||||
'';
|
||||
startAt = "*:00,10,20,30,40,50";
|
||||
serviceConfig = {
|
||||
|
||||
190
flake.lock
generated
190
flake.lock
generated
@@ -12,11 +12,11 @@
|
||||
"systems": "systems"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1750173260,
|
||||
"narHash": "sha256-9P1FziAwl5+3edkfFcr5HeGtQUtrSdk/MksX39GieoA=",
|
||||
"lastModified": 1754433428,
|
||||
"narHash": "sha256-NA/FT2hVhKDftbHSwVnoRTFhes62+7dxZbxj5Gxvghs=",
|
||||
"owner": "ryantm",
|
||||
"repo": "agenix",
|
||||
"rev": "531beac616433bac6f9e2a19feb8e99a22a66baf",
|
||||
"rev": "9edb1787864c4f59ae5074ad498b6272b3ec308d",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -178,14 +178,14 @@
|
||||
"stockholm",
|
||||
"nixpkgs"
|
||||
],
|
||||
"treefmt-nix": "treefmt-nix_2"
|
||||
"treefmt-nix": "treefmt-nix"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1746626503,
|
||||
"narHash": "sha256-mRnIdJLP+0NSim9ao30ue0Z3ttSuxzXwQG7UN1KuKfU=",
|
||||
"lastModified": 1751515480,
|
||||
"narHash": "sha256-vCYcc/b8WizF6vnjuRVxSiU8hy9L3vOTWDVKpWM7xRE=",
|
||||
"owner": "Mic92",
|
||||
"repo": "buildbot-nix",
|
||||
"rev": "7ad9b4886eccb5eecc0686a16266ddabf6cbefe9",
|
||||
"rev": "47ad4c7afb169df6f9d48d0df3d7e2f71d9ddd8f",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -336,21 +336,6 @@
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"flake-compat_2": {
|
||||
"locked": {
|
||||
"lastModified": 1747046372,
|
||||
"narHash": "sha256-CIVLLkVgvHYbgI2UpXvIIBJ12HWgX+fjA8Xf8PUmqCY=",
|
||||
"owner": "edolstra",
|
||||
"repo": "flake-compat",
|
||||
"rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "edolstra",
|
||||
"repo": "flake-compat",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"flake-parts": {
|
||||
"inputs": {
|
||||
"nixpkgs-lib": [
|
||||
@@ -381,11 +366,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1743550720,
|
||||
"narHash": "sha256-hIshGgKZCgWh6AYJpJmRgFdR3WUbkY04o82X05xqQiY=",
|
||||
"lastModified": 1751413152,
|
||||
"narHash": "sha256-Tyw1RjYEsp5scoigs1384gIg6e0GoBVjms4aXFfRssQ=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "flake-parts",
|
||||
"rev": "c621e8422220273271f52058f618c94e405bb0f5",
|
||||
"rev": "77826244401ea9de6e3bac47c2db46005e1f30b5",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -531,54 +516,6 @@
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"git-hooks": {
|
||||
"inputs": {
|
||||
"flake-compat": [
|
||||
"stylix",
|
||||
"flake-compat"
|
||||
],
|
||||
"gitignore": "gitignore",
|
||||
"nixpkgs": [
|
||||
"stylix",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1750779888,
|
||||
"narHash": "sha256-wibppH3g/E2lxU43ZQHC5yA/7kIKLGxVEnsnVK1BtRg=",
|
||||
"owner": "cachix",
|
||||
"repo": "git-hooks.nix",
|
||||
"rev": "16ec914f6fb6f599ce988427d9d94efddf25fe6d",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "cachix",
|
||||
"repo": "git-hooks.nix",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"gitignore": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"stylix",
|
||||
"git-hooks",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1709087332,
|
||||
"narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "gitignore.nix",
|
||||
"rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "hercules-ci",
|
||||
"repo": "gitignore.nix",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"gnome-shell": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
@@ -610,11 +547,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1744693102,
|
||||
"narHash": "sha256-1Z4WPGVky4w3lrhrgs89OKsLzPdtkbi1bPLNFWsoLfY=",
|
||||
"lastModified": 1748000383,
|
||||
"narHash": "sha256-EaAJhwfJGBncgIV/0NlJviid2DP93cTMc9h0q6P6xXk=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "hercules-ci-effects",
|
||||
"rev": "5b6cec51c9ec095a0d3fd4c8eeb53eb5c59ae33e",
|
||||
"rev": "231726642197817d20310b9d39dd4afb9e899489",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -653,11 +590,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1751468302,
|
||||
"narHash": "sha256-tWosziZTT039x6PgEZUhzGlV8oLvdDmIgKTE8ESMaEA=",
|
||||
"lastModified": 1753592768,
|
||||
"narHash": "sha256-oV695RvbAE4+R9pcsT9shmp6zE/+IZe6evHWX63f2Qg=",
|
||||
"owner": "nix-community",
|
||||
"repo": "home-manager",
|
||||
"rev": "501cfec8277f931a9c9af9f23d3105c537faeafe",
|
||||
"rev": "fc3add429f21450359369af74c2375cb34a2d204",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -1108,11 +1045,11 @@
|
||||
},
|
||||
"nixpkgs-unstable_2": {
|
||||
"locked": {
|
||||
"lastModified": 1751704732,
|
||||
"narHash": "sha256-30JXBxkJD3pxBBGQwl/DDwxxJDGQ1nvkvWwEtTsmhA8=",
|
||||
"lastModified": 1754787963,
|
||||
"narHash": "sha256-BExvuIabyDAgm8oLgGi/rbTbCouydsLMLaL2Fzpdvcc=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "07c3e0e2be593551abdccacabe29cc57206b396f",
|
||||
"rev": "796902fa97f5d4174251562eee6fa4d138c4a5e8",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -1230,11 +1167,11 @@
|
||||
},
|
||||
"nixpkgs_7": {
|
||||
"locked": {
|
||||
"lastModified": 1751582995,
|
||||
"narHash": "sha256-u7ubvtxdTnFPpV27AHpgoKn7qHuE7sgWgza/1oj5nzA=",
|
||||
"lastModified": 1754689972,
|
||||
"narHash": "sha256-eogqv6FqZXHgqrbZzHnq43GalnRbLTkbBbFtEfm1RSc=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "7a732ed41ca0dd64b4b71b563ab9805a80a7d693",
|
||||
"rev": "fc756aa6f5d3e2e5666efcf865d190701fef150a",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -1246,11 +1183,11 @@
|
||||
},
|
||||
"nixpkgs_8": {
|
||||
"locked": {
|
||||
"lastModified": 1751271578,
|
||||
"narHash": "sha256-P/SQmKDu06x8yv7i0s8bvnnuJYkxVGBWLWHaU+tt4YY=",
|
||||
"lastModified": 1754498491,
|
||||
"narHash": "sha256-erbiH2agUTD0Z30xcVSFcDHzkRvkRXOQ3lb887bcVrs=",
|
||||
"owner": "nixos",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "3016b4b15d13f3089db8a41ef937b13a9e33a8df",
|
||||
"rev": "c2ae88e026f9525daf89587f3cbee584b92b6134",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -1262,11 +1199,11 @@
|
||||
},
|
||||
"nixpkgs_9": {
|
||||
"locked": {
|
||||
"lastModified": 1746904237,
|
||||
"narHash": "sha256-3e+AVBczosP5dCLQmMoMEogM57gmZ2qrVSrmq9aResQ=",
|
||||
"lastModified": 1751792365,
|
||||
"narHash": "sha256-J1kI6oAj25IG4EdVlg2hQz8NZTBNYvIS0l4wpr9KcUo=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "d89fc19e405cb2d55ce7cc114356846a0ee5e956",
|
||||
"rev": "1fd8bada0b6117e6c7eb54aad5813023eed37ccb",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -1390,15 +1327,14 @@
|
||||
"nur_2": {
|
||||
"inputs": {
|
||||
"flake-parts": "flake-parts",
|
||||
"nixpkgs": "nixpkgs_8",
|
||||
"treefmt-nix": "treefmt-nix"
|
||||
"nixpkgs": "nixpkgs_8"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1751701159,
|
||||
"narHash": "sha256-iDjz1tBd/ibtmZZse4k1NBxfPR8g9K5W4sRgCyOnrj0=",
|
||||
"lastModified": 1754771961,
|
||||
"narHash": "sha256-aLtVkt7vTReGT/M51TvKpUNOhvs+XFAdimadlUiOPfg=",
|
||||
"owner": "nix-community",
|
||||
"repo": "NUR",
|
||||
"rev": "a26411970baba3604e425f23e5293da492069e4a",
|
||||
"rev": "27dfc5e3bedd281be925bc290351890571a720d2",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -1417,7 +1353,7 @@
|
||||
"stylix",
|
||||
"nixpkgs"
|
||||
],
|
||||
"treefmt-nix": "treefmt-nix_3"
|
||||
"treefmt-nix": "treefmt-nix_2"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1751320053,
|
||||
@@ -1593,11 +1529,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1751683029,
|
||||
"narHash": "sha256-dYO5X5jK8bpQOeRAo8R5aUt6M/+Ji1cZgstZI7SQ2IA=",
|
||||
"lastModified": 1754707163,
|
||||
"narHash": "sha256-wgVgOsyJUDn2ZRpzu2gELKALoJXlBSoZJSln+Tlg5Pw=",
|
||||
"owner": "oxalica",
|
||||
"repo": "rust-overlay",
|
||||
"rev": "9e5e62a33a929a67a5427fb7324a6f583dced0b2",
|
||||
"rev": "ac39ab4c8ed7cefe48d5ae5750f864422df58f01",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -1651,11 +1587,11 @@
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1748892379,
|
||||
"narHash": "sha256-mDDxMwKFURX1K1Z8X/kmt+jYjswofDf0br+Mkw2tmSE=",
|
||||
"lastModified": 1753033513,
|
||||
"narHash": "sha256-TnfXFloY4Ntq+0hp+q9GGmuhtB1oueFiB+pcBUNYzFs=",
|
||||
"owner": "kmein",
|
||||
"repo": "scripts",
|
||||
"rev": "f44c7a4a6caa1ef5d6b7bf7e93acea0d96f30c21",
|
||||
"rev": "cc37fa4aec70f53731b9131bb8830b4445b75b3d",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -1671,11 +1607,11 @@
|
||||
"nixpkgs": "nixpkgs_9"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1747157099,
|
||||
"narHash": "sha256-i2s6jU+8GLKVjhWDyvFYxmXI7A44c9p6apPPyKt0ETk=",
|
||||
"lastModified": 1754761025,
|
||||
"narHash": "sha256-Mo2BkJXIz6HKM8cX2S7bRdX6Q3E1UOcyVL4v10QEUzk=",
|
||||
"owner": "krebs",
|
||||
"repo": "stockholm",
|
||||
"rev": "d4abc837cc7b87b4f23fe48cc306df26e3de7aab",
|
||||
"rev": "fc32e4609140fffa1312a4ca1aeea550b7467448",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -1721,13 +1657,8 @@
|
||||
"base16-helix": "base16-helix",
|
||||
"base16-vim": "base16-vim",
|
||||
"firefox-gnome-theme": "firefox-gnome-theme",
|
||||
"flake-compat": "flake-compat_2",
|
||||
"flake-parts": "flake-parts_3",
|
||||
"git-hooks": "git-hooks",
|
||||
"gnome-shell": "gnome-shell",
|
||||
"home-manager": [
|
||||
"home-manager"
|
||||
],
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
],
|
||||
@@ -1740,11 +1671,11 @@
|
||||
"tinted-zed": "tinted-zed"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1751658706,
|
||||
"narHash": "sha256-jqRbWjB8aH2qzq6nMQpwkzVBR4o9lNxAHFmRgGwnJ94=",
|
||||
"lastModified": 1754599117,
|
||||
"narHash": "sha256-AzAYdZlat002vCjCKWdFpGi2xUaiOU4DtIPnv1nomD8=",
|
||||
"owner": "danth",
|
||||
"repo": "stylix",
|
||||
"rev": "5dd301b72207d4fd8d8b929abd88ba1c486d1744",
|
||||
"rev": "312dec38b2231b21f36903d1bdce96daa11548ff",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -2009,16 +1940,17 @@
|
||||
"treefmt-nix": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"nur",
|
||||
"stockholm",
|
||||
"buildbot-nix",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1733222881,
|
||||
"narHash": "sha256-JIPcz1PrpXUCbaccEnrcUS8jjEb/1vJbZz5KkobyFdM=",
|
||||
"lastModified": 1750931469,
|
||||
"narHash": "sha256-0IEdQB1nS+uViQw4k3VGUXntjkDp7aAlqcxdewb/hAc=",
|
||||
"owner": "numtide",
|
||||
"repo": "treefmt-nix",
|
||||
"rev": "49717b5af6f80172275d47a418c9719a31a78b53",
|
||||
"rev": "ac8e6f32e11e9c7f153823abc3ab007f2a65d3e1",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -2028,28 +1960,6 @@
|
||||
}
|
||||
},
|
||||
"treefmt-nix_2": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"stockholm",
|
||||
"buildbot-nix",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1746216483,
|
||||
"narHash": "sha256-4h3s1L/kKqt3gMDcVfN8/4v2jqHrgLIe4qok4ApH5x4=",
|
||||
"owner": "numtide",
|
||||
"repo": "treefmt-nix",
|
||||
"rev": "29ec5026372e0dec56f890e50dbe4f45930320fd",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "numtide",
|
||||
"repo": "treefmt-nix",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"treefmt-nix_3": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"stylix",
|
||||
|
||||
@@ -3,11 +3,13 @@
|
||||
config,
|
||||
pkgs,
|
||||
...
|
||||
}: let
|
||||
}:
|
||||
let
|
||||
lokiConfig = import ./loki.nix;
|
||||
blackboxConfig = import ./blackbox.nix;
|
||||
inherit (import ../../../lib) restic;
|
||||
in {
|
||||
in
|
||||
{
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
settings = {
|
||||
@@ -80,143 +82,150 @@ in {
|
||||
}
|
||||
];
|
||||
|
||||
services.prometheus.rules = let
|
||||
diskFreeThreshold = 10;
|
||||
in [
|
||||
(builtins.toJSON {
|
||||
groups = [
|
||||
{
|
||||
name = "niveum";
|
||||
rules = [
|
||||
{
|
||||
alert = "HostSystemdServiceCrashed";
|
||||
expr = ''(node_systemd_unit_state{state="failed"} == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'';
|
||||
annotations = {
|
||||
description = "{{$labels.name}} failed on {{$labels.instance}}";
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "RootPartitionFull";
|
||||
for = "10m";
|
||||
expr = ''(node_filesystem_free_bytes{mountpoint="/"} * 100) / node_filesystem_size_bytes{mountpoint="/"} < ${toString diskFreeThreshold}'';
|
||||
annotations = {
|
||||
description = ''{{ $labels.instance }} running out of space: {{ $value | printf "%.2f" }}% < ${toString diskFreeThreshold}%'';
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "RootPartitionFullWeek";
|
||||
for = "1h";
|
||||
expr =
|
||||
''node_filesystem_free_bytes{mountpoint="/"} ''
|
||||
+ ''and predict_linear(node_filesystem_free_bytes{mountpoint="/"}[2d], 7*24*3600) <= 0'';
|
||||
annotations = {
|
||||
description = "{{$labels.instance}} running out of space in 7 days";
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "HighLoad";
|
||||
expr = ''node_load15 / on(job) count(node_cpu_seconds_total{mode="system"}) by (job) >= 1.0'';
|
||||
for = "10m";
|
||||
annotations = {
|
||||
description = "{{$labels.instance}} running on high load: {{$value}}";
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "HostUnusualNetworkThroughputIn";
|
||||
expr = ''(rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100'';
|
||||
for = "5m";
|
||||
annotations.description = "Host unusual network throughput in (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HostUnusualNetworkThroughputOut";
|
||||
expr = ''(rate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100'';
|
||||
for = "5m";
|
||||
annotations.description = "Host unusual network throughput out (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HostUnusualDiskReadRate";
|
||||
expr = ''(rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50'';
|
||||
for = "5m";
|
||||
annotations.description = "Host unusual disk read rate (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HostUnusualDiskWriteRate";
|
||||
expr = ''(rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50'';
|
||||
for = "2m";
|
||||
annotations.description = "Host unusual disk write rate (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HostOutOfInodes";
|
||||
expr = ''node_filesystem_files_free{fstype!="msdosfs"} / node_filesystem_files{fstype!="msdosfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0'';
|
||||
for = "2m";
|
||||
annotations.description = "Host out of inodes (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HostInodesWillFillIn24Hours";
|
||||
expr = ''node_filesystem_files_free{fstype!="msdosfs"} / node_filesystem_files{fstype!="msdosfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{fstype!="msdosfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{fstype!="msdosfs"} == 0'';
|
||||
for = "2m";
|
||||
annotations.description = "Host inodes will fill in 24 hours (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HighRAM";
|
||||
expr = "node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes < node_memory_MemTotal_bytes * 0.1";
|
||||
for = "1h";
|
||||
annotations.description = "{{$labels.instance}} using lots of RAM";
|
||||
}
|
||||
{
|
||||
alert = "UptimeMonster";
|
||||
expr = "time() - node_boot_time_seconds > 2592000";
|
||||
annotations.description = "uptime monster {{$labels.instance}} up for more than 30 days";
|
||||
}
|
||||
{
|
||||
alert = "HostDown";
|
||||
expr = ''up == 0'';
|
||||
for = "5m";
|
||||
annotations = {
|
||||
description = "{{ $labels.instance }} seeming down since 5 minutes";
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "Reboot";
|
||||
expr = "time() - node_boot_time_seconds < 300";
|
||||
annotations.description = "{{$labels.instance}} rebooted";
|
||||
}
|
||||
{
|
||||
alert = "ProbeFailed";
|
||||
expr = "probe_success == 0";
|
||||
for = "5m";
|
||||
annotations.description = "HTTP probe failed for {{$labels.instance}}";
|
||||
}
|
||||
{
|
||||
alert = "SlowProbe";
|
||||
expr = "avg_over_time(probe_http_duration_seconds[1m]) > 1";
|
||||
for = "5m";
|
||||
annotations.description = "HTTP probe slow for {{$labels.instance}}";
|
||||
}
|
||||
{
|
||||
alert = "HttpStatusCode";
|
||||
expr = "probe_http_status_code != 0 AND (probe_http_status_code <= 199 OR probe_http_status_code >= 400)";
|
||||
for = "5m";
|
||||
annotations.description = "status code {{$value}} for {{$labels.instance}}";
|
||||
}
|
||||
{
|
||||
alert = "SslExpirySoon";
|
||||
expr = "probe_ssl_earliest_cert_expiry - time() < 86400 * 30";
|
||||
for = "5m";
|
||||
annotations.description = "SSL certificate for {{$labels.instance}} expires in 30 days";
|
||||
}
|
||||
{
|
||||
alert = "SslExpiry";
|
||||
expr = "probe_ssl_earliest_cert_expiry - time() <= 0";
|
||||
for = "5m";
|
||||
annotations.description = "SSL certificate for {{$labels.instance}} has expired";
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
})
|
||||
];
|
||||
|
||||
services.prometheus.rules =
|
||||
let
|
||||
diskFreeThreshold = 10;
|
||||
in
|
||||
[
|
||||
(builtins.toJSON {
|
||||
groups = [
|
||||
{
|
||||
name = "niveum";
|
||||
rules = [
|
||||
{
|
||||
alert = "HostSystemdServiceCrashed";
|
||||
expr = ''(node_systemd_unit_state{state="failed"} == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'';
|
||||
annotations = {
|
||||
description = "{{$labels.name}} failed on {{$labels.instance}}";
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "RootPartitionFull";
|
||||
for = "10m";
|
||||
expr = ''(node_filesystem_free_bytes{mountpoint="/"} * 100) / node_filesystem_size_bytes{mountpoint="/"} < ${toString diskFreeThreshold}'';
|
||||
annotations = {
|
||||
description = ''{{ $labels.instance }} running out of space: {{ $value | printf "%.2f" }}% < ${toString diskFreeThreshold}%'';
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "RootPartitionFullWeek";
|
||||
for = "1h";
|
||||
expr =
|
||||
''node_filesystem_free_bytes{mountpoint="/"} ''
|
||||
+ ''and predict_linear(node_filesystem_free_bytes{mountpoint="/"}[2d], 7*24*3600) <= 0'';
|
||||
annotations = {
|
||||
description = "{{$labels.instance}} running out of space in 7 days";
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "HighLoad";
|
||||
expr = ''node_load15 / on(job) count(node_cpu_seconds_total{mode="system"}) by (job) >= 1.0'';
|
||||
for = "10m";
|
||||
annotations = {
|
||||
description = "{{$labels.instance}} running on high load: {{$value}}";
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "HostUnusualNetworkThroughputIn";
|
||||
expr = ''(rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100'';
|
||||
for = "5m";
|
||||
annotations.description = "Host unusual network throughput in (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HostUnusualNetworkThroughputOut";
|
||||
expr = ''(rate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100'';
|
||||
for = "5m";
|
||||
annotations.description = "Host unusual network throughput out (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HostUnusualDiskReadRate";
|
||||
expr = ''(rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50'';
|
||||
for = "5m";
|
||||
annotations.description = "Host unusual disk read rate (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HostUnusualDiskWriteRate";
|
||||
expr = ''(rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50'';
|
||||
for = "2m";
|
||||
annotations.description = "Host unusual disk write rate (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HostOutOfInodes";
|
||||
expr = ''node_filesystem_files_free{fstype!="msdosfs"} / node_filesystem_files{fstype!="msdosfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0'';
|
||||
for = "2m";
|
||||
annotations.description = "Host out of inodes (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HostInodesWillFillIn24Hours";
|
||||
expr = ''node_filesystem_files_free{fstype!="msdosfs"} / node_filesystem_files{fstype!="msdosfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{fstype!="msdosfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{fstype!="msdosfs"} == 0'';
|
||||
for = "2m";
|
||||
annotations.description = "Host inodes will fill in 24 hours (instance {{ $labels.instance }})";
|
||||
}
|
||||
{
|
||||
alert = "HighRAM";
|
||||
expr = "node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes < node_memory_MemTotal_bytes * 0.1";
|
||||
for = "1h";
|
||||
annotations.description = "{{$labels.instance}} using lots of RAM";
|
||||
}
|
||||
{
|
||||
alert = "UptimeMonster";
|
||||
expr = "time() - node_boot_time_seconds > 2592000";
|
||||
annotations.description = "uptime monster {{$labels.instance}} up for more than 30 days";
|
||||
}
|
||||
{
|
||||
alert = "HostDown";
|
||||
expr = ''up == 0'';
|
||||
for = "5m";
|
||||
annotations = {
|
||||
description = "{{ $labels.instance }} seeming down since 5 minutes";
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "Reboot";
|
||||
expr = "time() - node_boot_time_seconds < 300";
|
||||
annotations.description = "{{$labels.instance}} rebooted";
|
||||
}
|
||||
{
|
||||
alert = "Mastodon";
|
||||
# Fire only when the blackbox probe fails: a bare selector returns the series whether its value is 0 or 1, so without the comparison this alert would be active while the instance is up.
expr = ''probe_success{instance="https://social.krebsco.de"} == 0'';
|
||||
for = "5m";
|
||||
annotations.description = "Mastodon instance {{$labels.instance}} is down";
|
||||
}
|
||||
{
|
||||
alert = "ProbeFailed";
|
||||
expr = "probe_success == 0";
|
||||
for = "5m";
|
||||
annotations.description = "HTTP probe failed for {{$labels.instance}}";
|
||||
}
|
||||
{
|
||||
alert = "SlowProbe";
|
||||
expr = "avg_over_time(probe_http_duration_seconds[1m]) > 1";
|
||||
for = "5m";
|
||||
annotations.description = "HTTP probe slow for {{$labels.instance}}";
|
||||
}
|
||||
{
|
||||
alert = "HttpStatusCode";
|
||||
expr = "probe_http_status_code != 0 AND (probe_http_status_code <= 199 OR probe_http_status_code >= 400)";
|
||||
for = "5m";
|
||||
annotations.description = "status code {{$value}} for {{$labels.instance}}";
|
||||
}
|
||||
{
|
||||
alert = "SslExpirySoon";
|
||||
expr = "probe_ssl_earliest_cert_expiry - time() < 86400 * 30";
|
||||
for = "5m";
|
||||
annotations.description = "SSL certificate for {{$labels.instance}} expires in 30 days";
|
||||
}
|
||||
{
|
||||
alert = "SslExpiry";
|
||||
expr = "probe_ssl_earliest_cert_expiry - time() <= 0";
|
||||
for = "5m";
|
||||
annotations.description = "SSL certificate for {{$labels.instance}} has expired";
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
})
|
||||
];
|
||||
|
||||
# ref https://github.com/Mic92/dotfiles/blob/f44bac5dd6970ed3fbb4feb906917331ec3c2be5/machines/eva/modules/prometheus/default.nix
|
||||
systemd.services.matrix-hook = {
|
||||
@@ -246,6 +255,33 @@ in {
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.matrix-hook-lassulus = {
|
||||
description = "Matrix Hook";
|
||||
after = [ "network.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
environment = {
|
||||
HTTP_ADDRESS = "[::1]";
|
||||
HTTP_PORT = "9089";
|
||||
MX_HOMESERVER = "https://matrix.4d2.org";
|
||||
MX_ID = "@lakai:4d2.org";
|
||||
MX_ROOMID = "!MJAGqBAOKZGMywzwkI:lassul.us";
|
||||
MX_MSG_TEMPLATE = "${pkgs.matrix-hook}/message.html.tmpl";
|
||||
};
|
||||
serviceConfig = {
|
||||
EnvironmentFile = [
|
||||
# format: MX_TOKEN=<token>
|
||||
config.age.secrets.matrix-token-lakai-env.path
|
||||
];
|
||||
Type = "simple";
|
||||
ExecStart = "${pkgs.matrix-hook}/bin/matrix-hook";
|
||||
Restart = "always";
|
||||
RestartSec = "10";
|
||||
DynamicUser = true;
|
||||
User = "matrix-hook";
|
||||
Group = "matrix-hook";
|
||||
};
|
||||
};
|
||||
|
||||
age.secrets = {
|
||||
matrix-token-lakai-env.file = ../../../secrets/matrix-token-lakai-env.age;
|
||||
};
|
||||
@@ -260,8 +296,23 @@ in {
|
||||
group_wait = "30s";
|
||||
repeat_interval = "24h";
|
||||
receiver = "matrix";
|
||||
routes = [
|
||||
{
|
||||
receiver = "lassulus";
|
||||
matchers = [ "alertname = \"Mastodon\"" ];
|
||||
}
|
||||
];
|
||||
};
|
||||
receivers = [
|
||||
{
|
||||
name = "lassulus";
|
||||
webhook_configs = [
|
||||
{
|
||||
url = "http://localhost:9089/alert";
|
||||
max_alerts = 5;
|
||||
}
|
||||
];
|
||||
}
|
||||
{
|
||||
name = "matrix";
|
||||
webhook_configs = [
|
||||
@@ -306,13 +357,21 @@ in {
|
||||
{
|
||||
scheme = "http";
|
||||
path_prefix = "/";
|
||||
static_configs = [{targets = ["localhost:${toString config.services.prometheus.alertmanager.port}"];}];
|
||||
static_configs = [
|
||||
{ targets = [ "localhost:${toString config.services.prometheus.alertmanager.port}" ]; }
|
||||
];
|
||||
}
|
||||
];
|
||||
|
||||
# otherwise bearer_token_file will fail
|
||||
services.prometheus.checkConfig = "syntax-only";
|
||||
|
||||
services.prometheus.extraFlags = [
|
||||
"--storage.tsdb.retention.time=7d"
|
||||
"--storage.tsdb.retention.size=2GB"
|
||||
"--storage.tsdb.wal-compression"
|
||||
];
|
||||
|
||||
services.prometheus.scrapeConfigs = [
|
||||
{
|
||||
job_name = "makanek";
|
||||
@@ -328,14 +387,14 @@ in {
|
||||
scrape_interval = "5m";
|
||||
job_name = "blackbox";
|
||||
metrics_path = "/probe";
|
||||
params.module = ["http_2xx"];
|
||||
params.module = [ "http_2xx" ];
|
||||
relabel_configs = [
|
||||
{
|
||||
source_labels = ["__address__"];
|
||||
source_labels = [ "__address__" ];
|
||||
target_label = "__param_target";
|
||||
}
|
||||
{
|
||||
source_labels = ["__param_target"];
|
||||
source_labels = [ "__param_target" ];
|
||||
target_label = "instance";
|
||||
}
|
||||
{
|
||||
@@ -393,7 +452,7 @@ in {
|
||||
scrape_interval = "60s";
|
||||
metrics_path = "/api/prometheus";
|
||||
scheme = "http";
|
||||
static_configs = [{targets = ["zaatar.r:8123"];}];
|
||||
static_configs = [ { targets = [ "zaatar.r:8123" ]; } ];
|
||||
bearer_token_file = config.age.secrets.home-assistant-token.path;
|
||||
}
|
||||
{
|
||||
@@ -410,7 +469,7 @@ in {
|
||||
|
||||
services.prometheus.exporters.blackbox = {
|
||||
enable = true;
|
||||
configFile = (pkgs.formats.yaml {}).generate "blackbox.yaml" blackboxConfig;
|
||||
configFile = (pkgs.formats.yaml { }).generate "blackbox.yaml" blackboxConfig;
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPorts = [
|
||||
@@ -419,6 +478,6 @@ in {
|
||||
|
||||
services.loki = {
|
||||
enable = true;
|
||||
configFile = (pkgs.formats.yaml {}).generate "loki.yaml" lokiConfig;
|
||||
configFile = (pkgs.formats.yaml { }).generate "loki.yaml" lokiConfig;
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user