nixos-config/systems/LoutreOS/monitoring.nix

212 lines
6.3 KiB
Nix

{ config, lib, pkgs, ... }:
let
# Base domain interpolated into settings further down in this module
# (e.g. the Grafana root URL and its SMTP sender address).
domaine = "nyanlout.re";
in
{
services = {
# S.M.A.R.T. monitoring daemon with e-mail notifications.
smartd = {
  enable = true;
  # smartd.conf directives used as the default for monitored drives:
  #   -a      enable the standard set of health/attribute checks
  #   -o on   turn on automatic offline data collection
  #   -s ...  self-test schedule (T/MM/DD/d/HH): short test every day at 02h,
  #           long test on weekday 1 (Monday) at 04h
  defaults.monitored = "-a -o on -s (S/../.././02|L/../../1/04)";
  notifications.mail = {
    enable = true;
    # Derived from `domaine` instead of repeating the domain literal, so the
    # address cannot drift from the rest of the module.
    recipient = "paul@${domaine}";
  };
};
# InfluxDB time-series store with an explicit on-disk location.
# NOTE(review): presumably the instance the Telegraf `outputs.influxdb`
# section writes to on localhost:8086 — confirm the port is the default.
influxdb = {
  dataDir = "/var/db/influxdb";
  enable = true;
};
telegraf = {
enable = true;
extraConfig = {
inputs = {
# Telegraf input plugins that run with defaults or a single option.
# An empty set enables the plugin without overriding anything.
cpu.totalcpu = true;
disk = {};
kernel = {};
mem = {};
# Only the four on-board interfaces are sampled.
net.interfaces = [ "eno1" "eno2" "eno3" "eno4" ];
netstat = {};
processes = {};
system = {};
zfs.poolMetrics = true;
# Per-unit resource accounting read from the cgroup filesystem, one plugin
# instance per controller, limited to units under system.slice.
# NOTE(review): these look like cgroup v1 (split hierarchy) paths — confirm
# the host is not running the unified v2 hierarchy, where they do not exist.
cgroup = [
  {
    # Memory controller: usage counters and the configured limit.
    files = ["memory.*usage*" "memory.limit_in_bytes"];
    paths = [ "/sys/fs/cgroup/memory/system.slice/*" ];
  }
  {
    # CPU controller: consumed CPU time and the CFS quota/period.
    files = ["cpuacct.usage" "cpu.cfs_period_us" "cpu.cfs_quota_us"];
    paths = [ "/sys/fs/cgroup/cpu/system.slice/*" ];
  }
];
# Point the IPMI plugin at the ipmitool binary from nixpkgs.
ipmi_sensor.path = "${pkgs.ipmitool}/bin/ipmitool";
# The smart plugin is given a tiny wrapper named `smartctl` that re-invokes
# the real smartctl through the setuid sudo wrapper, since the plugin runs as
# the unprivileged telegraf user.
smart = {
  # "$@" is quoted so every argument is forwarded verbatim; the unquoted form
  # would be re-split on whitespace and glob-expanded by the shell.
  path = "${pkgs.writeShellScriptBin "smartctl" "/run/wrappers/bin/sudo ${pkgs.smartmontools}/bin/smartctl \"$@\""}/bin/smartctl";
};
# Custom collector: runs `zpool list -Hp`, converts each pool row to a JSON
# object and lets Telegraf parse it (pool name becomes a tag, numeric
# columns become fields of the `*_python_zpool` measurement).
exec = [
  {
    commands = [
      "${pkgs.python3}/bin/python ${pkgs.writeText "zpool.py" ''
        import json
        from subprocess import check_output

        # Column order of `zpool list` default output.
        columns = ["NAME", "SIZE", "ALLOC", "FREE", "CKPOINT", "EXPANDSZ", "FRAG", "CAP", "DEDUP", "HEALTH", "ALTROOT"]
        # Numeric encoding of the pool state so it can be graphed/alerted on.
        health = {'ONLINE':0, 'DEGRADED':11, 'OFFLINE':21, 'UNAVAIL':22, 'FAULTED':23, 'REMOVED':24}
        # Columns that must be numbers, with the conversion to apply.
        converters = {"SIZE": int, "ALLOC": int, "FREE": int, "FRAG": int, "CAP": int, "DEDUP": float}

        def parse(line):
            pool = dict(zip(columns, line.split('\t')))
            for key, convert in converters.items():
                if key not in pool:
                    continue
                try:
                    pool[key] = convert(pool[key])
                except ValueError:
                    # zpool prints "-" for values it cannot determine (for
                    # instance when a pool cannot be opened). Drop the field
                    # instead of crashing, so HEALTH is still reported.
                    del pool[key]
            if "HEALTH" in pool:
                # Unknown state strings map to -1 rather than raising KeyError.
                pool["HEALTH"] = health.get(pool["HEALTH"], -1)
            return pool

        output = check_output(["${pkgs.zfs}/bin/zpool", "list", "-Hp"], encoding='UTF-8')
        print(json.dumps([parse(line) for line in output.splitlines() if line]))
      ''}"
    ];
    tag_keys = [ "NAME" ];
    data_format = "json";
    name_suffix = "_python_zpool";
  }
];
};
# Ship every collected metric to the `telegraf` database of the InfluxDB
# HTTP endpoint on this host.
outputs.influxdb = {
  urls = [ "http://localhost:8086" ];
  database = "telegraf";
};
};
};
# Hand the IPMI device nodes to the telegraf user (mode 660).
# NOTE(review): presumably what lets the ipmi_sensor input run without
# root — confirm.
udev.extraRules = ''
  KERNEL=="ipmi*", MODE="660", OWNER="telegraf"
'';
# Grafana dashboard server, bound to loopback only.
# NOTE(review): the login form and basic auth are disabled and anonymous
# visitors receive the Admin role. Presumably access control is enforced by
# whatever fronts https://grafana.<domaine> — confirm before exposing it.
grafana = {
  enable = true;
  addr = "127.0.0.1";
  dataDir = "/var/lib/grafana";
  extraOptions = {
    # Public URL.
    SERVER_ROOT_URL = "https://grafana.${domaine}";

    # Authentication: everything off except anonymous access.
    AUTH_ANONYMOUS_ENABLED = "true";
    AUTH_ANONYMOUS_ORG_ROLE = "Admin";
    AUTH_BASIC_ENABLED = "false";
    AUTH_DISABLE_LOGIN_FORM = "true";
    AUTH_DISABLE_SIGNOUT_MENU = "true";

    # Outgoing mail.
    # NOTE(review): SMTP_SKIP_VERIFY disables certificate verification —
    # confirm this is intended for the relay in use.
    SMTP_ENABLED = "true";
    SMTP_FROM_ADDRESS = "grafana@${domaine}";
    SMTP_SKIP_VERIFY = "true";
  };
};
# ZFS Event Daemon mail notifications.
zfs.zed.settings = {
  # Derived from `domaine` instead of repeating the domain literal, so the
  # address cannot drift from the rest of the module.
  ZED_EMAIL_ADDR = [ "paul@${domaine}" ];
  ZED_NOTIFY_VERBOSE = true;
};
# Single-node Loki: no auth, in-memory ring, BoltDB index and chunks on the
# local filesystem, everything older than one week is rejected or deleted.
loki = {
  enable = true;
  configuration =
    let
      # One week, shared by the sample-age limit, the query look-back limit
      # and the table-manager retention.
      oneWeek = "168h";
    in
    {
      auth_enabled = false;
      server = { http_listen_port = 3100; };

      ingester = {
        chunk_idle_period = "1h";
        chunk_target_size = 1000000;
        lifecycler = {
          address = "127.0.0.1";
          # Single replica, ring state kept in memory.
          ring.kvstore = { store = "inmemory"; };
          ring.replication_factor = 1;
        };
      };

      schema_config = {
        configs = [
          {
            from = "2018-04-15";
            schema = "v11";
            store = "boltdb";
            object_store = "filesystem";
            index.prefix = "index_";
            index.period = "168h";
          }
        ];
      };

      storage_config = {
        boltdb = { directory = "/var/lib/loki/index"; };
        filesystem = { directory = "/var/lib/loki/chunks"; };
      };

      limits_config = {
        enforce_metric_name = false;
        reject_old_samples = true;
        reject_old_samples_max_age = oneWeek;
      };

      chunk_store_config = { max_look_back_period = oneWeek; };

      table_manager = {
        retention_deletes_enabled = true;
        retention_period = oneWeek;
      };
    };
};
# Promtail agent: tails the nginx log files, extracts a few fields from each
# access-log line and pushes the entries to the local Loki instance.
promtail = {
  enable = true;
  configuration = {
    server = {
      http_listen_port = 9080;
      grpc_listen_port = 0;
    };
    # Read offsets are kept in the unit's cache directory instead of /tmp:
    # a positions file in /tmp does not survive a reboot (or a restart when
    # the unit has a private /tmp), after which the logs would be read and
    # shipped again.
    positions.filename = "/var/cache/promtail/positions.yaml";
    clients = [ { url = "http://127.0.0.1:3100/loki/api/v1/push"; } ];
    scrape_configs = [
      {
        job_name = "nginx";
        static_configs = [
          {
            labels = {
              job = "nginx";
              __path__ = "/var/log/nginx/*log";
            };
          }
        ];
        pipeline_stages = [
          {
            match = {
              selector = ''{job="nginx"}'';
              stages = [
                {
                  # Access-log line split into named groups. The remote_addr
                  # class includes ':' so IPv6 client addresses match too.
                  regex.expression = ''^(?P<remote_addr>[\w\.:]+) - (?P<remote_user>[^ ]*) \[(?P<time_local>.*)\] "(?P<method>[^ ]*) (?P<request>[^ ]*) (?P<protocol>[^ ]*)" (?P<status>[\d]+) (?P<body_bytes_sent>[\d]+) "(?P<http_referer>[^"]*)" "(?P<http_user_agent>[^"]*)"?'';
                }
                {
                  # Promote these extracted fields to labels; null means
                  # "use the extracted value of the same name".
                  labels = {
                    method = null;
                    request = null;
                    status = null;
                  };
                }
              ];
            };
          }
        ];
      }
    ];
  };
};
};
# Run promtail with the nginx group as a supplementary group.
# NOTE(review): presumably so it can read /var/log/nginx/*log — confirm the
# log files are group-readable.
systemd.services.promtail.serviceConfig = {
  SupplementaryGroups = [ "nginx" ];
};
# Allow the telegraf user to run smartctl through sudo without a password;
# the Telegraf smart input invokes it via /run/wrappers/bin/sudo.
security.sudo.extraRules = [
  {
    users = [ "telegraf" ];
    commands = [
      {
        command = "${pkgs.smartmontools}/bin/smartctl";
        options = [ "NOPASSWD" ];
      }
    ];
  }
];
}