#
# Checks for all Linux Hosts
#
# Every rule below applies to hosts that have an address and whose
# host.vars.os is "Linux". When the host is not the local node (NodeName),
# the check is executed on the endpoint named after the host
# (command_endpoint = host.name) instead of on the local node.
#

# CPU usage of the host.
apply Service "cpu_usage" {
  import "generic-service"

  check_command = "cpu_usage"

  # Run on the monitored host itself, like every other check in this file;
  # without this the check would be executed on the local node for all hosts.
  if (host.name != NodeName) {
    command_endpoint = host.name
  }

  assign where host.address && host.vars.os == "Linux"
}

# Memory usage of the host.
apply Service "memory" {
  import "generic-service"

  check_command = "memory"

  if (host.name != NodeName) {
    command_endpoint = host.name
  }

  # Thresholds handed to the "memory" CheckCommand.
  # NOTE(review): unit/direction (presumably percent free) is defined by the
  # CheckCommand, which is not part of this file - confirm.
  vars.memory.warning = "10"
  vars.memory.critical = "5"

  assign where host.address && host.vars.os == "Linux"
}

# Free space and free inodes on the host's disks.
apply Service "disk" {
  import "generic-service"

  check_command = "disk"

  if (host.name != NodeName) {
    command_endpoint = host.name
  }

  # Check all local disks
  vars.disk_all = true
  vars.disk_local = true

  # Paths (regular expressions) and filesystem types left out of the check.
  # "+=" appends to whatever the imported template already set.
  vars.disk_ignore_ereg_path += [ "^/run", "^/var/lib/docker/volumes" ]
  vars.disk_exclude_type += [
    "devtmpfs", "sysfs", "procfs", "proc", "aufs", "tmpfs",
    "fuse.gvfsd-fuse", "fuse.fuse-remount", "fuse.jetbrains-toolbox",
    "fuse.sshfs", "fuse.nvim", "debugfs", "tracefs", "nsfs", "vfat", "udf",
    "overlay"
  ]

  # Set levels
  # NOTE(review): vars.disk.critical ("5%") and vars.disk_cfree ("10%") give
  # two different critical levels; which set the "disk" CheckCommand actually
  # reads depends on its definition, which is not part of this file - confirm
  # and drop the unused pair.
  vars.disk.critical = "5%"
  vars.disk.warning = "15%"
  vars.disk_wfree = "15%"
  vars.disk_cfree = "10%"
  vars.disk_inode_wfree = "15%"
  vars.disk_inode_cfree = "10%"
  vars.disk_units = "MB"

  assign where host.address && host.vars.os == "Linux"
}

# System load of the host.
apply Service "load" {
  import "generic-service"

  check_command = "load"

  if (host.name != NodeName) {
    command_endpoint = host.name
  }

  vars.load_percpu = "true"

  assign where host.address && host.vars.os == "Linux"
}

# State of the host's systemd units.
apply Service "systemd_units" {
  import "generic-service"

  check_command = "systemd_units"

  if (host.name != NodeName) {
    command_endpoint = host.name
  }

  # Scheduling overrides for this service: regular checks every 5 minutes,
  # rechecks every minute, up to 5 check attempts.
  max_check_attempts = 5
  retry_interval = 1m
  check_interval = 5m

  assign where host.address && host.vars.os == "Linux"
}

################################################################################
# Checks for updates, newer kernels, libs, etc.
#
################################################################################

# Package updates, checked with the "apt" command every 15 minutes.
apply Service "apt-Updates" {
  import "generic-service"

  check_interval = 15m
  check_command = "apt"

  # Hosts other than the local node run the check on their own endpoint.
  if (host.name != NodeName) {
    command_endpoint = host.name
  }

  assign where host.address && host.vars.os == "Linux"
}

# Kernel check, using the "needrestart_kernel" command every 15 minutes.
apply Service "running_kernel" {
  import "generic-service"

  check_interval = 15m
  check_command = "needrestart_kernel"

  # Hosts other than the local node run the check on their own endpoint.
  if (host.name != NodeName) {
    command_endpoint = host.name
  }

  assign where host.address && host.vars.os == "Linux"
}

# Library check, using the "needrestart_libs" command every 15 minutes.
apply Service "libs" {
  import "generic-service"

  check_interval = 15m
  check_command = "needrestart_libs"

  # Hosts other than the local node run the check on their own endpoint.
  if (host.name != NodeName) {
    command_endpoint = host.name
  }

  assign where host.address && host.vars.os == "Linux"
}