# NOTE: stray loader message captured with this file ("An error occurred while loading the file. Please try again."); it is not part of the rule definitions.
# Node-level alerting rules (Prometheus rule-file layout: groups -> rules).
# The raw/endraw tags around every Prometheus template expression are kept
# verbatim; presumably this file is rendered by a Jinja/Liquid-style engine
# before Prometheus loads it -- TODO confirm.
groups:
  - name: alertrules.nodes
    rules:
      # CPU busy percentage per node, derived from the idle-mode CPU counter;
      # fires when it stays above 90 for one minute.
      # rate() is used instead of irate(): the Prometheus documentation
      # recommends rate() for alerts because short irate() swings can reset
      # the `for` clause.
      # node_uname_info is joined in (as the sibling rules do) so the nodename
      # label used by the annotations exists on the alert; the description
      # shows the instance label because the aggregation keeps no host label.
      - alert: high_cpu_usage_on_node
        expr: >-
          100 - (avg by (instance, nodename) (
          rate(node_cpu_seconds_total{job="vm-node-exporter",mode="idle"}[5m])
          * on(instance) group_left(nodename) node_uname_info) * 100) > 90
        for: 1m
        annotations:
          description: '{% raw %}{{ $labels.nodename }}{% endraw %} ({% raw %}{{ $labels.instance }}{% endraw %}) is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value }}{% endraw %}%.'
          summary: "HIGH CPU USAGE WARNING ON '{% raw %}{{ $labels.nodename }}{% endraw %}'"

      # Used-memory percentage per node: (MemTotal - MemAvailable) / MemTotal;
      # fires when it stays above 95 for one minute.
      # The grouping keeps instance next to nodename so that both labels
      # referenced by the description are present on the alert.
      - alert: high_memory_usage_on_node
        expr: >-
          sum by (instance, nodename) (
          ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)
          / node_memory_MemTotal_bytes)
          * on(instance) group_left(nodename) node_uname_info * 100) > 95
        for: 1m
        annotations:
          description: '{% raw %}{{ $labels.nodename }}{% endraw %} ({% raw %}{{ $labels.instance }}{% endraw %}) is using a LOT of MEMORY. MEMORY usage is over {% raw %}{{ humanize $value }}{% endraw %}%.'
          summary: "HIGH MEMORY USAGE WARNING TASK ON '{% raw %}{{ $labels.nodename }}{% endraw %}'"

      # 1-minute load average divided by the number of CPUs (count of
      # system-mode CPU series per instance), as a percentage; fires when it
      # stays above 200 for one minute.
      # The CPU series is node_cpu_seconds_total, the same metric name the CPU
      # rule in this group queries, instead of the older un-suffixed node_cpu.
      - alert: high_load_on_node
        expr: >-
          sum by (instance, nodename) (
          (node_load1 / count without (cpu, mode) (node_cpu_seconds_total{mode="system"}))
          * on(instance) group_left(nodename) node_uname_info * 100) > 200
        for: 1m
        annotations:
          description: '{% raw %}{{ $labels.nodename }}{% endraw %} ({% raw %}{{ $labels.instance }}{% endraw %}) has a high load average. Load average is {% raw %}{{ humanize $value }}{% endraw %}%.'
          summary: "HIGH LOAD AVERAGE WARNING ON '{% raw %}{{ $labels.nodename }}{% endraw %}'"

      # Fires when any scraped target has reported up == 0 for one minute.
      # NOTE(review): the selector has no job filter, so this also fires for
      # targets that are not node exporters -- confirm that is intended.
      # NOTE(review): the summary reads a host label; it is only populated if
      # the scrape configuration attaches one to the target -- verify.
      - alert: node_exporter_down
        expr: up == 0
        for: 1m
        annotations:
          description: "The node exporter '{% raw %}{{ $labels.job }}{% endraw %}' is down."
          summary: "NODE EXPORTER SERVICE CRITICAL: NODE '{% raw %}{{ $labels.host }}{% endraw %}'"

      # Used percentage of the filesystem mounted at "/"; fires when it stays
      # above 80 for one minute.
      # The filesystem metrics carry the _bytes suffix, matching the naming of
      # the memory metrics queried elsewhere in this group.
      - alert: node_running_out_of_disk_space
        expr: >-
          sum by (nodename) (
          (node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})
          * 100 / node_filesystem_size_bytes{mountpoint="/"}
          * on(instance) group_left(nodename) node_uname_info) > 80
        for: 1m
        annotations:
          description: 'More than 80% of disk used. Disk usage is {% raw %}{{ humanize $value }}{% endraw %}%'
          summary: "LOW DISK SPACE WARNING: NODE '{% raw %}{{ $labels.nodename }}{% endraw %}' "