From c1150af4fd48d38c6728675932775ac9db90a824 Mon Sep 17 00:00:00 2001 From: S M Y <smy.altamash@gmail.com> Date: Fri, 28 Jun 2019 22:02:10 +0530 Subject: [PATCH] Changed the alertrules to get correct values --- .../templates/alertrules.nodes.yml | 12 ++++++------ .../stack-monitor/templates/alertrules.nodes.yml | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ansible/roles/stack-monitor-stateful/templates/alertrules.nodes.yml b/ansible/roles/stack-monitor-stateful/templates/alertrules.nodes.yml index 1b4d3bdc4..0a03893ba 100644 --- a/ansible/roles/stack-monitor-stateful/templates/alertrules.nodes.yml +++ b/ansible/roles/stack-monitor-stateful/templates/alertrules.nodes.yml @@ -7,24 +7,24 @@ groups: labels: severity: WARNING annotations: - description: '{% raw %}{{ $labels.nodename }}{% endraw %} ({% raw %}{{ $labels.host }}{% endraw %}) is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' - summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.nodename }}{% endraw %}' + description: '{% raw %}{{ $labels.instance }}{% endraw %} is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' + summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.instance }}{% endraw %}' - alert: high_cpu_usage_on_node_CRITICAL expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{job="vm-node-exporter",mode="idle"}[5m])) * 100) >= {{ node_cpu_usage_percentage_threshold_Critical }} and (avg by (instance) (irate(node_cpu_seconds_total{job="vm-node-exporter",mode="idle"}[5m])) * 100) < {{ node_cpu_usage_percentage_threshold_Fatal }} for: 1m labels: severity: CRITICAL annotations: - description: '{% raw %}{{ $labels.nodename }}{% endraw %} ({% raw %}{{ $labels.host }}{% endraw %}) is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' - summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.nodename }}{% endraw %}' + description: '{% raw %}{{ $labels.instance }}{% endraw %} is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' + summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.instance }}{% endraw %}' - alert: high_cpu_usage_on_node_FATAL expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{job="vm-node-exporter",mode="idle"}[5m])) * 100) >= {{ node_cpu_usage_percentage_threshold_Fatal }} for: 1m labels: severity: FATAL annotations: - description: '{% raw %}{{ $labels.nodename }}{% endraw %} ({% raw %}{{ $labels.host }}{% endraw %}) is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' - summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.nodename }}{% endraw %}' + description: '{% raw %}{{ $labels.instance }}{% endraw %} is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' + summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.instance }}{% endraw %}' - alert: high_memory_usage_on_node_WARNING expr: sum by(nodename) ((((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(nodename) node_uname_info * 100) >= {{ node_memory_usage_percentage_threshold_Warning }} and (((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(nodename) node_uname_info * 100) < {{ node_memory_usage_percentage_threshold_Critical }} ) for: 1m diff --git a/ansible/roles/stack-monitor/templates/alertrules.nodes.yml b/ansible/roles/stack-monitor/templates/alertrules.nodes.yml index f7dca91ee..4cf006987 100644 --- a/ansible/roles/stack-monitor/templates/alertrules.nodes.yml +++ b/ansible/roles/stack-monitor/templates/alertrules.nodes.yml @@ -7,24 +7,24 @@ groups: labels: severity: WARNING annotations: - description: '{% raw %}{{ $labels.nodename }}{% endraw %} ({% raw %}{{ $labels.host }}{% endraw %}) is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' - summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.nodename }}{% endraw %}' + description: '{% raw %}{{ $labels.instance }}{% endraw %} is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' + summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.instance }}{% endraw %}' - alert: high_cpu_usage_on_node_CRITICAL expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{job="vm-node-exporter",mode="idle"}[5m])) * 100) >= {{ node_cpu_usage_percentage_threshold_Critical }} and (avg by (instance) (irate(node_cpu_seconds_total{job="vm-node-exporter",mode="idle"}[5m])) * 100) < {{ node_cpu_usage_percentage_threshold_Fatal }} for: 1m labels: severity: CRITICAL annotations: - description: '{% raw %}{{ $labels.nodename }}{% endraw %} ({% raw %}{{ $labels.host }}{% endraw %}) is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' - summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.nodename }}{% endraw %}' + description: '{% raw %}{{ $labels.instance }}{% endraw %} is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' + summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.instance }}{% endraw %}' - alert: high_cpu_usage_on_node_FATAL expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{job="vm-node-exporter",mode="idle"}[5m])) * 100) >= {{ node_cpu_usage_percentage_threshold_Fatal }} for: 1m labels: severity: FATAL annotations: - description: '{% raw %}{{ $labels.nodename }}{% endraw %} ({% raw %}{{ $labels.host }}{% endraw %}) is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' - summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.nodename }}{% endraw %}' + description: '{% raw %}{{ $labels.instance }}{% endraw %} is using a LOT of CPU. CPU usage is {% raw %}{{ humanize $value}}{% endraw %}%.' + summary: 'HIGH CPU USAGE WARNING ON {% raw %}{{ $labels.instance }}{% endraw %}' - alert: high_memory_usage_on_node_WARNING expr: sum by(nodename) ((((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(nodename) node_uname_info * 100) >= {{ node_memory_usage_percentage_threshold_Warning }} and (((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(nodename) node_uname_info * 100) < {{ node_memory_usage_percentage_threshold_Critical }} ) for: 1m -- GitLab