# alertrules.process.yml (6.62 KiB)
# Prometheus-format alerting rules: a single rule group whose rules watch
# per-process-group metrics (namedprocess_namegroup_*).
# NOTE(review): the annotations carry raw/endraw template markers, so this
# file is presumably rendered by a Jinja-style engine before use - confirm.
groups:
- name: alertrules.process
  # One entry per alert.
  rules:
  # Fires once the number of "tomcat" group processes reported in the
  # Sleeping state has stayed below 1 for one minute.
  # NOTE(review): only state=Sleeping is counted; confirm that a process in
  # any other state should be treated as not running.
  - alert: tomcat_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="tomcat",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: tomcat process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  # Fires once the number of "search" group processes reported in the
  # Sleeping state has stayed below 1 for one minute.
  # NOTE(review): only state=Sleeping is counted; confirm that a process in
  # any other state should be treated as not running.
  - alert: search_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="search",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: search process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  # Fires once the number of "neo4j" group processes reported in the
  # Sleeping state has stayed below 1 for one minute.
  # NOTE(review): only state=Sleeping is counted; confirm that a process in
  # any other state should be treated as not running.
  - alert: neo4j_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="neo4j",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: neo4j process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  # Fires once the number of "kafka" group processes reported in the
  # Sleeping state has stayed below 1 for one minute.
  # NOTE(review): only state=Sleeping is counted; confirm that a process in
  # any other state should be treated as not running.
  - alert: kafka_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="kafka",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Kafka process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  # Fires once the process count of the "kafka" group has stayed above 1 for
  # one minute. Unlike the *_process_not_running rules, this one reads the
  # num_procs metric rather than the per-state breakdown.
  - alert: kafka_more_than_one_process_running
    expr: >-
      namedprocess_namegroup_num_procs{groupname="kafka"} > 1
    for: 1m
    labels:
      severity: CRITICAL
    annotations:
      # Names the process so the notification is unambiguous on its own.
      summary: More than one Kafka process running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  # Fires once the number of "secor" group processes reported in the
  # Sleeping state has differed from 9 for one minute. The comparison is an
  # inequality, so counts above 9 trigger the alert as well as counts below.
  # NOTE(review): the expected count of 9 is hard-coded; confirm it matches
  # the number of secor processes actually deployed.
  - alert: secor_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="secor",state="Sleeping"} != 9
    for: 1m
    labels:
      severity: CRITICAL
    annotations:
      summary: Secor process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  # Fires once the number of "zookeeper" group processes reported in the
  # Sleeping state has stayed below 1 for one minute.
  # NOTE(review): only state=Sleeping is counted; confirm that a process in
  # any other state should be treated as not running.
  - alert: zookeeper_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="zookeeper",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Zookeeper process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  # Fires once the number of "yarn" group processes reported in the
  # Sleeping state has stayed below 1 for one minute.
  # NOTE(review): only state=Sleeping is counted; confirm that a process in
  # any other state should be treated as not running.
  - alert: yarn_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="yarn",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: YARN process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  # The rules below all follow one pattern: fire once the number of processes
  # in the named group reported in the Sleeping state has stayed below 1 for
  # one minute.
  # NOTE(review): only state=Sleeping is counted; confirm that a process in
  # any other state should be treated as not running.
  - alert: cassandra_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="cassandra",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Cassandra process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  - alert: elasticsearch_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="elasticsearch",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Elasticsearch process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  - alert: logstash_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="logstash",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: CRITICAL
    annotations:
      summary: Logstash process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  # NOTE(review): this alert name starts with a capital letter, unlike its
  # siblings; left unchanged because anything matching on the alert name
  # would be affected by a rename.
  - alert: Analytics_api_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="analyticsapi",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Analytics API process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  - alert: druid_zookeeper_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="druidzookeeper",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Druid zookeeper is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  - alert: druid_postgres_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="druidpostgres",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Druid postgres is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  - alert: druid_overlord_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="overlord",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Druid overlord process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  - alert: druid_coordinator_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="coordinator",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Druid coordinator process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  - alert: druid_historical_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="historical",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Druid historical process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  - alert: druid_broker_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="broker",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Druid broker process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  - alert: druid_middleManager_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="middleManager",state="Sleeping"} < 1
    for: 1m
    labels:
      severity: FATAL
    annotations:
      summary: Druid middleManager process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  # NOTE(review): the two rules below carry no severity label, unlike every
  # other rule in this block - confirm whether that is intentional before
  # relying on severity-based routing for them.
  - alert: redisserver_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="redis-server",state="Sleeping"} < 1
    for: 1m
    annotations:
      summary: redis-server process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'
  - alert: influxdb_process_not_running
    expr: >-
      namedprocess_namegroup_states{groupname="influxd",state="Sleeping"} < 1
    for: 1m
    annotations:
      summary: influxdb process is not running
      description: 'Number of running processes are: {% raw %}{{$value}}{% endraw %}'