---
# main.yml
# Memory sizing, one reservation/limit pair per monitoring service.
# NOTE(review): presumably consumed as container memory reservation/limit by
# the stack templates — confirm against the templates that read these.

# Prometheus
prometheus_reservation_memory: 1G
prometheus_limit_memory: 1G
prometheus_storage_retention_time: 90d

# Alertmanager
alertmanager_reservation_memory: 100M
alertmanager_limit_memory: 100M

# Node exporter
node_exporter_reservation_memory: 16M
node_exporter_limit_memory: 32M

# cAdvisor
cadvisor_reservation_memory: 100M
cadvisor_limit_memory: 100M

# Elasticsearch exporter
elasticsearch_exporter_reservation_memory: 8M
elasticsearch_exporter_limit_memory: 24M

# Postgres exporter
postgres_exporter_reservation_memory: 16M
postgres_exporter_limit_memory: 32M

# StatsD exporter
statsd_exporter_reservation_memory: 8M
statsd_exporter_limit_memory: 16M

# Blackbox exporter
blackbox_exporter_reservation_memory: 16M
blackbox_exporter_limit_memory: 32M

# JSONPath exporter
jsonpath_exporter_reservation_memory: 32M
jsonpath_exporter_limit_memory: 64M

# Azure blob exporter
azure_blob_exporter_reservation_memory: 16M
azure_blob_exporter_limit_memory: 64M

# Grafana
grafana_reservation_memory: 100M
grafana_limit_memory: 100M
# Alerting thresholds.

# Container CPU / memory usage thresholds, in percent.
# NOTE(review): "theshold" is misspelled in the two key names below. The names
# are kept unchanged because files outside this one may reference them.
container_cpu_usage_percentage_theshold: 90
container_memory_usage_percentage_theshold: 90

# Server-side HTTP error threshold, in percent.
server_side_http_errors_threshold_percentage: 1

# Override this at environment level
expected_minimum_logs_per_minute: 0
# Expected intervals between data backups and between Elasticsearch snapshots.
# Both default to 25 hours expressed in minutes: 1 day (24 hours) plus 1 extra
# hour for the backup / snapshot process to complete.
expected_data_backup_interval_in_minutes: "{{ 25 * 60 }}"
expected_elasticsearch_snapshot_interval_in_minutes: "{{ 25 * 60 }}"

# Expected backup size in bytes. Defaults to 1KB, which is good enough to catch
# the errors mentioned in
# https://about.gitlab.com/2017/02/01/gitlab-dot-com-database-incident/
expected_data_backup_size_in_bytes: 1024
# Feature toggles.
enable_postgres_availability_check: true
enable_scraping_docker_metrics: false

# Ports. Note the differing types: docker_metrics_port is a quoted string,
# es_port is an integer.
docker_metrics_port: "2377"
es_port: 9200

# Falls back to 1 when kong_replicas is not defined.
kong_cluster_expected_number_of_nodes: "{{ kong_replicas | default(1) }}"

# Destination directories for the monitor stack files and config files.
monitor_stack_files_dest_dir: /opt/docker/stacks/stateful_monitor/stack
monitor_config_files_dest_dir: /opt/docker/stacks/stateful_monitor/config

# docker-service-replicas-exporter name and version.
docker_service_replicas_exporter_name: docker-service-replicas-exporter
docker_service_replicas_exporter_version: v0.0.3
# Config template file names for the monitor stack.
monitor_config_templates:
  - prometheus.yml
  - alertmanagerconfig.yml
  - blackboxconfig.yml
  - statsd_mapping.yml
  - elasticsearch_snapshots_exporter_config.yml
  - data_backup_azure_blob_exporter_config.yml
  - alertrules.nodes.yml
  - alertrules.task.yml
  - alertrules.es.yml
  - alertrules.logs.yml
  - alertrules.backups.yml
  - alertrules.services.yml
  - alertrules.postgresql.yml
  - alertrules.process.yml
  - jmx_httpserver.yml

# Postgres-specific config template file names.
monitor_config_templates_postgres:
  - postgresmasterqueries.yml
  - postgresslavequeries.yml

# alerts_mailing_list is not defined in this file; it must come from elsewhere.
devops_alerts_mailing_list: "{{ alerts_mailing_list }}"

# Maps each team to its alert mailing list and the services it covers.
# Every team-specific mailing list except the devops one falls back to
# devops_alerts_mailing_list when its own variable is undefined.
# NOTE(review): entries such as `monitor_.*` look like regular expressions
# matched against service names — confirm against the consuming template.
service_teams:
  - team: devops_team
    alerts_mailing_list: "{{ devops_alerts_mailing_list }}"
    services:
      - monitor_.*
      - monit
      - logger_.*
      - proxy_.*
  - team: app_team
    alerts_mailing_list: "{{ app_alerts_mailing_list | default(devops_alerts_mailing_list) }}"
    services:
      - actor-service
      - learner-service
      - lms-service
      - content-service
      - player_player
      - cassandra
      - composite_search
      - analytics-api
      - tomcat
      - logstash
      - search
      - neo4j
  - team: keycloak_team
    alerts_mailing_list: "{{ keycloak_alerts_mailing_list | default(devops_alerts_mailing_list)}}"
    services:
      - keycloak
  - team: api_manager_team
    alerts_mailing_list: "{{ api_manager_alerts_mailing_list | default(devops_alerts_mailing_list)}}"
    services:
      - api-manager_.*
      - adminutil_.*
  - team: site_team
    alerts_mailing_list: "{{ site_alerts_mailing_list | default(devops_alerts_mailing_list)}}"
    services:
      - sunbird_static_site
  - team: druid
    alerts_mailing_list: "{{ druid_alerts_mailing_list | default(devops_alerts_mailing_list)}}"
    services:
      - druidzookeeper
      - druidpostgres
      - overlord
      - coordinator
      - historical
      - broker
      - middlemanager

# Probe definitions: each entry names a service, a probe module and the
# target URLs to probe.
service_blackbox_checks:
  - service_name: 'analytics-api'
    probe_module: http_2xx
    targets:
      - "{{sunbird_analytics_api_base_url}}/health"
  - service_name: 'learning-service'
    probe_module: http_2xx
    targets:
      - "{{sunbird_content_repo_api_base_url}}/health"
  - service_name: 'search-service'
    probe_module: http_2xx
    targets:
      - "{{sunbird_search_service_api_base_url}}/health"
  - service_name: 'monit'
    probe_module: http_2xx
    targets:
      - "http://{{ groups['swarm-bootstrap-manager'][0] }}:2812"
  - service_name: 'keycloak'
    probe_module: http_2xx
    targets:
      - "{{proto}}://{{proxy_server_name}}/auth/realms/sunbird/protocol/openid-connect/auth?client_id=portal&state=foo&redirect_uri=https%3A%2F%2F{{proxy_server_name}}%2Fprivate%2Findex%3Fauth_callback%3D1&scope=openid&response_type=code"

# External URLs and route prefixes for Prometheus and Alertmanager.
# NOTE(review): `api__host` is spelled with a double underscore — confirm it
# matches the variable name defined in the inventory.
prometheus_route_prefix: prometheus
prometheus_web_external_url: "{{proto}}://{{api__host}}:9090/{{ prometheus_route_prefix }}"
prometheus_alertmanager_route_prefix: alertmanager
prometheus_alertmanager_web_external_url: "{{proto}}://{{api__host}}:9093/{{ prometheus_alertmanager_route_prefix }}"

# Postgres exporter connection settings.
# The password has no value here (parses as null).
# NOTE(review): presumably supplied per environment — confirm.
postgres_exporter_password:
postgres_exporter_postgres_port: 5432
postgres_exporter_user: postgres_exporter

root_group: root
root_owner: root

backup_storage_name: prometheus_backup
prometheus_stateful_mount_point: "/root/dockerdata/prometheus_stateful/data/"
docker_service_replicas_memory_limit: 512MB