diff --git a/ansible/roles/stack-monitor/templates/prometheus.yml b/ansible/roles/stack-monitor/templates/prometheus.yml index 862b9e728d0cbc7f9f0bbd794b795d91bf209227..ec159d9afd55cd1b961855767a548f6cf0f78151 100644 --- a/ansible/roles/stack-monitor/templates/prometheus.yml +++ b/ansible/roles/stack-monitor/templates/prometheus.yml @@ -62,6 +62,16 @@ scrape_configs: action: drop {% endif %} + - job_name: 'nginx' + metrics_path: /metrics + scrape_interval: 10s + scrape_timeout: 5s + dns_sd_configs: + - names: + - 'tasks.proxy_proxy' + type: 'A' + port: 9145 + - job_name: 'statsd-exporter' static_configs: - targets: ['monitor_statsd_exporter:9102'] diff --git a/ansible/roles/stack-proxy/defaults/main.yml b/ansible/roles/stack-proxy/defaults/main.yml index 5b28734638d255d299ec78695934681caa4ebbc7..0ae31363be77f3393c7285701bfbd2bf7edd406f 100644 --- a/ansible/roles/stack-proxy/defaults/main.yml +++ b/ansible/roles/stack-proxy/defaults/main.yml @@ -3,7 +3,8 @@ hub_org: sunbird proxy_replicas: 1 proxy_reservation_memory: 32M -proxy_limit_memory: 128M +proxy_limit_memory: 64M +nginx_per_ip_connection_limit: 400 proxy_prometheus: false diff --git a/ansible/roles/stack-proxy/templates/nginx.conf b/ansible/roles/stack-proxy/templates/nginx.conf index baa921306d769b04152a5d98d5fcb3b6515da768..566be10a09d34e0c9f3631f6df4ea96464d563cb 100644 --- a/ansible/roles/stack-proxy/templates/nginx.conf +++ b/ansible/roles/stack-proxy/templates/nginx.conf @@ -5,7 +5,7 @@ error_log /var/log/nginx/error.log warn; pid /var/run/nginx.pid; events { - worker_connections 2048; + worker_connections 10000; } @@ -13,29 +13,66 @@ http { include /etc/nginx/mime.types; default_type application/octet-stream; - log_format main '$http_x_forwarded_for - $remote_addr - $remote_user [$time_local] ' + lua_load_resty_core off; + log_format main '$remote_addr - $remote_user [$time_local] ' '"$request" $status $body_bytes_sent ' '$request_time $upstream_response_time $pipe' '"$http_referer" 
"$http_user_agent"'; access_log /var/log/nginx/access.log main; + # Shared dictionary to store metrics + lua_shared_dict prometheus_metrics 10M; + lua_package_path "/etc/nginx/lua_modules/?.lua"; + # Defining metrics + init_by_lua ' + prometheus = require("prometheus").init("prometheus_metrics") + metric_requests = prometheus:counter( + "nginx_http_requests_total", "Number of HTTP requests", {"host", "status", "request_method"}) + metric_latency = prometheus:histogram( + "nginx_http_request_duration_seconds", "HTTP request latency", {"host"}) + metric_connections = prometheus:gauge( + "nginx_http_connections", "Number of HTTP connections", {"state"}) + '; + + # Collecting metrics + log_by_lua ' + metric_requests:inc(1, {ngx.var.server_name, ngx.var.status, ngx.var.request_method}) + metric_latency:observe(tonumber(ngx.var.request_time), {ngx.var.server_name}) + '; + sendfile on; #tcp_nopush on; client_max_body_size 60M; - keepalive_timeout 65; + keepalive_timeout 500s; + keepalive_requests 200; + + # Nginx connection limit per ip + limit_conn_zone $binary_remote_addr zone=limitbyaddr:10m; + limit_conn_status 429; upstream kong { server api-manager_kong:8000; keepalive 1000; } - #gzip on; upstream player { server player_player:3000; keepalive 1000; } include /etc/nginx/conf.d/*.conf; + + server { + listen 9145; + location /metrics { + content_by_lua ' + metric_connections:set(ngx.var.connections_reading, {"reading"}) + metric_connections:set(ngx.var.connections_waiting, {"waiting"}) + metric_connections:set(ngx.var.connections_writing, {"writing"}) + prometheus:collect() + '; + } + } } diff --git a/ansible/roles/stack-proxy/templates/proxy-default.conf b/ansible/roles/stack-proxy/templates/proxy-default.conf index d462b95b990080bd1874289cd899f6903d3c14bc..586b0a4433376b88df1098674a1b173652a1ecaf 100644 --- a/ansible/roles/stack-proxy/templates/proxy-default.conf +++ b/ansible/roles/stack-proxy/templates/proxy-default.conf @@ -3,6 +3,8 @@ server { listen 80; listen 
[::]:80; server_name {{ proxy_server_name }}; + # Limitting open connection per ip + limit_conn limitbyaddr {{ nginx_per_ip_connection_limit }}; return 301 https://{{ proxy_server_name }}$request_uri; } @@ -18,6 +20,8 @@ server { {% endif %} server_name {{ proxy_server_name }}; + # Limitting open connection per ip + limit_conn limitbyaddr {{ nginx_per_ip_connection_limit }}; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; diff --git a/ansible/roles/stack-proxy/templates/stack-proxy.yml b/ansible/roles/stack-proxy/templates/stack-proxy.yml index eee187d9721e0b8eefed5119dabccf1489024ae1..cd155ba14a2e41b90d90de2e20cd05f94747bad5 100644 --- a/ansible/roles/stack-proxy/templates/stack-proxy.yml +++ b/ansible/roles/stack-proxy/templates/stack-proxy.yml @@ -3,11 +3,18 @@ version: '3.3' services: proxy: image: "{{hub_org}}/{{image_name}}:{{image_tag}}" + # This is a workaround for remote ip address is not visible nginx ports: - - "443:443" - - "80:80" + - mode: host + target: 80 + published: 80 + protocol: TCP + - mode: host + target: 443 + published: 443 + protocol: TCP deploy: - replicas: {{ proxy_replicas }} + mode: global resources: reservations: memory: "{{ proxy_reservation_memory }}" diff --git a/images/proxy/Dockerfile b/images/proxy/Dockerfile index 0896451336941b3dff597eef72139ad574683f5b..7bdb2cae33dcb635e8d12409fdd23e3b19d54dfc 100644 --- a/images/proxy/Dockerfile +++ b/images/proxy/Dockerfile @@ -1,5 +1,163 @@ -FROM nginx:1.13.8-alpine +FROM alpine:3.9 -RUN rm -rf /etc/nginx/conf.d +LABEL maintainer="NGINX Docker Maintainers <docker-maint@nginx.com>" -RUN rm -rf /usr/share/nginx/html +ENV NGINX_VERSION 1.15.8 + +ENV LUAJIT_LIB=/usr/local/lib +ENV LUAJIT_INC=/usr/local/include/luajit-2.1 + +COPY nginx_devel_kit /opt/nginx_devel_kit +COPY luajit /usr/src/luajit +COPY nginx_lua /opt/nginx_lua + +RUN GPG_KEYS=B0F4253373F8F6F510D42178520A9993A1C052F8 \ + && CONFIG="\ + 
--prefix=/etc/nginx \ + --sbin-path=/usr/sbin/nginx \ + --modules-path=/usr/lib/nginx/modules \ + --with-ld-opt='-Wl,-rpath,/usr/local/lib' \ + --add-module=/opt/nginx_devel_kit \ + --add-module=/opt/nginx_lua \ + --conf-path=/etc/nginx/nginx.conf \ + --error-log-path=/var/log/nginx/error.log \ + --http-log-path=/var/log/nginx/access.log \ + --pid-path=/var/run/nginx.pid \ + --lock-path=/var/run/nginx.lock \ + --http-client-body-temp-path=/var/cache/nginx/client_temp \ + --http-proxy-temp-path=/var/cache/nginx/proxy_temp \ + --http-fastcgi-temp-path=/var/cache/nginx/fastcgi_temp \ + --http-uwsgi-temp-path=/var/cache/nginx/uwsgi_temp \ + --http-scgi-temp-path=/var/cache/nginx/scgi_temp \ + --user=nginx \ + --group=nginx \ + --with-http_ssl_module \ + --with-http_realip_module \ + --with-http_addition_module \ + --with-http_sub_module \ + --with-http_dav_module \ + --with-http_flv_module \ + --with-http_mp4_module \ + --with-http_gunzip_module \ + --with-http_gzip_static_module \ + --with-http_random_index_module \ + --with-http_secure_link_module \ + --with-http_stub_status_module \ + --with-http_auth_request_module \ + --with-http_xslt_module=dynamic \ + --with-http_image_filter_module=dynamic \ + --with-http_geoip_module=dynamic \ + --with-threads \ + --with-stream \ + --with-stream_ssl_module \ + --with-stream_ssl_preread_module \ + --with-stream_realip_module \ + --with-stream_geoip_module=dynamic \ + --with-http_slice_module \ + --with-mail \ + --with-mail_ssl_module \ + --with-compat \ + --with-file-aio \ + --with-http_v2_module " \ + && addgroup -S nginx \ + && adduser -D -S -h /var/cache/nginx -s /sbin/nologin -G nginx nginx \ + && apk add --no-cache libgcc \ + && apk add --no-cache --virtual .build-deps \ + gcc \ + libc-dev \ + make \ + openssl-dev \ + pcre-dev \ + zlib-dev \ + linux-headers \ + curl \ + gnupg1 \ + libxslt-dev \ + gd-dev \ + geoip-dev \ + && curl -fSL https://nginx.org/download/nginx-$NGINX_VERSION.tar.gz -o nginx.tar.gz \ + && curl -fSL 
https://nginx.org/download/nginx-$NGINX_VERSION.tar.gz.asc -o nginx.tar.gz.asc \ + && export GNUPGHOME="$(mktemp -d)" \ + && found=''; \ + for server in \ + ha.pool.sks-keyservers.net \ + hkp://keyserver.ubuntu.com:80 \ + hkp://p80.pool.sks-keyservers.net:80 \ + pgp.mit.edu \ + ; do \ + echo "Fetching GPG key $GPG_KEYS from $server"; \ + gpg --keyserver "$server" --keyserver-options timeout=10 --recv-keys "$GPG_KEYS" && found=yes && break; \ + done; \ + test -z "$found" && echo >&2 "error: failed to fetch GPG key $GPG_KEYS" && exit 1; \ + gpg --batch --verify nginx.tar.gz.asc nginx.tar.gz \ + && rm -rf "$GNUPGHOME" nginx.tar.gz.asc \ + && mkdir -p /usr/src \ + && tar -zxC /usr/src -f nginx.tar.gz \ + && rm nginx.tar.gz \ + && cd /usr/src/luajit \ + && export LUAJIT_LIB=/usr/local/lib \ + && export LUAJIT_INC=/usr/local/include/luajit-2.1 \ + && cd /usr/src/luajit \ + && make -j$(getconf _NPROCESSORS_ONLN) \ + && make install \ + && ls /usr/local/lib /usr/local/include/luajit-2.1 \ + && cd /usr/src/nginx-$NGINX_VERSION \ + && ./configure $CONFIG --with-debug \ + && make -j$(getconf _NPROCESSORS_ONLN) \ + && mv objs/nginx objs/nginx-debug \ + && mv objs/ngx_http_xslt_filter_module.so objs/ngx_http_xslt_filter_module-debug.so \ + && mv objs/ngx_http_image_filter_module.so objs/ngx_http_image_filter_module-debug.so \ + && mv objs/ngx_http_geoip_module.so objs/ngx_http_geoip_module-debug.so \ + && mv objs/ngx_stream_geoip_module.so objs/ngx_stream_geoip_module-debug.so \ + && ./configure $CONFIG \ + && make -j$(getconf _NPROCESSORS_ONLN) \ + && make install \ + && rm -rf /etc/nginx/html/ \ + && mkdir /etc/nginx/conf.d/ \ + && mkdir -p /usr/share/nginx/html/ \ + && install -m644 html/index.html /usr/share/nginx/html/ \ + && install -m644 html/50x.html /usr/share/nginx/html/ \ + && install -m755 objs/nginx-debug /usr/sbin/nginx-debug \ + && install -m755 objs/ngx_http_xslt_filter_module-debug.so /usr/lib/nginx/modules/ngx_http_xslt_filter_module-debug.so \ + && install 
-m755 objs/ngx_http_image_filter_module-debug.so /usr/lib/nginx/modules/ngx_http_image_filter_module-debug.so \ + && install -m755 objs/ngx_http_geoip_module-debug.so /usr/lib/nginx/modules/ngx_http_geoip_module-debug.so \ + && install -m755 objs/ngx_stream_geoip_module-debug.so /usr/lib/nginx/modules/ngx_stream_geoip_module-debug.so \ + && ln -s ../../usr/lib/nginx/modules /etc/nginx/modules \ + && strip /usr/sbin/nginx* \ + && strip /usr/lib/nginx/modules/*.so \ + && rm -rf /usr/src/nginx-$NGINX_VERSION \ + \ + # Bring in gettext so we can get `envsubst`, then throw + # the rest away. To do this, we need to install `gettext` + # then move `envsubst` out of the way so `gettext` can + # be deleted completely, then move `envsubst` back. + && apk add --no-cache --virtual .gettext gettext \ + && mv /usr/bin/envsubst /tmp/ \ + \ + && runDeps="$( \ + scanelf --needed --nobanner --format '%n#p' /usr/sbin/nginx /usr/lib/nginx/modules/*.so /tmp/envsubst \ + | tr ',' '\n' \ + | sort -u \ + | awk 'system("[ -e /usr/local/lib/" $1 " ]") == 0 { next } { print "so:" $1 }' \ + )" \ + && apk add --no-cache --virtual .nginx-rundeps $runDeps \ + && apk del .build-deps \ + && apk del .gettext \ + && mv /tmp/envsubst /usr/local/bin/ \ + \ + # Bring in tzdata so users could set the timezones through the environment + # variables + && apk add --no-cache tzdata \ + \ + # forward request and error logs to docker log collector + && ln -sf /dev/stdout /var/log/nginx/access.log \ + && ln -sf /dev/stderr /var/log/nginx/error.log + +COPY nginx.conf /etc/nginx/nginx.conf +copy prometheus.lua /etc/nginx/lua_modules/ + +EXPOSE 80 + +STOPSIGNAL SIGTERM + +CMD ["nginx", "-g", "daemon off;"] diff --git a/images/proxy/Jenkinsfile b/images/proxy/Jenkinsfile index e00c08357f3bb19e327405bbb143be5376d129c5..1d80a01f5d887e65f43b50f2217c1bc11141ebda 100644 --- a/images/proxy/Jenkinsfile +++ b/images/proxy/Jenkinsfile @@ -32,10 +32,10 @@ node('build-slave') { stage('Build') { env.NODE_ENV = "build" print 
"Environment will be : ${env.NODE_ENV}" - sh("./images/proxy/build.sh ${build_tag} ${env.NODE_NAME} ${hub_org}") + sh("cd ./images/proxy && ./build.sh ${build_tag} ${env.NODE_NAME} ${hub_org}") } stage('ArchiveArtifacts') { - archiveArtifacts "metadata.json" + archiveArtifacts "images/proxy/metadata.json" currentBuild.description = "${build_tag}" } } diff --git a/images/proxy/build.sh b/images/proxy/build.sh index ebf64006ed41f04b25fe8ced503b18d2aafdc6a1..b7d6a3c9fa4aa4567c62dc00af1b02577a0e6ade 100755 --- a/images/proxy/build.sh +++ b/images/proxy/build.sh @@ -1,10 +1,24 @@ #!/bin/bash + # Build script set -eo pipefail + +# Downloading deps +wget https://codeload.github.com/simplresty/ngx_devel_kit/tar.gz/v0.3.0 -O ngx_devel_kit_0_3_0.tar.gz +wget https://codeload.github.com/openresty/luajit2/tar.gz/v2.1-20190626 -O luajit_2_1.tar.gz +wget https://codeload.github.com/openresty/lua-nginx-module/tar.gz/v0.10.15 -O ngx_lua.tar.gz + +# Creating deps directory +mkdir nginx_devel_kit luajit nginx_lua +tar --strip-components=1 -xf ngx_devel_kit_0_3_0.tar.gz -C nginx_devel_kit +tar --strip-components=1 -xf luajit_2_1.tar.gz -C luajit +tar --strip-components=1 -xf ngx_lua.tar.gz -C nginx_lua + +# Creating nginx build_tag=$1 name=proxy node=$2 org=$3 -docker build -f ./images/proxy/Dockerfile -t ${org}/${name}:${build_tag} . +docker build -t ${org}/${name}:${build_tag} . 
echo {\"image_name\" : \"${name}\", \"image_tag\" : \"${build_tag}\", \"node_name\" : \"$node\"} > metadata.json diff --git a/images/proxy/nginx.conf b/images/proxy/nginx.conf new file mode 100644 index 0000000000000000000000000000000000000000..dfdcc8d79971e83b6f322377e889a52d5447d80c --- /dev/null +++ b/images/proxy/nginx.conf @@ -0,0 +1,70 @@ +user nginx; +worker_processes 1; +load_module modules/ndk_http_module.so; +load_module modules/ngx_http_lua_module.so; + + +error_log /var/log/nginx/error.log warn; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + + +http { + include /etc/nginx/mime.types; + # Don't need complete openresty + lua_load_resty_core off; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] ' + '"$request" $status $body_bytes_sent ' + '$request_time $upstream_response_time $pipe' + '"$http_referer" "$http_user_agent"'; + + access_log /var/log/nginx/access.log main; + + # Shared dictionary to store metrics + lua_shared_dict prometheus_metrics 10M; + lua_package_path "/etc/nginx/lua_modules/?.lua"; + # Collecting metrics + init_by_lua ' + prometheus = require("prometheus").init("prometheus_metrics") + metric_requests = prometheus:counter( + "nginx_http_requests_total", "Number of HTTP requests", {"host", "status", "request_method"}) + metric_latency = prometheus:histogram( + "nginx_http_request_duration_seconds", "HTTP request latency", {"host"}) + metric_connections = prometheus:gauge( + "nginx_http_connections", "Number of HTTP connections", {"state"}) + '; + + # Collecting metrics + log_by_lua ' + metric_requests:inc(1, {ngx.var.server_name, ngx.var.status, ngx.var.request_method}) + metric_latency:observe(tonumber(ngx.var.request_time), {ngx.var.server_name}) + '; + + + sendfile on; + #tcp_nopush on; + + keepalive_timeout 500s; + keepalive_requests 200; + + #gzip on; + + include /etc/nginx/conf.d/*.conf; + + server { + listen 9145; + location /metrics { + content_by_lua ' + 
metric_connections:set(ngx.var.connections_reading, {"reading"}) + metric_connections:set(ngx.var.connections_waiting, {"waiting"}) + metric_connections:set(ngx.var.connections_writing, {"writing"}) + prometheus:collect() + '; + } + } +} diff --git a/images/proxy/prometheus.lua b/images/proxy/prometheus.lua new file mode 100644 index 0000000000000000000000000000000000000000..e627012f95be535f0ab841dd285cdd433ded8e99 --- /dev/null +++ b/images/proxy/prometheus.lua @@ -0,0 +1,570 @@ +-- vim: ts=2:sw=2:sts=2:expandtab +-- +-- This module uses a single dictionary shared between Nginx workers to keep +-- all metrics. Each counter is stored as a separate entry in that dictionary, +-- which allows us to increment them using built-in `incr` method. +-- +-- Prometheus requires that (a) all samples for a given metric are presented +-- as one uninterrupted group, and (b) buckets of a histogram appear in +-- increasing numerical order. We satisfy that by carefully constructing full +-- metric names (i.e. metric name along with all labels) so that they meet +-- those requirements while being sorted alphabetically. In particular: +-- +-- * all labels for a given metric are presented in reproducible order (the one +-- used when labels were declared). "le" label for histogram metrics always +-- goes last; +-- * bucket boundaries (which are exposed as values of the "le" label) are +-- presented as floating point numbers with leading and trailing zeroes. +-- Number of of zeroes is determined for each bucketer automatically based on +-- bucket boundaries; +-- * internally "+Inf" bucket is stored as "Inf" (to make it appear after +-- all numeric buckets), and gets replaced by "+Inf" just before we +-- expose the metrics. 
--
-- Worked example: with bucket boundaries {0.00005, 10, 1000}, a histogram
-- `m1` carrying the label `site` = `site1` is stored under these keys:
--
--   m1_bucket{site="site1",le="0000.00005"}
--   m1_bucket{site="site1",le="0010.00000"}
--   m1_bucket{site="site1",le="1000.00000"}
--   m1_bucket{site="site1",le="Inf"}
--   m1_count{site="site1"}
--   m1_sum{site="site1"}
--
-- The "Inf" value is rewritten to "+Inf" when the metrics are published.
--
-- Latest version and documentation:
--   https://github.com/knyar/nginx-lua-prometheus
-- Released under MIT license.


-- Latency bucket boundaries used when a histogram does not bring its own;
-- they range from 5ms to 10s.
local DEFAULT_BUCKETS = {
  0.005, 0.01, 0.02, 0.03, 0.05, 0.075, 0.1, 0.2, 0.3, 0.4,
  0.5, 0.75, 1, 1.5, 2, 3, 4, 5, 10,
}

-- Metric is the prototype every metric kind (counter, gauge, histogram)
-- inherits from.
local Metric = {}

-- Create an object whose missing fields are looked up on the receiver.
--
-- Args:
--   o: (optional table) fields of the new object.
--
-- Returns:
--   the table `o` (or a fresh table) with the receiver as its metatable.
function Metric:new(o)
  local instance = o or {}
  self.__index = self
  return setmetatable(instance, self)
end

-- Verify that the label values supplied by a caller match the label names
-- the metric was declared with.
--
-- Args:
--   label_values: an array of label values (nil for label-less metrics).
--
-- Returns:
--   an error message, or nil when the values are acceptable.
function Metric:check_label_values(label_values)
  local expected = self.label_names

  if expected == nil then
    if label_values == nil then
      return
    end
    return "Expected no labels for " .. self.name .. ", got " .. #label_values
  end

  if label_values == nil then
    return "Expected " .. #expected .. " labels for " ..
           self.name .. ", got none"
  end

  if #expected ~= #label_values then
    return "Wrong number of labels for " .. self.name .. ". Expected " ..
           #expected .. ", got " .. #label_values
  end

  -- The lengths agree, but the value array may still contain a nil hole.
  for position, label in ipairs(expected) do
    if label_values[position] == nil then
      return "Unexpected nil value for label " .. label .. " of " .. self.name
    end
  end
end

local Counter = Metric:new()
-- Add `value` to a counter.
--
-- Args:
--   value: (number) amount to add; must not be negative. 1 is used when
--     omitted.
--   label_values: an array of label values. Leave nil for metrics declared
--     without labels.
function Counter:inc(value, label_values)
  local owner = self.prometheus
  local problem = self:check_label_values(label_values)
  if problem ~= nil then
    owner:log_error(problem)
    return
  end
  if value ~= nil and value < 0 then
    owner:log_error_kv(self.name, value, "Value should not be negative")
    return
  end

  owner:inc(self.name, self.label_names, label_values, value or 1)
end

local Gauge = Metric:new()
-- Overwrite a gauge with `value`.
--
-- Args:
--   value: (number) the new value of the gauge. Required.
--   label_values: an array of label values. Leave nil for metrics declared
--     without labels.
function Gauge:set(value, label_values)
  local owner = self.prometheus
  if value == nil then
    owner:log_error("No value passed for " .. self.name)
    return
  end

  local problem = self:check_label_values(label_values)
  if problem == nil then
    owner:set(self.name, self.label_names, label_values, value)
  else
    owner:log_error(problem)
  end
end


-- Add `value` to a gauge.
--
-- Args:
--   value: (number) amount to add; pass a negative number to lower the
--     gauge. 1 is used when omitted.
--   label_values: an array of label values. Leave nil for metrics declared
--     without labels.
function Gauge:inc(value, label_values)
  local owner = self.prometheus
  local problem = self:check_label_values(label_values)
  if problem == nil then
    owner:inc(self.name, self.label_names, label_values, value or 1)
  else
    owner:log_error(problem)
  end
end

local Histogram = Metric:new()
-- Feed one observation into a histogram.
--
-- Args:
--   value: (number) the observed value. Required.
--   label_values: an array of label values. Leave nil for metrics declared
--     without labels.
function Histogram:observe(value, label_values)
  local owner = self.prometheus
  if value == nil then
    owner:log_error("No value passed for " .. self.name)
    return
  end

  local problem = self:check_label_values(label_values)
  if problem == nil then
    owner:histogram_observe(self.name, self.label_names, label_values, value)
  else
    owner:log_error(problem)
  end
end

local Prometheus = { initialized = false }
Prometheus.__index = Prometheus

-- Build the dictionary key of a sample: the metric name followed by all of
-- its labels.
--
-- Args:
--   name: string
--   label_names: (array) label keys, in declaration order.
--   label_values: (array) label values, in the same order.
-- Returns:
--   (string) full metric name.
local function full_metric_name(name, label_names, label_values)
  if not label_names then
    return name
  end

  local rendered = {}
  for position, label in ipairs(label_names) do
    local text = string.format("%s", label_values[position])
    text = text:gsub("[^\032-\126]", "") -- strip non-printable characters
    text = text:gsub("\\", "\\\\")
    text = text:gsub('"', '\\"')
    rendered[#rendered + 1] = label .. '="' .. text .. '"'
  end
  return name .. "{" .. table.concat(rendered, ",") .. "}"
end

-- Work out the format used to print bucket boundaries.
--
-- Given a list of buckets, this returns a sprintf template for their
-- boundaries such that the formatted boundaries come out in increasing
-- order when sorted alphabetically.
--
-- In other words, this is the place where the number of leading and
-- trailing zeros is decided.
--
-- Args:
--   buckets: a list of buckets
--
-- Returns:
--   (string) a sprintf template.
local function construct_bucket_format(buckets)
  local max_order = 1
  local max_precision = 1
  for _, bucket in ipairs(buckets) do
    assert(type(bucket) == "number", "bucket boundaries should be numeric")
    -- "%f" always prints six decimals; strip the trailing zeros so that only
    -- the significant fractional digits are counted.
    local as_string = string.format("%f", bucket):gsub("0*$", "")
    local dot_idx = as_string:find(".", 1, true)
    max_order = math.max(max_order, dot_idx - 1)
    max_precision = math.max(max_precision, #as_string - dot_idx)
  end
  -- total width = integer digits + "." + fractional digits
  return "%0" .. (max_order + max_precision + 1) .. "." .. max_precision .. "f"
end

-- Extract short metric name from the full one.
--
-- Args:
--   full_name: (string) full metric name that can include labels.
--
-- Returns:
--   (string) short metric name with no labels. For a `*_bucket` metric of
--   histogram the _bucket suffix will be removed.
local function short_metric_name(full_name)
  local labels_start = full_name:find("{", 1, true)
  if not labels_start then
    -- no labels
    return full_name
  end
  local name = full_name:sub(1, labels_start - 1)
  -- Histogram bucket series always carry "le" as their final label. Match the
  -- whole label rather than the substring "le=", so that a counter or gauge
  -- named `*_bucket` with a label such as `role` or `module` is not mistaken
  -- for a histogram bucket.
  if name:sub(-7) == "_bucket" and full_name:find('[{,]le="[^"]*"}$') then
    return name:sub(1, -8)
  end
  -- this is not a histogram metric
  return name
end

-- Makes a shallow copy of the array part of a table.
--
-- Args:
--   source: (table or nil) the array to copy.
--
-- Returns:
--   a new array with the same elements (empty when `source` is nil).
local function copy_table(source)
  local new = {}
  if source ~= nil then
    for k, v in ipairs(source) do
      new[k] = v
    end
  end
  return new
end

-- Check metric name and label names for correctness.
--
-- Regular expressions to validate metric and label names are
-- documented in https://prometheus.io/docs/concepts/data_model/
--
-- Args:
--   metric_name: (string) metric name.
--   label_names: label names (array of strings).
--
-- Returns:
--   Either an error string, or nil if no errors were found.
local function check_metric_and_label_names(metric_name, label_names)
  -- A missing or non-string name is reported like any other invalid name
  -- instead of raising while indexing it.
  if type(metric_name) ~= "string" or
     not metric_name:match("^[a-zA-Z_:][a-zA-Z0-9_:]*$") then
    return "Metric name '" .. tostring(metric_name) .. "' is invalid"
  end
  for _, label_name in ipairs(label_names or {}) do
    if label_name == "le" then
      -- "le" is reserved for histogram bucket boundaries
      return "Invalid label name 'le' in " .. metric_name
    end
    if type(label_name) ~= "string" or
       not label_name:match("^[a-zA-Z_][a-zA-Z0-9_]*$") then
      return "Metric '" .. metric_name .. "' label name '" ..
             tostring(label_name) .. "' is invalid"
    end
  end
end

-- Initialize the module.
--
-- This should be called once from the `init_by_lua` section in nginx
-- configuration.
--
-- Args:
--   dict_name: (string) name of the nginx shared dictionary which will be
--     used to store all metrics
--   prefix: (optional string) if supplied, prefix is added to all
--     metric names on output
--
-- Returns:
--   an object that should be used to register metrics. When the shared
--   dictionary does not exist the object is returned uninitialized and every
--   attempt to register a metric on it is rejected.
function Prometheus.init(dict_name, prefix)
  local self = setmetatable({}, Prometheus)
  dict_name = dict_name or "prometheus_metrics"
  self.dict = ngx.shared[dict_name]
  if self.dict == nil then
    ngx.log(ngx.ERR,
      "Dictionary '", dict_name, "' does not seem to exist. ",
      "Please define the dictionary using `lua_shared_dict`.")
    return self
  end
  self.help = {}
  self.prefix = prefix or ''
  self.type = {}
  self.registered = {}
  self.buckets = {}
  self.bucket_format = {}
  self.initialized = true

  -- Built-in counter of errors reported through log_error(); it is reset to
  -- zero every time init() runs.
  self:counter("nginx_metric_errors_total",
    "Number of nginx-lua-prometheus errors")
  self.dict:set("nginx_metric_errors_total", 0)
  return self
end

-- Write a message to the nginx error log and count it in the
-- `nginx_metric_errors_total` metric.
--
-- Args:
--   ...: message parts, passed to ngx.log unchanged.
function Prometheus:log_error(...)
  ngx.log(ngx.ERR, ...)
  self.dict:incr("nginx_metric_errors_total", 1)
end

-- Report a failure to store `value` under `key`.
--
-- Args:
--   key: dictionary key that was being written.
--   value: value that was being written.
--   err: description of the failure.
function Prometheus:log_error_kv(key, value, err)
  self:log_error(
    "Error while setting '", key, "' to '", value, "': '", err, "'")
end

-- Validate and record a metric that occupies a single series name (a counter
-- or a gauge). Every rejection is logged.
--
-- Args:
--   prom: the Prometheus object the metric is registered on.
--   name: (string) name of the metric.
--   description: (string) HELP text of the metric.
--   label_names: array of strings, the labels of the metric.
--   metric_type: (string) value for the TYPE comment.
--
-- Returns:
--   true when the metric has been recorded, false when it was rejected.
local function register_metric(prom, name, description, label_names,
                               metric_type)
  if not prom.initialized then
    ngx.log(ngx.ERR, "Prometheus module has not been initialized")
    return false
  end

  local err = check_metric_and_label_names(name, label_names)
  if err ~= nil then
    prom:log_error(err)
    return false
  end

  if prom.registered[name] then
    prom:log_error("Duplicate metric " .. name)
    return false
  end
  prom.registered[name] = true
  prom.help[name] = description
  prom.type[name] = metric_type
  return true
end

-- Register a counter.
--
-- Args:
--   name: (string) name of the metric. Required.
--   description: (string) description of the metric. Will be used for the HELP
--     comment on the metrics page. Optional.
--   label_names: array of strings, defining a list of labels. Optional.
--
-- Returns:
--   a Counter object, or nothing if the metric could not be registered.
function Prometheus:counter(name, description, label_names)
  if not register_metric(self, name, description, label_names, "counter") then
    return
  end
  return Counter:new{name=name, label_names=label_names, prometheus=self}
end

-- Register a gauge.
--
-- Args:
--   name: (string) name of the metric. Required.
--   description: (string) description of the metric. Will be used for the HELP
--     comment on the metrics page. Optional.
--   label_names: array of strings, defining a list of labels. Optional.
--
-- Returns:
--   a Gauge object, or nothing if the metric could not be registered.
function Prometheus:gauge(name, description, label_names)
  if not register_metric(self, name, description, label_names, "gauge") then
    return
  end
  return Gauge:new{name=name, label_names=label_names, prometheus=self}
end


-- Register a histogram.
--
-- Args:
--   name: (string) name of the metric. Required.
--   description: (string) description of the metric. Will be used for the HELP
--     comment on the metrics page. Optional.
--   label_names: array of strings, defining a list of labels. Optional.
--   buckets: array of numbers, defining bucket boundaries. Optional.
--
-- Returns:
--   a Histogram object.
function Prometheus:histogram(name, description, label_names, buckets)
  if not self.initialized then
    ngx.log(ngx.ERR, "Prometheus module has not been initialized")
    return
  end

  local err = check_metric_and_label_names(name, label_names)
  if err ~= nil then
    self:log_error(err)
    return
  end

  -- A histogram occupies four series names. Check all of them before
  -- reserving any, so that a rejected histogram does not leave part of its
  -- names marked as taken.
  local series_names = {name, name .. "_bucket", name .. "_count", name .. "_sum"}
  for _, series_name in ipairs(series_names) do
    if self.registered[series_name] then
      self:log_error("Duplicate metric " .. series_name)
      return
    end
  end

  -- Validate the boundaries (this raises on a non-numeric bucket) before any
  -- state is modified.
  local boundaries = buckets or DEFAULT_BUCKETS
  local bucket_format = construct_bucket_format(boundaries)

  for _, series_name in ipairs(series_names) do
    self.registered[series_name] = true
  end
  self.help[name] = description
  self.type[name] = "histogram"
  self.buckets[name] = boundaries
  self.bucket_format[name] = bucket_format

  return Histogram:new{name=name, label_names=label_names, prometheus=self}
end

-- Set a given dictionary key.
-- This overwrites existing values, so it should only be used when initializing
-- metrics or when explicitly overwriting the previous value of a metric.
--
-- Args:
--   key: (string) full metric name, including labels.
--   value: value to store.
function Prometheus:set_key(key, value)
  -- safe_set reports an error instead of storing the value when it fails;
  -- that error is logged and counted here.
  local ok, err = self.dict:safe_set(key, value)
  if not ok then
    self:log_error_kv(key, value, err)
  end
end

-- Increment a given metric by `value`.
--
-- Args:
--   name: (string) short metric name without any labels.
--   label_names: (array) a list of label keys.
--   label_values: (array) a list of label values.
--   value: (number) value to add (a negative value when you need to decrease
--     the value of the gauge). Optional, defaults to 1.
function Prometheus:inc(name, label_names, label_values, value)
  local key = full_metric_name(name, label_names, label_values)
  if value == nil then value = 1 end

  local newval, err = self.dict:incr(key, value)
  if newval then
    return
  end
  -- Yes, this looks like a race, so I guess we might under-report some values
  -- when multiple workers simultaneously try to create the same metric.
  -- Hopefully this does not happen too often (shared dictionary does not get
  -- reset during configuration reload).
  if err == "not found" then
    self:set_key(key, value)
    return
  end
  -- Unexpected error
  self:log_error_kv(key, value, err)
end

-- Set the current value of a gauge to `value`
--
-- Args:
--   name: (string) short metric name without any labels.
--   label_names: (array) a list of label keys.
--   label_values: (array) a list of label values.
--   value: (number) the new value for the gauge.
function Prometheus:set(name, label_names, label_values, value)
  local key = full_metric_name(name, label_names, label_values)
  self:set_key(key, value)
end

-- Record a given value into a histogram metric.
--
-- Args:
--   name: (string) short metric name without any labels.
--   label_names: (array) a list of label keys.
--   label_values: (array) a list of label values.
--   value: (number) value to observe.
function Prometheus:histogram_observe(name, label_names, label_values, value)
  self:inc(name .. "_count", label_names, label_values, 1)
  self:inc(name .. "_sum", label_names, label_values, value)

  -- we are going to mutate arrays of label names and values, so create a copy.
  local l_names = copy_table(label_names)
  local l_values = copy_table(label_values)

  -- Last bucket. Note, that the label value is "Inf" rather than "+Inf"
  -- required by Prometheus. This is necessary for this bucket to be the last
  -- one when all metrics are lexicographically sorted. "Inf" will get replaced
  -- by "+Inf" in Prometheus:collect().
  l_names[#l_names + 1] = "le"
  l_values[#l_values + 1] = "Inf"
  self:inc(name .. "_bucket", l_names, l_values, 1)

  -- Buckets are cumulative: the observation is counted in every bucket whose
  -- upper boundary is not below it.
  local label_count = #l_names
  local bucket_format = self.bucket_format[name]
  for _, bucket in ipairs(self.buckets[name]) do
    if value <= bucket then
      -- last label is now "le"
      l_values[label_count] = bucket_format:format(bucket)
      self:inc(name .. "_bucket", l_names, l_values, 1)
    end
  end
end

-- Prometheus compatible metric data as an array of strings.
--
-- Returns:
--   Array of strings with all metrics in a text format compatible with
--   Prometheus, or nothing when the module has not been initialized.
function Prometheus:metric_data()
  if not self.initialized then
    ngx.log(ngx.ERR, "Prometheus module has not been initialized")
    return
  end

  -- 0 asks the dictionary for all keys rather than a limited number of them.
  local keys = self.dict:get_keys(0)
  -- Prometheus server expects buckets of a histogram to appear in increasing
  -- numerical order of their label values.
  table.sort(keys)

  local seen_metrics = {}
  local output = {}
  for _, key in ipairs(keys) do
    local value, err = self.dict:get(key)
    if value then
      local short_name = short_metric_name(key)
      -- HELP and TYPE comments are written once, ahead of the first sample
      -- of each metric.
      if not seen_metrics[short_name] then
        if self.help[short_name] then
          output[#output + 1] = string.format("# HELP %s%s %s\n",
            self.prefix, short_name, self.help[short_name])
        end
        if self.type[short_name] then
          output[#output + 1] = string.format("# TYPE %s%s %s\n",
            self.prefix, short_name, self.type[short_name])
        end
        seen_metrics[short_name] = true
      end
      -- Replace "Inf" with "+Inf" in each metric's last bucket 'le' label.
      if key:find('le="Inf"', 1, true) then
        key = key:gsub('le="Inf"', 'le="+Inf"')
      end
      output[#output + 1] = string.format("%s%s %s\n", self.prefix, key, value)
    else
      self:log_error("Error getting '", key, "': ", err)
    end
  end
  return output
end

-- Present all metrics in a text format compatible with Prometheus.
--
-- This function should be used to expose the metrics on a separate HTTP page.
-- It will get the metrics from the dictionary, sort them, and expose them
-- along with TYPE and HELP comments.
function Prometheus:collect()
  ngx.header.content_type = "text/plain"
  local data = self:metric_data()
  -- metric_data() returns nothing (and logs the reason) when the module is
  -- not initialized; printing that would put the text "nil" on the page.
  if data ~= nil then
    ngx.print(data)
  end
end

return Prometheus