diff --git a/REUSE.toml b/REUSE.toml index 773eafc..7c4f6a4 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -15,7 +15,7 @@ SPDX-PackageDownloadLocation = "https://git.madhouse-project.org/algernon/iocain SPDX-License-Identifier = "MIT" [[annotations]] - path = ["README.md", "docs/**/*.md"] + path = ["README.md", "docs/**/*.md", "docs/**/*.png"] precedence = "aggregate" SPDX-FileCopyrightText = "2025 Gergely Nagy" SPDX-License-Identifier = "MIT" @@ -30,3 +30,9 @@ SPDX-PackageDownloadLocation = "https://git.madhouse-project.org/algernon/iocain precedence = "aggregate" SPDX-FileCopyrightText = "2025 Gergely Nagy" SPDX-License-Identifier = "MIT" + +[[annotations]] + path = ["data/grafana-dashboard.json"] + precedence = "aggregate" + SPDX-FileCopyrightText = "2025 Gergely Nagy" + SPDX-License-Identifier = "MIT" diff --git a/data/grafana-dashboard.json b/data/grafana-dashboard.json new file mode 100644 index 0000000..cd17095 --- /dev/null +++ b/data/grafana-dashboard.json @@ -0,0 +1,448 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "11.3.2" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "The deadliest poison known to AI", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + 
"panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 5, + "panels": [], + "title": "Trap counters", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 10, + "x": 0, + "y": 1 + }, + "id": 3, + "options": { + "displayMode": "basic", + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 32, + "minVizHeight": 32, + "minVizWidth": 8, + "namePlacement": "top", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "manual", + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.3.2", + "targets": [ + { + "editorMode": "code", + "exemplar": false, + "expr": "sort_desc(sum(iocaine_requests_total{host=~\"$host\", user_agent_group=~\"$agent_group\"}) by (user_agent_group))", + "format": "time_series", + "instant": true, + "legendFormat": "{{user_agent_group}}", + "range": false, + "refId": "A", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + } + } + ], + "title": "User agents in the maze (by group)", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 10, + "x": 10, + "y": 1 + }, + "id": 4, + "options": { + "displayMode": "basic", + "legend": { + "calcs": [], + "displayMode": "table", + 
"placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 32, + "minVizHeight": 32, + "minVizWidth": 8, + "namePlacement": "top", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "manual", + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.3.2", + "targets": [ + { + "editorMode": "code", + "exemplar": false, + "expr": "sort_desc(sum(iocaine_requests_total{host=~\"$host\",user_agent_group=~\"$agent_group\"}) by (host))", + "format": "time_series", + "instant": true, + "legendFormat": "{{host}}", + "range": false, + "refId": "A", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + } + } + ], + "title": "Trapped visits by host", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 8 + }, + { + "color": "red", + "value": 20 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.3.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "sum(irate(iocaine_requests_total[5m]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false, + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" + } + } + ], + "title": "Average throughput (5m)", + "type": "stat" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 2, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 1, + "options": { + "displayMode": "basic", + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 32, + "minVizHeight": 32, + "minVizWidth": 8, + "namePlacement": "top", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "manual", + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.3.2", + "targets": [ + { + "editorMode": "code", + "exemplar": false, + "expr": "sort_desc(sum(iocaine_requests_total{host=~\"$host\", user_agent_group=~\"$agent_group\"}) by (user_agent, user_agent_group))", + "format": "time_series", + "instant": true, + "legendFormat": "{{user_agent_group}}: {{user_agent}}", + "range": false, + "refId": "A", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + } + } + ], + "title": "User agents in the maze", + "type": "bargauge" + } + ], + "title": "Detailed user agents", + "type": "row" + } + ], + "refresh": "1m", + "schemaVersion": 40, + "tags": [ + "iocaine" + ], + "templating": { + "list": [ + { + "current": {}, + "definition": "label_values(iocaine_requests_total,host)", + "includeAll": true, + "label": "Host", + "multi": true, + "name": "host", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(iocaine_requests_total,host)", + "refId": 
"PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + }, + { + "current": {}, + "definition": "label_values(iocaine_requests_total,user_agent_group)", + "includeAll": true, + "label": "User Agent", + "multi": true, + "name": "agent_group", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(iocaine_requests_total,user_agent_group)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Iocaine", + "uid": "aec38snrfs4cgf", + "version": 31, + "weekStart": "" +} \ No newline at end of file diff --git a/docs/content/howto/index.md b/docs/content/howto/index.md new file mode 100644 index 0000000..bf70132 --- /dev/null +++ b/docs/content/howto/index.md @@ -0,0 +1,7 @@ +--- +title: "HOWTOs" +description: How to do X with iocaine? +weight: 3 +--- + +- [How to monitor iocaine with Prometheus and Grafana?](@/howto/monitoring-with-prometheus-and-grafana/index.md). diff --git a/docs/content/howto/monitoring-with-prometheus-and-grafana/dashboard.png b/docs/content/howto/monitoring-with-prometheus-and-grafana/dashboard.png new file mode 100644 index 0000000..f084397 Binary files /dev/null and b/docs/content/howto/monitoring-with-prometheus-and-grafana/dashboard.png differ diff --git a/docs/content/howto/monitoring-with-prometheus-and-grafana/index.md b/docs/content/howto/monitoring-with-prometheus-and-grafana/index.md new file mode 100644 index 0000000..7c913c0 --- /dev/null +++ b/docs/content/howto/monitoring-with-prometheus-and-grafana/index.md @@ -0,0 +1,129 @@ +--- +title: "Monitoring iocaine" +description: How to monitor iocaine with Prometheus and Grafana? 
+--- + +`iocaine` can be [configured](@/configuration/index.md#metrics) to expose [Prometheus](https://prometheus.io)-compatible metrics, separately from the garbage generator. When enabled, a single metric - `iocaine_requests_total` - is exposed, with various labels attached, if so configured. It is a simple counter, showing the number of hits `iocaine` served. + +# The simplest configuration + +Let's start with a simple configuration: no labels, just the metric. + +```toml +[metrics] +enable = true +``` + +This will expose the following metric on `http://127.0.0.1:42042/metrics`: + +``` +# TYPE iocaine_requests_total counter +iocaine_requests_total 1 +``` + +# Per-host metrics + +While an unlabeled metric is nice to have, it's a little bit bland. We can add a `host` label, to be able to group request totals by host - where the host is whatever is in the `Host` header when it reaches `iocaine`. + +```toml +[metrics] +enable = true +labels = [ "Host" ] +``` + +This will expose the following metrics on `http://127.0.0.1:42042/metrics`: + +``` +# TYPE iocaine_requests_total counter +iocaine_requests_total{host="host.example.com"} 1 +iocaine_requests_total{host="another-host.example.com"} 4 +``` + +# Per-agent metrics + +Perhaps even more useful than the `host` label, the `user_agent` label can be used to group the counter by user agent: + +```toml +[metrics] +enable = true +labels = [ "UserAgent" ] +``` + +This will expose the following metrics on `http://127.0.0.1:42042/metrics`: + +``` +# TYPE iocaine_requests_total counter +iocaine_requests_total{user_agent="curl/8.11.1"} 1 +iocaine_requests_total{user_agent="ClaudeBot: Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)"} 1234 +``` + +## User agent grouping + +The downside of the `user_agent` label is that it is unbounded, and the user agent strings tend to be long. To make it easier to group by agents, another label is available: `user_agent_group`.
This label is applied by matching a list of regexps against the user agent, and if any matches, the corresponding group will be set for the value of the `user_agent_group` label. This is best demonstrated by an example: + +```toml +[metrics] +enable = true +labels = [ "UserAgent", "UserAgentGroup" ] + +[[metrics.agent_group]] +agent = "(?i:ClaudeBot)" +group = "ClaudeBot" + +[[metrics.agent_group]] +agent = "." +group = "Other" +``` + +Do note that the `UserAgentGroup` label *can* be used without `UserAgent`! They're used here together for demonstration purposes. + +The above configuration, assuming that we had the same visits as in the previous example, will result in the following metrics being exposed on `http://127.0.0.1:42042/metrics`: + +``` +# TYPE iocaine_requests_total counter +iocaine_requests_total{user_agent="curl/8.11.1", user_agent_group="Other"} 1 +iocaine_requests_total{user_agent="ClaudeBot: Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)", user_agent_group="ClaudeBot"} 1234 +``` + +It is strongly advised to set a catch-all agent group, so that the `user_agent_group` label can be limited to a known, smaller set of agents. Similarly, it is usually not a good idea to use the `UserAgent` label, because that can lead to metrics ballooning out of control. Only use that label if you keep a close eye on Prometheus. + +# Prometheus configuration + +The `iocaine` metrics are exposed on an unauthenticated address, and Prometheus can be told to scrape it with a configuration akin to the following example: + +```yaml +scrape_configs: +- job_name: iocaine-job + static_configs: + - targets: ['localhost:42042'] +``` + +# Dashboard + +A [Grafana dashboard](https://git.madhouse-project.org/algernon/iocaine/src/branch/main/data/grafana-dashboard.json) is available to get you started with monitoring `iocaine` with Grafana.
+ +![Dashboard](dashboard.png) + +It assumes a configuration where all three labels are enabled: + +```toml +[metrics] +enable = true +labels = [ "Host", "UserAgent", "UserAgentGroup" ] + +[[metrics.agent_group]] +agent = "(?i:ClaudeBot)" +group = "ClaudeBot" + +[[metrics.agent_group]] +agent = "(?i:AmazonBot)" +group = "Amazon" + +[[metrics.agent_group]] +agent = "(?i:GPTBot|ChatGPT)" +group = "GPTBot" + +[[metrics.agent_group]] +agent = "." +group = "Other" +```