From 41f5dd56e1b93ec815daa98dd1f1caa7f2087312 Mon Sep 17 00:00:00 2001 From: Mohammed Al Sahaf Date: Wed, 2 Oct 2024 17:23:26 +0300 Subject: [PATCH] metrics: scope metrics to active config, add optional per-host metrics (#6531) * Add per host config * Pass host label when option is enabled * Test per host enabled * metrics: scope metrics per loaded config * doc and linter Signed-off-by: Mohammed Al Sahaf * inject the custom registry into the admin handler Co-Authored-By: Dave Henderson * remove `TODO` comment * fixes Signed-off-by: Mohammed Al Sahaf * refactor to delay metrics admin handler provision Signed-off-by: Mohammed Al Sahaf --------- Signed-off-by: Mohammed Al Sahaf Co-authored-by: Hussam Almarzooq Co-authored-by: Dave Henderson --- admin.go | 11 +- caddy.go | 9 +- caddyconfig/httpcaddyfile/serveroptions.go | 12 +- .../metrics_perhost.caddyfiletest | 37 ++++ context.go | 22 +- metrics.go | 23 +- modules/caddyhttp/app.go | 4 + modules/caddyhttp/metrics.go | 68 +++--- modules/caddyhttp/metrics_test.go | 196 +++++++++++++++++- modules/caddyhttp/routes.go | 4 +- modules/metrics/adminmetrics.go | 32 ++- modules/metrics/metrics.go | 13 +- 12 files changed, 365 insertions(+), 66 deletions(-) create mode 100644 caddytest/integration/caddyfile_adapt/metrics_perhost.caddyfiletest diff --git a/admin.go b/admin.go index 24c583235..0d5ecc92c 100644 --- a/admin.go +++ b/admin.go @@ -214,7 +214,7 @@ type AdminPermissions struct { // newAdminHandler reads admin's config and returns an http.Handler suitable // for use in an admin endpoint server, which will be listening on listenAddr. -func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool) adminHandler { +func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool, ctx Context) adminHandler { muxWrap := adminHandler{mux: http.NewServeMux()} // secure the local or remote endpoint respectively @@ -270,7 +270,6 @@ func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool) admi // register third-party module endpoints for _, m := range GetModules("admin.api") { router := m.New().(AdminRouter) - handlerLabel := m.ID.Name() for _, route := range router.Routes() { addRoute(route.Pattern, handlerLabel, route.Handler) } @@ -382,7 +381,9 @@ func (admin AdminConfig) allowedOrigins(addr NetworkAddress) []*url.URL { // for the admin endpoint exists in cfg, a default one is used, so // that there is always an admin server (unless it is explicitly // configured to be disabled). -func replaceLocalAdminServer(cfg *Config) error { +// Critically note that some elements and functionality of the context +// may not be ready, e.g. storage. Tread carefully. +func replaceLocalAdminServer(cfg *Config, ctx Context) error { // always* be sure to close down the old admin endpoint // as gracefully as possible, even if the new one is // disabled -- careful to use reference to the current @@ -424,7 +425,7 @@ func replaceLocalAdminServer(cfg *Config) error { return err } - handler := cfg.Admin.newAdminHandler(addr, false) + handler := cfg.Admin.newAdminHandler(addr, false, ctx) ln, err := addr.Listen(context.TODO(), 0, net.ListenConfig{}) if err != nil { @@ -545,7 +546,7 @@ func replaceRemoteAdminServer(ctx Context, cfg *Config) error { // make the HTTP handler but disable Host/Origin enforcement // because we are using TLS authentication instead - handler := cfg.Admin.newAdminHandler(addr, true) + handler := cfg.Admin.newAdminHandler(addr, true, ctx) // create client certificate pool for TLS mutual auth, and extract public keys // so that we can enforce access controls at the application layer diff --git a/caddy.go b/caddy.go index 7dd989c9e..b3e8889fa 100644 --- a/caddy.go +++ b/caddy.go @@ -399,6 +399,7 @@ func unsyncedDecodeAndRun(cfgJSON []byte, allowPersist bool) error { func run(newCfg *Config, start bool) (Context, error) { ctx, err := provisionContext(newCfg, start) if err != nil { + globalMetrics.configSuccess.Set(0) return ctx, err } @@ -410,6 +411,7 @@ func run(newCfg *Config, start bool) (Context, error) { // some of the other apps at runtime err = ctx.cfg.Admin.provisionAdminRouters(ctx) if err != nil { + globalMetrics.configSuccess.Set(0) return ctx, err } @@ -435,9 +437,11 @@ func run(newCfg *Config, start bool) (Context, error) { return nil }() if err != nil { + globalMetrics.configSuccess.Set(0) return ctx, err } - + globalMetrics.configSuccess.Set(1) + globalMetrics.configSuccessTime.SetToCurrentTime() // now that the user's config is running, finish setting up anything else, // such as remote admin endpoint, config loader, etc. return ctx, finishSettingUp(ctx, ctx.cfg) @@ -471,6 +475,7 @@ func provisionContext(newCfg *Config, replaceAdminServer bool) (Context, error) ctx, cancel := NewContext(Context{Context: context.Background(), cfg: newCfg}) defer func() { if err != nil { + globalMetrics.configSuccess.Set(0) // if there were any errors during startup, // we should cancel the new context we created // since the associated config won't be used; @@ -497,7 +502,7 @@ func provisionContext(newCfg *Config, replaceAdminServer bool) (Context, error) // start the admin endpoint (and stop any prior one) if replaceAdminServer { - err = replaceLocalAdminServer(newCfg) + err = replaceLocalAdminServer(newCfg, ctx) if err != nil { return ctx, fmt.Errorf("starting caddy administration endpoint: %v", err) } diff --git a/caddyconfig/httpcaddyfile/serveroptions.go b/caddyconfig/httpcaddyfile/serveroptions.go index 7087cdba5..b05af47f5 100644 --- a/caddyconfig/httpcaddyfile/serveroptions.go +++ b/caddyconfig/httpcaddyfile/serveroptions.go @@ -240,13 +240,13 @@ func unmarshalCaddyfileServerOptions(d *caddyfile.Dispenser) (any, error) { } case "metrics": - if d.NextArg() { - return nil, d.ArgErr() - } - if nesting := d.Nesting(); d.NextBlock(nesting) { - return nil, d.ArgErr() - } serverOpts.Metrics = new(caddyhttp.Metrics) + for nesting := d.Nesting(); d.NextBlock(nesting); { + switch d.Val() { + case "per_host": + serverOpts.Metrics.PerHost = true + } + } case "trace": if d.NextArg() { diff --git a/caddytest/integration/caddyfile_adapt/metrics_perhost.caddyfiletest b/caddytest/integration/caddyfile_adapt/metrics_perhost.caddyfiletest new file mode 100644 index 000000000..499215515 --- /dev/null +++ b/caddytest/integration/caddyfile_adapt/metrics_perhost.caddyfiletest @@ -0,0 +1,37 @@ +{ + servers :80 { + metrics { + per_host + } + } +} +:80 { + respond "Hello" +} +---------- +{ + "apps": { + "http": { + "servers": { + "srv0": { + "listen": [ + ":80" + ], + "routes": [ + { + "handle": [ + { + "body": "Hello", + "handler": "static_response" + } + ] + } + ], + "metrics": { + "per_host": true + } + } + } + } + } +} diff --git a/context.go b/context.go index 5b8c10703..17a8aa4f8 100644 --- a/context.go +++ b/context.go @@ -23,6 +23,8 @@ import ( "reflect" "github.com/caddyserver/certmagic" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" "go.uber.org/zap" "go.uber.org/zap/exp/zapslog" @@ -47,6 +49,7 @@ type Context struct { ancestry []Module cleanupFuncs []func() // invoked at every config unload exitFuncs []func(context.Context) // invoked at config unload ONLY IF the process is exiting (EXPERIMENTAL) + metricsRegistry *prometheus.Registry } // NewContext provides a new context derived from the given @@ -58,7 +61,7 @@ type Context struct { // modules which are loaded will be properly unloaded. // See standard library context package's documentation. func NewContext(ctx Context) (Context, context.CancelFunc) { - newCtx := Context{moduleInstances: make(map[string][]Module), cfg: ctx.cfg} + newCtx := Context{moduleInstances: make(map[string][]Module), cfg: ctx.cfg, metricsRegistry: prometheus.NewPedanticRegistry()} c, cancel := context.WithCancel(ctx.Context) wrappedCancel := func() { cancel() @@ -79,6 +82,7 @@ func NewContext(ctx Context) (Context, context.CancelFunc) { } } newCtx.Context = c + newCtx.initMetrics() return newCtx, wrappedCancel } @@ -97,6 +101,22 @@ func (ctx *Context) Filesystems() FileSystems { return ctx.cfg.filesystems } +// Returns the active metrics registry for the context +// EXPERIMENTAL: This API is subject to change. +func (ctx *Context) GetMetricsRegistry() *prometheus.Registry { + return ctx.metricsRegistry +} + +func (ctx *Context) initMetrics() { + ctx.metricsRegistry.MustRegister( + collectors.NewBuildInfoCollector(), + adminMetrics.requestCount, + adminMetrics.requestErrors, + globalMetrics.configSuccess, + globalMetrics.configSuccessTime, + ) +} + // OnExit executes f when the process exits gracefully. // The function is only executed if the process is gracefully // shut down while this context is active. diff --git a/metrics.go b/metrics.go index 0f8ea03cb..0ee3853eb 100644 --- a/metrics.go +++ b/metrics.go @@ -4,30 +4,33 @@ import ( "net/http" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/collectors" - "github.com/prometheus/client_golang/prometheus/promauto" "github.com/caddyserver/caddy/v2/internal/metrics" ) // define and register the metrics used in this package. func init() { - prometheus.MustRegister(collectors.NewBuildInfoCollector()) - const ns, sub = "caddy", "admin" - - adminMetrics.requestCount = promauto.NewCounterVec(prometheus.CounterOpts{ + adminMetrics.requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: ns, Subsystem: sub, Name: "http_requests_total", Help: "Counter of requests made to the Admin API's HTTP endpoints.", }, []string{"handler", "path", "code", "method"}) - adminMetrics.requestErrors = promauto.NewCounterVec(prometheus.CounterOpts{ + adminMetrics.requestErrors = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: ns, Subsystem: sub, Name: "http_request_errors_total", Help: "Number of requests resulting in middleware errors.", }, []string{"handler", "path", "method"}) + globalMetrics.configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "caddy_config_last_reload_successful", + Help: "Whether the last configuration reload attempt was successful.", + }) + globalMetrics.configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "caddy_config_last_reload_success_timestamp_seconds", + Help: "Timestamp of the last successful configuration reload.", + }) } // adminMetrics is a collection of metrics that can be tracked for the admin API. @@ -36,6 +39,12 @@ var adminMetrics = struct { requestErrors *prometheus.CounterVec }{} +// globalMetrics is a collection of metrics that can be tracked for Caddy global state +var globalMetrics = struct { + configSuccess prometheus.Gauge + configSuccessTime prometheus.Gauge +}{} + // Similar to promhttp.InstrumentHandlerCounter, but upper-cases method names // instead of lower-casing them. // diff --git a/modules/caddyhttp/app.go b/modules/caddyhttp/app.go index 7a5c10623..7dc2bee72 100644 --- a/modules/caddyhttp/app.go +++ b/modules/caddyhttp/app.go @@ -347,6 +347,10 @@ func (app *App) Provision(ctx caddy.Context) error { // route handler so that important security checks are done, etc. primaryRoute := emptyHandler if srv.Routes != nil { + if srv.Metrics != nil { + srv.Metrics.init = sync.Once{} + srv.Metrics.httpMetrics = &httpMetrics{} + } err := srv.Routes.ProvisionHandlers(ctx, srv.Metrics) if err != nil { return fmt.Errorf("server %s: setting up route handlers: %v", srvName, err) diff --git a/modules/caddyhttp/metrics.go b/modules/caddyhttp/metrics.go index 111389218..947721429 100644 --- a/modules/caddyhttp/metrics.go +++ b/modules/caddyhttp/metrics.go @@ -10,15 +10,23 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/caddyserver/caddy/v2" "github.com/caddyserver/caddy/v2/internal/metrics" ) // Metrics configures metrics observations. // EXPERIMENTAL and subject to change or removal. -type Metrics struct{} +type Metrics struct { + // Enable per-host metrics. Enabling this option may + // incur high-memory consumption, depending on the number of hosts + // managed by Caddy. + PerHost bool `json:"per_host,omitempty"` -var httpMetrics = struct { - init sync.Once + init sync.Once + httpMetrics *httpMetrics `json:"-"` +} + +type httpMetrics struct { requestInFlight *prometheus.GaugeVec requestCount *prometheus.CounterVec requestErrors *prometheus.CounterVec @@ -26,27 +34,28 @@ var httpMetrics = struct { requestSize *prometheus.HistogramVec responseSize *prometheus.HistogramVec responseDuration *prometheus.HistogramVec -}{ - init: sync.Once{}, } -func initHTTPMetrics() { +func initHTTPMetrics(ctx caddy.Context, metrics *Metrics) { const ns, sub = "caddy", "http" - + registry := ctx.GetMetricsRegistry() basicLabels := []string{"server", "handler"} - httpMetrics.requestInFlight = promauto.NewGaugeVec(prometheus.GaugeOpts{ + if metrics.PerHost { + basicLabels = append(basicLabels, "host") + } + metrics.httpMetrics.requestInFlight = promauto.With(registry).NewGaugeVec(prometheus.GaugeOpts{ Namespace: ns, Subsystem: sub, Name: "requests_in_flight", Help: "Number of requests currently handled by this server.", }, basicLabels) - httpMetrics.requestErrors = promauto.NewCounterVec(prometheus.CounterOpts{ + metrics.httpMetrics.requestErrors = promauto.With(registry).NewCounterVec(prometheus.CounterOpts{ Namespace: ns, Subsystem: sub, Name: "request_errors_total", Help: "Number of requests resulting in middleware errors.", }, basicLabels) - httpMetrics.requestCount = promauto.NewCounterVec(prometheus.CounterOpts{ + metrics.httpMetrics.requestCount = promauto.With(registry).NewCounterVec(prometheus.CounterOpts{ Namespace: ns, Subsystem: sub, Name: "requests_total", @@ -58,28 +67,31 @@ func initHTTPMetrics() { sizeBuckets := prometheus.ExponentialBuckets(256, 4, 8) httpLabels := []string{"server", "handler", "code", "method"} - httpMetrics.requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ + if metrics.PerHost { + httpLabels = append(httpLabels, "host") + } + metrics.httpMetrics.requestDuration = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{ Namespace: ns, Subsystem: sub, Name: "request_duration_seconds", Help: "Histogram of round-trip request durations.", Buckets: durationBuckets, }, httpLabels) - httpMetrics.requestSize = promauto.NewHistogramVec(prometheus.HistogramOpts{ + metrics.httpMetrics.requestSize = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{ Namespace: ns, Subsystem: sub, Name: "request_size_bytes", Help: "Total size of the request. Includes body", Buckets: sizeBuckets, }, httpLabels) - httpMetrics.responseSize = promauto.NewHistogramVec(prometheus.HistogramOpts{ + metrics.httpMetrics.responseSize = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{ Namespace: ns, Subsystem: sub, Name: "response_size_bytes", Help: "Size of the returned response.", Buckets: sizeBuckets, }, httpLabels) - httpMetrics.responseDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ + metrics.httpMetrics.responseDuration = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{ Namespace: ns, Subsystem: sub, Name: "response_duration_seconds", @@ -101,14 +113,15 @@ func serverNameFromContext(ctx context.Context) string { type metricsInstrumentedHandler struct { handler string mh MiddlewareHandler + metrics *Metrics } -func newMetricsInstrumentedHandler(handler string, mh MiddlewareHandler) *metricsInstrumentedHandler { - httpMetrics.init.Do(func() { - initHTTPMetrics() +func newMetricsInstrumentedHandler(ctx caddy.Context, handler string, mh MiddlewareHandler, metrics *Metrics) *metricsInstrumentedHandler { + metrics.init.Do(func() { + initHTTPMetrics(ctx, metrics) }) - return &metricsInstrumentedHandler{handler, mh} + return &metricsInstrumentedHandler{handler, mh, metrics} } func (h *metricsInstrumentedHandler) ServeHTTP(w http.ResponseWriter, r *http.Request, next Handler) error { @@ -119,7 +132,12 @@ func (h *metricsInstrumentedHandler) ServeHTTP(w http.ResponseWriter, r *http.Re // of a panic statusLabels := prometheus.Labels{"server": server, "handler": h.handler, "method": method, "code": ""} - inFlight := httpMetrics.requestInFlight.With(labels) + if h.metrics.PerHost { + labels["host"] = r.Host + statusLabels["host"] = r.Host + } + + inFlight := h.metrics.httpMetrics.requestInFlight.With(labels) inFlight.Inc() defer inFlight.Dec() @@ -131,13 +149,13 @@ func (h *metricsInstrumentedHandler) ServeHTTP(w http.ResponseWriter, r *http.Re writeHeaderRecorder := ShouldBufferFunc(func(status int, header http.Header) bool { statusLabels["code"] = metrics.SanitizeCode(status) ttfb := time.Since(start).Seconds() - httpMetrics.responseDuration.With(statusLabels).Observe(ttfb) + h.metrics.httpMetrics.responseDuration.With(statusLabels).Observe(ttfb) return false }) wrec := NewResponseRecorder(w, nil, writeHeaderRecorder) err := h.mh.ServeHTTP(wrec, r, next) dur := time.Since(start).Seconds() - httpMetrics.requestCount.With(labels).Inc() + h.metrics.httpMetrics.requestCount.With(labels).Inc() observeRequest := func(status int) { // If the code hasn't been set yet, and we didn't encounter an error, we're @@ -148,9 +166,9 @@ func (h *metricsInstrumentedHandler) ServeHTTP(w http.ResponseWriter, r *http.Re statusLabels["code"] = metrics.SanitizeCode(status) } - httpMetrics.requestDuration.With(statusLabels).Observe(dur) - httpMetrics.requestSize.With(statusLabels).Observe(float64(computeApproximateRequestSize(r))) - httpMetrics.responseSize.With(statusLabels).Observe(float64(wrec.Size())) + h.metrics.httpMetrics.requestDuration.With(statusLabels).Observe(dur) + h.metrics.httpMetrics.requestSize.With(statusLabels).Observe(float64(computeApproximateRequestSize(r))) + h.metrics.httpMetrics.responseSize.With(statusLabels).Observe(float64(wrec.Size())) } if err != nil { @@ -159,7 +177,7 @@ func (h *metricsInstrumentedHandler) ServeHTTP(w http.ResponseWriter, r *http.Re observeRequest(handlerErr.StatusCode) } - httpMetrics.requestErrors.With(labels).Inc() + h.metrics.httpMetrics.requestErrors.With(labels).Inc() return err } diff --git a/modules/caddyhttp/metrics_test.go b/modules/caddyhttp/metrics_test.go index 8f88549d5..4a0519b87 100644 --- a/modules/caddyhttp/metrics_test.go +++ b/modules/caddyhttp/metrics_test.go @@ -6,9 +6,10 @@ import ( "net/http" "net/http/httptest" "strings" + "sync" "testing" - "github.com/prometheus/client_golang/prometheus" + "github.com/caddyserver/caddy/v2" "github.com/prometheus/client_golang/prometheus/testutil" ) @@ -27,10 +28,15 @@ func TestServerNameFromContext(t *testing.T) { } func TestMetricsInstrumentedHandler(t *testing.T) { + ctx, _ := caddy.NewContext(caddy.Context{Context: context.Background()}) + metrics := &Metrics{ + init: sync.Once{}, + httpMetrics: &httpMetrics{}, + } handlerErr := errors.New("oh noes") response := []byte("hello world!") h := HandlerFunc(func(w http.ResponseWriter, r *http.Request) error { - if actual := testutil.ToFloat64(httpMetrics.requestInFlight); actual != 1.0 { + if actual := testutil.ToFloat64(metrics.httpMetrics.requestInFlight); actual != 1.0 { t.Errorf("Not same: expected %#v, but got %#v", 1.0, actual) } if handlerErr == nil { @@ -43,7 +49,7 @@ func TestMetricsInstrumentedHandler(t *testing.T) { return h.ServeHTTP(w, r) }) - ih := newMetricsInstrumentedHandler("bar", mh) + ih := newMetricsInstrumentedHandler(ctx, "bar", mh, metrics) r := httptest.NewRequest("GET", "/", nil) w := httptest.NewRecorder() @@ -51,7 +57,7 @@ func TestMetricsInstrumentedHandler(t *testing.T) { if actual := ih.ServeHTTP(w, r, h); actual != handlerErr { t.Errorf("Not same: expected %#v, but got %#v", handlerErr, actual) } - if actual := testutil.ToFloat64(httpMetrics.requestInFlight); actual != 0.0 { + if actual := testutil.ToFloat64(metrics.httpMetrics.requestInFlight); actual != 0.0 { t.Errorf("Not same: expected %#v, but got %#v", 0.0, actual) } @@ -64,7 +70,7 @@ func TestMetricsInstrumentedHandler(t *testing.T) { mh = middlewareHandlerFunc(func(w http.ResponseWriter, r *http.Request, h Handler) error { return nil }) - ih = newMetricsInstrumentedHandler("empty", mh) + ih = newMetricsInstrumentedHandler(ctx, "empty", mh, metrics) r = httptest.NewRequest("GET", "/", nil) w = httptest.NewRecorder() @@ -83,7 +89,7 @@ func TestMetricsInstrumentedHandler(t *testing.T) { return Error(http.StatusTooManyRequests, nil) }) - ih = newMetricsInstrumentedHandler("foo", mh) + ih = newMetricsInstrumentedHandler(ctx, "foo", mh, metrics) r = httptest.NewRequest("GET", "/", nil) w = httptest.NewRecorder() @@ -183,7 +189,183 @@ func TestMetricsInstrumentedHandler(t *testing.T) { caddy_http_request_errors_total{handler="bar",server="UNKNOWN"} 1 caddy_http_request_errors_total{handler="foo",server="UNKNOWN"} 1 ` - if err := testutil.GatherAndCompare(prometheus.DefaultGatherer, strings.NewReader(expected), + if err := testutil.GatherAndCompare(ctx.GetMetricsRegistry(), strings.NewReader(expected), + "caddy_http_request_size_bytes", + "caddy_http_response_size_bytes", + // caddy_http_request_duration_seconds_sum will vary based on how long the test took to run, + // so we check just the _bucket and _count metrics + "caddy_http_request_duration_seconds_bucket", + "caddy_http_request_duration_seconds_count", + "caddy_http_request_errors_total", + ); err != nil { + t.Errorf("received unexpected error: %s", err) + } +} + +func TestMetricsInstrumentedHandlerPerHost(t *testing.T) { + ctx, _ := caddy.NewContext(caddy.Context{Context: context.Background()}) + metrics := &Metrics{ + PerHost: true, + init: sync.Once{}, + httpMetrics: &httpMetrics{}, + } + handlerErr := errors.New("oh noes") + response := []byte("hello world!") + h := HandlerFunc(func(w http.ResponseWriter, r *http.Request) error { + if actual := testutil.ToFloat64(metrics.httpMetrics.requestInFlight); actual != 1.0 { + t.Errorf("Not same: expected %#v, but got %#v", 1.0, actual) + } + if handlerErr == nil { + w.Write(response) + } + return handlerErr + }) + + mh := middlewareHandlerFunc(func(w http.ResponseWriter, r *http.Request, h Handler) error { + return h.ServeHTTP(w, r) + }) + + ih := newMetricsInstrumentedHandler(ctx, "bar", mh, metrics) + + r := httptest.NewRequest("GET", "/", nil) + w := httptest.NewRecorder() + + if actual := ih.ServeHTTP(w, r, h); actual != handlerErr { + t.Errorf("Not same: expected %#v, but got %#v", handlerErr, actual) + } + if actual := testutil.ToFloat64(metrics.httpMetrics.requestInFlight); actual != 0.0 { + t.Errorf("Not same: expected %#v, but got %#v", 0.0, actual) + } + + handlerErr = nil + if err := ih.ServeHTTP(w, r, h); err != nil { + t.Errorf("Received unexpected error: %v", err) + } + + // an empty handler - no errors, no header written + mh = middlewareHandlerFunc(func(w http.ResponseWriter, r *http.Request, h Handler) error { + return nil + }) + ih = newMetricsInstrumentedHandler(ctx, "empty", mh, metrics) + r = httptest.NewRequest("GET", "/", nil) + w = httptest.NewRecorder() + + if err := ih.ServeHTTP(w, r, h); err != nil { + t.Errorf("Received unexpected error: %v", err) + } + if actual := w.Result().StatusCode; actual != 200 { + t.Errorf("Not same: expected status code %#v, but got %#v", 200, actual) + } + if actual := w.Result().Header; len(actual) != 0 { + t.Errorf("Not empty: expected headers to be empty, but got %#v", actual) + } + + // handler returning an error with an HTTP status + mh = middlewareHandlerFunc(func(w http.ResponseWriter, r *http.Request, h Handler) error { + return Error(http.StatusTooManyRequests, nil) + }) + + ih = newMetricsInstrumentedHandler(ctx, "foo", mh, metrics) + + r = httptest.NewRequest("GET", "/", nil) + w = httptest.NewRecorder() + + if err := ih.ServeHTTP(w, r, nil); err == nil { + t.Errorf("expected error to be propagated") + } + + expected := ` + # HELP caddy_http_request_duration_seconds Histogram of round-trip request durations. + # TYPE caddy_http_request_duration_seconds histogram + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="0.005"} 1 + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="0.01"} 1 + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="0.025"} 1 + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="0.05"} 1 + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="0.1"} 1 + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="0.25"} 1 + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="0.5"} 1 + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="1"} 1 + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="2.5"} 1 + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="5"} 1 + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="10"} 1 + caddy_http_request_duration_seconds_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="+Inf"} 1 + caddy_http_request_duration_seconds_count{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN"} 1 + # HELP caddy_http_request_size_bytes Total size of the request. Includes body + # TYPE caddy_http_request_size_bytes histogram + caddy_http_request_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="256"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="1024"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="4096"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="16384"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="65536"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="262144"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="1.048576e+06"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="4.194304e+06"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="+Inf"} 1 + caddy_http_request_size_bytes_sum{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN"} 23 + caddy_http_request_size_bytes_count{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="256"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="1024"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="4096"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="16384"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="65536"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="262144"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="1.048576e+06"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="4.194304e+06"} 1 + caddy_http_request_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="+Inf"} 1 + caddy_http_request_size_bytes_sum{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN"} 23 + caddy_http_request_size_bytes_count{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN"} 1 + caddy_http_request_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="256"} 1 + caddy_http_request_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="1024"} 1 + caddy_http_request_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="4096"} 1 + caddy_http_request_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="16384"} 1 + caddy_http_request_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="65536"} 1 + caddy_http_request_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="262144"} 1 + caddy_http_request_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="1.048576e+06"} 1 + caddy_http_request_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="4.194304e+06"} 1 + caddy_http_request_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="+Inf"} 1 + caddy_http_request_size_bytes_sum{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN"} 23 + caddy_http_request_size_bytes_count{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN"} 1 + # HELP caddy_http_response_size_bytes Size of the returned response. + # TYPE caddy_http_response_size_bytes histogram + caddy_http_response_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="256"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="1024"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="4096"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="16384"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="65536"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="262144"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="1.048576e+06"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="4.194304e+06"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN",le="+Inf"} 1 + caddy_http_response_size_bytes_sum{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN"} 12 + caddy_http_response_size_bytes_count{code="200",handler="bar",host="example.com",method="GET",server="UNKNOWN"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="256"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="1024"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="4096"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="16384"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="65536"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="262144"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="1.048576e+06"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="4.194304e+06"} 1 + caddy_http_response_size_bytes_bucket{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN",le="+Inf"} 1 + caddy_http_response_size_bytes_sum{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN"} 0 + caddy_http_response_size_bytes_count{code="200",handler="empty",host="example.com",method="GET",server="UNKNOWN"} 1 + caddy_http_response_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="256"} 1 + caddy_http_response_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="1024"} 1 + caddy_http_response_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="4096"} 1 + caddy_http_response_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="16384"} 1 + caddy_http_response_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="65536"} 1 + caddy_http_response_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="262144"} 1 + caddy_http_response_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="1.048576e+06"} 1 + caddy_http_response_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="4.194304e+06"} 1 + caddy_http_response_size_bytes_bucket{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN",le="+Inf"} 1 + caddy_http_response_size_bytes_sum{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN"} 0 + caddy_http_response_size_bytes_count{code="429",handler="foo",host="example.com",method="GET",server="UNKNOWN"} 1 + # HELP caddy_http_request_errors_total Number of requests resulting in middleware errors. + # TYPE caddy_http_request_errors_total counter + caddy_http_request_errors_total{handler="bar",host="example.com",server="UNKNOWN"} 1 + caddy_http_request_errors_total{handler="foo",host="example.com",server="UNKNOWN"} 1 + ` + if err := testutil.GatherAndCompare(ctx.GetMetricsRegistry(), strings.NewReader(expected), "caddy_http_request_size_bytes", "caddy_http_response_size_bytes", // caddy_http_request_duration_seconds_sum will vary based on how long the test took to run, diff --git a/modules/caddyhttp/routes.go b/modules/caddyhttp/routes.go index 54a3f38e6..939d01e55 100644 --- a/modules/caddyhttp/routes.go +++ b/modules/caddyhttp/routes.go @@ -314,11 +314,11 @@ func wrapRoute(route Route) Middleware { // we need to pull this particular MiddlewareHandler // pointer into its own stack frame to preserve it so it // won't be overwritten in future loop iterations. -func wrapMiddleware(_ caddy.Context, mh MiddlewareHandler, metrics *Metrics) Middleware { +func wrapMiddleware(ctx caddy.Context, mh MiddlewareHandler, metrics *Metrics) Middleware { handlerToUse := mh if metrics != nil { // wrap the middleware with metrics instrumentation - handlerToUse = newMetricsInstrumentedHandler(caddy.GetModuleName(mh), mh) + handlerToUse = newMetricsInstrumentedHandler(ctx, caddy.GetModuleName(mh), mh, metrics) } return func(next Handler) Handler { diff --git a/modules/metrics/adminmetrics.go b/modules/metrics/adminmetrics.go index 1cf398e4c..1e3a841dd 100644 --- a/modules/metrics/adminmetrics.go +++ b/modules/metrics/adminmetrics.go @@ -15,8 +15,11 @@ package metrics import ( + "errors" "net/http" + "github.com/prometheus/client_golang/prometheus" + "github.com/caddyserver/caddy/v2" ) @@ -29,7 +32,11 @@ func init() { // is permanently mounted to the admin API endpoint at "/metrics". // See the Metrics module for a configurable endpoint that is usable if the // Admin API is disabled. -type AdminMetrics struct{} +type AdminMetrics struct { + registry *prometheus.Registry + + metricsHandler http.Handler +} // CaddyModule returns the Caddy module information. func (AdminMetrics) CaddyModule() caddy.ModuleInfo { @@ -39,17 +46,28 @@ func (AdminMetrics) CaddyModule() caddy.ModuleInfo { } } +// Provision - +func (m *AdminMetrics) Provision(ctx caddy.Context) error { + m.registry = ctx.GetMetricsRegistry() + if m.registry == nil { + return errors.New("no metrics registry found") + } + m.metricsHandler = createMetricsHandler(nil, false, m.registry) + return nil +} + // Routes returns a route for the /metrics endpoint. func (m *AdminMetrics) Routes() []caddy.AdminRoute { - metricsHandler := createMetricsHandler(nil, false) - h := caddy.AdminHandlerFunc(func(w http.ResponseWriter, r *http.Request) error { - metricsHandler.ServeHTTP(w, r) - return nil - }) - return []caddy.AdminRoute{{Pattern: "/metrics", Handler: h}} + return []caddy.AdminRoute{{Pattern: "/metrics", Handler: caddy.AdminHandlerFunc(m.serveHTTP)}} +} + +func (m *AdminMetrics) serveHTTP(w http.ResponseWriter, r *http.Request) error { + m.metricsHandler.ServeHTTP(w, r) + return nil } // Interface guards var ( + _ caddy.Provisioner = (*AdminMetrics)(nil) _ caddy.AdminRouter = (*AdminMetrics)(nil) ) diff --git a/modules/metrics/metrics.go b/modules/metrics/metrics.go index dc6196a15..42b30d88d 100644 --- a/modules/metrics/metrics.go +++ b/modules/metrics/metrics.go @@ -15,6 +15,7 @@ package metrics import ( + "errors" "net/http" "github.com/prometheus/client_golang/prometheus" @@ -62,7 +63,11 @@ func (l *zapLogger) Println(v ...any) { // Provision sets up m. func (m *Metrics) Provision(ctx caddy.Context) error { log := ctx.Logger() - m.metricsHandler = createMetricsHandler(&zapLogger{log}, !m.DisableOpenMetrics) + registry := ctx.GetMetricsRegistry() + if registry == nil { + return errors.New("no metrics registry found") + } + m.metricsHandler = createMetricsHandler(&zapLogger{log}, !m.DisableOpenMetrics, registry) return nil } @@ -107,9 +112,9 @@ var ( _ caddyfile.Unmarshaler = (*Metrics)(nil) ) -func createMetricsHandler(logger promhttp.Logger, enableOpenMetrics bool) http.Handler { - return promhttp.InstrumentMetricHandler(prometheus.DefaultRegisterer, - promhttp.HandlerFor(prometheus.DefaultGatherer, promhttp.HandlerOpts{ +func createMetricsHandler(logger promhttp.Logger, enableOpenMetrics bool, registry *prometheus.Registry) http.Handler { + return promhttp.InstrumentMetricHandler(registry, + promhttp.HandlerFor(registry, promhttp.HandlerOpts{ // will only log errors if logger is non-nil ErrorLog: logger,