// Copyright 2015 Matthew Holt and The Caddy Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
|
|
|
|
package reverseproxy
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"sync/atomic"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/caddyserver/caddy/v2"
|
|
|
|
"github.com/vulcand/oxy/memmetrics"
|
|
|
|
)
|
|
|
|
|
|
|
|
func init() {
|
2020-03-24 17:37:47 +01:00
|
|
|
caddy.RegisterModule(internalCircuitBreaker{})
|
2019-10-10 03:28:07 +02:00
|
|
|
}
|
|
|
|
|
2020-03-24 17:37:47 +01:00
|
|
|
// internalCircuitBreaker implements circuit breaking functionality
|
2019-10-10 03:28:07 +02:00
|
|
|
// for requests within this process over a sliding time window.
|
2020-03-24 17:37:47 +01:00
|
|
|
type internalCircuitBreaker struct {
|
2019-10-10 03:28:07 +02:00
|
|
|
tripped int32
|
2020-01-19 02:42:56 +01:00
|
|
|
cbFactor int32
|
2019-10-10 03:28:07 +02:00
|
|
|
threshold float64
|
|
|
|
metrics *memmetrics.RTMetrics
|
|
|
|
tripTime time.Duration
|
|
|
|
Config
|
|
|
|
}
|
|
|
|
|
|
|
|
// CaddyModule returns the Caddy module information.
|
2020-03-24 17:37:47 +01:00
|
|
|
func (internalCircuitBreaker) CaddyModule() caddy.ModuleInfo {
|
2019-10-10 03:28:07 +02:00
|
|
|
return caddy.ModuleInfo{
|
2020-03-24 17:37:47 +01:00
|
|
|
ID: "http.reverse_proxy.circuit_breakers.internal",
|
|
|
|
New: func() caddy.Module { return new(internalCircuitBreaker) },
|
2019-10-10 03:28:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Provision sets up a configured circuit breaker.
|
2020-03-24 17:37:47 +01:00
|
|
|
func (c *internalCircuitBreaker) Provision(ctx caddy.Context) error {
|
2020-01-19 02:42:56 +01:00
|
|
|
f, ok := typeCB[c.Factor]
|
2019-10-10 03:28:07 +02:00
|
|
|
if !ok {
|
|
|
|
return fmt.Errorf("type is not defined")
|
|
|
|
}
|
|
|
|
|
|
|
|
if c.TripTime == "" {
|
|
|
|
c.TripTime = defaultTripTime
|
|
|
|
}
|
|
|
|
|
|
|
|
tw, err := time.ParseDuration(c.TripTime)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("cannot parse trip_time duration, %v", err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
mt, err := memmetrics.NewRTMetrics()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("cannot create new metrics: %v", err.Error())
|
|
|
|
}
|
|
|
|
|
2020-01-19 02:42:56 +01:00
|
|
|
c.cbFactor = f
|
2019-10-10 03:28:07 +02:00
|
|
|
c.tripTime = tw
|
|
|
|
c.threshold = c.Threshold
|
|
|
|
c.metrics = mt
|
|
|
|
c.tripped = 0
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Ok returns whether the circuit breaker is tripped or not.
|
2020-03-24 17:37:47 +01:00
|
|
|
func (c *internalCircuitBreaker) Ok() bool {
|
2019-10-10 03:28:07 +02:00
|
|
|
tripped := atomic.LoadInt32(&c.tripped)
|
|
|
|
return tripped == 0
|
|
|
|
}
|
|
|
|
|
|
|
|
// RecordMetric records a response status code and execution time of a request. This function should be run in a separate goroutine.
|
2020-03-24 17:37:47 +01:00
|
|
|
func (c *internalCircuitBreaker) RecordMetric(statusCode int, latency time.Duration) {
|
2019-10-10 03:28:07 +02:00
|
|
|
c.metrics.Record(statusCode, latency)
|
|
|
|
c.checkAndSet()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Ok checks our metrics to see if we should trip our circuit breaker, or if the fallback duration has completed.
|
2020-03-24 17:37:47 +01:00
|
|
|
func (c *internalCircuitBreaker) checkAndSet() {
|
2019-10-10 03:28:07 +02:00
|
|
|
var isTripped bool
|
|
|
|
|
2020-01-19 02:42:56 +01:00
|
|
|
switch c.cbFactor {
|
|
|
|
case factorErrorRatio:
|
2019-10-10 03:28:07 +02:00
|
|
|
// check if amount of network errors exceed threshold over sliding window, threshold for comparison should be < 1.0 i.e. .5 = 50th percentile
|
|
|
|
if c.metrics.NetworkErrorRatio() > c.threshold {
|
|
|
|
isTripped = true
|
|
|
|
}
|
2020-01-19 02:42:56 +01:00
|
|
|
case factorLatency:
|
2019-10-10 03:28:07 +02:00
|
|
|
// check if threshold in milliseconds is reached and trip
|
|
|
|
hist, err := c.metrics.LatencyHistogram()
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
l := hist.LatencyAtQuantile(c.threshold)
|
|
|
|
if l.Nanoseconds()/int64(time.Millisecond) > int64(c.threshold) {
|
|
|
|
isTripped = true
|
|
|
|
}
|
2020-01-19 02:42:56 +01:00
|
|
|
case factorStatusCodeRatio:
|
2019-10-10 03:28:07 +02:00
|
|
|
// check ratio of error status codes of sliding window, threshold for comparison should be < 1.0 i.e. .5 = 50th percentile
|
|
|
|
if c.metrics.ResponseCodeRatio(500, 600, 0, 600) > c.threshold {
|
|
|
|
isTripped = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if isTripped {
|
|
|
|
c.metrics.Reset()
|
|
|
|
atomic.AddInt32(&c.tripped, 1)
|
|
|
|
|
|
|
|
// wait tripTime amount before allowing operations to resume.
|
|
|
|
t := time.NewTimer(c.tripTime)
|
|
|
|
<-t.C
|
|
|
|
|
|
|
|
atomic.AddInt32(&c.tripped, -1)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Config holds the user-facing settings of a circuit breaker.
type Config struct {
	// Threshold is the value that, once exceeded over the sliding
	// window, trips the circuit breaker.
	Threshold float64 `json:"threshold"`

	// Factor selects which metric is compared against Threshold.
	// Possible values: latency, error_ratio, and status_ratio;
	// latency is the intended default.
	Factor string `json:"factor"`

	// TripTime is a duration string saying how long to wait after the
	// circuit is tripped before allowing operations to resume.
	// The default is 5s.
	TripTime string `json:"trip_time"`
}
|
|
|
|
|
|
|
|
// Internal identifiers for the factors a circuit breaker can evaluate.
// They start at 1, so the zero value of cbFactor matches none of them.
const (
	factorLatency = iota + 1
	factorErrorRatio
	factorStatusCodeRatio
)

// defaultTripTime is the trip time Provision applies when none is
// configured.
const defaultTripTime = "5s"
|
|
|
|
|
|
|
|
var (
|
2020-01-19 02:42:56 +01:00
|
|
|
// typeCB handles converting a Config Factor value to the internal circuit breaker types.
|
2019-10-10 03:28:07 +02:00
|
|
|
typeCB = map[string]int32{
|
2020-01-19 02:42:56 +01:00
|
|
|
"latency": factorLatency,
|
|
|
|
"error_ratio": factorErrorRatio,
|
|
|
|
"status_ratio": factorStatusCodeRatio,
|
2019-10-10 03:28:07 +02:00
|
|
|
}
|
|
|
|
)
|