From b42334eb9172b17c3732a04975716d35d4f20b0d Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Sat, 14 Nov 2015 18:00:18 -0700 Subject: [PATCH] Several improvements and bug fixes related to graceful reloads Added a -grace flag to customize graceful shutdown period, fixed bugs related to closing file descriptors (and dup'ed fds), improved healthcheck signaling to parent, fixed a race condition with the graceful listener, etc. These improvements mainly provide better support for frequent reloading or unusual use cases of Start and Stop after a Restart (POSIX systems). This forum thread was valuable help in debugging: https://forum.golangbridge.org/t/bind-address-already-in-use-even-after-listener-closed/1510?u=matt --- caddy/caddy.go | 72 +++++++++++++++++++++++++--------------------- caddy/helpers.go | 26 ++++++++++++----- caddy/restart.go | 8 ++++-- dist/CHANGES.txt | 3 ++ main.go | 2 ++ server/graceful.go | 20 ++++++------- server/server.go | 58 +++++++++++++++++++++---------------- 7 files changed, 113 insertions(+), 76 deletions(-) diff --git a/caddy/caddy.go b/caddy/caddy.go index 20099aa79..e6569f42e 100644 --- a/caddy/caddy.go +++ b/caddy/caddy.go @@ -29,6 +29,7 @@ import ( "path" "strings" "sync" + "time" "github.com/mholt/caddy/caddy/letsencrypt" "github.com/mholt/caddy/server" @@ -43,11 +44,14 @@ var ( // If true, initialization will not show any informative output. Quiet bool - // HTTP2 indicates whether HTTP2 is enabled or not + // HTTP2 indicates whether HTTP2 is enabled or not. HTTP2 bool // TODO: temporary flag until http2 is standard - // PidFile is the path to the pidfile to create + // PidFile is the path to the pidfile to create. PidFile string + + // GracefulTimeout is the maximum duration of a graceful shutdown. + GracefulTimeout time.Duration ) var ( @@ -91,17 +95,20 @@ const ( ) // Start starts Caddy with the given Caddyfile. If cdyfile -// is nil or the process is forked from a parent as part of -// a graceful restart, Caddy will check to see if Caddyfile -// was piped from stdin and use that. It blocks until all the -// servers are listening. +// is nil, the LoadCaddyfile function will be called to get +// one. // -// If this process is a fork and no Caddyfile was piped in, -// an error will be returned (the Restart() function does this -// for you automatically). If this process is NOT a fork and -// cdyfile is nil, a default configuration will be assumed. -// In any case, an error is returned if Caddy could not be -// started. +// This function blocks until all the servers are listening. +// +// Note (POSIX): If Start is called in the child process of a +// restart more than once within the duration of the graceful +// cutoff (i.e. the child process called Start a first time, +// then called Stop, then Start again within the first 5 seconds +// or however long GracefulTimeout is) and the Caddyfiles have +// at least one listener address in common, the second Start +// may fail with "address already in use" as there's no +// guarantee that the parent process has relinquished the +// address before the grace period ends. func Start(cdyfile Input) (err error) { // If we return with no errors, we must do two things: tell the // parent that we succeeded and write to the pidfile. @@ -148,17 +155,6 @@ func Start(cdyfile Input) (err error) { } startedBefore = true - // Close remaining file descriptors we may have inherited that we don't need - if IsRestart() { - for _, fdIndex := range loadedGob.ListenerFds { - file := os.NewFile(fdIndex, "") - fln, err := net.FileListener(file) - if err == nil { - fln.Close() - } - } - } - // Show initialization output if !Quiet && !IsRestart() { var checkedFdLimit bool @@ -192,7 +188,7 @@ func startServers(groupings bindingGroup) error { errChan := make(chan error, len(groupings)) // must be buffered to allow Serve functions below to return if stopped later for _, group := range groupings { - s, err := server.New(group.BindAddr.String(), group.Configs) + s, err := server.New(group.BindAddr.String(), group.Configs, GracefulTimeout) if err != nil { return err } @@ -215,7 +211,8 @@ func startServers(groupings bindingGroup) error { return errors.New("listener for " + s.Addr + " was not a ListenerFile") } - delete(loadedGob.ListenerFds, s.Addr) // mark it as used + file.Close() + delete(loadedGob.ListenerFds, s.Addr) } } @@ -252,6 +249,14 @@ func startServers(groupings bindingGroup) error { serversMu.Unlock() } + // Close the remaining (unused) file descriptors to free up resources + if IsRestart() { + for key, fdIndex := range loadedGob.ListenerFds { + os.NewFile(fdIndex, "").Close() + delete(loadedGob.ListenerFds, key) + } + } + // Wait for all servers to finish starting startupWg.Wait() @@ -276,7 +281,9 @@ func Stop() error { serversMu.Lock() for _, s := range servers { - s.Stop() // TODO: error checking/reporting? + if err := s.Stop(); err != nil { + log.Printf("[ERROR] Stopping %s: %v", s.Addr, err) + } } servers = []*server.Server{} // don't reuse servers serversMu.Unlock() @@ -289,12 +296,13 @@ func Wait() { wg.Wait() } -// LoadCaddyfile loads a Caddyfile in a way that prioritizes -// reading from stdin pipe; otherwise it calls loader to load -// the Caddyfile. If loader does not return a Caddyfile, the -// default one will be returned. Thus, if there are no other -// errors, this function always returns at least the default -// Caddyfile (not the previously-used Caddyfile). +// LoadCaddyfile loads a Caddyfile, prioritizing a Caddyfile +// piped from stdin as part of a restart (only happens on first call +// to LoadCaddyfile). If it is not a restart, this function tries +// calling the user's loader function, and if that returns nil, then +// this function resorts to the default configuration. Thus, if there +// are no other errors, this function always returns at least the +// default Caddyfile. func LoadCaddyfile(loader func() (Input, error)) (cdyfile Input, err error) { // If we are a fork, finishing the restart is highest priority; // piped input is required in this case. diff --git a/caddy/helpers.go b/caddy/helpers.go index 8d615055a..f864b54b4 100644 --- a/caddy/helpers.go +++ b/caddy/helpers.go @@ -10,6 +10,7 @@ import ( "runtime" "strconv" "strings" + "sync" "github.com/mholt/caddy/caddy/letsencrypt" ) @@ -44,17 +45,28 @@ func checkFdlimit() { // If this process is not a restart, this function does nothing. // Calling this function once this process has successfully initialized // is vital so that the parent process can unblock and kill itself. +// This function is idempotent; it executes at most once per process. func signalSuccessToParent() { - if IsRestart() { - ppipe := os.NewFile(3, "") // parent is reading from pipe at index 3 - _, err := ppipe.Write([]byte("success")) // we must send some bytes to the parent - if err != nil { - log.Printf("[ERROR] Communicating successful init to parent: %v", err) + signalParentOnce.Do(func() { + if IsRestart() { + ppipe := os.NewFile(3, "") // parent is reading from pipe at index 3 + _, err := ppipe.Write([]byte("success")) // we must send some bytes to the parent + if err != nil { + log.Printf("[ERROR] Communicating successful init to parent: %v", err) + } + ppipe.Close() } - ppipe.Close() - } + }) } +// signalParentOnce is used to make sure that the parent is only +// signaled once; doing so more than once breaks whatever socket is +// at fd 4 (the reason for this is still unclear - to reproduce, +// call Stop() and Start() in succession at least once after a +// restart, then try loading first host of Caddyfile in the browser). +// Do not use this directly - call signalSuccessToParent instead. +var signalParentOnce sync.Once + // caddyfileGob maps bind address to index of the file descriptor // in the Files array passed to the child process. It also contains // the caddyfile contents. Used only during graceful restarts. diff --git a/caddy/restart.go b/caddy/restart.go index c41f27e40..c921e44cd 100644 --- a/caddy/restart.go +++ b/caddy/restart.go @@ -84,6 +84,11 @@ func Restart(newCaddyfile Input) error { return err } + // Immediately close our dup'ed fds and the write end of our signal pipe + for _, f := range extraFiles { + f.Close() + } + // Feed Caddyfile to the child err = gob.NewEncoder(wpipe).Encode(cdyfileGob) if err != nil { @@ -92,7 +97,6 @@ func Restart(newCaddyfile Input) error { wpipe.Close() // Determine whether child startup succeeded - sigwpipe.Close() // close our copy of the write end of the pipe -- TODO: why? answer, readErr := ioutil.ReadAll(sigrpipe) if answer == nil || len(answer) == 0 { cmdErr := cmd.Wait() // get exit status @@ -103,6 +107,6 @@ func Restart(newCaddyfile Input) error { return errIncompleteRestart } - // Looks like child was successful; we can exit gracefully. + // Looks like child is successful; we can exit gracefully. return Stop() } diff --git a/dist/CHANGES.txt b/dist/CHANGES.txt index fee86d766..88c35a86d 100644 --- a/dist/CHANGES.txt +++ b/dist/CHANGES.txt @@ -10,8 +10,11 @@ CHANGES - New -ca flag to customize ACME CA server URL - New -revoke flag to revoke a certificate - New -log flag to enable process log +- New -pidfile flag to enable writing pidfile +- New -grace flag to customize the graceful shutdown timeout - browse: Render filenames with multiple whitespace properly - markdown: Include Last-Modified header in response +- markdown: Render tables, strikethrough, and fenced code blocks - startup, shutdown: Better Windows support - templates: Bug fix for .Host when port is absent - templates: Include Last-Modified header in response diff --git a/main.go b/main.go index 1a468631c..3ec8ad7fc 100644 --- a/main.go +++ b/main.go @@ -10,6 +10,7 @@ import ( "runtime" "strconv" "strings" + "time" "github.com/mholt/caddy/caddy" "github.com/mholt/caddy/caddy/letsencrypt" @@ -34,6 +35,7 @@ func init() { flag.StringVar(&conf, "conf", "", "Configuration file to use (default="+caddy.DefaultConfigFile+")") flag.StringVar(&cpu, "cpu", "100%", "CPU cap") flag.StringVar(&letsencrypt.DefaultEmail, "email", "", "Default Let's Encrypt account email address") + flag.DurationVar(&caddy.GracefulTimeout, "grace", 5*time.Second, "Maximum duration of graceful shutdown") flag.StringVar(&caddy.Host, "host", caddy.DefaultHost, "Default host") flag.BoolVar(&caddy.HTTP2, "http2", true, "HTTP/2 support") // TODO: temporary flag until http2 merged into std lib flag.StringVar(&logfile, "log", "", "Process log file") diff --git a/server/graceful.go b/server/graceful.go index 6b2ae4f5c..5057d039b 100644 --- a/server/graceful.go +++ b/server/graceful.go @@ -2,7 +2,6 @@ package server import ( "net" - "os" "sync" "syscall" ) @@ -13,7 +12,9 @@ func newGracefulListener(l ListenerFile, wg *sync.WaitGroup) *gracefulListener { gl := &gracefulListener{ListenerFile: l, stop: make(chan error), httpWg: wg} go func() { <-gl.stop + gl.Lock() gl.stopped = true + gl.Unlock() gl.stop <- gl.ListenerFile.Close() }() return gl @@ -24,12 +25,13 @@ func newGracefulListener(l ListenerFile, wg *sync.WaitGroup) *gracefulListener { // methods mainly wrap net.Listener to be graceful. type gracefulListener struct { ListenerFile - stop chan error - stopped bool - httpWg *sync.WaitGroup // pointer to the host's wg used for counting connections + stop chan error + stopped bool + sync.Mutex // protects the stopped flag + httpWg *sync.WaitGroup // pointer to the host's wg used for counting connections } -// Accept accepts a connection. This type wraps +// Accept accepts a connection. func (gl *gracefulListener) Accept() (c net.Conn, err error) { c, err = gl.ListenerFile.Accept() if err != nil { @@ -42,18 +44,16 @@ func (gl *gracefulListener) Accept() (c net.Conn, err error) { // Close immediately closes the listener. func (gl *gracefulListener) Close() error { + gl.Lock() if gl.stopped { + gl.Unlock() return syscall.EINVAL } + gl.Unlock() gl.stop <- nil return <-gl.stop } -// File implements ListenerFile; it gets the file of the listening socket. -func (gl *gracefulListener) File() (*os.File, error) { - return gl.ListenerFile.File() -} - // gracefulConn represents a connection on a // gracefulListener so that we can keep track // of the number of connections, thus facilitating diff --git a/server/server.go b/server/server.go index 4aaa1d965..96979aad2 100644 --- a/server/server.go +++ b/server/server.go @@ -8,7 +8,6 @@ import ( "crypto/x509" "fmt" "io/ioutil" - "log" "net" "net/http" "os" @@ -26,13 +25,14 @@ import ( // graceful termination (POSIX only). type Server struct { *http.Server - HTTP2 bool // temporary while http2 is not in std lib (TODO: remove flag when part of std lib) - tls bool // whether this server is serving all HTTPS hosts or not - vhosts map[string]virtualHost // virtual hosts keyed by their address - listener ListenerFile // the listener which is bound to the socket - listenerMu sync.Mutex // protects listener - httpWg sync.WaitGroup // used to wait on outstanding connections - startChan chan struct{} // used to block until server is finished starting + HTTP2 bool // temporary while http2 is not in std lib (TODO: remove flag when part of std lib) + tls bool // whether this server is serving all HTTPS hosts or not + vhosts map[string]virtualHost // virtual hosts keyed by their address + listener ListenerFile // the listener which is bound to the socket + listenerMu sync.Mutex // protects listener + httpWg sync.WaitGroup // used to wait on outstanding connections + startChan chan struct{} // used to block until server is finished starting + connTimeout time.Duration // the maximum duration of a graceful shutdown } // ListenerFile represents a listener. @@ -42,14 +42,17 @@ type ListenerFile interface { } // New creates a new Server which will bind to addr and serve -// the sites/hosts configured in configs. This function does -// not start serving. +// the sites/hosts configured in configs. Its listener will +// gracefully close when the server is stopped which will take +// no longer than gracefulTimeout. +// +// This function does not start serving. // // Do not re-use a server (start, stop, then start again). We // could probably add more locking to make this possible, but // as it stands, you should dispose of a server after stopping it. // The behavior of serving with a spent server is undefined. -func New(addr string, configs []Config) (*Server, error) { +func New(addr string, configs []Config, gracefulTimeout time.Duration) (*Server, error) { var tls bool if len(configs) > 0 { tls = configs[0].TLS.Enabled @@ -63,9 +66,10 @@ func New(addr string, configs []Config) (*Server, error) { // WriteTimeout: 2 * time.Minute, // MaxHeaderBytes: 1 << 16, }, - tls: tls, - vhosts: make(map[string]virtualHost), - startChan: make(chan struct{}), + tls: tls, + vhosts: make(map[string]virtualHost), + startChan: make(chan struct{}), + connTimeout: gracefulTimeout, } s.Handler = s // this is weird, but whatever @@ -241,7 +245,7 @@ func serveTLSWithSNI(s *Server, ln net.Listener, tlsConfigs []TLSConfig) error { // connections to close (up to a max timeout of a few // seconds); on Windows it will close the listener // immediately. -func (s *Server) Stop() error { +func (s *Server) Stop() (err error) { s.Server.SetKeepAlivesEnabled(false) if runtime.GOOS != "windows" { @@ -256,20 +260,19 @@ func (s *Server) Stop() error { // Wait for remaining connections to finish or // force them all to close after timeout select { - case <-time.After(5 * time.Second): // TODO: make configurable + case <-time.After(s.connTimeout): case <-done: } } // Close the listener now; this stops the server without delay s.listenerMu.Lock() - err := s.listener.Close() - s.listenerMu.Unlock() - if err != nil { - log.Printf("[ERROR] Closing listener for %s: %v", s.Addr, err) + if s.listener != nil { + err = s.listener.Close() } + s.listenerMu.Unlock() - return err + return } // WaitUntilStarted blocks until the server s is started, meaning @@ -279,12 +282,17 @@ func (s *Server) WaitUntilStarted() { <-s.startChan } -// ListenerFd gets the file descriptor of the listener. +// ListenerFd gets a dup'ed file of the listener. If there +// is no underlying file, the return value will be nil. It +// is the caller's responsibility to close the file. func (s *Server) ListenerFd() *os.File { s.listenerMu.Lock() defer s.listenerMu.Unlock() - file, _ := s.listener.File() - return file + if s.listener != nil { + file, _ := s.listener.File() + return file + } + return nil } // ServeHTTP is the entry point for every request to the address that s @@ -370,7 +378,7 @@ func setupClientAuth(tlsConfigs []TLSConfig, config *tls.Config) error { // RunFirstStartupFuncs runs all of the server's FirstStartup // callback functions unless one of them returns an error first. -// It is up the caller's responsibility to call this only once and +// It is the caller's responsibility to call this only once and // at the correct time. The functions here should not be executed // at restarts or where the user does not explicitly start a new // instance of the server.