From 5e0896305c3291acafd6266d009d0a9bdcfe7951 Mon Sep 17 00:00:00 2001 From: Matt Holt Date: Sat, 12 Aug 2017 11:04:32 -0600 Subject: [PATCH] SIGUSR2 triggers graceful binary upgrades (spawns new process) (#1814) * SIGUSR2 triggers graceful binary upgrades (spawns new process) * Move some functions around, hopefully fixing Windows build * Clean up a couple file closes and add links to useful debugging thread * Use two underscores in upgrade env var To help ensure uniqueness / avoid possible collisions --- caddy.go | 72 ++++++++---- caddyhttp/httpserver/https.go | 11 +- sigtrap_posix.go | 28 ++--- upgrade.go | 212 ++++++++++++++++++++++++++++++++++ 4 files changed, 279 insertions(+), 44 deletions(-) create mode 100644 upgrade.go diff --git a/caddy.go b/caddy.go index 558cfd221..ffab1454b 100644 --- a/caddy.go +++ b/caddy.go @@ -17,6 +17,7 @@ package caddy import ( "bytes" + "encoding/gob" "fmt" "io" "io/ioutil" @@ -51,7 +52,7 @@ var ( // isUpgrade will be set to true if this process // was started as part of an upgrade, where a parent // Caddy process started this one. - isUpgrade bool + isUpgrade = os.Getenv("CADDY__UPGRADE") == "1" // started will be set to true when the first // instance is started; it never gets set to @@ -360,6 +361,16 @@ type AfterStartup interface { // is returned. Consequently, this function never returns a nil // value as long as there are no errors. func LoadCaddyfile(serverType string) (Input, error) { + // If we are finishing an upgrade, we must obtain the Caddyfile + // from our parent process, regardless of configured loaders. + if IsUpgrade() { + err := gob.NewDecoder(os.Stdin).Decode(&loadedGob) + if err != nil { + return nil, err + } + return loadedGob.Caddyfile, nil + } + // Ask plugged-in loaders for a Caddyfile cdyfile, err := loadCaddyfileInput(serverType) if err != nil { @@ -424,9 +435,16 @@ func (i *Instance) Caddyfile() Input { // // This function blocks until all the servers are listening. func Start(cdyfile Input) (*Instance, error) { - writePidFile() inst := &Instance{serverType: cdyfile.ServerType(), wg: new(sync.WaitGroup)} - return inst, startWithListenerFds(cdyfile, inst, nil) + err := startWithListenerFds(cdyfile, inst, nil) + if err != nil { + return inst, err + } + signalSuccessToParent() + if pidErr := writePidFile(); pidErr != nil { + log.Printf("[ERROR] Could not write pidfile: %v", pidErr) + } + return inst, nil } func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]restartTriple) error { @@ -445,7 +463,8 @@ func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]r } // run startup callbacks - if restartFds == nil { + if !IsUpgrade() && restartFds == nil { + // first startup means not a restart or upgrade for _, firstStartupFunc := range inst.onFirstStartup { err := firstStartupFunc() if err != nil { @@ -500,7 +519,6 @@ func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]r // callbacks will not be executed between directives, since the purpose // is only to check the input for valid syntax. func ValidateAndExecuteDirectives(cdyfile Input, inst *Instance, justValidate bool) error { - // If parsing only inst will be nil, create an instance for this function call only. if justValidate { inst = &Instance{serverType: cdyfile.ServerType(), wg: new(sync.WaitGroup)} @@ -536,7 +554,6 @@ func ValidateAndExecuteDirectives(cdyfile Input, inst *Instance, justValidate bo } return nil - } func executeDirectives(inst *Instance, filename string, @@ -616,6 +633,30 @@ func startServers(serverList []Server, inst *Instance, restartFds map[string]res err error ) + // if performing an upgrade, obtain listener file descriptors + // from parent process + if IsUpgrade() { + if gs, ok := s.(GracefulServer); ok { + addr := gs.Address() + if fdIndex, ok := loadedGob.ListenerFds["tcp"+addr]; ok { + file := os.NewFile(fdIndex, "") + ln, err = net.FileListener(file) + file.Close() + if err != nil { + return err + } + } + if fdIndex, ok := loadedGob.ListenerFds["udp"+addr]; ok { + file := os.NewFile(fdIndex, "") + pc, err = net.FilePacketConn(file) + file.Close() + if err != nil { + return err + } + } + } + } + // If this is a reload and s is a GracefulServer, // reuse the listener for a graceful restart. if gs, ok := s.(GracefulServer); ok && restartFds != nil { @@ -795,25 +836,6 @@ func IsInternal(addr string) bool { return false } -// Upgrade re-launches the process, preserving the listeners -// for a graceful restart. It does NOT load new configuration; -// it only starts the process anew with a fresh binary. -// -// TODO: This is not yet implemented -func Upgrade() error { - return fmt.Errorf("not implemented") - // TODO: have child process set isUpgrade = true -} - -// IsUpgrade returns true if this process is part of an upgrade -// where a parent caddy process spawned this one to upgrade -// the binary. -func IsUpgrade() bool { - mu.Lock() - defer mu.Unlock() - return isUpgrade -} - // Started returns true if at least one instance has been // started by this package. It never gets reset to false // once it is set to true. diff --git a/caddyhttp/httpserver/https.go b/caddyhttp/httpserver/https.go index 912e9f930..c35c93ab1 100644 --- a/caddyhttp/httpserver/https.go +++ b/caddyhttp/httpserver/https.go @@ -44,9 +44,14 @@ func activateHTTPS(cctx caddy.Context) error { // renew all relevant certificates that need renewal. this is important // to do right away so we guarantee that renewals aren't missed, and // also the user can respond to any potential errors that occur. - err = caddytls.RenewManagedCertificates(true) - if err != nil { - return err + // (skip if upgrading, because the parent process is likely already listening + // on the ports we'd need to do ACME before we finish starting; parent process + // already running renewal ticker, so renewal won't be missed anyway.) + if !caddy.IsUpgrade() { + err = caddytls.RenewManagedCertificates(true) + if err != nil { + return err + } } if !caddy.Quiet && operatorPresent { diff --git a/sigtrap_posix.go b/sigtrap_posix.go index b4cbd9c6f..65dcdd726 100644 --- a/sigtrap_posix.go +++ b/sigtrap_posix.go @@ -13,7 +13,7 @@ import ( func trapSignalsPosix() { go func() { sigchan := make(chan os.Signal, 1) - signal.Notify(sigchan, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGUSR1) + signal.Notify(sigchan, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGUSR1, syscall.SIGUSR2) for sig := range sigchan { switch sig { @@ -48,19 +48,9 @@ func trapSignalsPosix() { log.Println("[INFO] SIGUSR1: Reloading") // Start with the existing Caddyfile - instancesMu.Lock() - if len(instances) == 0 { - instancesMu.Unlock() - log.Println("[ERROR] SIGUSR1: No server instances are fully running") - continue - } - inst := instances[0] // we only support one instance at this time - instancesMu.Unlock() - - updatedCaddyfile := inst.caddyfileInput - if updatedCaddyfile == nil { - // Hmm, did spawing process forget to close stdin? Anyhow, this is unusual. - log.Println("[ERROR] SIGUSR1: no Caddyfile to reload (was stdin left open?)") + caddyfileToUse, inst, err := getCurrentCaddyfile() + if err != nil { + log.Printf("[ERROR] SIGUSR1: %v", err) continue } if loaderUsed.loader == nil { @@ -76,14 +66,20 @@ func trapSignalsPosix() { continue } if newCaddyfile != nil { - updatedCaddyfile = newCaddyfile + caddyfileToUse = newCaddyfile } // Kick off the restart; our work is done - inst, err = inst.Restart(updatedCaddyfile) + inst, err = inst.Restart(caddyfileToUse) if err != nil { log.Printf("[ERROR] SIGUSR1: %v", err) } + + case syscall.SIGUSR2: + log.Println("[INFO] SIGUSR2: Upgrading") + if err := Upgrade(); err != nil { + log.Printf("[ERROR] SIGUSR2: upgrading: %v", err) + } } } }() diff --git a/upgrade.go b/upgrade.go new file mode 100644 index 000000000..f79b36dd4 --- /dev/null +++ b/upgrade.go @@ -0,0 +1,212 @@ +package caddy + +import ( + "encoding/gob" + "fmt" + "io/ioutil" + "log" + "os" + "os/exec" + "sync" +) + +func init() { + // register CaddyfileInput with gob so it knows into + // which concrete type to decode an Input interface + gob.Register(CaddyfileInput{}) +} + +// IsUpgrade returns true if this process is part of an upgrade +// where a parent caddy process spawned this one to upgrade +// the binary. +func IsUpgrade() bool { + mu.Lock() + defer mu.Unlock() + return isUpgrade +} + +// Upgrade re-launches the process, preserving the listeners +// for a graceful upgrade. It does NOT load new configuration; +// it only starts the process anew with the current config. +// This makes it possible to perform zero-downtime binary upgrades. +// +// TODO: For more information when debugging, see: +// https://forum.golangbridge.org/t/bind-address-already-in-use-even-after-listener-closed/1510?u=matt +// https://github.com/mholt/shared-conn +func Upgrade() error { + log.Println("[INFO] Upgrading") + + // use existing Caddyfile; do not change configuration during upgrade + currentCaddyfile, _, err := getCurrentCaddyfile() + if err != nil { + return err + } + + if len(os.Args) == 0 { // this should never happen, but... + os.Args = []string{""} + } + + // tell the child that it's a restart + env := os.Environ() + if !IsUpgrade() { + env = append(env, "CADDY__UPGRADE=1") + } + + // prepare our payload to the child process + cdyfileGob := transferGob{ + ListenerFds: make(map[string]uintptr), + Caddyfile: currentCaddyfile, + } + + // prepare a pipe to the fork's stdin so it can get the Caddyfile + rpipe, wpipe, err := os.Pipe() + if err != nil { + return err + } + + // prepare a pipe that the child process will use to communicate + // its success with us by sending > 0 bytes + sigrpipe, sigwpipe, err := os.Pipe() + if err != nil { + return err + } + + // pass along relevant file descriptors to child process; ordering + // is very important since we rely on these being in certain positions. + extraFiles := []*os.File{sigwpipe} // fd 3 + + // add file descriptors of all the sockets + for i, j := 0, 0; ; i++ { + instancesMu.Lock() + if i >= len(instances) { + instancesMu.Unlock() + break + } + inst := instances[i] + instancesMu.Unlock() + + for _, s := range inst.servers { + gs, gracefulOk := s.server.(GracefulServer) + ln, lnOk := s.listener.(Listener) + pc, pcOk := s.packet.(PacketConn) + if gracefulOk { + if lnOk { + lnFile, _ := ln.File() + extraFiles = append(extraFiles, lnFile) + cdyfileGob.ListenerFds["tcp"+gs.Address()] = uintptr(4 + j) // 4 fds come before any of the listeners + j++ + } + if pcOk { + pcFile, _ := pc.File() + extraFiles = append(extraFiles, pcFile) + cdyfileGob.ListenerFds["udp"+gs.Address()] = uintptr(4 + j) // 4 fds come before any of the listeners + j++ + } + } + } + } + + // set up the command + cmd := exec.Command(os.Args[0], os.Args[1:]...) + cmd.Stdin = rpipe // fd 0 + cmd.Stdout = os.Stdout // fd 1 + cmd.Stderr = os.Stderr // fd 2 + cmd.ExtraFiles = extraFiles + cmd.Env = env + + // spawn the child process + err = cmd.Start() + if err != nil { + return err + } + + // immediately close our dup'ed fds and the write end of our signal pipe + for _, f := range extraFiles { + f.Close() + } + + // feed Caddyfile to the child + err = gob.NewEncoder(wpipe).Encode(cdyfileGob) + if err != nil { + return err + } + wpipe.Close() + + // determine whether child startup succeeded + answer, readErr := ioutil.ReadAll(sigrpipe) + if answer == nil || len(answer) == 0 { + cmdErr := cmd.Wait() // get exit status + errStr := fmt.Sprintf("child failed to initialize: %v", cmdErr) + if readErr != nil { + errStr += fmt.Sprintf(" - additionally, error communicating with child process: %v", readErr) + } + return fmt.Errorf(errStr) + } + + // looks like child is successful; we can exit gracefully. + log.Println("[INFO] Upgrade finished") + return Stop() +} + +// getCurrentCaddyfile gets the Caddyfile used by the +// current (first) Instance and returns both of them. +func getCurrentCaddyfile() (Input, *Instance, error) { + instancesMu.Lock() + if len(instances) == 0 { + instancesMu.Unlock() + return nil, nil, fmt.Errorf("no server instances are fully running") + } + inst := instances[0] + instancesMu.Unlock() + + currentCaddyfile := inst.caddyfileInput + if currentCaddyfile == nil { + // hmm, did spawing process forget to close stdin? Anyhow, this is unusual. + return nil, inst, fmt.Errorf("no Caddyfile to reload (was stdin left open?)") + } + return currentCaddyfile, inst, nil +} + +// signalSuccessToParent tells the parent our status using pipe at index 3. +// If this process is not a restart, this function does nothing. +// Calling this function once this process has successfully initialized +// is vital so that the parent process can unblock and kill itself. +// This function is idempotent; it executes at most once per process. +func signalSuccessToParent() { + signalParentOnce.Do(func() { + if IsUpgrade() { + ppipe := os.NewFile(3, "") // parent is reading from pipe at index 3 + _, err := ppipe.Write([]byte("success")) // we must send some bytes to the parent + if err != nil { + log.Printf("[ERROR] Communicating successful init to parent: %v", err) + } + ppipe.Close() + } + }) +} + +// signalParentOnce is used to make sure that the parent is only +// signaled once; doing so more than once breaks whatever socket is +// at fd 4 (TODO: the reason for this is still unclear - to reproduce, +// call Stop() and Start() in succession at least once after a +// restart, then try loading first host of Caddyfile in the browser +// - this was pre-v0.9; this code and godoc is borrowed from the +// implementation then, but I'm not sure if it's been fixed yet, as +// of v0.10.7). Do not use this directly; call signalSuccessToParent +// instead. +var signalParentOnce sync.Once + +// transferGob is used if this is a child process as part of +// a graceful upgrade; it is used to map listeners to their +// index in the list of inherited file descriptors. This +// variable is not safe for concurrent access. +var loadedGob transferGob + +// transferGob maps bind address to index of the file descriptor +// in the Files array passed to the child process. It also contains +// the Caddyfile contents and any other state needed by the new process. +// Used only during graceful upgrades. +type transferGob struct { + ListenerFds map[string]uintptr + Caddyfile Input +}