SIGUSR2 triggers graceful binary upgrades (spawns new process) (#1814)

* SIGUSR2 triggers graceful binary upgrades (spawns new process)

* Move some functions around, hopefully fixing Windows build

* Clean up a couple file closes and add links to useful debugging thread

* Use two underscores in upgrade env var

To help ensure uniqueness / avoid possible collisions
This commit is contained in:
Matt Holt 2017-08-12 11:04:32 -06:00 committed by GitHub
parent d2fa8600fc
commit 5e0896305c
4 changed files with 279 additions and 44 deletions

View file

@ -17,6 +17,7 @@ package caddy
import (
"bytes"
"encoding/gob"
"fmt"
"io"
"io/ioutil"
@ -51,7 +52,7 @@ var (
// isUpgrade will be set to true if this process
// was started as part of an upgrade, where a parent
// Caddy process started this one.
isUpgrade bool
isUpgrade = os.Getenv("CADDY__UPGRADE") == "1"
// started will be set to true when the first
// instance is started; it never gets set to
@ -360,6 +361,16 @@ type AfterStartup interface {
// is returned. Consequently, this function never returns a nil
// value as long as there are no errors.
func LoadCaddyfile(serverType string) (Input, error) {
// If we are finishing an upgrade, we must obtain the Caddyfile
// from our parent process, regardless of configured loaders.
if IsUpgrade() {
err := gob.NewDecoder(os.Stdin).Decode(&loadedGob)
if err != nil {
return nil, err
}
return loadedGob.Caddyfile, nil
}
// Ask plugged-in loaders for a Caddyfile
cdyfile, err := loadCaddyfileInput(serverType)
if err != nil {
@ -424,9 +435,16 @@ func (i *Instance) Caddyfile() Input {
//
// This function blocks until all the servers are listening.
func Start(cdyfile Input) (*Instance, error) {
writePidFile()
inst := &Instance{serverType: cdyfile.ServerType(), wg: new(sync.WaitGroup)}
return inst, startWithListenerFds(cdyfile, inst, nil)
err := startWithListenerFds(cdyfile, inst, nil)
if err != nil {
return inst, err
}
signalSuccessToParent()
if pidErr := writePidFile(); pidErr != nil {
log.Printf("[ERROR] Could not write pidfile: %v", pidErr)
}
return inst, nil
}
func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]restartTriple) error {
@ -445,7 +463,8 @@ func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]r
}
// run startup callbacks
if restartFds == nil {
if !IsUpgrade() && restartFds == nil {
// first startup means not a restart or upgrade
for _, firstStartupFunc := range inst.onFirstStartup {
err := firstStartupFunc()
if err != nil {
@ -500,7 +519,6 @@ func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]r
// callbacks will not be executed between directives, since the purpose
// is only to check the input for valid syntax.
func ValidateAndExecuteDirectives(cdyfile Input, inst *Instance, justValidate bool) error {
// If parsing only inst will be nil, create an instance for this function call only.
if justValidate {
inst = &Instance{serverType: cdyfile.ServerType(), wg: new(sync.WaitGroup)}
@ -536,7 +554,6 @@ func ValidateAndExecuteDirectives(cdyfile Input, inst *Instance, justValidate bo
}
return nil
}
func executeDirectives(inst *Instance, filename string,
@ -616,6 +633,30 @@ func startServers(serverList []Server, inst *Instance, restartFds map[string]res
err error
)
// if performing an upgrade, obtain listener file descriptors
// from parent process
if IsUpgrade() {
if gs, ok := s.(GracefulServer); ok {
addr := gs.Address()
if fdIndex, ok := loadedGob.ListenerFds["tcp"+addr]; ok {
file := os.NewFile(fdIndex, "")
ln, err = net.FileListener(file)
file.Close()
if err != nil {
return err
}
}
if fdIndex, ok := loadedGob.ListenerFds["udp"+addr]; ok {
file := os.NewFile(fdIndex, "")
pc, err = net.FilePacketConn(file)
file.Close()
if err != nil {
return err
}
}
}
}
// If this is a reload and s is a GracefulServer,
// reuse the listener for a graceful restart.
if gs, ok := s.(GracefulServer); ok && restartFds != nil {
@ -795,25 +836,6 @@ func IsInternal(addr string) bool {
return false
}
// Upgrade re-launches the process, preserving the listeners
// for a graceful restart. It does NOT load new configuration;
// it only starts the process anew with a fresh binary.
//
// TODO: This is not yet implemented
func Upgrade() error {
return fmt.Errorf("not implemented")
// TODO: have child process set isUpgrade = true
}
// IsUpgrade returns true if this process is part of an upgrade
// where a parent caddy process spawned this one to upgrade
// the binary.
func IsUpgrade() bool {
mu.Lock()
defer mu.Unlock()
return isUpgrade
}
// Started returns true if at least one instance has been
// started by this package. It never gets reset to false
// once it is set to true.

View file

@ -44,9 +44,14 @@ func activateHTTPS(cctx caddy.Context) error {
// renew all relevant certificates that need renewal. this is important
// to do right away so we guarantee that renewals aren't missed, and
// also the user can respond to any potential errors that occur.
err = caddytls.RenewManagedCertificates(true)
if err != nil {
return err
// (skip if upgrading, because the parent process is likely already listening
// on the ports we'd need to do ACME before we finish starting; parent process
// already running renewal ticker, so renewal won't be missed anyway.)
if !caddy.IsUpgrade() {
err = caddytls.RenewManagedCertificates(true)
if err != nil {
return err
}
}
if !caddy.Quiet && operatorPresent {

View file

@ -13,7 +13,7 @@ import (
func trapSignalsPosix() {
go func() {
sigchan := make(chan os.Signal, 1)
signal.Notify(sigchan, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGUSR1)
signal.Notify(sigchan, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGUSR1, syscall.SIGUSR2)
for sig := range sigchan {
switch sig {
@ -48,19 +48,9 @@ func trapSignalsPosix() {
log.Println("[INFO] SIGUSR1: Reloading")
// Start with the existing Caddyfile
instancesMu.Lock()
if len(instances) == 0 {
instancesMu.Unlock()
log.Println("[ERROR] SIGUSR1: No server instances are fully running")
continue
}
inst := instances[0] // we only support one instance at this time
instancesMu.Unlock()
updatedCaddyfile := inst.caddyfileInput
if updatedCaddyfile == nil {
// Hmm, did spawing process forget to close stdin? Anyhow, this is unusual.
log.Println("[ERROR] SIGUSR1: no Caddyfile to reload (was stdin left open?)")
caddyfileToUse, inst, err := getCurrentCaddyfile()
if err != nil {
log.Printf("[ERROR] SIGUSR1: %v", err)
continue
}
if loaderUsed.loader == nil {
@ -76,14 +66,20 @@ func trapSignalsPosix() {
continue
}
if newCaddyfile != nil {
updatedCaddyfile = newCaddyfile
caddyfileToUse = newCaddyfile
}
// Kick off the restart; our work is done
inst, err = inst.Restart(updatedCaddyfile)
inst, err = inst.Restart(caddyfileToUse)
if err != nil {
log.Printf("[ERROR] SIGUSR1: %v", err)
}
case syscall.SIGUSR2:
log.Println("[INFO] SIGUSR2: Upgrading")
if err := Upgrade(); err != nil {
log.Printf("[ERROR] SIGUSR2: upgrading: %v", err)
}
}
}
}()

212
upgrade.go Normal file
View file

@ -0,0 +1,212 @@
package caddy
import (
"encoding/gob"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"sync"
)
func init() {
// register CaddyfileInput with gob so it knows into
// which concrete type to decode an Input interface
gob.Register(CaddyfileInput{})
}
// IsUpgrade returns true if this process is part of an upgrade
// where a parent caddy process spawned this one to upgrade
// the binary.
func IsUpgrade() bool {
mu.Lock()
defer mu.Unlock()
return isUpgrade
}
// Upgrade re-launches the process, preserving the listeners
// for a graceful upgrade. It does NOT load new configuration;
// it only starts the process anew with the current config.
// This makes it possible to perform zero-downtime binary upgrades.
//
// TODO: For more information when debugging, see:
// https://forum.golangbridge.org/t/bind-address-already-in-use-even-after-listener-closed/1510?u=matt
// https://github.com/mholt/shared-conn
func Upgrade() error {
log.Println("[INFO] Upgrading")
// use existing Caddyfile; do not change configuration during upgrade
currentCaddyfile, _, err := getCurrentCaddyfile()
if err != nil {
return err
}
if len(os.Args) == 0 { // this should never happen, but...
os.Args = []string{""}
}
// tell the child that it's a restart
env := os.Environ()
if !IsUpgrade() {
env = append(env, "CADDY__UPGRADE=1")
}
// prepare our payload to the child process
cdyfileGob := transferGob{
ListenerFds: make(map[string]uintptr),
Caddyfile: currentCaddyfile,
}
// prepare a pipe to the fork's stdin so it can get the Caddyfile
rpipe, wpipe, err := os.Pipe()
if err != nil {
return err
}
// prepare a pipe that the child process will use to communicate
// its success with us by sending > 0 bytes
sigrpipe, sigwpipe, err := os.Pipe()
if err != nil {
return err
}
// pass along relevant file descriptors to child process; ordering
// is very important since we rely on these being in certain positions.
extraFiles := []*os.File{sigwpipe} // fd 3
// add file descriptors of all the sockets
for i, j := 0, 0; ; i++ {
instancesMu.Lock()
if i >= len(instances) {
instancesMu.Unlock()
break
}
inst := instances[i]
instancesMu.Unlock()
for _, s := range inst.servers {
gs, gracefulOk := s.server.(GracefulServer)
ln, lnOk := s.listener.(Listener)
pc, pcOk := s.packet.(PacketConn)
if gracefulOk {
if lnOk {
lnFile, _ := ln.File()
extraFiles = append(extraFiles, lnFile)
cdyfileGob.ListenerFds["tcp"+gs.Address()] = uintptr(4 + j) // 4 fds come before any of the listeners
j++
}
if pcOk {
pcFile, _ := pc.File()
extraFiles = append(extraFiles, pcFile)
cdyfileGob.ListenerFds["udp"+gs.Address()] = uintptr(4 + j) // 4 fds come before any of the listeners
j++
}
}
}
}
// set up the command
cmd := exec.Command(os.Args[0], os.Args[1:]...)
cmd.Stdin = rpipe // fd 0
cmd.Stdout = os.Stdout // fd 1
cmd.Stderr = os.Stderr // fd 2
cmd.ExtraFiles = extraFiles
cmd.Env = env
// spawn the child process
err = cmd.Start()
if err != nil {
return err
}
// immediately close our dup'ed fds and the write end of our signal pipe
for _, f := range extraFiles {
f.Close()
}
// feed Caddyfile to the child
err = gob.NewEncoder(wpipe).Encode(cdyfileGob)
if err != nil {
return err
}
wpipe.Close()
// determine whether child startup succeeded
answer, readErr := ioutil.ReadAll(sigrpipe)
if answer == nil || len(answer) == 0 {
cmdErr := cmd.Wait() // get exit status
errStr := fmt.Sprintf("child failed to initialize: %v", cmdErr)
if readErr != nil {
errStr += fmt.Sprintf(" - additionally, error communicating with child process: %v", readErr)
}
return fmt.Errorf(errStr)
}
// looks like child is successful; we can exit gracefully.
log.Println("[INFO] Upgrade finished")
return Stop()
}
// getCurrentCaddyfile gets the Caddyfile used by the
// current (first) Instance and returns both of them.
func getCurrentCaddyfile() (Input, *Instance, error) {
instancesMu.Lock()
if len(instances) == 0 {
instancesMu.Unlock()
return nil, nil, fmt.Errorf("no server instances are fully running")
}
inst := instances[0]
instancesMu.Unlock()
currentCaddyfile := inst.caddyfileInput
if currentCaddyfile == nil {
// hmm, did spawing process forget to close stdin? Anyhow, this is unusual.
return nil, inst, fmt.Errorf("no Caddyfile to reload (was stdin left open?)")
}
return currentCaddyfile, inst, nil
}
// signalSuccessToParent tells the parent our status using pipe at index 3.
// If this process is not a restart, this function does nothing.
// Calling this function once this process has successfully initialized
// is vital so that the parent process can unblock and kill itself.
// This function is idempotent; it executes at most once per process.
func signalSuccessToParent() {
signalParentOnce.Do(func() {
if IsUpgrade() {
ppipe := os.NewFile(3, "") // parent is reading from pipe at index 3
_, err := ppipe.Write([]byte("success")) // we must send some bytes to the parent
if err != nil {
log.Printf("[ERROR] Communicating successful init to parent: %v", err)
}
ppipe.Close()
}
})
}
// signalParentOnce is used to make sure that the parent is only
// signaled once; doing so more than once breaks whatever socket is
// at fd 4 (TODO: the reason for this is still unclear - to reproduce,
// call Stop() and Start() in succession at least once after a
// restart, then try loading first host of Caddyfile in the browser
// - this was pre-v0.9; this code and godoc is borrowed from the
// implementation then, but I'm not sure if it's been fixed yet, as
// of v0.10.7). Do not use this directly; call signalSuccessToParent
// instead.
var signalParentOnce sync.Once
// transferGob is used if this is a child process as part of
// a graceful upgrade; it is used to map listeners to their
// index in the list of inherited file descriptors. This
// variable is not safe for concurrent access.
var loadedGob transferGob
// transferGob maps bind address to index of the file descriptor
// in the Files array passed to the child process. It also contains
// the Caddyfile contents and any other state needed by the new process.
// Used only during graceful upgrades.
type transferGob struct {
ListenerFds map[string]uintptr
Caddyfile Input
}