mirror of
https://github.com/caddyserver/caddy.git
synced 2025-01-23 09:06:29 +01:00
SIGUSR2 triggers graceful binary upgrades (spawns new process) (#1814)
* SIGUSR2 triggers graceful binary upgrades (spawns new process) * Move some functions around, hopefully fixing Windows build * Clean up a couple file closes and add links to useful debugging thread * Use two underscores in upgrade env var To help ensure uniqueness / avoid possible collisions
This commit is contained in:
parent
d2fa8600fc
commit
5e0896305c
4 changed files with 279 additions and 44 deletions
72
caddy.go
72
caddy.go
|
@ -17,6 +17,7 @@ package caddy
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/gob"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
@ -51,7 +52,7 @@ var (
|
|||
// isUpgrade will be set to true if this process
|
||||
// was started as part of an upgrade, where a parent
|
||||
// Caddy process started this one.
|
||||
isUpgrade bool
|
||||
isUpgrade = os.Getenv("CADDY__UPGRADE") == "1"
|
||||
|
||||
// started will be set to true when the first
|
||||
// instance is started; it never gets set to
|
||||
|
@ -360,6 +361,16 @@ type AfterStartup interface {
|
|||
// is returned. Consequently, this function never returns a nil
|
||||
// value as long as there are no errors.
|
||||
func LoadCaddyfile(serverType string) (Input, error) {
|
||||
// If we are finishing an upgrade, we must obtain the Caddyfile
|
||||
// from our parent process, regardless of configured loaders.
|
||||
if IsUpgrade() {
|
||||
err := gob.NewDecoder(os.Stdin).Decode(&loadedGob)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return loadedGob.Caddyfile, nil
|
||||
}
|
||||
|
||||
// Ask plugged-in loaders for a Caddyfile
|
||||
cdyfile, err := loadCaddyfileInput(serverType)
|
||||
if err != nil {
|
||||
|
@ -424,9 +435,16 @@ func (i *Instance) Caddyfile() Input {
|
|||
//
|
||||
// This function blocks until all the servers are listening.
|
||||
func Start(cdyfile Input) (*Instance, error) {
|
||||
writePidFile()
|
||||
inst := &Instance{serverType: cdyfile.ServerType(), wg: new(sync.WaitGroup)}
|
||||
return inst, startWithListenerFds(cdyfile, inst, nil)
|
||||
err := startWithListenerFds(cdyfile, inst, nil)
|
||||
if err != nil {
|
||||
return inst, err
|
||||
}
|
||||
signalSuccessToParent()
|
||||
if pidErr := writePidFile(); pidErr != nil {
|
||||
log.Printf("[ERROR] Could not write pidfile: %v", pidErr)
|
||||
}
|
||||
return inst, nil
|
||||
}
|
||||
|
||||
func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]restartTriple) error {
|
||||
|
@ -445,7 +463,8 @@ func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]r
|
|||
}
|
||||
|
||||
// run startup callbacks
|
||||
if restartFds == nil {
|
||||
if !IsUpgrade() && restartFds == nil {
|
||||
// first startup means not a restart or upgrade
|
||||
for _, firstStartupFunc := range inst.onFirstStartup {
|
||||
err := firstStartupFunc()
|
||||
if err != nil {
|
||||
|
@ -500,7 +519,6 @@ func startWithListenerFds(cdyfile Input, inst *Instance, restartFds map[string]r
|
|||
// callbacks will not be executed between directives, since the purpose
|
||||
// is only to check the input for valid syntax.
|
||||
func ValidateAndExecuteDirectives(cdyfile Input, inst *Instance, justValidate bool) error {
|
||||
|
||||
// If parsing only inst will be nil, create an instance for this function call only.
|
||||
if justValidate {
|
||||
inst = &Instance{serverType: cdyfile.ServerType(), wg: new(sync.WaitGroup)}
|
||||
|
@ -536,7 +554,6 @@ func ValidateAndExecuteDirectives(cdyfile Input, inst *Instance, justValidate bo
|
|||
}
|
||||
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
func executeDirectives(inst *Instance, filename string,
|
||||
|
@ -616,6 +633,30 @@ func startServers(serverList []Server, inst *Instance, restartFds map[string]res
|
|||
err error
|
||||
)
|
||||
|
||||
// if performing an upgrade, obtain listener file descriptors
|
||||
// from parent process
|
||||
if IsUpgrade() {
|
||||
if gs, ok := s.(GracefulServer); ok {
|
||||
addr := gs.Address()
|
||||
if fdIndex, ok := loadedGob.ListenerFds["tcp"+addr]; ok {
|
||||
file := os.NewFile(fdIndex, "")
|
||||
ln, err = net.FileListener(file)
|
||||
file.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if fdIndex, ok := loadedGob.ListenerFds["udp"+addr]; ok {
|
||||
file := os.NewFile(fdIndex, "")
|
||||
pc, err = net.FilePacketConn(file)
|
||||
file.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If this is a reload and s is a GracefulServer,
|
||||
// reuse the listener for a graceful restart.
|
||||
if gs, ok := s.(GracefulServer); ok && restartFds != nil {
|
||||
|
@ -795,25 +836,6 @@ func IsInternal(addr string) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Upgrade re-launches the process, preserving the listeners
|
||||
// for a graceful restart. It does NOT load new configuration;
|
||||
// it only starts the process anew with a fresh binary.
|
||||
//
|
||||
// TODO: This is not yet implemented
|
||||
func Upgrade() error {
|
||||
return fmt.Errorf("not implemented")
|
||||
// TODO: have child process set isUpgrade = true
|
||||
}
|
||||
|
||||
// IsUpgrade returns true if this process is part of an upgrade
|
||||
// where a parent caddy process spawned this one to upgrade
|
||||
// the binary.
|
||||
func IsUpgrade() bool {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
return isUpgrade
|
||||
}
|
||||
|
||||
// Started returns true if at least one instance has been
|
||||
// started by this package. It never gets reset to false
|
||||
// once it is set to true.
|
||||
|
|
|
@ -44,10 +44,15 @@ func activateHTTPS(cctx caddy.Context) error {
|
|||
// renew all relevant certificates that need renewal. this is important
|
||||
// to do right away so we guarantee that renewals aren't missed, and
|
||||
// also the user can respond to any potential errors that occur.
|
||||
// (skip if upgrading, because the parent process is likely already listening
|
||||
// on the ports we'd need to do ACME before we finish starting; parent process
|
||||
// already running renewal ticker, so renewal won't be missed anyway.)
|
||||
if !caddy.IsUpgrade() {
|
||||
err = caddytls.RenewManagedCertificates(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if !caddy.Quiet && operatorPresent {
|
||||
fmt.Println(" done.")
|
||||
|
|
|
@ -13,7 +13,7 @@ import (
|
|||
func trapSignalsPosix() {
|
||||
go func() {
|
||||
sigchan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigchan, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGUSR1)
|
||||
signal.Notify(sigchan, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGUSR1, syscall.SIGUSR2)
|
||||
|
||||
for sig := range sigchan {
|
||||
switch sig {
|
||||
|
@ -48,19 +48,9 @@ func trapSignalsPosix() {
|
|||
log.Println("[INFO] SIGUSR1: Reloading")
|
||||
|
||||
// Start with the existing Caddyfile
|
||||
instancesMu.Lock()
|
||||
if len(instances) == 0 {
|
||||
instancesMu.Unlock()
|
||||
log.Println("[ERROR] SIGUSR1: No server instances are fully running")
|
||||
continue
|
||||
}
|
||||
inst := instances[0] // we only support one instance at this time
|
||||
instancesMu.Unlock()
|
||||
|
||||
updatedCaddyfile := inst.caddyfileInput
|
||||
if updatedCaddyfile == nil {
|
||||
// Hmm, did spawing process forget to close stdin? Anyhow, this is unusual.
|
||||
log.Println("[ERROR] SIGUSR1: no Caddyfile to reload (was stdin left open?)")
|
||||
caddyfileToUse, inst, err := getCurrentCaddyfile()
|
||||
if err != nil {
|
||||
log.Printf("[ERROR] SIGUSR1: %v", err)
|
||||
continue
|
||||
}
|
||||
if loaderUsed.loader == nil {
|
||||
|
@ -76,14 +66,20 @@ func trapSignalsPosix() {
|
|||
continue
|
||||
}
|
||||
if newCaddyfile != nil {
|
||||
updatedCaddyfile = newCaddyfile
|
||||
caddyfileToUse = newCaddyfile
|
||||
}
|
||||
|
||||
// Kick off the restart; our work is done
|
||||
inst, err = inst.Restart(updatedCaddyfile)
|
||||
inst, err = inst.Restart(caddyfileToUse)
|
||||
if err != nil {
|
||||
log.Printf("[ERROR] SIGUSR1: %v", err)
|
||||
}
|
||||
|
||||
case syscall.SIGUSR2:
|
||||
log.Println("[INFO] SIGUSR2: Upgrading")
|
||||
if err := Upgrade(); err != nil {
|
||||
log.Printf("[ERROR] SIGUSR2: upgrading: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
|
212
upgrade.go
Normal file
212
upgrade.go
Normal file
|
@ -0,0 +1,212 @@
|
|||
package caddy
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"sync"
|
||||
)
|
||||
|
||||
func init() {
|
||||
// register CaddyfileInput with gob so it knows into
|
||||
// which concrete type to decode an Input interface
|
||||
gob.Register(CaddyfileInput{})
|
||||
}
|
||||
|
||||
// IsUpgrade returns true if this process is part of an upgrade
|
||||
// where a parent caddy process spawned this one to upgrade
|
||||
// the binary.
|
||||
func IsUpgrade() bool {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
return isUpgrade
|
||||
}
|
||||
|
||||
// Upgrade re-launches the process, preserving the listeners
|
||||
// for a graceful upgrade. It does NOT load new configuration;
|
||||
// it only starts the process anew with the current config.
|
||||
// This makes it possible to perform zero-downtime binary upgrades.
|
||||
//
|
||||
// TODO: For more information when debugging, see:
|
||||
// https://forum.golangbridge.org/t/bind-address-already-in-use-even-after-listener-closed/1510?u=matt
|
||||
// https://github.com/mholt/shared-conn
|
||||
func Upgrade() error {
|
||||
log.Println("[INFO] Upgrading")
|
||||
|
||||
// use existing Caddyfile; do not change configuration during upgrade
|
||||
currentCaddyfile, _, err := getCurrentCaddyfile()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(os.Args) == 0 { // this should never happen, but...
|
||||
os.Args = []string{""}
|
||||
}
|
||||
|
||||
// tell the child that it's a restart
|
||||
env := os.Environ()
|
||||
if !IsUpgrade() {
|
||||
env = append(env, "CADDY__UPGRADE=1")
|
||||
}
|
||||
|
||||
// prepare our payload to the child process
|
||||
cdyfileGob := transferGob{
|
||||
ListenerFds: make(map[string]uintptr),
|
||||
Caddyfile: currentCaddyfile,
|
||||
}
|
||||
|
||||
// prepare a pipe to the fork's stdin so it can get the Caddyfile
|
||||
rpipe, wpipe, err := os.Pipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// prepare a pipe that the child process will use to communicate
|
||||
// its success with us by sending > 0 bytes
|
||||
sigrpipe, sigwpipe, err := os.Pipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// pass along relevant file descriptors to child process; ordering
|
||||
// is very important since we rely on these being in certain positions.
|
||||
extraFiles := []*os.File{sigwpipe} // fd 3
|
||||
|
||||
// add file descriptors of all the sockets
|
||||
for i, j := 0, 0; ; i++ {
|
||||
instancesMu.Lock()
|
||||
if i >= len(instances) {
|
||||
instancesMu.Unlock()
|
||||
break
|
||||
}
|
||||
inst := instances[i]
|
||||
instancesMu.Unlock()
|
||||
|
||||
for _, s := range inst.servers {
|
||||
gs, gracefulOk := s.server.(GracefulServer)
|
||||
ln, lnOk := s.listener.(Listener)
|
||||
pc, pcOk := s.packet.(PacketConn)
|
||||
if gracefulOk {
|
||||
if lnOk {
|
||||
lnFile, _ := ln.File()
|
||||
extraFiles = append(extraFiles, lnFile)
|
||||
cdyfileGob.ListenerFds["tcp"+gs.Address()] = uintptr(4 + j) // 4 fds come before any of the listeners
|
||||
j++
|
||||
}
|
||||
if pcOk {
|
||||
pcFile, _ := pc.File()
|
||||
extraFiles = append(extraFiles, pcFile)
|
||||
cdyfileGob.ListenerFds["udp"+gs.Address()] = uintptr(4 + j) // 4 fds come before any of the listeners
|
||||
j++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// set up the command
|
||||
cmd := exec.Command(os.Args[0], os.Args[1:]...)
|
||||
cmd.Stdin = rpipe // fd 0
|
||||
cmd.Stdout = os.Stdout // fd 1
|
||||
cmd.Stderr = os.Stderr // fd 2
|
||||
cmd.ExtraFiles = extraFiles
|
||||
cmd.Env = env
|
||||
|
||||
// spawn the child process
|
||||
err = cmd.Start()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// immediately close our dup'ed fds and the write end of our signal pipe
|
||||
for _, f := range extraFiles {
|
||||
f.Close()
|
||||
}
|
||||
|
||||
// feed Caddyfile to the child
|
||||
err = gob.NewEncoder(wpipe).Encode(cdyfileGob)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
wpipe.Close()
|
||||
|
||||
// determine whether child startup succeeded
|
||||
answer, readErr := ioutil.ReadAll(sigrpipe)
|
||||
if answer == nil || len(answer) == 0 {
|
||||
cmdErr := cmd.Wait() // get exit status
|
||||
errStr := fmt.Sprintf("child failed to initialize: %v", cmdErr)
|
||||
if readErr != nil {
|
||||
errStr += fmt.Sprintf(" - additionally, error communicating with child process: %v", readErr)
|
||||
}
|
||||
return fmt.Errorf(errStr)
|
||||
}
|
||||
|
||||
// looks like child is successful; we can exit gracefully.
|
||||
log.Println("[INFO] Upgrade finished")
|
||||
return Stop()
|
||||
}
|
||||
|
||||
// getCurrentCaddyfile gets the Caddyfile used by the
|
||||
// current (first) Instance and returns both of them.
|
||||
func getCurrentCaddyfile() (Input, *Instance, error) {
|
||||
instancesMu.Lock()
|
||||
if len(instances) == 0 {
|
||||
instancesMu.Unlock()
|
||||
return nil, nil, fmt.Errorf("no server instances are fully running")
|
||||
}
|
||||
inst := instances[0]
|
||||
instancesMu.Unlock()
|
||||
|
||||
currentCaddyfile := inst.caddyfileInput
|
||||
if currentCaddyfile == nil {
|
||||
// hmm, did spawing process forget to close stdin? Anyhow, this is unusual.
|
||||
return nil, inst, fmt.Errorf("no Caddyfile to reload (was stdin left open?)")
|
||||
}
|
||||
return currentCaddyfile, inst, nil
|
||||
}
|
||||
|
||||
// signalSuccessToParent tells the parent our status using pipe at index 3.
|
||||
// If this process is not a restart, this function does nothing.
|
||||
// Calling this function once this process has successfully initialized
|
||||
// is vital so that the parent process can unblock and kill itself.
|
||||
// This function is idempotent; it executes at most once per process.
|
||||
func signalSuccessToParent() {
|
||||
signalParentOnce.Do(func() {
|
||||
if IsUpgrade() {
|
||||
ppipe := os.NewFile(3, "") // parent is reading from pipe at index 3
|
||||
_, err := ppipe.Write([]byte("success")) // we must send some bytes to the parent
|
||||
if err != nil {
|
||||
log.Printf("[ERROR] Communicating successful init to parent: %v", err)
|
||||
}
|
||||
ppipe.Close()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// signalParentOnce is used to make sure that the parent is only
|
||||
// signaled once; doing so more than once breaks whatever socket is
|
||||
// at fd 4 (TODO: the reason for this is still unclear - to reproduce,
|
||||
// call Stop() and Start() in succession at least once after a
|
||||
// restart, then try loading first host of Caddyfile in the browser
|
||||
// - this was pre-v0.9; this code and godoc is borrowed from the
|
||||
// implementation then, but I'm not sure if it's been fixed yet, as
|
||||
// of v0.10.7). Do not use this directly; call signalSuccessToParent
|
||||
// instead.
|
||||
var signalParentOnce sync.Once
|
||||
|
||||
// transferGob is used if this is a child process as part of
|
||||
// a graceful upgrade; it is used to map listeners to their
|
||||
// index in the list of inherited file descriptors. This
|
||||
// variable is not safe for concurrent access.
|
||||
var loadedGob transferGob
|
||||
|
||||
// transferGob maps bind address to index of the file descriptor
|
||||
// in the Files array passed to the child process. It also contains
|
||||
// the Caddyfile contents and any other state needed by the new process.
|
||||
// Used only during graceful upgrades.
|
||||
type transferGob struct {
|
||||
ListenerFds map[string]uintptr
|
||||
Caddyfile Input
|
||||
}
|
Loading…
Reference in a new issue