diff --git a/caddyhttp/httpserver/plugin.go b/caddyhttp/httpserver/plugin.go
index 0bac0c069..8e03a300c 100644
--- a/caddyhttp/httpserver/plugin.go
+++ b/caddyhttp/httpserver/plugin.go
@@ -485,6 +485,7 @@ var directives = []string{
 	"push",
 	"datadog",    // github.com/payintech/caddy-datadog
 	"prometheus", // github.com/miekg/caddy-prometheus
+	"templates",
 	"proxy",
 	"fastcgi",
 	"cgi", // github.com/jung-kurt/caddy-cgi
@@ -492,7 +493,6 @@ var directives = []string{
 	"filemanager", // github.com/hacdias/filemanager/caddy/filemanager
 	"webdav",      // github.com/hacdias/caddy-webdav
 	"markdown",
-	"templates",
 	"browse",
 	"jekyll",    // github.com/hacdias/filemanager/caddy/jekyll
 	"hugo",      // github.com/hacdias/filemanager/caddy/hugo
diff --git a/caddyhttp/httpserver/recorder.go b/caddyhttp/httpserver/recorder.go
index da89056cf..2651549d4 100644
--- a/caddyhttp/httpserver/recorder.go
+++ b/caddyhttp/httpserver/recorder.go
@@ -1,7 +1,10 @@
 package httpserver
 
 import (
+	"bytes"
+	"io"
 	"net/http"
+	"sync"
 	"time"
 )
 
@@ -25,9 +28,7 @@ type ResponseRecorder struct {
 	start    time.Time
 }
 
-// NewResponseRecorder makes and returns a new responseRecorder,
-// which captures the HTTP Status code from the ResponseWriter
-// and also the length of the response body written through it.
+// NewResponseRecorder makes and returns a new ResponseRecorder.
 // Because a status is not set unless WriteHeader is called
 // explicitly, this constructor initializes with a status code
 // of 200 to cover the default case.
@@ -56,15 +57,155 @@ func (r *ResponseRecorder) Write(buf []byte) (int, error) {
 	return n, err
 }
 
-// Size is a Getter to size property
+// Size returns the size of the recorded response body.
 func (r *ResponseRecorder) Size() int {
 	return r.size
 }
 
-// Status is a Getter to status property
+// Status returns the recorded response status code.
 func (r *ResponseRecorder) Status() int {
 	return r.status
 }
 
+// ResponseBuffer is a type that conditionally buffers the
+// response in memory. It implements http.ResponseWriter so
+// that it can stream the response if it is not buffering.
+// Whether it buffers is decided by a func passed into the
+// constructor, NewResponseBuffer.
+//
+// This type implements http.ResponseWriter, so you can pass
+// this to the Next() middleware in the chain and record its
+// response. However, since the entire response body will be
+// buffered in memory, only use this when explicitly configured
+// and required for some specific reason. For example, the
+// text/template package only parses templates out of []byte
+// and not io.Reader, so the templates directive uses this
+// type to obtain the entire template text, but only on certain
+// requests that match the right Content-Type, etc.
+//
+// ResponseBuffer also implements io.ReaderFrom for performance
+// reasons. The standard lib's http.response type (unexported)
+// uses io.Copy to write the body. io.Copy makes an allocation
+// if the destination does not have a ReadFrom method (or if
+// the source does not have a WriteTo method, but that's
+// irrelevant here). Our ReadFrom is smart: if buffering, it
+// calls the buffer's ReadFrom, which makes no allocs because
+// it is already a buffer! If we're streaming the response
+// instead, ReadFrom uses io.CopyBuffer with a pooled buffer
+// that is managed within this package.
+type ResponseBuffer struct {
+	*ResponseWriterWrapper
+	Buffer       *bytes.Buffer
+	header       http.Header
+	status       int
+	shouldBuffer func(status int, header http.Header) bool
+	stream       bool
+	rw           http.ResponseWriter
+}
+
+// NewResponseBuffer returns a new ResponseBuffer that will
+// use buf to store the full body of the response if shouldBuffer
+// returns true. If shouldBuffer returns false, then the response
+// body will be streamed directly to rw.
+//
+// shouldBuffer will be passed the status code and header fields of
+// the response. With that information, the function should decide
+// whether to buffer the response in memory. For example: the templates
+// directive uses this to determine whether the response is the
+// right Content-Type (according to user config) for a template.
+//
+// For performance, the buf you pass in should probably be obtained
+// from a sync.Pool in order to reuse allocated space.
+func NewResponseBuffer(buf *bytes.Buffer, rw http.ResponseWriter,
+	shouldBuffer func(status int, header http.Header) bool) *ResponseBuffer {
+	rb := &ResponseBuffer{
+		Buffer:       buf,
+		header:       make(http.Header),
+		status:       http.StatusOK, // default status code
+		shouldBuffer: shouldBuffer,
+		rw:           rw,
+	}
+	rb.ResponseWriterWrapper = &ResponseWriterWrapper{ResponseWriter: rw}
+	return rb
+}
+
+// Header returns the response header map.
+func (rb *ResponseBuffer) Header() http.Header {
+	return rb.header
+}
+
+// WriteHeader calls shouldBuffer to decide whether the
+// upcoming body should be buffered, and then writes
+// the header to the response.
+func (rb *ResponseBuffer) WriteHeader(status int) {
+	rb.status = status
+	rb.stream = !rb.shouldBuffer(status, rb.header)
+	if rb.stream {
+		rb.CopyHeader()
+		rb.ResponseWriterWrapper.WriteHeader(status)
+	}
+}
+
+// Write writes buf to rb.Buffer if buffering, otherwise
+// to the ResponseWriter directly if streaming.
+func (rb *ResponseBuffer) Write(buf []byte) (int, error) {
+	if rb.stream {
+		return rb.ResponseWriterWrapper.Write(buf)
+	}
+	return rb.Buffer.Write(buf)
+}
+
+// Buffered returns whether rb has decided to buffer the response.
+func (rb *ResponseBuffer) Buffered() bool {
+	return !rb.stream
+}
+
+// CopyHeader copies the buffered header in rb to the ResponseWriter,
+// but it does not write the header out.
+func (rb *ResponseBuffer) CopyHeader() {
+	for field, val := range rb.header {
+		rb.ResponseWriterWrapper.Header()[field] = val
+	}
+}
+
+// ReadFrom avoids allocations when writing to the buffer (if buffering),
+// and reduces allocations when writing to the ResponseWriter directly
+// (if streaming).
+//
+// In local testing with the templates directive, req/sec were improved
+// from ~8,200 to ~9,600 on templated files by ensuring that this type
+// implements io.ReaderFrom.
+func (rb *ResponseBuffer) ReadFrom(src io.Reader) (int64, error) {
+	if rb.stream {
+		// first see if we can avoid any allocations at all
+		if wt, ok := src.(io.WriterTo); ok {
+			return wt.WriteTo(rb.ResponseWriterWrapper)
+		}
+		// if not, use a pooled copy buffer to reduce allocs
+		// (this improved req/sec from ~25,300 to ~27,000 on
+		// static files served directly with the fileserver,
+		// but results fluctuated a little on each run).
+		// a note of caution:
+		// https://go-review.googlesource.com/c/22134#message-ff351762308fe05f6b72a487d6842e3988916486
+		buf := respBufPool.Get().([]byte)
+		n, err := io.CopyBuffer(rb.ResponseWriterWrapper, src, buf)
+		respBufPool.Put(buf) // defer'ing this slowed down benchmarks a smidgin, I think
+		return n, err
+	}
+	return rb.Buffer.ReadFrom(src)
+}
+
+// respBufPool is used for io.CopyBuffer when ResponseBuffer
+// is configured to stream a response.
+var respBufPool = &sync.Pool{
+	New: func() interface{} {
+		return make([]byte, 32*1024)
+	},
+}
+
 // Interface guards
-var _ HTTPInterfaces = (*ResponseRecorder)(nil)
+var (
+	_ HTTPInterfaces = (*ResponseRecorder)(nil)
+	_ HTTPInterfaces = (*ResponseBuffer)(nil)
+	_ io.ReaderFrom  = (*ResponseBuffer)(nil)
+)
diff --git a/caddyhttp/templates/templates.go b/caddyhttp/templates/templates.go
index 0b547f4b6..75e7ef852 100644
--- a/caddyhttp/templates/templates.go
+++ b/caddyhttp/templates/templates.go
@@ -6,92 +6,103 @@ import (
 	"bytes"
 	"mime"
 	"net/http"
-	"os"
 	"path"
 	"path/filepath"
+	"strconv"
+	"strings"
 	"sync"
 	"text/template"
+	"time"
 
 	"github.com/mholt/caddy/caddyhttp/httpserver"
 )
 
 // ServeHTTP implements the httpserver.Handler interface.
 func (t Templates) ServeHTTP(w http.ResponseWriter, r *http.Request) (int, error) {
+	// iterate rules, to find first one that matches the request path
 	for _, rule := range t.Rules {
 		if !httpserver.Path(r.URL.Path).Matches(rule.Path) {
 			continue
 		}
 
-		// Check for index files
 		fpath := r.URL.Path
-		if idx, ok := httpserver.IndexFile(t.FileSys, fpath, rule.IndexFiles); ok {
-			fpath = idx
-		}
 
-		// Check the extension
-		reqExt := path.Ext(fpath)
+		// get a buffer from the pool and make a response recorder
+		buf := t.BufPool.Get().(*bytes.Buffer)
+		buf.Reset()
+		defer t.BufPool.Put(buf)
 
-		for _, ext := range rule.Extensions {
-			if reqExt == ext {
-				// Create execution context
-				ctx := httpserver.NewContextWithHeader(w.Header())
-				ctx.Root = t.FileSys
-				ctx.Req = r
-				ctx.URL = r.URL
-
-				// New template
-				templateName := filepath.Base(fpath)
-				tpl := template.New(templateName)
-
-				// Set delims
-				if rule.Delims != [2]string{} {
-					tpl.Delims(rule.Delims[0], rule.Delims[1])
-				}
-
-				// Add custom functions
-				tpl.Funcs(httpserver.TemplateFuncs)
-
-				// Build the template
-				templatePath := filepath.Join(t.Root, fpath)
-				tpl, err := tpl.ParseFiles(templatePath)
-				if err != nil {
-					if os.IsNotExist(err) {
-						return http.StatusNotFound, nil
-					} else if os.IsPermission(err) {
-						return http.StatusForbidden, nil
+		// only buffer the response when we want to execute a template
+		shouldBuf := func(status int, header http.Header) bool {
+			// see if this request matches a template extension
+			reqExt := path.Ext(fpath)
+			for _, ext := range rule.Extensions {
+				if reqExt == "" {
+					// request has no extension, so check response Content-Type
+					ct := mime.TypeByExtension(ext)
+					if strings.Contains(header.Get("Content-Type"), ct) {
+						return true
 					}
-					return http.StatusInternalServerError, err
+				} else if reqExt == ext {
+					return true
 				}
-
-				// Execute it
-				buf := t.BufPool.Get().(*bytes.Buffer)
-				buf.Reset()
-				defer t.BufPool.Put(buf)
-				err = tpl.Execute(buf, ctx)
-				if err != nil {
-					return http.StatusInternalServerError, err
-				}
-
-				// If Content-Type isn't set here, http.ResponseWriter.Write
-				// will set it according to response body. But other middleware
-				// such as gzip can modify response body, then Content-Type
-				// detected by http.ResponseWriter.Write is wrong.
-				ctype := mime.TypeByExtension(ext)
-				if ctype == "" {
-					ctype = http.DetectContentType(buf.Bytes())
-				}
-				w.Header().Set("Content-Type", ctype)
-
-				templateInfo, err := os.Stat(templatePath)
-				if err == nil {
-					// add the Last-Modified header if we were able to read the stamp
-					httpserver.SetLastModifiedHeader(w, templateInfo.ModTime())
-				}
-				buf.WriteTo(w)
-
-				return http.StatusOK, nil
 			}
+			return false
 		}
+
+		// prepare a buffer to hold the response, if applicable
+		rb := httpserver.NewResponseBuffer(buf, w, shouldBuf)
+
+		// pass request up the chain to let another middleware provide us the template
+		code, err := t.Next.ServeHTTP(rb, r)
+		if !rb.Buffered() || code >= 300 || err != nil {
+			return code, err
+		}
+
+		// create a new template
+		templateName := filepath.Base(fpath)
+		tpl := template.New(templateName)
+
+		// set delimiters
+		if rule.Delims != [2]string{} {
+			tpl.Delims(rule.Delims[0], rule.Delims[1])
+		}
+
+		// add custom functions
+		tpl.Funcs(httpserver.TemplateFuncs)
+
+		// parse the template
+		parsedTpl, err := tpl.Parse(rb.Buffer.String())
+		if err != nil {
+			return http.StatusInternalServerError, err
+		}
+
+		// create execution context for the template template
+		ctx := httpserver.NewContextWithHeader(w.Header())
+		ctx.Root = t.FileSys
+		ctx.Req = r
+		ctx.URL = r.URL
+
+		// execute the template
+		buf.Reset()
+		err = parsedTpl.Execute(buf, ctx)
+		if err != nil {
+			return http.StatusInternalServerError, err
+		}
+
+		// copy the buffered header into the real ResponseWriter
+		rb.CopyHeader()
+
+		// set the actual content length now that the template was executed
+		w.Header().Set("Content-Length", strconv.FormatInt(int64(buf.Len()), 10))
+
+		// get the modification time in preparation to ServeContent
+		modTime, _ := time.Parse(http.TimeFormat, w.Header().Get("Last-Modified"))
+
+		// at last, write the rendered template to the response
+		http.ServeContent(w, r, templateName, modTime, bytes.NewReader(buf.Bytes()))
+
+		return http.StatusOK, nil
 	}
 
 	return t.Next.ServeHTTP(w, r)
diff --git a/caddyhttp/templates/templates_test.go b/caddyhttp/templates/templates_test.go
index c1d53a2de..b129cb9d9 100644
--- a/caddyhttp/templates/templates_test.go
+++ b/caddyhttp/templates/templates_test.go
@@ -2,19 +2,20 @@ package templates
 
 import (
 	"bytes"
+	"context"
 	"net/http"
 	"net/http/httptest"
 	"sync"
 	"testing"
 
 	"github.com/mholt/caddy/caddyhttp/httpserver"
+	"github.com/mholt/caddy/caddyhttp/staticfiles"
 )
 
 func TestTemplates(t *testing.T) {
+	siteRoot := "./testdata"
 	tmpl := Templates{
-		Next: httpserver.HandlerFunc(func(w http.ResponseWriter, r *http.Request) (int, error) {
-			return 0, nil
-		}),
+		Next: staticfiles.FileServer{Root: http.Dir(siteRoot)},
 		Rules: []Rule{
 			{
 				Extensions: []string{".html"},
@@ -28,15 +29,13 @@ func TestTemplates(t *testing.T) {
 				Delims:     [2]string{"{%", "%}"},
 			},
 		},
-		Root:    "./testdata",
-		FileSys: http.Dir("./testdata"),
+		Root:    siteRoot,
+		FileSys: http.Dir(siteRoot),
 		BufPool: &sync.Pool{New: func() interface{} { return new(bytes.Buffer) }},
 	}
 
 	tmplroot := Templates{
-		Next: httpserver.HandlerFunc(func(w http.ResponseWriter, r *http.Request) (int, error) {
-			return 0, nil
-		}),
+		Next: staticfiles.FileServer{Root: http.Dir(siteRoot)},
 		Rules: []Rule{
 			{
 				Extensions: []string{".html"},
@@ -44,8 +43,8 @@ func TestTemplates(t *testing.T) {
 				Path:       "/",
 			},
 		},
-		Root:    "./testdata",
-		FileSys: http.Dir("./testdata"),
+		Root:    siteRoot,
+		FileSys: http.Dir(siteRoot),
 		BufPool: &sync.Pool{New: func() interface{} { return new(bytes.Buffer) }},
 	}
 
@@ -54,6 +53,7 @@ func TestTemplates(t *testing.T) {
 	if err != nil {
 		t.Fatalf("Test: Could not create HTTP request: %v", err)
 	}
+	req = req.WithContext(context.WithValue(req.Context(), httpserver.OriginalURLCtxKey, *req.URL))
 
 	rec := httptest.NewRecorder()
 
@@ -77,6 +77,7 @@ func TestTemplates(t *testing.T) {
 	if err != nil {
 		t.Fatalf("Could not create HTTP request: %v", err)
 	}
+	req = req.WithContext(context.WithValue(req.Context(), httpserver.OriginalURLCtxKey, *req.URL))
 
 	rec = httptest.NewRecorder()
 
@@ -100,6 +101,7 @@ func TestTemplates(t *testing.T) {
 	if err != nil {
 		t.Fatalf("Could not create HTTP request: %v", err)
 	}
+	req = req.WithContext(context.WithValue(req.Context(), httpserver.OriginalURLCtxKey, *req.URL))
 
 	rec = httptest.NewRecorder()
 
@@ -122,6 +124,7 @@ func TestTemplates(t *testing.T) {
 	if err != nil {
 		t.Fatalf("Could not create HTTP request: %v", err)
 	}
+	req = req.WithContext(context.WithValue(req.Context(), httpserver.OriginalURLCtxKey, *req.URL))
 
 	rec = httptest.NewRecorder()