[bugfix] Store LastModified for domain perm subs + send as If-Modified-Since (#3655)

This commit is contained in:
tobi 2025-01-20 09:56:00 +00:00 committed by GitHub
parent 9835d3e65d
commit 37fd7c7a6a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 377 additions and 42 deletions

View file

@ -128,4 +128,14 @@ instance-deliver-to-shared-inboxes: true
# Options: [true, false]
# Default: false
instance-inject-mastodon-version: false
# String. 24hr time of day formatted as hh:mm.
# Examples: ["14:30", "00:00", "04:00"]
# Default: "23:00" (11pm).
instance-subscriptions-process-from: "23:00"
# Duration. Period between subscription updates.
# Examples: ["24h", "72h", "12h"]
# Default: "24h" (once per day).
instance-subscriptions-process-every: "24h"
```

View file

@ -415,6 +415,15 @@ instance-deliver-to-shared-inboxes: true
# Default: false
instance-inject-mastodon-version: false
# String. 24hr time of day formatted as hh:mm.
# Examples: ["14:30", "00:00", "04:00"]
# Default: "23:00" (11pm).
instance-subscriptions-process-from: "23:00"
# Duration. Period between subscription updates.
# Examples: ["24h", "72h", "12h"]
# Default: "24h" (once per day).
instance-subscriptions-process-every: "24h"
###########################
##### ACCOUNTS CONFIG #####

View file

@ -372,6 +372,7 @@ func sizeofDomainPermissionSubscription() uintptr {
FetchedAt: exampleTime,
SuccessfullyFetchedAt: exampleTime,
ETag: exampleID,
LastModified: exampleTime,
Error: exampleTextSmall,
}))
}

View file

@ -33,6 +33,7 @@ type DomainPermissionSubscription struct {
FetchPassword string `bun:",nullzero"`
FetchedAt time.Time `bun:"type:timestamptz,nullzero"`
SuccessfullyFetchedAt time.Time `bun:"type:timestamptz,nullzero"`
LastModified time.Time `bun:"type:timestamptz,nullzero"`
ETag string `bun:"etag,nullzero"`
Error string `bun:",nullzero"`
}

View file

@ -0,0 +1,76 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package migrations
import (
"context"
"fmt"
"reflect"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/uptrace/bun"
)
// init registers the migration that adds the "last_modified"
// column to the "domain_permission_subscriptions" table.
//
// The up migration does nothing if the column is already
// present. The down migration makes no schema changes.
func init() {
	// addLastModified performs the actual schema
	// change inside of the given transaction.
	addLastModified := func(ctx context.Context, tx bun.Tx) error {
		// Check whether the column was
		// already added on a previous run.
		exists, err := doesColumnExist(
			ctx,
			tx,
			"domain_permission_subscriptions",
			"last_modified",
		)
		if err != nil {
			return err
		}

		if exists {
			// Nothing
			// left to do.
			return nil
		}

		// Build the column definition from the
		// model's LastModified field. A nil model
		// pointer is enough, as only the type is used
		// here and the same pointer is handed to bun.
		model := (*gtsmodel.DomainPermissionSubscription)(nil)
		colDef, err := getBunColumnDef(tx, reflect.TypeOf(model), "LastModified")
		if err != nil {
			return fmt.Errorf("error making column def: %w", err)
		}

		log.Info(ctx, "adding domain_permission_subscriptions.last_modified column...")

		addColumn := tx.
			NewAddColumn().
			Model(model).
			ColumnExpr(colDef)
		if _, err := addColumn.Exec(ctx); err != nil {
			return fmt.Errorf("error adding column: %w", err)
		}

		return nil
	}

	// noop is the transaction body
	// used by the down migration.
	noop := func(context.Context, bun.Tx) error {
		return nil
	}

	up := func(ctx context.Context, db *bun.DB) error {
		return db.RunInTx(ctx, nil, addLastModified)
	}

	down := func(ctx context.Context, db *bun.DB) error {
		return db.RunInTx(ctx, nil, noop)
	}

	// Failing to register a migration
	// is treated as unrecoverable.
	if err := Migrations.Register(up, down); err != nil {
		panic(err)
	}
}

View file

@ -34,7 +34,8 @@ type DomainPermissionSubscription struct {
FetchPassword string `bun:",nullzero"` // Password to send when doing a GET of URI using basic auth.
FetchedAt time.Time `bun:"type:timestamptz,nullzero"` // Time when fetch of URI was last attempted.
SuccessfullyFetchedAt time.Time `bun:"type:timestamptz,nullzero"` // Time when the domain permission list was last *successfuly* fetched, to be transmitted as If-Modified-Since header.
ETag string `bun:"etag,nullzero"` // Etag last received from the server (if any) on successful fetch.
LastModified time.Time `bun:"type:timestamptz,nullzero"` // "Last-Modified" time received from the server (if any) on last successful fetch. Used for HTTP request caching.
ETag string `bun:"etag,nullzero"` // "ETag" header last received from the server (if any) on last successful fetch. Used for HTTP request caching.
Error string `bun:",nullzero"` // If latest fetch attempt errored, this field stores the error message. Cleared on latest successful fetch.
}

View file

@ -253,13 +253,9 @@ func (s *Subscriptions) ProcessDomainPermissionSubscription(
// to indicate a successful fetch, and return.
if resp.Unmodified {
l.Debug("received 304 Not Modified from remote")
permSub.SuccessfullyFetchedAt = permSub.FetchedAt
if permSub.ETag == "" && resp.ETag != "" {
// We didn't have an ETag before but
// we have one now: probably the remote
// added ETag support in the meantime.
permSub.ETag = resp.ETag
}
permSub.LastModified = resp.LastModified
permSub.SuccessfullyFetchedAt = permSub.FetchedAt
return nil, nil
}
@ -308,6 +304,7 @@ func (s *Subscriptions) ProcessDomainPermissionSubscription(
// This can now be considered a successful fetch.
permSub.SuccessfullyFetchedAt = permSub.FetchedAt
permSub.ETag = resp.ETag
permSub.LastModified = resp.LastModified
permSub.Error = ""
// Keep track of which domain perms are

View file

@ -107,7 +107,7 @@ func (suite *SubscriptionsTestSuite) TestDomainBlocksCSV() {
}
// The just-fetched perm sub should
// have ETag and count etc set now.
// have cache meta and count etc set now.
permSub, err := testStructs.State.DB.GetDomainPermissionSubscriptionByID(
ctx, testSubscription.ID,
)
@ -121,7 +121,8 @@ func (suite *SubscriptionsTestSuite) TestDomainBlocksCSV() {
suite.FailNow(err.Error())
}
suite.Equal("bigbums6969", permSub.ETag)
suite.Equal("\"bigbums6969\"", permSub.ETag)
suite.EqualValues(1726956000, permSub.LastModified.Unix())
suite.EqualValues(3, count)
suite.WithinDuration(time.Now(), permSub.FetchedAt, 1*time.Minute)
suite.WithinDuration(time.Now(), permSub.SuccessfullyFetchedAt, 1*time.Minute)
@ -186,7 +187,7 @@ func (suite *SubscriptionsTestSuite) TestDomainBlocksJSON() {
}
// The just-fetched perm sub should
// have ETag and count etc set now.
// have cache meta and count etc set now.
permSub, err := testStructs.State.DB.GetDomainPermissionSubscriptionByID(
ctx, testSubscription.ID,
)
@ -200,7 +201,8 @@ func (suite *SubscriptionsTestSuite) TestDomainBlocksJSON() {
suite.FailNow(err.Error())
}
suite.Equal("don't modify me daddy", permSub.ETag)
suite.Equal("\"don't modify me daddy\"", permSub.ETag)
suite.EqualValues(1726956000, permSub.LastModified.Unix())
suite.EqualValues(3, count)
suite.WithinDuration(time.Now(), permSub.FetchedAt, 1*time.Minute)
suite.WithinDuration(time.Now(), permSub.SuccessfullyFetchedAt, 1*time.Minute)
@ -265,7 +267,7 @@ func (suite *SubscriptionsTestSuite) TestDomainBlocksPlain() {
}
// The just-fetched perm sub should
// have ETag and count etc set now.
// have cache meta and count etc set now.
permSub, err := testStructs.State.DB.GetDomainPermissionSubscriptionByID(
ctx, testSubscription.ID,
)
@ -279,13 +281,14 @@ func (suite *SubscriptionsTestSuite) TestDomainBlocksPlain() {
suite.FailNow(err.Error())
}
suite.Equal("this is a legit etag i swear", permSub.ETag)
suite.Equal("\"this is a legit etag i swear\"", permSub.ETag)
suite.EqualValues(1726956000, permSub.LastModified.Unix())
suite.EqualValues(3, count)
suite.WithinDuration(time.Now(), permSub.FetchedAt, 1*time.Minute)
suite.WithinDuration(time.Now(), permSub.SuccessfullyFetchedAt, 1*time.Minute)
}
func (suite *SubscriptionsTestSuite) TestDomainBlocksCSVETag() {
func (suite *SubscriptionsTestSuite) TestDomainBlocksCSVCaching() {
var (
ctx = context.Background()
testStructs = testrig.SetupTestStructs(rMediaPath, rTemplatePath)
@ -297,7 +300,7 @@ func (suite *SubscriptionsTestSuite) TestDomainBlocksCSVETag() {
)
// Create a subscription for a CSV list of baddies.
// Include the ETag so it gets sent with the request.
// Include ETag + LastModified so they get sent with the request.
testSubscription = &gtsmodel.DomainPermissionSubscription{
ID: "01JGE681TQSBPAV59GZXPKE62H",
Priority: 255,
@ -309,7 +312,8 @@ func (suite *SubscriptionsTestSuite) TestDomainBlocksCSVETag() {
CreatedByAccount: testAccount,
URI: "https://lists.example.org/baddies.csv",
ContentType: gtsmodel.DomainPermSubContentTypeCSV,
ETag: "bigbums6969",
ETag: "\"bigbums6969\"",
LastModified: testrig.TimeMustParse("2024-09-21T22:00:00Z"),
}
)
defer testrig.TearDownTestStructs(testStructs)
@ -339,7 +343,7 @@ func (suite *SubscriptionsTestSuite) TestDomainBlocksCSVETag() {
}
// The just-fetched perm sub should
// have ETag and count etc set now.
// have cache meta and count etc set now.
permSub, err := testStructs.State.DB.GetDomainPermissionSubscriptionByID(
ctx, testSubscription.ID,
)
@ -353,12 +357,157 @@ func (suite *SubscriptionsTestSuite) TestDomainBlocksCSVETag() {
suite.FailNow(err.Error())
}
suite.Equal("bigbums6969", permSub.ETag)
suite.Equal("\"bigbums6969\"", permSub.ETag)
suite.EqualValues(1726956000, permSub.LastModified.Unix())
suite.Zero(count)
suite.WithinDuration(time.Now(), permSub.FetchedAt, 1*time.Minute)
suite.WithinDuration(time.Now(), permSub.SuccessfullyFetchedAt, 1*time.Minute)
}
// TestDomainBlocksCSVFutureLastModified processes a CSV
// subscription whose URI carries the "?future=true" query,
// then checks that a block tied to the subscription exists
// for every listed domain, that the ETag was stored, and
// that the stored LastModified is the zero value.
func (suite *SubscriptionsTestSuite) TestDomainBlocksCSVFutureLastModified() {
	ctx := context.Background()
	structs := testrig.SetupTestStructs(rMediaPath, rTemplatePath)
	admin := suite.testAccounts["admin_account"]
	subs := subscriptions.New(
		structs.State,
		structs.TransportController,
		structs.TypeConverter,
	)

	// Subscription to a CSV list of baddies, using
	// the URI variant for the future last modified value.
	sub := &gtsmodel.DomainPermissionSubscription{
		ID:                 "01JGE681TQSBPAV59GZXPKE62H",
		Priority:           255,
		Title:              "whatever!",
		PermissionType:     gtsmodel.DomainPermissionBlock,
		AsDraft:            util.Ptr(false),
		AdoptOrphans:       util.Ptr(true),
		CreatedByAccountID: admin.ID,
		CreatedByAccount:   admin,
		URI:                "https://lists.example.org/baddies.csv?future=true",
		ContentType:        gtsmodel.DomainPermSubContentTypeCSV,
	}
	defer testrig.TearDownTestStructs(structs)

	// Put the subscription in the database.
	if err := structs.State.DB.PutDomainPermissionSubscription(ctx, sub); err != nil {
		suite.FailNow(err.Error())
	}

	// Run processing for subscriptions
	// of this permission type.
	subs.ProcessDomainPermissionSubscriptions(ctx, sub.PermissionType)

	// Every domain on the subscribed list
	// should end up with a block that points
	// back at the subscription.
	expectedDomains := []string{
		"bumfaces.net",
		"peepee.poopoo",
		"nothanks.com",
	}
	for _, domain := range expectedDomains {
		var (
			perm gtsmodel.DomainPermission
			err  error
		)

		found := testrig.WaitFor(func() bool {
			perm, err = structs.State.DB.GetDomainBlock(ctx, domain)
			return err == nil
		})
		if !found {
			suite.FailNowf("", "timed out waiting for domain %s", domain)
		}

		suite.Equal(sub.ID, perm.GetSubscriptionID())
	}

	// Reload the subscription: the ETag should
	// be stored, but last modified should not.
	stored, err := structs.State.DB.GetDomainPermissionSubscriptionByID(ctx, sub.ID)
	if err != nil {
		suite.FailNow(err.Error())
	}

	suite.Equal("\"bigbums6969\"", stored.ETag)
	suite.Zero(stored.LastModified)
}
// TestDomainBlocksCSVGarbageLastModified processes a CSV
// subscription whose URI carries the "?garbage=true" query,
// then checks that a block tied to the subscription exists
// for every listed domain, that the ETag was stored, and
// that the stored LastModified is the zero value.
func (suite *SubscriptionsTestSuite) TestDomainBlocksCSVGarbageLastModified() {
	ctx := context.Background()
	structs := testrig.SetupTestStructs(rMediaPath, rTemplatePath)
	admin := suite.testAccounts["admin_account"]
	subs := subscriptions.New(
		structs.State,
		structs.TransportController,
		structs.TypeConverter,
	)

	// Subscription to a CSV list of baddies, using
	// the URI variant for the garbage last modified value.
	sub := &gtsmodel.DomainPermissionSubscription{
		ID:                 "01JGE681TQSBPAV59GZXPKE62H",
		Priority:           255,
		Title:              "whatever!",
		PermissionType:     gtsmodel.DomainPermissionBlock,
		AsDraft:            util.Ptr(false),
		AdoptOrphans:       util.Ptr(true),
		CreatedByAccountID: admin.ID,
		CreatedByAccount:   admin,
		URI:                "https://lists.example.org/baddies.csv?garbage=true",
		ContentType:        gtsmodel.DomainPermSubContentTypeCSV,
	}
	defer testrig.TearDownTestStructs(structs)

	// Put the subscription in the database.
	if err := structs.State.DB.PutDomainPermissionSubscription(ctx, sub); err != nil {
		suite.FailNow(err.Error())
	}

	// Run processing for subscriptions
	// of this permission type.
	subs.ProcessDomainPermissionSubscriptions(ctx, sub.PermissionType)

	// Every domain on the subscribed list
	// should end up with a block that points
	// back at the subscription.
	expectedDomains := []string{
		"bumfaces.net",
		"peepee.poopoo",
		"nothanks.com",
	}
	for _, domain := range expectedDomains {
		var (
			perm gtsmodel.DomainPermission
			err  error
		)

		found := testrig.WaitFor(func() bool {
			perm, err = structs.State.DB.GetDomainBlock(ctx, domain)
			return err == nil
		})
		if !found {
			suite.FailNowf("", "timed out waiting for domain %s", domain)
		}

		suite.Equal(sub.ID, perm.GetSubscriptionID())
	}

	// Reload the subscription: the ETag should
	// be stored, but last modified should not.
	stored, err := structs.State.DB.GetDomainPermissionSubscriptionByID(ctx, sub.ID)
	if err != nil {
		suite.FailNow(err.Error())
	}

	suite.Equal("\"bigbums6969\"", stored.ETag)
	suite.Zero(stored.LastModified)
}
func (suite *SubscriptionsTestSuite) TestDomainBlocks404() {
var (
ctx = context.Background()

View file

@ -21,9 +21,11 @@
"context"
"io"
"net/http"
"time"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
type DereferenceDomainPermissionsResp struct {
@ -39,6 +41,10 @@ type DereferenceDomainPermissionsResp struct {
// May be set
// if 200 or 304.
ETag string
// May be set
// if 200 or 304.
LastModified time.Time
}
func (t *transport) DereferenceDomainPermissions(
@ -60,27 +66,27 @@ func (t *transport) DereferenceDomainPermissions(
// Set relevant Accept headers.
// Allow fallback in case target doesn't
// negotiate content type correctly.
req.Header.Add("Accept-Charset", "utf-8")
req.Header.Add("Accept", permSub.ContentType.String()+","+"*/*")
req.Header.Set("Accept-Charset", "utf-8")
req.Header.Set("Accept", permSub.ContentType.String()+","+"*/*")
// If skipCache is true, we want to skip setting Cache
// headers so that we definitely don't get a 304 back.
if !skipCache {
// If we've successfully fetched this list
// before, set If-Modified-Since to last
// success to make the request conditional.
// If we've got a Last-Modified stored for this list,
// set If-Modified-Since to make the request conditional.
//
// See: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since
if !permSub.SuccessfullyFetchedAt.IsZero() {
timeStr := permSub.SuccessfullyFetchedAt.Format(http.TimeFormat)
req.Header.Add("If-Modified-Since", timeStr)
if !permSub.LastModified.IsZero() {
// http.Time wants UTC.
lmUTC := permSub.LastModified.UTC()
req.Header.Set("If-Modified-Since", lmUTC.Format(http.TimeFormat))
}
// If we've got an ETag stored for this list, set
// If-None-Match to make the request conditional.
// See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag#caching_of_unchanged_resources.
if len(permSub.ETag) != 0 {
req.Header.Add("If-None-Match", permSub.ETag)
if permSub.ETag != "" {
req.Header.Set("If-None-Match", permSub.ETag)
}
}
@ -99,11 +105,12 @@ func (t *transport) DereferenceDomainPermissions(
return nil, err
}
// Check already if we were given an ETag
// we can use, as ETag is often returned
// even on 304 Not Modified responses.
// Check already if we were given a valid ETag or
// Last-Modified we can use, as these cache headers
// are often returned even on Not Modified responses.
permsResp := &DereferenceDomainPermissionsResp{
ETag: rsp.Header.Get("Etag"),
ETag: rsp.Header.Get("ETag"),
LastModified: validateLastModified(ctx, rsp.Header.Get("Last-Modified")),
}
if rsp.StatusCode == http.StatusNotModified {
@ -119,3 +126,43 @@ func (t *transport) DereferenceDomainPermissions(
return permsResp, nil
}
// validateLastModified parses the given Last-Modified
// header value and returns the resulting time.
//
// The zero time is returned instead when the value is
// empty, when it cannot be parsed by http.ParseTime, or
// when it lies more than one minute in the future (the
// minute of leeway allows for clock issues + rounding).
// Discarded values are logged at debug level.
func validateLastModified(
	ctx context.Context,
	lastModified string,
) time.Time {
	// Header wasn't set at all;
	// that's fine, nothing to do.
	if lastModified == "" {
		return time.Time{}
	}

	parsed, err := http.ParseTime(lastModified)
	if err != nil {
		// Unparseable,
		// throw it away.
		log.Debugf(ctx,
			"discarding invalid Last-Modified header %s: %+v",
			lastModified, err,
		)
		return time.Time{}
	}

	// Compare whole unix seconds against
	// "now plus one minute" as the cutoff.
	if parsed.Unix() > time.Now().Add(1*time.Minute).Unix() {
		// Too far ahead,
		// throw it away.
		log.Debugf(ctx,
			"discarding in-the-future Last-Modified header %s",
			lastModified,
		)
		return time.Time{}
	}

	// Looks sane,
	// hand it back.
	return parsed
}

View file

@ -81,7 +81,7 @@ type Transport interface {
// DereferenceDomainPermissions dereferences the
// permissions list present at the given permSub's URI.
//
// If "force", then If-Modified-Since and If-None-Match
// If "skipCache", then If-Modified-Since and If-None-Match
// headers will *NOT* be sent with the outgoing request.
//
// If err == nil and Unmodified == false, then it's up
@ -89,7 +89,7 @@ type Transport interface {
DereferenceDomainPermissions(
ctx context.Context,
permSub *gtsmodel.DomainPermissionSubscription,
force bool,
skipCache bool,
) (*DereferenceDomainPermissionsResp, error)
// Finger performs a webfinger request with the given username and domain, and returns the bytes from the response body.

View file

@ -446,17 +446,20 @@ func DomainPermissionSubscriptionResponse(req *http.Request) (
) {
const (
lastModified = "Sat, 21 Sep 2024 22:00:00 GMT"
futureLastModified = "Mon, 15 Jan 2300 22:00:00 GMT"
garbageLastModified = "I LIKE BIG BUTTS AND I CANNOT LIE"
csvResp = `#domain,#severity,#reject_media,#reject_reports,#public_comment,#obfuscate
bumfaces.net,suspend,false,false,big jerks,false
peepee.poopoo,suspend,false,false,harassment,false
nothanks.com,suspend,false,false,,false`
csvRespETag = "bigbums6969"
csvRespETag = "\"bigbums6969\""
textResp = `bumfaces.net
peepee.poopoo
nothanks.com`
textRespETag = "this is a legit etag i swear"
textRespETag = "\"this is a legit etag i swear\""
jsonResp = `[
{
"domain": "bumfaces.net",
@ -473,12 +476,15 @@ func DomainPermissionSubscriptionResponse(req *http.Request) (
"suspended_at": "2020-05-13T13:29:12.000Z"
}
]`
jsonRespETag = "don't modify me daddy"
jsonRespETag = "\"don't modify me daddy\""
)
switch req.URL.String() {
case "https://lists.example.org/baddies.csv":
extraHeaders = map[string]string{"ETag": csvRespETag}
extraHeaders = map[string]string{
"Last-Modified": lastModified,
"ETag": csvRespETag,
}
if req.Header.Get("If-None-Match") == csvRespETag {
// Cached.
responseCode = http.StatusNotModified
@ -490,7 +496,10 @@ func DomainPermissionSubscriptionResponse(req *http.Request) (
responseContentLength = len(responseBytes)
case "https://lists.example.org/baddies.txt":
extraHeaders = map[string]string{"ETag": textRespETag}
extraHeaders = map[string]string{
"Last-Modified": lastModified,
"ETag": textRespETag,
}
if req.Header.Get("If-None-Match") == textRespETag {
// Cached.
responseCode = http.StatusNotModified
@ -502,7 +511,10 @@ func DomainPermissionSubscriptionResponse(req *http.Request) (
responseContentLength = len(responseBytes)
case "https://lists.example.org/baddies.json":
extraHeaders = map[string]string{"ETag": jsonRespETag}
extraHeaders = map[string]string{
"Last-Modified": lastModified,
"ETag": jsonRespETag,
}
if req.Header.Get("If-None-Match") == jsonRespETag {
// Cached.
responseCode = http.StatusNotModified
@ -513,6 +525,38 @@ func DomainPermissionSubscriptionResponse(req *http.Request) (
}
responseContentLength = len(responseBytes)
case "https://lists.example.org/baddies.csv?future=true":
extraHeaders = map[string]string{
// Provide the future last modified value.
"Last-Modified": futureLastModified,
"ETag": csvRespETag,
}
if req.Header.Get("If-None-Match") == csvRespETag {
// Cached.
responseCode = http.StatusNotModified
} else {
responseBytes = []byte(csvResp)
responseContentType = textCSV
responseCode = http.StatusOK
}
responseContentLength = len(responseBytes)
case "https://lists.example.org/baddies.csv?garbage=true":
extraHeaders = map[string]string{
// Provide the garbage last modified value.
"Last-Modified": garbageLastModified,
"ETag": csvRespETag,
}
if req.Header.Get("If-None-Match") == csvRespETag {
// Cached.
responseCode = http.StatusNotModified
} else {
responseBytes = []byte(csvResp)
responseContentType = textCSV
responseCode = http.StatusOK
}
responseContentLength = len(responseBytes)
default:
responseCode = http.StatusNotFound
responseBytes = []byte(`{"error":"not found"}`)