gotosocial/internal/federation/dereferencing/dereferencer.go

126 lines
4.3 KiB
Go
Raw Normal View History

// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package dereferencing
import (
"net/url"
"sync"
"time"
"github.com/superseriousbusiness/gotosocial/internal/filter/visibility"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/state"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/internal/typeutils"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
// FreshnessWindow is a length of time during which a
// previously fetched Status or Account still counts as
// "fresh": if its last FetchedAt value lies inside the
// window, no refresh from the remote is needed.
//
// Worked example: an Account with FetchedAt 09:00 that is
// checked at 12:00 is three hours old. That is inside the
// six hour DefaultAccountFreshness window, so by that
// measure it is fresh. It is outside the five minute Fresh
// window, so by that measure the Account should be
// refreshed from remote.
type FreshnessWindow time.Duration
var (
	// DefaultAccountFreshness is the default window
	// for doing a fresh dereference of an Account.
	//
	// Value: 6 hours.
	DefaultAccountFreshness = util.Ptr(6 * FreshnessWindow(time.Hour))

	// DefaultStatusFreshness is the default window
	// for doing a fresh dereference of a Status.
	//
	// Value: 2 hours.
	DefaultStatusFreshness = util.Ptr(2 * FreshnessWindow(time.Hour))

	// Fresh is for callers wanting a more up-to-date
	// model of something than the default freshness
	// windows would give them.
	//
	// It is tuned to be quite fresh without causing
	// loads of dereferencing calls.
	//
	// Value: 5 minutes.
	Fresh = util.Ptr(5 * FreshnessWindow(time.Minute))

	// Freshest is for callers wanting an immediately
	// up to date model of something, fresher even
	// than Fresh.
	//
	// Use with care: applied unwisely it can cause
	// lots of unnecessary traffic.
	//
	// Value: 10 seconds.
	Freshest = util.Ptr(10 * FreshnessWindow(time.Second))
)
// Dereferencer wraps logic and functionality for doing dereferencing
// of remote accounts, statuses, etc, from federated instances.
type Dereferencer struct {
[feature] status refetch support (#1690) * revamp http client to not limit requests, instead use sender worker Signed-off-by: kim <grufwub@gmail.com> * remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping Signed-off-by: kim <grufwub@gmail.com> * improve batch senders to keep popping recipients until remote URL found Signed-off-by: kim <grufwub@gmail.com> * fix recipient looping issue Signed-off-by: kim <grufwub@gmail.com> * move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP Signed-off-by: kim <grufwub@gmail.com> * first draft of status refetching logic Signed-off-by: kim <grufwub@gmail.com> * fix testrig to use new federation alloc func signature Signed-off-by: kim <grufwub@gmail.com> * fix log format directive Signed-off-by: kim <grufwub@gmail.com> * add status fetched_at migration Signed-off-by: kim <grufwub@gmail.com> * remove unused / unchecked for error types Signed-off-by: kim <grufwub@gmail.com> * add back the used type... 
Signed-off-by: kim <grufwub@gmail.com> * add separate internal getStatus() function for derefThread() that doesn't recurse Signed-off-by: kim <grufwub@gmail.com> * improved mention and media attachment error handling Signed-off-by: kim <grufwub@gmail.com> * fix log and error format directives Signed-off-by: kim <grufwub@gmail.com> * update account deref to match status deref changes Signed-off-by: kim <grufwub@gmail.com> * very small code formatting change to make things clearer Signed-off-by: kim <grufwub@gmail.com> * add more code comments Signed-off-by: kim <grufwub@gmail.com> * improved code commenting Signed-off-by: kim <grufwub@gmail.com> * only check for required further derefs if needed Signed-off-by: kim <grufwub@gmail.com> * improved cache invalidation Signed-off-by: kim <grufwub@gmail.com> * tweak cache restarting to use a (very small) backoff Signed-off-by: kim <grufwub@gmail.com> * small readability changes and fixes Signed-off-by: kim <grufwub@gmail.com> * fix account sync issues Signed-off-by: kim <grufwub@gmail.com> * fix merge conflicts + update account enrichment to accept already-passed accountable Signed-off-by: kim <grufwub@gmail.com> * remove secondary function declaration Signed-off-by: kim <grufwub@gmail.com> * normalise dereferencer get status / account behaviour, fix remaining tests Signed-off-by: kim <grufwub@gmail.com> * fix remaining rebase conflicts, finish commenting code Signed-off-by: kim <grufwub@gmail.com> * appease the linter Signed-off-by: kim <grufwub@gmail.com> * add source file header Signed-off-by: kim <grufwub@gmail.com> * update to use TIMESTAMPTZ column type instead of just TIMESTAMP Signed-off-by: kim <grufwub@gmail.com> * don't pass in 'updated_at' to UpdateEmoji() Signed-off-by: kim <grufwub@gmail.com> * use new ap.Resolve{Account,Status}able() functions Signed-off-by: kim <grufwub@gmail.com> * remove the somewhat confusing rescoping of the same variable names Signed-off-by: kim <grufwub@gmail.com> * update migration 
file name, improved database delete error returns Signed-off-by: kim <grufwub@gmail.com> * formatting Signed-off-by: kim <grufwub@gmail.com> * improved multi-delete database functions to minimise DB calls Signed-off-by: kim <grufwub@gmail.com> * remove unused type Signed-off-by: kim <grufwub@gmail.com> * fix delete statements Signed-off-by: kim <grufwub@gmail.com> --------- Signed-off-by: kim <grufwub@gmail.com>
2023-05-12 10:15:54 +01:00
state *state.State
converter *typeutils.Converter
transportController transport.Controller
mediaManager *media.Manager
visibility *visibility.Filter
// in-progress dereferencing emoji. we already perform
// locks per-status and per-account so we don't need
// processing maps for other media which won't often
// end up being repeated. worst case we run into an
// db.ErrAlreadyExists error which then gets handled
// appropriately by enrich{Account,Status}Safely().
derefEmojis map[string]*media.ProcessingEmoji
derefEmojisMu sync.Mutex
// handshakes marks current in-progress handshakes
// occurring, useful to prevent a deadlock between
// gotosocial instances attempting to dereference
// accounts for the first time. when a handshake is
// currently ongoing we know not to block waiting
// on certain data and instead return an in-progress
// form of the data as we currently see it.
handshakes map[string][]*url.URL
handshakesMu sync.Mutex
}
// NewDereferencer returns a Dereferencer initialized with the given parameters.
func NewDereferencer(
state *state.State,
converter *typeutils.Converter,
transportController transport.Controller,
visFilter *visibility.Filter,
mediaManager *media.Manager,
) Dereferencer {
return Dereferencer{
[feature] status refetch support (#1690) * revamp http client to not limit requests, instead use sender worker Signed-off-by: kim <grufwub@gmail.com> * remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping Signed-off-by: kim <grufwub@gmail.com> * improve batch senders to keep popping recipients until remote URL found Signed-off-by: kim <grufwub@gmail.com> * fix recipient looping issue Signed-off-by: kim <grufwub@gmail.com> * move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP Signed-off-by: kim <grufwub@gmail.com> * first draft of status refetching logic Signed-off-by: kim <grufwub@gmail.com> * fix testrig to use new federation alloc func signature Signed-off-by: kim <grufwub@gmail.com> * fix log format directive Signed-off-by: kim <grufwub@gmail.com> * add status fetched_at migration Signed-off-by: kim <grufwub@gmail.com> * remove unused / unchecked for error types Signed-off-by: kim <grufwub@gmail.com> * add back the used type... 
Signed-off-by: kim <grufwub@gmail.com> * add separate internal getStatus() function for derefThread() that doesn't recurse Signed-off-by: kim <grufwub@gmail.com> * improved mention and media attachment error handling Signed-off-by: kim <grufwub@gmail.com> * fix log and error format directives Signed-off-by: kim <grufwub@gmail.com> * update account deref to match status deref changes Signed-off-by: kim <grufwub@gmail.com> * very small code formatting change to make things clearer Signed-off-by: kim <grufwub@gmail.com> * add more code comments Signed-off-by: kim <grufwub@gmail.com> * improved code commenting Signed-off-by: kim <grufwub@gmail.com> * only check for required further derefs if needed Signed-off-by: kim <grufwub@gmail.com> * improved cache invalidation Signed-off-by: kim <grufwub@gmail.com> * tweak cache restarting to use a (very small) backoff Signed-off-by: kim <grufwub@gmail.com> * small readability changes and fixes Signed-off-by: kim <grufwub@gmail.com> * fix account sync issues Signed-off-by: kim <grufwub@gmail.com> * fix merge conflicts + update account enrichment to accept already-passed accountable Signed-off-by: kim <grufwub@gmail.com> * remove secondary function declaration Signed-off-by: kim <grufwub@gmail.com> * normalise dereferencer get status / account behaviour, fix remaining tests Signed-off-by: kim <grufwub@gmail.com> * fix remaining rebase conflicts, finish commenting code Signed-off-by: kim <grufwub@gmail.com> * appease the linter Signed-off-by: kim <grufwub@gmail.com> * add source file header Signed-off-by: kim <grufwub@gmail.com> * update to use TIMESTAMPTZ column type instead of just TIMESTAMP Signed-off-by: kim <grufwub@gmail.com> * don't pass in 'updated_at' to UpdateEmoji() Signed-off-by: kim <grufwub@gmail.com> * use new ap.Resolve{Account,Status}able() functions Signed-off-by: kim <grufwub@gmail.com> * remove the somewhat confusing rescoping of the same variable names Signed-off-by: kim <grufwub@gmail.com> * update migration 
file name, improved database delete error returns Signed-off-by: kim <grufwub@gmail.com> * formatting Signed-off-by: kim <grufwub@gmail.com> * improved multi-delete database functions to minimise DB calls Signed-off-by: kim <grufwub@gmail.com> * remove unused type Signed-off-by: kim <grufwub@gmail.com> * fix delete statements Signed-off-by: kim <grufwub@gmail.com> --------- Signed-off-by: kim <grufwub@gmail.com>
2023-05-12 10:15:54 +01:00
state: state,
converter: converter,
transportController: transportController,
mediaManager: mediaManager,
visibility: visFilter,
derefEmojis: make(map[string]*media.ProcessingEmoji),
handshakes: make(map[string][]*url.URL),
}
}