2021-08-10 13:32:39 +02:00
/ *
GoToSocial
2021-12-20 18:42:19 +01:00
Copyright ( C ) 2021 - 2022 GoToSocial Authors admin @ gotosocial . org
2021-08-10 13:32:39 +02:00
This program is free software : you can redistribute it and / or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation , either version 3 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU Affero General Public License for more details .
You should have received a copy of the GNU Affero General Public License
along with this program . If not , see < http : //www.gnu.org/licenses/>.
* /
package dereferencing
import (
"context"
"encoding/json"
"errors"
"fmt"
2022-01-16 18:52:55 +01:00
"io"
2021-08-10 13:32:39 +02:00
"net/url"
2021-08-25 15:34:33 +02:00
"strings"
2022-01-24 13:12:17 +01:00
"sync"
2022-01-24 18:12:04 +01:00
"time"
2021-08-10 13:32:39 +02:00
"github.com/sirupsen/logrus"
2021-11-13 17:29:43 +01:00
"github.com/superseriousbusiness/activity/streams"
"github.com/superseriousbusiness/activity/streams/vocab"
2021-08-10 13:32:39 +02:00
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
2022-01-09 18:41:22 +01:00
"github.com/superseriousbusiness/gotosocial/internal/media"
2021-08-10 13:32:39 +02:00
"github.com/superseriousbusiness/gotosocial/internal/transport"
)
2021-08-25 15:34:33 +02:00
// instanceAccount reports whether the given account should be treated as an
// instance-level account rather than a regular user account.
//
// An account is treated as an instance account when any of the following hold:
//   - its username equals its domain (compared case-insensitively);
//   - it has no followers URI or no following URI;
//   - it is named "internal.fetch" and its note mentions "internal service actor".
func instanceAccount(account *gtsmodel.Account) bool {
	switch {
	case strings.EqualFold(account.Username, account.Domain):
		return true
	case account.FollowersURI == "":
		return true
	case account.FollowingURI == "":
		return true
	case account.Username == "internal.fetch" && strings.Contains(account.Note, "internal service actor"):
		return true
	default:
		return false
	}
}
2022-05-30 16:20:10 +02:00
// GetRemoteAccountParams bundles the inputs of a remote account lookup
// performed by GetRemoteAccount.
//
// Either RemoteAccountID must be set, or both RemoteAccountUsername and
// RemoteAccountHost must be set; GetRemoteAccount rejects anything else.
type GetRemoteAccountParams struct {
	// RequestingUsername is the username of the local user on whose behalf
	// the lookup is made (optional).
	// If left empty, the GtS instance account is meant to be used instead.
	RequestingUsername string

	// RemoteAccountID is the ActivityPub URI of the remote account (optional).
	// When it is nil, the URI is meant to be discovered via webfinger, which
	// is why RemoteAccountUsername and RemoteAccountHost are required then.
	RemoteAccountID *url.URL

	// RemoteAccountUsername is the username of the remote account (optional).
	// Required whenever RemoteAccountID is nil.
	RemoteAccountUsername string

	// RemoteAccountHost is the host of the remote account (optional).
	// Required whenever RemoteAccountID is nil.
	RemoteAccountHost string

	// Blocking selects a blocking call to the remote instance.
	// If true, the account's media and other fields are fully dereferenced
	// before the account is returned. If false, they are dereferenced in the
	// background, so GetRemoteAccount only returns a minimal representation.
	Blocking bool

	// Refresh forces dereferencing to be performed all over again.
	// If true, the account is updated and returned. If false and the account
	// already exists in the database, the stored account is returned instead.
	Refresh bool
}
2021-08-10 13:32:39 +02:00
// GetRemoteAccount completely dereferences a remote account, converts it to a GtS model account,
2022-05-30 18:45:38 +02:00
// puts or updates it in the database (if necessary), and returns it to a caller.
2022-05-30 16:20:10 +02:00
func ( d * deref ) GetRemoteAccount ( ctx context . Context , params GetRemoteAccountParams ) ( * gtsmodel . Account , error ) {
2022-05-30 18:45:38 +02:00
// check for sensible params
if params . RemoteAccountID == nil && ( params . RemoteAccountUsername == "" || params . RemoteAccountHost == "" ) {
return nil , errors . New ( "GetRemoteAccount: RemoteAccountID wasn't set, and RemoteAccountUsername/RemoteAccountHost weren't set either, so a lookup couldn't be performed" )
}
2021-08-10 13:32:39 +02:00
2022-05-30 18:45:38 +02:00
new := true
2022-01-24 18:12:04 +01:00
2022-05-30 18:45:38 +02:00
if params . RemoteAccountID != nil {
// check if we already have the account in our db, and just return it unless we'd doing a refresh
remoteAccount , err := d . db . GetAccountByURI ( ctx , params . RemoteAccountID . String ( ) )
if err == nil {
new = false
if ! params . Refresh {
// make sure the account fields are populated before returning:
// even if we're not doing a refresh, the caller might want to block
// until everything is loaded
changed , err := d . populateAccountFields ( ctx , remoteAccount , params . RequestingUsername , params . Refresh , params . Blocking )
2022-01-25 11:21:22 +01:00
if err != nil {
2022-05-30 18:45:38 +02:00
return nil , fmt . Errorf ( "GetRemoteAccount: error populating remoteAccount fields: %s" , err )
}
if changed {
updatedAccount , err := d . db . UpdateAccount ( ctx , remoteAccount )
if err != nil {
return nil , fmt . Errorf ( "GetRemoteAccount: error updating remoteAccount: %s" , err )
}
return updatedAccount , err
2022-01-25 11:21:22 +01:00
}
2022-01-24 18:12:04 +01:00
2022-05-30 18:45:38 +02:00
return remoteAccount , nil
}
2021-08-10 13:32:39 +02:00
}
}
if new {
2022-01-24 13:12:17 +01:00
// we haven't seen this account before: dereference it from remote
accountable , err := d . dereferenceAccountable ( ctx , username , remoteAccountID )
2021-08-10 13:32:39 +02:00
if err != nil {
2022-01-24 13:12:17 +01:00
return nil , fmt . Errorf ( "GetRemoteAccount: error dereferencing accountable: %s" , err )
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
newAccount , err := d . typeConverter . ASRepresentationToAccount ( ctx , accountable , refresh )
if err != nil {
return nil , fmt . Errorf ( "GetRemoteAccount: error converting accountable to account: %s" , err )
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
ulid , err := id . NewRandomULID ( )
if err != nil {
return nil , fmt . Errorf ( "GetRemoteAccount: error generating new id for account: %s" , err )
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
newAccount . ID = ulid
2022-01-25 11:21:22 +01:00
if _ , err := d . populateAccountFields ( ctx , newAccount , username , refresh , blocking ) ; err != nil {
2022-01-24 13:12:17 +01:00
return nil , fmt . Errorf ( "GetRemoteAccount: error populating further account fields: %s" , err )
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
if err := d . db . Put ( ctx , newAccount ) ; err != nil {
return nil , fmt . Errorf ( "GetRemoteAccount: error putting new account: %s" , err )
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
return newAccount , nil
}
// we have seen this account before, but we have to refresh it
refreshedAccountable , err := d . dereferenceAccountable ( ctx , username , remoteAccountID )
if err != nil {
return nil , fmt . Errorf ( "GetRemoteAccount: error dereferencing refreshedAccountable: %s" , err )
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
refreshedAccount , err := d . typeConverter . ASRepresentationToAccount ( ctx , refreshedAccountable , refresh )
if err != nil {
return nil , fmt . Errorf ( "GetRemoteAccount: error converting refreshedAccountable to refreshedAccount: %s" , err )
}
refreshedAccount . ID = remoteAccount . ID
2022-01-25 11:21:22 +01:00
changed , err := d . populateAccountFields ( ctx , refreshedAccount , username , refresh , blocking )
if err != nil {
2022-01-24 13:12:17 +01:00
return nil , fmt . Errorf ( "GetRemoteAccount: error populating further refreshedAccount fields: %s" , err )
}
2022-01-25 11:21:22 +01:00
if changed {
updatedAccount , err := d . db . UpdateAccount ( ctx , refreshedAccount )
if err != nil {
return nil , fmt . Errorf ( "GetRemoteAccount: error updating refreshedAccount: %s" , err )
}
return updatedAccount , nil
2022-01-24 13:12:17 +01:00
}
2022-01-25 11:21:22 +01:00
return refreshedAccount , nil
2021-08-10 13:32:39 +02:00
}
// dereferenceAccountable calls remoteAccountID with a GET request, and tries to parse whatever
// it finds as something that an account model can be constructed out of.
//
// Will work for Person, Application, or Service models.
2021-08-25 15:34:33 +02:00
func ( d * deref ) dereferenceAccountable ( ctx context . Context , username string , remoteAccountID * url . URL ) ( ap . Accountable , error ) {
2021-08-10 13:32:39 +02:00
d . startHandshake ( username , remoteAccountID )
defer d . stopHandshake ( username , remoteAccountID )
2021-08-25 15:34:33 +02:00
if blocked , err := d . db . IsDomainBlocked ( ctx , remoteAccountID . Host ) ; blocked || err != nil {
2021-08-10 13:32:39 +02:00
return nil , fmt . Errorf ( "DereferenceAccountable: domain %s is blocked" , remoteAccountID . Host )
}
2021-08-25 15:34:33 +02:00
transport , err := d . transportController . NewTransportForUsername ( ctx , username )
2021-08-10 13:32:39 +02:00
if err != nil {
return nil , fmt . Errorf ( "DereferenceAccountable: transport err: %s" , err )
}
2021-10-04 15:24:19 +02:00
b , err := transport . Dereference ( ctx , remoteAccountID )
2021-08-10 13:32:39 +02:00
if err != nil {
return nil , fmt . Errorf ( "DereferenceAccountable: error deferencing %s: %s" , remoteAccountID . String ( ) , err )
}
m := make ( map [ string ] interface { } )
if err := json . Unmarshal ( b , & m ) ; err != nil {
return nil , fmt . Errorf ( "DereferenceAccountable: error unmarshalling bytes into json: %s" , err )
}
2021-10-04 15:24:19 +02:00
t , err := streams . ToType ( ctx , m )
2021-08-10 13:32:39 +02:00
if err != nil {
return nil , fmt . Errorf ( "DereferenceAccountable: error resolving json into ap vocab type: %s" , err )
}
switch t . GetTypeName ( ) {
2021-09-03 10:30:40 +02:00
case ap . ActorApplication :
2021-08-10 13:32:39 +02:00
p , ok := t . ( vocab . ActivityStreamsApplication )
if ! ok {
return nil , errors . New ( "DereferenceAccountable: error resolving type as activitystreams application" )
}
return p , nil
2021-09-30 12:27:42 +02:00
case ap . ActorGroup :
p , ok := t . ( vocab . ActivityStreamsGroup )
if ! ok {
return nil , errors . New ( "DereferenceAccountable: error resolving type as activitystreams group" )
}
return p , nil
case ap . ActorOrganization :
p , ok := t . ( vocab . ActivityStreamsOrganization )
if ! ok {
return nil , errors . New ( "DereferenceAccountable: error resolving type as activitystreams organization" )
}
return p , nil
case ap . ActorPerson :
p , ok := t . ( vocab . ActivityStreamsPerson )
if ! ok {
return nil , errors . New ( "DereferenceAccountable: error resolving type as activitystreams person" )
}
return p , nil
2021-09-03 10:30:40 +02:00
case ap . ActorService :
2021-08-10 13:32:39 +02:00
p , ok := t . ( vocab . ActivityStreamsService )
if ! ok {
return nil , errors . New ( "DereferenceAccountable: error resolving type as activitystreams service" )
}
return p , nil
}
return nil , fmt . Errorf ( "DereferenceAccountable: type name %s not supported" , t . GetTypeName ( ) )
}
2022-01-24 13:12:17 +01:00
// populateAccountFields populates any fields on the given account that weren't populated by the initial
2021-08-10 13:32:39 +02:00
// dereferencing. This includes things like header and avatar etc.
2022-01-25 11:21:22 +01:00
func ( d * deref ) populateAccountFields ( ctx context . Context , account * gtsmodel . Account , requestingUsername string , blocking bool , refresh bool ) ( bool , error ) {
2022-01-24 13:12:17 +01:00
// if we're dealing with an instance account, just bail, we don't need to do anything
if instanceAccount ( account ) {
2022-01-25 11:21:22 +01:00
return false , nil
2022-01-24 13:12:17 +01:00
}
2021-08-10 13:32:39 +02:00
accountURI , err := url . Parse ( account . URI )
if err != nil {
2022-01-25 11:21:22 +01:00
return false , fmt . Errorf ( "populateAccountFields: couldn't parse account URI %s: %s" , account . URI , err )
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
2021-08-25 15:34:33 +02:00
if blocked , err := d . db . IsDomainBlocked ( ctx , accountURI . Host ) ; blocked || err != nil {
2022-01-25 11:21:22 +01:00
return false , fmt . Errorf ( "populateAccountFields: domain %s is blocked" , accountURI . Host )
2021-08-10 13:32:39 +02:00
}
2021-08-25 15:34:33 +02:00
t , err := d . transportController . NewTransportForUsername ( ctx , requestingUsername )
2021-08-10 13:32:39 +02:00
if err != nil {
2022-01-25 11:21:22 +01:00
return false , fmt . Errorf ( "populateAccountFields: error getting transport for user: %s" , err )
2021-08-10 13:32:39 +02:00
}
// fetch the header and avatar
2022-01-25 11:21:22 +01:00
changed , err := d . fetchRemoteAccountMedia ( ctx , account , t , refresh , blocking )
if err != nil {
return false , fmt . Errorf ( "populateAccountFields: error fetching header/avi for account: %s" , err )
2021-08-10 13:32:39 +02:00
}
2022-01-25 11:21:22 +01:00
return changed , nil
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
// fetchRemoteAccountMedia fetches and stores the header and avatar for a remote account,
// using a transport on behalf of requestingUsername.
2021-08-10 13:32:39 +02:00
//
2022-01-25 11:21:22 +01:00
// The returned boolean indicates whether anything changed -- in other words, whether the
// account should be updated in the database.
//
2021-08-10 13:32:39 +02:00
// targetAccount's AvatarMediaAttachmentID and HeaderMediaAttachmentID will be updated as necessary.
//
2022-01-24 13:12:17 +01:00
// If refresh is true, then the media will be fetched again even if it's already been fetched before.
//
// If blocking is true, then the calls to the media manager made by this function will be blocking:
// in other words, the function won't return until the header and the avatar have been fully processed.
2022-01-25 11:21:22 +01:00
func ( d * deref ) fetchRemoteAccountMedia ( ctx context . Context , targetAccount * gtsmodel . Account , t transport . Transport , blocking bool , refresh bool ) ( bool , error ) {
changed := false
2021-08-10 13:32:39 +02:00
accountURI , err := url . Parse ( targetAccount . URI )
if err != nil {
2022-01-25 11:21:22 +01:00
return changed , fmt . Errorf ( "fetchRemoteAccountMedia: couldn't parse account URI %s: %s" , targetAccount . URI , err )
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
2021-08-25 15:34:33 +02:00
if blocked , err := d . db . IsDomainBlocked ( ctx , accountURI . Host ) ; blocked || err != nil {
2022-01-25 11:21:22 +01:00
return changed , fmt . Errorf ( "fetchRemoteAccountMedia: domain %s is blocked" , accountURI . Host )
2021-08-10 13:32:39 +02:00
}
if targetAccount . AvatarRemoteURL != "" && ( targetAccount . AvatarMediaAttachmentID == "" || refresh ) {
2022-01-24 13:12:17 +01:00
var processingMedia * media . ProcessingMedia
2022-01-08 17:17:01 +01:00
2022-02-08 13:17:10 +01:00
d . dereferencingAvatarsLock . Lock ( ) // LOCK HERE
2022-01-24 13:12:17 +01:00
// first check if we're already processing this media
if alreadyProcessing , ok := d . dereferencingAvatars [ targetAccount . ID ] ; ok {
// we're already on it, no worries
processingMedia = alreadyProcessing
2022-01-08 17:17:01 +01:00
}
2022-01-24 13:12:17 +01:00
if processingMedia == nil {
// we're not already processing it so start now
avatarIRI , err := url . Parse ( targetAccount . AvatarRemoteURL )
if err != nil {
2022-02-08 13:17:10 +01:00
d . dereferencingAvatarsLock . Unlock ( )
2022-01-25 11:21:22 +01:00
return changed , err
2022-01-24 13:12:17 +01:00
}
data := func ( innerCtx context . Context ) ( io . Reader , int , error ) {
return t . DereferenceMedia ( innerCtx , avatarIRI )
}
avatar := true
2022-02-22 13:50:33 +01:00
newProcessing , err := d . mediaManager . ProcessMedia ( ctx , data , nil , targetAccount . ID , & media . AdditionalMediaInfo {
2022-01-24 13:12:17 +01:00
RemoteURL : & targetAccount . AvatarRemoteURL ,
Avatar : & avatar ,
} )
if err != nil {
2022-02-08 13:17:10 +01:00
d . dereferencingAvatarsLock . Unlock ( )
2022-01-25 11:21:22 +01:00
return changed , err
2022-01-24 13:12:17 +01:00
}
// store it in our map to indicate it's in process
d . dereferencingAvatars [ targetAccount . ID ] = newProcessing
processingMedia = newProcessing
2022-01-08 17:17:01 +01:00
}
2022-02-08 13:17:10 +01:00
d . dereferencingAvatarsLock . Unlock ( ) // UNLOCK HERE
2022-01-08 17:17:01 +01:00
2022-01-24 13:12:17 +01:00
// block until loaded if required...
if blocking {
if err := lockAndLoad ( ctx , d . dereferencingAvatarsLock , processingMedia , d . dereferencingAvatars , targetAccount . ID ) ; err != nil {
2022-01-25 11:21:22 +01:00
return changed , err
2022-01-24 13:12:17 +01:00
}
} else {
// ...otherwise do it async
go func ( ) {
2022-01-24 18:12:04 +01:00
dlCtx , done := context . WithDeadline ( context . Background ( ) , time . Now ( ) . Add ( 1 * time . Minute ) )
if err := lockAndLoad ( dlCtx , d . dereferencingAvatarsLock , processingMedia , d . dereferencingAvatars , targetAccount . ID ) ; err != nil {
2022-01-24 13:12:17 +01:00
logrus . Errorf ( "fetchRemoteAccountMedia: error during async lock and load of avatar: %s" , err )
}
2022-01-24 18:12:04 +01:00
done ( )
2022-01-24 13:12:17 +01:00
} ( )
}
2022-01-24 18:12:04 +01:00
targetAccount . AvatarMediaAttachmentID = processingMedia . AttachmentID ( )
2022-01-25 11:21:22 +01:00
changed = true
2021-08-10 13:32:39 +02:00
}
if targetAccount . HeaderRemoteURL != "" && ( targetAccount . HeaderMediaAttachmentID == "" || refresh ) {
2022-01-24 13:12:17 +01:00
var processingMedia * media . ProcessingMedia
2022-01-08 17:17:01 +01:00
2022-02-08 13:17:10 +01:00
d . dereferencingHeadersLock . Lock ( ) // LOCK HERE
2022-01-24 13:12:17 +01:00
// first check if we're already processing this media
if alreadyProcessing , ok := d . dereferencingHeaders [ targetAccount . ID ] ; ok {
// we're already on it, no worries
processingMedia = alreadyProcessing
2022-01-08 17:17:01 +01:00
}
2022-01-24 13:12:17 +01:00
if processingMedia == nil {
// we're not already processing it so start now
headerIRI , err := url . Parse ( targetAccount . HeaderRemoteURL )
if err != nil {
2022-02-08 13:17:10 +01:00
d . dereferencingAvatarsLock . Unlock ( )
2022-01-25 11:21:22 +01:00
return changed , err
2022-01-24 13:12:17 +01:00
}
data := func ( innerCtx context . Context ) ( io . Reader , int , error ) {
return t . DereferenceMedia ( innerCtx , headerIRI )
}
header := true
2022-02-22 13:50:33 +01:00
newProcessing , err := d . mediaManager . ProcessMedia ( ctx , data , nil , targetAccount . ID , & media . AdditionalMediaInfo {
2022-01-24 13:12:17 +01:00
RemoteURL : & targetAccount . HeaderRemoteURL ,
Header : & header ,
} )
if err != nil {
2022-02-08 13:17:10 +01:00
d . dereferencingAvatarsLock . Unlock ( )
2022-01-25 11:21:22 +01:00
return changed , err
2022-01-24 13:12:17 +01:00
}
// store it in our map to indicate it's in process
d . dereferencingHeaders [ targetAccount . ID ] = newProcessing
processingMedia = newProcessing
2022-01-08 17:17:01 +01:00
}
2022-02-08 13:17:10 +01:00
d . dereferencingHeadersLock . Unlock ( ) // UNLOCK HERE
2022-01-08 17:17:01 +01:00
2022-01-24 13:12:17 +01:00
// block until loaded if required...
if blocking {
if err := lockAndLoad ( ctx , d . dereferencingHeadersLock , processingMedia , d . dereferencingHeaders , targetAccount . ID ) ; err != nil {
2022-01-25 11:21:22 +01:00
return changed , err
2022-01-24 13:12:17 +01:00
}
} else {
// ...otherwise do it async
go func ( ) {
2022-01-24 18:12:04 +01:00
dlCtx , done := context . WithDeadline ( context . Background ( ) , time . Now ( ) . Add ( 1 * time . Minute ) )
if err := lockAndLoad ( dlCtx , d . dereferencingHeadersLock , processingMedia , d . dereferencingHeaders , targetAccount . ID ) ; err != nil {
2022-01-24 13:12:17 +01:00
logrus . Errorf ( "fetchRemoteAccountMedia: error during async lock and load of header: %s" , err )
}
2022-01-24 18:12:04 +01:00
done ( )
2022-01-24 13:12:17 +01:00
} ( )
}
2022-01-24 18:12:04 +01:00
targetAccount . HeaderMediaAttachmentID = processingMedia . AttachmentID ( )
2022-01-25 11:21:22 +01:00
changed = true
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
2022-01-25 11:21:22 +01:00
return changed , nil
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
// lockAndLoad loads the attachment of the given in-process media using ctx, and
// returns whatever error the load produced.
//
// However the load turns out, the entry for accountID is removed from
// processingMap afterwards; that removal is done while holding lock.
func lockAndLoad(ctx context.Context, lock *sync.Mutex, processing *media.ProcessingMedia, processingMap map[string]*media.ProcessingMedia, accountID string) error {
	// runs on every exit path, after the load has finished
	forget := func() {
		lock.Lock()
		defer lock.Unlock()
		delete(processingMap, accountID)
	}
	defer forget()

	// try and load it
	if _, err := processing.LoadAttachment(ctx); err != nil {
		return err
	}
	return nil
}