2023-03-12 15:00:57 +00:00
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2021-08-10 12:32:39 +01:00
package dereferencing
import (
2021-08-25 14:34:33 +01:00
"context"
2023-06-24 08:32:10 +01:00
"net/http"
2021-08-10 12:32:39 +01:00
"net/url"
2022-07-19 09:47:55 +01:00
"codeberg.org/gruf/go-kv"
2023-06-03 10:35:15 +01:00
"github.com/superseriousbusiness/activity/pub"
2021-08-10 12:32:39 +01:00
"github.com/superseriousbusiness/gotosocial/internal/ap"
2021-12-07 12:31:39 +00:00
"github.com/superseriousbusiness/gotosocial/internal/config"
2023-05-28 13:08:35 +01:00
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
2022-09-25 12:09:41 +01:00
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
2022-07-19 09:47:55 +01:00
"github.com/superseriousbusiness/gotosocial/internal/log"
2021-08-10 12:32:39 +01:00
)
2022-09-25 12:09:41 +01:00
// maxIter defines how many iterations of descendants or
// ancestors we are willing to follow before returning error.
// It bounds the main loop in both DereferenceStatusAncestors
// and DereferenceStatusDescendants.
const maxIter = 1000
2023-11-04 20:21:20 +00:00
// dereferenceThread handles dereferencing status thread after
// fetch. Passing off appropriate parts to be enqueued for async
// processing, or handling some parts synchronously when required.
func ( d * Dereferencer ) dereferenceThread (
ctx context . Context ,
requestUser string ,
uri * url . URL ,
status * gtsmodel . Status ,
statusable ap . Statusable ,
isNew bool ,
) {
if isNew {
// This is a new status that we need the ancestors of in
// order to determine visibility. Perform the initial part
// of thread dereferencing, i.e. parents, synchronously.
err := d . DereferenceStatusAncestors ( ctx , requestUser , status )
if err != nil {
log . Error ( ctx , err )
}
2021-08-10 12:32:39 +01:00
2023-11-04 20:21:20 +00:00
// Enqueue dereferencing remaining status thread, (children), asychronously .
d . state . Workers . Federator . MustEnqueueCtx ( ctx , func ( ctx context . Context ) {
if err := d . DereferenceStatusDescendants ( ctx , requestUser , uri , statusable ) ; err != nil {
log . Error ( ctx , err )
}
} )
} else {
// This is an existing status, dereference the WHOLE thread asynchronously.
d . state . Workers . Federator . MustEnqueueCtx ( ctx , func ( ctx context . Context ) {
if err := d . DereferenceStatusAncestors ( ctx , requestUser , status ) ; err != nil {
log . Error ( ctx , err )
}
if err := d . DereferenceStatusDescendants ( ctx , requestUser , uri , statusable ) ; err != nil {
log . Error ( ctx , err )
}
} )
2021-08-10 12:32:39 +01:00
}
}
2023-10-23 10:58:13 +01:00
// DereferenceStatusAncestors iterates upwards from the given status, using InReplyToURI, to ensure that as many parent statuses as possible are dereferenced.
func ( d * Dereferencer ) DereferenceStatusAncestors ( ctx context . Context , username string , status * gtsmodel . Status ) error {
2023-09-05 11:22:02 +01:00
// Start log entry with fields
l := log . WithContext ( ctx ) .
WithFields ( kv . Fields {
{ "username" , username } ,
{ "original" , status . URI } ,
} ... )
// Keep track of already dereferenced statuses
// for this ancestor thread to prevent recursion.
derefdStatuses := make ( map [ string ] struct { } , 10 )
2023-06-24 08:32:10 +01:00
// Mark given status as the one
// we're currently working on.
2023-09-05 11:22:02 +01:00
current := status
2021-08-10 12:32:39 +01:00
2022-09-25 12:09:41 +01:00
for i := 0 ; i < maxIter ; i ++ {
2023-06-24 08:32:10 +01:00
if current . InReplyToURI == "" {
// Status has no parent, we've
// reached the top of the chain.
2021-08-10 12:32:39 +01:00
return nil
}
2022-06-11 15:25:41 +01:00
2024-01-31 13:29:47 +00:00
l = l . WithField ( "parent" , current . InReplyToURI )
l . Trace ( "following status ancestor" )
// Parse status parent URI for later use.
uri , err := url . Parse ( current . InReplyToURI )
if err != nil {
l . Warnf ( "invalid uri: %v" , err )
return nil
}
2023-09-05 11:22:02 +01:00
// Check whether this parent has already been deref'd.
if _ , ok := derefdStatuses [ current . InReplyToURI ] ; ok {
2024-01-31 13:29:47 +00:00
l . Warn ( "self referencing status ancestor" )
2023-09-05 11:22:02 +01:00
return nil
}
2024-01-31 13:29:47 +00:00
// Add this status's parent URI to map of deref'd.
derefdStatuses [ current . InReplyToURI ] = struct { } { }
2022-09-25 12:09:41 +01:00
2024-01-31 13:29:47 +00:00
// Fetch parent status by current's reply URI, this handles
// case of existing (updating if necessary) or a new status.
parent , _ , _ , err := d . getStatusByURI ( ctx , username , uri )
2022-09-25 12:09:41 +01:00
2024-01-31 13:29:47 +00:00
// Check for a returned HTTP code via error.
switch code := gtserror . StatusCode ( err ) ; {
2022-09-25 12:09:41 +01:00
2024-01-31 13:29:47 +00:00
// Status codes 404 and 410 incicate the status does not exist anymore.
// Gone (410) is the preferred for deletion, but we accept NotFound too.
case code == http . StatusNotFound || code == http . StatusGone :
l . Trace ( "status orphaned" )
current . InReplyToID = ""
current . InReplyToURI = ""
current . InReplyToAccountID = ""
current . InReplyTo = nil
current . InReplyToAccount = nil
if err := d . state . DB . UpdateStatus ( ctx ,
current ,
2023-06-24 08:32:10 +01:00
"in_reply_to_id" ,
2024-01-31 13:29:47 +00:00
"in_reply_to_uri" ,
2023-06-24 08:32:10 +01:00
"in_reply_to_account_id" ,
) ; err != nil {
return gtserror . Newf ( "db error updating status %s: %w" , current . ID , err )
}
2024-01-31 13:29:47 +00:00
return nil
2023-06-24 08:32:10 +01:00
2024-01-31 13:29:47 +00:00
// An error was returned for a status during
// an attempted NEW dereference, return here.
case err != nil && current . InReplyToID == "" :
return gtserror . Newf ( "error dereferencing new %s: %w" , current . InReplyToURI , err )
// An error was returned for an existing parent,
// we simply treat this as a temporary situation.
// (we fallback to using existing parent status).
case err != nil :
l . Errorf ( "error getting parent: %v" , err )
// The ID has changed for currently stored parent ID
// (which may be empty, if new!) and fetched version.
//
// Update the current's inReplyTo fields to parent.
case current . InReplyToID != parent . ID :
l . Tracef ( "parent changed %s => %s" , current . InReplyToID , parent . ID )
current . InReplyToAccountID = parent . AccountID
current . InReplyToAccount = parent . Account
current . InReplyToURI = parent . URI
current . InReplyToID = parent . ID
if err := d . state . DB . UpdateStatus ( ctx ,
current ,
"in_reply_to_id" ,
2023-06-24 08:32:10 +01:00
"in_reply_to_uri" ,
2024-01-31 13:29:47 +00:00
"in_reply_to_account_id" ,
2023-06-24 08:32:10 +01:00
) ; err != nil {
return gtserror . Newf ( "db error updating status %s: %w" , current . ID , err )
2022-09-25 12:09:41 +01:00
}
}
2024-01-31 13:29:47 +00:00
// Set next parent to use.
current . InReplyTo = parent
current = current . InReplyTo
2021-08-10 12:32:39 +01:00
}
2023-06-24 08:32:10 +01:00
return gtserror . Newf ( "reached %d ancestor iterations for %q" , maxIter , status . URI )
2021-08-10 12:32:39 +01:00
}
2023-10-23 10:58:13 +01:00
// DereferenceStatusDescendents iterates downwards from the given status, using its replies, to ensure that as many children statuses as possible are dereferenced.
func ( d * Dereferencer ) DereferenceStatusDescendants ( ctx context . Context , username string , statusIRI * url . URL , parent ap . Statusable ) error {
2023-09-05 11:22:02 +01:00
statusIRIStr := statusIRI . String ( )
2022-07-19 09:47:55 +01:00
2022-09-25 12:09:41 +01:00
// Start log entry with fields
2023-02-17 11:02:29 +00:00
l := log . WithContext ( ctx ) .
WithFields ( kv . Fields {
{ "username" , username } ,
2023-09-05 11:22:02 +01:00
{ "status" , statusIRIStr } ,
2023-02-17 11:02:29 +00:00
} ... )
2021-08-10 12:32:39 +01:00
2022-09-25 12:09:41 +01:00
// Log function start
l . Trace ( "beginning" )
2023-09-05 11:22:02 +01:00
// OUR instance hostname.
localhost := config . GetHost ( )
// Keep track of already dereferenced collection
// pages for this thread to prevent recursion.
derefdPages := make ( map [ string ] struct { } , 10 )
// frame represents a single stack frame when
// iteratively derefencing status descendants.
2022-09-25 12:09:41 +01:00
type frame struct {
2023-09-05 11:22:02 +01:00
// page is the current activity streams
// collection page we are on (as we often
// push a frame to stack mid-paging).
2023-09-23 18:28:12 +01:00
page ap . CollectionPageIterator
2023-09-05 11:22:02 +01:00
// pageURI is the URI string of
// the frame's collection page
// (is useful for logging).
pageURI string
2021-08-10 12:32:39 +01:00
}
2022-09-25 12:09:41 +01:00
var (
2023-09-05 11:22:02 +01:00
// current stack frame
2022-09-25 12:09:41 +01:00
current * frame
// stack is a list of "shelved" descendand iterator
// frames. this is pushed to when a child status frame
// is found that we need to further iterate down, and
// popped from into 'current' when that child's tree
// of further descendants is exhausted.
stack = [ ] * frame {
2023-09-05 11:22:02 +01:00
func ( ) * frame {
// Start input frame is built from the first input.
2023-09-23 18:28:12 +01:00
page , pageURI := getAttachedStatusCollectionPage ( parent )
2023-09-05 11:22:02 +01:00
if page == nil {
return nil
}
return & frame { page : page , pageURI : pageURI }
} ( ) ,
2022-09-25 12:09:41 +01:00
}
2021-08-10 12:32:39 +01:00
2022-09-25 12:09:41 +01:00
// popStack will remove and return the top frame
// from the stack, or nil if currently empty.
popStack = func ( ) * frame {
if len ( stack ) == 0 {
return nil
}
2021-08-10 12:32:39 +01:00
2022-09-25 12:09:41 +01:00
// Get frame index
idx := len ( stack ) - 1
2021-08-10 12:32:39 +01:00
2022-09-25 12:09:41 +01:00
// Pop last frame
frame := stack [ idx ]
stack = stack [ : idx ]
2021-08-10 12:32:39 +01:00
2022-09-25 12:09:41 +01:00
return frame
}
)
2021-08-10 12:32:39 +01:00
2022-09-25 12:09:41 +01:00
stackLoop :
for i := 0 ; i < maxIter ; i ++ {
// Pop next frame, nil means we are at end
if current = popStack ( ) ; current == nil {
return nil
2021-08-10 12:32:39 +01:00
}
2022-09-26 09:14:36 +01:00
pageLoop :
for {
2023-09-05 11:22:02 +01:00
l . Tracef ( "following collection page: %s" , current . pageURI )
2022-09-25 12:09:41 +01:00
itemLoop :
2022-09-26 09:50:14 +01:00
for {
2023-09-23 18:28:12 +01:00
// Get next item from page iter.
next := current . page . NextItem ( )
if next == nil {
2022-09-26 09:50:14 +01:00
break itemLoop
}
2021-08-10 12:32:39 +01:00
2024-01-31 13:29:47 +00:00
// Check for available IRI.
2023-09-23 18:28:12 +01:00
itemIRI , _ := pub . ToId ( next )
2022-09-25 12:09:41 +01:00
if itemIRI == nil {
continue itemLoop
}
2023-09-05 11:22:02 +01:00
if itemIRI . Host == localhost {
2022-09-25 12:09:41 +01:00
// This child is one of ours,
continue itemLoop
}
2023-05-12 10:15:54 +01:00
// Dereference the remote status and store in the database.
2023-06-24 08:32:10 +01:00
// getStatusByURI guards against the following conditions:
2023-09-05 11:22:02 +01:00
// - refetching recently fetched statuses (recursion!)
2023-06-24 08:32:10 +01:00
// - remote domain is blocked (will return unretrievable)
2023-09-05 11:22:02 +01:00
// - any http type error for a new status returns unretrievable
2023-11-04 20:21:20 +00:00
_ , statusable , _ , err := d . getStatusByURI ( ctx , username , itemIRI )
2022-09-25 12:09:41 +01:00
if err != nil {
2024-01-31 13:29:47 +00:00
l . Errorf ( "error dereferencing remote status %s: %v" , itemIRI , err )
2023-05-12 10:15:54 +01:00
continue itemLoop
}
if statusable == nil {
2023-09-05 11:22:02 +01:00
// A nil statusable return from
// getStatusByURI() indicates a
// remote status that was already
// dereferenced recently (so no
// need to go through descendents).
continue itemLoop
}
2023-09-23 18:28:12 +01:00
// Extract any attached collection + ID URI from status.
page , pageURI := getAttachedStatusCollectionPage ( statusable )
2023-09-05 11:22:02 +01:00
if page == nil {
2022-09-25 12:09:41 +01:00
continue itemLoop
2021-08-10 12:32:39 +01:00
}
2022-09-25 12:09:41 +01:00
// Put current and next frame at top of stack
stack = append ( stack , current , & frame {
2023-09-05 11:22:02 +01:00
pageURI : pageURI ,
page : page ,
2022-09-25 12:09:41 +01:00
} )
2022-09-26 08:39:59 +01:00
// Now start at top of loop
continue stackLoop
2021-08-10 12:32:39 +01:00
}
2023-09-23 18:28:12 +01:00
// Get the next page from iterator.
next := current . page . NextPage ( )
if next == nil || ! next . IsIRI ( ) {
2022-09-25 12:09:41 +01:00
continue stackLoop
}
2023-09-23 18:28:12 +01:00
// Get the next page IRI.
nextURI := next . GetIRI ( )
nextURIStr := nextURI . String ( )
2023-09-05 11:22:02 +01:00
// Check whether this page has already been deref'd.
2023-09-23 18:28:12 +01:00
if _ , ok := derefdPages [ nextURIStr ] ; ok {
l . Warnf ( "self referencing collection page(s): %s" , nextURIStr )
2022-09-26 09:14:36 +01:00
continue stackLoop
}
2022-09-25 12:09:41 +01:00
2023-09-05 11:22:02 +01:00
// Mark this collection page as deref'd.
2023-09-23 18:28:12 +01:00
derefdPages [ nextURIStr ] = struct { } { }
2023-09-05 11:22:02 +01:00
// Dereference this next collection page by its IRI.
2023-05-12 10:15:54 +01:00
collectionPage , err := d . dereferenceCollectionPage ( ctx ,
username ,
2023-09-23 18:28:12 +01:00
nextURI ,
2023-05-12 10:15:54 +01:00
)
2022-09-25 12:09:41 +01:00
if err != nil {
2023-09-23 18:28:12 +01:00
l . Errorf ( "error dereferencing collection page %q: %s" , nextURIStr , err )
2022-09-25 12:09:41 +01:00
continue stackLoop
}
2023-09-05 11:22:02 +01:00
// Set the next collection page.
2022-09-25 12:09:41 +01:00
current . page = collectionPage
2023-09-23 18:28:12 +01:00
current . pageURI = nextURIStr
2022-09-26 09:14:36 +01:00
continue pageLoop
2021-08-10 12:32:39 +01:00
}
}
2023-09-05 11:22:02 +01:00
return gtserror . Newf ( "reached %d descendant iterations for %q" , maxIter , statusIRIStr )
}