2021-08-10 12:32:39 +01:00
|
|
|
/*
|
|
|
|
GoToSocial
|
2021-12-20 17:42:19 +00:00
|
|
|
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
|
2021-08-10 12:32:39 +01:00
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU Affero General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU Affero General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package dereferencing
|
|
|
|
|
|
|
|
import (
|
2021-08-25 14:34:33 +01:00
|
|
|
"context"
|
2021-08-10 12:32:39 +01:00
|
|
|
"fmt"
|
|
|
|
"net/url"
|
|
|
|
|
2022-07-19 09:47:55 +01:00
|
|
|
"codeberg.org/gruf/go-kv"
|
2021-08-10 12:32:39 +01:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/ap"
|
2021-12-07 12:31:39 +00:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/config"
|
2022-07-19 09:47:55 +01:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/log"
|
2021-12-20 14:19:53 +00:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/uris"
|
2021-08-10 12:32:39 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
// DereferenceThread takes a statusable (something that has withReplies and withInReplyTo),
|
|
|
|
// and dereferences statusables in the conversation.
|
|
|
|
//
|
|
|
|
// This process involves working up and down the chain of replies, and parsing through the collections of IDs
|
|
|
|
// presented by remote instances as part of their replies collections, and will likely involve making several calls to
|
|
|
|
// multiple different hosts.
|
2021-08-25 14:34:33 +01:00
|
|
|
func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL) error {
|
2022-07-19 09:47:55 +01:00
|
|
|
l := log.WithFields(kv.Fields{
|
|
|
|
|
|
|
|
{"username", username},
|
|
|
|
{"statusIRI", statusIRI},
|
|
|
|
}...)
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("entering DereferenceThread")
|
2021-08-10 12:32:39 +01:00
|
|
|
|
|
|
|
// if it's our status we already have everything stashed so we can bail early
|
2022-05-30 13:41:24 +01:00
|
|
|
if statusIRI.Host == config.GetHost() {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("iri belongs to us, bailing")
|
2021-08-10 12:32:39 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// first make sure we have this status in our db
|
2022-05-23 16:40:03 +01:00
|
|
|
_, statusable, err := d.GetRemoteStatus(ctx, username, statusIRI, true, false)
|
2021-08-10 12:32:39 +01:00
|
|
|
if err != nil {
|
2022-05-23 16:40:03 +01:00
|
|
|
return fmt.Errorf("DereferenceThread: error getting initial status with id %s: %s", statusIRI.String(), err)
|
2021-08-10 12:32:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// first iterate up through ancestors, dereferencing if necessary as we go
|
2021-08-25 14:34:33 +01:00
|
|
|
if err := d.iterateAncestors(ctx, username, *statusIRI); err != nil {
|
2021-08-10 12:32:39 +01:00
|
|
|
return fmt.Errorf("error iterating ancestors of status %s: %s", statusIRI.String(), err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// now iterate down through descendants, again dereferencing as we go
|
2021-08-25 14:34:33 +01:00
|
|
|
if err := d.iterateDescendants(ctx, username, *statusIRI, statusable); err != nil {
|
2021-08-10 12:32:39 +01:00
|
|
|
return fmt.Errorf("error iterating descendants of status %s: %s", statusIRI.String(), err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// iterateAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way.
|
2021-08-25 14:34:33 +01:00
|
|
|
func (d *deref) iterateAncestors(ctx context.Context, username string, statusIRI url.URL) error {
|
2022-07-19 09:47:55 +01:00
|
|
|
l := log.WithFields(kv.Fields{
|
|
|
|
|
|
|
|
{"username", username},
|
|
|
|
{"statusIRI", statusIRI},
|
|
|
|
}...)
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("entering iterateAncestors")
|
2021-08-10 12:32:39 +01:00
|
|
|
|
|
|
|
// if it's our status we don't need to dereference anything so we can immediately move up the chain
|
2022-05-30 13:41:24 +01:00
|
|
|
if statusIRI.Host == config.GetHost() {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("iri belongs to us, moving up to next ancestor")
|
2021-08-10 12:32:39 +01:00
|
|
|
|
|
|
|
// since this is our status, we know we can extract the id from the status path
|
2021-12-20 14:19:53 +00:00
|
|
|
_, id, err := uris.ParseStatusesPath(&statusIRI)
|
2021-08-10 12:32:39 +01:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-08-25 14:34:33 +01:00
|
|
|
status, err := d.db.GetStatusByID(ctx, id)
|
|
|
|
if err != nil {
|
2021-08-10 12:32:39 +01:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if status.InReplyToURI == "" {
|
|
|
|
// status doesn't reply to anything
|
|
|
|
return nil
|
|
|
|
}
|
2022-06-11 15:25:41 +01:00
|
|
|
|
2021-08-10 12:32:39 +01:00
|
|
|
nextIRI, err := url.Parse(status.URI)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-06-11 15:25:41 +01:00
|
|
|
|
2021-08-25 14:34:33 +01:00
|
|
|
return d.iterateAncestors(ctx, username, *nextIRI)
|
2021-08-10 12:32:39 +01:00
|
|
|
}
|
|
|
|
|
2022-05-23 16:40:03 +01:00
|
|
|
// If we reach here, we're looking at a remote status
|
|
|
|
_, statusable, err := d.GetRemoteStatus(ctx, username, &statusIRI, true, false)
|
2021-08-10 12:32:39 +01:00
|
|
|
if err != nil {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Debugf("couldn't get remote status %s: %s; can't iterate any more ancestors", statusIRI.String(), err)
|
2021-08-10 12:32:39 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
inReplyTo := ap.ExtractInReplyToURI(statusable)
|
|
|
|
if inReplyTo == nil || inReplyTo.String() == "" {
|
|
|
|
// status doesn't reply to anything
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// now move up to the next ancestor
|
2021-08-25 14:34:33 +01:00
|
|
|
return d.iterateAncestors(ctx, username, *inReplyTo)
|
2021-08-10 12:32:39 +01:00
|
|
|
}
|
|
|
|
|
2021-08-25 14:34:33 +01:00
|
|
|
func (d *deref) iterateDescendants(ctx context.Context, username string, statusIRI url.URL, statusable ap.Statusable) error {
|
2022-07-19 09:47:55 +01:00
|
|
|
l := log.WithFields(kv.Fields{
|
|
|
|
|
|
|
|
{"username", username},
|
|
|
|
{"statusIRI", statusIRI},
|
|
|
|
}...)
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("entering iterateDescendants")
|
2021-08-10 12:32:39 +01:00
|
|
|
|
|
|
|
// if it's our status we already have descendants stashed so we can bail early
|
2022-05-30 13:41:24 +01:00
|
|
|
if statusIRI.Host == config.GetHost() {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("iri belongs to us, bailing")
|
2021-08-10 12:32:39 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
replies := statusable.GetActivityStreamsReplies()
|
|
|
|
if replies == nil || !replies.IsActivityStreamsCollection() {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("no replies, bailing")
|
2021-08-10 12:32:39 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
repliesCollection := replies.GetActivityStreamsCollection()
|
|
|
|
if repliesCollection == nil {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("replies collection is nil, bailing")
|
2021-08-10 12:32:39 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
first := repliesCollection.GetActivityStreamsFirst()
|
|
|
|
if first == nil {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("replies collection has no first, bailing")
|
2021-08-10 12:32:39 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
firstPage := first.GetActivityStreamsCollectionPage()
|
|
|
|
if firstPage == nil {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("first has no collection page, bailing")
|
2021-08-10 12:32:39 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
firstPageNext := firstPage.GetActivityStreamsNext()
|
|
|
|
if firstPageNext == nil || !firstPageNext.IsIRI() {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("next is not an iri, bailing")
|
2021-08-10 12:32:39 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
var foundReplies int
|
|
|
|
currentPageIRI := firstPageNext.GetIRI()
|
|
|
|
|
|
|
|
pageLoop:
|
|
|
|
for {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Tracef("dereferencing page %s", currentPageIRI)
|
|
|
|
collectionPage, err := d.DereferenceCollectionPage(ctx, username, currentPageIRI)
|
2021-08-10 12:32:39 +01:00
|
|
|
if err != nil {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Debugf("couldn't get remote collection page %s: %s; breaking pageLoop", currentPageIRI, err)
|
|
|
|
break pageLoop
|
2021-08-10 12:32:39 +01:00
|
|
|
}
|
|
|
|
|
2022-06-11 15:25:41 +01:00
|
|
|
pageItems := collectionPage.GetActivityStreamsItems()
|
|
|
|
if pageItems.Len() == 0 {
|
2021-08-10 12:32:39 +01:00
|
|
|
// no items on this page, which means we're done
|
|
|
|
break pageLoop
|
|
|
|
}
|
|
|
|
|
|
|
|
// have a look through items and see what we can find
|
2022-06-11 15:25:41 +01:00
|
|
|
for iter := pageItems.Begin(); iter != pageItems.End(); iter = iter.Next() {
|
2021-08-10 12:32:39 +01:00
|
|
|
// We're looking for a url to feed to GetRemoteStatus.
|
2022-06-11 15:25:41 +01:00
|
|
|
// Each item can be either an IRI, or a Note.
|
2021-08-10 12:32:39 +01:00
|
|
|
// If a note, we grab the ID from it and call it, rather than parsing the note.
|
|
|
|
var itemURI *url.URL
|
2021-11-22 07:46:19 +00:00
|
|
|
switch {
|
|
|
|
case iter.IsIRI():
|
2021-08-10 12:32:39 +01:00
|
|
|
// iri, easy
|
|
|
|
itemURI = iter.GetIRI()
|
2021-11-22 07:46:19 +00:00
|
|
|
case iter.IsActivityStreamsNote():
|
2021-08-10 12:32:39 +01:00
|
|
|
// note, get the id from it to use as iri
|
2022-06-11 15:25:41 +01:00
|
|
|
note := iter.GetActivityStreamsNote()
|
|
|
|
noteID := note.GetJSONLDId()
|
|
|
|
if noteID != nil && noteID.IsIRI() {
|
|
|
|
itemURI = noteID.GetIRI()
|
2021-08-10 12:32:39 +01:00
|
|
|
}
|
2021-11-22 07:46:19 +00:00
|
|
|
default:
|
2021-08-10 12:32:39 +01:00
|
|
|
// if it's not an iri or a note, we don't know how to process it
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2022-05-30 13:41:24 +01:00
|
|
|
if itemURI.Host == config.GetHost() {
|
2021-08-10 12:32:39 +01:00
|
|
|
// skip if the reply is from us -- we already have it then
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// we can confidently say now that we found something
|
2021-11-22 07:46:19 +00:00
|
|
|
foundReplies++
|
2021-08-10 12:32:39 +01:00
|
|
|
|
|
|
|
// get the remote statusable and put it in the db
|
2022-05-24 10:00:37 +01:00
|
|
|
_, statusable, err := d.GetRemoteStatus(ctx, username, itemURI, true, false)
|
2022-05-23 16:40:03 +01:00
|
|
|
if err == nil {
|
2021-08-10 12:32:39 +01:00
|
|
|
// now iterate descendants of *that* status
|
2021-08-25 14:34:33 +01:00
|
|
|
if err := d.iterateDescendants(ctx, username, *itemURI, statusable); err != nil {
|
2021-08-10 12:32:39 +01:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-11 15:25:41 +01:00
|
|
|
nextPage := collectionPage.GetActivityStreamsNext()
|
|
|
|
if nextPage != nil && nextPage.IsIRI() {
|
|
|
|
nextPageIRI := nextPage.GetIRI()
|
|
|
|
l.Tracef("moving on to next page %s", nextPageIRI)
|
|
|
|
currentPageIRI = nextPageIRI
|
2021-08-10 12:32:39 +01:00
|
|
|
} else {
|
2022-06-11 15:25:41 +01:00
|
|
|
l.Trace("no next page, bailing")
|
2021-08-10 12:32:39 +01:00
|
|
|
break pageLoop
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
l.Debugf("foundReplies %d", foundReplies)
|
|
|
|
return nil
|
|
|
|
}
|