[bugfix] fix checks for deref the same status descendants / ascendants (#2181)

This commit is contained in:
kim 2023-09-05 11:22:02 +01:00 committed by GitHub
parent 9f2199f9a9
commit 916c6d07ba
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -56,9 +56,20 @@ func (d *deref) DereferenceStatusAncestors(
username string,
status *gtsmodel.Status,
) error {
// Start log entry with fields
l := log.WithContext(ctx).
WithFields(kv.Fields{
{"username", username},
{"original", status.URI},
}...)
// Keep track of already dereferenced statuses
// for this ancestor thread to prevent recursion.
derefdStatuses := make(map[string]struct{}, 10)
// Mark given status as the one
// we're currently working on.
var current = status
current := status
for i := 0; i < maxIter; i++ {
if current.InReplyToURI == "" {
@ -67,14 +78,21 @@ func (d *deref) DereferenceStatusAncestors(
return nil
}
l := log.
WithContext(ctx).
WithFields(kv.Fields{
{"username", username},
{"originalStatusIRI", status.URI},
{"currentStatusURI", current.URI},
{"currentInReplyToURI", current.InReplyToURI},
}...)
// Add new log fields for this iteration.
l = l.WithFields(kv.Fields{
{"current", current.URI},
{"parent", current.InReplyToURI},
}...)
l.Trace("following status ancestors")
// Check whether this parent has already been deref'd.
if _, ok := derefdStatuses[current.InReplyToURI]; ok {
l.Warn("self referencing status ancestors")
return nil
}
// Add this status URI to map of deref'd.
derefdStatuses[current.URI] = struct{}{}
if current.InReplyToID != "" {
// We already have an InReplyToID set. This means
@ -123,7 +141,7 @@ func (d *deref) DereferenceStatusAncestors(
// by another action.
//
// TODO: clean this up in a nightly task.
l.Warnf("current status has been orphaned (parent %s no longer exists in database)", current.InReplyToID)
l.Warn("orphaned status (parent no longer exists)")
return nil // Cannot iterate further.
}
@ -134,16 +152,15 @@ func (d *deref) DereferenceStatusAncestors(
inReplyToURI, err := url.Parse(current.InReplyToURI)
if err != nil || inReplyToURI == nil {
// Parent URI is not something we can handle.
l.Debug("current status has been orphaned (invalid InReplyToURI)")
l.Warn("orphaned status (invalid InReplyToURI)")
return nil //nolint:nilerr
}
// Parent URI is valid, try to get it.
// getStatusByURI guards against the following conditions:
//
// - refetching recently fetched statuses (recursion!)
// - remote domain is blocked (will return unretrievable)
// - domain is local (will try to return something, or
// return unretrievable).
// - any http type error for a new status returns unretrievable
parent, _, err := d.getStatusByURI(ctx, username, inReplyToURI)
if err == nil {
// We successfully fetched the parent.
@ -171,7 +188,7 @@ func (d *deref) DereferenceStatusAncestors(
case code == http.StatusGone:
// 410 means the status has definitely been deleted.
// Update this status to reflect that, then bail.
l.Debug("current status has been orphaned (call to parent returned code 410 Gone)")
l.Debug("orphaned status: parent returned 410 Gone")
current.InReplyToURI = ""
if err := d.state.DB.UpdateStatus(
@ -180,19 +197,19 @@ func (d *deref) DereferenceStatusAncestors(
); err != nil {
return gtserror.Newf("db error updating status %s: %w", current.ID, err)
}
return nil
case code != 0:
// We had a code, but not one indicating deletion,
// log the code but don't return error or update the
// status; we can try again later.
l.Warnf("cannot dereference parent (%q)", err)
// We had a code, but not one indicating deletion, log the code
// but don't return error or update the status; we can try again later.
l.Warnf("orphaned status: http error dereferencing parent: %v)", err)
return nil
case gtserror.Unretrievable(err):
// Not retrievable for some other reason, so just
// bail; we can try again later if necessary.
l.Debugf("parent unretrievable (%q)", err)
// bail for now; we can try again later if necessary.
l.Warnf("orphaned status: parent unretrievable: %v)", err)
return nil
default:
@ -205,35 +222,44 @@ func (d *deref) DereferenceStatusAncestors(
}
func (d *deref) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
// Take ref to original
ogIRI := statusIRI
statusIRIStr := statusIRI.String()
// Start log entry with fields
l := log.WithContext(ctx).
WithFields(kv.Fields{
{"username", username},
{"statusIRI", ogIRI},
{"status", statusIRIStr},
}...)
// Log function start
l.Trace("beginning")
// frame represents a single stack frame when iteratively
// dereferencing status descendants. where statusIRI and
// statusable are of the status whose children we are to
// descend, page is the current activity streams collection
// page of entities we are on (as we often push a frame to
// stack mid-paging), and item___ are entity iterators for
// this activity streams collection page.
// OUR instance hostname.
localhost := config.GetHost()
// Keep track of already dereferenced collection
// pages for this thread to prevent recursion.
derefdPages := make(map[string]struct{}, 10)
// frame represents a single stack frame when
// iteratively derefencing status descendants.
type frame struct {
statusIRI *url.URL
statusable ap.Statusable
page ap.CollectionPageable
itemIter vocab.ActivityStreamsItemsPropertyIterator
// page is the current activity streams
// collection page we are on (as we often
// push a frame to stack mid-paging).
page ap.CollectionPageable
// pageURI is the URI string of
// the frame's collection page
// (is useful for logging).
pageURI string
// items is the entity iterator for frame's page.
items vocab.ActivityStreamsItemsPropertyIterator
}
var (
// current is the current stack frame
// current stack frame
current *frame
// stack is a list of "shelved" descendand iterator
@ -242,11 +268,14 @@ type frame struct {
// popped from into 'current' when that child's tree
// of further descendants is exhausted.
stack = []*frame{
{
// Starting input is first frame
statusIRI: statusIRI,
statusable: parent,
},
func() *frame {
// Start input frame is built from the first input.
page, pageURI := getAttachedStatusCollection(parent)
if page == nil {
return nil
}
return &frame{page: page, pageURI: pageURI}
}(),
}
// popStack will remove and return the top frame
@ -274,42 +303,9 @@ type frame struct {
return nil
}
if current.page == nil {
if current.statusIRI.Host == config.GetHost() {
// This is a local status, no looping to do
continue stackLoop
}
l.Tracef("following remote status descendants: %s", current.statusIRI)
// Look for an attached status replies (as collection)
replies := current.statusable.GetActivityStreamsReplies()
if replies == nil {
continue stackLoop
}
// Get the status replies collection
collection := replies.GetActivityStreamsCollection()
if collection == nil {
continue stackLoop
}
// Get the "first" property of the replies collection
first := collection.GetActivityStreamsFirst()
if first == nil {
continue stackLoop
}
// Set the first activity stream collection page
current.page = first.GetActivityStreamsCollectionPage()
if current.page == nil {
continue stackLoop
}
}
pageLoop:
for {
if current.itemIter == nil {
if current.items == nil {
// Get the items associated with this page
items := current.page.GetActivityStreamsItems()
if items == nil {
@ -317,21 +313,23 @@ type frame struct {
}
// Start off the item iterator
current.itemIter = items.Begin()
current.items = items.Begin()
}
l.Tracef("following collection page: %s", current.pageURI)
itemLoop:
for {
// Check for remaining iter
if current.itemIter == nil {
if current.items == nil {
break itemLoop
}
// Get current item iterator
itemIter := current.itemIter
itemIter := current.items
// Set the next available iterator
current.itemIter = itemIter.Next()
current.items = itemIter.Next()
// Check for available IRI on item
itemIRI, _ := pub.ToId(itemIter)
@ -339,76 +337,123 @@ type frame struct {
continue itemLoop
}
if itemIRI.Host == config.GetHost() {
if itemIRI.Host == localhost {
// This child is one of ours,
continue itemLoop
}
// Dereference the remote status and store in the database.
// getStatusByURI guards against the following conditions:
//
// - refetching recently fetched statuses (recursion!)
// - remote domain is blocked (will return unretrievable)
// - domain is local (will try to return something, or
// return unretrievable).
// - any http type error for a new status returns unretrievable
_, statusable, err := d.getStatusByURI(ctx, username, itemIRI)
if err != nil {
if !gtserror.Unretrievable(err) {
l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
}
continue itemLoop
}
if statusable == nil {
// Already up-to-date.
// A nil statusable return from
// getStatusByURI() indicates a
// remote status that was already
// dereferenced recently (so no
// need to go through descendents).
continue itemLoop
}
// Extract any attached collection + URI from status.
page, pageURI := getAttachedStatusCollection(statusable)
if page == nil {
continue itemLoop
}
// Put current and next frame at top of stack
stack = append(stack, current, &frame{
statusIRI: itemIRI,
statusable: statusable,
pageURI: pageURI,
page: page,
})
// Now start at top of loop
continue stackLoop
}
// Get the current page's "next" property
// Get the current page's "next" property.
pageNext := current.page.GetActivityStreamsNext()
if pageNext == nil || !pageNext.IsIRI() {
continue stackLoop
}
// Get the IRI of the "next" property.
pageNextIRI := pageNext.GetIRI()
pageNextURI := pageNext.GetIRI()
pageNextURIStr := pageNextURI.String()
// Ensure this isn't a self-referencing page...
// We don't need to store / check against a map of IRIs
// as our getStatusByIRI() function above prevents iter'ing
// over statuses that have been dereferenced recently, due to
// the `fetched_at` field preventing frequent refetches.
if id := current.page.GetJSONLDId(); id != nil &&
pageNextIRI.String() == id.Get().String() {
log.Warnf(ctx, "self referencing collection page: %s", pageNextIRI)
// Check whether this page has already been deref'd.
if _, ok := derefdPages[pageNextURIStr]; ok {
l.Warnf("self referencing collection page(s): %s", pageNextURIStr)
continue stackLoop
}
// Dereference this next collection page by its IRI
// Mark this collection page as deref'd.
derefdPages[pageNextURIStr] = struct{}{}
// Dereference this next collection page by its IRI.
collectionPage, err := d.dereferenceCollectionPage(ctx,
username,
pageNextIRI,
pageNextURI,
)
if err != nil {
l.Errorf("error dereferencing remote collection page %q: %s", pageNextIRI.String(), err)
l.Errorf("error dereferencing collection page %q: %s", pageNextURIStr, err)
continue stackLoop
}
// Set the updated collection page
// Set the next collection page.
current.page = collectionPage
current.pageURI = pageNextURIStr
continue pageLoop
}
}
return gtserror.Newf("reached %d descendant iterations for %q", maxIter, ogIRI.String())
return gtserror.Newf("reached %d descendant iterations for %q", maxIter, statusIRIStr)
}
// getAttachedStatusCollection is a small utility function to fetch the first page
// of an attached activity streams collection from a provided statusable object .
func getAttachedStatusCollection(status ap.Statusable) (page ap.CollectionPageable, uri string) { //nolint:gocritic
// Look for an attached status replies (as collection)
replies := status.GetActivityStreamsReplies()
if replies == nil {
return nil, ""
}
// Get the status replies collection
collection := replies.GetActivityStreamsCollection()
if collection == nil {
return nil, ""
}
// Get the "first" property of the replies collection
first := collection.GetActivityStreamsFirst()
if first == nil {
return nil, ""
}
// Return the first activity stream collection page
page = first.GetActivityStreamsCollectionPage()
if page == nil {
return nil, ""
}
if pageID := page.GetJSONLDId(); pageID != nil {
// By default use collection JSONLD ID
return page, pageID.Get().String()
} else if statusID := status.GetJSONLDId(); statusID != nil {
// Else, if possible use status JSONLD ID
return page, statusID.Get().String()
} else {
// MUST have some kind of ID
return nil, ""
}
}