From 1d51e3c8d68fe79ee1103d3e412d4e823d99eaaa Mon Sep 17 00:00:00 2001 From: kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com> Date: Fri, 23 Feb 2024 15:24:40 +0000 Subject: [PATCH] [bugfix] 2643 bug search for account url doesnt always work when redirected (#2673) * update activity library so dereferencer returns full response and checks *final* link to allow for redirects * temporarily add bodged fixed library * remove unused code * update getAccountFeatured() to use dereferenceCollectionPage() * make sure to release map * perform a 2nd decode to ensure reader is empty after primary decode * add comment explaining choice of using Decode() instead of Unmarshal() * update embedded activity library to latest matching https://github.com/superseriousbusiness/activity/pull/21 * add checks to look for changed URI and re-check database if redirected * update max iteration count to 512, add checks during dereferenceAncestors() for indirect URLs * remove doubled-up code * fix use of status instead of current * use URIs for checking equality for security * use the latest known URI for boost_of_uri in case original was an indirect * add dereferenceCollection() function for dereferenceAccountFeatured() * pull in latest github.com/superseriousbusiness/activity version (and remove the bodge!!) * fix typo in code comments * update decodeType() to accept a readcloser and handle body closing * switch to checking using BoostOfID and add note why not using BoostOfURI * ensure InReplyTo gets unset when deleting status parent in case currently stubbed * add tests for Collection and CollectionPage iterators --- go.mod | 2 +- go.sum | 4 +- internal/ap/ap_test.go | 7 +- internal/ap/collections.go | 112 ++++++++++ internal/ap/collections_test.go | 148 +++++++++++++ internal/ap/interfaces.go | 9 + internal/ap/resolve.go | 206 +++++++++++------- internal/ap/resolve_test.go | 20 +- internal/ap/util.go | 29 +++ internal/federation/authenticate.go | 16 +- internal/federation/dereferencing/account.go | 144 ++++++------ internal/federation/dereferencing/announce.go | 11 +- .../{collectionpage.go => collection.go} | 46 ++-- internal/federation/dereferencing/status.go | 43 +++- internal/federation/dereferencing/thread.go | 92 +++++--- internal/transport/controller.go | 54 ++++- internal/transport/dereference.go | 11 +- internal/transport/transport.go | 2 +- internal/typeutils/astointernal_test.go | 6 +- .../activity/pub/base_actor.go | 27 +-- .../pub/federating_wrapped_callbacks.go | 13 +- .../activity/pub/side_effect_actor.go | 11 +- .../activity/pub/transport.go | 21 +- .../superseriousbusiness/activity/pub/util.go | 60 ++++- vendor/modules.txt | 2 +- 25 files changed, 814 insertions(+), 282 deletions(-) create mode 100644 internal/ap/collections_test.go rename internal/federation/dereferencing/{collectionpage.go => collection.go} (77%) diff --git a/go.mod b/go.mod index 4ff8c87a1..135dc2990 100644 --- a/go.mod +++ b/go.mod @@ -46,7 +46,7 @@ require ( github.com/spf13/cobra v1.8.0 github.com/spf13/viper v1.18.2 github.com/stretchr/testify v1.8.4 - github.com/superseriousbusiness/activity v1.6.0-gts + github.com/superseriousbusiness/activity v1.6.0-gts.0.20240221151241-5d56c04088d4 github.com/superseriousbusiness/httpsig v1.2.0-SSB github.com/superseriousbusiness/oauth2/v4 v4.3.2-SSB.0.20230227143000-f4900831d6c8 github.com/tdewolff/minify/v2 v2.20.17 diff --git a/go.sum b/go.sum index 57fde77d9..e3ac40382 100644 --- a/go.sum +++ b/go.sum @@ -483,8 +483,8 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/sunfish-shogi/bufseekio v0.0.0-20210207115823-a4185644b365/go.mod h1:dEzdXgvImkQ3WLI+0KQpmEx8T/C/ma9KeS3AfmU899I= -github.com/superseriousbusiness/activity v1.6.0-gts h1:SwrTpqof0bIzYYsNyM7WH9Vxqz+6kN4BGQjzKvlIN1Y= -github.com/superseriousbusiness/activity v1.6.0-gts/go.mod h1:AZw0Xb4Oju8rmaJCZ21gc5CPg47MmNgyac+Hx5jo8VM= +github.com/superseriousbusiness/activity v1.6.0-gts.0.20240221151241-5d56c04088d4 h1:kPjQR/hVZtROTzkxptp/EIR7Wm58O8jppwpCFrZ7sVU= +github.com/superseriousbusiness/activity v1.6.0-gts.0.20240221151241-5d56c04088d4/go.mod h1:AZw0Xb4Oju8rmaJCZ21gc5CPg47MmNgyac+Hx5jo8VM= github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe h1:ksl2oCx/Qo8sNDc3Grb8WGKBM9nkvhCm25uvlT86azE= github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe/go.mod h1:gH4P6gN1V+wmIw5o97KGaa1RgXB/tVpC2UNzijhg3E4= github.com/superseriousbusiness/go-png-image-structure/v2 v2.0.1-SSB h1:8psprYSK1KdOSH7yQ4PbJq0YYaGQY+gzdW/B0ExDb/8= diff --git a/internal/ap/ap_test.go b/internal/ap/ap_test.go index 583a37c53..0a9f66ca6 100644 --- a/internal/ap/ap_test.go +++ b/internal/ap/ap_test.go @@ -18,8 +18,10 @@ package ap_test import ( + "bytes" "context" "encoding/json" + "io" "github.com/stretchr/testify/suite" "github.com/superseriousbusiness/activity/pub" @@ -187,7 +189,10 @@ func (suite *APTestSuite) noteWithHashtags1() ap.Statusable { } }`) - statusable, err := ap.ResolveStatusable(context.Background(), noteJson) + statusable, err := ap.ResolveStatusable( + context.Background(), + io.NopCloser(bytes.NewReader(noteJson)), + ) if err != nil { suite.FailNow(err.Error()) } diff --git a/internal/ap/collections.go b/internal/ap/collections.go index ba3887a5b..da789e179 100644 --- a/internal/ap/collections.go +++ b/internal/ap/collections.go @@ -26,6 +26,24 @@ "github.com/superseriousbusiness/gotosocial/internal/paging" ) +// TODO: replace must of this logic with just +// using extractIRIs() on the iterator types. + +// ToCollectionIterator attempts to resolve the given vocab type as a Collection +// like object and wrap in a standardised interface in order to iterate its contents. +func ToCollectionIterator(t vocab.Type) (CollectionIterator, error) { + switch name := t.GetTypeName(); name { + case ObjectCollection: + t := t.(vocab.ActivityStreamsCollection) + return WrapCollection(t), nil + case ObjectOrderedCollection: + t := t.(vocab.ActivityStreamsOrderedCollection) + return WrapOrderedCollection(t), nil + default: + return nil, fmt.Errorf("%T(%s) was not Collection-like", t, name) + } +} + // ToCollectionPageIterator attempts to resolve the given vocab type as a CollectionPage // like object and wrap in a standardised interface in order to iterate its contents. func ToCollectionPageIterator(t vocab.Type) (CollectionPageIterator, error) { @@ -41,6 +59,16 @@ func ToCollectionPageIterator(t vocab.Type) (CollectionPageIterator, error) { } } +// WrapCollection wraps an ActivityStreamsCollection in a standardised collection interface. +func WrapCollection(collection vocab.ActivityStreamsCollection) CollectionIterator { + return ®ularCollectionIterator{ActivityStreamsCollection: collection} +} + +// WrapOrderedCollection wraps an ActivityStreamsOrderedCollection in a standardised collection interface. +func WrapOrderedCollection(collection vocab.ActivityStreamsOrderedCollection) CollectionIterator { + return &orderedCollectionIterator{ActivityStreamsOrderedCollection: collection} +} + // WrapCollectionPage wraps an ActivityStreamsCollectionPage in a standardised collection page interface. func WrapCollectionPage(page vocab.ActivityStreamsCollectionPage) CollectionPageIterator { return ®ularCollectionPageIterator{ActivityStreamsCollectionPage: page} @@ -51,6 +79,90 @@ func WrapOrderedCollectionPage(page vocab.ActivityStreamsOrderedCollectionPage) return &orderedCollectionPageIterator{ActivityStreamsOrderedCollectionPage: page} } +// regularCollectionIterator implements CollectionIterator +// for the vocab.ActivitiyStreamsCollection type. +type regularCollectionIterator struct { + vocab.ActivityStreamsCollection + items vocab.ActivityStreamsItemsPropertyIterator + once bool // only init items once +} + +func (iter *regularCollectionIterator) NextItem() TypeOrIRI { + if !iter.initItems() { + return nil + } + cur := iter.items + iter.items = iter.items.Next() + return cur +} + +func (iter *regularCollectionIterator) PrevItem() TypeOrIRI { + if !iter.initItems() { + return nil + } + cur := iter.items + iter.items = iter.items.Prev() + return cur +} + +func (iter *regularCollectionIterator) initItems() bool { + if iter.once { + return (iter.items != nil) + } + iter.once = true + if iter.ActivityStreamsCollection == nil { + return false // no page set + } + items := iter.GetActivityStreamsItems() + if items == nil { + return false // no items found + } + iter.items = items.Begin() + return (iter.items != nil) +} + +// orderedCollectionIterator implements CollectionIterator +// for the vocab.ActivitiyStreamsOrderedCollection type. +type orderedCollectionIterator struct { + vocab.ActivityStreamsOrderedCollection + items vocab.ActivityStreamsOrderedItemsPropertyIterator + once bool // only init items once +} + +func (iter *orderedCollectionIterator) NextItem() TypeOrIRI { + if !iter.initItems() { + return nil + } + cur := iter.items + iter.items = iter.items.Next() + return cur +} + +func (iter *orderedCollectionIterator) PrevItem() TypeOrIRI { + if !iter.initItems() { + return nil + } + cur := iter.items + iter.items = iter.items.Prev() + return cur +} + +func (iter *orderedCollectionIterator) initItems() bool { + if iter.once { + return (iter.items != nil) + } + iter.once = true + if iter.ActivityStreamsOrderedCollection == nil { + return false // no page set + } + items := iter.GetActivityStreamsOrderedItems() + if items == nil { + return false // no items found + } + iter.items = items.Begin() + return (iter.items != nil) +} + // regularCollectionPageIterator implements CollectionPageIterator // for the vocab.ActivitiyStreamsCollectionPage type. type regularCollectionPageIterator struct { diff --git a/internal/ap/collections_test.go b/internal/ap/collections_test.go new file mode 100644 index 000000000..87a5bb057 --- /dev/null +++ b/internal/ap/collections_test.go @@ -0,0 +1,148 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package ap_test + +import ( + "net/url" + "slices" + "testing" + + "github.com/superseriousbusiness/activity/pub" + "github.com/superseriousbusiness/activity/streams" + "github.com/superseriousbusiness/activity/streams/vocab" + "github.com/superseriousbusiness/gotosocial/internal/ap" +) + +var testIteratorIRIs = [][]string{ + { + "https://google.com", + "https://mastodon.social", + "http://naughty.naughty.website/heres/the/porn", + "https://god.monarchies.suck?yes=they&really=do", + }, + { + // zero length + }, + { + "https://superseriousbusiness.org", + "http://gotosocial.tv/@slothsgonewild", + }, +} + +func TestToCollectionIterator(t *testing.T) { + for _, iris := range testIteratorIRIs { + testToCollectionIterator(t, toCollection(iris), "", iris) + testToCollectionIterator(t, toOrderedCollection(iris), "", iris) + } + testToCollectionIterator(t, streams.NewActivityStreamsAdd(), "*typeadd.ActivityStreamsAdd(Add) was not Collection-like", nil) + testToCollectionIterator(t, streams.NewActivityStreamsBlock(), "*typeblock.ActivityStreamsBlock(Block) was not Collection-like", nil) +} + +func TestToCollectionPageIterator(t *testing.T) { + for _, iris := range testIteratorIRIs { + testToCollectionPageIterator(t, toCollectionPage(iris), "", iris) + testToCollectionPageIterator(t, toOrderedCollectionPage(iris), "", iris) + } + testToCollectionPageIterator(t, streams.NewActivityStreamsAdd(), "*typeadd.ActivityStreamsAdd(Add) was not CollectionPage-like", nil) + testToCollectionPageIterator(t, streams.NewActivityStreamsBlock(), "*typeblock.ActivityStreamsBlock(Block) was not CollectionPage-like", nil) +} + +func testToCollectionIterator(t *testing.T, in vocab.Type, expectErr string, expectIRIs []string) { + collect, err := ap.ToCollectionIterator(in) + if !errCheck(err, expectErr) { + t.Fatalf("did not return expected error: expect=%v receive=%v", expectErr, err) + } + iris := gatherFromIterator(collect) + if !slices.Equal(iris, expectIRIs) { + t.Fatalf("did not return expected iris: expect=%v receive=%v", expectIRIs, iris) + } +} + +func testToCollectionPageIterator(t *testing.T, in vocab.Type, expectErr string, expectIRIs []string) { + page, err := ap.ToCollectionPageIterator(in) + if !errCheck(err, expectErr) { + t.Fatalf("did not return expected error: expect=%v receive=%v", expectErr, err) + } + iris := gatherFromIterator(page) + if !slices.Equal(iris, expectIRIs) { + t.Fatalf("did not return expected iris: expect=%v receive=%v", expectIRIs, iris) + } +} + +func toCollection(iris []string) vocab.ActivityStreamsCollection { + collect := streams.NewActivityStreamsCollection() + collect.SetActivityStreamsItems(toItems(iris)) + return collect +} + +func toOrderedCollection(iris []string) vocab.ActivityStreamsOrderedCollection { + collect := streams.NewActivityStreamsOrderedCollection() + collect.SetActivityStreamsOrderedItems(toOrderedItems(iris)) + return collect +} + +func toCollectionPage(iris []string) vocab.ActivityStreamsCollectionPage { + page := streams.NewActivityStreamsCollectionPage() + page.SetActivityStreamsItems(toItems(iris)) + return page +} + +func toOrderedCollectionPage(iris []string) vocab.ActivityStreamsOrderedCollectionPage { + page := streams.NewActivityStreamsOrderedCollectionPage() + page.SetActivityStreamsOrderedItems(toOrderedItems(iris)) + return page +} + +func toItems(iris []string) vocab.ActivityStreamsItemsProperty { + items := streams.NewActivityStreamsItemsProperty() + for _, iri := range iris { + u, _ := url.Parse(iri) + items.AppendIRI(u) + } + return items +} + +func toOrderedItems(iris []string) vocab.ActivityStreamsOrderedItemsProperty { + items := streams.NewActivityStreamsOrderedItemsProperty() + for _, iri := range iris { + u, _ := url.Parse(iri) + items.AppendIRI(u) + } + return items +} + +func gatherFromIterator(iter ap.CollectionIterator) []string { + var iris []string + if iter == nil { + return nil + } + for item := iter.NextItem(); item != nil; item = iter.NextItem() { + id, _ := pub.ToId(item) + if id != nil { + iris = append(iris, id.String()) + } + } + return iris +} + +func errCheck(err error, str string) bool { + if err == nil { + return str == "" + } + return err.Error() == str +} diff --git a/internal/ap/interfaces.go b/internal/ap/interfaces.go index 811e09125..fa8e8a338 100644 --- a/internal/ap/interfaces.go +++ b/internal/ap/interfaces.go @@ -300,6 +300,15 @@ type ReplyToable interface { WithInReplyTo } +// CollectionIterator represents the minimum interface for interacting with a +// wrapped Collection or OrderedCollection in order to access next / prev items. +type CollectionIterator interface { + vocab.Type + + NextItem() TypeOrIRI + PrevItem() TypeOrIRI +} + // CollectionPageIterator represents the minimum interface for interacting with a wrapped // CollectionPage or OrderedCollectionPage in order to access both next / prev pages and items. type CollectionPageIterator interface { diff --git a/internal/ap/resolve.go b/internal/ap/resolve.go index 20a858900..b2e866b6f 100644 --- a/internal/ap/resolve.go +++ b/internal/ap/resolve.go @@ -22,8 +22,8 @@ "encoding/json" "errors" "fmt" + "io" "net/http" - "sync" "github.com/superseriousbusiness/activity/pub" "github.com/superseriousbusiness/activity/streams" @@ -31,61 +31,6 @@ "github.com/superseriousbusiness/gotosocial/internal/gtserror" ) -// mapPool is a memory pool of maps for JSON decoding. -var mapPool = sync.Pool{ - New: func() any { - return make(map[string]any) - }, -} - -// getMap acquires a map from memory pool. -func getMap() map[string]any { - m := mapPool.Get().(map[string]any) //nolint - return m -} - -// putMap clears and places map back in pool. -func putMap(m map[string]any) { - if len(m) > int(^uint8(0)) { - // don't pool overly - // large maps. - return - } - for k := range m { - delete(m, k) - } - mapPool.Put(m) -} - -// bytesToType tries to parse the given bytes slice -// as a JSON ActivityPub type, failing if the input -// bytes are not parseable as JSON, or do not parse -// to an ActivityPub that we can understand. -// -// The given map pointer will also be populated with -// the parsed JSON, to allow further processing. -func bytesToType( - ctx context.Context, - b []byte, - raw map[string]any, -) (vocab.Type, error) { - // Unmarshal the raw JSON bytes into a "raw" map. - // This will fail if the input is not parseable - // as JSON; eg., a remote has returned HTML as a - // fallback response to an ActivityPub JSON request. - if err := json.Unmarshal(b, &raw); err != nil { - return nil, gtserror.NewfAt(3, "error unmarshalling bytes into json: %w", err) - } - - // Resolve an ActivityStreams type. - t, err := streams.ToType(ctx, raw) - if err != nil { - return nil, gtserror.NewfAt(3, "error resolving json into ap vocab type: %w", err) - } - - return t, nil -} - // ResolveActivity is a util function for pulling a pub.Activity type out of an incoming request body, // returning the resolved activity type, error and whether to accept activity (false = transient i.e. ignore). func ResolveIncomingActivity(r *http.Request) (pub.Activity, bool, gtserror.WithCode) { @@ -93,25 +38,23 @@ func ResolveIncomingActivity(r *http.Request) (pub.Activity, bool, gtserror.With // destination. raw := getMap() - // Tidy up when done. - defer r.Body.Close() + // Decode data as JSON into 'raw' map + // and get the resolved AS vocab.Type. + // (this handles close of request body). + t, err := decodeType(r.Context(), r.Body, raw) - // Decode the JSON body stream into "raw" map. - if err := json.NewDecoder(r.Body).Decode(&raw); err != nil { - err := gtserror.Newf("error decoding json: %w", err) - return nil, false, gtserror.NewErrorInternalError(err) - } - - // Resolve "raw" JSON to vocab.Type. - t, err := streams.ToType(r.Context(), raw) if err != nil { + // NOTE: if the error here was due to the response body + // ending early, the connection will have broken so it + // doesn't matter if we try to return 400 or 500, the + // error is mainly for our logging. tl;dr there's not a + // huge need to differentiate between those error types. + if !streams.IsUnmatchedErr(err) { err := gtserror.Newf("error matching json to type: %w", err) return nil, false, gtserror.NewErrorInternalError(err) } - // Respond with bad request; we just couldn't - // match the type to one that we know about. const text = "body json not resolvable as ActivityStreams type" return nil, false, gtserror.NewErrorBadRequest(errors.New(text), text) } @@ -142,18 +85,19 @@ func ResolveIncomingActivity(r *http.Request) (pub.Activity, bool, gtserror.With return activity, true, nil } -// ResolveStatusable tries to resolve the given bytes into an ActivityPub Statusable representation. -// It will then perform normalization on the Statusable. +// ResolveStatusable tries to resolve the response data as an ActivityPub +// Statusable representation. It will then perform normalization on the Statusable. // // Works for: Article, Document, Image, Video, Note, Page, Event, Place, Profile, Question. -func ResolveStatusable(ctx context.Context, b []byte) (Statusable, error) { +func ResolveStatusable(ctx context.Context, body io.ReadCloser) (Statusable, error) { // Get "raw" map // destination. raw := getMap() - // Convert raw bytes to an AP type. - // This will also populate the map. - t, err := bytesToType(ctx, b, raw) + // Decode data as JSON into 'raw' map + // and get the resolved AS vocab.Type. + // (this handles close of given body). + t, err := decodeType(ctx, body, raw) if err != nil { return nil, gtserror.SetWrongType(err) } @@ -183,18 +127,19 @@ func ResolveStatusable(ctx context.Context, b []byte) (Statusable, error) { return statusable, nil } -// ResolveStatusable tries to resolve the given bytes into an ActivityPub Accountable representation. -// It will then perform normalization on the Accountable. +// ResolveAccountable tries to resolve the given reader into an ActivityPub +// Accountable representation. It will then perform normalization on the Accountable. // // Works for: Application, Group, Organization, Person, Service -func ResolveAccountable(ctx context.Context, b []byte) (Accountable, error) { +func ResolveAccountable(ctx context.Context, body io.ReadCloser) (Accountable, error) { // Get "raw" map // destination. raw := getMap() - // Convert raw bytes to an AP type. - // This will also populate the map. - t, err := bytesToType(ctx, b, raw) + // Decode data as JSON into 'raw' map + // and get the resolved AS vocab.Type. + // (this handles close of given body). + t, err := decodeType(ctx, body, raw) if err != nil { return nil, gtserror.SetWrongType(err) } @@ -213,3 +158,104 @@ func ResolveAccountable(ctx context.Context, b []byte) (Accountable, error) { return accountable, nil } + +// ResolveCollection tries to resolve the given reader into an ActivityPub Collection-like +// representation, then wrapping as abstracted iterator. Works for: Collection, OrderedCollection. +func ResolveCollection(ctx context.Context, body io.ReadCloser) (CollectionIterator, error) { + // Get "raw" map + // destination. + raw := getMap() + + // Decode data as JSON into 'raw' map + // and get the resolved AS vocab.Type. + // (this handles close of given body). + t, err := decodeType(ctx, body, raw) + if err != nil { + return nil, gtserror.SetWrongType(err) + } + + // Release. + putMap(raw) + + // Cast as as Collection-like. + return ToCollectionIterator(t) +} + +// ResolveCollectionPage tries to resolve the given reader into an ActivityPub CollectionPage-like +// representation, then wrapping as abstracted iterator. Works for: CollectionPage, OrderedCollectionPage. +func ResolveCollectionPage(ctx context.Context, body io.ReadCloser) (CollectionPageIterator, error) { + // Get "raw" map + // destination. + raw := getMap() + + // Decode data as JSON into 'raw' map + // and get the resolved AS vocab.Type. + // (this handles close of given body). + t, err := decodeType(ctx, body, raw) + if err != nil { + return nil, gtserror.SetWrongType(err) + } + + // Release. + putMap(raw) + + // Cast as as CollectionPage-like. + return ToCollectionPageIterator(t) +} + +// emptydest is an empty JSON decode +// destination useful for "noop" decodes +// to check underlying reader is empty. +var emptydest = &struct{}{} + +// decodeType tries to read and parse the data +// at provided io.ReadCloser as a JSON ActivityPub +// type, failing if not parseable as JSON or not +// resolveable as one of our known AS types. +// +// NOTE: this function handles closing +// given body when it is finished with. +// +// The given map pointer will also be populated with +// the 'raw' JSON data, for further processing. +func decodeType( + ctx context.Context, + body io.ReadCloser, + raw map[string]any, +) (vocab.Type, error) { + + // Wrap body in JSON decoder. + // + // We do this instead of using json.Unmarshal() + // so we can take advantage of the decoder's streamed + // check of input data as valid JSON. This means that + // in the cases of garbage input, or even just fallback + // HTML responses that were incorrectly content-type'd, + // we can error-out as soon as possible. + dec := json.NewDecoder(body) + + // Unmarshal JSON source data into "raw" map. + if err := dec.Decode(&raw); err != nil { + _ = body.Close() // ensure closed. + return nil, gtserror.NewfAt(3, "error decoding into json: %w", err) + } + + // Perform a secondary decode just to ensure we drained the + // entirety of the data source. Error indicates either extra + // trailing garbage, or multiple JSON values (invalid data). + if err := dec.Decode(emptydest); err != io.EOF { + _ = body.Close() // ensure closed. + return nil, gtserror.NewfAt(3, "data remaining after json") + } + + // Done with body. + _ = body.Close() + + // Resolve an ActivityStreams type. + t, err := streams.ToType(ctx, raw) + if err != nil { + return nil, gtserror.NewfAt(3, "error resolving json into ap vocab type: %w", err) + } + + return t, nil +} diff --git a/internal/ap/resolve_test.go b/internal/ap/resolve_test.go index bc32af7e4..aaf90ab0a 100644 --- a/internal/ap/resolve_test.go +++ b/internal/ap/resolve_test.go @@ -18,7 +18,9 @@ package ap_test import ( + "bytes" "context" + "io" "testing" "github.com/stretchr/testify/suite" @@ -33,7 +35,9 @@ type ResolveTestSuite struct { func (suite *ResolveTestSuite) TestResolveDocumentAsStatusable() { b := []byte(suite.typeToJson(suite.document1)) - statusable, err := ap.ResolveStatusable(context.Background(), b) + statusable, err := ap.ResolveStatusable( + context.Background(), io.NopCloser(bytes.NewReader(b)), + ) suite.NoError(err) suite.NotNil(statusable) } @@ -41,7 +45,9 @@ func (suite *ResolveTestSuite) TestResolveDocumentAsStatusable() { func (suite *ResolveTestSuite) TestResolveDocumentAsAccountable() { b := []byte(suite.typeToJson(suite.document1)) - accountable, err := ap.ResolveAccountable(context.Background(), b) + accountable, err := ap.ResolveAccountable( + context.Background(), io.NopCloser(bytes.NewReader(b)), + ) suite.True(gtserror.IsWrongType(err)) suite.EqualError(err, "ResolveAccountable: cannot resolve vocab type *typedocument.ActivityStreamsDocument as accountable") suite.Nil(accountable) @@ -51,9 +57,11 @@ func (suite *ResolveTestSuite) TestResolveHTMLAsAccountable() { b := []byte(` .`) - accountable, err := ap.ResolveAccountable(context.Background(), b) + accountable, err := ap.ResolveAccountable( + context.Background(), io.NopCloser(bytes.NewReader(b)), + ) suite.True(gtserror.IsWrongType(err)) - suite.EqualError(err, "ResolveAccountable: error unmarshalling bytes into json: invalid character '<' looking for beginning of value") + suite.EqualError(err, "ResolveAccountable: error decoding into json: invalid character '<' looking for beginning of value") suite.Nil(accountable) } @@ -64,7 +72,9 @@ func (suite *ResolveTestSuite) TestResolveNonAPJSONAsAccountable() { "pee pee":"poo poo" }`) - accountable, err := ap.ResolveAccountable(context.Background(), b) + accountable, err := ap.ResolveAccountable( + context.Background(), io.NopCloser(bytes.NewReader(b)), + ) suite.True(gtserror.IsWrongType(err)) suite.EqualError(err, "ResolveAccountable: error resolving json into ap vocab type: activity stream did not match any known types") suite.Nil(accountable) diff --git a/internal/ap/util.go b/internal/ap/util.go index c810b7985..967a1659d 100644 --- a/internal/ap/util.go +++ b/internal/ap/util.go @@ -19,10 +19,39 @@ import ( "net/url" + "sync" "github.com/superseriousbusiness/activity/streams/vocab" ) +const mapmax = 256 + +// mapPool is a memory pool +// of maps for JSON decoding. +var mapPool sync.Pool + +// getMap acquires a map from memory pool. +func getMap() map[string]any { + v := mapPool.Get() + if v == nil { + // preallocate map of max-size. + m := make(map[string]any, mapmax) + v = m + } + return v.(map[string]any) //nolint +} + +// putMap clears and places map back in pool. +func putMap(m map[string]any) { + if len(m) > mapmax { + // drop maps beyond + // our maximum size. + return + } + clear(m) + mapPool.Put(m) +} + // _TypeOrIRI wraps a vocab.Type to implement TypeOrIRI. type _TypeOrIRI struct { vocab.Type diff --git a/internal/federation/authenticate.go b/internal/federation/authenticate.go index 59281fa65..596233b19 100644 --- a/internal/federation/authenticate.go +++ b/internal/federation/authenticate.go @@ -23,6 +23,7 @@ "encoding/json" "errors" "fmt" + "io" "net/http" "net/url" "time" @@ -414,10 +415,19 @@ func (f *Federator) callForPubKey( // The actual http call to the remote server is // made right here by the Dereference function. - pubKeyBytes, err := trans.Dereference(ctx, pubKeyID) + rsp, err := trans.Dereference(ctx, pubKeyID) + if err == nil { - // No problem. - return pubKeyBytes, nil + // Read the response body data. + b, err := io.ReadAll(rsp.Body) + _ = rsp.Body.Close() // done + + if err != nil { + err := gtserror.Newf("error reading pubkey: %w", err) + return nil, gtserror.NewErrorInternalError(err) + } + + return b, nil } if gtserror.StatusCode(err) == http.StatusGone { diff --git a/internal/federation/dereferencing/account.go b/internal/federation/dereferencing/account.go index c3ad6be5e..10d15bca6 100644 --- a/internal/federation/dereferencing/account.go +++ b/internal/federation/dereferencing/account.go @@ -19,14 +19,12 @@ import ( "context" - "encoding/json" "errors" "io" "net/url" "time" - "github.com/superseriousbusiness/activity/streams" - "github.com/superseriousbusiness/activity/streams/vocab" + "github.com/superseriousbusiness/activity/pub" "github.com/superseriousbusiness/gotosocial/internal/ap" "github.com/superseriousbusiness/gotosocial/internal/config" "github.com/superseriousbusiness/gotosocial/internal/db" @@ -499,16 +497,17 @@ func (d *Dereferencer) enrichAccount( case err == nil && account.Domain != accDomain: // After webfinger, we now have correct account domain from which we can do a final DB check. - alreadyAcct, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain) + alreadyAcc, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain) if err != nil && !errors.Is(err, db.ErrNoEntries) { - return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err) + return nil, nil, gtserror.Newf("db error getting account after webfinger: %w", err) } - if alreadyAcct != nil { - // We had this account stored under - // the discovered accountDomain. + if alreadyAcc != nil { + // We had this account stored + // under discovered accountDomain. + // // Proceed with this account. - account = alreadyAcct + account = alreadyAcc } // Whether we had the account or not, we @@ -537,8 +536,9 @@ func (d *Dereferencer) enrichAccount( ) } + // Check URI scheme ahead of time for more useful errs. if uri.Scheme != "http" && uri.Scheme != "https" { - err = errors.New("account URI scheme must be http or https") + err := errors.New("account URI scheme must be http or https") return nil, nil, gtserror.Newf( "invalid uri %q: %w", account.URI, gtserror.SetUnretrievable(err), @@ -567,20 +567,52 @@ func (d *Dereferencer) enrichAccount( // We were not given any (partial) ActivityPub // version of this account as a parameter. // Dereference latest version of the account. - b, err := tsport.Dereference(ctx, uri) + rsp, err := tsport.Dereference(ctx, uri) if err != nil { err := gtserror.Newf("error dereferencing %s: %w", uri, err) return nil, nil, gtserror.SetUnretrievable(err) } - // Attempt to resolve ActivityPub acc from data. - apubAcc, err = ap.ResolveAccountable(ctx, b) + // Attempt to resolve ActivityPub acc from response. + apubAcc, err = ap.ResolveAccountable(ctx, rsp.Body) + + // Tidy up now done. + _ = rsp.Body.Close() + if err != nil { - // ResolveAccountable will set Unretrievable/WrongType + // ResolveAccountable will set gtserror.WrongType // on the returned error, so we don't need to do it here. - err = gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err) + err = gtserror.Newf("error resolving accountable %s: %w", uri, err) return nil, nil, err } + + // Check whether input URI and final returned URI + // have changed (i.e. we followed some redirects). + if finalURIStr := rsp.Request.URL.String(); // + finalURIStr != uri.String() { + + // NOTE: this URI check + database call is performed + // AFTER reading and closing response body, for performance. + // + // Check whether we have this account stored under *final* URI. + alreadyAcc, err := d.state.DB.GetAccountByURI(ctx, finalURIStr) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, nil, gtserror.Newf("db error getting account after redirects: %w", err) + } + + if alreadyAcc != nil { + // We had this account stored + // under discovered final URI. + // + // Proceed with this account. + account = alreadyAcc + } + + // Update the input URI to + // the final determined URI + // for later URI checks. + uri = rsp.Request.URL + } } /* @@ -1014,76 +1046,33 @@ func (d *Dereferencer) dereferenceAccountFeatured(ctx context.Context, requestUs return err } - // Pre-fetch a transport for requesting username, used by later deref procedures. - tsport, err := d.transportController.NewTransportForUsername(ctx, requestUser) - if err != nil { - return gtserror.Newf("couldn't create transport: %w", err) - } - - b, err := tsport.Dereference(ctx, uri) + collect, err := d.dereferenceCollection(ctx, requestUser, uri) if err != nil { return err } - m := make(map[string]interface{}) - if err := json.Unmarshal(b, &m); err != nil { - return gtserror.Newf("error unmarshalling bytes into json: %w", err) - } - - t, err := streams.ToType(ctx, m) - if err != nil { - return gtserror.Newf("error resolving json into ap vocab type: %w", err) - } - - if t.GetTypeName() != ap.ObjectOrderedCollection { - return gtserror.Newf("%s was not an OrderedCollection", uri) - } - - collection, ok := t.(vocab.ActivityStreamsOrderedCollection) - if !ok { - return gtserror.New("couldn't coerce OrderedCollection") - } - - items := collection.GetActivityStreamsOrderedItems() - if items == nil { - return gtserror.New("nil orderedItems") - } - // Get previous pinned statuses (we'll need these later). wasPinned, err := d.state.DB.GetAccountPinnedStatuses(ctx, account.ID) if err != nil && !errors.Is(err, db.ErrNoEntries) { return gtserror.Newf("error getting account pinned statuses: %w", err) } - statusURIs := make([]*url.URL, 0, items.Len()) - for iter := items.Begin(); iter != items.End(); iter = iter.Next() { - var statusURI *url.URL + var statusURIs []*url.URL - switch { - case iter.IsActivityStreamsNote(): - // We got a whole Note. Extract the URI. - if note := iter.GetActivityStreamsNote(); note != nil { - if id := note.GetJSONLDId(); id != nil { - statusURI = id.GetIRI() - } - } - case iter.IsActivityStreamsArticle(): - // We got a whole Article. Extract the URI. - if article := iter.GetActivityStreamsArticle(); article != nil { - if id := article.GetJSONLDId(); id != nil { - statusURI = id.GetIRI() - } - } - default: - // Try to get just the URI. - statusURI = iter.GetIRI() + for { + // Get next collect item. + item := collect.NextItem() + if item == nil { + break } - if statusURI == nil { + // Check for available IRI. + itemIRI, _ := pub.ToId(item) + if itemIRI == nil { continue } - if statusURI.Host != uri.Host { + if itemIRI.Host != uri.Host { // If this status doesn't share a host with its featured // collection URI, we shouldn't trust it. Just move on. continue @@ -1093,13 +1082,13 @@ func (d *Dereferencer) dereferenceAccountFeatured(ctx context.Context, requestUs // We do this here so that even if we can't get // the status in the next part for some reason, // we still know it was *meant* to be pinned. - statusURIs = append(statusURIs, statusURI) + statusURIs = append(statusURIs, itemIRI) // Search for status by URI. Note this may return an existing model // we have stored with an error from attempted update, so check both. - status, _, _, err := d.getStatusByURI(ctx, requestUser, statusURI) + status, _, _, err := d.getStatusByURI(ctx, requestUser, itemIRI) if err != nil { - log.Errorf(ctx, "error getting status from featured collection %s: %v", statusURI, err) + log.Errorf(ctx, "error getting status from featured collection %s: %v", itemIRI, err) if status == nil { // This is only unactionable @@ -1108,20 +1097,23 @@ func (d *Dereferencer) dereferenceAccountFeatured(ctx context.Context, requestUs } } - // If the status was already pinned, we don't need to do anything. + // If the status was already pinned, + // we don't need to do anything. if !status.PinnedAt.IsZero() { continue } - if status.AccountID != account.ID { + if status.AccountURI != account.URI { // Someone's pinned a status that doesn't // belong to them, this doesn't work for us. continue } if status.BoostOfID != "" { - // Someone's pinned a boost. This also - // doesn't work for us. + // Someone's pinned a boost. This + // also doesn't work for us. (note + // we check using BoostOfID since + // BoostOfURI isn't *always* set). continue } diff --git a/internal/federation/dereferencing/announce.go b/internal/federation/dereferencing/announce.go index 8d082105b..02b1d5e5c 100644 --- a/internal/federation/dereferencing/announce.go +++ b/internal/federation/dereferencing/announce.go @@ -83,6 +83,10 @@ func (d *Dereferencer) EnrichAnnounce( return nil, gtserror.Newf("error generating id: %w", err) } + // Set boost_of_uri again in case the + // original URI was an indirect link. + boost.BoostOfURI = target.URI + // Populate remaining fields on // the boost wrapper using target. boost.Content = target.Content @@ -101,10 +105,10 @@ func (d *Dereferencer) EnrichAnnounce( boost.Replyable = target.Replyable boost.Likeable = target.Likeable - // Store the boost wrapper status. + // Store the boost wrapper status in database. switch err = d.state.DB.PutStatus(ctx, boost); { case err == nil: - // All good baby. + // all groovy. case errors.Is(err, db.ErrAlreadyExists): uri := boost.URI @@ -119,8 +123,7 @@ func (d *Dereferencer) EnrichAnnounce( ) } - default: - // Proper database error. + default: // Proper database error. return nil, gtserror.Newf("db error inserting status: %w", err) } diff --git a/internal/federation/dereferencing/collectionpage.go b/internal/federation/dereferencing/collection.go similarity index 77% rename from internal/federation/dereferencing/collectionpage.go rename to internal/federation/dereferencing/collection.go index dc5c68273..07f56c952 100644 --- a/internal/federation/dereferencing/collectionpage.go +++ b/internal/federation/dereferencing/collection.go @@ -19,16 +19,42 @@ import ( "context" - "encoding/json" "net/url" - "github.com/superseriousbusiness/activity/streams" "github.com/superseriousbusiness/activity/streams/vocab" "github.com/superseriousbusiness/gotosocial/internal/ap" "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/log" ) +// dereferenceCollectionPage returns the activitystreams Collection at the specified IRI, or an error if something goes wrong. +func (d *Dereferencer) dereferenceCollection(ctx context.Context, username string, pageIRI *url.URL) (ap.CollectionIterator, error) { + if blocked, err := d.state.DB.IsDomainBlocked(ctx, pageIRI.Host); blocked || err != nil { + return nil, gtserror.Newf("domain %s is blocked", pageIRI.Host) + } + + transport, err := d.transportController.NewTransportForUsername(ctx, username) + if err != nil { + return nil, gtserror.Newf("error creating transport: %w", err) + } + + rsp, err := transport.Dereference(ctx, pageIRI) + if err != nil { + return nil, gtserror.Newf("error deferencing %s: %w", pageIRI.String(), err) + } + + collect, err := ap.ResolveCollection(ctx, rsp.Body) + + // Tidy up rsp body. + _ = rsp.Body.Close() + + if err != nil { + return nil, gtserror.Newf("error resolving collection %s: %w", pageIRI.String(), err) + } + + return collect, nil +} + // dereferenceCollectionPage returns the activitystreams CollectionPage at the specified IRI, or an error if something goes wrong. func (d *Dereferencer) dereferenceCollectionPage(ctx context.Context, username string, pageIRI *url.URL) (ap.CollectionPageIterator, error) { if blocked, err := d.state.DB.IsDomainBlocked(ctx, pageIRI.Host); blocked || err != nil { @@ -40,24 +66,18 @@ func (d *Dereferencer) dereferenceCollectionPage(ctx context.Context, username s return nil, gtserror.Newf("error creating transport: %w", err) } - b, err := transport.Dereference(ctx, pageIRI) + rsp, err := transport.Dereference(ctx, pageIRI) if err != nil { return nil, gtserror.Newf("error deferencing %s: %w", pageIRI.String(), err) } - m := make(map[string]interface{}) - if err := json.Unmarshal(b, &m); err != nil { - return nil, gtserror.Newf("error unmarshalling bytes into json: %w", err) - } + page, err := ap.ResolveCollectionPage(ctx, rsp.Body) - t, err := streams.ToType(ctx, m) - if err != nil { - return nil, gtserror.Newf("error resolving json into ap vocab type: %w", err) - } + // Tidy up rsp body. + _ = rsp.Body.Close() - page, err := ap.ToCollectionPageIterator(t) if err != nil { - return nil, gtserror.Newf("error resolving vocab type as page: %w", err) + return nil, gtserror.Newf("error resolving collection page %s: %w", pageIRI.String(), err) } return page, nil diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index e3f97553d..397d2aa28 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -393,16 +393,51 @@ func (d *Dereferencer) enrichStatus( if apubStatus == nil { // Dereference latest version of the status. - b, err := tsport.Dereference(ctx, uri) + rsp, err := tsport.Dereference(ctx, uri) if err != nil { err := gtserror.Newf("error dereferencing %s: %w", uri, err) return nil, nil, gtserror.SetUnretrievable(err) } - // Attempt to resolve ActivityPub status from data. - apubStatus, err = ap.ResolveStatusable(ctx, b) + // Attempt to resolve ActivityPub status from response. + apubStatus, err = ap.ResolveStatusable(ctx, rsp.Body) + + // Tidy up now done. + _ = rsp.Body.Close() + if err != nil { - return nil, nil, gtserror.Newf("error resolving statusable from data for account %s: %w", uri, err) + // ResolveStatusable will set gtserror.WrongType + // on the returned error, so we don't need to do it here. + err = gtserror.Newf("error resolving statusable %s: %w", uri, err) + return nil, nil, err + } + + // Check whether input URI and final returned URI + // have changed (i.e. we followed some redirects). + if finalURIStr := rsp.Request.URL.String(); // + finalURIStr != uri.String() { + + // NOTE: this URI check + database call is performed + // AFTER reading and closing response body, for performance. + // + // Check whether we have this status stored under *final* URI. + alreadyStatus, err := d.state.DB.GetStatusByURI(ctx, finalURIStr) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, nil, gtserror.Newf("db error getting status after redirects: %w", err) + } + + if alreadyStatus != nil { + // We had this status stored + // under discovered final URI. + // + // Proceed with this status. + status = alreadyStatus + } + + // Update the input URI to + // the final determined URI + // for later URI checks. + uri = rsp.Request.URL } } diff --git a/internal/federation/dereferencing/thread.go b/internal/federation/dereferencing/thread.go index 28f7ffa8a..e528581c9 100644 --- a/internal/federation/dereferencing/thread.go +++ b/internal/federation/dereferencing/thread.go @@ -33,7 +33,7 @@ // maxIter defines how many iterations of descendants or // ancesters we are willing to follow before returning error. -const maxIter = 1000 +const maxIter = 512 // dereferenceThread handles dereferencing status thread after // fetch. Passing off appropriate parts to be enqueued for async @@ -98,16 +98,10 @@ func (d *Dereferencer) DereferenceStatusAncestors(ctx context.Context, username return nil } + // Apparent current parent URI to log fields. l = l.WithField("parent", current.InReplyToURI) l.Trace("following status ancestor") - // Parse status parent URI for later use. - uri, err := url.Parse(current.InReplyToURI) - if err != nil { - l.Warnf("invalid uri: %v", err) - return nil - } - // Check whether this parent has already been deref'd. if _, ok := derefdStatuses[current.InReplyToURI]; ok { l.Warn("self referencing status ancestor") @@ -117,6 +111,13 @@ func (d *Dereferencer) DereferenceStatusAncestors(ctx context.Context, username // Add this status's parent URI to map of deref'd. derefdStatuses[current.InReplyToURI] = struct{}{} + // Parse status parent URI for later use. + uri, err := url.Parse(current.InReplyToURI) + if err != nil { + l.Warnf("invalid uri: %v", err) + return nil + } + // Fetch parent status by current's reply URI, this handles // case of existing (updating if necessary) or a new status. parent, _, _, err := d.getStatusByURI(ctx, username, uri) @@ -129,6 +130,7 @@ func (d *Dereferencer) DereferenceStatusAncestors(ctx context.Context, username // view the status (it's followers-only and // we don't follow, for example). case code == http.StatusNotFound: + // If this reply is followers-only or stricter, // we can safely assume the status it replies // to is also followers only or stricter. @@ -153,31 +155,43 @@ func (d *Dereferencer) DereferenceStatusAncestors(ctx context.Context, username // the now-gone parent. case code == http.StatusGone: l.Trace("status orphaned") - current.InReplyToID = "" - current.InReplyToURI = "" - current.InReplyToAccountID = "" current.InReplyTo = nil current.InReplyToAccount = nil - if err := d.state.DB.UpdateStatus(ctx, + return d.updateStatusParent(ctx, current, - "in_reply_to_id", - "in_reply_to_uri", - "in_reply_to_account_id", - ); err != nil { - return gtserror.Newf("db error updating status %s: %w", current.ID, err) - } - return nil + "", // status ID + "", // status URI + "", // account ID + ) // An error was returned for a status during // an attempted NEW dereference, return here. - case err != nil && current.InReplyToID == "": + // + // NOTE: this will catch all cases of a nil + // parent, all cases below can safely assume + // a non-nil parent in their code logic. + case err != nil && parent == nil: return gtserror.Newf("error dereferencing new %s: %w", current.InReplyToURI, err) // An error was returned for an existing parent, // we simply treat this as a temporary situation. - // (we fallback to using existing parent status). case err != nil: l.Errorf("error getting parent: %v", err) + } + + // Start a new switch case + // as the following scenarios + // are possible with / without + // any returned error. + switch { + + // The current status is using an indirect URL + // in order to reference the parent. This is just + // weird and broken... Leave the URI in place but + // don't link the statuses via database IDs as it + // could cause all sorts of unexpected situations. + case current.InReplyToURI != parent.URI: + l.Errorf("indirect in_reply_to_uri => %s", parent.URI) // The ID has changed for currently stored parent ID // (which may be empty, if new!) and fetched version. @@ -185,17 +199,14 @@ func (d *Dereferencer) DereferenceStatusAncestors(ctx context.Context, username // Update the current's inReplyTo fields to parent. case current.InReplyToID != parent.ID: l.Tracef("parent changed %s => %s", current.InReplyToID, parent.ID) - current.InReplyToAccountID = parent.AccountID current.InReplyToAccount = parent.Account - current.InReplyToURI = parent.URI - current.InReplyToID = parent.ID - if err := d.state.DB.UpdateStatus(ctx, + if err := d.updateStatusParent(ctx, current, - "in_reply_to_id", - "in_reply_to_uri", - "in_reply_to_account_id", + parent.ID, + parent.URI, + parent.AccountID, ); err != nil { - return gtserror.Newf("db error updating status %s: %w", current.ID, err) + return err } } @@ -384,3 +395,26 @@ func() *frame { return gtserror.Newf("reached %d descendant iterations for %q", maxIter, statusIRIStr) } + +// updateStatusParent updates the given status' parent +// status URI, ID and account ID to given values in DB. +func (d *Dereferencer) updateStatusParent( + ctx context.Context, + status *gtsmodel.Status, + parentStatusID string, + parentStatusURI string, + parentAccountID string, +) error { + status.InReplyToAccountID = parentAccountID + status.InReplyToURI = parentStatusURI + status.InReplyToID = parentStatusID + if err := d.state.DB.UpdateStatus(ctx, + status, + "in_reply_to_id", + "in_reply_to_uri", + "in_reply_to_account_id", + ); err != nil { + return gtserror.Newf("error updating status %s: %w", status.URI, err) + } + return nil +} diff --git a/internal/transport/controller.go b/internal/transport/controller.go index 32ef6d7c2..81022596a 100644 --- a/internal/transport/controller.go +++ b/internal/transport/controller.go @@ -18,11 +18,15 @@ package transport import ( + "bytes" "context" "crypto/rsa" "crypto/x509" "encoding/json" + "errors" "fmt" + "io" + "net/http" "net/url" "runtime" @@ -31,6 +35,7 @@ "github.com/superseriousbusiness/activity/pub" "github.com/superseriousbusiness/gotosocial/internal/ap" "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/federation/federatingdb" "github.com/superseriousbusiness/gotosocial/internal/httpclient" "github.com/superseriousbusiness/gotosocial/internal/state" @@ -150,36 +155,73 @@ func (c *controller) NewTransportForUsername(ctx context.Context, username strin // account on this instance, without making any external api/http calls. // // It is passed to new transports, and should only be invoked when the iri.Host == this host. -func (c *controller) dereferenceLocalFollowers(ctx context.Context, iri *url.URL) ([]byte, error) { +func (c *controller) dereferenceLocalFollowers(ctx context.Context, iri *url.URL) (*http.Response, error) { followers, err := c.fedDB.Followers(ctx, iri) - if err != nil { + if err != nil && !errors.Is(err, db.ErrNoEntries) { return nil, err } + if followers == nil { + // Return a generic 404 not found response. + rsp := craftResponse(iri, http.StatusNotFound) + return rsp, nil + } + i, err := ap.Serialize(followers) if err != nil { return nil, err } - return json.Marshal(i) + b, err := json.Marshal(i) + if err != nil { + return nil, err + } + + // Return a response with AS data as body. + rsp := craftResponse(iri, http.StatusOK) + rsp.Body = io.NopCloser(bytes.NewReader(b)) + return rsp, nil } // dereferenceLocalUser is a shortcut to dereference followers an account on // this instance, without making any external api/http calls. // // It is passed to new transports, and should only be invoked when the iri.Host == this host. -func (c *controller) dereferenceLocalUser(ctx context.Context, iri *url.URL) ([]byte, error) { +func (c *controller) dereferenceLocalUser(ctx context.Context, iri *url.URL) (*http.Response, error) { user, err := c.fedDB.Get(ctx, iri) - if err != nil { + if err != nil && !errors.Is(err, db.ErrNoEntries) { return nil, err } + if user == nil { + // Return a generic 404 not found response. + rsp := craftResponse(iri, http.StatusNotFound) + return rsp, nil + } + i, err := ap.Serialize(user) if err != nil { return nil, err } - return json.Marshal(i) + b, err := json.Marshal(i) + if err != nil { + return nil, err + } + + // Return a response with AS data as body. + rsp := craftResponse(iri, http.StatusOK) + rsp.Body = io.NopCloser(bytes.NewReader(b)) + return rsp, nil +} + +func craftResponse(url *url.URL, code int) *http.Response { + rsp := new(http.Response) + rsp.Request = new(http.Request) + rsp.Request.URL = url + rsp.Status = http.StatusText(code) + rsp.StatusCode = code + return rsp } // privkeyToPublicStr will create a string representation of RSA public key from private. diff --git a/internal/transport/dereference.go b/internal/transport/dereference.go index 3a33a81ad..efd3f0fbf 100644 --- a/internal/transport/dereference.go +++ b/internal/transport/dereference.go @@ -19,7 +19,6 @@ import ( "context" - "io" "net/http" "net/url" @@ -29,7 +28,7 @@ "github.com/superseriousbusiness/gotosocial/internal/uris" ) -func (t *transport) Dereference(ctx context.Context, iri *url.URL) ([]byte, error) { +func (t *transport) Dereference(ctx context.Context, iri *url.URL) (*http.Response, error) { // if the request is to us, we can shortcut for certain URIs rather than going through // the normal request flow, thereby saving time and energy if iri.Host == config.GetHost() { @@ -62,18 +61,20 @@ func (t *transport) Dereference(ctx context.Context, iri *url.URL) ([]byte, erro if err != nil { return nil, err } - defer rsp.Body.Close() // Ensure a non-error status response. if rsp.StatusCode != http.StatusOK { - return nil, gtserror.NewFromResponse(rsp) + err := gtserror.NewFromResponse(rsp) + _ = rsp.Body.Close() // done with body + return nil, err } // Ensure that the incoming request content-type is expected. if ct := rsp.Header.Get("Content-Type"); !apiutil.ASContentType(ct) { err := gtserror.Newf("non activity streams response: %s", ct) + _ = rsp.Body.Close() // done with body return nil, gtserror.SetMalformed(err) } - return io.ReadAll(rsp.Body) + return rsp, nil } diff --git a/internal/transport/transport.go b/internal/transport/transport.go index a2a9dc23c..99972fe25 100644 --- a/internal/transport/transport.go +++ b/internal/transport/transport.go @@ -65,7 +65,7 @@ type Transport interface { GET(*http.Request) (*http.Response, error) // Dereference fetches the ActivityStreams object located at this IRI with a GET request. - Dereference(ctx context.Context, iri *url.URL) ([]byte, error) + Dereference(ctx context.Context, iri *url.URL) (*http.Response, error) // DereferenceMedia fetches the given media attachment IRI, returning the reader and filesize. DereferenceMedia(ctx context.Context, iri *url.URL) (io.ReadCloser, int64, error) diff --git a/internal/typeutils/astointernal_test.go b/internal/typeutils/astointernal_test.go index fc8cd19a0..2ee2f9607 100644 --- a/internal/typeutils/astointernal_test.go +++ b/internal/typeutils/astointernal_test.go @@ -18,9 +18,11 @@ package typeutils_test import ( + "bytes" "context" "encoding/json" "fmt" + "io" "testing" "github.com/stretchr/testify/suite" @@ -39,11 +41,11 @@ func (suite *ASToInternalTestSuite) jsonToType(in string) vocab.Type { ctx := context.Background() b := []byte(in) - if accountable, err := ap.ResolveAccountable(ctx, b); err == nil { + if accountable, err := ap.ResolveAccountable(ctx, io.NopCloser(bytes.NewReader(b))); err == nil { return accountable } - if statusable, err := ap.ResolveStatusable(ctx, b); err == nil { + if statusable, err := ap.ResolveStatusable(ctx, io.NopCloser(bytes.NewReader(b))); err == nil { return statusable } diff --git a/vendor/github.com/superseriousbusiness/activity/pub/base_actor.go b/vendor/github.com/superseriousbusiness/activity/pub/base_actor.go index a89b05cfb..dc79c3b6e 100644 --- a/vendor/github.com/superseriousbusiness/activity/pub/base_actor.go +++ b/vendor/github.com/superseriousbusiness/activity/pub/base_actor.go @@ -4,7 +4,6 @@ "context" "encoding/json" "fmt" - "io/ioutil" "net/http" "net/url" @@ -68,12 +67,7 @@ func NewSocialActor(c CommonBehavior, clock Clock) Actor { return &baseActor{ // Use SideEffectActor without s2s. - delegate: &SideEffectActor{ - common: c, - c2s: c2s, - db: db, - clock: clock, - }, + delegate: NewSideEffectActor(c, nil, c2s, db, clock), enableSocialProtocol: true, clock: clock, } @@ -98,12 +92,7 @@ func NewFederatingActor(c CommonBehavior, return &baseActorFederating{ baseActor{ // Use SideEffectActor without c2s. - delegate: &SideEffectActor{ - common: c, - s2s: s2s, - db: db, - clock: clock, - }, + delegate: NewSideEffectActor(c, s2s, nil, db, clock), enableFederatedProtocol: true, clock: clock, }, @@ -195,14 +184,10 @@ func (b *baseActor) PostInboxScheme(c context.Context, w http.ResponseWriter, r // Begin processing the request, but have not yet applied // authorization (ex: blocks). Obtain the activity reject unknown // activities. - raw, err := ioutil.ReadAll(r.Body) + m, err := readActivityPubReq(r) if err != nil { return true, err } - var m map[string]interface{} - if err = json.Unmarshal(raw, &m); err != nil { - return true, err - } asValue, err := streams.ToType(c, m) if err != nil && !streams.IsUnmatchedErr(err) { return true, err @@ -340,14 +325,10 @@ func (b *baseActor) PostOutboxScheme(c context.Context, w http.ResponseWriter, r return true, nil } // Everything is good to begin processing the request. - raw, err := ioutil.ReadAll(r.Body) + m, err := readActivityPubReq(r) if err != nil { return true, err } - var m map[string]interface{} - if err = json.Unmarshal(raw, &m); err != nil { - return true, err - } // Note that converting to a Type will NOT successfully convert types // not known to go-fed. This prevents accidentally wrapping an Activity // type unknown to go-fed in a Create below. Instead, diff --git a/vendor/github.com/superseriousbusiness/activity/pub/federating_wrapped_callbacks.go b/vendor/github.com/superseriousbusiness/activity/pub/federating_wrapped_callbacks.go index fffee8b81..fe1bf3c8e 100644 --- a/vendor/github.com/superseriousbusiness/activity/pub/federating_wrapped_callbacks.go +++ b/vendor/github.com/superseriousbusiness/activity/pub/federating_wrapped_callbacks.go @@ -2,7 +2,6 @@ import ( "context" - "encoding/json" "fmt" "net/url" @@ -244,12 +243,12 @@ func (w FederatingWrappedCallbacks) create(c context.Context, a vocab.ActivitySt if err != nil { return err } - b, err := tport.Dereference(c, iter.GetIRI()) + resp, err := tport.Dereference(c, iter.GetIRI()) if err != nil { return err } - var m map[string]interface{} - if err = json.Unmarshal(b, &m); err != nil { + m, err := readActivityPubResp(resp) + if err != nil { return err } t, err = streams.ToType(c, m) @@ -514,12 +513,12 @@ func (w FederatingWrappedCallbacks) accept(c context.Context, a vocab.ActivitySt if err != nil { return err } - b, err := tport.Dereference(c, iter.GetIRI()) + resp, err := tport.Dereference(c, iter.GetIRI()) if err != nil { return err } - var m map[string]interface{} - if err = json.Unmarshal(b, &m); err != nil { + m, err := readActivityPubResp(resp) + if err != nil { return err } t, err = streams.ToType(c, m) diff --git a/vendor/github.com/superseriousbusiness/activity/pub/side_effect_actor.go b/vendor/github.com/superseriousbusiness/activity/pub/side_effect_actor.go index 4fe300832..4062cf507 100644 --- a/vendor/github.com/superseriousbusiness/activity/pub/side_effect_actor.go +++ b/vendor/github.com/superseriousbusiness/activity/pub/side_effect_actor.go @@ -629,14 +629,14 @@ func (a *SideEffectActor) hasInboxForwardingValues(c context.Context, inboxIRI * if err != nil { return false, err } - b, err := tport.Dereference(c, iri) + resp, err := tport.Dereference(c, iri) if err != nil { // Do not fail the entire process if the data is // missing. continue } - var m map[string]interface{} - if err = json.Unmarshal(b, &m); err != nil { + m, err := readActivityPubResp(resp) + if err != nil { return false, err } t, err := streams.ToType(c, m) @@ -855,13 +855,14 @@ func (a *SideEffectActor) resolveActors(c context.Context, t Transport, r []*url // The returned actor could be nil, if it wasn't an actor (ex: a Collection or // OrderedCollection). func (a *SideEffectActor) dereferenceForResolvingInboxes(c context.Context, t Transport, actorIRI *url.URL) (actor vocab.Type, moreActorIRIs []*url.URL, err error) { - var resp []byte + var resp *http.Response resp, err = t.Dereference(c, actorIRI) if err != nil { return } var m map[string]interface{} - if err = json.Unmarshal(resp, &m); err != nil { + m, err = readActivityPubResp(resp) + if err != nil { return } actor, err = streams.ToType(c, m) diff --git a/vendor/github.com/superseriousbusiness/activity/pub/transport.go b/vendor/github.com/superseriousbusiness/activity/pub/transport.go index bdc58a97a..a770b8b46 100644 --- a/vendor/github.com/superseriousbusiness/activity/pub/transport.go +++ b/vendor/github.com/superseriousbusiness/activity/pub/transport.go @@ -5,7 +5,6 @@ "context" "crypto" "fmt" - "io/ioutil" "net/http" "net/url" "strings" @@ -40,11 +39,13 @@ func isSuccess(code int) bool { // // It may be reused multiple times, but never concurrently. type Transport interface { - // Dereference fetches the ActivityStreams object located at this IRI - // with a GET request. - Dereference(c context.Context, iri *url.URL) ([]byte, error) + // Dereference fetches the ActivityStreams object located at this IRI with + // a GET request. Note that Response will only be returned on status = OK. + Dereference(c context.Context, iri *url.URL) (*http.Response, error) + // Deliver sends an ActivityStreams object. Deliver(c context.Context, b []byte, to *url.URL) error + // BatchDeliver sends an ActivityStreams object to multiple recipients. BatchDeliver(c context.Context, b []byte, recipients []*url.URL) error } @@ -107,9 +108,8 @@ func NewHttpSigTransport( } } -// Dereference sends a GET request signed with an HTTP Signature to obtain an -// ActivityStreams value. -func (h HttpSigTransport) Dereference(c context.Context, iri *url.URL) ([]byte, error) { +// Dereference sends a GET request signed with an HTTP Signature to obtain an ActivityStreams value. +func (h HttpSigTransport) Dereference(c context.Context, iri *url.URL) (*http.Response, error) { req, err := http.NewRequest("GET", iri.String(), nil) if err != nil { return nil, err @@ -130,11 +130,11 @@ func (h HttpSigTransport) Dereference(c context.Context, iri *url.URL) ([]byte, if err != nil { return nil, err } - defer resp.Body.Close() if resp.StatusCode != http.StatusOK { + _ = resp.Body.Close() return nil, fmt.Errorf("GET request to %s failed (%d): %s", iri.String(), resp.StatusCode, resp.Status) } - return ioutil.ReadAll(resp.Body) + return resp, nil } // Deliver sends a POST request with an HTTP Signature. @@ -166,8 +166,7 @@ func (h HttpSigTransport) Deliver(c context.Context, b []byte, to *url.URL) erro return nil } -// BatchDeliver sends concurrent POST requests. Returns an error if any of the -// requests had an error. +// BatchDeliver sends concurrent POST requests. Returns an error if any of the requests had an error. func (h HttpSigTransport) BatchDeliver(c context.Context, b []byte, recipients []*url.URL) error { var wg sync.WaitGroup errCh := make(chan error, len(recipients)) diff --git a/vendor/github.com/superseriousbusiness/activity/pub/util.go b/vendor/github.com/superseriousbusiness/activity/pub/util.go index a0675b76e..e917205ee 100644 --- a/vendor/github.com/superseriousbusiness/activity/pub/util.go +++ b/vendor/github.com/superseriousbusiness/activity/pub/util.go @@ -8,6 +8,7 @@ "encoding/json" "errors" "fmt" + "io" "net/http" "net/url" "strings" @@ -71,6 +72,59 @@ func headerIsActivityPubMediaType(header string) bool { acceptHeader = "Accept" ) +// readActivityPubReq reads ActivityPub data from an incoming request, handling body close. +func readActivityPubReq(req *http.Request) (map[string]interface{}, error) { + // Ensure closed when done. + defer req.Body.Close() + + var m map[string]interface{} + + // Wrap body in a JSON decoder. + dec := json.NewDecoder(req.Body) + + // Decode JSON body as "raw" AP data map. + if err := dec.Decode(&m); err != nil { + return nil, err + } + + // Perform a final second decode to ensure no trailing + // garbage data or second JSON value (indicates malformed). + if err := dec.Decode(&struct{}{}); err != io.EOF { + return nil, errors.New("trailing data after json") + } + + return m, nil +} + +// readActivityPubResp reads ActivityPub data from a dereference response, handling media type check and body close. +func readActivityPubResp(resp *http.Response) (map[string]interface{}, error) { + // Ensure closed when done. + defer resp.Body.Close() + + // Check the incoming response media type is the expected ActivityPub content-type. + if mediaType := resp.Header.Get("Content-Type"); !headerIsActivityPubMediaType(mediaType) { + return nil, fmt.Errorf("%s %s resp was not ActivityPub media type: %s", resp.Request.Method, resp.Request.URL, mediaType) + } + + var m map[string]interface{} + + // Wrap body in a JSON decoder. + dec := json.NewDecoder(resp.Body) + + // Decode JSON body as "raw" AP data map. + if err := dec.Decode(&m); err != nil { + return nil, err + } + + // Perform a final second decode to ensure no trailing + // garbage data or second JSON value (indicates malformed). + if err := dec.Decode(&struct{}{}); err != io.EOF { + return nil, errors.New("trailing data after json") + } + + return m, nil +} + // isActivityPubPost returns true if the request is a POST request that has the // ActivityStreams content type header func isActivityPubPost(r *http.Request) bool { @@ -774,12 +828,12 @@ func mustHaveActivityActorsMatchObjectActors(c context.Context, if err != nil { return err } - b, err := tport.Dereference(c, iri) + resp, err := tport.Dereference(c, iri) if err != nil { return err } - var m map[string]interface{} - if err = json.Unmarshal(b, &m); err != nil { + m, err := readActivityPubResp(resp) + if err != nil { return err } t, err := streams.ToType(c, m) diff --git a/vendor/modules.txt b/vendor/modules.txt index 00ee7a1a5..a2f5ce0ea 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -507,7 +507,7 @@ github.com/stretchr/testify/suite # github.com/subosito/gotenv v1.6.0 ## explicit; go 1.18 github.com/subosito/gotenv -# github.com/superseriousbusiness/activity v1.6.0-gts +# github.com/superseriousbusiness/activity v1.6.0-gts.0.20240221151241-5d56c04088d4 ## explicit; go 1.18 github.com/superseriousbusiness/activity/pub github.com/superseriousbusiness/activity/streams