From 0c72282559fffa06e533de9eed375c9130c0e7a3 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Mon, 10 Mar 2025 13:52:19 +0100 Subject: [PATCH] [performance] Optimize local timeline + local status count queries (#3892) * [performance] Optimize local timeline + local status count queries * remove if not exists from create view --- internal/db/bundb/instance.go | 60 +++++----- ...310094108_statuses_count_query_optimize.go | 87 ++++++++++++++ internal/db/bundb/timeline.go | 108 +++++++++++++++++- internal/db/bundb/timeline_test.go | 28 +++++ 4 files changed, 251 insertions(+), 32 deletions(-) create mode 100644 internal/db/bundb/migrations/20250310094108_statuses_count_query_optimize.go diff --git a/internal/db/bundb/instance.go b/internal/db/bundb/instance.go index 7852ae52e..154c1d34e 100644 --- a/internal/db/bundb/instance.go +++ b/internal/db/bundb/instance.go @@ -77,45 +77,53 @@ func (i *instanceDB) CountInstanceUsers(ctx context.Context, domain string) (int } func (i *instanceDB) CountInstanceStatuses(ctx context.Context, domain string) (int, error) { - localhost := (domain == config.GetHost() || domain == config.GetAccountDomain()) + local := (domain == config.GetHost() || domain == config.GetAccountDomain()) - if localhost { - // Check for a cached instance statuses count, if so return this. - if n := i.state.Caches.DB.LocalInstance.Statuses.Load(); n != nil { - return *n, nil - } + if local { + return i.countLocalStatuses(ctx) } q := i.db. NewSelect(). - TableExpr("? AS ?", bun.Ident("statuses"), bun.Ident("status")) - - if localhost { - // if the domain is *this* domain, just count where local is true - q = q.Where("? = ?", bun.Ident("status.local"), true) - } else { - // join on the domain of the account - q = q. - Join("JOIN ? AS ? ON ? = ?", bun.Ident("accounts"), bun.Ident("account"), bun.Ident("account.id"), bun.Ident("status.account_id")). - Where("? = ?", bun.Ident("account.domain"), domain) - } - - // Ignore statuses that are currently pending approval. - q = q.Where("NOT ? = ?", bun.Ident("status.pending_approval"), true) - - // Ignore statuses that are direct messages. - q = q.Where("NOT ? = ?", bun.Ident("status.visibility"), gtsmodel.VisibilityDirect) + TableExpr("? AS ?", bun.Ident("statuses"), bun.Ident("status")). + // Join on the domain of the account. + Join( + "JOIN ? AS ? ON ? = ?", + bun.Ident("accounts"), bun.Ident("account"), + bun.Ident("account.id"), bun.Ident("status.account_id"), + ). + Where("? = ?", bun.Ident("account.domain"), domain). + // Ignore pending approval. + Where("? = ?", bun.Ident("status.pending_approval"), false). + // Ignore direct messages. + Where("NOT ? = ?", bun.Ident("status.visibility"), gtsmodel.VisibilityDirect) count, err := q.Count(ctx) if err != nil { return 0, err } - if localhost { - // Update cached instance statuses account value. - i.state.Caches.DB.LocalInstance.Statuses.Store(&count) + return count, nil +} + +func (i *instanceDB) countLocalStatuses(ctx context.Context) (int, error) { + // Check for a cached instance statuses count, if so return this. + if n := i.state.Caches.DB.LocalInstance.Statuses.Load(); n != nil { + return *n, nil } + // Select from local count view. + var count int + if err := i.db. + NewSelect(). + Table("statuses_local_count_view"). + Scan(ctx, &count); err != nil { + return 0, err + } + + // Update cached instance statuses account value. + i.state.Caches.DB.LocalInstance.Statuses.Store(&count) + return count, nil } diff --git a/internal/db/bundb/migrations/20250310094108_statuses_count_query_optimize.go b/internal/db/bundb/migrations/20250310094108_statuses_count_query_optimize.go new file mode 100644 index 000000000..b20340f32 --- /dev/null +++ b/internal/db/bundb/migrations/20250310094108_statuses_count_query_optimize.go @@ -0,0 +1,87 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package migrations + +import ( + "context" + + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/uptrace/bun" +) + +func init() { + up := func(ctx context.Context, db *bun.DB) error { + return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + + // Add new local statuses count view. + in := []int16{ + int16(gtsmodel.VisibilityPublic), + int16(gtsmodel.VisibilityUnlocked), + int16(gtsmodel.VisibilityFollowersOnly), + int16(gtsmodel.VisibilityMutualsOnly), + } + if _, err := tx. + NewRaw( + "CREATE VIEW ? AS "+ + "SELECT COUNT(*) FROM ? "+ + "WHERE (? = ?) AND (? IN (?)) AND (? = ?)", + bun.Ident("statuses_local_count_view"), + bun.Ident("statuses"), + bun.Ident("local"), true, + bun.Ident("visibility"), bun.In(in), + bun.Ident("pending_approval"), false, + ). + Exec(ctx); err != nil { + return err + } + + // Drop existing local index. + if _, err := tx. + NewDropIndex(). + Index("statuses_local_idx"). + IfExists(). + Exec(ctx); err != nil { + return err + } + + // Add new multicolumn local statuses + // index that works for the local count + // view and the local timeline query. + if _, err := tx. + NewCreateIndex(). + Table("statuses"). + Index("statuses_local_idx"). + Column("local", "visibility", "pending_approval"). + ColumnExpr("? DESC", bun.Ident("id")). + IfNotExists(). + Exec(ctx); err != nil { + return err + } + + return nil + }) + } + + down := func(ctx context.Context, db *bun.DB) error { + return nil + } + + if err := Migrations.Register(up, down); err != nil { + panic(err) + } +} diff --git a/internal/db/bundb/timeline.go b/internal/db/bundb/timeline.go index fcea0178a..f343a28e8 100644 --- a/internal/db/bundb/timeline.go +++ b/internal/db/bundb/timeline.go @@ -193,12 +193,29 @@ func (t *timelineDB) GetHomeTimeline(ctx context.Context, accountID string, maxI return t.state.DB.GetStatusesByIDs(ctx, statusIDs) } -func (t *timelineDB) GetPublicTimeline(ctx context.Context, maxID string, sinceID string, minID string, limit int, local bool) ([]*gtsmodel.Status, error) { +func (t *timelineDB) GetPublicTimeline( + ctx context.Context, + maxID string, + sinceID string, + minID string, + limit int, + local bool, +) ([]*gtsmodel.Status, error) { // Ensure reasonable if limit < 0 { limit = 0 } + if local { + return t.getLocalTimeline( + ctx, + maxID, + sinceID, + minID, + limit, + ) + } + // Make educated guess for slice size var ( statusIDs = make([]string, 0, limit) @@ -238,11 +255,6 @@ func (t *timelineDB) GetPublicTimeline(ctx context.Context, maxID string, sinceI frontToBack = false } - if local { - // return only statuses posted by local account havers - q = q.Where("? = ?", bun.Ident("status.local"), local) - } - // Only include statuses that aren't pending approval. q = q.Where("NOT ? = ?", bun.Ident("status.pending_approval"), true) @@ -280,6 +292,90 @@ func (t *timelineDB) GetPublicTimeline(ctx context.Context, maxID string, sinceI return t.state.DB.GetStatusesByIDs(ctx, statusIDs) } +func (t *timelineDB) getLocalTimeline( + ctx context.Context, + maxID string, + sinceID string, + minID string, + limit int, +) ([]*gtsmodel.Status, error) { + // Make educated guess for slice size + var ( + statusIDs = make([]string, 0, limit) + frontToBack = true + ) + + q := t.db. + NewSelect(). + TableExpr("? AS ?", bun.Ident("statuses"), bun.Ident("status")). + // Local only. + Where("? = ?", bun.Ident("status.local"), true). + // Public only. + Where("? = ?", bun.Ident("status.visibility"), gtsmodel.VisibilityPublic). + // Only include statuses that aren't pending approval. + Where("? = ?", bun.Ident("status.pending_approval"), false). + // Ignore boosts. + Where("? IS NULL", bun.Ident("status.boost_of_id")). + // Select only IDs from table + Column("status.id") + + if maxID == "" || maxID >= id.Highest { + const future = 24 * time.Hour + + // don't return statuses more than 24hr in the future + maxID = id.NewULIDFromTime(time.Now().Add(future)) + } + + // return only statuses LOWER (ie., older) than maxID + q = q.Where("? < ?", bun.Ident("status.id"), maxID) + + if sinceID != "" { + // return only statuses HIGHER (ie., newer) than sinceID + q = q.Where("? > ?", bun.Ident("status.id"), sinceID) + } + + if minID != "" { + // return only statuses HIGHER (ie., newer) than minID + q = q.Where("? > ?", bun.Ident("status.id"), minID) + + // page up + frontToBack = false + } + + if limit > 0 { + // limit amount of statuses returned + q = q.Limit(limit) + } + + if frontToBack { + // Page down. + q = q.Order("status.id DESC") + } else { + // Page up. + q = q.Order("status.id ASC") + } + + if err := q.Scan(ctx, &statusIDs); err != nil { + return nil, err + } + + if len(statusIDs) == 0 { + return nil, nil + } + + // If we're paging up, we still want statuses + // to be sorted by ID desc, so reverse ids slice. + // https://zchee.github.io/golang-wiki/SliceTricks/#reversing + if !frontToBack { + for l, r := 0, len(statusIDs)-1; l < r; l, r = l+1, r-1 { + statusIDs[l], statusIDs[r] = statusIDs[r], statusIDs[l] + } + } + + // Return status IDs loaded from cache + db. + return t.state.DB.GetStatusesByIDs(ctx, statusIDs) +} + // TODO optimize this query and the logic here, because it's slow as balls -- it takes like a literal second to return with a limit of 20! // It might be worth serving it through a timeline instead of raw DB queries, like we do for Home feeds. func (t *timelineDB) GetFavedTimeline(ctx context.Context, accountID string, maxID string, minID string, limit int) ([]*gtsmodel.Status, string, string, error) { diff --git a/internal/db/bundb/timeline_test.go b/internal/db/bundb/timeline_test.go index 756ece2d4..4988ab362 100644 --- a/internal/db/bundb/timeline_test.go +++ b/internal/db/bundb/timeline_test.go @@ -79,6 +79,19 @@ func (suite *TimelineTestSuite) publicCount() int { return publicCount } +func (suite *TimelineTestSuite) localCount() int { + var localCount int + for _, status := range suite.testStatuses { + if status.Visibility == gtsmodel.VisibilityPublic && + status.BoostOfID == "" && + !util.PtrOrZero(status.PendingApproval) && + util.PtrOrValue(status.Local, true) { + localCount++ + } + } + return localCount +} + func (suite *TimelineTestSuite) checkStatuses(statuses []*gtsmodel.Status, maxID string, minID string, expectedLength int) { if l := len(statuses); l != expectedLength { suite.FailNowf("", "expected %d statuses in slice, got %d", expectedLength, l) @@ -123,6 +136,21 @@ func (suite *TimelineTestSuite) TestGetPublicTimeline() { suite.checkStatuses(s, id.Highest, id.Lowest, suite.publicCount()) } +func (suite *TimelineTestSuite) TestGetPublicTimelineLocal() { + ctx := context.Background() + + s, err := suite.db.GetPublicTimeline(ctx, "", "", "", 20, true) + if err != nil { + suite.FailNow(err.Error()) + } + + suite.T().Log(kv.Field{ + K: "statuses", V: s, + }) + + suite.checkStatuses(s, id.Highest, id.Lowest, suite.localCount()) +} + func (suite *TimelineTestSuite) TestGetPublicTimelineWithFutureStatus() { ctx := context.Background()