[performance] Optimize local timeline + local status count queries (#3892)

* [performance] Optimize local timeline + local status count queries

* remove if not exists from create view
This commit is contained in:
tobi 2025-03-10 13:52:19 +01:00 committed by GitHub
parent 3db5bfa0f2
commit 0c72282559
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 251 additions and 32 deletions

View file

@ -77,45 +77,53 @@ func (i *instanceDB) CountInstanceUsers(ctx context.Context, domain string) (int
}
func (i *instanceDB) CountInstanceStatuses(ctx context.Context, domain string) (int, error) {
localhost := (domain == config.GetHost() || domain == config.GetAccountDomain())
local := (domain == config.GetHost() || domain == config.GetAccountDomain())
if localhost {
// Check for a cached instance statuses count, if so return this.
if n := i.state.Caches.DB.LocalInstance.Statuses.Load(); n != nil {
return *n, nil
}
if local {
return i.countLocalStatuses(ctx)
}
q := i.db.
NewSelect().
TableExpr("? AS ?", bun.Ident("statuses"), bun.Ident("status"))
if localhost {
// if the domain is *this* domain, just count where local is true
q = q.Where("? = ?", bun.Ident("status.local"), true)
} else {
// join on the domain of the account
q = q.
Join("JOIN ? AS ? ON ? = ?", bun.Ident("accounts"), bun.Ident("account"), bun.Ident("account.id"), bun.Ident("status.account_id")).
Where("? = ?", bun.Ident("account.domain"), domain)
}
// Ignore statuses that are currently pending approval.
q = q.Where("NOT ? = ?", bun.Ident("status.pending_approval"), true)
// Ignore statuses that are direct messages.
q = q.Where("NOT ? = ?", bun.Ident("status.visibility"), gtsmodel.VisibilityDirect)
TableExpr("? AS ?", bun.Ident("statuses"), bun.Ident("status")).
// Join on the domain of the account.
Join(
"JOIN ? AS ? ON ? = ?",
bun.Ident("accounts"), bun.Ident("account"),
bun.Ident("account.id"), bun.Ident("status.account_id"),
).
Where("? = ?", bun.Ident("account.domain"), domain).
// Ignore pending approval.
Where("? = ?", bun.Ident("status.pending_approval"), false).
// Ignore direct messages.
Where("NOT ? = ?", bun.Ident("status.visibility"), gtsmodel.VisibilityDirect)
count, err := q.Count(ctx)
if err != nil {
return 0, err
}
if localhost {
// Update cached instance statuses account value.
i.state.Caches.DB.LocalInstance.Statuses.Store(&count)
return count, nil
}
// countLocalStatuses returns the count of local statuses for this instance.
//
// A previously stored count in the LocalInstance.Statuses cache is returned
// as-is. Otherwise the value is scanned from the statuses_local_count_view
// database view, stored in the cache, and returned. On a database error the
// cache is left untouched and 0 is returned alongside the error.
func (i *instanceDB) countLocalStatuses(ctx context.Context) (int, error) {
	// Fast path: a non-nil pointer means a count was cached earlier.
	cached := i.state.Caches.DB.LocalInstance.Statuses.Load()
	if cached != nil {
		return *cached, nil
	}

	// Cache miss: read the count from the local count view.
	var total int
	err := i.db.
		NewSelect().
		Table("statuses_local_count_view").
		Scan(ctx, &total)
	if err != nil {
		return 0, err
	}

	// Remember the freshly loaded count for subsequent calls.
	i.state.Caches.DB.LocalInstance.Statuses.Store(&total)
	return total, nil
}

View file

@ -0,0 +1,87 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package migrations
import (
"context"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/uptrace/bun"
)
// init registers a migration that creates the statuses_local_count_view
// view and rebuilds the statuses_local_idx index on the statuses table.
func init() {
	const (
		viewName  = "statuses_local_count_view"
		indexName = "statuses_local_idx"

		// Placeholders are filled in by bun with the
		// identifiers and values passed to NewRaw below.
		createViewSQL = "CREATE VIEW ? AS " +
			"SELECT COUNT(*) FROM ? " +
			"WHERE (? = ?) AND (? IN (?)) AND (? = ?)"
	)

	// up performs all three steps inside a single transaction,
	// so a failure in any step returns the error from RunInTx.
	up := func(ctx context.Context, db *bun.DB) error {
		return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
			// Visibilities that the count view includes;
			// direct visibility is not among them.
			counted := []int16{
				int16(gtsmodel.VisibilityPublic),
				int16(gtsmodel.VisibilityUnlocked),
				int16(gtsmodel.VisibilityFollowersOnly),
				int16(gtsmodel.VisibilityMutualsOnly),
			}

			// Step 1: create the view counting local, non-pending
			// statuses whose visibility is in the list above.
			_, err := tx.
				NewRaw(
					createViewSQL,
					bun.Ident(viewName),
					bun.Ident("statuses"),
					bun.Ident("local"), true,
					bun.Ident("visibility"), bun.In(counted),
					bun.Ident("pending_approval"), false,
				).
				Exec(ctx)
			if err != nil {
				return err
			}

			// Step 2: remove the previous index of the
			// same name, if there is one.
			_, err = tx.
				NewDropIndex().
				Index(indexName).
				IfExists().
				Exec(ctx)
			if err != nil {
				return err
			}

			// Step 3: recreate the index as a multicolumn one
			// intended to serve both the local count view
			// and the local timeline query.
			_, err = tx.
				NewCreateIndex().
				Table("statuses").
				Index(indexName).
				Column("local", "visibility", "pending_approval").
				ColumnExpr("? DESC", bun.Ident("id")).
				IfNotExists().
				Exec(ctx)
			return err
		})
	}

	// down is a no-op: this migration defines no rollback.
	down := func(ctx context.Context, db *bun.DB) error {
		return nil
	}

	if err := Migrations.Register(up, down); err != nil {
		panic(err)
	}
}

View file

@ -193,12 +193,29 @@ func (t *timelineDB) GetHomeTimeline(ctx context.Context, accountID string, maxI
return t.state.DB.GetStatusesByIDs(ctx, statusIDs)
}
func (t *timelineDB) GetPublicTimeline(ctx context.Context, maxID string, sinceID string, minID string, limit int, local bool) ([]*gtsmodel.Status, error) {
func (t *timelineDB) GetPublicTimeline(
ctx context.Context,
maxID string,
sinceID string,
minID string,
limit int,
local bool,
) ([]*gtsmodel.Status, error) {
// Ensure reasonable
if limit < 0 {
limit = 0
}
if local {
return t.getLocalTimeline(
ctx,
maxID,
sinceID,
minID,
limit,
)
}
// Make educated guess for slice size
var (
statusIDs = make([]string, 0, limit)
@ -238,11 +255,6 @@ func (t *timelineDB) GetPublicTimeline(ctx context.Context, maxID string, sinceI
frontToBack = false
}
if local {
// return only statuses posted by local account havers
q = q.Where("? = ?", bun.Ident("status.local"), local)
}
// Only include statuses that aren't pending approval.
q = q.Where("NOT ? = ?", bun.Ident("status.pending_approval"), true)
@ -280,6 +292,90 @@ func (t *timelineDB) GetPublicTimeline(ctx context.Context, maxID string, sinceI
return t.state.DB.GetStatusesByIDs(ctx, statusIDs)
}
// getLocalTimeline returns statuses for the local timeline: local, public
// visibility, not pending approval, and not boosts.
//
// Paging parameters:
//   - maxID: only statuses with an ID lower than this are returned. When it
//     is empty or >= id.Highest, it defaults to a ULID generated for 24 hours
//     from now, so statuses dated further in the future are left out.
//   - sinceID: when set, only statuses with a higher ID are returned.
//   - minID: when set, only statuses with a higher ID are returned, and the
//     query pages up (ascending) instead of down.
//   - limit: maximum number of statuses; values <= 0 apply no SQL limit.
//
// The returned slice is always ordered by ID descending. When no IDs match,
// nil is returned with a nil error.
func (t *timelineDB) getLocalTimeline(
	ctx context.Context,
	maxID string,
	sinceID string,
	minID string,
	limit int,
) ([]*gtsmodel.Status, error) {
	// A provided minID switches the query
	// from paging down to paging up.
	pagingUp := minID != ""

	if maxID == "" || maxID >= id.Highest {
		// Cap the upper bound so that statuses more than
		// 24hr in the future are never returned.
		const future = 24 * time.Hour
		maxID = id.NewULIDFromTime(time.Now().Add(future))
	}

	query := t.db.
		NewSelect().
		TableExpr("? AS ?", bun.Ident("statuses"), bun.Ident("status")).
		// Statuses from local accounts only.
		Where("? = ?", bun.Ident("status.local"), true).
		// Public visibility only.
		Where("? = ?", bun.Ident("status.visibility"), gtsmodel.VisibilityPublic).
		// Skip anything still pending approval.
		Where("? = ?", bun.Ident("status.pending_approval"), false).
		// Skip boosts.
		Where("? IS NULL", bun.Ident("status.boost_of_id")).
		// Only the ID column is needed here.
		Column("status.id")

	// Upper bound: strictly older than maxID.
	query = query.Where("? < ?", bun.Ident("status.id"), maxID)

	// Lower bounds: strictly newer than sinceID and/or minID.
	if sinceID != "" {
		query = query.Where("? > ?", bun.Ident("status.id"), sinceID)
	}
	if pagingUp {
		query = query.Where("? > ?", bun.Ident("status.id"), minID)
	}

	if limit > 0 {
		query = query.Limit(limit)
	}

	// Paging down walks newest-first; paging up walks oldest-first.
	ordering := "status.id DESC"
	if pagingUp {
		ordering = "status.id ASC"
	}
	query = query.Order(ordering)

	// Preallocate using the limit as a size guess.
	ids := make([]string, 0, limit)
	if err := query.Scan(ctx, &ids); err != nil {
		return nil, err
	}

	if len(ids) == 0 {
		return nil, nil
	}

	if pagingUp {
		// The query returned ascending IDs; flip them in place
		// so callers always receive IDs sorted descending.
		for lo := 0; lo < len(ids)/2; lo++ {
			hi := len(ids) - 1 - lo
			ids[lo], ids[hi] = ids[hi], ids[lo]
		}
	}

	// Load the full statuses for the selected IDs.
	return t.state.DB.GetStatusesByIDs(ctx, ids)
}
// TODO optimize this query and the logic here, because it's slow as balls -- it takes like a literal second to return with a limit of 20!
// It might be worth serving it through a timeline instead of raw DB queries, like we do for Home feeds.
func (t *timelineDB) GetFavedTimeline(ctx context.Context, accountID string, maxID string, minID string, limit int) ([]*gtsmodel.Status, string, string, error) {

View file

@ -79,6 +79,19 @@ func (suite *TimelineTestSuite) publicCount() int {
return publicCount
}
// localCount returns how many of the suite's test statuses are expected
// on the local timeline: public visibility, not a boost, not pending
// approval, and local.
func (suite *TimelineTestSuite) localCount() int {
	expected := 0

	for _, s := range suite.testStatuses {
		if s.Visibility != gtsmodel.VisibilityPublic {
			// Not public.
			continue
		}

		if s.BoostOfID != "" {
			// Boosts are excluded.
			continue
		}

		if util.PtrOrZero(s.PendingApproval) {
			// Still pending approval.
			continue
		}

		// NOTE(review): presumably an unset Local pointer falls back
		// to the supplied default (true) — confirm against util.PtrOrValue.
		if !util.PtrOrValue(s.Local, true) {
			continue
		}

		expected++
	}

	return expected
}
func (suite *TimelineTestSuite) checkStatuses(statuses []*gtsmodel.Status, maxID string, minID string, expectedLength int) {
if l := len(statuses); l != expectedLength {
suite.FailNowf("", "expected %d statuses in slice, got %d", expectedLength, l)
@ -123,6 +136,21 @@ func (suite *TimelineTestSuite) TestGetPublicTimeline() {
suite.checkStatuses(s, id.Highest, id.Lowest, suite.publicCount())
}
// TestGetPublicTimelineLocal fetches the public timeline with the local
// flag set and no paging bounds, then checks the result against the number
// of test statuses computed by localCount.
func (suite *TimelineTestSuite) TestGetPublicTimelineLocal() {
	const (
		limit     = 20
		localOnly = true
	)

	statuses, err := suite.db.GetPublicTimeline(
		context.Background(),
		"", // maxID
		"", // sinceID
		"", // minID
		limit,
		localOnly,
	)
	if err != nil {
		suite.FailNow(err.Error())
	}

	// Log what came back to aid debugging on failure.
	suite.T().Log(kv.Field{K: "statuses", V: statuses})

	suite.checkStatuses(statuses, id.Highest, id.Lowest, suite.localCount())
}
func (suite *TimelineTestSuite) TestGetPublicTimelineWithFutureStatus() {
ctx := context.Background()