From b092da6d281a437c3f847e655c4ad182decb0978 Mon Sep 17 00:00:00 2001 From: kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com> Date: Tue, 21 May 2024 13:20:19 +0000 Subject: [PATCH] [performance] cache v2 filter keyword regular expressions (#2903) * add caching of filterkeyword regular expressions * formatting * fix WholeWord nil check --- internal/cache/db.go | 5 ++ internal/db/bundb/filterkeyword.go | 67 ++++++++++++++----- internal/gtsmodel/filter.go | 35 +++++++--- internal/typeutils/internaltofrontend.go | 12 +--- internal/typeutils/internaltofrontend_test.go | 2 + 5 files changed, 85 insertions(+), 36 deletions(-) diff --git a/internal/cache/db.go b/internal/cache/db.go index 4ce541770..16bd10eaa 100644 --- a/internal/cache/db.go +++ b/internal/cache/db.go @@ -531,6 +531,11 @@ func (c *Caches) initFilterKeyword() { // See internal/db/bundb/filter.go. filterKeyword2.Filter = nil + // We specifically DO NOT unset + // the regexp field here, as any + // regexp.Regexp instance is safe + // for concurrent access. + return filterKeyword2 } diff --git a/internal/db/bundb/filterkeyword.go b/internal/db/bundb/filterkeyword.go index 5fd824a0b..87a8e2a2a 100644 --- a/internal/db/bundb/filterkeyword.go +++ b/internal/db/bundb/filterkeyword.go @@ -25,6 +25,7 @@ "github.com/superseriousbusiness/gotosocial/internal/gtscontext" "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/util" "github.com/uptrace/bun" ) @@ -34,12 +35,22 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod "ID", func() (*gtsmodel.FilterKeyword, error) { var filterKeyword gtsmodel.FilterKeyword - err := f.db. + + // Scan from DB. + if err := f.db. NewSelect(). Model(&filterKeyword). Where("? = ?", bun.Ident("id"), id). - Scan(ctx) - return &filterKeyword, err + Scan(ctx); err != nil { + return nil, err + } + + // Pre-compile filter keyword regular expression. + if err := filterKeyword.Compile(); err != nil { + return nil, gtserror.Newf("error compiling filter keyword regex: %w", err) + } + + return &filterKeyword, nil }, id, ) @@ -57,20 +68,20 @@ func() (*gtsmodel.FilterKeyword, error) { return filterKeyword, nil } -func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error { +func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) (err error) { if filterKeyword.Filter == nil { // Filter is not set, fetch from the cache or database. - filter, err := f.state.DB.GetFilterByID( - // Don't populate the filter with all of its keywords and statuses or we'll just end up back here. + filterKeyword.Filter, err = f.state.DB.GetFilterByID( + + // Don't populate the filter with all of its keywords + // and statuses or we'll just end up back here. gtscontext.SetBarebones(ctx), filterKeyword.FilterID, ) if err != nil { return err } - filterKeyword.Filter = filter } - return nil } @@ -84,6 +95,7 @@ func (f *filterDB) GetFilterKeywordsForAccountID(ctx context.Context, accountID func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id string) ([]*gtsmodel.FilterKeyword, error) { var filterKeywordIDs []string + if err := f.db. NewSelect(). Model((*gtsmodel.FilterKeyword)(nil)). @@ -92,6 +104,7 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st Scan(ctx, &filterKeywordIDs); err != nil { return nil, err } + if len(filterKeywordIDs) == 0 { return nil, nil } @@ -101,6 +114,8 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st filterKeywordIDs, func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) { uncachedFilterKeywords := make([]*gtsmodel.FilterKeyword, 0, len(uncachedFilterKeywordIDs)) + + // Scan from DB. if err := f.db. NewSelect(). Model(&uncachedFilterKeywords). @@ -108,6 +123,16 @@ func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) { Scan(ctx); err != nil { return nil, err } + + // Compile all the keyword regular expressions. + uncachedFilterKeywords = slices.DeleteFunc(uncachedFilterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool { + if err := filterKeyword.Compile(); err != nil { + log.Errorf(ctx, "error compiling filter keyword regex: %v", err) + return true + } + return false + }) + return uncachedFilterKeywords, nil }, ) @@ -125,23 +150,26 @@ func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) { } // Populate the filter keywords. Remove any that we can't populate from the return slice. - errs := gtserror.NewMultiError(len(filterKeywords)) filterKeywords = slices.DeleteFunc(filterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool { if err := f.populateFilterKeyword(ctx, filterKeyword); err != nil { - errs.Appendf( - "error populating filter keyword %s: %w", - filterKeyword.ID, - err, - ) + log.Errorf(ctx, "error populating filter keyword: %v", err) return true } return false }) - return filterKeywords, errs.Combine() + return filterKeywords, nil } func (f *filterDB) PutFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error { + if filterKeyword.Regexp == nil { + // Ensure regexp is compiled + // before attempted caching. + err := filterKeyword.Compile() + if err != nil { + return gtserror.Newf("error compiling filter keyword regex: %w", err) + } + } return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error { _, err := f.db. NewInsert(). @@ -156,7 +184,14 @@ func (f *filterDB) UpdateFilterKeyword(ctx context.Context, filterKeyword *gtsmo if len(columns) > 0 { columns = append(columns, "updated_at") } - + if filterKeyword.Regexp == nil { + // Ensure regexp is compiled + // before attempted caching. + err := filterKeyword.Compile() + if err != nil { + return gtserror.Newf("error compiling filter keyword regex: %w", err) + } + } return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error { _, err := f.db. NewUpdate(). diff --git a/internal/gtsmodel/filter.go b/internal/gtsmodel/filter.go index db0a15dfd..c3feec00f 100644 --- a/internal/gtsmodel/filter.go +++ b/internal/gtsmodel/filter.go @@ -17,7 +17,10 @@ package gtsmodel -import "time" +import ( + "regexp" + "time" +) // Filter stores a filter created by a local account. type Filter struct { @@ -39,14 +42,28 @@ type Filter struct { // FilterKeyword stores a single keyword to filter statuses against. type FilterKeyword struct { - ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database - CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created - UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated - AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter keyword. - FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to. - Filter *Filter `bun:"-"` // Filter corresponding to FilterID - Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against. - WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries? + ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database + CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created + UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated + AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter keyword. + FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to. + Filter *Filter `bun:"-"` // Filter corresponding to FilterID + Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against. + WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries? + Regexp *regexp.Regexp `bun:"-"` // pre-prepared regular expression +} + +// Compile will compile this FilterKeyword as a prepared regular expression. +func (k *FilterKeyword) Compile() (err error) { + var wordBreak string + if k.WholeWord != nil && *k.WholeWord { + wordBreak = `\b` + } + + // Compile keyword filter regexp. + quoted := regexp.QuoteMeta(k.Keyword) + k.Regexp, err = regexp.Compile(`(?i)` + wordBreak + quoted + wordBreak) + return // caller is expected to wrap this error } // FilterStatus stores a single status to filter. diff --git a/internal/typeutils/internaltofrontend.go b/internal/typeutils/internaltofrontend.go index 7a5572267..d7f1fac6c 100644 --- a/internal/typeutils/internaltofrontend.go +++ b/internal/typeutils/internaltofrontend.go @@ -22,7 +22,6 @@ "errors" "fmt" "math" - "regexp" "strconv" "strings" "time" @@ -746,18 +745,9 @@ func (c *Converter) statusToAPIFilterResults( keywordMatches := make([]string, 0, len(filter.Keywords)) fields := filterableTextFields(s) for _, filterKeyword := range filter.Keywords { - wholeWord := util.PtrValueOr(filterKeyword.WholeWord, false) - wordBreak := `` - if wholeWord { - wordBreak = `\b` - } - re, err := regexp.Compile(`(?i)` + wordBreak + regexp.QuoteMeta(filterKeyword.Keyword) + wordBreak) - if err != nil { - return nil, err - } var isMatch bool for _, field := range fields { - if re.MatchString(field) { + if filterKeyword.Regexp.MatchString(field) { isMatch = true break } diff --git a/internal/typeutils/internaltofrontend_test.go b/internal/typeutils/internaltofrontend_test.go index 2c4f28a9b..676870c7b 100644 --- a/internal/typeutils/internaltofrontend_test.go +++ b/internal/typeutils/internaltofrontend_test.go @@ -546,6 +546,7 @@ func (suite *InternalToFrontendTestSuite) TestWarnFilteredStatusToFrontend() { requestingAccount := suite.testAccounts["local_account_1"] expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"] expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] + suite.NoError(expectedMatchingFilterKeyword.Compile()) expectedMatchingFilterKeyword.Filter = expectedMatchingFilter expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword} requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter} @@ -700,6 +701,7 @@ func (suite *InternalToFrontendTestSuite) TestHideFilteredStatusToFrontend() { expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"] expectedMatchingFilter.Action = gtsmodel.FilterActionHide expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] + suite.NoError(expectedMatchingFilterKeyword.Compile()) expectedMatchingFilterKeyword.Filter = expectedMatchingFilter expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword} requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}