[chore/performance] Batch migration queries (#3798)

* separate enum migrations into their own individual transactions

* pee poo

* some performance tweaks and adding more comments

* batch

---------

Co-authored-by: kim <grufwub@gmail.com>
This commit is contained in:
tobi 2025-02-15 12:43:12 +01:00 committed by GitHub
parent ebbdeee0bb
commit 5dc8009e30
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 179 additions and 99 deletions

View file

@ -30,8 +30,6 @@
func init() {
up := func(ctx context.Context, db *bun.DB) error {
return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
// Status visibility type indices.
var statusVisIndices = []struct {
name string
@ -61,6 +59,7 @@ func init() {
Column string
Default *new_gtsmodel.Visibility
IndexCleanupCallback func(ctx context.Context, tx bun.Tx) error
BatchByColumn string
}{
{
Table: "statuses",
@ -78,19 +77,26 @@ func init() {
}
return nil
},
BatchByColumn: "id",
},
{
Table: "sin_bin_statuses",
Column: "visibility",
BatchByColumn: "id",
},
{
Table: "account_settings",
Column: "privacy",
Default: util.Ptr(new_gtsmodel.VisibilityDefault)},
Default: util.Ptr(new_gtsmodel.VisibilityDefault),
BatchByColumn: "account_id",
},
{
Table: "account_settings",
Column: "web_visibility",
Default: util.Ptr(new_gtsmodel.VisibilityDefault)},
Default: util.Ptr(new_gtsmodel.VisibilityDefault),
BatchByColumn: "account_id",
},
}
// Get the mapping of old enum string values to new integer values.
@ -98,8 +104,12 @@ func init() {
// Convert all visibility tables.
for _, table := range visTables {
if err := convertEnums(ctx, tx, table.Table, table.Column,
visibilityMapping, table.Default, table.IndexCleanupCallback); err != nil {
// Perform each enum table conversion within its own transaction.
if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
return convertEnums(ctx, tx, table.Table, table.Column,
visibilityMapping, table.Default, table.IndexCleanupCallback, table.BatchByColumn)
}); err != nil {
return err
}
}
@ -108,7 +118,7 @@ func init() {
log.Info(ctx, "creating new visibility indexes...")
for _, index := range statusVisIndices {
log.Infof(ctx, "creating new index %s...", index.name)
q := tx.NewCreateIndex().
q := db.NewCreateIndex().
Table("statuses").
Index(index.name).
Column(index.cols...)
@ -123,14 +133,15 @@ func init() {
// Get the mapping of old enum string values to the new integer value types.
notificationMapping := notificationEnumMapping[old_gtsmodel.NotificationType]()
// Migrate old notifications table column over to new column type.
if err := convertEnums(ctx, tx, "notifications", "notification_type", //nolint:revive
notificationMapping, nil, nil); err != nil {
// Migrate over old notifications table column to new type in tx.
if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
return convertEnums(ctx, tx, "notifications", "notification_type", //nolint:revive
notificationMapping, nil, nil, "id")
}); err != nil {
return err
}
return nil
})
}
down := func(ctx context.Context, db *bun.DB) error {

View file

@ -22,11 +22,13 @@
"errors"
"fmt"
"reflect"
"slices"
"strconv"
"strings"
"codeberg.org/gruf/go-byteutil"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/id"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/uptrace/bun"
"github.com/uptrace/bun/dialect"
@ -46,6 +48,7 @@ func convertEnums[OldType ~string, NewType ~int16](
mapping map[OldType]NewType,
defaultValue *NewType,
indexCleanupCallback func(context.Context, bun.Tx) error,
batchByColumn string,
) error {
if len(mapping) == 0 {
return errors.New("empty mapping")
@ -87,7 +90,7 @@ func convertEnums[OldType ~string, NewType ~int16](
var qbuf byteutil.Buffer
// Prepare a singular UPDATE statement using
// SET $newColumn = (CASE $column WHEN $old THEN $new ... END)
// SET $newColumn = (CASE $column WHEN $old THEN $new ... END).
qbuf.B = append(qbuf.B, "UPDATE ? SET ? = (CASE ? "...)
args = append(args, bun.Ident(table))
args = append(args, bun.Ident(newColumn))
@ -99,16 +102,82 @@ func convertEnums[OldType ~string, NewType ~int16](
qbuf.B = append(qbuf.B, "ELSE ? END)"...)
args = append(args, *defaultValue)
// Serialize it here to be
// used as the base for each
// set of batch queries below.
baseQStr := string(qbuf.B)
baseArgs := args
// Query batch size
// in number of rows.
const batchsz = 5000
// Stores highest batch value
// used in iterate queries,
// starting at highest possible.
highest := id.Highest
// Total updated rows.
var updated int
for {
// Limit to batchsz
// items at once.
batchQ := tx.
NewSelect().
Table(table).
Column(batchByColumn).
Where("? < ?", bun.Ident(batchByColumn), highest).
OrderExpr("? DESC", bun.Ident(batchByColumn)).
Limit(batchsz)
// Finalize UPDATE to operate on this batch only.
qStr := baseQStr + " WHERE ? IN (?)"
args := append(
slices.Clone(baseArgs),
bun.Ident(batchByColumn),
batchQ,
)
// Execute the prepared raw query with arguments.
res, err := tx.NewRaw(qbuf.String(), args...).Exec(ctx)
res, err := tx.NewRaw(qStr, args...).Exec(ctx)
if err != nil {
return gtserror.Newf("error updating old column values: %w", err)
}
// Count number of items updated.
updated, _ := res.RowsAffected()
// Check how many items we updated.
thisUpdated, err := res.RowsAffected()
if err != nil {
return gtserror.Newf("error counting affected rows: %w", err)
}
if thisUpdated == 0 {
// Nothing updated
// means we're done.
break
}
// Update the overall count.
updated += int(thisUpdated)
// Log helpful message to admin.
log.Infof(ctx, "migrated %d of %d %s (up to %s)",
updated, total, table, highest)
// Get next highest
// id for next batch.
if err := tx.
NewSelect().
With("batch_query", batchQ).
ColumnExpr("min(?) FROM ?", bun.Ident(batchByColumn), bun.Ident("batch_query")).
Scan(ctx, &highest); err != nil {
return gtserror.Newf("error selecting next highest: %w", err)
}
}
if total != int(updated) {
log.Warnf(ctx, "total=%d does not match updated=%d", total, updated)
// Return error here in order to rollback the whole transaction.
return fmt.Errorf("total=%d does not match updated=%d", total, updated)
}
// Run index cleanup callback if set.