From 13e9abd02a1f4003c7be922a22e8f1d095a55d61 Mon Sep 17 00:00:00 2001
From: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Fri, 25 Nov 2022 18:23:42 +0100
Subject: [PATCH] [feature] Add `admin media prune orphaned` CLI command
(#1146)
* add FilePath regex
* add `admin media prune orphaned` command
* add prune orphaned function to media manager
* don't mark flag as required
* document admin media prune orphaned cmd
* oh envparsing.sh you coy minx
---
.../action/admin/media/prune/orphaned.go | 72 ++++++++++
cmd/gotosocial/admin.go | 36 +++++
docs/admin/cli.md | 35 ++++-
internal/config/config.go | 13 +-
internal/config/flags.go | 7 +
internal/config/helpers.gen.go | 99 +++++++++-----
internal/media/manager.go | 6 +
internal/media/pruneorphaned.go | 127 ++++++++++++++++++
internal/media/pruneorphaned_test.go | 82 +++++++++++
internal/regexes/regexes.go | 7 +
test/envparsing.sh | 2 +-
11 files changed, 441 insertions(+), 45 deletions(-)
create mode 100644 cmd/gotosocial/action/admin/media/prune/orphaned.go
create mode 100644 internal/media/pruneorphaned.go
create mode 100644 internal/media/pruneorphaned_test.go
diff --git a/cmd/gotosocial/action/admin/media/prune/orphaned.go b/cmd/gotosocial/action/admin/media/prune/orphaned.go
new file mode 100644
index 000000000..4ceb356bd
--- /dev/null
+++ b/cmd/gotosocial/action/admin/media/prune/orphaned.go
@@ -0,0 +1,72 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package prune
+
+import (
+ "context"
+ "fmt"
+
+ "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action"
+ "github.com/superseriousbusiness/gotosocial/internal/config"
+ "github.com/superseriousbusiness/gotosocial/internal/db/bundb"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
+ "github.com/superseriousbusiness/gotosocial/internal/media"
+ gtsstorage "github.com/superseriousbusiness/gotosocial/internal/storage"
+)
+
+// Orphaned prunes orphaned media from storage, or only counts it when the dry-run flag is set.
+var Orphaned action.GTSAction = func(ctx context.Context) error {
+	dbService, err := bundb.NewBunDBService(ctx)
+	if err != nil {
+		return fmt.Errorf("error creating dbservice: %w", err)
+	}
+
+	storage, err := gtsstorage.AutoConfig()
+	if err != nil {
+		return fmt.Errorf("error creating storage backend: %w", err)
+	}
+
+	manager, err := media.NewManager(dbService, storage)
+	if err != nil {
+		return fmt.Errorf("error instantiating mediamanager: %w", err)
+	}
+
+	dry := config.GetAdminMediaPruneDryRun()
+
+	pruned, err := manager.PruneOrphaned(ctx, dry)
+	if err != nil {
+		return fmt.Errorf("error pruning: %w", err)
+	}
+
+	if dry {
+		log.Infof("DRY RUN: %d stored items are orphaned and eligible to be pruned", pruned)
+	} else {
+		log.Infof("%d stored items were orphaned and pruned", pruned)
+	}
+
+	if err := storage.Close(); err != nil {
+		return fmt.Errorf("error closing storage backend: %w", err)
+	}
+
+	if err := dbService.Stop(ctx); err != nil {
+		return fmt.Errorf("error closing dbservice: %w", err)
+	}
+
+	return nil
+}
diff --git a/cmd/gotosocial/admin.go b/cmd/gotosocial/admin.go
index 4bf71d612..0575452fb 100644
--- a/cmd/gotosocial/admin.go
+++ b/cmd/gotosocial/admin.go
@@ -21,6 +21,7 @@
import (
"github.com/spf13/cobra"
"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/account"
+ "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/media/prune"
"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/trans"
"github.com/superseriousbusiness/gotosocial/internal/config"
)
@@ -152,5 +153,40 @@ func adminCommands() *cobra.Command {
config.AddAdminTrans(adminImportCmd)
adminCmd.AddCommand(adminImportCmd)
+ /*
+ ADMIN MEDIA COMMANDS
+ */
+
+	adminMediaCmd := &cobra.Command{
+		Use:   "media",
+		Short: "admin commands related to stored media attachments/emojis",
+	}
+
+ /*
+ ADMIN MEDIA PRUNE COMMANDS
+ */
+ adminMediaPruneCmd := &cobra.Command{
+ Use: "prune",
+ Short: "admin commands for pruning unused/orphaned media from storage",
+ }
+ config.AddAdminMediaPrune(adminMediaPruneCmd)
+
+ adminMediaPruneOrphanedCmd := &cobra.Command{
+ Use: "orphaned",
+ Short: "prune orphaned media from storage",
+ PreRunE: func(cmd *cobra.Command, args []string) error {
+ return preRun(preRunArgs{cmd: cmd})
+ },
+ RunE: func(cmd *cobra.Command, args []string) error {
+ return run(cmd.Context(), prune.Orphaned)
+ },
+ }
+ config.AddAdminMediaPrune(adminMediaPruneOrphanedCmd)
+ adminMediaPruneCmd.AddCommand(adminMediaPruneOrphanedCmd)
+
+ adminMediaCmd.AddCommand(adminMediaPruneCmd)
+
+ adminCmd.AddCommand(adminMediaCmd)
+
return adminCmd
}
diff --git a/docs/admin/cli.md b/docs/admin/cli.md
index e56a0de39..3de976eb7 100644
--- a/docs/admin/cli.md
+++ b/docs/admin/cli.md
@@ -35,7 +35,7 @@ You can set these options using environment variables, passing them as CLI flags
## gotosocial admin
-Contains `account` subcommands.
+Contains `account`, `export`, `import`, and `media` subcommands.
### gotosocial admin account create
@@ -254,3 +254,36 @@ Example:
```bash
gotosocial admin import --path example.json --config-path config.yaml
```
+
+### gotosocial admin media prune orphaned
+
+This command can be used to prune orphaned media from your GoToSocial instance.
+
+Orphaned media is defined as media that is in storage under a key that matches the format used by GoToSocial, but which does not have a corresponding database entry. This command is useful for removing files that may be left over from a previous installation, or files that were placed in storage by mistake.
+
+**This command only works when GoToSocial is not running, since it acquires an exclusive lock on storage. Stop GoToSocial first before running this command!**
+
+```text
+prune orphaned media from storage
+
+Usage:
+ gotosocial admin media prune orphaned [flags]
+
+Flags:
+ --dry-run perform a dry run and only log number of items eligible for pruning (default true)
+ -h, --help help for orphaned
+```
+
+By default, this command performs a dry run, which will log how many items can be pruned. To do it for real, add `--dry-run=false` to the command.
+
+Example (dry run):
+
+```bash
+gotosocial admin media prune orphaned
+```
+
+Example (for real):
+
+```bash
+gotosocial admin media prune orphaned --dry-run=false
+```
diff --git a/internal/config/config.go b/internal/config/config.go
index ecbd079e6..cc37e9bfd 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -125,14 +125,15 @@ type Configuration struct {
SyslogProtocol string `name:"syslog-protocol" usage:"Protocol to use when directing logs to syslog. Leave empty to connect to local syslog."`
SyslogAddress string `name:"syslog-address" usage:"Address:port to send syslog logs to. Leave empty to connect to local syslog."`
- // TODO: move these elsewhere, these are more ephemeral vs long-running flags like above
- AdminAccountUsername string `name:"username" usage:"the username to create/delete/etc"`
- AdminAccountEmail string `name:"email" usage:"the email address of this account"`
- AdminAccountPassword string `name:"password" usage:"the password to set for this account"`
- AdminTransPath string `name:"path" usage:"the path of the file to import from/export to"`
-
AdvancedCookiesSamesite string `name:"advanced-cookies-samesite" usage:"'strict' or 'lax', see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite"`
AdvancedRateLimitRequests int `name:"advanced-rate-limit-requests" usage:"Amount of HTTP requests to permit within a 5 minute window. 0 or less turns rate limiting off."`
+
+ // TODO: move these elsewhere, these are more ephemeral vs long-running flags like above
+ AdminAccountUsername string `name:"username" usage:"the username to create/delete/etc"`
+ AdminAccountEmail string `name:"email" usage:"the email address of this account"`
+ AdminAccountPassword string `name:"password" usage:"the password to set for this account"`
+ AdminTransPath string `name:"path" usage:"the path of the file to import from/export to"`
+ AdminMediaPruneDryRun bool `name:"dry-run" usage:"perform a dry run and only log number of items eligible for pruning"`
}
// MarshalMap will marshal current Configuration into a map structure (useful for JSON).
diff --git a/internal/config/flags.go b/internal/config/flags.go
index ddc3c60de..c5df1c8b2 100644
--- a/internal/config/flags.go
+++ b/internal/config/flags.go
@@ -178,3 +178,10 @@ func AddAdminTrans(cmd *cobra.Command) {
panic(err)
}
}
+
+// AddAdminMediaPrune registers the dry-run flag, defaulting to true, on the given prune command.
+func AddAdminMediaPrune(cmd *cobra.Command) {
+	flags := cmd.Flags()
+	dryRunUsage := fieldtag("AdminMediaPruneDryRun", "usage")
+	flags.Bool(AdminMediaPruneDryRunFlag(), true, dryRunUsage)
+}
diff --git a/internal/config/helpers.gen.go b/internal/config/helpers.gen.go
index 2786f5b5a..579814b99 100644
--- a/internal/config/helpers.gen.go
+++ b/internal/config/helpers.gen.go
@@ -1745,6 +1745,56 @@ func GetSyslogAddress() string { return global.GetSyslogAddress() }
// SetSyslogAddress safely sets the value for global configuration 'SyslogAddress' field
func SetSyslogAddress(v string) { global.SetSyslogAddress(v) }
+// GetAdvancedCookiesSamesite safely fetches the Configuration value for state's 'AdvancedCookiesSamesite' field
+func (st *ConfigState) GetAdvancedCookiesSamesite() (v string) {
+ st.mutex.Lock()
+ v = st.config.AdvancedCookiesSamesite
+ st.mutex.Unlock()
+ return
+}
+
+// SetAdvancedCookiesSamesite safely sets the Configuration value for state's 'AdvancedCookiesSamesite' field
+func (st *ConfigState) SetAdvancedCookiesSamesite(v string) {
+ st.mutex.Lock()
+ defer st.mutex.Unlock()
+ st.config.AdvancedCookiesSamesite = v
+ st.reloadToViper()
+}
+
+// AdvancedCookiesSamesiteFlag returns the flag name for the 'AdvancedCookiesSamesite' field
+func AdvancedCookiesSamesiteFlag() string { return "advanced-cookies-samesite" }
+
+// GetAdvancedCookiesSamesite safely fetches the value for global configuration 'AdvancedCookiesSamesite' field
+func GetAdvancedCookiesSamesite() string { return global.GetAdvancedCookiesSamesite() }
+
+// SetAdvancedCookiesSamesite safely sets the value for global configuration 'AdvancedCookiesSamesite' field
+func SetAdvancedCookiesSamesite(v string) { global.SetAdvancedCookiesSamesite(v) }
+
+// GetAdvancedRateLimitRequests safely fetches the Configuration value for state's 'AdvancedRateLimitRequests' field
+func (st *ConfigState) GetAdvancedRateLimitRequests() (v int) {
+ st.mutex.Lock()
+ v = st.config.AdvancedRateLimitRequests
+ st.mutex.Unlock()
+ return
+}
+
+// SetAdvancedRateLimitRequests safely sets the Configuration value for state's 'AdvancedRateLimitRequests' field
+func (st *ConfigState) SetAdvancedRateLimitRequests(v int) {
+ st.mutex.Lock()
+ defer st.mutex.Unlock()
+ st.config.AdvancedRateLimitRequests = v
+ st.reloadToViper()
+}
+
+// AdvancedRateLimitRequestsFlag returns the flag name for the 'AdvancedRateLimitRequests' field
+func AdvancedRateLimitRequestsFlag() string { return "advanced-rate-limit-requests" }
+
+// GetAdvancedRateLimitRequests safely fetches the value for global configuration 'AdvancedRateLimitRequests' field
+func GetAdvancedRateLimitRequests() int { return global.GetAdvancedRateLimitRequests() }
+
+// SetAdvancedRateLimitRequests safely sets the value for global configuration 'AdvancedRateLimitRequests' field
+func SetAdvancedRateLimitRequests(v int) { global.SetAdvancedRateLimitRequests(v) }
+
// GetAdminAccountUsername safely fetches the Configuration value for state's 'AdminAccountUsername' field
func (st *ConfigState) GetAdminAccountUsername() (v string) {
st.mutex.Lock()
@@ -1845,52 +1895,27 @@ func GetAdminTransPath() string { return global.GetAdminTransPath() }
// SetAdminTransPath safely sets the value for global configuration 'AdminTransPath' field
func SetAdminTransPath(v string) { global.SetAdminTransPath(v) }
-// GetAdvancedCookiesSamesite safely fetches the Configuration value for state's 'AdvancedCookiesSamesite' field
-func (st *ConfigState) GetAdvancedCookiesSamesite() (v string) {
+// GetAdminMediaPruneDryRun safely fetches the Configuration value for state's 'AdminMediaPruneDryRun' field
+func (st *ConfigState) GetAdminMediaPruneDryRun() (v bool) {
st.mutex.Lock()
- v = st.config.AdvancedCookiesSamesite
+ v = st.config.AdminMediaPruneDryRun
st.mutex.Unlock()
return
}
-// SetAdvancedCookiesSamesite safely sets the Configuration value for state's 'AdvancedCookiesSamesite' field
-func (st *ConfigState) SetAdvancedCookiesSamesite(v string) {
+// SetAdminMediaPruneDryRun safely sets the Configuration value for state's 'AdminMediaPruneDryRun' field
+func (st *ConfigState) SetAdminMediaPruneDryRun(v bool) {
st.mutex.Lock()
defer st.mutex.Unlock()
- st.config.AdvancedCookiesSamesite = v
+ st.config.AdminMediaPruneDryRun = v
st.reloadToViper()
}
-// AdvancedCookiesSamesiteFlag returns the flag name for the 'AdvancedCookiesSamesite' field
-func AdvancedCookiesSamesiteFlag() string { return "advanced-cookies-samesite" }
+// AdminMediaPruneDryRunFlag returns the flag name for the 'AdminMediaPruneDryRun' field
+func AdminMediaPruneDryRunFlag() string { return "dry-run" }
-// GetAdvancedCookiesSamesite safely fetches the value for global configuration 'AdvancedCookiesSamesite' field
-func GetAdvancedCookiesSamesite() string { return global.GetAdvancedCookiesSamesite() }
+// GetAdminMediaPruneDryRun safely fetches the value for global configuration 'AdminMediaPruneDryRun' field
+func GetAdminMediaPruneDryRun() bool { return global.GetAdminMediaPruneDryRun() }
-// SetAdvancedCookiesSamesite safely sets the value for global configuration 'AdvancedCookiesSamesite' field
-func SetAdvancedCookiesSamesite(v string) { global.SetAdvancedCookiesSamesite(v) }
-
-// GetAdvancedRateLimitRequests safely fetches the Configuration value for state's 'AdvancedRateLimitRequests' field
-func (st *ConfigState) GetAdvancedRateLimitRequests() (v int) {
- st.mutex.Lock()
- v = st.config.AdvancedRateLimitRequests
- st.mutex.Unlock()
- return
-}
-
-// SetAdvancedRateLimitRequests safely sets the Configuration value for state's 'AdvancedRateLimitRequests' field
-func (st *ConfigState) SetAdvancedRateLimitRequests(v int) {
- st.mutex.Lock()
- defer st.mutex.Unlock()
- st.config.AdvancedRateLimitRequests = v
- st.reloadToViper()
-}
-
-// AdvancedRateLimitRequestsFlag returns the flag name for the 'AdvancedRateLimitRequests' field
-func AdvancedRateLimitRequestsFlag() string { return "advanced-rate-limit-requests" }
-
-// GetAdvancedRateLimitRequests safely fetches the value for global configuration 'AdvancedRateLimitRequests' field
-func GetAdvancedRateLimitRequests() int { return global.GetAdvancedRateLimitRequests() }
-
-// SetAdvancedRateLimitRequests safely sets the value for global configuration 'AdvancedRateLimitRequests' field
-func SetAdvancedRateLimitRequests(v int) { global.SetAdvancedRateLimitRequests(v) }
+// SetAdminMediaPruneDryRun safely sets the value for global configuration 'AdminMediaPruneDryRun' field
+func SetAdminMediaPruneDryRun(v bool) { global.SetAdminMediaPruneDryRun(v) }
diff --git a/internal/media/manager.go b/internal/media/manager.go
index d04f161d4..67c03fb31 100644
--- a/internal/media/manager.go
+++ b/internal/media/manager.go
@@ -91,6 +91,12 @@ type Manager interface {
//
// The returned int is the amount of media that was pruned by this function.
PruneUnusedLocalAttachments(ctx context.Context) (int, error)
+ // PruneOrphaned prunes files that exist in storage but which do not have a corresponding
+ // entry in the database.
+ //
+ // If dry is true, then nothing will be changed, only the amount that *would* be removed
+ // is returned to the caller.
+ PruneOrphaned(ctx context.Context, dry bool) (int, error)
// Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
diff --git a/internal/media/pruneorphaned.go b/internal/media/pruneorphaned.go
new file mode 100644
index 000000000..0d733cce5
--- /dev/null
+++ b/internal/media/pruneorphaned.go
@@ -0,0 +1,127 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package media
+
+import (
+ "context"
+ "errors"
+ "fmt"
+
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
+ "github.com/superseriousbusiness/gotosocial/internal/regexes"
+)
+
+// PruneOrphaned deletes (or, when dry is true, only counts) stored files that have no database entry.
+func (m *manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) {
+	var totalPruned int
+
+	// keys in storage will look like the following:
+	// `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[FILE_NAME]`
+	// we can filter out keys we're not interested in by
+	// matching through a regex
+	var matchCount int
+	match := func(storageKey string) bool {
+		if regexes.FilePath.MatchString(storageKey) {
+			matchCount++
+			return true
+		}
+		return false
+	}
+
+	log.Info("checking storage keys for orphaned pruning candidates...")
+	iterator, err := m.storage.Iterator(ctx, match)
+	if err != nil {
+		return 0, fmt.Errorf("PruneOrphaned: error getting storage iterator: %w", err)
+	}
+
+	// for each key in the iterator, check if entry is orphaned.
+	//
+	// Next is called before every Key, so the final key is checked
+	// too; an empty iterator simply never enters the loop body.
+	log.Infof("got %d orphaned pruning candidates, checking for orphaned status, please wait...", matchCount)
+	var checkedKeys int
+	orphanedKeys := make([]string, 0, matchCount)
+	for iterator.Next() {
+		key := iterator.Key()
+		if m.orphaned(ctx, key) {
+			orphanedKeys = append(orphanedKeys, key)
+		}
+		checkedKeys++
+		if checkedKeys%50 == 0 {
+			log.Infof("checked %d of %d orphaned pruning candidates...", checkedKeys, matchCount)
+		}
+	}
+	// release on every path (including "no keys") before storage is touched again
+	iterator.Release()
+
+	if !dry {
+		// the real deal, we have to delete stuff
+		for _, key := range orphanedKeys {
+			log.Infof("key %s corresponds to orphaned media, will remove it now", key)
+			if err := m.storage.Delete(ctx, key); err != nil {
+				log.Errorf("error deleting item with key %s from storage: %s", key, err)
+				continue
+			}
+			totalPruned++
+		}
+	} else {
+		// just a dry run, don't delete anything
+		for _, key := range orphanedKeys {
+			log.Infof("DRY RUN: key %s corresponds to orphaned media which would be deleted", key)
+			totalPruned++
+		}
+	}
+
+	return totalPruned, nil
+}
+
+// orphaned reports whether key should be treated as orphaned, i.e. no database entry exists for it.
+func (m *manager) orphaned(ctx context.Context, key string) bool {
+	parts := regexes.FilePath.FindStringSubmatch(key)
+	if len(parts) != 6 {
+		return false
+	}
+	// capture 2 is the media type, capture 4 the media ID
+	switch id := parts[4]; Type(parts[2]) {
+	case TypeAttachment, TypeHeader, TypeAvatar:
+		_, err := m.db.GetAttachmentByID(ctx, id)
+		if err == nil {
+			return false
+		}
+		if errors.Is(err, db.ErrNoEntries) {
+			return true
+		}
+		log.Errorf("orphaned: error calling GetAttachmentByID: %s", err)
+	case TypeEmoji:
+		_, err := m.db.GetEmojiByID(ctx, id)
+		if err == nil {
+			return false
+		}
+		if errors.Is(err, db.ErrNoEntries) {
+			return true
+		}
+		log.Errorf("orphaned: error calling GetEmojiByID: %s", err)
+	default:
+		// a media type we don't recognise counts as orphaned
+		return true
+	}
+	// the lookup failed for some other reason, so don't flag the key
+	return false
+}
diff --git a/internal/media/pruneorphaned_test.go b/internal/media/pruneorphaned_test.go
new file mode 100644
index 000000000..f791bde12
--- /dev/null
+++ b/internal/media/pruneorphaned_test.go
@@ -0,0 +1,82 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package media_test
+
+import (
+ "bytes"
+ "context"
+ "os"
+ "testing"
+
+ "github.com/stretchr/testify/suite"
+)
+
+type PruneOrphanedTestSuite struct {
+ MediaStandardTestSuite
+}
+
+func (suite *PruneOrphanedTestSuite) TestPruneOrphanedDry() {
+	ctx := context.Background()
+	const orphanKey = "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
+	// put a big panda into storage under a key that has no database entry
+	pandaBytes, readErr := os.ReadFile("./test/big-panda.gif")
+	if readErr != nil {
+		panic(readErr)
+	}
+	if putErr := suite.storage.PutStream(ctx, orphanKey, bytes.NewBuffer(pandaBytes)); putErr != nil {
+		panic(putErr)
+	}
+
+	// a dry run should report exactly 1 orphaned panda
+	count, pruneErr := suite.manager.PruneOrphaned(ctx, true)
+	suite.NoError(pruneErr)
+	suite.Equal(1, count)
+
+	// and the panda should not have been removed from storage
+	stillStored, hasErr := suite.storage.Has(ctx, orphanKey)
+	suite.NoError(hasErr)
+	suite.True(stillStored)
+}
+
+func (suite *PruneOrphanedTestSuite) TestPruneOrphanedMoist() {
+	ctx := context.Background()
+	const orphanKey = "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
+	// put a big panda into storage under a key that has no database entry
+	pandaBytes, readErr := os.ReadFile("./test/big-panda.gif")
+	if readErr != nil {
+		panic(readErr)
+	}
+	if putErr := suite.storage.PutStream(ctx, orphanKey, bytes.NewBuffer(pandaBytes)); putErr != nil {
+		panic(putErr)
+	}
+
+	// a real run should report exactly 1 orphaned panda
+	count, pruneErr := suite.manager.PruneOrphaned(ctx, false)
+	suite.NoError(pruneErr)
+	suite.Equal(1, count)
+
+	// and the panda should be gone from storage afterwards
+	stillStored, hasErr := suite.storage.Has(ctx, orphanKey)
+	suite.NoError(hasErr)
+	suite.False(stillStored)
+}
+
+func TestPruneOrphanedTestSuite(t *testing.T) {
+	suite.Run(t, new(PruneOrphanedTestSuite))
+}
diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go
index c9286611e..657a79b91 100644
--- a/internal/regexes/regexes.go
+++ b/internal/regexes/regexes.go
@@ -140,6 +140,13 @@
// BlockPath parses a path that validates and captures the username part and the ulid part
// from eg /users/example_username/blocks/01F7XT5JZW1WMVSW1KADS8PVDH
BlockPath = regexp.MustCompile(blockPath)
+
+ filePath = fmt.Sprintf(`^(%s)/([a-z]+)/([a-z]+)/(%s)\.([a-z]+)$`, ulid, ulid)
+ // FilePath parses a file storage path of the form [ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[FILE_NAME]
+ // eg 01F8MH1H7YV1Z7D2C8K2730QBF/attachment/small/01F8MH8RMYQ6MSNY3JM2XT1CQ5.jpeg
+ // It captures the account id, media type, media size, file name, and file extension, eg
+ // `01F8MH1H7YV1Z7D2C8K2730QBF`, `attachment`, `small`, `01F8MH8RMYQ6MSNY3JM2XT1CQ5`, `jpeg`.
+ FilePath = regexp.MustCompile(filePath)
)
// bufpool is a memory pool of byte buffers for use in our regex utility functions.
diff --git a/test/envparsing.sh b/test/envparsing.sh
index 5522f1e18..1ef11ecaa 100755
--- a/test/envparsing.sh
+++ b/test/envparsing.sh
@@ -2,7 +2,7 @@
set -eu
-EXPECT='{"account-domain":"peepee","accounts-allow-custom-css":true,"accounts-approval-required":false,"accounts-reason-required":false,"accounts-registration-open":true,"advanced-cookies-samesite":"strict","advanced-rate-limit-requests":6969,"application-name":"gts","bind-address":"127.0.0.1","config-path":"internal/config/testdata/test.yaml","db-address":":memory:","db-database":"gotosocial_prod","db-password":"hunter2","db-port":6969,"db-tls-ca-cert":"","db-tls-mode":"disable","db-type":"sqlite","db-user":"sex-haver","email":"","host":"example.com","instance-deliver-to-shared-inboxes":false,"instance-expose-peers":true,"instance-expose-public-timeline":true,"instance-expose-suspended":true,"landing-page-user":"admin","letsencrypt-cert-dir":"/gotosocial/storage/certs","letsencrypt-email-address":"","letsencrypt-enabled":true,"letsencrypt-port":80,"log-db-queries":true,"log-level":"info","media-description-max-chars":5000,"media-description-min-chars":69,"media-emoji-local-max-size":420,"media-emoji-remote-max-size":420,"media-image-max-size":420,"media-remote-cache-days":30,"media-video-max-size":420,"oidc-client-id":"1234","oidc-client-secret":"shhhh its a 
secret","oidc-enabled":true,"oidc-idp-name":"sex-haver","oidc-issuer":"whoknows","oidc-scopes":["read","write"],"oidc-skip-verification":true,"password":"","path":"","port":6969,"protocol":"http","smtp-from":"queen.rip.in.piss@terfisland.org","smtp-host":"example.com","smtp-password":"hunter2","smtp-port":4269,"smtp-username":"sex-haver","software-version":"","statuses-cw-max-chars":420,"statuses-max-chars":69,"statuses-media-max-files":1,"statuses-poll-max-options":1,"statuses-poll-option-max-chars":50,"storage-backend":"local","storage-local-base-path":"/root/store","storage-s3-access-key":"minio","storage-s3-bucket":"gts","storage-s3-endpoint":"localhost:9000","storage-s3-proxy":true,"storage-s3-secret-key":"miniostorage","storage-s3-use-ssl":false,"syslog-address":"127.0.0.1:6969","syslog-enabled":true,"syslog-protocol":"udp","trusted-proxies":["127.0.0.1/32","docker.host.local"],"username":"","web-asset-base-dir":"/root","web-template-base-dir":"/root"}'
+EXPECT='{"account-domain":"peepee","accounts-allow-custom-css":true,"accounts-approval-required":false,"accounts-reason-required":false,"accounts-registration-open":true,"advanced-cookies-samesite":"strict","advanced-rate-limit-requests":6969,"application-name":"gts","bind-address":"127.0.0.1","config-path":"internal/config/testdata/test.yaml","db-address":":memory:","db-database":"gotosocial_prod","db-password":"hunter2","db-port":6969,"db-tls-ca-cert":"","db-tls-mode":"disable","db-type":"sqlite","db-user":"sex-haver","dry-run":false,"email":"","host":"example.com","instance-deliver-to-shared-inboxes":false,"instance-expose-peers":true,"instance-expose-public-timeline":true,"instance-expose-suspended":true,"landing-page-user":"admin","letsencrypt-cert-dir":"/gotosocial/storage/certs","letsencrypt-email-address":"","letsencrypt-enabled":true,"letsencrypt-port":80,"log-db-queries":true,"log-level":"info","media-description-max-chars":5000,"media-description-min-chars":69,"media-emoji-local-max-size":420,"media-emoji-remote-max-size":420,"media-image-max-size":420,"media-remote-cache-days":30,"media-video-max-size":420,"oidc-client-id":"1234","oidc-client-secret":"shhhh its a 
secret","oidc-enabled":true,"oidc-idp-name":"sex-haver","oidc-issuer":"whoknows","oidc-scopes":["read","write"],"oidc-skip-verification":true,"password":"","path":"","port":6969,"protocol":"http","smtp-from":"queen.rip.in.piss@terfisland.org","smtp-host":"example.com","smtp-password":"hunter2","smtp-port":4269,"smtp-username":"sex-haver","software-version":"","statuses-cw-max-chars":420,"statuses-max-chars":69,"statuses-media-max-files":1,"statuses-poll-max-options":1,"statuses-poll-option-max-chars":50,"storage-backend":"local","storage-local-base-path":"/root/store","storage-s3-access-key":"minio","storage-s3-bucket":"gts","storage-s3-endpoint":"localhost:9000","storage-s3-proxy":true,"storage-s3-secret-key":"miniostorage","storage-s3-use-ssl":false,"syslog-address":"127.0.0.1:6969","syslog-enabled":true,"syslog-protocol":"udp","trusted-proxies":["127.0.0.1/32","docker.host.local"],"username":"","web-asset-base-dir":"/root","web-template-base-dir":"/root"}'
# Set all the environment variables to
# ensure that these are parsed without panic