[feature] Add media list command (#1943)

* [feature] Add media list command

This is an attempt to help alleviate #1776. Using admin media list
--local the full path to each local media file will be printed, with a
newline. The output of this should be feadable into backup tools in
order to allow to backup local media too. Together with the database
this should allow to fully recover from the loss of an instance.

The list command also gets a --remote flag for symmetry. In the case
of --remote we print the RemoteURL instead, the location the asset can
be retrieved from.

To get all media, you can run with --local and --remote.

* [bugfix] Fix the test failures

* [feature] Reimplement list media as top commands

This changes the implementation of admin media list --<variant> to two
separate top-level commands, list-local and list-remote.

The implementation now iterates over over the database in batches of 200
in order to avoid loading all media metadata into memory.

* [feature] Implement ListMedia with filter callback

This does away with the somewhat odd iterator-like structure we had
before and does away with most of the loop duplication in list-local and
list-remote. Instead they call GetAllMediaPaths with a filter func to
select the media they want. That's accumulated into a slice and
eventually returned.

* [bugfix] Simplify remote filter

Since we don't append the empty string anywhere, the remote filter can
be limited to returning RemoteURL, as that'll be an empty string for
local media.

* [docs] Add media list commands to CLI reference

---------

Co-authored-by: tobi <31960611+tsmethurst@users.noreply.github.com>
This commit is contained in:
Daenney 2023-07-07 11:35:05 +02:00 committed by GitHub
parent e70bf8a6c8
commit 81f33c3b9f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 206 additions and 0 deletions

View file

@ -0,0 +1,165 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package media
import (
"bufio"
"context"
"fmt"
"os"
"path"
"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/db/bundb"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/state"
)
type list struct {
dbService db.DB
state *state.State
maxID string
limit int
out *bufio.Writer
}
func (l *list) GetAllMediaPaths(ctx context.Context, filter func(*gtsmodel.MediaAttachment) string) ([]string, error) {
res := make([]string, 0, 100)
for {
attachments, err := l.dbService.GetAttachments(ctx, l.maxID, l.limit)
if err != nil {
return nil, fmt.Errorf("failed to retrieve media metadata from database: %w", err)
}
for _, a := range attachments {
v := filter(a)
if v != "" {
res = append(res, v)
}
}
// If we got less results than our limit, we've reached the
// last page to retrieve and we can break the loop. If the
// last batch happens to contain exactly the same amount of
// items as the limit we'll end up doing one extra query.
if len(attachments) < l.limit {
break
}
// Grab the last ID from the batch and set it as the maxID
// that'll be used in the next iteration so we don't get items
// we've already seen.
l.maxID = attachments[len(attachments)-1].ID
}
return res, nil
}
func setupList(ctx context.Context) (*list, error) {
var state state.State
state.Caches.Init()
state.Caches.Start()
state.Workers.Start()
dbService, err := bundb.NewBunDBService(ctx, &state)
if err != nil {
return nil, fmt.Errorf("error creating dbservice: %w", err)
}
state.DB = dbService
return &list{
dbService: dbService,
state: &state,
limit: 200,
maxID: "",
out: bufio.NewWriter(os.Stdout),
}, nil
}
func (l *list) shutdown(ctx context.Context) error {
l.out.Flush()
err := l.dbService.Stop(ctx)
l.state.Workers.Stop()
l.state.Caches.Stop()
return err
}
var ListLocal action.GTSAction = func(ctx context.Context) error {
list, err := setupList(ctx)
if err != nil {
return err
}
defer func() {
// Ensure lister gets shutdown on exit.
if err := list.shutdown(ctx); err != nil {
log.Error(ctx, err)
}
}()
mediaPath := config.GetStorageLocalBasePath()
media, err := list.GetAllMediaPaths(
ctx,
func(m *gtsmodel.MediaAttachment) string {
if m.RemoteURL == "" {
return path.Join(mediaPath, m.File.Path)
}
return ""
})
if err != nil {
return err
}
for _, m := range media {
_, _ = list.out.WriteString(m + "\n")
}
return nil
}
var ListRemote action.GTSAction = func(ctx context.Context) error {
list, err := setupList(ctx)
if err != nil {
return err
}
defer func() {
// Ensure lister gets shutdown on exit.
if err := list.shutdown(ctx); err != nil {
log.Error(ctx, err)
}
}()
media, err := list.GetAllMediaPaths(
ctx,
func(m *gtsmodel.MediaAttachment) string {
return m.RemoteURL
})
if err != nil {
return err
}
for _, m := range media {
_, _ = list.out.WriteString(m + "\n")
}
return nil
}

View file

@ -20,6 +20,7 @@
import (
"github.com/spf13/cobra"
"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/account"
"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/media"
"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/media/prune"
"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/trans"
"github.com/superseriousbusiness/gotosocial/internal/config"
@ -173,6 +174,34 @@ func adminCommands() *cobra.Command {
Short: "admin commands related to stored media / emojis",
}
/*
ADMIN MEDIA LIST COMMANDS
*/
adminMediaListLocalCmd := &cobra.Command{
Use: "list-local",
Short: "admin command to list media on local storage",
PreRunE: func(cmd *cobra.Command, args []string) error {
return preRun(preRunArgs{cmd: cmd})
},
RunE: func(cmd *cobra.Command, args []string) error {
return run(cmd.Context(), media.ListLocal)
},
}
adminMediaListRemoteCmd := &cobra.Command{
Use: "list-remote",
Short: "admin command to list remote media cached on this instance",
PreRunE: func(cmd *cobra.Command, args []string) error {
return preRun(preRunArgs{cmd: cmd})
},
RunE: func(cmd *cobra.Command, args []string) error {
return run(cmd.Context(), media.ListRemote)
},
}
adminMediaCmd.AddCommand(adminMediaListLocalCmd, adminMediaListRemoteCmd)
/*
ADMIN MEDIA PRUNE COMMANDS
*/

View file

@ -255,6 +255,18 @@ Example:
gotosocial admin import --path example.json --config-path config.yaml
```
### gotosocial admin media list-local
This command can be used to list local media. Local media is media that belongs to posts by users with an account on the instance.
The output will be a list of files. The list can be used to drive your backups.
### gotosocial admin media list-remote
This is the corollary to list-local, but instead lists media from remote instances. Remote media belongs to other instances, but was attached to a post we received over federation and have potentially cached locally.
The output will be a list of URLs to retrieve the original content from. GoToSocial automatically retrieves remote media when it needs it, so you should never need to do so yourself.
### gotosocial admin media prune orphaned
This command can be used to prune orphaned media from your GoToSocial.