// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

package cleaner

import (
	"context"
	"errors"
	"time"

	"github.com/superseriousbusiness/gotosocial/internal/db"
	"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
	"github.com/superseriousbusiness/gotosocial/internal/gtserror"
	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
	"github.com/superseriousbusiness/gotosocial/internal/log"
	"github.com/superseriousbusiness/gotosocial/internal/media"
	"github.com/superseriousbusiness/gotosocial/internal/paging"
	"github.com/superseriousbusiness/gotosocial/internal/regexes"
	"github.com/superseriousbusiness/gotosocial/internal/uris"
)

// Media groups together the media cleanup
// and admin utilities. It embeds *Cleaner,
// whose fields and methods are promoted
// onto Media.
type Media struct {
	*Cleaner
}

// All will execute all cleaner.Media utilities synchronously, including output logging.
// The steps run in a fixed order: uncache remote media older than maxRemoteDays days,
// prune orphaned files, prune unused attachments, fix cache states, and finally ask
// the storage driver to clean itself. A failing step is logged and the next one still runs.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (m *Media) All(ctx context.Context, maxRemoteDays int) {
	// Anything created before this instant counts as "too old".
	t := time.Now().Add(-24 * time.Hour * time.Duration(maxRemoteDays))

	m.LogUncacheRemote(ctx, t)
	m.LogPruneOrphaned(ctx)
	m.LogPruneUnused(ctx)
	m.LogFixCacheStates(ctx)

	// Storage cleaning stays best-effort (nothing is returned
	// to the caller), but a failure is logged instead of being
	// silently discarded.
	//
	// NOTE(review): Clean is invoked regardless of the dry-run
	// flag; confirm whether it mutates storage and should be
	// skipped on dry runs.
	if err := m.state.Storage.Storage.Clean(ctx); err != nil {
		log.Errorf(ctx, "error cleaning storage: %v", err)
	}
}

// LogUncacheRemote wraps Media.UncacheRemote(...) with logging: the cut-off
// time is logged up front, then either the error or the uncached count.
func (m *Media) LogUncacheRemote(ctx context.Context, olderThan time.Time) {
	log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp))

	count, err := m.UncacheRemote(ctx, olderThan)
	if err != nil {
		log.Error(ctx, err)
		return
	}

	log.Infof(ctx, "uncached: %d", count)
}

// LogPruneOrphaned wraps Media.PruneOrphaned(...) with logging: a start
// message first, then either the error or the number of pruned files.
func (m *Media) LogPruneOrphaned(ctx context.Context) {
	log.Info(ctx, "start")

	count, err := m.PruneOrphaned(ctx)
	if err != nil {
		log.Error(ctx, err)
		return
	}

	log.Infof(ctx, "pruned: %d", count)
}

// LogPruneUnused wraps Media.PruneUnused(...) with logging: a start
// message first, then either the error or the number of pruned attachments.
func (m *Media) LogPruneUnused(ctx context.Context) {
	log.Info(ctx, "start")

	count, err := m.PruneUnused(ctx)
	if err != nil {
		log.Error(ctx, err)
		return
	}

	log.Infof(ctx, "pruned: %d", count)
}

// LogFixCacheStates wraps Media.FixCacheStates(...) with logging: a start
// message first, then either the error or the number of fixed attachments.
func (m *Media) LogFixCacheStates(ctx context.Context) {
	log.Info(ctx, "start")

	count, err := m.FixCacheStates(ctx)
	if err != nil {
		log.Error(ctx, err)
		return
	}

	log.Infof(ctx, "fixed: %d", count)
}

// PruneOrphaned walks every key in storage, collects those that isOrphaned reports
// as orphaned (i.e. media missing a database entry), and hands them to removeFiles.
// Keys not matching the fileserver path format are logged as a warning and left alone.
// The count and error of the removeFiles call are returned as-is; a walk failure yields 0.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (m *Media) PruneOrphaned(ctx context.Context) (int, error) {
	// Storage keys found to be orphaned.
	var orphans []string

	// Media files in storage are expected to sit at keys
	// shaped like: {$account}/{$type}/{$size}/{$id}.{$ext}
	err := m.state.Storage.WalkKeys(ctx, func(key string) error {
		if !regexes.FilePath.MatchString(key) {
			// Not in the expected fileserver
			// path format; warn and move on.
			log.Warnf(ctx, "unexpected storage item: %s", key)
			return nil
		}

		isOrphan, err := m.isOrphaned(ctx, key)
		switch {
		case err != nil:
			// Returning the error aborts the walk.
			return gtserror.Newf("error checking orphaned status: %w", err)

		case isOrphan:
			// Remember this key for removal.
			orphans = append(orphans, key)
		}

		return nil
	})
	if err != nil {
		return 0, gtserror.Newf("error walking storage: %w", err)
	}

	// Remove everything that was collected.
	return m.removeFiles(ctx, orphans...)
}

// PruneUnused pages through all media attachments and prunes each one that is unused,
// deleting it from the database and storage driver. Media counts as unused if it is
// not attached to any status or account, or if the owning account is suspended.
// The returned count is the number of attachments pruned so far, also on error.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (m *Media) PruneUnused(ctx context.Context) (int, error) {
	var pruned int

	// Select at most selectLimit attachments per page.
	page := paging.Page{Limit: selectLimit}

	for {
		// Fetch the next batch of attachments below the current max ID.
		batch, err := m.state.DB.GetAttachments(ctx, &page)
		if err != nil && !errors.Is(err, db.ErrNoEntries) {
			return pruned, gtserror.Newf("error getting attachments: %w", err)
		}

		if len(batch) == 0 {
			// Nothing left to look at.
			return pruned, nil
		}

		lastID := batch[len(batch)-1].ID
		if lastID == page.Max.Value {
			// Same group returned again: reached the end.
			return pruned, nil
		}

		// The last ID of this batch bounds the next page.
		page.Max = paging.MaxID(lastID)

		for _, attachment := range batch {
			// Check / prune this attachment if unused.
			deleted, err := m.pruneUnused(ctx, attachment)
			if err != nil {
				return pruned, err
			}

			if deleted {
				pruned++
			}
		}
	}
}

// UncacheRemote pages through cached attachments created before the given time and
// uncaches each one that uncacheRemote decides is no longer recently used.
// The returned count is the number of attachments uncached so far, also on error.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, error) {
	// Move the cut-off back by one minute to improve the
	// search (i.e. make it an olderThan-inclusive search).
	// This value stays fixed: it is what each attachment's
	// account / status fetch time gets compared against.
	cutoff := olderThan.Add(-time.Minute)

	// Paging cursor; moves to the last
	// created-at of every batch fetched.
	cursor := cutoff

	var uncached int

	for {
		// Fetch the next batch of cached attachments older than the cursor.
		batch, err := m.state.DB.GetCachedAttachmentsOlderThan(ctx, cursor, selectLimit)
		if err != nil && !errors.Is(err, db.ErrNoEntries) {
			return uncached, gtserror.Newf("error getting remote attachments: %w", err)
		}

		if len(batch) == 0 {
			// Nothing left to look at.
			return uncached, nil
		}

		lastCreatedAt := batch[len(batch)-1].CreatedAt
		if cursor.Equal(lastCreatedAt) {
			// Same group returned again: reached the end.
			return uncached, nil
		}

		// Continue the next query from here.
		cursor = lastCreatedAt

		for _, attachment := range batch {
			// Check / uncache this remote attachment.
			done, err := m.uncacheRemote(ctx, cutoff, attachment)
			if err != nil {
				return uncached, err
			}

			if done {
				uncached++
			}
		}
	}
}

// FixCacheStates pages through all remote media attachments and checks that each
// one's cache status is up-to-date with what is actually in the storage driver.
// Media marked as cached, with any required files missing, will be automatically uncached.
// The returned count is the number of attachments fixed so far, also on error.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (m *Media) FixCacheStates(ctx context.Context) (int, error) {
	var fixedCount int

	// Select at most selectLimit attachments per page.
	page := paging.Page{Limit: selectLimit}

	for {
		// Fetch the next batch of remote attachments below the current max ID.
		batch, err := m.state.DB.GetRemoteAttachments(ctx, &page)
		if err != nil && !errors.Is(err, db.ErrNoEntries) {
			return fixedCount, gtserror.Newf("error getting remote attachments: %w", err)
		}

		if len(batch) == 0 {
			// Nothing left to look at.
			return fixedCount, nil
		}

		lastID := batch[len(batch)-1].ID
		if lastID == page.Max.Value {
			// Same group returned again: reached the end.
			return fixedCount, nil
		}

		// The last ID of this batch bounds the next page.
		page.Max = paging.MaxID(lastID)

		for _, attachment := range batch {
			// Check / fix this attachment's cache state.
			changed, err := m.fixCacheState(ctx, attachment)
			if err != nil {
				return fixedCount, err
			}

			if changed {
				fixedCount++
			}
		}
	}
}

// isOrphaned reports whether the file at the given storage path has no matching
// database entry. Attachment paths are looked up by media ID, emoji paths by their
// generated static URL. Paths that do not split into the expected parts, and media
// types other than attachment / emoji, are never reported as orphaned.
func (m *Media) isOrphaned(ctx context.Context, path string) (bool, error) {
	parts := regexes.FilePath.FindStringSubmatch(path)
	if len(parts) != 6 {
		// Doesn't match our expectations, so it
		// wasn't created by gts; leave it alone.
		return false, nil
	}

	// Submatch layout:
	//   [0] whole match
	//   [1] account ID
	//   [2] media type
	//   [3] media sub-type (e.g. small, static)
	//   [4] media ID
	//   [5] file extension
	accountID, kind, id := parts[1], parts[2], parts[4]

	// Log entry tagged with the media ID.
	entry := log.WithContext(ctx).
		WithField("media", id)

	switch media.Type(kind) {
	case media.TypeAttachment:
		// Attachments are stored in the database by ID.
		attachment, err := m.state.DB.GetAttachmentByID(
			gtscontext.SetBarebones(ctx),
			id,
		)
		if err != nil && !errors.Is(err, db.ErrNoEntries) {
			return false, gtserror.Newf("error fetching media by id %s: %w", id, err)
		}

		if attachment != nil {
			// Still known to the database.
			return false, nil
		}

		entry.Debug("missing db entry for media")
		return true, nil

	case media.TypeEmoji:
		// The media ID part of the storage key for emojis
		// can change for refreshed items, so generate the
		// static URL for this emoji and search by that.
		// accountID here is the instance account ID.
		staticURL := uris.URIForAttachment(
			accountID,
			string(media.TypeEmoji),
			string(media.SizeStatic),
			id,
			"png",
		)

		emoji, err := m.state.DB.GetEmojiByStaticURL(
			gtscontext.SetBarebones(ctx),
			staticURL,
		)
		if err != nil && !errors.Is(err, db.ErrNoEntries) {
			return false, gtserror.Newf("error fetching emoji by url %s: %w", staticURL, err)
		}

		if emoji != nil {
			// Still known to the database.
			return false, nil
		}

		entry.Debug("missing db entry for emoji")
		return true, nil
	}

	// Any other media type is not checked.
	return false, nil
}

// pruneUnused checks a single media attachment and deletes it via m.delete
// when nothing is found to be using it. The checks run in this order:
//
//  1. AccountID is set but the account cannot be found: delete.
//  2. The account exists, is not suspended, and uses this media as
//     its current header or avatar: keep.
//  3. StatusID is set but the status cannot be found: delete.
//  4. The status exists and still lists this media in AttachmentIDs: keep.
//  5. Otherwise: delete.
//
// The returned bool reports whether the attachment was selected for deletion;
// it is also true on a dry run, where m.delete returns without doing anything.
func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) {
	// Start a log entry for media.
	l := log.WithContext(ctx).
		WithField("media", media.ID)

	// Check whether we have the account that owns the media.
	account, missing, err := m.getOwningAccount(ctx, media)
	if err != nil {
		return false, err
	} else if missing {
		l.Debug("deleting due to missing account")
		return true, m.delete(ctx, media)
	}

	if account != nil {
		// Related account exists for this media, check whether it is being used.
		// Media of a suspended account is never treated as in use here.
		headerInUse := (*media.Header && media.ID == account.HeaderMediaAttachmentID)
		avatarInUse := (*media.Avatar && media.ID == account.AvatarMediaAttachmentID)
		if (headerInUse || avatarInUse) && account.SuspendedAt.IsZero() {
			l.Debug("skipping as account media in use")
			return false, nil
		}
	}

	// Check whether we have the required status for media.
	status, missing, err := m.getRelatedStatus(ctx, media)
	if err != nil {
		return false, err
	} else if missing {
		l.Debug("deleting due to missing status")
		return true, m.delete(ctx, media)
	}

	if status != nil {
		// Check whether still attached to status.
		for _, id := range status.AttachmentIDs {
			if id == media.ID {
				l.Debug("skipping as attached to status")
				return false, nil
			}
		}
	}

	// Media totally unused, delete it.
	l.Debug("deleting unused media")
	return true, m.delete(ctx, media)
}

// fixCacheState brings one attachment's `Cached` flag and its stored files back in
// line with each other. Media whose owning account or related status is set but
// cannot be found is skipped. Otherwise:
//
//   - flagged cached, files missing: the attachment is uncached via m.uncache;
//   - flagged uncached, files present: the files are removed via m.removeFiles.
//
// The returned bool reports whether one of these two fixes was attempted.
func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) {
	// Log entry tagged with the media ID.
	entry := log.WithContext(ctx).
		WithField("media", media.ID)

	// The owning account must still be around.
	_, noAccount, err := m.getOwningAccount(ctx, media)
	switch {
	case err != nil:
		return false, err
	case noAccount:
		entry.Debug("skipping due to missing account")
		return false, nil
	}

	// Likewise for the related status.
	_, noStatus, err := m.getRelatedStatus(ctx, media)
	switch {
	case err != nil:
		return false, err
	case noStatus:
		entry.Debug("skipping due to missing status")
		return false, nil
	}

	// Ask storage whether thumbnail and original are present.
	present, err := m.haveFiles(ctx,
		media.Thumbnail.Path,
		media.File.Path,
	)
	if err != nil {
		return false, err
	}

	cached := *media.Cached
	if cached == present {
		// Flag and storage agree; nothing to fix.
		return false, nil
	}

	if cached {
		// Flagged cached but the expected files
		// don't exist: mark it as uncached.
		entry.Debug("cached=true exists=false => uncaching")
		return true, m.uncache(ctx, media)
	}

	// Flagged uncached but files exist that
	// we don't expect: remove them.
	entry.Debug("cached=false exists=true => deleting")
	_, err = m.removeFiles(ctx,
		media.Thumbnail.Path,
		media.File.Path,
	)
	return true, err
}

// uncacheRemote decides whether one cached attachment should be uncached, and
// uncaches it via m.uncache if so. Media is left cached when:
//
//   - it is already marked as uncached;
//   - its owning account (avatar / header) or related status (anything else)
//     is set but cannot be found;
//   - that account or status was fetched after the given time; or
//   - the related status is bookmarked.
//
// The returned bool reports whether the attachment was selected for uncaching.
func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmodel.MediaAttachment) (bool, error) {
	if !*media.Cached {
		// Nothing to do, already uncached.
		return false, nil
	}

	// Log entry tagged with the media ID.
	entry := log.WithContext(ctx).
		WithField("media", media.ID)

	// Avatars and headers are judged by how recently their
	// account was seen; all other media by how recently the
	// status they are attached to was seen.
	if *media.Avatar || *media.Header {
		account, missing, err := m.getOwningAccount(ctx, media)
		switch {
		case err != nil:
			return false, err

		case missing:
			// PruneUnused will take care of this case.
			entry.Debug("skipping due to missing account")
			return false, nil

		case account != nil && account.FetchedAt.After(after):
			entry.Debug("skipping due to recently fetched account")
			return false, nil
		}
	} else {
		status, missing, err := m.getRelatedStatus(ctx, media)
		switch {
		case err != nil:
			return false, err

		case missing:
			// PruneUnused will take care of this case.
			entry.Debug("skipping due to missing status")
			return false, nil

		case status == nil:
			// No related status to check against.

		case status.FetchedAt.After(after):
			entry.Debug("skipping due to recently fetched status")
			return false, nil

		default:
			// Keep media of bookmarked statuses cached.
			bookmarked, err := m.state.DB.IsStatusBookmarked(ctx, status.ID)
			if err != nil {
				return false, err
			}

			if bookmarked {
				entry.Debug("skipping due to bookmarked status")
				return false, nil
			}
		}
	}

	// None of the keep conditions applied: too old, uncache it.
	entry.Debug("uncaching old remote media")
	return true, m.uncache(ctx, media)
}

// getOwningAccount loads the account that owns the given media. The returned bool
// is true only when the media has an AccountID set but no such account was found.
// Media without an AccountID yields (nil, false, nil).
func (m *Media) getOwningAccount(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Account, bool, error) {
	accountID := media.AccountID
	if accountID == "" {
		// Media isn't tied to any account.
		return nil, false, nil
	}

	// Look up the owning account by its ID.
	account, err := m.state.DB.GetAccountByID(
		gtscontext.SetBarebones(ctx),
		accountID,
	)

	switch {
	case err != nil && !errors.Is(err, db.ErrNoEntries):
		// A real database error, not just "no entries".
		return nil, false, gtserror.Newf("error fetching account by id %s: %w", accountID, err)

	case account == nil:
		// ID is set, but the account is gone.
		return nil, true, nil

	default:
		return account, false, nil
	}
}

// getRelatedStatus loads the status related to the given media. The returned bool
// is true only when the media has a StatusID set but no such status was found.
// Media without a StatusID yields (nil, false, nil).
func (m *Media) getRelatedStatus(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Status, bool, error) {
	statusID := media.StatusID
	if statusID == "" {
		// Media isn't tied to any status.
		return nil, false, nil
	}

	// Look up the related status by its ID.
	status, err := m.state.DB.GetStatusByID(
		gtscontext.SetBarebones(ctx),
		statusID,
	)

	switch {
	case err != nil && !errors.Is(err, db.ErrNoEntries):
		// A real database error, not just "no entries".
		return nil, false, gtserror.Newf("error fetching status by id %s: %w", statusID, err)

	case status == nil:
		// ID is set, but the status is gone.
		return nil, true, nil

	default:
		return status, false, nil
	}
}

// uncache removes the media's original and thumbnail files via m.removeFiles,
// then sets Cached to false on the attachment and updates that column in the
// database. On a dry run it returns immediately without touching anything.
func (m *Media) uncache(ctx context.Context, media *gtsmodel.MediaAttachment) error {
	if gtscontext.DryRun(ctx) {
		// Dry run: leave everything as it is.
		return nil
	}

	// Files go first: original, then thumbnail.
	if _, err := m.removeFiles(ctx,
		media.File.Path,
		media.Thumbnail.Path,
	); err != nil {
		return gtserror.Newf("error removing media files: %w", err)
	}

	log.Debugf(ctx, "marking media attachment as uncached: %s", media.ID)

	// new(bool) is a pointer to false: record on the
	// attachment that it is no longer cached, and
	// persist only that column.
	media.Cached = new(bool)

	err := m.state.DB.UpdateAttachment(ctx, media, "cached")
	if err != nil {
		return gtserror.Newf("error updating media: %w", err)
	}

	return nil
}

// delete removes the media's original and thumbnail files via m.removeFiles,
// then deletes the attachment's database entry by ID. On a dry run it returns
// immediately without touching anything.
func (m *Media) delete(ctx context.Context, media *gtsmodel.MediaAttachment) error {
	if gtscontext.DryRun(ctx) {
		// Dry run: leave everything as it is.
		return nil
	}

	// Files go first: original, then thumbnail.
	if _, err := m.removeFiles(ctx,
		media.File.Path,
		media.Thumbnail.Path,
	); err != nil {
		return gtserror.Newf("error removing media files: %w", err)
	}

	log.Debugf(ctx, "deleting media attachment: %s", media.ID)

	// Then drop the attachment's database entry.
	err := m.state.DB.DeleteAttachment(ctx, media.ID)
	if err != nil {
		return gtserror.Newf("error deleting media: %w", err)
	}

	return nil
}