package storage

import (
	"bytes"
	"context"
	"crypto/sha256"
	"fmt"
	"io"
	"io/fs"
	"os"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"

	"codeberg.org/gruf/go-byteutil"
	"codeberg.org/gruf/go-errors/v2"
	"codeberg.org/gruf/go-fastcopy"
	"codeberg.org/gruf/go-hashenc"
	"codeberg.org/gruf/go-iotools"
	"codeberg.org/gruf/go-pools"
	"codeberg.org/gruf/go-store/v2/util"
)

var (
	nodePathPrefix  = "node/"
	blockPathPrefix = "block/"
)

// DefaultBlockConfig is the default BlockStorage configuration.
var DefaultBlockConfig = &BlockConfig{
	BlockSize:    1024 * 16,
	WriteBufSize: 4096,
	Overwrite:    false,
	Compression:  NoCompression(),
}

// BlockConfig defines options to be used when opening a BlockStorage.
type BlockConfig struct {
	// BlockSize is the chunking size to use when splitting and storing blocks of data.
	BlockSize int

	// ReadBufSize is the buffer size to use when reading node files.
	ReadBufSize int

	// WriteBufSize is the buffer size to use when writing file streams.
	WriteBufSize int

	// Overwrite allows overwriting values of stored keys in the storage.
	Overwrite bool

	// Compression is the Compressor to use when reading / writing files,
	// default is no compression.
	Compression Compressor
}
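
// Illustrative sketch (not part of the package's API): opening a
// BlockStorage with a custom configuration and performing a simple
// write/read round-trip. The store path, key and sizes used here are
// hypothetical example values.
func exampleBlockConfigUsage() error {
	st, err := OpenBlock("/tmp/example-block-store", &BlockConfig{
		BlockSize:    1024 * 64, // chunk values into 64KiB blocks
		WriteBufSize: 4096,
		Overwrite:    true,
		Compression:  NoCompression(),
	})
	if err != nil {
		return err
	}
	defer st.Close()

	ctx := context.Background()

	// Write a value; it is split into content-addressed blocks on disk.
	if _, err := st.WriteBytes(ctx, "mykey", []byte("hello world")); err != nil {
		return err
	}

	// Read the value back, reassembled from its blocks.
	b, err := st.ReadBytes(ctx, "mykey")
	if err != nil {
		return err
	}
	fmt.Printf("read back: %s\n", b)
	return nil
}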

// getBlockConfig returns a valid BlockConfig for supplied ptr.
func getBlockConfig(cfg *BlockConfig) BlockConfig {
	// If nil, use default
	if cfg == nil {
		cfg = DefaultBlockConfig
	}

	// Assume nil compress == none
	if cfg.Compression == nil {
		cfg.Compression = NoCompression()
	}

	// Assume 0 chunk size == use default
	if cfg.BlockSize <= 0 {
		cfg.BlockSize = DefaultBlockConfig.BlockSize
	}

	// Assume 0 buf size == use default
	if cfg.WriteBufSize <= 0 {
		cfg.WriteBufSize = DefaultBlockConfig.WriteBufSize
	}

	// Return owned config copy
	return BlockConfig{
		BlockSize:    cfg.BlockSize,
		ReadBufSize:  cfg.ReadBufSize,
		WriteBufSize: cfg.WriteBufSize,
		Overwrite:    cfg.Overwrite,
		Compression:  cfg.Compression,
	}
}

// BlockStorage is a Storage implementation that stores input data as chunks on
// a filesystem. Each value is chunked into blocks of configured size and these
// blocks are stored with name equal to their base64-encoded SHA256 hash-sum. A
// "node" file is finally created containing the list of block hashes that make
// up this value.
type BlockStorage struct {
	path      string            // path is the root path of this store
	blockPath string            // blockPath is the joined root path + block path prefix
	nodePath  string            // nodePath is the joined root path + node path prefix
	config    BlockConfig       // config is the supplied configuration for this store
	hashPool  sync.Pool         // hashPool is this store's hashEncoder pool
	bufpool   pools.BufferPool  // bufpool is this store's bytes.Buffer pool
	cppool    fastcopy.CopyPool // cppool is the prepared io copier with buffer pool
	lock      *Lock             // lock is the opened lockfile for this storage instance

	// NOTE:
	// BlockStorage does not need to lock each of the underlying block files
	// as the filename itself directly relates to the contents. If there happens
	// to be an overwrite, it will just be of the same data since the filename is
	// the hash of the data.
}
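
// Illustrative sketch of the content-addressing property described in the
// NOTE above: writing identical data under two different keys produces two
// node files but shares the underlying block files, since a block's file
// name is the hash of its contents. The keys used here are hypothetical.
func exampleContentAddressing(st *BlockStorage) error {
	ctx := context.Background()
	data := bytes.Repeat([]byte{'x'}, 1024)

	// First write stores the block(s) plus a node file for "key-a".
	if _, err := st.WriteBytes(ctx, "key-a", data); err != nil {
		return err
	}

	// Second write finds every block already on disk (statBlock reports
	// true), so only the new node file for "key-b" is created.
	if _, err := st.WriteBytes(ctx, "key-b", data); err != nil {
		return err
	}
	return nil
}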

// OpenBlock opens a BlockStorage instance for given folder path and configuration.
func OpenBlock(path string, cfg *BlockConfig) (*BlockStorage, error) {
	// Acquire path builder
	pb := util.GetPathBuilder()
	defer util.PutPathBuilder(pb)

	// Clean provided path, ensure ends in '/' (should
	// be dir, this helps with file path trimming later)
	path = pb.Clean(path) + "/"

	// Get checked config
	config := getBlockConfig(cfg)

	// Attempt to open path
	file, err := os.OpenFile(path, defaultFileROFlags, defaultDirPerms)
	if err != nil {
		// If not a not-exist error, return
		if !os.IsNotExist(err) {
			return nil, err
		}

		// Attempt to make store path dirs
		err = os.MkdirAll(path, defaultDirPerms)
		if err != nil {
			return nil, err
		}

		// Reopen dir now it's been created
		file, err = os.OpenFile(path, defaultFileROFlags, defaultDirPerms)
		if err != nil {
			return nil, err
		}
	}
	defer file.Close()

	// Double check this is a dir (NOT a file!)
	stat, err := file.Stat()
	if err != nil {
		return nil, err
	} else if !stat.IsDir() {
		return nil, new_error("path is file")
	}

	// Open and acquire storage lock for path
	lock, err := OpenLock(pb.Join(path, LockFile))
	if err != nil {
		return nil, err
	}

	// Figure out the largest size for bufpool slices
	bufSz := encodedHashLen
	if bufSz < config.BlockSize {
		bufSz = config.BlockSize
	}
	if bufSz < config.WriteBufSize {
		bufSz = config.WriteBufSize
	}

	// Prepare BlockStorage
	st := &BlockStorage{
		path:      path,
		blockPath: pb.Join(path, blockPathPrefix),
		nodePath:  pb.Join(path, nodePathPrefix),
		config:    config,
		hashPool: sync.Pool{
			New: func() interface{} {
				return newHashEncoder()
			},
		},
		bufpool: pools.NewBufferPool(bufSz),
		lock:    lock,
	}

	// Set copypool buffer size
	st.cppool.Buffer(config.ReadBufSize)

	return st, nil
}

// Clean implements storage.Clean().
func (st *BlockStorage) Clean(ctx context.Context) error {
	// Check if open
	if st.lock.Closed() {
		return ErrClosed
	}

	// Check context still valid
	if err := ctx.Err(); err != nil {
		return err
	}

	// Acquire path builder
	pb := util.GetPathBuilder()
	defer util.PutPathBuilder(pb)

	nodes := map[string]*node{}

	// Walk nodes dir for entries
	err := walkDir(pb, st.nodePath, func(npath string, fsentry fs.DirEntry) error {
		// Only deal with regular files
		if !fsentry.Type().IsRegular() {
			return nil
		}

		// Get joined node path name
		npath = pb.Join(npath, fsentry.Name())

		// Attempt to open RO file
		file, err := open(npath, defaultFileROFlags)
		if err != nil {
			return err
		}
		defer file.Close()

		// Alloc new node + acquire hash buffer for writes
		hbuf := st.bufpool.Get()
		defer st.bufpool.Put(hbuf)
		hbuf.Guarantee(encodedHashLen)
		node := node{}

		// Write file contents to node
		_, err = io.CopyBuffer(
			&nodeWriter{
				node: &node,
				buf:  hbuf,
			},
			file,
			nil,
		)
		if err != nil {
			return err
		}

		// Add to nodes map
		nodes[fsentry.Name()] = &node
		return nil
	})

	// Handle errors (though nodePath may not have been created yet)
	if err != nil && !os.IsNotExist(err) {
		return err
	}

	// Walk blocks dir for entries
	err = walkDir(pb, st.blockPath, func(bpath string, fsentry fs.DirEntry) error {
		// Only deal with regular files
		if !fsentry.Type().IsRegular() {
			return nil
		}

		inUse := false
		for key, node := range nodes {
			if node.removeHash(fsentry.Name()) {
				if len(node.hashes) < 1 {
					// This node contained the hash, and after removal is now
					// empty. Remove this node from our tracked nodes map
					delete(nodes, key)
				}
				inUse = true
			}
		}

		// Block hash is used by node
		if inUse {
			return nil
		}

		// Get joined block path name
		bpath = pb.Join(bpath, fsentry.Name())

		// Remove this unused block path
		return os.Remove(bpath)
	})

	// Handle errors (though blockPath may not have been created yet)
	if err != nil && !os.IsNotExist(err) {
		return err
	}

	// If there are nodes left at this point, they are corrupt
	// (i.e. they're referencing block hashes that don't exist)
	if len(nodes) > 0 {
		nodeKeys := []string{}
		for key := range nodes {
			nodeKeys = append(nodeKeys, key)
		}
		return fmt.Errorf("store/storage: corrupted nodes: %v", nodeKeys)
	}

	return nil
}
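
// Illustrative sketch: Clean garbage-collects block files no longer
// referenced by any node file, and reports corrupted nodes (those that
// reference blocks which no longer exist). A caller might, for example,
// run it once at startup.
func exampleClean(st *BlockStorage) error {
	return st.Clean(context.Background())
}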

// ReadBytes implements Storage.ReadBytes().
func (st *BlockStorage) ReadBytes(ctx context.Context, key string) ([]byte, error) {
	// Get stream reader for key
	rc, err := st.ReadStream(ctx, key)
	if err != nil {
		return nil, err
	}
	defer rc.Close()

	// Read all bytes and return
	return io.ReadAll(rc)
}

// ReadStream implements Storage.ReadStream().
func (st *BlockStorage) ReadStream(ctx context.Context, key string) (io.ReadCloser, error) {
	// Get node file path for key
	npath, err := st.nodePathForKey(key)
	if err != nil {
		return nil, err
	}

	// Check if open
	if st.lock.Closed() {
		return nil, ErrClosed
	}

	// Check context still valid
	if err := ctx.Err(); err != nil {
		return nil, err
	}

	// Attempt to open RO file
	file, err := open(npath, defaultFileROFlags)
	if err != nil {
		return nil, errSwapNotFound(err)
	}
	defer file.Close()

	// Acquire hash buffer for writes
	hbuf := st.bufpool.Get()
	defer st.bufpool.Put(hbuf)

	var node node

	// Write file contents to node
	_, err = st.cppool.Copy(
		&nodeWriter{
			node: &node,
			buf:  hbuf,
		},
		file,
	)
	if err != nil {
		return nil, err
	}

	// Prepare block reader and return
	return iotools.NopReadCloser(&blockReader{
		storage: st,
		node:    &node,
	}), nil
}
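
// Illustrative sketch: ReadStream lets a caller copy a stored value to any
// io.Writer without buffering the whole value in memory, since the
// underlying blockReader fetches blocks lazily as the stream is consumed.
func exampleReadStream(st *BlockStorage, key string, dst io.Writer) error {
	rc, err := st.ReadStream(context.Background(), key)
	if err != nil {
		return err
	}
	defer rc.Close()
	_, err = io.Copy(dst, rc)
	return err
}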

// readBlock reads the block with hash (key) from the filesystem.
func (st *BlockStorage) readBlock(key string) ([]byte, error) {
	// Get block file path for key
	bpath := st.blockPathForKey(key)

	// Attempt to open RO file
	file, err := open(bpath, defaultFileROFlags)
	if err != nil {
		return nil, wrap(new_error("corrupted node"), err)
	}
	defer file.Close()

	// Wrap the file in a compressor
	cFile, err := st.config.Compression.Reader(file)
	if err != nil {
		return nil, wrap(new_error("corrupted node"), err)
	}
	defer cFile.Close()

	// Read the entire file
	return io.ReadAll(cFile)
}

// WriteBytes implements Storage.WriteBytes().
func (st *BlockStorage) WriteBytes(ctx context.Context, key string, value []byte) (int, error) {
	n, err := st.WriteStream(ctx, key, bytes.NewReader(value))
	return int(n), err
}

// WriteStream implements Storage.WriteStream().
func (st *BlockStorage) WriteStream(ctx context.Context, key string, r io.Reader) (int64, error) {
	// Get node file path for key
	npath, err := st.nodePathForKey(key)
	if err != nil {
		return 0, err
	}

	// Check if open
	if st.lock.Closed() {
		return 0, ErrClosed
	}

	// Check context still valid
	if err := ctx.Err(); err != nil {
		return 0, err
	}

	// Check if this exists
	ok, err := stat(npath)
	if err != nil {
		return 0, err
	}

	// Check if we allow overwrites
	if ok && !st.config.Overwrite {
		return 0, ErrAlreadyExists
	}

	// Ensure nodes dir (and any leading up to) exists
	err = os.MkdirAll(st.nodePath, defaultDirPerms)
	if err != nil {
		return 0, err
	}

	// Ensure blocks dir (and any leading up to) exists
	err = os.MkdirAll(st.blockPath, defaultDirPerms)
	if err != nil {
		return 0, err
	}

	var node node
	var total atomic.Int64

	// Acquire HashEncoder
	hc := st.hashPool.Get().(*hashEncoder)
	defer st.hashPool.Put(hc)

	// Create new waitgroup and OnceError for
	// goroutine error tracking and propagating
	wg := sync.WaitGroup{}
	onceErr := errors.OnceError{}

loop:
	for !onceErr.IsSet() {
		// Fetch new buffer for this loop
		buf := st.bufpool.Get()
		buf.Grow(st.config.BlockSize)

		// Read next chunk
		n, err := io.ReadFull(r, buf.B)
		switch err {
		case nil, io.ErrUnexpectedEOF:
			// do nothing
		case io.EOF:
			st.bufpool.Put(buf)
			break loop
		default:
			st.bufpool.Put(buf)
			return 0, err
		}

		// Hash the encoded data
		sum := hc.EncodeSum(buf.B)

		// Append to the node's hashes
		node.hashes = append(node.hashes, sum)

		// If already on disk, skip
		has, err := st.statBlock(sum)
		if err != nil {
			st.bufpool.Put(buf)
			return 0, err
		} else if has {
			st.bufpool.Put(buf)
			continue loop
		}

		// Check if reached EOF
		atEOF := (n < buf.Len())

		wg.Add(1)
		go func() {
			// Perform writes in goroutine

			defer func() {
				// Defer release +
				// signal we're done
				st.bufpool.Put(buf)
				wg.Done()
			}()

			// Write block to store at hash
			n, err := st.writeBlock(sum, buf.B[:n])
			if err != nil {
				onceErr.Store(err)
				return
			}

			// Increment total.
			total.Add(int64(n))
		}()

		// Break at end
		if atEOF {
			break loop
		}
	}

	// Wait, check errors
	wg.Wait()
	if onceErr.IsSet() {
		return 0, onceErr.Load()
	}

	// If no hashes created, return
	if len(node.hashes) < 1 {
		return 0, new_error("no hashes written")
	}

	// Prepare to swap error if need-be
	errSwap := errSwapNoop

	// Build file RW flags
	// NOTE: we performed an initial check for
	// this before writing blocks, but if
	// the user of this storage didn't
	// correctly mutex protect this key then
	// someone may have beaten us to the
	// punch at writing the node file.
	flags := defaultFileRWFlags
	if !st.config.Overwrite {
		flags |= syscall.O_EXCL

		// Catch + replace err exist
		errSwap = errSwapExist
	}

	// Attempt to open RW file
	file, err := open(npath, flags)
	if err != nil {
		return 0, errSwap(err)
	}
	defer file.Close()

	// Acquire write buffer
	buf := st.bufpool.Get()
	defer st.bufpool.Put(buf)
	buf.Grow(st.config.WriteBufSize)

	// Finally, write data to file
	_, err = io.CopyBuffer(file, &nodeReader{node: node}, buf.B)
	return total.Load(), err
}

// writeBlock writes the block with hash and supplied value to the filesystem.
func (st *BlockStorage) writeBlock(hash string, value []byte) (int, error) {
	// Get block file path for key
	bpath := st.blockPathForKey(hash)

	// Attempt to open RW file
	file, err := open(bpath, defaultFileRWFlags)
	if err != nil {
		if err == syscall.EEXIST {
			err = nil /* race issue described in struct NOTE */
		}
		return 0, err
	}
	defer file.Close()

	// Wrap the file in a compressor
	cFile, err := st.config.Compression.Writer(file)
	if err != nil {
		return 0, err
	}
	defer cFile.Close()

	// Write value to file
	return cFile.Write(value)
}

// statBlock checks for existence of supplied block hash.
func (st *BlockStorage) statBlock(hash string) (bool, error) {
	return stat(st.blockPathForKey(hash))
}

// Stat implements Storage.Stat().
func (st *BlockStorage) Stat(ctx context.Context, key string) (bool, error) {
	// Get node file path for key
	kpath, err := st.nodePathForKey(key)
	if err != nil {
		return false, err
	}

	// Check if open
	if st.lock.Closed() {
		return false, ErrClosed
	}

	// Check context still valid
	if err := ctx.Err(); err != nil {
		return false, err
	}

	// Check for file on disk
	return stat(kpath)
}

// Remove implements Storage.Remove().
func (st *BlockStorage) Remove(ctx context.Context, key string) error {
	// Get node file path for key
	kpath, err := st.nodePathForKey(key)
	if err != nil {
		return err
	}

	// Check if open
	if st.lock.Closed() {
		return ErrClosed
	}

	// Check context still valid
	if err := ctx.Err(); err != nil {
		return err
	}

	// Remove at path (we know this is file)
	if err := unlink(kpath); err != nil {
		return errSwapNotFound(err)
	}

	return nil
}

// Close implements Storage.Close().
func (st *BlockStorage) Close() error {
	return st.lock.Close()
}

// WalkKeys implements Storage.WalkKeys().
func (st *BlockStorage) WalkKeys(ctx context.Context, opts WalkKeysOptions) error {
	// Check if open
	if st.lock.Closed() {
		return ErrClosed
	}

	// Check context still valid
	if err := ctx.Err(); err != nil {
		return err
	}

	// Acquire path builder
	pb := util.GetPathBuilder()
	defer util.PutPathBuilder(pb)

	// Walk dir for entries
	return walkDir(pb, st.nodePath, func(npath string, fsentry fs.DirEntry) error {
		if !fsentry.Type().IsRegular() {
			// Only deal with regular files
			return nil
		}

		// Perform provided walk function
		return opts.WalkFn(ctx, Entry{
			Key:  fsentry.Name(),
			Size: -1,
		})
	})
}
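
// Illustrative sketch: enumerating all stored keys. Note that Entry.Size
// is always -1 here, since computing a value's true size would require
// reading its node file and every referenced block.
func exampleWalkKeys(st *BlockStorage) error {
	return st.WalkKeys(context.Background(), WalkKeysOptions{
		WalkFn: func(ctx context.Context, entry Entry) error {
			fmt.Println(entry.Key)
			return nil
		},
	})
}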

// nodePathForKey calculates the node file path for supplied key.
func (st *BlockStorage) nodePathForKey(key string) (string, error) {
	// Path separators and relative path segments are illegal in keys
	if strings.Contains(key, "/") || key == "." || key == ".." {
		return "", ErrInvalidKey
	}

	// Acquire path builder
	pb := util.GetPathBuilder()
	defer util.PutPathBuilder(pb)

	// Return joined + cleaned node-path
	return pb.Join(st.nodePath, key), nil
}

// blockPathForKey calculates the block file path for supplied hash.
func (st *BlockStorage) blockPathForKey(hash string) string {
	pb := util.GetPathBuilder()
	defer util.PutPathBuilder(pb)
	return pb.Join(st.blockPath, hash)
}

// hashSeparator is the separating byte between block hashes.
const hashSeparator = byte('\n')

// node represents the contents of a node file in storage.
type node struct {
	hashes []string
}

// removeHash attempts to remove supplied block hash from the node's hash array.
func (n *node) removeHash(hash string) bool {
	for i := 0; i < len(n.hashes); {
		if n.hashes[i] == hash {
			// Drop this hash from slice
			n.hashes = append(n.hashes[:i], n.hashes[i+1:]...)
			return true
		}

		// Continue iter
		i++
	}
	return false
}

// nodeReader is an io.Reader implementation for the node file representation,
// which is useful when a calculated node file is being written to the store.
type nodeReader struct {
	node node
	idx  int
	last int
}

func (r *nodeReader) Read(b []byte) (int, error) {
	n := 0

	// '-1' means we missed writing
	// hash separator on last iteration
	if r.last == -1 {
		b[n] = hashSeparator
		n++
		r.last = 0
	}

	for r.idx < len(r.node.hashes) {
		hash := r.node.hashes[r.idx]

		// Copy into buffer + update read count
		m := copy(b[n:], hash[r.last:])
		n += m

		// If incomplete copy, advance resume
		// offset within this hash and return
		if m < len(hash)-r.last {
			r.last += m
			return n, nil
		}

		// Check we can write last separator
		if n == len(b) {
			r.last = -1
			return n, nil
		}

		// Write separator, iter, reset
		b[n] = hashSeparator
		n++
		r.idx++
		r.last = 0
	}

	// We reached end of hashes
	return n, io.EOF
}

// nodeWriter is an io.Writer implementation for the node file representation,
// which is useful when a calculated node file is being read from the store.
type nodeWriter struct {
	node *node
	buf  *byteutil.Buffer
}

func (w *nodeWriter) Write(b []byte) (int, error) {
	n := 0

	for {
		// Find next hash separator position
		idx := bytes.IndexByte(b[n:], hashSeparator)
		if idx == -1 {
			// Check we shouldn't be expecting it
			if w.buf.Len() > encodedHashLen {
				return n, new_error("invalid node")
			}

			// Write all contents to buffer
			w.buf.Write(b[n:])
			return len(b), nil
		}

		// Found hash separator, write
		// current buf contents to node hashes
		w.buf.Write(b[n : n+idx])
		n += idx + 1
		if w.buf.Len() != encodedHashLen {
			return n, new_error("invalid node")
		}

		// Append to hashes & reset
		w.node.hashes = append(w.node.hashes, w.buf.String())
		w.buf.Reset()
	}
}
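
// Illustrative sketch of the node file format produced and consumed above:
// each block hash followed by a single '\n' separator, i.e.
// "<hash1>\n<hash2>\n...". nodeReader and nodeWriter are inverses, as this
// round-trip shows; the repeated-letter "hashes" are stand-in values of the
// correct encoded length, and a zero-value byteutil.Buffer is assumed usable.
func exampleNodeRoundTrip() error {
	src := node{hashes: []string{
		strings.Repeat("a", encodedHashLen),
		strings.Repeat("b", encodedHashLen),
	}}

	// Serialize the node via nodeReader.
	var raw bytes.Buffer
	if _, err := io.Copy(&raw, &nodeReader{node: src}); err != nil {
		return err
	}

	// Parse it back via nodeWriter.
	var dst node
	w := &nodeWriter{node: &dst, buf: &byteutil.Buffer{}}
	if _, err := w.Write(raw.Bytes()); err != nil {
		return err
	}

	// Both hashes should have survived the round-trip.
	if len(dst.hashes) != 2 || dst.hashes[0] != src.hashes[0] {
		return new_error("node round-trip mismatch")
	}
	return nil
}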

// blockReader is an io.Reader implementation for the combined, linked block
// data contained within a node file. Basically, this allows reading value data
// from the store for a given node file.
type blockReader struct {
	storage *BlockStorage
	node    *node
	buf     []byte
	prev    int
}

func (r *blockReader) Read(b []byte) (int, error) {
	n := 0

	// Data left in buf, copy as much as we
	// can into supplied read buffer
	if r.prev < len(r.buf) {
		n += copy(b, r.buf[r.prev:])
		r.prev += n
		if n >= len(b) {
			return n, nil
		}
	}

	for {
		// Check we have any hashes left
		if len(r.node.hashes) < 1 {
			return n, io.EOF
		}

		// Get next key from slice
		key := r.node.hashes[0]
		r.node.hashes = r.node.hashes[1:]

		// Attempt to fetch next batch of data
		var err error
		r.buf, err = r.storage.readBlock(key)
		if err != nil {
			return n, err
		}
		r.prev = 0

		// Copy as much as can from new buffer
		m := copy(b[n:], r.buf)
		r.prev += m
		n += m

		// If we hit end of supplied buf, return
		if n >= len(b) {
			return n, nil
		}
	}
}

var (
	// base64Encoding is our base64 encoding object.
	base64Encoding = hashenc.Base64()

	// encodedHashLen is the once-calculated encoded hash-sum length.
	encodedHashLen = base64Encoding.EncodedLen(
		sha256.New().Size(),
	)
)

// hashEncoder is a HashEncoder with built-in encode buffer.
type hashEncoder struct {
	henc hashenc.HashEncoder
	ebuf []byte
}

// newHashEncoder returns a new hashEncoder instance.
func newHashEncoder() *hashEncoder {
	return &hashEncoder{
		henc: hashenc.New(sha256.New(), base64Encoding),
		ebuf: make([]byte, encodedHashLen),
	}
}

// EncodeSum encodes the src data and returns the resulting hash sum as a string.
func (henc *hashEncoder) EncodeSum(src []byte) string {
	henc.henc.EncodeSum(henc.ebuf, src)
	return string(henc.ebuf)
}
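
// Illustrative sketch: computing the storage name for a chunk of data, as
// WriteStream does for each block. The name is the base64-encoded SHA256
// sum of the chunk's contents, which is what makes blocks content-addressed.
func exampleBlockName(chunk []byte) string {
	hc := newHashEncoder()
	return hc.EncodeSum(chunk)
}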