You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
282 lines
8.8 KiB
282 lines
8.8 KiB
package shell
|
|
|
|
import (
	"context"
	"flag"
	"fmt"
	"io"
	"sort"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
	"github.com/seaweedfs/seaweedfs/weed/remote_storage"
	"github.com/seaweedfs/seaweedfs/weed/util"
)
|
|
|
|
func init() {
|
|
Commands = append(Commands, &commandRemoteCopyLocal{})
|
|
}
|
|
|
|
// commandRemoteCopyLocal implements the "remote.copy.local" shell command.
// It has no fields; all inputs arrive through the arguments passed to Do.
type commandRemoteCopyLocal struct{}
|
|
|
|
func (c *commandRemoteCopyLocal) Name() string {
|
|
return "remote.copy.local"
|
|
}
|
|
|
|
// Help returns the usage text for the remote.copy.local command: example
// invocations, the steps the command performs, and when it is useful.
func (c *commandRemoteCopyLocal) Help() string {
|
|
return `copy local files to remote storage
|
|
|
|
# assume a remote storage is configured to name "cloud1"
|
|
remote.configure -name=cloud1 -type=s3 -s3.access_key=xxx -s3.secret_key=yyy
|
|
# mount and pull one bucket
|
|
remote.mount -dir=/xxx -remote=cloud1/bucket
|
|
|
|
# copy local files to remote storage
|
|
remote.copy.local -dir=/xxx # copy all local-only files
|
|
remote.copy.local -dir=/xxx -concurrent=16 # with custom concurrency
|
|
remote.copy.local -dir=/xxx -include=*.pdf # only copy PDF files
|
|
remote.copy.local -dir=/xxx -exclude=*.tmp # exclude temporary files
|
|
remote.copy.local -dir=/xxx -dryRun=true # show what would be done without making changes
|
|
remote.copy.local -dir=/xxx -forceUpdate=true # force update even if remote exists
|
|
|
|
This command will:
|
|
1. Find local files that don't exist on remote storage
|
|
2. Copy these files to remote storage
|
|
3. Update local metadata with remote information
|
|
|
|
This is useful when:
|
|
- You deleted filer logs and need to copy existing files
|
|
- You have local files that were never copied to remote
|
|
- You want to ensure all local files are backed up to remote
|
|
|
|
`
|
|
}
|
|
|
|
func (c *commandRemoteCopyLocal) HasTag(CommandTag) bool {
|
|
return false
|
|
}
|
|
|
|
func (c *commandRemoteCopyLocal) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
|
|
|
|
remoteCopyLocalCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
|
|
|
|
dir := remoteCopyLocalCommand.String("dir", "", "a directory in filer")
|
|
concurrency := remoteCopyLocalCommand.Int("concurrent", 16, "concurrent file operations")
|
|
dryRun := remoteCopyLocalCommand.Bool("dryRun", false, "show what would be done without making changes")
|
|
forceUpdate := remoteCopyLocalCommand.Bool("forceUpdate", false, "force update even if remote exists")
|
|
fileFilter := newFileFilter(remoteCopyLocalCommand)
|
|
|
|
if err = remoteCopyLocalCommand.Parse(args); err != nil {
|
|
return err
|
|
}
|
|
|
|
if *dir == "" {
|
|
return fmt.Errorf("need to specify -dir option")
|
|
}
|
|
|
|
mappings, localMountedDir, remoteStorageMountedLocation, remoteStorageConf, detectErr := detectMountInfo(commandEnv, writer, *dir)
|
|
if detectErr != nil {
|
|
jsonPrintln(writer, mappings)
|
|
return detectErr
|
|
}
|
|
|
|
// perform local to remote copy
|
|
return c.doLocalToRemoteCopy(commandEnv, writer, util.FullPath(localMountedDir), remoteStorageMountedLocation, util.FullPath(*dir), remoteStorageConf, *concurrency, *dryRun, *forceUpdate, fileFilter)
|
|
}
|
|
|
|
func (c *commandRemoteCopyLocal) doLocalToRemoteCopy(commandEnv *CommandEnv, writer io.Writer, localMountedDir util.FullPath, remoteMountedLocation *remote_pb.RemoteStorageLocation, dirToCopy util.FullPath, remoteConf *remote_pb.RemoteConf, concurrency int, dryRun bool, forceUpdate bool, fileFilter *FileFilter) error {
|
|
|
|
// Get remote storage client
|
|
remoteStorage, err := remote_storage.GetRemoteStorage(remoteConf)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get remote storage: %w", err)
|
|
}
|
|
|
|
remote := filer.MapFullPathToRemoteStorageLocation(localMountedDir, remoteMountedLocation, dirToCopy)
|
|
|
|
// Step 1: Collect all local files that are part of the remote mount
|
|
localFiles := make(map[string]*filer_pb.Entry)
|
|
err = recursivelyTraverseDirectory(commandEnv, dirToCopy, func(dir util.FullPath, entry *filer_pb.Entry) bool {
|
|
// Only consider files that are part of remote mount
|
|
if isInMountedDirectory(dir, localMountedDir) {
|
|
fullPath := string(dir.Child(entry.Name))
|
|
localFiles[fullPath] = entry
|
|
}
|
|
return true
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to traverse local directory: %w", err)
|
|
}
|
|
|
|
fmt.Fprintf(writer, "Found %d files/directories in local storage\n", len(localFiles))
|
|
|
|
// Step 2: Check which files exist on remote storage
|
|
remoteFiles := make(map[string]bool)
|
|
err = remoteStorage.Traverse(remote, func(remoteDir, name string, isDirectory bool, remoteEntry *filer_pb.RemoteEntry) error {
|
|
localDir := filer.MapRemoteStorageLocationPathToFullPath(localMountedDir, remoteMountedLocation, remoteDir)
|
|
fullPath := string(localDir.Child(name))
|
|
remoteFiles[fullPath] = true
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to traverse remote storage: %w", err)
|
|
}
|
|
|
|
fmt.Fprintf(writer, "Found %d files/directories in remote storage\n", len(remoteFiles))
|
|
|
|
// Step 3: Determine files to copy
|
|
var filesToCopy []string
|
|
for localPath, localEntry := range localFiles {
|
|
// Skip directories
|
|
if localEntry.IsDirectory {
|
|
continue
|
|
}
|
|
|
|
// Apply file filter
|
|
if !fileFilter.matches(localEntry) {
|
|
continue
|
|
}
|
|
|
|
// Check if file needs copying
|
|
needsCopy := false
|
|
if !remoteFiles[localPath] {
|
|
// File doesn't exist on remote
|
|
needsCopy = true
|
|
} else if forceUpdate {
|
|
// Force update requested and file exists on remote
|
|
needsCopy = true
|
|
}
|
|
|
|
if needsCopy {
|
|
filesToCopy = append(filesToCopy, localPath)
|
|
}
|
|
}
|
|
|
|
fmt.Fprintf(writer, "Files to copy: %d\n", len(filesToCopy))
|
|
|
|
if dryRun {
|
|
fmt.Fprintf(writer, "DRY RUN - showing what would be done:\n")
|
|
for _, path := range filesToCopy {
|
|
fmt.Fprintf(writer, "COPY: %s\n", path)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Step 4: Copy files to remote storage
|
|
if len(filesToCopy) == 0 {
|
|
fmt.Fprintf(writer, "No files to copy\n")
|
|
return nil
|
|
}
|
|
|
|
var wg sync.WaitGroup
|
|
limitedConcurrentExecutor := util.NewLimitedConcurrentExecutor(concurrency)
|
|
var firstErr error
|
|
var errOnce sync.Once
|
|
var successCount atomic.Int64
|
|
var outputMu sync.Mutex
|
|
|
|
for _, pathToCopy := range filesToCopy {
|
|
wg.Add(1)
|
|
localPath := pathToCopy // Capture for closure
|
|
limitedConcurrentExecutor.Execute(func() {
|
|
defer wg.Done()
|
|
|
|
localEntry := localFiles[localPath]
|
|
if localEntry == nil {
|
|
outputMu.Lock()
|
|
fmt.Fprintf(writer, "Warning: skipping copy for %s (local entry not found)\n", localPath)
|
|
outputMu.Unlock()
|
|
return
|
|
}
|
|
|
|
outputMu.Lock()
|
|
fmt.Fprintf(writer, "Copying %s... ", localPath)
|
|
outputMu.Unlock()
|
|
|
|
dir, _ := util.FullPath(localPath).DirAndName()
|
|
remoteLocation := filer.MapFullPathToRemoteStorageLocation(localMountedDir, remoteMountedLocation, util.FullPath(localPath))
|
|
|
|
// Copy the file to remote storage
|
|
err := syncFileToRemote(commandEnv, remoteStorage, remoteConf, remoteLocation, util.FullPath(dir), localEntry)
|
|
if err != nil {
|
|
outputMu.Lock()
|
|
fmt.Fprintf(writer, "failed: %v\n", err)
|
|
outputMu.Unlock()
|
|
errOnce.Do(func() {
|
|
firstErr = err
|
|
})
|
|
return
|
|
}
|
|
|
|
successCount.Add(1)
|
|
outputMu.Lock()
|
|
fmt.Fprintf(writer, "done\n")
|
|
outputMu.Unlock()
|
|
})
|
|
}
|
|
|
|
wg.Wait()
|
|
if firstErr != nil {
|
|
return firstErr
|
|
}
|
|
|
|
fmt.Fprintf(writer, "Successfully copied %d files to remote storage\n", successCount.Load())
|
|
return nil
|
|
}
|
|
|
|
func syncFileToRemote(commandEnv *CommandEnv, remoteStorage remote_storage.RemoteStorageClient, remoteConf *remote_pb.RemoteConf, remoteLocation *remote_pb.RemoteStorageLocation, dir util.FullPath, localEntry *filer_pb.Entry) error {
|
|
|
|
// Upload to remote storage using the same approach as filer_remote_sync
|
|
var remoteEntry *filer_pb.RemoteEntry
|
|
var err error
|
|
|
|
err = util.Retry("writeFile", func() error {
|
|
// Create a reader for the file content
|
|
reader := filer.NewFileReader(commandEnv, localEntry)
|
|
|
|
remoteEntry, err = remoteStorage.WriteFile(remoteLocation, localEntry, reader)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to upload to remote storage: %w", err)
|
|
}
|
|
|
|
// Update local entry with remote information
|
|
return commandEnv.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
|
ctx := context.Background()
|
|
|
|
// Update local entry with remote information
|
|
localEntry.RemoteEntry = remoteEntry
|
|
|
|
// Update the entry
|
|
_, updateErr := client.UpdateEntry(ctx, &filer_pb.UpdateEntryRequest{
|
|
Directory: string(dir),
|
|
Entry: localEntry,
|
|
})
|
|
if updateErr != nil {
|
|
return fmt.Errorf("failed to update local entry: %w", updateErr)
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
func isInMountedDirectory(dir util.FullPath, mountedDir util.FullPath) bool {
|
|
if string(dir) == string(mountedDir) {
|
|
return true
|
|
}
|
|
// Ensure mountedDir ends with separator to avoid matching sibling directories
|
|
// e.g., "/mnt/remote2" should not match "/mnt/remote"
|
|
mountedDirStr := string(mountedDir)
|
|
if !strings.HasSuffix(mountedDirStr, "/") {
|
|
mountedDirStr += "/"
|
|
}
|
|
return strings.HasPrefix(string(dir)+"/", mountedDirStr)
|
|
}
|