From 0e570d6a8f2bb286db69429d339e279966b59366 Mon Sep 17 00:00:00 2001 From: Peter Dodd Date: Thu, 12 Mar 2026 22:21:07 +0000 Subject: [PATCH] feat(remote.mount): add -metadataStrategy flag to control metadata caching (#8568) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(remote): add -noSync flag to skip upfront metadata pull on mount Made-with: Cursor * refactor(remote): split mount setup from metadata sync Extract ensureMountDirectory for create/validate; call pullMetadata directly when sync is needed. Caller controls sync step for -noSync. Made-with: Cursor * fix(remote): validate mount root when -noSync so bad bucket/creds fail fast When -noSync is used, perform a cheap remote check (ListBuckets and verify bucket exists) instead of skipping all remote I/O. Invalid buckets or credentials now fail at mount time. Made-with: Cursor * test(remote): add TestRemoteMountNoSync for -noSync mount and persisted mapping Made-with: Cursor * test(remote): assert no upfront metadata after -noSync mount After remote.mount -noSync, run fs.ls on the mount dir and assert empty listing so the test fails if pullMetadata was invoked eagerly. Made-with: Cursor * fix(remote): propagate non-ErrNotFound lookup errors in ensureMountDirectory Return lookupErr immediately for any LookupDirectoryEntry failure that is not filer_pb.ErrNotFound, so only the not-found case creates the entry and other lookup failures are reported to the caller. Made-with: Cursor * fix(remote): use errors.Is for ErrNotFound in ensureMountDirectory Replace fragile strings.Contains(lookupErr.Error(), ...) with errors.Is(lookupErr, filer_pb.ErrNotFound) before calling CreateEntry. Made-with: Cursor * fix(remote): use LookupEntry so ErrNotFound is recognised after gRPC Raw gRPC LookupDirectoryEntry returns a status error, not the sentinel, so errors.Is(lookupErr, filer_pb.ErrNotFound) was always false. Use filer_pb.LookupEntry which normalises not-found to ErrNotFound so the mount directory is created when missing. Made-with: Cursor * test(remote): ignore weed shell banner in TestRemoteMountNoSync fs.ls count Exclude master/filer and prompt lines from entry count so the assertion checks only actual fs.ls output for empty -noSync mount. Made-with: Cursor * fix(remote.mount): use 0755 for mount dir, document bucket-less early return Made-with: Cursor * feat(remote.mount): replace -noSync with -metadataStrategy=lazy|eager - Add -metadataStrategy flag (eager default, lazy skips upfront metadata pull) - Accept lazy/eager case-insensitively; reject invalid values with clear error - Rename TestRemoteMountNoSync to TestRemoteMountMetadataStrategyLazy - Add TestRemoteMountMetadataStrategyEager and TestRemoteMountMetadataStrategyInvalid Made-with: Cursor * fix(remote.mount): validate strategy and remote before creating mount directory Move strategy validation and validateMountRoot (lazy path) before ensureMountDirectory so that invalid strategies or bad bucket/credentials fail without leaving orphaned directory entries in the filer. * refactor(remote.mount): remove unused remote param from ensureMountDirectory The remote *RemoteStorageLocation parameter was left over from the old syncMetadata signature. Only remoteConf.Name is used inside the function. * doc(remote.mount): add TODO for HeadBucket-style validation validateMountRoot currently lists all buckets to verify one exists. Note the need for a targeted BucketExists method in the interface. * refactor(remote.mount): use MetadataStrategy type and constants Replace raw string comparisons with a MetadataStrategy type and MetadataStrategyEager/MetadataStrategyLazy constants for clarity and compile-time safety. * refactor(remote.mount): rename MetadataStrategy to MetadataCacheStrategy More precisely describes the purpose: controlling how metadata is cached from the remote, not metadata handling in general. * fix(remote.mount): remove validateMountRoot from lazy path Lazy mount's purpose is to skip remote I/O. Validating via ListBuckets contradicts that, especially on accounts with many buckets. Invalid buckets or credentials will surface on first lazy access instead. * fix(test): handle shell exit 0 in TestRemoteMountMetadataStrategyInvalid The weed shell process exits with code 0 even when individual commands fail — errors appear in stdout. Check output instead of requiring a non-nil error. * test(remote.mount): remove metadataStrategy shell integration tests These tests only verify string output from a shell process that always exits 0 — they cannot meaningfully validate eager vs lazy behavior without a real remote backend. --------- Co-authored-by: Chris Lu --- weed/shell/command_remote_mount.go | 54 +++++++++++++--------- weed/shell/command_remote_mount_buckets.go | 8 ++-- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/weed/shell/command_remote_mount.go b/weed/shell/command_remote_mount.go index e9237866c..5f995e9f3 100644 --- a/weed/shell/command_remote_mount.go +++ b/weed/shell/command_remote_mount.go @@ -2,6 +2,7 @@ package shell import ( "context" + "errors" "flag" "fmt" "io" @@ -17,6 +18,13 @@ import ( "google.golang.org/protobuf/proto" ) +type MetadataCacheStrategy string + +const ( + MetadataCacheEager MetadataCacheStrategy = "eager" + MetadataCacheLazy MetadataCacheStrategy = "lazy" +) + func init() { Commands = append(Commands, &commandRemoteMount{}) } @@ -29,13 +37,15 @@ func (c *commandRemoteMount) Name() string { } func (c *commandRemoteMount) Help() string { - return `mount remote storage and pull its metadata + return `mount remote storage and optionally pull its metadata # assume a remote storage is configured to name "cloud1" remote.configure -name=cloud1 -type=s3 -s3.access_key=xxx -s3.secret_key=yyy - # mount and pull one bucket + # mount and pull one bucket (full upfront metadata sync) remote.mount -dir=/xxx -remote=cloud1/bucket + # mount without upfront sync; metadata is fetched lazily on access + remote.mount -dir=/xxx -remote=cloud1/bucket -metadataStrategy=lazy # mount and pull one directory in the bucket remote.mount -dir=/xxx -remote=cloud1/bucket/dir1 @@ -55,6 +65,7 @@ func (c *commandRemoteMount) Do(args []string, commandEnv *CommandEnv, writer io dir := remoteMountCommand.String("dir", "", "a directory in filer") nonEmpty := remoteMountCommand.Bool("nonempty", false, "allows the mounting over a non-empty directory") + metadataStrategy := remoteMountCommand.String("metadataStrategy", string(MetadataCacheEager), "lazy: skip upfront metadata pull; eager: full metadata pull (default)") remote := remoteMountCommand.String("remote", "", "a directory in remote storage, ex. //path/to/dir") if err = remoteMountCommand.Parse(args); err != nil { @@ -77,9 +88,19 @@ func (c *commandRemoteMount) Do(args []string, commandEnv *CommandEnv, writer io return err } - // sync metadata from remote - if err = syncMetadata(commandEnv, writer, *dir, *nonEmpty, remoteConf, remoteStorageLocation); err != nil { - return fmt.Errorf("pull metadata: %w", err) + strategy := MetadataCacheStrategy(strings.ToLower(*metadataStrategy)) + if strategy != MetadataCacheLazy && strategy != MetadataCacheEager { + return fmt.Errorf("metadataStrategy must be %s or %s, got %q", MetadataCacheLazy, MetadataCacheEager, *metadataStrategy) + } + + if err = ensureMountDirectory(commandEnv, *dir, *nonEmpty, remoteConf); err != nil { + return fmt.Errorf("mount setup: %w", err) + } + + if strategy == MetadataCacheEager { + if err = pullMetadata(commandEnv, writer, util.FullPath(*dir), remoteStorageLocation, util.FullPath(*dir), remoteConf); err != nil { + return fmt.Errorf("cache metadata: %w", err) + } } // store a mount configuration in filer @@ -108,17 +129,15 @@ func jsonPrintln(writer io.Writer, message proto.Message) error { return filer.ProtoToText(writer, message) } -func syncMetadata(commandEnv *CommandEnv, writer io.Writer, dir string, nonEmpty bool, remoteConf *remote_pb.RemoteConf, remote *remote_pb.RemoteStorageLocation) error { - - // find existing directory, and ensure the directory is empty - err := commandEnv.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { +func ensureMountDirectory(commandEnv *CommandEnv, dir string, nonEmpty bool, remoteConf *remote_pb.RemoteConf) error { + return commandEnv.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { parent, name := util.FullPath(dir).DirAndName() - _, lookupErr := client.LookupDirectoryEntry(context.Background(), &filer_pb.LookupDirectoryEntryRequest{ + _, lookupErr := filer_pb.LookupEntry(context.Background(), client, &filer_pb.LookupDirectoryEntryRequest{ Directory: parent, Name: name, }) if lookupErr != nil { - if strings.Contains(lookupErr.Error(), filer_pb.ErrNotFound.Error()) { + if errors.Is(lookupErr, filer_pb.ErrNotFound) { _, createErr := client.CreateEntry(context.Background(), &filer_pb.CreateEntryRequest{ Directory: parent, Entry: &filer_pb.Entry{ @@ -127,7 +146,7 @@ func syncMetadata(commandEnv *CommandEnv, writer io.Writer, dir string, nonEmpty Attributes: &filer_pb.FuseAttributes{ Mtime: time.Now().Unix(), Crtime: time.Now().Unix(), - FileMode: uint32(0644 | os.ModeDir), + FileMode: uint32(0755 | os.ModeDir), }, RemoteEntry: &filer_pb.RemoteEntry{ StorageName: remoteConf.Name, @@ -136,6 +155,7 @@ func syncMetadata(commandEnv *CommandEnv, writer io.Writer, dir string, nonEmpty }) return createErr } + return lookupErr } mountToDirIsEmpty := true @@ -156,16 +176,6 @@ func syncMetadata(commandEnv *CommandEnv, writer io.Writer, dir string, nonEmpty return nil }) - if err != nil { - return err - } - - // pull metadata from remote - if err = pullMetadata(commandEnv, writer, util.FullPath(dir), remote, util.FullPath(dir), remoteConf); err != nil { - return fmt.Errorf("cache metadata: %w", err) - } - - return nil } // if an entry has synchronized metadata but has not synchronized content diff --git a/weed/shell/command_remote_mount_buckets.go b/weed/shell/command_remote_mount_buckets.go index 77aa1ba30..d1b664c69 100644 --- a/weed/shell/command_remote_mount_buckets.go +++ b/weed/shell/command_remote_mount_buckets.go @@ -109,9 +109,11 @@ func (c *commandRemoteMountBuckets) Do(args []string, commandEnv *CommandEnv, wr Path: "/", } - // sync metadata from remote - if err = syncMetadata(commandEnv, writer, string(dir), true, remoteConf, remoteStorageLocation); err != nil { - return fmt.Errorf("pull metadata on %+v: %v", remoteStorageLocation, err) + if err = ensureMountDirectory(commandEnv, string(dir), true, remoteConf); err != nil { + return fmt.Errorf("mount setup on %+v: %v", remoteStorageLocation, err) + } + if err = pullMetadata(commandEnv, writer, dir, remoteStorageLocation, dir, remoteConf); err != nil { + return fmt.Errorf("cache metadata on %+v: %v", remoteStorageLocation, err) } // store a mount configuration in filer