Browse Source

mount: efficient file lookup in large directories, skipping directory caching (#7818)

* mount: skip directory caching on file lookup and write

When opening or creating a file in a directory that hasn't been cached yet,
don't list the entire directory. Instead:
- For reads: fetch only the single file's metadata directly from the filer
- For writes: create on filer but skip local cache insertion

This fixes a performance issue where opening a file in a directory
with millions of files would hang because EnsureVisited() had to
list all entries before the open could complete.

The directory will still be cached when explicitly listed (ReadDir),
but individual file operations now bypass the full directory caching.

Key optimizations:
- Extract shared lookupEntry() method to eliminate code duplication
- Skip EnsureVisited on Lookup (file open)
- Skip cache insertion on Mknod, Mkdir, Symlink, Link if dir not cached
- Skip cache update on file sync/flush if dir not cached
- If directory IS cached and entry not found, return ENOENT immediately

Fixes #7145

* mount: add error handling for meta cache insert/update operations

Handle errors from metaCache.InsertEntry and metaCache.UpdateEntry calls
instead of silently ignoring them. This prevents silent cache inconsistencies
and ensures errors are properly propagated.

Files updated:
- filehandle_read.go: handle InsertEntry error in downloadRemoteEntry
- weedfs_file_sync.go: handle InsertEntry error in doFlush
- weedfs_link.go: handle UpdateEntry and InsertEntry errors in Link
- weedfs_symlink.go: handle InsertEntry error in Symlink

* mount: use error wrapping (%w) for consistent error handling

Use %w instead of %v in fmt.Errorf to preserve the original error,
allowing it to be inspected up the call stack with errors.Is/As.
pull/7821/head
Chris Lu 2 days ago
committed by GitHub
parent
commit
6442da6f17
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 8
      weed/mount/filehandle_read.go
  2. 6
      weed/mount/meta_cache/meta_cache.go
  3. 44
      weed/mount/weedfs.go
  4. 32
      weed/mount/weedfs_dir_lookup.go
  5. 6
      weed/mount/weedfs_dir_mkrm.go
  6. 6
      weed/mount/weedfs_file_mkrm.go
  7. 7
      weed/mount/weedfs_file_sync.go
  8. 16
      weed/mount/weedfs_link.go
  9. 4
      weed/mount/weedfs_metadata_flush.go
  10. 7
      weed/mount/weedfs_symlink.go

8
weed/mount/filehandle_read.go

@ -9,6 +9,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
)
func (fh *FileHandle) lockForRead(startOffset int64, size int) {
@ -162,7 +163,12 @@ func (fh *FileHandle) downloadRemoteEntry(entry *LockedEntry) error {
fh.SetEntry(resp.Entry)
fh.wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, resp.Entry))
// Only update cache if the parent directory is cached
if fh.wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) {
if err := fh.wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, resp.Entry)); err != nil {
return fmt.Errorf("update meta cache for %s: %w", fileFullPath, err)
}
}
return nil
})

6
weed/mount/meta_cache/meta_cache.go

@ -187,3 +187,9 @@ func (mc *MetaCache) Debug() {
debuggable.Debug(os.Stderr)
}
}
// IsDirectoryCached reports whether dirPath is currently marked as fully
// cached, meaning its entries were already loaded (via EnsureVisited or
// ReadDir). The decision is delegated to the cache's isCachedFn callback.
func (mc *MetaCache) IsDirectoryCached(dirPath util.FullPath) bool {
	cached := mc.isCachedFn(dirPath)
	return cached
}

44
weed/mount/weedfs.go

@ -2,7 +2,6 @@ package mount
import (
"context"
"errors"
"math/rand/v2"
"os"
"path"
@ -251,7 +250,7 @@ func (wfs *WFS) maybeReadEntry(inode uint64) (path util.FullPath, fh *FileHandle
func (wfs *WFS) maybeLoadEntry(fullpath util.FullPath) (*filer_pb.Entry, fuse.Status) {
// glog.V(3).Infof("read entry cache miss %s", fullpath)
dir, name := fullpath.DirAndName()
_, name := fullpath.DirAndName()
// return a valid entry for the mount root
if string(fullpath) == wfs.option.FilerMountRootPath {
@ -268,13 +267,46 @@ func (wfs *WFS) maybeLoadEntry(fullpath util.FullPath) (*filer_pb.Entry, fuse.St
}, fuse.OK
}
// read from async meta cache
meta_cache.EnsureVisited(wfs.metaCache, wfs, util.FullPath(dir))
entry, status := wfs.lookupEntry(fullpath)
if status != fuse.OK {
return nil, status
}
return entry.ToProtoEntry(), fuse.OK
}
// lookupEntry looks up an entry by path, checking the local cache first.
// If the directory is cached, it trusts the cache. Otherwise, it fetches
// directly from the filer without caching the entire directory.
// This avoids the performance issue of listing millions of files just to open one.
func (wfs *WFS) lookupEntry(fullpath util.FullPath) (*filer.Entry, fuse.Status) {
dir, _ := fullpath.DirAndName()
// Try to find the entry in the local cache first.
cachedEntry, cacheErr := wfs.metaCache.FindEntry(context.Background(), fullpath)
if errors.Is(cacheErr, filer_pb.ErrNotFound) {
if cacheErr != nil && cacheErr != filer_pb.ErrNotFound {
glog.Errorf("lookupEntry: cache lookup for %s failed: %v", fullpath, cacheErr)
return nil, fuse.EIO
}
if cachedEntry != nil {
glog.V(4).Infof("lookupEntry cache hit %s", fullpath)
return cachedEntry, fuse.OK
}
// If the directory is cached but entry not found, file doesn't exist.
// No need to query the filer again.
if wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) {
glog.V(4).Infof("lookupEntry cache miss (dir cached) %s", fullpath)
return nil, fuse.ENOENT
}
// Directory not cached - fetch directly from filer without caching the entire directory.
glog.V(4).Infof("lookupEntry fetching from filer %s", fullpath)
entry, err := filer_pb.GetEntry(context.Background(), wfs, fullpath)
if err != nil {
glog.V(1).Infof("lookupEntry GetEntry %s: %v", fullpath, err)
return nil, fuse.ENOENT
}
return cachedEntry.ToProtoEntry(), fuse.OK
return filer.FromPbEntry(dir, entry), fuse.OK
}
func (wfs *WFS) LookupFn() wdclient.LookupFileIdFunctionType {

32
weed/mount/weedfs_dir_lookup.go

@ -1,14 +1,10 @@
package mount
import (
"context"
"github.com/hanwen/go-fuse/v2/fuse"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/mount/meta_cache"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
)
// Lookup is called by the kernel when the VFS wants to know
@ -29,30 +25,10 @@ func (wfs *WFS) Lookup(cancel <-chan struct{}, header *fuse.InHeader, name strin
fullFilePath := dirPath.Child(name)
visitErr := meta_cache.EnsureVisited(wfs.metaCache, wfs, dirPath)
if visitErr != nil {
glog.Errorf("dir Lookup %s: %v", dirPath, visitErr)
return fuse.EIO
}
localEntry, cacheErr := wfs.metaCache.FindEntry(context.Background(), fullFilePath)
if cacheErr == filer_pb.ErrNotFound {
return fuse.ENOENT
}
if localEntry == nil {
// glog.V(3).Infof("dir Lookup cache miss %s", fullFilePath)
entry, err := filer_pb.GetEntry(context.Background(), wfs, fullFilePath)
if err != nil {
glog.V(1).Infof("dir GetEntry %s: %v", fullFilePath, err)
return fuse.ENOENT
}
localEntry = filer.FromPbEntry(string(dirPath), entry)
} else {
glog.V(4).Infof("dir Lookup cache hit %s", fullFilePath)
}
if localEntry == nil {
return fuse.ENOENT
// Use shared lookup logic that checks cache first, then filer if needed
localEntry, status := wfs.lookupEntry(fullFilePath)
if status != fuse.OK {
return status
}
inode := wfs.inodeToPath.Lookup(fullFilePath, localEntry.Crtime.Unix(), localEntry.IsDirectory(), len(localEntry.HardLinkId) > 0, localEntry.Inode, true)

6
weed/mount/weedfs_dir_mkrm.go

@ -68,8 +68,12 @@ func (wfs *WFS) Mkdir(cancel <-chan struct{}, in *fuse.MkdirIn, name string, out
return err
}
// Only cache the entry if the parent directory is already cached.
// This avoids polluting the cache with partial directory data.
if wfs.metaCache.IsDirectoryCached(dirFullPath) {
if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil {
return fmt.Errorf("local mkdir dir %s: %v", entryFullPath, err)
return fmt.Errorf("local mkdir dir %s: %w", entryFullPath, err)
}
}
return nil

6
weed/mount/weedfs_file_mkrm.go

@ -88,8 +88,12 @@ func (wfs *WFS) Mknod(cancel <-chan struct{}, in *fuse.MknodIn, name string, out
return err
}
// Only cache the entry if the parent directory is already cached.
// This avoids polluting the cache with partial directory data.
if wfs.metaCache.IsDirectoryCached(dirFullPath) {
if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil {
return fmt.Errorf("local mknod %s: %v", entryFullPath, err)
return fmt.Errorf("local mknod %s: %w", entryFullPath, err)
}
}
return nil

7
weed/mount/weedfs_file_sync.go

@ -166,7 +166,12 @@ func (wfs *WFS) doFlush(fh *FileHandle, uid, gid uint32) fuse.Status {
return fmt.Errorf("fh flush create %s: %v", fileFullPath, err)
}
wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry))
// Only update cache if the parent directory is cached
if wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) {
if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil {
return fmt.Errorf("update meta cache for %s: %w", fileFullPath, err)
}
}
return nil
})

16
weed/mount/weedfs_link.go

@ -2,6 +2,7 @@ package mount
import (
"context"
"fmt"
"syscall"
"time"
@ -10,6 +11,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
)
/*
@ -91,13 +93,23 @@ func (wfs *WFS) Link(cancel <-chan struct{}, in *fuse.LinkIn, name string, out *
if err := filer_pb.UpdateEntry(context.Background(), client, updateOldEntryRequest); err != nil {
return err
}
wfs.metaCache.UpdateEntry(context.Background(), filer.FromPbEntry(updateOldEntryRequest.Directory, updateOldEntryRequest.Entry))
// Only update cache if the directory is cached
if wfs.metaCache.IsDirectoryCached(util.FullPath(updateOldEntryRequest.Directory)) {
if err := wfs.metaCache.UpdateEntry(context.Background(), filer.FromPbEntry(updateOldEntryRequest.Directory, updateOldEntryRequest.Entry)); err != nil {
return fmt.Errorf("update meta cache for %s: %w", oldEntryPath, err)
}
}
if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil {
return err
}
wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry))
// Only cache the entry if the parent directory is already cached.
if wfs.metaCache.IsDirectoryCached(newParentPath) {
if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil {
return fmt.Errorf("insert meta cache for %s: %w", newParentPath.Child(name), err)
}
}
return nil
})

4
weed/mount/weedfs_metadata_flush.go

@ -146,10 +146,12 @@ func (wfs *WFS) flushFileMetadata(fh *FileHandle) error {
return err
}
// Update meta cache
// Only update cache if the parent directory is cached
if wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) {
if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil {
return fmt.Errorf("update meta cache for %s: %w", fileFullPath, err)
}
}
glog.V(3).Infof("flushed metadata for %s with %d chunks", fileFullPath, len(entry.GetChunks()))
return nil

7
weed/mount/weedfs_symlink.go

@ -57,7 +57,12 @@ func (wfs *WFS) Symlink(cancel <-chan struct{}, header *fuse.InHeader, target st
return fmt.Errorf("symlink %s: %v", entryFullPath, err)
}
wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry))
// Only cache the entry if the parent directory is already cached.
if wfs.metaCache.IsDirectoryCached(dirPath) {
if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil {
return fmt.Errorf("insert meta cache for symlink %s: %w", entryFullPath, err)
}
}
return nil
})

Loading…
Cancel
Save