diff --git a/other/java/client/src/main/proto/filer.proto b/other/java/client/src/main/proto/filer.proto index 9257996ed..78dd58b1f 100644 --- a/other/java/client/src/main/proto/filer.proto +++ b/other/java/client/src/main/proto/filer.proto @@ -100,10 +100,12 @@ message ListEntriesRequest { string startFromFileName = 3; bool inclusiveStartFrom = 4; uint32 limit = 5; + int64 snapshot_ts_ns = 6; } message ListEntriesResponse { Entry entry = 1; + int64 snapshot_ts_ns = 2; } message RemoteEntry { @@ -203,6 +205,7 @@ message CreateEntryRequest { message CreateEntryResponse { string error = 1; + SubscribeMetadataResponse metadata_event = 2; } message UpdateEntryRequest { @@ -212,6 +215,7 @@ message UpdateEntryRequest { repeated int32 signatures = 4; } message UpdateEntryResponse { + SubscribeMetadataResponse metadata_event = 1; } message AppendToEntryRequest { @@ -236,6 +240,7 @@ message DeleteEntryRequest { message DeleteEntryResponse { string error = 1; + SubscribeMetadataResponse metadata_event = 2; } message AtomicRenameEntryRequest { @@ -469,6 +474,7 @@ message CacheRemoteObjectToLocalClusterRequest { } message CacheRemoteObjectToLocalClusterResponse { Entry entry = 1; + SubscribeMetadataResponse metadata_event = 2; } ///////////////////////// diff --git a/weed/admin/dash/admin_server.go b/weed/admin/dash/admin_server.go index 2c129c671..deb1a9215 100644 --- a/weed/admin/dash/admin_server.go +++ b/weed/admin/dash/admin_server.go @@ -427,7 +427,7 @@ func (s *AdminServer) GetS3Buckets() ([]S3Bucket, error) { return err } - if resp.Entry.IsDirectory { + if resp.Entry != nil && resp.Entry.IsDirectory { bucketName := resp.Entry.Name if strings.HasPrefix(bucketName, ".") { // Skip internal/system directories from Object Store bucket listing. 
@@ -480,13 +480,18 @@ func (s *AdminServer) GetS3Buckets() ([]S3Bucket, error) { } } + var createdAt, lastModified time.Time + if resp.Entry.Attributes != nil { + createdAt = time.Unix(resp.Entry.Attributes.Crtime, 0) + lastModified = time.Unix(resp.Entry.Attributes.Mtime, 0) + } bucket := S3Bucket{ Name: bucketName, - CreatedAt: time.Unix(resp.Entry.Attributes.Crtime, 0), + CreatedAt: createdAt, LogicalSize: logicalSize, PhysicalSize: physicalSize, ObjectCount: objectCount, - LastModified: time.Unix(resp.Entry.Attributes.Mtime, 0), + LastModified: lastModified, Quota: quota, QuotaEnabled: quotaEnabled, VersioningStatus: versioningStatus, diff --git a/weed/admin/dash/mq_management.go b/weed/admin/dash/mq_management.go index ba9c1cd18..3f6407b18 100644 --- a/weed/admin/dash/mq_management.go +++ b/weed/admin/dash/mq_management.go @@ -324,7 +324,7 @@ func (s *AdminServer) GetConsumerGroupOffsets(namespace, topicName string) ([]Co } // Only process directories that are versions (start with "v") - if versionResp.Entry.IsDirectory && strings.HasPrefix(versionResp.Entry.Name, "v") { + if versionResp.Entry != nil && versionResp.Entry.IsDirectory && strings.HasPrefix(versionResp.Entry.Name, "v") { versionDir := filepath.Join(topicDir, versionResp.Entry.Name) // List all partition directories under the version directory (e.g., 0315-0630) @@ -352,7 +352,7 @@ func (s *AdminServer) GetConsumerGroupOffsets(namespace, topicName string) ([]Co } // Only process directories that are partitions (format: NNNN-NNNN) - if partitionResp.Entry.IsDirectory { + if partitionResp.Entry != nil && partitionResp.Entry.IsDirectory { // Parse partition range to get partition start ID (e.g., "0315-0630" -> 315) var partitionStart, partitionStop int32 if n, err := fmt.Sscanf(partitionResp.Entry.Name, "%04d-%04d", &partitionStart, &partitionStop); n != 2 || err != nil { @@ -387,7 +387,7 @@ func (s *AdminServer) GetConsumerGroupOffsets(namespace, topicName string) ([]Co } // Only process .offset files 
- if !offsetResp.Entry.IsDirectory && strings.HasSuffix(offsetResp.Entry.Name, ".offset") { + if offsetResp.Entry != nil && !offsetResp.Entry.IsDirectory && strings.HasSuffix(offsetResp.Entry.Name, ".offset") { consumerGroup := strings.TrimSuffix(offsetResp.Entry.Name, ".offset") // Read the offset value from the file @@ -401,7 +401,10 @@ func (s *AdminServer) GetConsumerGroupOffsets(namespace, topicName string) ([]Co offset := int64(util.BytesToUint64(offsetData)) // Get the file modification time - lastUpdated := time.Unix(offsetResp.Entry.Attributes.Mtime, 0) + var lastUpdated time.Time + if offsetResp.Entry.Attributes != nil { + lastUpdated = time.Unix(offsetResp.Entry.Attributes.Mtime, 0) + } offsets = append(offsets, ConsumerGroupOffsetInfo{ ConsumerGroup: consumerGroup, diff --git a/weed/admin/dash/topic_retention.go b/weed/admin/dash/topic_retention.go index fed4893a4..cc66f9035 100644 --- a/weed/admin/dash/topic_retention.go +++ b/weed/admin/dash/topic_retention.go @@ -151,17 +151,21 @@ func (p *TopicRetentionPurger) purgeTopicData(topicRetention TopicRetentionConfi } // Only process directories that are versions (start with "v") - if versionResp.Entry.IsDirectory && strings.HasPrefix(versionResp.Entry.Name, "v") { + if versionResp.Entry != nil && versionResp.Entry.IsDirectory && strings.HasPrefix(versionResp.Entry.Name, "v") { versionTime, err := p.parseVersionTime(versionResp.Entry.Name) if err != nil { glog.Warningf("Failed to parse version time from %s: %v", versionResp.Entry.Name, err) continue } + var modTime time.Time + if versionResp.Entry.Attributes != nil { + modTime = time.Unix(versionResp.Entry.Attributes.Mtime, 0) + } versionDirs = append(versionDirs, VersionDirInfo{ Name: versionResp.Entry.Name, VersionTime: versionTime, - ModTime: time.Unix(versionResp.Entry.Attributes.Mtime, 0), + ModTime: modTime, }) } } @@ -260,6 +264,9 @@ func (p *TopicRetentionPurger) deleteDirectoryRecursively(client filer_pb.Seawee return fmt.Errorf("failed to receive 
entries: %w", err) } + if resp.Entry == nil { + continue + } entryPath := filepath.Join(dirPath, resp.Entry.Name) if resp.Entry.IsDirectory { diff --git a/weed/filer/filer_notify.go b/weed/filer/filer_notify.go index 6fd595f87..48e1b163c 100644 --- a/weed/filer/filer_notify.go +++ b/weed/filer/filer_notify.go @@ -19,19 +19,23 @@ import ( ) func (f *Filer) NotifyUpdateEvent(ctx context.Context, oldEntry, newEntry *Entry, deleteChunks, isFromOtherCluster bool, signatures []int32) { + f.notifyUpdateEvent(ctx, oldEntry, newEntry, deleteChunks, isFromOtherCluster, signatures) +} + +func (f *Filer) notifyUpdateEvent(ctx context.Context, oldEntry, newEntry *Entry, deleteChunks, isFromOtherCluster bool, signatures []int32) *filer_pb.SubscribeMetadataResponse { var fullpath string if oldEntry != nil { fullpath = string(oldEntry.FullPath) } else if newEntry != nil { fullpath = string(newEntry.FullPath) } else { - return + return nil } // println("fullpath:", fullpath) if strings.HasPrefix(fullpath, SystemLogDir) { - return + return nil } foundSelf := false for _, sig := range signatures { @@ -43,18 +47,8 @@ func (f *Filer) NotifyUpdateEvent(ctx context.Context, oldEntry, newEntry *Entry signatures = append(signatures, f.Signature) } - newParentPath := "" - if newEntry != nil { - newParentPath, _ = newEntry.FullPath.DirAndName() - } - eventNotification := &filer_pb.EventNotification{ - OldEntry: oldEntry.ToProtoEntry(), - NewEntry: newEntry.ToProtoEntry(), - DeleteChunks: deleteChunks, - NewParentPath: newParentPath, - IsFromOtherCluster: isFromOtherCluster, - Signatures: signatures, - } + event := f.newMetadataEvent(oldEntry, newEntry, deleteChunks, isFromOtherCluster, signatures) + eventNotification := event.EventNotification if notification.Queue != nil { glog.V(3).Infof("notifying entry update %v", fullpath) @@ -64,31 +58,57 @@ func (f *Filer) NotifyUpdateEvent(ctx context.Context, oldEntry, newEntry *Entry } } - f.logMetaEvent(ctx, fullpath, eventNotification) + 
f.logMetaEvent(ctx, event) + if sink := metadataEventSinkFromContext(ctx); sink != nil { + sink.Record(event) + } // Trigger empty folder cleanup for local events // Remote events are handled via MetaAggregator.onMetadataChangeEvent f.triggerLocalEmptyFolderCleanup(oldEntry, newEntry) + return event } -func (f *Filer) logMetaEvent(ctx context.Context, fullpath string, eventNotification *filer_pb.EventNotification) { - - dir, _ := util.FullPath(fullpath).DirAndName() - - event := &filer_pb.SubscribeMetadataResponse{ - Directory: dir, - EventNotification: eventNotification, - TsNs: time.Now().UnixNano(), +func (f *Filer) newMetadataEvent(oldEntry, newEntry *Entry, deleteChunks, isFromOtherCluster bool, signatures []int32) *filer_pb.SubscribeMetadataResponse { + if oldEntry == nil && newEntry == nil { + return nil + } + var fullpath util.FullPath + if oldEntry != nil { + fullpath = oldEntry.FullPath } + if fullpath == "" && newEntry != nil { + fullpath = newEntry.FullPath + } + dir, _ := fullpath.DirAndName() + newParentPath := "" + if newEntry != nil { + newParentPath, _ = newEntry.FullPath.DirAndName() + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: dir, + EventNotification: &filer_pb.EventNotification{ + OldEntry: oldEntry.ToProtoEntry(), + NewEntry: newEntry.ToProtoEntry(), + DeleteChunks: deleteChunks, + NewParentPath: newParentPath, + IsFromOtherCluster: isFromOtherCluster, + Signatures: signatures, + }, + TsNs: time.Now().UnixNano(), + } +} + +func (f *Filer) logMetaEvent(ctx context.Context, event *filer_pb.SubscribeMetadataResponse) { data, err := proto.Marshal(event) if err != nil { glog.Errorf("failed to marshal filer_pb.SubscribeMetadataResponse %+v: %v", event, err) return } - if err := f.LocalMetaLogBuffer.AddDataToBuffer([]byte(dir), data, event.TsNs); err != nil { - glog.Errorf("failed to add data to log buffer for %s: %v", dir, err) + if err := f.LocalMetaLogBuffer.AddDataToBuffer([]byte(event.Directory), data, event.TsNs); err != nil { 
+ glog.Errorf("failed to add data to log buffer for %s: %v", event.Directory, err) } } diff --git a/weed/filer/metadata_event_sink.go b/weed/filer/metadata_event_sink.go new file mode 100644 index 000000000..79e959c9f --- /dev/null +++ b/weed/filer/metadata_event_sink.go @@ -0,0 +1,47 @@ +package filer + +import ( + "context" + + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" +) + +type metadataEventSinkKey struct{} + +// MetadataEventSink captures the last metadata event emitted while serving a +// request. It is request-scoped and accessed only by the goroutine handling +// the gRPC call, so no mutex is needed. +type MetadataEventSink struct { + last *filer_pb.SubscribeMetadataResponse +} + +func WithMetadataEventSink(ctx context.Context) (context.Context, *MetadataEventSink) { + sink := &MetadataEventSink{} + return context.WithValue(ctx, metadataEventSinkKey{}, sink), sink +} + +func metadataEventSinkFromContext(ctx context.Context) *MetadataEventSink { + if ctx == nil { + return nil + } + sink, _ := ctx.Value(metadataEventSinkKey{}).(*MetadataEventSink) + return sink +} + +// Record stores the event, replacing any previously recorded one. +// Each filer RPC emits at most one NotifyUpdateEvent, so only the last +// event is retained. If an RPC were to emit multiple events, only the +// final one would be returned to the caller. 
+func (s *MetadataEventSink) Record(event *filer_pb.SubscribeMetadataResponse) { + if s == nil || event == nil { + return + } + s.last = event +} + +func (s *MetadataEventSink) Last() *filer_pb.SubscribeMetadataResponse { + if s == nil { + return nil + } + return s.last +} diff --git a/weed/filer/metadata_event_sink_test.go b/weed/filer/metadata_event_sink_test.go new file mode 100644 index 000000000..19edad1c7 --- /dev/null +++ b/weed/filer/metadata_event_sink_test.go @@ -0,0 +1,43 @@ +package filer + +import ( + "context" + "testing" + "time" + + "github.com/seaweedfs/seaweedfs/weed/util" + "github.com/seaweedfs/seaweedfs/weed/util/log_buffer" +) + +func TestNotifyUpdateEventRecordsRequestMetadataEvent(t *testing.T) { + f := &Filer{ + Signature: 42, + LocalMetaLogBuffer: log_buffer.NewLogBuffer( + "test", + time.Hour, + func(*log_buffer.LogBuffer, time.Time, time.Time, []byte, int64, int64) {}, + nil, + nil, + ), + } + + ctx, sink := WithMetadataEventSink(context.Background()) + f.NotifyUpdateEvent(ctx, &Entry{FullPath: util.FullPath("/dir/file.txt")}, nil, true, false, []int32{7}) + + event := sink.Last() + if event == nil { + t.Fatal("expected metadata event to be recorded") + } + if event.Directory != "/dir" { + t.Fatalf("directory = %q, want /dir", event.Directory) + } + if event.EventNotification.OldEntry == nil || event.EventNotification.OldEntry.Name != "file.txt" { + t.Fatalf("old entry = %+v, want file.txt", event.EventNotification.OldEntry) + } + if got := event.EventNotification.Signatures; len(got) != 2 || got[0] != 7 || got[1] != 42 { + t.Fatalf("signatures = %v, want [7 42]", got) + } + if event.TsNs == 0 { + t.Fatal("expected event timestamp to be set") + } +} diff --git a/weed/mount/filehandle_read.go b/weed/mount/filehandle_read.go index db4647eba..48805b60b 100644 --- a/weed/mount/filehandle_read.go +++ b/weed/mount/filehandle_read.go @@ -9,7 +9,6 @@ import ( "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" 
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "github.com/seaweedfs/seaweedfs/weed/util" ) func (fh *FileHandle) lockForRead(startOffset int64, size int) { @@ -163,11 +162,12 @@ func (fh *FileHandle) downloadRemoteEntry(entry *LockedEntry) error { fh.SetEntry(resp.Entry) - // Only update cache if the parent directory is cached - if fh.wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) { - if err := fh.wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, resp.Entry)); err != nil { - return fmt.Errorf("update meta cache for %s: %w", fileFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataUpdateEvent(request.Directory, resp.Entry) + } + if applyErr := fh.wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("CacheRemoteObject %s: best-effort metadata apply failed: %v", fileFullPath, applyErr) } return nil diff --git a/weed/mount/inode_to_path.go b/weed/mount/inode_to_path.go index e23a9c1e9..053d66484 100644 --- a/weed/mount/inode_to_path.go +++ b/weed/mount/inode_to_path.go @@ -21,18 +21,18 @@ type InodeEntry struct { nlookup uint64 isDirectory bool isChildrenCached bool + readDirDirect bool cachedExpiresTime time.Time lastAccess time.Time lastRefresh time.Time updateWindowStart time.Time updateCount int - needsRefresh bool } func (ie *InodeEntry) resetCacheState() { ie.isChildrenCached = false + ie.readDirDirect = false ie.cachedExpiresTime = time.Time{} - ie.needsRefresh = false ie.updateCount = 0 ie.updateWindowStart = time.Time{} } @@ -188,11 +188,11 @@ func (i *InodeToPath) MarkChildrenCached(fullpath util.FullPath) { return } path.isChildrenCached = true + path.readDirDirect = false now := time.Now() path.lastAccess = now path.lastRefresh = now path.updateCount = 0 - path.needsRefresh = false path.updateWindowStart = time.Time{} if i.cacheMetaTtlSec > 0 { path.cachedExpiresTime = now.Add(i.cacheMetaTtlSec) @@ -264,6 +264,27 @@ func (i 
*InodeToPath) TouchDirectory(fullpath util.FullPath) { entry.lastAccess = time.Now() } +func (i *InodeToPath) MarkDirectoryReadThrough(fullpath util.FullPath, now time.Time) bool { + i.Lock() + defer i.Unlock() + inode, found := i.path2inode[fullpath] + if !found { + return false + } + entry, found := i.inode2path[inode] + if !found || !entry.isDirectory { + return false + } + entry.isChildrenCached = false + entry.readDirDirect = true + entry.cachedExpiresTime = time.Time{} + entry.lastAccess = now + entry.lastRefresh = time.Time{} + entry.updateCount = 0 + entry.updateWindowStart = time.Time{} + return true +} + func (i *InodeToPath) RecordDirectoryUpdate(fullpath util.FullPath, now time.Time, window time.Duration, threshold int) bool { if threshold <= 0 || window <= 0 { return false @@ -284,13 +305,19 @@ func (i *InodeToPath) RecordDirectoryUpdate(fullpath util.FullPath, now time.Tim } entry.updateCount++ if entry.updateCount >= threshold { - entry.needsRefresh = true + entry.isChildrenCached = false + entry.readDirDirect = true + entry.cachedExpiresTime = time.Time{} + entry.lastAccess = now + entry.lastRefresh = time.Time{} + entry.updateCount = 0 + entry.updateWindowStart = time.Time{} return true } return false } -func (i *InodeToPath) NeedsRefresh(fullpath util.FullPath) bool { +func (i *InodeToPath) ShouldReadDirectoryDirect(fullpath util.FullPath) bool { i.RLock() defer i.RUnlock() inode, found := i.path2inode[fullpath] @@ -301,7 +328,7 @@ func (i *InodeToPath) NeedsRefresh(fullpath util.FullPath) bool { if !found || !entry.isDirectory { return false } - return entry.isChildrenCached && entry.needsRefresh + return entry.readDirDirect } func (i *InodeToPath) MarkDirectoryRefreshed(fullpath util.FullPath, now time.Time) { @@ -317,8 +344,8 @@ func (i *InodeToPath) MarkDirectoryRefreshed(fullpath util.FullPath, now time.Ti } entry.lastRefresh = now entry.lastAccess = now + entry.readDirDirect = false entry.updateCount = 0 - entry.needsRefresh = false 
entry.updateWindowStart = time.Time{} if i.cacheMetaTtlSec > 0 { entry.cachedExpiresTime = now.Add(i.cacheMetaTtlSec) diff --git a/weed/mount/inode_to_path_test.go b/weed/mount/inode_to_path_test.go index 63da42fee..f5f3e1a9f 100644 --- a/weed/mount/inode_to_path_test.go +++ b/weed/mount/inode_to_path_test.go @@ -2,6 +2,7 @@ package mount import ( "testing" + "time" "github.com/seaweedfs/seaweedfs/weed/util" ) @@ -92,3 +93,43 @@ func TestInodeEntry_removeOnePath(t *testing.T) { }) } } + +func TestRecordDirectoryUpdateSwitchesDirectoryToReadThrough(t *testing.T) { + root := util.FullPath("/") + dir := util.FullPath("/data") + + inodeToPath := NewInodeToPath(root, 60) + inodeToPath.Lookup(dir, time.Now().Unix(), true, false, 0, true) + inodeToPath.MarkChildrenCached(dir) + + now := time.Now() + if !inodeToPath.RecordDirectoryUpdate(dir, now, time.Second, 1) { + t.Fatal("expected directory to switch to read-through mode") + } + if inodeToPath.IsChildrenCached(dir) { + t.Fatal("directory should no longer be marked cached") + } + if !inodeToPath.ShouldReadDirectoryDirect(dir) { + t.Fatal("directory should be served via direct reads after hot invalidation") + } +} + +func TestMarkChildrenCachedClearsReadThroughMode(t *testing.T) { + root := util.FullPath("/") + dir := util.FullPath("/data") + + inodeToPath := NewInodeToPath(root, 60) + inodeToPath.Lookup(dir, time.Now().Unix(), true, false, 0, true) + + if !inodeToPath.MarkDirectoryReadThrough(dir, time.Now()) { + t.Fatal("expected read-through flag to be set") + } + inodeToPath.MarkChildrenCached(dir) + + if !inodeToPath.IsChildrenCached(dir) { + t.Fatal("directory should be cached after MarkChildrenCached") + } + if inodeToPath.ShouldReadDirectoryDirect(dir) { + t.Fatal("directory should leave read-through mode after caching") + } +} diff --git a/weed/mount/meta_cache/meta_cache.go b/weed/mount/meta_cache/meta_cache.go index e08ba5c2d..a03959cc0 100644 --- a/weed/mount/meta_cache/meta_cache.go +++ 
b/weed/mount/meta_cache/meta_cache.go @@ -2,17 +2,21 @@ package meta_cache import ( "context" + "errors" "os" "sync" "time" "golang.org/x/sync/singleflight" + "fmt" + "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/filer/leveldb" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/util" + "google.golang.org/protobuf/proto" ) // need to have logic similar to FilerStoreWrapper @@ -29,12 +33,61 @@ type MetaCache struct { invalidateFunc func(fullpath util.FullPath, entry *filer_pb.Entry) onDirectoryUpdate func(dir util.FullPath) visitGroup singleflight.Group // deduplicates concurrent EnsureVisited calls for the same path + applyCh chan metadataApplyRequest + applyDone chan struct{} + applyStateMu sync.Mutex + applyClosed bool + buildingDirs map[util.FullPath]*directoryBuildState + dedupRing dedupRingBuffer +} + +var errMetaCacheClosed = errors.New("metadata cache is shut down") + +type MetadataResponseApplyOptions struct { + NotifyDirectories bool + InvalidateEntries bool +} + +var ( + LocalMetadataResponseApplyOptions = MetadataResponseApplyOptions{ + NotifyDirectories: true, + } + SubscriberMetadataResponseApplyOptions = MetadataResponseApplyOptions{ + NotifyDirectories: true, + InvalidateEntries: true, + } +) + +type directoryBuildState struct { + bufferedEvents []*filer_pb.SubscribeMetadataResponse +} + +const recentEventDedupWindow = 4096 + +type metadataApplyRequestKind int + +const ( + metadataApplyEvent metadataApplyRequestKind = iota + metadataBeginBuild + metadataCompleteBuild + metadataAbortBuild + metadataShutdown +) + +type metadataApplyRequest struct { + ctx context.Context + kind metadataApplyRequestKind + resp *filer_pb.SubscribeMetadataResponse + options MetadataResponseApplyOptions + buildPath util.FullPath + snapshotTsNs int64 + done chan error } func NewMetaCache(dbFolder string, uidGidMapper *UidGidMapper, root util.FullPath, markCachedFn 
func(path util.FullPath), isCachedFn func(path util.FullPath) bool, invalidateFunc func(util.FullPath, *filer_pb.Entry), onDirectoryUpdate func(dir util.FullPath)) *MetaCache { leveldbStore, virtualStore := openMetaStore(dbFolder) - return &MetaCache{ + mc := &MetaCache{ root: root, localStore: virtualStore, leveldbStore: leveldbStore, @@ -45,7 +98,13 @@ func NewMetaCache(dbFolder string, uidGidMapper *UidGidMapper, root util.FullPat invalidateFunc: func(fullpath util.FullPath, entry *filer_pb.Entry) { invalidateFunc(fullpath, entry) }, + applyCh: make(chan metadataApplyRequest, 128), + applyDone: make(chan struct{}), + buildingDirs: make(map[util.FullPath]*directoryBuildState), + dedupRing: newDedupRingBuffer(), } + go mc.runApplyLoop() + return mc } func openMetaStore(dbFolder string) (*leveldb.LevelDBStore, filer.VirtualFilerStore) { @@ -85,7 +144,10 @@ func (mc *MetaCache) doBatchInsertEntries(ctx context.Context, entries []*filer. func (mc *MetaCache) AtomicUpdateEntryFromFiler(ctx context.Context, oldPath util.FullPath, newEntry *filer.Entry) error { mc.Lock() defer mc.Unlock() + return mc.atomicUpdateEntryFromFilerLocked(ctx, oldPath, newEntry, false) +} +func (mc *MetaCache) atomicUpdateEntryFromFilerLocked(ctx context.Context, oldPath util.FullPath, newEntry *filer.Entry, allowUncachedInsert bool) error { entry, err := mc.localStore.FindEntry(ctx, oldPath) if err != nil && err != filer_pb.ErrNotFound { glog.Errorf("Metacache: find entry error: %v", err) @@ -110,7 +172,7 @@ func (mc *MetaCache) AtomicUpdateEntryFromFiler(ctx context.Context, oldPath uti if newEntry != nil { newDir, _ := newEntry.DirAndName() - if mc.isCachedFn(util.FullPath(newDir)) { + if allowUncachedInsert || mc.isCachedFn(util.FullPath(newDir)) { glog.V(3).Infof("InsertEntry %s/%s", newDir, newEntry.Name()) if err := mc.localStore.InsertEntry(ctx, newEntry); err != nil { return err @@ -120,6 +182,71 @@ func (mc *MetaCache) AtomicUpdateEntryFromFiler(ctx context.Context, oldPath uti 
return nil } +func (mc *MetaCache) ApplyMetadataResponse(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) error { + if resp == nil || resp.EventNotification == nil { + return nil + } + clonedResp := proto.Clone(resp).(*filer_pb.SubscribeMetadataResponse) + return mc.applyMetadataResponseEnqueue(ctx, clonedResp, options) +} + +// ApplyMetadataResponseOwned is like ApplyMetadataResponse but takes ownership +// of resp without cloning. The caller must not use resp after this call. +func (mc *MetaCache) ApplyMetadataResponseOwned(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) error { + if resp == nil || resp.EventNotification == nil { + return nil + } + return mc.applyMetadataResponseEnqueue(ctx, resp, options) +} + +func (mc *MetaCache) applyMetadataResponseEnqueue(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) error { + if ctx == nil { + ctx = context.Background() + } + req := metadataApplyRequest{ + // Use a non-cancellable context for the queued mutation so a + // cancelled caller doesn't abort the apply loop mid-write. 
+ ctx: context.Background(), + kind: metadataApplyEvent, + resp: resp, + options: options, + done: make(chan error, 1), + } + + if err := mc.enqueueApplyRequest(req); err != nil { + return err + } + + select { + case err := <-req.done: + return err + case <-ctx.Done(): + return ctx.Err() + } +} + +func (mc *MetaCache) BeginDirectoryBuild(ctx context.Context, dirPath util.FullPath) error { + return mc.enqueueAndWait(ctx, metadataApplyRequest{ + kind: metadataBeginBuild, + buildPath: dirPath, + }) +} + +func (mc *MetaCache) CompleteDirectoryBuild(ctx context.Context, dirPath util.FullPath, snapshotTsNs int64) error { + return mc.enqueueAndWait(ctx, metadataApplyRequest{ + kind: metadataCompleteBuild, + buildPath: dirPath, + snapshotTsNs: snapshotTsNs, + }) +} + +func (mc *MetaCache) AbortDirectoryBuild(ctx context.Context, dirPath util.FullPath) error { + return mc.enqueueAndWait(ctx, metadataApplyRequest{ + kind: metadataAbortBuild, + buildPath: dirPath, + }) +} + func (mc *MetaCache) UpdateEntry(ctx context.Context, entry *filer.Entry) error { mc.Lock() defer mc.Unlock() @@ -174,6 +301,25 @@ func (mc *MetaCache) ListDirectoryEntries(ctx context.Context, dirPath util.Full } func (mc *MetaCache) Shutdown() { + done := make(chan error, 1) + + mc.applyStateMu.Lock() + if !mc.applyClosed { + mc.applyClosed = true + mc.applyCh <- metadataApplyRequest{ + kind: metadataShutdown, + done: done, + } + } + mc.applyStateMu.Unlock() + + select { + case <-done: + case <-mc.applyDone: + } + + <-mc.applyDone + mc.Lock() defer mc.Unlock() mc.localStore.Shutdown() @@ -201,3 +347,494 @@ func (mc *MetaCache) noteDirectoryUpdate(dirPath util.FullPath) { mc.onDirectoryUpdate(dirPath) } } + +func (mc *MetaCache) enqueueAndWait(ctx context.Context, req metadataApplyRequest) error { + if ctx == nil { + ctx = context.Background() + } + // Use a non-cancellable context for the queued operation so a + // cancelled caller doesn't abort a build/complete mid-way. 
+ req.ctx = context.Background() + req.done = make(chan error, 1) + if err := mc.enqueueApplyRequest(req); err != nil { + return err + } + select { + case err := <-req.done: + return err + case <-ctx.Done(): + return ctx.Err() + } +} + +func (mc *MetaCache) enqueueApplyRequest(req metadataApplyRequest) error { + mc.applyStateMu.Lock() + if mc.applyClosed { + mc.applyStateMu.Unlock() + return errMetaCacheClosed + } + // Release the mutex before the potentially-blocking channel send so that + // Shutdown can still acquire it to set applyClosed when the channel is full. + mc.applyStateMu.Unlock() + select { + case mc.applyCh <- req: + return nil + case <-mc.applyDone: + return errMetaCacheClosed + } +} + +func (mc *MetaCache) runApplyLoop() { + defer close(mc.applyDone) + + for req := range mc.applyCh { + req.done <- mc.handleApplyRequest(req) + close(req.done) + if req.kind == metadataShutdown { + mc.drainApplyCh() + return + } + } +} + +// drainApplyCh non-blockingly drains any remaining requests from applyCh +// after a shutdown sentinel, signalling each caller so they don't block. 
+func (mc *MetaCache) drainApplyCh() { + for { + select { + case req := <-mc.applyCh: + req.done <- errMetaCacheClosed + close(req.done) + default: + return + } + } +} + +func (mc *MetaCache) handleApplyRequest(req metadataApplyRequest) error { + switch req.kind { + case metadataApplyEvent: + return mc.applyMetadataResponseNow(req.ctx, req.resp, req.options) + case metadataBeginBuild: + return mc.beginDirectoryBuildNow(req.buildPath) + case metadataCompleteBuild: + return mc.completeDirectoryBuildNow(req.ctx, req.buildPath, req.snapshotTsNs) + case metadataAbortBuild: + return mc.abortDirectoryBuildNow(req.buildPath) + case metadataShutdown: + return nil + default: + return nil + } +} + +type metadataInvalidation struct { + path util.FullPath + entry *filer_pb.Entry +} + +type metadataResponseSideEffects struct { + dirsToNotify []util.FullPath + invalidations []metadataInvalidation +} + +func (mc *MetaCache) applyMetadataResponseNow(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) error { + if mc.shouldSkipDuplicateEvent(resp) { + return nil + } + + immediateEvents, bufferedEvents := mc.routeMetadataResponse(resp) + if len(bufferedEvents) == 0 { + return mc.applyMetadataResponseDirect(ctx, resp, options, false) + } + + // Apply side effects but skip directory notifications for dirs that are + // currently being built. Notifying a building dir can trigger + // markDirectoryReadThrough → DeleteFolderChildren, wiping entries that + // EnsureVisited already inserted, leaving an incomplete cache. + mc.applyMetadataSideEffectsSkippingBuildingDirs(resp, options) + for buildDir, events := range bufferedEvents { + state := mc.buildingDirs[buildDir] + if state == nil { + continue + } + state.bufferedEvents = append(state.bufferedEvents, events...) 
+ } + for _, immediateEvent := range immediateEvents { + if err := mc.applyMetadataResponseDirect(ctx, immediateEvent, MetadataResponseApplyOptions{}, false); err != nil { + return err + } + } + return nil +} + +func (mc *MetaCache) applyMetadataResponseDirect(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions, allowUncachedInsert bool) error { + if _, err := mc.applyMetadataResponseLocked(ctx, resp, options, allowUncachedInsert); err != nil { + return err + } + mc.applyMetadataSideEffects(resp, options) + return nil +} + +func (mc *MetaCache) applyMetadataSideEffects(resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) { + sideEffects := metadataResponseSideEffects{} + if options.NotifyDirectories { + sideEffects.dirsToNotify = collectDirectoryNotifications(resp) + } + if options.InvalidateEntries { + sideEffects.invalidations = collectEntryInvalidations(resp) + } + for _, dirPath := range sideEffects.dirsToNotify { + mc.noteDirectoryUpdate(dirPath) + } + for _, invalidation := range sideEffects.invalidations { + mc.invalidateFunc(invalidation.path, invalidation.entry) + } +} + +// applyMetadataSideEffectsSkippingBuildingDirs is like applyMetadataSideEffects +// but suppresses directory notifications for dirs currently in buildingDirs. +// This prevents markDirectoryReadThrough from wiping entries mid-build. 
+func (mc *MetaCache) applyMetadataSideEffectsSkippingBuildingDirs(resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) { + sideEffects := metadataResponseSideEffects{} + if options.NotifyDirectories { + sideEffects.dirsToNotify = collectDirectoryNotifications(resp) + } + if options.InvalidateEntries { + sideEffects.invalidations = collectEntryInvalidations(resp) + } + for _, dirPath := range sideEffects.dirsToNotify { + if _, building := mc.buildingDirs[dirPath]; !building { + mc.noteDirectoryUpdate(dirPath) + } + } + for _, invalidation := range sideEffects.invalidations { + mc.invalidateFunc(invalidation.path, invalidation.entry) + } +} + +func (mc *MetaCache) applyMetadataResponseLocked(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, _ MetadataResponseApplyOptions, allowUncachedInsert bool) (metadataResponseSideEffects, error) { + message := resp.GetEventNotification() + if message == nil { + return metadataResponseSideEffects{}, nil + } + + var oldPath util.FullPath + var newEntry *filer.Entry + if message.OldEntry != nil { + oldPath = util.NewFullPath(resp.Directory, message.OldEntry.Name) + } + + if message.NewEntry != nil { + dir := resp.Directory + if message.NewParentPath != "" { + dir = message.NewParentPath + } + newEntry = filer.FromPbEntry(dir, message.NewEntry) + } + + mc.Lock() + err := mc.atomicUpdateEntryFromFilerLocked(ctx, oldPath, newEntry, allowUncachedInsert) + // When a directory is deleted or moved, remove its cached descendants + // so stale children cannot be served from the local cache. 
+ if err == nil && oldPath != "" && message.OldEntry != nil && message.OldEntry.IsDirectory { + isDelete := message.NewEntry == nil + isMove := message.NewEntry != nil && (message.NewParentPath != resp.Directory || message.NewEntry.Name != message.OldEntry.Name) + if isDelete || isMove { + if deleteErr := mc.localStore.DeleteFolderChildren(ctx, oldPath); deleteErr != nil { + glog.V(2).Infof("delete descendants of %s: %v", oldPath, deleteErr) + } + } + } + mc.Unlock() + if err != nil { + return metadataResponseSideEffects{}, err + } + return metadataResponseSideEffects{}, nil +} + +func (mc *MetaCache) beginDirectoryBuildNow(dirPath util.FullPath) error { + if _, found := mc.buildingDirs[dirPath]; found { + return nil + } + mc.buildingDirs[dirPath] = &directoryBuildState{} + return nil +} + +func (mc *MetaCache) abortDirectoryBuildNow(dirPath util.FullPath) error { + delete(mc.buildingDirs, dirPath) + return nil +} + +func (mc *MetaCache) completeDirectoryBuildNow(ctx context.Context, dirPath util.FullPath, snapshotTsNs int64) error { + state := mc.buildingDirs[dirPath] + delete(mc.buildingDirs, dirPath) + + if state == nil { + return nil + } + + for _, event := range state.bufferedEvents { + // When the server provided a snapshot timestamp, skip events that + // the listing already included. When snapshotTsNs == 0 (empty + // directory — server returned no entries and no snapshot), replay + // ALL buffered events to avoid dropping mutations due to + // client/server clock skew. 
+ if snapshotTsNs != 0 && event.TsNs != 0 && event.TsNs <= snapshotTsNs { + continue + } + if err := mc.applyMetadataResponseDirect(ctx, event, MetadataResponseApplyOptions{}, true); err != nil { + return err + } + } + + mc.markCachedFn(dirPath) + return nil +} + +func (mc *MetaCache) routeMetadataResponse(resp *filer_pb.SubscribeMetadataResponse) ([]*filer_pb.SubscribeMetadataResponse, map[util.FullPath][]*filer_pb.SubscribeMetadataResponse) { + message := resp.GetEventNotification() + if message == nil { + return []*filer_pb.SubscribeMetadataResponse{resp}, nil + } + + oldDir, hasOld := metadataOldParentDir(resp) + newDir, hasNew := metadataNewParentDir(resp) + oldBuilding := hasOld && mc.isBuildingDir(oldDir) + newBuilding := hasNew && mc.isBuildingDir(newDir) + if !oldBuilding && !newBuilding { + return []*filer_pb.SubscribeMetadataResponse{resp}, nil + } + + bufferedEvents := make(map[util.FullPath][]*filer_pb.SubscribeMetadataResponse) + var immediateEvents []*filer_pb.SubscribeMetadataResponse + + if hasOld && hasNew && oldDir != newDir { + deleteEvent := metadataDeleteFragment(resp) + createEvent := metadataCreateFragment(resp) + if oldBuilding { + bufferedEvents[oldDir] = append(bufferedEvents[oldDir], deleteEvent) + } else { + immediateEvents = append(immediateEvents, deleteEvent) + } + if newBuilding { + bufferedEvents[newDir] = append(bufferedEvents[newDir], createEvent) + } else { + immediateEvents = append(immediateEvents, createEvent) + } + return immediateEvents, bufferedEvents + } + + targetDir := newDir + if hasOld { + targetDir = oldDir + } + if mc.isBuildingDir(targetDir) { + bufferedEvents[targetDir] = append(bufferedEvents[targetDir], resp) + return nil, bufferedEvents + } + return []*filer_pb.SubscribeMetadataResponse{resp}, nil +} + +func (mc *MetaCache) isBuildingDir(dirPath util.FullPath) bool { + _, found := mc.buildingDirs[dirPath] + return found +} + +func metadataOldParentDir(resp *filer_pb.SubscribeMetadataResponse) (util.FullPath, 
bool) { + if resp.GetEventNotification() == nil || resp.EventNotification.OldEntry == nil { + return "", false + } + return util.FullPath(resp.Directory), true +} + +func metadataNewParentDir(resp *filer_pb.SubscribeMetadataResponse) (util.FullPath, bool) { + if resp.GetEventNotification() == nil || resp.EventNotification.NewEntry == nil { + return "", false + } + newDir := resp.Directory + if resp.EventNotification.NewParentPath != "" { + newDir = resp.EventNotification.NewParentPath + } + return util.FullPath(newDir), true +} + +func metadataDeleteFragment(resp *filer_pb.SubscribeMetadataResponse) *filer_pb.SubscribeMetadataResponse { + if resp.GetEventNotification() == nil || resp.EventNotification.OldEntry == nil { + return nil + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: resp.Directory, + EventNotification: &filer_pb.EventNotification{ + OldEntry: proto.Clone(resp.EventNotification.OldEntry).(*filer_pb.Entry), + }, + TsNs: resp.TsNs, + } +} + +func metadataCreateFragment(resp *filer_pb.SubscribeMetadataResponse) *filer_pb.SubscribeMetadataResponse { + if resp.GetEventNotification() == nil || resp.EventNotification.NewEntry == nil { + return nil + } + newDir := resp.Directory + if resp.EventNotification.NewParentPath != "" { + newDir = resp.EventNotification.NewParentPath + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: newDir, + EventNotification: &filer_pb.EventNotification{ + NewEntry: proto.Clone(resp.EventNotification.NewEntry).(*filer_pb.Entry), + NewParentPath: newDir, + }, + TsNs: resp.TsNs, + } +} + +func metadataEventDedupKey(resp *filer_pb.SubscribeMetadataResponse) string { + var oldName, newName, newParent string + hasOld, hasNew := false, false + if msg := resp.GetEventNotification(); msg != nil { + if msg.OldEntry != nil { + oldName = msg.OldEntry.Name + hasOld = true + } + if msg.NewEntry != nil { + newName = msg.NewEntry.Name + hasNew = true + newParent = msg.NewParentPath + } + } + // Encode event shape 
(create/delete/update/rename) so structurally + // different events with the same names are not collapsed. + var shape byte + switch { + case hasOld && hasNew: + if resp.Directory != newParent && newParent != "" { + shape = 'R' // rename across directories + } else { + shape = 'U' // update in place + } + case hasOld: + shape = 'D' // delete + case hasNew: + shape = 'C' // create + } + return fmt.Sprintf("%d|%c|%s|%s|%s|%s", resp.TsNs, shape, resp.Directory, oldName, newParent, newName) +} + +func (mc *MetaCache) shouldSkipDuplicateEvent(resp *filer_pb.SubscribeMetadataResponse) bool { + if resp == nil || resp.TsNs == 0 { + return false + } + key := metadataEventDedupKey(resp) + return !mc.dedupRing.Add(key) +} + +type dedupRingBuffer struct { + keys [recentEventDedupWindow]string + head int + size int + set map[string]struct{} +} + +func newDedupRingBuffer() dedupRingBuffer { + return dedupRingBuffer{ + set: make(map[string]struct{}, recentEventDedupWindow), + } +} + +func (r *dedupRingBuffer) Add(key string) bool { + if _, found := r.set[key]; found { + return false // duplicate + } + if r.size == recentEventDedupWindow { + evicted := r.keys[r.head] + delete(r.set, evicted) + } else { + r.size++ + } + r.keys[r.head] = key + r.set[key] = struct{}{} + r.head = (r.head + 1) % recentEventDedupWindow + return true // new entry +} + +func collectDirectoryNotifications(resp *filer_pb.SubscribeMetadataResponse) []util.FullPath { + message := resp.GetEventNotification() + if message == nil { + return nil + } + + // At most 3 dirs: old parent, new parent, new child (if directory). + // Use a fixed slice with linear dedup to avoid map allocation. 
+ var dirs [3]util.FullPath + n := 0 + addUnique := func(p util.FullPath) { + for i := 0; i < n; i++ { + if dirs[i] == p { + return + } + } + dirs[n] = p + n++ + } + + if message.OldEntry != nil { + oldPath := util.NewFullPath(resp.Directory, message.OldEntry.Name) + parent, _ := oldPath.DirAndName() + addUnique(util.FullPath(parent)) + } + if message.NewEntry != nil { + newDir := resp.Directory + if message.NewParentPath != "" { + newDir = message.NewParentPath + } + newPath := util.NewFullPath(newDir, message.NewEntry.Name) + parent, _ := newPath.DirAndName() + addUnique(util.FullPath(parent)) + if message.NewEntry.IsDirectory { + addUnique(newPath) + } + } + + return dirs[:n] +} + +func collectEntryInvalidations(resp *filer_pb.SubscribeMetadataResponse) []metadataInvalidation { + message := resp.GetEventNotification() + if message == nil { + return nil + } + + var invalidations []metadataInvalidation + if message.OldEntry != nil && message.NewEntry != nil { + oldKey := util.NewFullPath(resp.Directory, message.OldEntry.Name) + invalidations = append(invalidations, metadataInvalidation{path: oldKey, entry: message.OldEntry}) + // Normalize NewParentPath: empty means same directory as resp.Directory + newDir := resp.Directory + if message.NewParentPath != "" { + newDir = message.NewParentPath + } + if message.OldEntry.Name != message.NewEntry.Name || resp.Directory != newDir { + newKey := util.NewFullPath(newDir, message.NewEntry.Name) + invalidations = append(invalidations, metadataInvalidation{path: newKey, entry: message.NewEntry}) + } + return invalidations + } + + if filer_pb.IsCreate(resp) && message.NewEntry != nil { + newDir := resp.Directory + if message.NewParentPath != "" { + newDir = message.NewParentPath + } + newKey := util.NewFullPath(newDir, message.NewEntry.Name) + invalidations = append(invalidations, metadataInvalidation{path: newKey, entry: message.NewEntry}) + } + + if filer_pb.IsDelete(resp) && message.OldEntry != nil { + oldKey := 
util.NewFullPath(resp.Directory, message.OldEntry.Name) + invalidations = append(invalidations, metadataInvalidation{path: oldKey, entry: message.OldEntry}) + } + + return invalidations +} diff --git a/weed/mount/meta_cache/meta_cache_apply_test.go b/weed/mount/meta_cache/meta_cache_apply_test.go new file mode 100644 index 000000000..d30fab90d --- /dev/null +++ b/weed/mount/meta_cache/meta_cache_apply_test.go @@ -0,0 +1,361 @@ +package meta_cache + +import ( + "context" + "path/filepath" + "sync" + "testing" + "time" + + "github.com/seaweedfs/seaweedfs/weed/filer" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/seaweedfs/weed/util" +) + +func TestApplyMetadataResponseAppliesEventsInOrder(t *testing.T) { + mc, _, notifications, invalidations := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + "/dir": true, + }) + defer mc.Shutdown() + + createResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + NewEntry: &filer_pb.Entry{ + Name: "file.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 1, + FileMode: 0100644, + FileSize: 11, + }, + }, + }, + } + updateResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{ + Name: "file.txt", + }, + NewEntry: &filer_pb.Entry{ + Name: "file.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 2, + FileMode: 0100644, + FileSize: 29, + }, + }, + NewParentPath: "/dir", + }, + } + deleteResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{ + Name: "file.txt", + }, + }, + } + + if err := mc.ApplyMetadataResponse(context.Background(), createResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply create: %v", err) + } + + entry, err := mc.FindEntry(context.Background(), util.FullPath("/dir/file.txt")) + if err 
!= nil { + t.Fatalf("find created entry: %v", err) + } + if entry.FileSize != 11 { + t.Fatalf("created file size = %d, want 11", entry.FileSize) + } + + if err := mc.ApplyMetadataResponse(context.Background(), updateResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply update: %v", err) + } + + entry, err = mc.FindEntry(context.Background(), util.FullPath("/dir/file.txt")) + if err != nil { + t.Fatalf("find updated entry: %v", err) + } + if entry.FileSize != 29 { + t.Fatalf("updated file size = %d, want 29", entry.FileSize) + } + + if err := mc.ApplyMetadataResponse(context.Background(), deleteResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply delete: %v", err) + } + + entry, err = mc.FindEntry(context.Background(), util.FullPath("/dir/file.txt")) + if err != filer_pb.ErrNotFound { + t.Fatalf("find deleted entry error = %v, want %v", err, filer_pb.ErrNotFound) + } + if entry != nil { + t.Fatalf("deleted entry still cached: %+v", entry) + } + + if got := countPath(notifications.paths(), util.FullPath("/dir")); got != 3 { + t.Fatalf("directory notifications for /dir = %d, want 3", got) + } + if got := countPath(invalidations.paths(), util.FullPath("/dir/file.txt")); got != 3 { + t.Fatalf("invalidations for /dir/file.txt = %d, want 3 (create + update + delete)", got) + } +} + +func TestApplyMetadataResponseRenamesAcrossCachedDirectories(t *testing.T) { + mc, _, notifications, invalidations := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + "/src": true, + "/dst": true, + }) + defer mc.Shutdown() + + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/src/file.tmp", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 7, + }, + }); err != nil { + t.Fatalf("insert source entry: %v", err) + } + + renameResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/src", + EventNotification: &filer_pb.EventNotification{ + OldEntry: 
&filer_pb.Entry{ + Name: "file.tmp", + }, + NewEntry: &filer_pb.Entry{ + Name: "file.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 2, + FileMode: 0100644, + FileSize: 41, + }, + }, + NewParentPath: "/dst", + }, + } + + if err := mc.ApplyMetadataResponse(context.Background(), renameResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply rename: %v", err) + } + + oldEntry, err := mc.FindEntry(context.Background(), util.FullPath("/src/file.tmp")) + if err != filer_pb.ErrNotFound { + t.Fatalf("find old path error = %v, want %v", err, filer_pb.ErrNotFound) + } + if oldEntry != nil { + t.Fatalf("old path still cached: %+v", oldEntry) + } + + newEntry, err := mc.FindEntry(context.Background(), util.FullPath("/dst/file.txt")) + if err != nil { + t.Fatalf("find new path: %v", err) + } + if newEntry.FileSize != 41 { + t.Fatalf("renamed file size = %d, want 41", newEntry.FileSize) + } + + if got := countPath(notifications.paths(), util.FullPath("/src")); got != 1 { + t.Fatalf("directory notifications for /src = %d, want 1", got) + } + if got := countPath(notifications.paths(), util.FullPath("/dst")); got != 1 { + t.Fatalf("directory notifications for /dst = %d, want 1", got) + } + if got := countPath(invalidations.paths(), util.FullPath("/src/file.tmp")); got != 1 { + t.Fatalf("invalidations for /src/file.tmp = %d, want 1", got) + } + if got := countPath(invalidations.paths(), util.FullPath("/dst/file.txt")); got != 1 { + t.Fatalf("invalidations for /dst/file.txt = %d, want 1", got) + } +} + +func TestApplyMetadataResponseLocalOptionsSkipInvalidations(t *testing.T) { + mc, _, notifications, invalidations := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + "/dir": true, + }) + defer mc.Shutdown() + + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/dir/file.txt", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 7, + }, + }); err != nil { + 
t.Fatalf("insert source entry: %v", err) + } + + updateResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{ + Name: "file.txt", + }, + NewEntry: &filer_pb.Entry{ + Name: "file.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 2, + FileMode: 0100644, + FileSize: 17, + }, + }, + NewParentPath: "/dir", + }, + } + + if err := mc.ApplyMetadataResponse(context.Background(), updateResp, LocalMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply local update: %v", err) + } + + entry, err := mc.FindEntry(context.Background(), util.FullPath("/dir/file.txt")) + if err != nil { + t.Fatalf("find updated entry: %v", err) + } + if entry.FileSize != 17 { + t.Fatalf("updated file size = %d, want 17", entry.FileSize) + } + if got := countPath(notifications.paths(), util.FullPath("/dir")); got != 1 { + t.Fatalf("directory notifications for /dir = %d, want 1", got) + } + if got := len(invalidations.paths()); got != 0 { + t.Fatalf("invalidations = %d, want 0", got) + } +} + +func TestApplyMetadataResponseDeduplicatesRepeatedFilerEvent(t *testing.T) { + mc, _, notifications, invalidations := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + "/dir": true, + }) + defer mc.Shutdown() + + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/dir/file.txt", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 5, + }, + }); err != nil { + t.Fatalf("insert source entry: %v", err) + } + + updateResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{ + Name: "file.txt", + }, + NewEntry: &filer_pb.Entry{ + Name: "file.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 2, + FileMode: 0100644, + FileSize: 15, + }, + }, + NewParentPath: "/dir", + Signatures: []int32{7}, + }, + TsNs: 99, + } + + if err 
:= mc.ApplyMetadataResponse(context.Background(), updateResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("first apply: %v", err) + } + if err := mc.ApplyMetadataResponse(context.Background(), updateResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("second apply: %v", err) + } + + entry, err := mc.FindEntry(context.Background(), util.FullPath("/dir/file.txt")) + if err != nil { + t.Fatalf("find updated entry: %v", err) + } + if entry.FileSize != 15 { + t.Fatalf("updated file size = %d, want 15", entry.FileSize) + } + if got := countPath(notifications.paths(), util.FullPath("/dir")); got != 1 { + t.Fatalf("directory notifications for /dir = %d, want 1", got) + } + if got := countPath(invalidations.paths(), util.FullPath("/dir/file.txt")); got != 1 { + t.Fatalf("invalidations for /dir/file.txt = %d, want 1", got) + } +} + +func newTestMetaCache(t *testing.T, cached map[util.FullPath]bool) (*MetaCache, map[util.FullPath]bool, *recordedPaths, *recordedPaths) { + t.Helper() + + mapper, err := NewUidGidMapper("", "") + if err != nil { + t.Fatalf("uid/gid mapper: %v", err) + } + + var cachedMu sync.Mutex + notifications := &recordedPaths{} + invalidations := &recordedPaths{} + + mc := NewMetaCache( + filepath.Join(t.TempDir(), "meta"), + mapper, + util.FullPath("/"), + func(path util.FullPath) { + cachedMu.Lock() + defer cachedMu.Unlock() + cached[path] = true + }, + func(path util.FullPath) bool { + cachedMu.Lock() + defer cachedMu.Unlock() + return cached[path] + }, + func(path util.FullPath, entry *filer_pb.Entry) { + invalidations.record(path) + }, + func(dir util.FullPath) { + notifications.record(dir) + }, + ) + + return mc, cached, notifications, invalidations +} + +type recordedPaths struct { + mu sync.Mutex + items []util.FullPath +} + +func (r *recordedPaths) record(path util.FullPath) { + r.mu.Lock() + defer r.mu.Unlock() + r.items = append(r.items, path) +} + +func (r *recordedPaths) paths() []util.FullPath { + 
r.mu.Lock() + defer r.mu.Unlock() + return append([]util.FullPath(nil), r.items...) +} + +func countPath(paths []util.FullPath, target util.FullPath) int { + count := 0 + for _, path := range paths { + if path == target { + count++ + } + } + return count +} diff --git a/weed/mount/meta_cache/meta_cache_build_test.go b/weed/mount/meta_cache/meta_cache_build_test.go new file mode 100644 index 000000000..61285f23a --- /dev/null +++ b/weed/mount/meta_cache/meta_cache_build_test.go @@ -0,0 +1,459 @@ +package meta_cache + +import ( + "context" + "fmt" + "io" + "sync" + "testing" + "time" + + "github.com/seaweedfs/seaweedfs/weed/filer" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/seaweedfs/weed/util" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" +) + +type buildListStream struct { + responses []*filer_pb.ListEntriesResponse + onFirstRecv func() + once sync.Once + index int +} + +func (s *buildListStream) Recv() (*filer_pb.ListEntriesResponse, error) { + s.once.Do(func() { + if s.onFirstRecv != nil { + s.onFirstRecv() + } + }) + if s.index >= len(s.responses) { + return nil, io.EOF + } + resp := s.responses[s.index] + s.index++ + return resp, nil +} + +func (s *buildListStream) Header() (metadata.MD, error) { return metadata.MD{}, nil } +func (s *buildListStream) Trailer() metadata.MD { return metadata.MD{} } +func (s *buildListStream) CloseSend() error { return nil } +func (s *buildListStream) Context() context.Context { return context.Background() } +func (s *buildListStream) SendMsg(any) error { return nil } +func (s *buildListStream) RecvMsg(any) error { return nil } + +type buildListClient struct { + filer_pb.SeaweedFilerClient + responses []*filer_pb.ListEntriesResponse + onFirstRecv func() +} + +func (c *buildListClient) ListEntries(ctx context.Context, in *filer_pb.ListEntriesRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[filer_pb.ListEntriesResponse], error) { + return &buildListStream{ + responses: 
c.responses, + onFirstRecv: c.onFirstRecv, + }, nil +} + +type buildFilerAccessor struct { + client filer_pb.SeaweedFilerClient +} + +func (a *buildFilerAccessor) WithFilerClient(_ bool, fn func(filer_pb.SeaweedFilerClient) error) error { + return fn(a.client) +} + +func (a *buildFilerAccessor) AdjustedUrl(*filer_pb.Location) string { return "" } +func (a *buildFilerAccessor) GetDataCenter() string { return "" } + +func TestEnsureVisitedReplaysBufferedEventsAfterSnapshot(t *testing.T) { + mc, _, _, _ := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + }) + defer mc.Shutdown() + + var applyErr error + accessor := &buildFilerAccessor{ + client: &buildListClient{ + responses: []*filer_pb.ListEntriesResponse{ + { + Entry: &filer_pb.Entry{ + Name: "base.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 1, + FileMode: 0100644, + FileSize: 3, + }, + }, + SnapshotTsNs: 100, + }, + }, + onFirstRecv: func() { + applyErr = mc.ApplyMetadataResponse(context.Background(), &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + NewEntry: &filer_pb.Entry{ + Name: "after.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 2, + Mtime: 2, + FileMode: 0100644, + FileSize: 9, + }, + }, + }, + TsNs: 101, + }, SubscriberMetadataResponseApplyOptions) + }, + }, + } + + if err := EnsureVisited(mc, accessor, util.FullPath("/dir")); err != nil { + t.Fatalf("ensure visited: %v", err) + } + if applyErr != nil { + t.Fatalf("apply buffered event: %v", applyErr) + } + if !mc.IsDirectoryCached(util.FullPath("/dir")) { + t.Fatal("directory /dir should be cached after build completes") + } + + baseEntry, err := mc.FindEntry(context.Background(), util.FullPath("/dir/base.txt")) + if err != nil { + t.Fatalf("find base entry: %v", err) + } + if baseEntry.FileSize != 3 { + t.Fatalf("base entry size = %d, want 3", baseEntry.FileSize) + } + + afterEntry, err := mc.FindEntry(context.Background(), 
util.FullPath("/dir/after.txt")) + if err != nil { + t.Fatalf("find replayed entry: %v", err) + } + if afterEntry.FileSize != 9 { + t.Fatalf("replayed entry size = %d, want 9", afterEntry.FileSize) + } +} + +// TestDirectoryNotificationsSuppressedDuringBuild verifies that metadata events +// targeting a directory under active build do NOT fire onDirectoryUpdate for +// that directory. In production, onDirectoryUpdate can trigger +// markDirectoryReadThrough → DeleteFolderChildren, which would wipe entries +// that EnsureVisited already inserted mid-build. +func TestDirectoryNotificationsSuppressedDuringBuild(t *testing.T) { + mc, _, notifications, _ := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + }) + defer mc.Shutdown() + + // Start building /dir (simulates the beginning of EnsureVisited) + if err := mc.BeginDirectoryBuild(context.Background(), util.FullPath("/dir")); err != nil { + t.Fatalf("begin build: %v", err) + } + + // Insert an entry as EnsureVisited would during the filer listing + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/dir/existing.txt", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 100, + }, + }); err != nil { + t.Fatalf("insert entry during build: %v", err) + } + + // Simulate multiple metadata events arriving for /dir while the build + // is in progress. Each event would normally call noteDirectoryUpdate, + // which in production can trigger markDirectoryReadThrough and wipe entries. 
+ for i := 0; i < 5; i++ { + resp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + NewEntry: &filer_pb.Entry{ + Name: fmt.Sprintf("new-%d.txt", i), + Attributes: &filer_pb.FuseAttributes{ + Crtime: int64(10 + i), + Mtime: int64(10 + i), + FileMode: 0100644, + FileSize: uint64(i + 1), + }, + }, + }, + TsNs: int64(200 + i), + } + if err := mc.ApplyMetadataResponse(context.Background(), resp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply event %d: %v", i, err) + } + } + + // The building directory /dir must NOT have received any notifications. + // If it did, markDirectoryReadThrough would wipe the cache mid-build. + for _, p := range notifications.paths() { + if p == util.FullPath("/dir") { + t.Fatal("onDirectoryUpdate was called for /dir during build; this would cause markDirectoryReadThrough to wipe entries mid-build") + } + } + + // The entry inserted during the build must still be present + entry, err := mc.FindEntry(context.Background(), util.FullPath("/dir/existing.txt")) + if err != nil { + t.Fatalf("entry wiped during build: %v", err) + } + if entry.FileSize != 100 { + t.Fatalf("entry size = %d, want 100", entry.FileSize) + } + + // Complete the build — buffered events should be replayed + if err := mc.CompleteDirectoryBuild(context.Background(), util.FullPath("/dir"), 150); err != nil { + t.Fatalf("complete build: %v", err) + } + + // After build completes, the entry from the listing should still exist + entry, err = mc.FindEntry(context.Background(), util.FullPath("/dir/existing.txt")) + if err != nil { + t.Fatalf("entry lost after build completion: %v", err) + } + if entry.FileSize != 100 { + t.Fatalf("entry size after build = %d, want 100", entry.FileSize) + } + + // Buffered events with TsNs > snapshotTsNs (150) should have been replayed + for i := 0; i < 5; i++ { + name := fmt.Sprintf("new-%d.txt", i) + e, err := mc.FindEntry(context.Background(), 
util.FullPath("/dir/"+name)) + if err != nil { + t.Fatalf("replayed entry %s not found: %v", name, err) + } + if e.FileSize != uint64(i+1) { + t.Fatalf("replayed entry %s size = %d, want %d", name, e.FileSize, i+1) + } + } +} + +// TestEmptyDirectoryBuildReplaysAllBufferedEvents verifies that when a +// directory build completes with snapshotTsNs=0 (empty directory — server +// returned no entries and no snapshot), ALL buffered events are replayed +// without any TsNs filtering. This prevents clock-skew between client and +// filer from dropping legitimate mutations. +func TestEmptyDirectoryBuildReplaysAllBufferedEvents(t *testing.T) { + mc, _, _, _ := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + }) + defer mc.Shutdown() + + if err := mc.BeginDirectoryBuild(context.Background(), util.FullPath("/empty")); err != nil { + t.Fatalf("begin build: %v", err) + } + + // Buffer events with a range of TsNs values — some very old, some recent. + // With a client-synthesized snapshot, old events could be incorrectly filtered. 
+ tsValues := []int64{1, 50, 500, 5000, 50000} + for i, ts := range tsValues { + resp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/empty", + EventNotification: &filer_pb.EventNotification{ + NewEntry: &filer_pb.Entry{ + Name: fmt.Sprintf("file-%d.txt", i), + Attributes: &filer_pb.FuseAttributes{ + Crtime: ts, + Mtime: ts, + FileMode: 0100644, + FileSize: uint64(i + 10), + }, + }, + }, + TsNs: ts, + } + if err := mc.ApplyMetadataResponse(context.Background(), resp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply event %d: %v", i, err) + } + } + + // Complete with snapshotTsNs=0 — simulates empty directory listing + if err := mc.CompleteDirectoryBuild(context.Background(), util.FullPath("/empty"), 0); err != nil { + t.Fatalf("complete build: %v", err) + } + + // Every buffered event must have been replayed, regardless of TsNs + for i := range tsValues { + name := fmt.Sprintf("file-%d.txt", i) + e, err := mc.FindEntry(context.Background(), util.FullPath("/empty/"+name)) + if err != nil { + t.Fatalf("replayed entry %s not found: %v", name, err) + } + if e.FileSize != uint64(i+10) { + t.Fatalf("replayed entry %s size = %d, want %d", name, e.FileSize, i+10) + } + } + + if !mc.IsDirectoryCached(util.FullPath("/empty")) { + t.Fatal("/empty should be marked cached after build completes") + } +} + +// TestBuildCompletionSurvivesCallerCancellation verifies that once +// CompleteDirectoryBuild is enqueued, a cancelled caller context does not +// prevent the build from completing. The apply loop uses context.Background() +// internally, so the operation finishes even if the caller gives up waiting. 
+func TestBuildCompletionSurvivesCallerCancellation(t *testing.T) { + mc, _, _, _ := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + }) + defer mc.Shutdown() + + if err := mc.BeginDirectoryBuild(context.Background(), util.FullPath("/dir")); err != nil { + t.Fatalf("begin build: %v", err) + } + + // Insert an entry during the build (as EnsureVisited would) + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/dir/kept.txt", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 42, + }, + }); err != nil { + t.Fatalf("insert entry: %v", err) + } + + // Buffer an event that should be replayed + if err := mc.ApplyMetadataResponse(context.Background(), &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + NewEntry: &filer_pb.Entry{ + Name: "buffered.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 5, + Mtime: 5, + FileMode: 0100644, + FileSize: 77, + }, + }, + }, + TsNs: 200, + }, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply event: %v", err) + } + + // Complete with an already-cancelled context. The operation should still + // succeed because enqueueAndWait sets req.ctx = context.Background(). + cancelledCtx, cancel := context.WithCancel(context.Background()) + cancel() // cancel immediately + + // CompleteDirectoryBuild may return ctx.Err() if the select picks + // ctx.Done() first, but the operation itself still completes in the + // apply loop. Poll for the observable side effect instead of using + // a fixed sleep. + _ = mc.CompleteDirectoryBuild(cancelledCtx, util.FullPath("/dir"), 100) + + // Poll until the build completes or a deadline elapses. 
+ deadline := time.After(2 * time.Second) + for !mc.IsDirectoryCached(util.FullPath("/dir")) { + select { + case <-deadline: + t.Fatal("/dir should be cached — CompleteDirectoryBuild must have executed despite cancelled context") + default: + time.Sleep(5 * time.Millisecond) + } + } + + // The pre-existing entry must survive + entry, findErr := mc.FindEntry(context.Background(), util.FullPath("/dir/kept.txt")) + if findErr != nil { + t.Fatalf("find kept entry: %v", findErr) + } + if entry.FileSize != 42 { + t.Fatalf("kept entry size = %d, want 42", entry.FileSize) + } + + // The buffered event (TsNs 200 > snapshot 100) must have been replayed + buffered, findErr := mc.FindEntry(context.Background(), util.FullPath("/dir/buffered.txt")) + if findErr != nil { + t.Fatalf("find buffered entry: %v", findErr) + } + if buffered.FileSize != 77 { + t.Fatalf("buffered entry size = %d, want 77", buffered.FileSize) + } +} + +func TestBufferedRenameUpdatesOtherDirectoryBeforeBuildCompletes(t *testing.T) { + mc, _, _, _ := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + "/src": true, + }) + defer mc.Shutdown() + + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/src/from.txt", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 7, + }, + }); err != nil { + t.Fatalf("insert source entry: %v", err) + } + + if err := mc.BeginDirectoryBuild(context.Background(), util.FullPath("/dst")); err != nil { + t.Fatalf("begin build: %v", err) + } + + renameResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/src", + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{ + Name: "from.txt", + }, + NewEntry: &filer_pb.Entry{ + Name: "to.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 2, + Mtime: 2, + FileMode: 0100644, + FileSize: 12, + }, + }, + NewParentPath: "/dst", + }, + TsNs: 101, + } + + if err := mc.ApplyMetadataResponse(context.Background(), renameResp, 
SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply rename: %v", err) + } + + oldEntry, err := mc.FindEntry(context.Background(), util.FullPath("/src/from.txt")) + if err != filer_pb.ErrNotFound { + t.Fatalf("find old path error = %v, want %v", err, filer_pb.ErrNotFound) + } + if oldEntry != nil { + t.Fatalf("old path should be removed before build completes: %+v", oldEntry) + } + + newEntry, err := mc.FindEntry(context.Background(), util.FullPath("/dst/to.txt")) + if err != filer_pb.ErrNotFound { + t.Fatalf("find buffered new path error = %v, want %v", err, filer_pb.ErrNotFound) + } + if newEntry != nil { + t.Fatalf("new path should stay hidden until build completes: %+v", newEntry) + } + + if err := mc.CompleteDirectoryBuild(context.Background(), util.FullPath("/dst"), 100); err != nil { + t.Fatalf("complete build: %v", err) + } + + newEntry, err = mc.FindEntry(context.Background(), util.FullPath("/dst/to.txt")) + if err != nil { + t.Fatalf("find replayed new path: %v", err) + } + if newEntry.FileSize != 12 { + t.Fatalf("replayed new path size = %d, want 12", newEntry.FileSize) + } +} diff --git a/weed/mount/meta_cache/meta_cache_init.go b/weed/mount/meta_cache/meta_cache_init.go index 10ec9dad7..81aad780f 100644 --- a/weed/mount/meta_cache/meta_cache_init.go +++ b/weed/mount/meta_cache/meta_cache_init.go @@ -69,12 +69,43 @@ func doEnsureVisited(ctx context.Context, mc *MetaCache, client filer_pb.FilerCl glog.V(4).Infof("ReadDirAllEntries %s ...", path) + // Use context.Background() for build lifecycle calls so that + // errgroup cancellation of ctx doesn't cause enqueueAndWait to + // return early, which would trigger cleanupBuild while the + // operation is still queued. 
+ if err := mc.BeginDirectoryBuild(context.Background(), path); err != nil { + return nil, fmt.Errorf("begin build %s: %w", path, err) + } + cleanupDone := false + cleanupBuild := func(reason string) { + if cleanupDone { + return + } + cleanupDone = true + if deleteErr := mc.DeleteFolderChildren(context.Background(), path); deleteErr != nil { + glog.V(2).Infof("clear %s build %s: %v", reason, path, deleteErr) + } + if abortErr := mc.AbortDirectoryBuild(context.Background(), path); abortErr != nil { + glog.V(2).Infof("abort %s build %s: %v", reason, path, abortErr) + } + } + defer func() { + if !cleanupDone && ctx.Err() != nil { + cleanupBuild("canceled") + } + }() + // Collect entries in batches for efficient LevelDB writes var batch []*filer.Entry + var snapshotTsNs int64 fetchErr := util.Retry("ReadDirAllEntries", func() error { batch = nil // Reset batch on retry, allow GC of previous entries - return filer_pb.ReadDirAllEntries(ctx, client, path, "", func(pbEntry *filer_pb.Entry, isLast bool) error { + if err := mc.DeleteFolderChildren(ctx, path); err != nil { + return fmt.Errorf("clear existing entries for %s: %w", path, err) + } + var err error + snapshotTsNs, err = filer_pb.ReadDirAllEntriesWithSnapshot(ctx, client, path, "", func(pbEntry *filer_pb.Entry, isLast bool) error { entry := filer.FromPbEntry(string(path), pbEntry) if IsHiddenSystemEntry(string(path), entry.Name()) { return nil @@ -94,19 +125,26 @@ func doEnsureVisited(ctx context.Context, mc *MetaCache, client filer_pb.FilerCl } return nil }) + return err }) if fetchErr != nil { + cleanupBuild("failed") return nil, fmt.Errorf("list %s: %w", path, fetchErr) } // Flush any remaining entries in the batch if len(batch) > 0 { if err := mc.doBatchInsertEntries(ctx, batch); err != nil { + cleanupBuild("incomplete") return nil, fmt.Errorf("batch insert remaining for %s: %w", path, err) } } - mc.markCachedFn(path) + if err := mc.CompleteDirectoryBuild(context.Background(), path, snapshotTsNs); err != nil { 
+ cleanupBuild("unreplayed") + return nil, fmt.Errorf("complete build for %s: %w", path, err) + } + cleanupDone = true // Prevent deferred cleanup after successful publish return nil, nil }) return err diff --git a/weed/mount/meta_cache/meta_cache_subscribe.go b/weed/mount/meta_cache/meta_cache_subscribe.go index fe5f75ba9..12e9d4a77 100644 --- a/weed/mount/meta_cache/meta_cache_subscribe.go +++ b/weed/mount/meta_cache/meta_cache_subscribe.go @@ -4,7 +4,6 @@ import ( "context" "strings" - "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" @@ -51,67 +50,12 @@ func SubscribeMetaEvents(mc *MetaCache, selfSignature int32, client filer_pb.Fil } processEventFn := func(resp *filer_pb.SubscribeMetadataResponse) error { - message := resp.EventNotification - - for _, sig := range message.Signatures { - if sig == selfSignature && selfSignature != 0 { - return nil - } - } - - dir := resp.Directory - var oldPath util.FullPath - var newEntry *filer.Entry - if message.OldEntry != nil { - oldPath = util.NewFullPath(dir, message.OldEntry.Name) - glog.V(4).Infof("deleting %v", oldPath) - } - - if message.NewEntry != nil { - if message.NewParentPath != "" { - dir = message.NewParentPath - } - key := util.NewFullPath(dir, message.NewEntry.Name) - glog.V(4).Infof("creating %v", key) - newEntry = filer.FromPbEntry(dir, message.NewEntry) - } - err := mc.AtomicUpdateEntryFromFiler(context.Background(), oldPath, newEntry) - if err == nil { - if message.NewEntry != nil || message.OldEntry != nil { - dirsToNotify := make(map[util.FullPath]struct{}) - if oldPath != "" { - parent, _ := oldPath.DirAndName() - dirsToNotify[util.FullPath(parent)] = struct{}{} - } - if newEntry != nil { - newParent, _ := newEntry.DirAndName() - dirsToNotify[util.FullPath(newParent)] = struct{}{} - } - if message.NewEntry != nil && message.NewEntry.IsDirectory { - childPath := 
util.NewFullPath(dir, message.NewEntry.Name) - dirsToNotify[childPath] = struct{}{} - } - for dirPath := range dirsToNotify { - mc.noteDirectoryUpdate(dirPath) - } - } - if message.OldEntry != nil && message.NewEntry != nil { - oldKey := util.NewFullPath(resp.Directory, message.OldEntry.Name) - mc.invalidateFunc(oldKey, message.OldEntry) - if message.OldEntry.Name != message.NewEntry.Name { - newKey := util.NewFullPath(dir, message.NewEntry.Name) - mc.invalidateFunc(newKey, message.NewEntry) - } - } else if filer_pb.IsCreate(resp) { - // no need to invalidate - } else if filer_pb.IsDelete(resp) { - oldKey := util.NewFullPath(resp.Directory, message.OldEntry.Name) - mc.invalidateFunc(oldKey, message.OldEntry) - } - } - - return err - + // Let all events (including self-originated ones) flow through the + // applier so that the directory-build buffering and dedup logic + // can handle them consistently. The dedupRing in + // applyMetadataResponseNow catches duplicates that were already + // applied locally via applyLocalMetadataEvent. 
+ return mc.ApplyMetadataResponse(context.Background(), resp, SubscriberMetadataResponseApplyOptions) } prefix := dir diff --git a/weed/mount/metadata_events.go b/weed/mount/metadata_events.go new file mode 100644 index 000000000..15d18df0d --- /dev/null +++ b/weed/mount/metadata_events.go @@ -0,0 +1,66 @@ +package mount + +import ( + "context" + + "github.com/seaweedfs/seaweedfs/weed/mount/meta_cache" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "google.golang.org/protobuf/proto" +) + +func (wfs *WFS) applyLocalMetadataEvent(ctx context.Context, event *filer_pb.SubscribeMetadataResponse) error { + if ctx == nil { + ctx = context.Background() + } + return wfs.metaCache.ApplyMetadataResponseOwned(ctx, event, meta_cache.LocalMetadataResponseApplyOptions) +} + +func metadataDeleteEvent(directory, name string, isDirectory bool) *filer_pb.SubscribeMetadataResponse { + if name == "" { + return nil + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: directory, + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{Name: name, IsDirectory: isDirectory}, + }, + } +} + +func metadataCreateEvent(directory string, entry *filer_pb.Entry) *filer_pb.SubscribeMetadataResponse { + if entry == nil { + return nil + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: directory, + EventNotification: &filer_pb.EventNotification{ + NewEntry: proto.Clone(entry).(*filer_pb.Entry), + NewParentPath: directory, + }, + } +} + +func metadataUpdateEvent(directory string, entry *filer_pb.Entry) *filer_pb.SubscribeMetadataResponse { + if entry == nil { + return nil + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: directory, + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{Name: entry.Name}, + NewEntry: proto.Clone(entry).(*filer_pb.Entry), + NewParentPath: directory, + }, + } +} + +func metadataEventFromRenameResponse(resp *filer_pb.StreamRenameEntryResponse) *filer_pb.SubscribeMetadataResponse { + if 
resp == nil || resp.EventNotification == nil { + return nil + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: resp.Directory, + EventNotification: proto.Clone(resp.EventNotification).(*filer_pb.EventNotification), + TsNs: resp.TsNs, + } +} diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index db60c9deb..9b2341ca3 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -200,7 +200,7 @@ func NewSeaweedFileSystem(option *Option) *WFS { } }, func(dirPath util.FullPath) { if wfs.inodeToPath.RecordDirectoryUpdate(dirPath, time.Now(), wfs.dirHotWindow, wfs.dirHotThreshold) { - wfs.maybeRefreshDirectory(dirPath) + wfs.markDirectoryReadThrough(dirPath) } }) grace.OnInterrupt(func() { @@ -313,36 +313,42 @@ func (wfs *WFS) maybeLoadEntry(fullpath util.FullPath) (*filer_pb.Entry, fuse.St } // lookupEntry looks up an entry by path, checking the local cache first. -// If the directory is cached, it trusts the cache. Otherwise, it fetches -// directly from the filer without caching the entire directory. -// This avoids the performance issue of listing millions of files just to open one. +// Cached metadata is only authoritative when the parent directory itself is cached. +// For uncached/read-through directories, always consult the filer directly so stale +// local entries do not leak back into lookup results. func (wfs *WFS) lookupEntry(fullpath util.FullPath) (*filer.Entry, fuse.Status) { dir, _ := fullpath.DirAndName() + dirPath := util.FullPath(dir) - // Try to find the entry in the local cache first. - cachedEntry, cacheErr := wfs.metaCache.FindEntry(context.Background(), fullpath) - if cacheErr != nil && cacheErr != filer_pb.ErrNotFound { - glog.Errorf("lookupEntry: cache lookup for %s failed: %v", fullpath, cacheErr) - return nil, fuse.EIO - } - if cachedEntry != nil { - glog.V(4).Infof("lookupEntry cache hit %s", fullpath) - return cachedEntry, fuse.OK - } - - // If the directory is cached but entry not found, file doesn't exist. 
- // No need to query the filer again. - if wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) { - glog.V(4).Infof("lookupEntry cache miss (dir cached) %s", fullpath) - return nil, fuse.ENOENT + if wfs.metaCache.IsDirectoryCached(dirPath) { + cachedEntry, cacheErr := wfs.metaCache.FindEntry(context.Background(), fullpath) + if cacheErr != nil && cacheErr != filer_pb.ErrNotFound { + glog.Errorf("lookupEntry: cache lookup for %s failed: %v", fullpath, cacheErr) + return nil, fuse.EIO + } + if cachedEntry != nil { + glog.V(4).Infof("lookupEntry cache hit %s", fullpath) + return cachedEntry, fuse.OK + } + // Re-check: the directory may have been evicted from cache between + // our IsDirectoryCached check and FindEntry (e.g. markDirectoryReadThrough). + // If it's no longer cached, fall through to the filer lookup below. + if wfs.metaCache.IsDirectoryCached(dirPath) { + glog.V(4).Infof("lookupEntry cache miss (dir cached) %s", fullpath) + return nil, fuse.ENOENT + } } // Directory not cached - fetch directly from filer without caching the entire directory. 
glog.V(4).Infof("lookupEntry fetching from filer %s", fullpath) entry, err := filer_pb.GetEntry(context.Background(), wfs, fullpath) if err != nil { - glog.V(1).Infof("lookupEntry GetEntry %s: %v", fullpath, err) - return nil, fuse.ENOENT + if err == filer_pb.ErrNotFound { + glog.V(4).Infof("lookupEntry not found %s", fullpath) + return nil, fuse.ENOENT + } + glog.Warningf("lookupEntry GetEntry %s: %v", fullpath, err) + return nil, fuse.EIO } if entry != nil && entry.Attributes != nil && wfs.option.UidGidMapper != nil { entry.Attributes.Uid, entry.Attributes.Gid = wfs.option.UidGidMapper.FilerToLocal(entry.Attributes.Uid, entry.Attributes.Gid) @@ -371,31 +377,13 @@ func (wfs *WFS) ClearCacheDir() { os.RemoveAll(wfs.option.getUniqueCacheDirForRead()) } -func (wfs *WFS) maybeRefreshDirectory(dirPath util.FullPath) { - if !wfs.inodeToPath.NeedsRefresh(dirPath) { +func (wfs *WFS) markDirectoryReadThrough(dirPath util.FullPath) { + if !wfs.inodeToPath.MarkDirectoryReadThrough(dirPath, time.Now()) { return } - wfs.refreshMu.Lock() - if _, exists := wfs.refreshingDirs[dirPath]; exists { - wfs.refreshMu.Unlock() - return + if err := wfs.metaCache.DeleteFolderChildren(context.Background(), dirPath); err != nil { + glog.V(2).Infof("clear dir cache %s: %v", dirPath, err) } - wfs.refreshingDirs[dirPath] = struct{}{} - wfs.refreshMu.Unlock() - - go func() { - defer func() { - wfs.refreshMu.Lock() - delete(wfs.refreshingDirs, dirPath) - wfs.refreshMu.Unlock() - }() - wfs.inodeToPath.InvalidateChildrenCache(dirPath) - if err := meta_cache.EnsureVisited(wfs.metaCache, wfs, dirPath); err != nil { - glog.Warningf("refresh dir cache %s: %v", dirPath, err) - return - } - wfs.inodeToPath.MarkDirectoryRefreshed(dirPath, time.Now()) - }() } func (wfs *WFS) loopEvictIdleDirCache() { diff --git a/weed/mount/weedfs_dir_mkrm.go b/weed/mount/weedfs_dir_mkrm.go index 0ede18397..f1381234d 100644 --- a/weed/mount/weedfs_dir_mkrm.go +++ b/weed/mount/weedfs_dir_mkrm.go @@ -2,7 +2,6 @@ package 
mount import ( "context" - "fmt" "os" "strings" "syscall" @@ -63,19 +62,21 @@ func (wfs *WFS) Mkdir(cancel <-chan struct{}, in *fuse.MkdirIn, name string, out } glog.V(1).Infof("mkdir: %v", request) - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + resp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { glog.V(0).Infof("mkdir %s: %v", entryFullPath, err) return err } - // Only cache the entry if the parent directory is already cached. - // This avoids polluting the cache with partial directory data. - if wfs.metaCache.IsDirectoryCached(dirFullPath) { - wfs.inodeToPath.TouchDirectory(dirFullPath) - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("local mkdir dir %s: %w", entryFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataCreateEvent(string(dirFullPath), newEntry) } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("mkdir %s: best-effort metadata apply failed: %v", entryFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(dirFullPath) + } + wfs.inodeToPath.TouchDirectory(dirFullPath) return nil }) @@ -112,7 +113,7 @@ func (wfs *WFS) Rmdir(cancel <-chan struct{}, header *fuse.InHeader, name string glog.V(3).Infof("remove directory: %v", entryFullPath) ignoreRecursiveErr := true // ignore recursion error since the OS should manage it - err := filer_pb.Remove(context.Background(), wfs, string(dirFullPath), name, true, false, ignoreRecursiveErr, false, []int32{wfs.signature}) + resp, err := filer_pb.RemoveWithResponse(context.Background(), wfs, string(dirFullPath), name, true, false, ignoreRecursiveErr, false, []int32{wfs.signature}) if err != nil { glog.V(0).Infof("remove %s: %v", entryFullPath, err) if strings.Contains(err.Error(), filer.MsgFailDelNonEmptyFolder) { @@ -121,7 
+122,14 @@ func (wfs *WFS) Rmdir(cancel <-chan struct{}, header *fuse.InHeader, name string return fuse.ENOENT } - wfs.metaCache.DeleteEntry(context.Background(), entryFullPath) + event := metadataDeleteEvent(string(dirFullPath), name, true) + if resp != nil && resp.MetadataEvent != nil { + event = resp.MetadataEvent + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("rmdir %s: best-effort metadata apply failed: %v", entryFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(dirFullPath) + } wfs.inodeToPath.RemovePath(entryFullPath) wfs.inodeToPath.TouchDirectory(dirFullPath) diff --git a/weed/mount/weedfs_dir_read.go b/weed/mount/weedfs_dir_read.go index 274f2c185..9488f9aff 100644 --- a/weed/mount/weedfs_dir_read.go +++ b/weed/mount/weedfs_dir_read.go @@ -3,11 +3,13 @@ package mount import ( "context" "sync" + "time" "github.com/seaweedfs/go-fuse/v2/fuse" "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/mount/meta_cache" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/util" ) @@ -26,10 +28,12 @@ type DirectoryHandle struct { isFinished bool entryStream []*filer.Entry entryStreamOffset uint64 + snapshotTsNs int64 // snapshot timestamp for consistent readdir in direct mode } func (dh *DirectoryHandle) reset() { dh.isFinished = false + dh.snapshotTsNs = 0 // Nil out pointers to allow garbage collection of old entries, // then reuse the slice's capacity to avoid re-allocations. 
for i := range dh.entryStream { @@ -164,7 +168,6 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl return code } wfs.inodeToPath.TouchDirectory(dirPath) - wfs.maybeRefreshDirectory(dirPath) var dirEntry fuse.DirEntry @@ -214,6 +217,10 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl var lastEntryName string + if wfs.inodeToPath.ShouldReadDirectoryDirect(dirPath) { + return wfs.readDirectoryDirect(input, out, dh, dirPath, processEachEntryFn) + } + // Read from cache first, then load next batch if needed if input.Offset >= dh.entryStreamOffset { // Handle case: new handle with non-zero offset but empty cache @@ -288,3 +295,90 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl return fuse.OK } + +func (wfs *WFS) readDirectoryDirect(input *fuse.ReadIn, out *fuse.DirEntryList, dh *DirectoryHandle, dirPath util.FullPath, processEachEntryFn func(entry *filer.Entry, index int64) bool) fuse.Status { + var lastEntryName string + + if input.Offset >= dh.entryStreamOffset { + if len(dh.entryStream) == 0 && input.Offset > dh.entryStreamOffset { + skipCount := uint32(input.Offset-dh.entryStreamOffset) + batchSize + entries, snapshotTs, err := loadDirectoryEntriesDirect(context.Background(), wfs, wfs.option.UidGidMapper, dirPath, "", false, skipCount, dh.snapshotTsNs) + if err != nil { + glog.Errorf("list filer directory: %v", err) + return fuse.EIO + } + dh.entryStream = append(dh.entryStream, entries...) 
+ if dh.snapshotTsNs == 0 { + dh.snapshotTsNs = snapshotTs + } + } + + if input.Offset > dh.entryStreamOffset { + entryPreviousIndex := (input.Offset - dh.entryStreamOffset) - 1 + if uint64(len(dh.entryStream)) > entryPreviousIndex { + lastEntryName = dh.entryStream[entryPreviousIndex].Name() + } + } + + entryCurrentIndex := int64(input.Offset - dh.entryStreamOffset) + for int64(len(dh.entryStream)) > entryCurrentIndex { + entry := dh.entryStream[entryCurrentIndex] + if processEachEntryFn(entry, entryCurrentIndex) { + lastEntryName = entry.Name() + entryCurrentIndex++ + } else { + return fuse.OK + } + } + + entries, snapshotTs, err := loadDirectoryEntriesDirect(context.Background(), wfs, wfs.option.UidGidMapper, dirPath, lastEntryName, false, batchSize, dh.snapshotTsNs) + if err != nil { + glog.Errorf("list filer directory: %v", err) + return fuse.EIO + } + if dh.snapshotTsNs == 0 { + dh.snapshotTsNs = snapshotTs + } + + bufferFull := false + for _, entry := range entries { + currentIndex := int64(len(dh.entryStream)) + dh.entryStream = append(dh.entryStream, entry) + if !processEachEntryFn(entry, currentIndex) { + bufferFull = true + break + } + } + if !bufferFull && len(entries) < int(batchSize) { + dh.isFinished = true + // After a full successful read-through listing, exit direct mode + // so subsequent reads can use the cache instead of hitting the filer. 
+ wfs.inodeToPath.MarkDirectoryRefreshed(dirPath, time.Now()) + } + } + + return fuse.OK +} + +func loadDirectoryEntriesDirect(ctx context.Context, client filer_pb.FilerClient, uidGidMapper *meta_cache.UidGidMapper, dirPath util.FullPath, startFileName string, includeStart bool, limit uint32, snapshotTsNs int64) ([]*filer.Entry, int64, error) { + entries := make([]*filer.Entry, 0, limit) + var actualSnapshotTsNs int64 + err := client.WithFilerClient(false, func(sc filer_pb.SeaweedFilerClient) error { + var innerErr error + actualSnapshotTsNs, innerErr = filer_pb.DoSeaweedListWithSnapshot(ctx, sc, dirPath, "", func(entry *filer_pb.Entry, isLast bool) error { + if meta_cache.IsHiddenSystemEntry(string(dirPath), entry.Name) { + return nil + } + if uidGidMapper != nil && entry.Attributes != nil { + entry.Attributes.Uid, entry.Attributes.Gid = uidGidMapper.FilerToLocal(entry.Attributes.Uid, entry.Attributes.Gid) + } + entries = append(entries, filer.FromPbEntry(string(dirPath), entry)) + return nil + }, startFileName, includeStart, limit, snapshotTsNs) + return innerErr + }) + if err != nil { + return nil, actualSnapshotTsNs, err + } + return entries, actualSnapshotTsNs, nil +} diff --git a/weed/mount/weedfs_dir_read_test.go b/weed/mount/weedfs_dir_read_test.go new file mode 100644 index 000000000..e8e8e7d79 --- /dev/null +++ b/weed/mount/weedfs_dir_read_test.go @@ -0,0 +1,100 @@ +package mount + +import ( + "context" + "io" + "testing" + + "github.com/seaweedfs/seaweedfs/weed/mount/meta_cache" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/seaweedfs/weed/util" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" +) + +type directoryListStream struct { + responses []*filer_pb.ListEntriesResponse + index int +} + +func (s *directoryListStream) Recv() (*filer_pb.ListEntriesResponse, error) { + if s.index >= len(s.responses) { + return nil, io.EOF + } + resp := s.responses[s.index] + s.index++ + return resp, nil +} + +func (s 
*directoryListStream) Header() (metadata.MD, error) { return metadata.MD{}, nil } +func (s *directoryListStream) Trailer() metadata.MD { return metadata.MD{} } +func (s *directoryListStream) CloseSend() error { return nil } +func (s *directoryListStream) Context() context.Context { return context.Background() } +func (s *directoryListStream) SendMsg(any) error { return nil } +func (s *directoryListStream) RecvMsg(any) error { return nil } + +type directoryListClient struct { + filer_pb.SeaweedFilerClient + responses []*filer_pb.ListEntriesResponse +} + +func (c *directoryListClient) ListEntries(ctx context.Context, in *filer_pb.ListEntriesRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[filer_pb.ListEntriesResponse], error) { + return &directoryListStream{responses: c.responses}, nil +} + +type directoryFilerAccessor struct { + client filer_pb.SeaweedFilerClient +} + +func (a *directoryFilerAccessor) WithFilerClient(_ bool, fn func(filer_pb.SeaweedFilerClient) error) error { + return fn(a.client) +} + +func (a *directoryFilerAccessor) AdjustedUrl(*filer_pb.Location) string { return "" } +func (a *directoryFilerAccessor) GetDataCenter() string { return "" } + +func TestLoadDirectoryEntriesDirectFiltersHiddenEntriesAndMapsIds(t *testing.T) { + mapper, err := meta_cache.NewUidGidMapper("10:1000", "20:2000") + if err != nil { + t.Fatalf("uid/gid mapper: %v", err) + } + + client := &directoryFilerAccessor{ + client: &directoryListClient{ + responses: []*filer_pb.ListEntriesResponse{ + { + Entry: &filer_pb.Entry{ + Name: "topics", + Attributes: &filer_pb.FuseAttributes{ + Uid: 1000, + Gid: 2000, + }, + }, + }, + { + Entry: &filer_pb.Entry{ + Name: "visible", + Attributes: &filer_pb.FuseAttributes{ + Uid: 1000, + Gid: 2000, + }, + }, + }, + }, + }, + } + + entries, _, err := loadDirectoryEntriesDirect(context.Background(), client, mapper, util.FullPath("/"), "", false, 10, 0) + if err != nil { + t.Fatalf("loadDirectoryEntriesDirect: %v", err) + } + if got := 
len(entries); got != 1 { + t.Fatalf("entry count = %d, want 1", got) + } + if entries[0].Name() != "visible" { + t.Fatalf("entry name = %q, want visible", entries[0].Name()) + } + if entries[0].Attr.Uid != 10 || entries[0].Attr.Gid != 20 { + t.Fatalf("mapped uid/gid = %d/%d, want 10/20", entries[0].Attr.Uid, entries[0].Attr.Gid) + } +} diff --git a/weed/mount/weedfs_file_mkrm.go b/weed/mount/weedfs_file_mkrm.go index f7306f7a3..302e908bb 100644 --- a/weed/mount/weedfs_file_mkrm.go +++ b/weed/mount/weedfs_file_mkrm.go @@ -2,12 +2,10 @@ package mount import ( "context" - "fmt" "syscall" "time" "github.com/seaweedfs/go-fuse/v2/fuse" - "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" ) @@ -83,19 +81,21 @@ func (wfs *WFS) Mknod(cancel <-chan struct{}, in *fuse.MknodIn, name string, out } glog.V(1).Infof("mknod: %v", request) - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + resp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { glog.V(0).Infof("mknod %s: %v", entryFullPath, err) return err } - // Only cache the entry if the parent directory is already cached. - // This avoids polluting the cache with partial directory data. 
- if wfs.metaCache.IsDirectoryCached(dirFullPath) { - wfs.inodeToPath.TouchDirectory(dirFullPath) - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("local mknod %s: %w", entryFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataCreateEvent(string(dirFullPath), newEntry) } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("mknod %s: best-effort metadata apply failed: %v", entryFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(dirFullPath) + } + wfs.inodeToPath.TouchDirectory(dirFullPath) return nil }) @@ -143,16 +143,21 @@ func (wfs *WFS) Unlink(cancel <-chan struct{}, header *fuse.InHeader, name strin glog.V(3).Infof("remove file: %v", entryFullPath) // Always let the filer decide whether to delete chunks based on its authoritative data. // The filer has the correct hard link count and will only delete chunks when appropriate. 
- err := filer_pb.Remove(context.Background(), wfs, string(dirFullPath), name, true, false, false, false, []int32{wfs.signature}) + resp, err := filer_pb.RemoveWithResponse(context.Background(), wfs, string(dirFullPath), name, true, false, false, false, []int32{wfs.signature}) if err != nil { glog.V(0).Infof("remove %s: %v", entryFullPath, err) return fuse.OK } - // then, delete meta cache - if err = wfs.metaCache.DeleteEntry(context.Background(), entryFullPath); err != nil { - glog.V(3).Infof("local DeleteEntry %s: %v", entryFullPath, err) - return fuse.EIO + var event *filer_pb.SubscribeMetadataResponse + if resp != nil && resp.MetadataEvent != nil { + event = resp.MetadataEvent + } else { + event = metadataDeleteEvent(string(dirFullPath), name, false) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("unlink %s: best-effort metadata apply failed: %v", entryFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(dirFullPath) } wfs.inodeToPath.TouchDirectory(dirFullPath) diff --git a/weed/mount/weedfs_file_sync.go b/weed/mount/weedfs_file_sync.go index e8fff04a7..42064a099 100644 --- a/weed/mount/weedfs_file_sync.go +++ b/weed/mount/weedfs_file_sync.go @@ -161,16 +161,19 @@ func (wfs *WFS) doFlush(fh *FileHandle, uid, gid uint32) fuse.Status { wfs.mapPbIdFromLocalToFiler(request.Entry) defer wfs.mapPbIdFromFilerToLocal(request.Entry) - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + resp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { glog.Errorf("fh flush create %s: %v", fileFullPath, err) return fmt.Errorf("fh flush create %s: %v", fileFullPath, err) } - // Only update cache if the parent directory is cached - if wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) { - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return 
fmt.Errorf("update meta cache for %s: %w", fileFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataUpdateEvent(string(dir), request.Entry) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("flush %s: best-effort metadata apply failed: %v", fileFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(dir)) } return nil diff --git a/weed/mount/weedfs_link.go b/weed/mount/weedfs_link.go index 0960b7e47..d39aa73ee 100644 --- a/weed/mount/weedfs_link.go +++ b/weed/mount/weedfs_link.go @@ -2,7 +2,6 @@ package mount import ( "context" - "fmt" "syscall" "time" @@ -56,6 +55,8 @@ func (wfs *WFS) Link(cancel <-chan struct{}, in *fuse.LinkIn, name string, out * } // update old file to hardlink mode + origHardLinkId := oldEntry.HardLinkId + origHardLinkCounter := oldEntry.HardLinkCounter if len(oldEntry.HardLinkId) == 0 { oldEntry.HardLinkId = filer.NewHardLinkId() oldEntry.HardLinkCounter = 1 @@ -90,25 +91,42 @@ func (wfs *WFS) Link(cancel <-chan struct{}, in *fuse.LinkIn, name string, out * wfs.mapPbIdFromLocalToFiler(request.Entry) defer wfs.mapPbIdFromFilerToLocal(request.Entry) - if err := filer_pb.UpdateEntry(context.Background(), client, updateOldEntryRequest); err != nil { + updateResp, err := filer_pb.UpdateEntryWithResponse(context.Background(), client, updateOldEntryRequest) + if err != nil { return err } - // Only update cache if the directory is cached - if wfs.metaCache.IsDirectoryCached(util.FullPath(updateOldEntryRequest.Directory)) { - if err := wfs.metaCache.UpdateEntry(context.Background(), filer.FromPbEntry(updateOldEntryRequest.Directory, updateOldEntryRequest.Entry)); err != nil { - return fmt.Errorf("update meta cache for %s: %w", oldEntryPath, err) - } + updateEvent := updateResp.GetMetadataEvent() + if updateEvent == nil { + updateEvent = metadataUpdateEvent(oldParentPath, updateOldEntryRequest.Entry) + } + if applyErr := 
wfs.applyLocalMetadataEvent(context.Background(), updateEvent); applyErr != nil { + glog.Warningf("link %s: best-effort metadata apply failed: %v", oldEntryPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(oldParentPath)) } - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + createResp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { + // Rollback: restore original HardLinkId/Counter on the source entry + oldEntry.HardLinkId = origHardLinkId + oldEntry.HardLinkCounter = origHardLinkCounter + rollbackReq := &filer_pb.UpdateEntryRequest{ + Directory: oldParentPath, + Entry: oldEntry, + Signatures: []int32{wfs.signature}, + } + if _, rollbackErr := filer_pb.UpdateEntryWithResponse(context.Background(), client, rollbackReq); rollbackErr != nil { + glog.Warningf("link rollback %s: %v", oldEntryPath, rollbackErr) + } return err } - // Only cache the entry if the parent directory is already cached. 
- if wfs.metaCache.IsDirectoryCached(newParentPath) { - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("insert meta cache for %s: %w", newParentPath.Child(name), err) - } + createEvent := createResp.GetMetadataEvent() + if createEvent == nil { + createEvent = metadataCreateEvent(string(newParentPath), request.Entry) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), createEvent); applyErr != nil { + glog.Warningf("link %s: best-effort metadata apply failed: %v", newParentPath.Child(name), applyErr) + wfs.inodeToPath.InvalidateChildrenCache(newParentPath) } return nil diff --git a/weed/mount/weedfs_metadata_flush.go b/weed/mount/weedfs_metadata_flush.go index 28145d089..fe3ace2ec 100644 --- a/weed/mount/weedfs_metadata_flush.go +++ b/weed/mount/weedfs_metadata_flush.go @@ -2,7 +2,6 @@ package mount import ( "context" - "fmt" "sync" "time" @@ -142,15 +141,18 @@ func (wfs *WFS) flushFileMetadata(fh *FileHandle) error { wfs.mapPbIdFromLocalToFiler(request.Entry) defer wfs.mapPbIdFromFilerToLocal(request.Entry) - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + resp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { return err } - // Only update cache if the parent directory is cached - if wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) { - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("update meta cache for %s: %w", fileFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataUpdateEvent(string(dir), request.Entry) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("flushFileMetadata %s: best-effort metadata apply failed: %v", fileFullPath, applyErr) + 
wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(dir)) } glog.V(3).Infof("flushed metadata for %s with %d chunks", fileFullPath, len(entry.GetChunks())) diff --git a/weed/mount/weedfs_rename.go b/weed/mount/weedfs_rename.go index 70d152ae4..cd1acd79f 100644 --- a/weed/mount/weedfs_rename.go +++ b/weed/mount/weedfs_rename.go @@ -9,7 +9,6 @@ import ( "github.com/seaweedfs/go-fuse/v2/fs" "github.com/seaweedfs/go-fuse/v2/fuse" - "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/util" @@ -233,10 +232,12 @@ func (wfs *WFS) handleRenameResponse(ctx context.Context, resp *filer_pb.StreamR glog.V(4).Infof("dir Rename %+v", resp.EventNotification) if resp.EventNotification.NewEntry != nil { - // with new entry, the old entry name also exists. This is the first step to create new entry - newEntry := filer.FromPbEntry(resp.EventNotification.NewParentPath, resp.EventNotification.NewEntry) - if err := wfs.metaCache.AtomicUpdateEntryFromFiler(ctx, "", newEntry); err != nil { - return err + if err := wfs.applyLocalMetadataEvent(ctx, metadataEventFromRenameResponse(resp)); err != nil { + glog.Warningf("rename apply metadata event: %v", err) + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(resp.Directory)) + if resp.EventNotification.NewParentPath != "" { + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(resp.EventNotification.NewParentPath)) + } } oldParent, newParent := util.FullPath(resp.Directory), util.FullPath(resp.EventNotification.NewParentPath) @@ -245,14 +246,6 @@ func (wfs *WFS) handleRenameResponse(ctx context.Context, resp *filer_pb.StreamR oldPath := oldParent.Child(oldName) newPath := newParent.Child(newName) - // Keep the renamed destination immediately readable even when the directory - // itself is not marked as fully cached. 
- if !wfs.metaCache.IsDirectoryCached(newParent) { - if err := wfs.metaCache.InsertEntry(ctx, newEntry); err != nil { - return err - } - } - sourceInode, targetInode := wfs.inodeToPath.MovePath(oldPath, newPath) if sourceInode != 0 { fh, foundFh := wfs.fhMap.FindFileHandle(sourceInode) @@ -271,8 +264,9 @@ func (wfs *WFS) handleRenameResponse(ctx context.Context, resp *filer_pb.StreamR } else if resp.EventNotification.OldEntry != nil { // without new entry, only old entry name exists. This is the second step to delete old entry - if err := wfs.metaCache.AtomicUpdateEntryFromFiler(ctx, util.NewFullPath(resp.Directory, resp.EventNotification.OldEntry.Name), nil); err != nil { - return err + if err := wfs.applyLocalMetadataEvent(ctx, metadataEventFromRenameResponse(resp)); err != nil { + glog.Warningf("rename apply delete event: %v", err) + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(resp.Directory)) } } diff --git a/weed/mount/weedfs_rename_test.go b/weed/mount/weedfs_rename_test.go index b6f4bf33b..4b79cc709 100644 --- a/weed/mount/weedfs_rename_test.go +++ b/weed/mount/weedfs_rename_test.go @@ -10,7 +10,7 @@ import ( "github.com/seaweedfs/seaweedfs/weed/util" ) -func TestHandleRenameResponseCachesTargetForUncachedDirectory(t *testing.T) { +func TestHandleRenameResponseLeavesUncachedTargetOutOfCache(t *testing.T) { uidGidMapper, err := meta_cache.NewUidGidMapper("", "") if err != nil { t.Fatalf("create uid/gid mapper: %v", err) @@ -73,14 +73,11 @@ func TestHandleRenameResponseCachesTargetForUncachedDirectory(t *testing.T) { } entry, findErr := mc.FindEntry(context.Background(), targetPath) - if findErr != nil { - t.Fatalf("find target entry: %v", findErr) + if findErr != filer_pb.ErrNotFound { + t.Fatalf("find target entry error = %v, want %v", findErr, filer_pb.ErrNotFound) } - if entry == nil { - t.Fatalf("target entry %s not cached", targetPath) - } - if entry.FileSize != 53 { - t.Fatalf("cached file size = %d, want 53", entry.FileSize) + if entry != nil 
{ + t.Fatalf("target entry %s should not be cached for an uncached directory", targetPath) } updatedInode, found := inodeToPath.GetInode(targetPath) diff --git a/weed/mount/weedfs_symlink.go b/weed/mount/weedfs_symlink.go index a53312984..0505c8bed 100644 --- a/weed/mount/weedfs_symlink.go +++ b/weed/mount/weedfs_symlink.go @@ -9,7 +9,6 @@ import ( "github.com/seaweedfs/go-fuse/v2/fuse" - "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" ) @@ -53,15 +52,18 @@ func (wfs *WFS) Symlink(cancel <-chan struct{}, header *fuse.InHeader, target st wfs.mapPbIdFromLocalToFiler(request.Entry) defer wfs.mapPbIdFromFilerToLocal(request.Entry) - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + resp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { return fmt.Errorf("symlink %s: %v", entryFullPath, err) } - // Only cache the entry if the parent directory is already cached. 
- if wfs.metaCache.IsDirectoryCached(dirPath) { - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("insert meta cache for symlink %s: %w", entryFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataCreateEvent(string(dirPath), request.Entry) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("symlink %s: best-effort metadata apply failed: %v", entryFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(dirPath) } return nil diff --git a/weed/mount/wfs_save.go b/weed/mount/wfs_save.go index f3a4694f6..84318ff41 100644 --- a/weed/mount/wfs_save.go +++ b/weed/mount/wfs_save.go @@ -6,7 +6,6 @@ import ( "syscall" "github.com/seaweedfs/go-fuse/v2/fuse" - "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/util" @@ -28,13 +27,18 @@ func (wfs *WFS) saveEntry(path util.FullPath, entry *filer_pb.Entry) (code fuse. 
} glog.V(1).Infof("save entry: %v", request) - _, err := client.UpdateEntry(context.Background(), request) + resp, err := filer_pb.UpdateEntryWithResponse(context.Background(), client, request) if err != nil { return fmt.Errorf("UpdateEntry dir %s: %v", path, err) } - if err := wfs.metaCache.UpdateEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("metaCache.UpdateEntry dir %s: %w", path, err) + event := resp.GetMetadataEvent() + if event == nil { + event = metadataUpdateEvent(parentDir, entry) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("saveEntry %s: best-effort metadata apply failed: %v", path, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(parentDir)) } return nil diff --git a/weed/mq/broker/broker_grpc_lookup.go b/weed/mq/broker/broker_grpc_lookup.go index 5eec21b69..fad10f599 100644 --- a/weed/mq/broker/broker_grpc_lookup.go +++ b/weed/mq/broker/broker_grpc_lookup.go @@ -107,7 +107,7 @@ func (b *MessageQueueBroker) ListTopics(ctx context.Context, request *mq_pb.List return err } - if !resp.Entry.IsDirectory { + if resp.Entry == nil || !resp.Entry.IsDirectory { continue } @@ -135,7 +135,7 @@ func (b *MessageQueueBroker) ListTopics(ctx context.Context, request *mq_pb.List break } - if !topicResp.Entry.IsDirectory { + if topicResp.Entry == nil || !topicResp.Entry.IsDirectory { continue } diff --git a/weed/mq/broker/broker_topic_conf_read_write.go b/weed/mq/broker/broker_topic_conf_read_write.go index 138d1023e..976efb36c 100644 --- a/weed/mq/broker/broker_topic_conf_read_write.go +++ b/weed/mq/broker/broker_topic_conf_read_write.go @@ -252,7 +252,7 @@ func (b *MessageQueueBroker) getOffsetRangeFromChunkMetadata(t topic.Topic, part if err != nil { return err } - if resp.Entry.IsDirectory && strings.HasPrefix(resp.Entry.Name, "v") { + if resp.Entry != nil && resp.Entry.IsDirectory && 
strings.HasPrefix(resp.Entry.Name, "v") { if latestVersion == "" || resp.Entry.Name > latestVersion { latestVersion = resp.Entry.Name } @@ -290,7 +290,7 @@ func (b *MessageQueueBroker) getOffsetRangeFromChunkMetadata(t topic.Topic, part if err != nil { return err } - if resp.Entry.IsDirectory && resp.Entry.Name == targetPartitionName { + if resp.Entry != nil && resp.Entry.IsDirectory && resp.Entry.Name == targetPartitionName { partitionDir = resp.Entry.Name break } @@ -327,7 +327,7 @@ func (b *MessageQueueBroker) getOffsetRangeFromChunkMetadata(t topic.Topic, part if err != nil { return err } - if !resp.Entry.IsDirectory && resp.Entry.Name != "checkpoint.offset" { + if resp.Entry != nil && !resp.Entry.IsDirectory && resp.Entry.Name != "checkpoint.offset" { // Check for offset ranges in Extended attributes (both log files and parquet files) if resp.Entry.Extended != nil { fileType := "log" diff --git a/weed/mq/kafka/consumer_offset/filer_storage.go b/weed/mq/kafka/consumer_offset/filer_storage.go index 8eeceb660..9d92ad730 100644 --- a/weed/mq/kafka/consumer_offset/filer_storage.go +++ b/weed/mq/kafka/consumer_offset/filer_storage.go @@ -285,7 +285,7 @@ func (f *FilerStorage) listDirectory(path string) ([]string, error) { return err } - if resp.Entry.IsDirectory { + if resp.Entry != nil && resp.Entry.IsDirectory { entries = append(entries, resp.Entry.Name) } } diff --git a/weed/mq/kafka/integration/broker_client.go b/weed/mq/kafka/integration/broker_client.go index c1f743f0b..158f9e6e5 100644 --- a/weed/mq/kafka/integration/broker_client.go +++ b/weed/mq/kafka/integration/broker_client.go @@ -232,7 +232,7 @@ func (bc *BrokerClient) getOffsetRangeFromChunkMetadata(topic string, partition if err != nil { return err } - if resp.Entry.IsDirectory && strings.HasPrefix(resp.Entry.Name, "v") { + if resp.Entry != nil && resp.Entry.IsDirectory && strings.HasPrefix(resp.Entry.Name, "v") { if latestVersion == "" || resp.Entry.Name > latestVersion { latestVersion = 
resp.Entry.Name } @@ -267,9 +267,15 @@ func (bc *BrokerClient) getOffsetRangeFromChunkMetadata(topic string, partition if err != nil { return err } - if resp.Entry.IsDirectory && strings.Contains(resp.Entry.Name, "-") { - partitionDir = resp.Entry.Name - break // Use the first partition directory we find + if resp.Entry != nil && resp.Entry.IsDirectory && strings.Contains(resp.Entry.Name, "-") { + // Parse partition range (format: NNNN-NNNN) and match requested partition + var pStart, pStop int32 + if n, scanErr := fmt.Sscanf(resp.Entry.Name, "%04d-%04d", &pStart, &pStop); n == 2 && scanErr == nil { + if partition >= pStart && partition < pStop { + partitionDir = resp.Entry.Name + break + } + } } } return nil @@ -303,7 +309,7 @@ func (bc *BrokerClient) getOffsetRangeFromChunkMetadata(topic string, partition if err != nil { return err } - if !resp.Entry.IsDirectory && resp.Entry.Name != "checkpoint.offset" { + if resp.Entry != nil && !resp.Entry.IsDirectory && resp.Entry.Name != "checkpoint.offset" { // Check for offset ranges in Extended attributes (both log files and parquet files) if resp.Entry.Extended != nil { // Track maximum offset for high water mark diff --git a/weed/pb/filer.proto b/weed/pb/filer.proto index 9257996ed..78dd58b1f 100644 --- a/weed/pb/filer.proto +++ b/weed/pb/filer.proto @@ -100,10 +100,12 @@ message ListEntriesRequest { string startFromFileName = 3; bool inclusiveStartFrom = 4; uint32 limit = 5; + int64 snapshot_ts_ns = 6; } message ListEntriesResponse { Entry entry = 1; + int64 snapshot_ts_ns = 2; } message RemoteEntry { @@ -203,6 +205,7 @@ message CreateEntryRequest { message CreateEntryResponse { string error = 1; + SubscribeMetadataResponse metadata_event = 2; } message UpdateEntryRequest { @@ -212,6 +215,7 @@ message UpdateEntryRequest { repeated int32 signatures = 4; } message UpdateEntryResponse { + SubscribeMetadataResponse metadata_event = 1; } message AppendToEntryRequest { @@ -236,6 +240,7 @@ message DeleteEntryRequest { message 
DeleteEntryResponse { string error = 1; + SubscribeMetadataResponse metadata_event = 2; } message AtomicRenameEntryRequest { @@ -469,6 +474,7 @@ message CacheRemoteObjectToLocalClusterRequest { } message CacheRemoteObjectToLocalClusterResponse { Entry entry = 1; + SubscribeMetadataResponse metadata_event = 2; } ///////////////////////// diff --git a/weed/pb/filer_pb/filer.pb.go b/weed/pb/filer_pb/filer.pb.go index 7b96f095d..e86fc9d2a 100644 --- a/weed/pb/filer_pb/filer.pb.go +++ b/weed/pb/filer_pb/filer.pb.go @@ -176,6 +176,7 @@ type ListEntriesRequest struct { StartFromFileName string `protobuf:"bytes,3,opt,name=startFromFileName,proto3" json:"startFromFileName,omitempty"` InclusiveStartFrom bool `protobuf:"varint,4,opt,name=inclusiveStartFrom,proto3" json:"inclusiveStartFrom,omitempty"` Limit uint32 `protobuf:"varint,5,opt,name=limit,proto3" json:"limit,omitempty"` + SnapshotTsNs int64 `protobuf:"varint,6,opt,name=snapshot_ts_ns,json=snapshotTsNs,proto3" json:"snapshot_ts_ns,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -245,9 +246,17 @@ func (x *ListEntriesRequest) GetLimit() uint32 { return 0 } +func (x *ListEntriesRequest) GetSnapshotTsNs() int64 { + if x != nil { + return x.SnapshotTsNs + } + return 0 +} + type ListEntriesResponse struct { state protoimpl.MessageState `protogen:"open.v1"` Entry *Entry `protobuf:"bytes,1,opt,name=entry,proto3" json:"entry,omitempty"` + SnapshotTsNs int64 `protobuf:"varint,2,opt,name=snapshot_ts_ns,json=snapshotTsNs,proto3" json:"snapshot_ts_ns,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -289,6 +298,13 @@ func (x *ListEntriesResponse) GetEntry() *Entry { return nil } +func (x *ListEntriesResponse) GetSnapshotTsNs() int64 { + if x != nil { + return x.SnapshotTsNs + } + return 0 +} + type RemoteEntry struct { state protoimpl.MessageState `protogen:"open.v1"` StorageName string `protobuf:"bytes,1,opt,name=storage_name,json=storageName,proto3" 
json:"storage_name,omitempty"` @@ -1102,8 +1118,9 @@ func (x *CreateEntryRequest) GetSkipCheckParentDirectory() bool { } type CreateEntryResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` - Error string `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` + state protoimpl.MessageState `protogen:"open.v1"` + Error string `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` + MetadataEvent *SubscribeMetadataResponse `protobuf:"bytes,2,opt,name=metadata_event,json=metadataEvent,proto3" json:"metadata_event,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -1145,6 +1162,13 @@ func (x *CreateEntryResponse) GetError() string { return "" } +func (x *CreateEntryResponse) GetMetadataEvent() *SubscribeMetadataResponse { + if x != nil { + return x.MetadataEvent + } + return nil +} + type UpdateEntryRequest struct { state protoimpl.MessageState `protogen:"open.v1"` Directory string `protobuf:"bytes,1,opt,name=directory,proto3" json:"directory,omitempty"` @@ -1214,7 +1238,8 @@ func (x *UpdateEntryRequest) GetSignatures() []int32 { } type UpdateEntryResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` + state protoimpl.MessageState `protogen:"open.v1"` + MetadataEvent *SubscribeMetadataResponse `protobuf:"bytes,1,opt,name=metadata_event,json=metadataEvent,proto3" json:"metadata_event,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -1249,6 +1274,13 @@ func (*UpdateEntryResponse) Descriptor() ([]byte, []int) { return file_filer_proto_rawDescGZIP(), []int{15} } +func (x *UpdateEntryResponse) GetMetadataEvent() *SubscribeMetadataResponse { + if x != nil { + return x.MetadataEvent + } + return nil +} + type AppendToEntryRequest struct { state protoimpl.MessageState `protogen:"open.v1"` Directory string `protobuf:"bytes,1,opt,name=directory,proto3" json:"directory,omitempty"` @@ -1447,8 +1479,9 @@ func (x *DeleteEntryRequest) GetIfNotModifiedAfter() 
int64 { } type DeleteEntryResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` - Error string `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` + state protoimpl.MessageState `protogen:"open.v1"` + Error string `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` + MetadataEvent *SubscribeMetadataResponse `protobuf:"bytes,2,opt,name=metadata_event,json=metadataEvent,proto3" json:"metadata_event,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -1490,6 +1523,13 @@ func (x *DeleteEntryResponse) GetError() string { return "" } +func (x *DeleteEntryResponse) GetMetadataEvent() *SubscribeMetadataResponse { + if x != nil { + return x.MetadataEvent + } + return nil +} + type AtomicRenameEntryRequest struct { state protoimpl.MessageState `protogen:"open.v1"` OldDirectory string `protobuf:"bytes,1,opt,name=old_directory,json=oldDirectory,proto3" json:"old_directory,omitempty"` @@ -3628,8 +3668,9 @@ func (x *CacheRemoteObjectToLocalClusterRequest) GetName() string { } type CacheRemoteObjectToLocalClusterResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` - Entry *Entry `protobuf:"bytes,1,opt,name=entry,proto3" json:"entry,omitempty"` + state protoimpl.MessageState `protogen:"open.v1"` + Entry *Entry `protobuf:"bytes,1,opt,name=entry,proto3" json:"entry,omitempty"` + MetadataEvent *SubscribeMetadataResponse `protobuf:"bytes,2,opt,name=metadata_event,json=metadataEvent,proto3" json:"metadata_event,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -3671,6 +3712,13 @@ func (x *CacheRemoteObjectToLocalClusterResponse) GetEntry() *Entry { return nil } +func (x *CacheRemoteObjectToLocalClusterResponse) GetMetadataEvent() *SubscribeMetadataResponse { + if x != nil { + return x.MetadataEvent + } + return nil +} + // /////////////////////// // distributed lock management // /////////////////////// @@ -4401,15 +4449,17 @@ const file_filer_proto_rawDesc = 
"" + "\tdirectory\x18\x01 \x01(\tR\tdirectory\x12\x12\n" + "\x04name\x18\x02 \x01(\tR\x04name\"E\n" + "\x1cLookupDirectoryEntryResponse\x12%\n" + - "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\"\xbe\x01\n" + + "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\"\xe4\x01\n" + "\x12ListEntriesRequest\x12\x1c\n" + "\tdirectory\x18\x01 \x01(\tR\tdirectory\x12\x16\n" + "\x06prefix\x18\x02 \x01(\tR\x06prefix\x12,\n" + "\x11startFromFileName\x18\x03 \x01(\tR\x11startFromFileName\x12.\n" + "\x12inclusiveStartFrom\x18\x04 \x01(\bR\x12inclusiveStartFrom\x12\x14\n" + - "\x05limit\x18\x05 \x01(\rR\x05limit\"<\n" + + "\x05limit\x18\x05 \x01(\rR\x05limit\x12$\n" + + "\x0esnapshot_ts_ns\x18\x06 \x01(\x03R\fsnapshotTsNs\"b\n" + "\x13ListEntriesResponse\x12%\n" + - "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\"\xc8\x01\n" + + "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\x12$\n" + + "\x0esnapshot_ts_ns\x18\x02 \x01(\x03R\fsnapshotTsNs\"\xc8\x01\n" + "\vRemoteEntry\x12!\n" + "\fstorage_name\x18\x01 \x01(\tR\vstorageName\x120\n" + "\x15last_local_sync_ts_ns\x18\x02 \x01(\x03R\x11lastLocalSyncTsNs\x12 \n" + @@ -4497,17 +4547,19 @@ const file_filer_proto_rawDesc = "" + "\n" + "signatures\x18\x05 \x03(\x05R\n" + "signatures\x12=\n" + - "\x1bskip_check_parent_directory\x18\x06 \x01(\bR\x18skipCheckParentDirectory\"+\n" + + "\x1bskip_check_parent_directory\x18\x06 \x01(\bR\x18skipCheckParentDirectory\"w\n" + "\x13CreateEntryResponse\x12\x14\n" + - "\x05error\x18\x01 \x01(\tR\x05error\"\xac\x01\n" + + "\x05error\x18\x01 \x01(\tR\x05error\x12J\n" + + "\x0emetadata_event\x18\x02 \x01(\v2#.filer_pb.SubscribeMetadataResponseR\rmetadataEvent\"\xac\x01\n" + "\x12UpdateEntryRequest\x12\x1c\n" + "\tdirectory\x18\x01 \x01(\tR\tdirectory\x12%\n" + "\x05entry\x18\x02 \x01(\v2\x0f.filer_pb.EntryR\x05entry\x121\n" + "\x15is_from_other_cluster\x18\x03 \x01(\bR\x12isFromOtherCluster\x12\x1e\n" + "\n" + "signatures\x18\x04 \x03(\x05R\n" + - "signatures\"\x15\n" + - 
"\x13UpdateEntryResponse\"\x80\x01\n" + + "signatures\"a\n" + + "\x13UpdateEntryResponse\x12J\n" + + "\x0emetadata_event\x18\x01 \x01(\v2#.filer_pb.SubscribeMetadataResponseR\rmetadataEvent\"\x80\x01\n" + "\x14AppendToEntryRequest\x12\x1c\n" + "\tdirectory\x18\x01 \x01(\tR\tdirectory\x12\x1d\n" + "\n" + @@ -4524,9 +4576,10 @@ const file_filer_proto_rawDesc = "" + "\n" + "signatures\x18\b \x03(\x05R\n" + "signatures\x121\n" + - "\x15if_not_modified_after\x18\t \x01(\x03R\x12ifNotModifiedAfter\"+\n" + + "\x15if_not_modified_after\x18\t \x01(\x03R\x12ifNotModifiedAfter\"w\n" + "\x13DeleteEntryResponse\x12\x14\n" + - "\x05error\x18\x01 \x01(\tR\x05error\"\xba\x01\n" + + "\x05error\x18\x01 \x01(\tR\x05error\x12J\n" + + "\x0emetadata_event\x18\x02 \x01(\v2#.filer_pb.SubscribeMetadataResponseR\rmetadataEvent\"\xba\x01\n" + "\x18AtomicRenameEntryRequest\x12#\n" + "\rold_directory\x18\x01 \x01(\tR\foldDirectory\x12\x19\n" + "\bold_name\x18\x02 \x01(\tR\aoldName\x12#\n" + @@ -4723,9 +4776,10 @@ const file_filer_proto_rawDesc = "" + "\x1bworm_retention_time_seconds\x18\x10 \x01(\x04R\x18wormRetentionTimeSeconds\"Z\n" + "&CacheRemoteObjectToLocalClusterRequest\x12\x1c\n" + "\tdirectory\x18\x01 \x01(\tR\tdirectory\x12\x12\n" + - "\x04name\x18\x02 \x01(\tR\x04name\"P\n" + + "\x04name\x18\x02 \x01(\tR\x04name\"\x9c\x01\n" + "'CacheRemoteObjectToLocalClusterResponse\x12%\n" + - "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\"\x9b\x01\n" + + "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\x12J\n" + + "\x0emetadata_event\x18\x02 \x01(\v2#.filer_pb.SubscribeMetadataResponseR\rmetadataEvent\"\x9b\x01\n" + "\vLockRequest\x12\x12\n" + "\x04name\x18\x01 \x01(\tR\x04name\x12&\n" + "\x0fseconds_to_lock\x18\x02 \x01(\x03R\rsecondsToLock\x12\x1f\n" + @@ -4900,75 +4954,79 @@ var file_filer_proto_depIdxs = []int32{ 0, // 11: filer_pb.FileChunk.sse_type:type_name -> filer_pb.SSEType 9, // 12: filer_pb.FileChunkManifest.chunks:type_name -> filer_pb.FileChunk 6, // 13: 
filer_pb.CreateEntryRequest.entry:type_name -> filer_pb.Entry - 6, // 14: filer_pb.UpdateEntryRequest.entry:type_name -> filer_pb.Entry - 9, // 15: filer_pb.AppendToEntryRequest.chunks:type_name -> filer_pb.FileChunk - 8, // 16: filer_pb.StreamRenameEntryResponse.event_notification:type_name -> filer_pb.EventNotification - 29, // 17: filer_pb.AssignVolumeResponse.location:type_name -> filer_pb.Location - 29, // 18: filer_pb.Locations.locations:type_name -> filer_pb.Location - 68, // 19: filer_pb.LookupVolumeResponse.locations_map:type_name -> filer_pb.LookupVolumeResponse.LocationsMapEntry - 31, // 20: filer_pb.CollectionListResponse.collections:type_name -> filer_pb.Collection - 8, // 21: filer_pb.SubscribeMetadataResponse.event_notification:type_name -> filer_pb.EventNotification - 6, // 22: filer_pb.TraverseBfsMetadataResponse.entry:type_name -> filer_pb.Entry - 69, // 23: filer_pb.LocateBrokerResponse.resources:type_name -> filer_pb.LocateBrokerResponse.Resource - 70, // 24: filer_pb.FilerConf.locations:type_name -> filer_pb.FilerConf.PathConf - 6, // 25: filer_pb.CacheRemoteObjectToLocalClusterResponse.entry:type_name -> filer_pb.Entry - 64, // 26: filer_pb.TransferLocksRequest.locks:type_name -> filer_pb.Lock - 28, // 27: filer_pb.LookupVolumeResponse.LocationsMapEntry.value:type_name -> filer_pb.Locations - 1, // 28: filer_pb.SeaweedFiler.LookupDirectoryEntry:input_type -> filer_pb.LookupDirectoryEntryRequest - 3, // 29: filer_pb.SeaweedFiler.ListEntries:input_type -> filer_pb.ListEntriesRequest - 13, // 30: filer_pb.SeaweedFiler.CreateEntry:input_type -> filer_pb.CreateEntryRequest - 15, // 31: filer_pb.SeaweedFiler.UpdateEntry:input_type -> filer_pb.UpdateEntryRequest - 17, // 32: filer_pb.SeaweedFiler.AppendToEntry:input_type -> filer_pb.AppendToEntryRequest - 19, // 33: filer_pb.SeaweedFiler.DeleteEntry:input_type -> filer_pb.DeleteEntryRequest - 21, // 34: filer_pb.SeaweedFiler.AtomicRenameEntry:input_type -> filer_pb.AtomicRenameEntryRequest - 23, // 
35: filer_pb.SeaweedFiler.StreamRenameEntry:input_type -> filer_pb.StreamRenameEntryRequest - 25, // 36: filer_pb.SeaweedFiler.AssignVolume:input_type -> filer_pb.AssignVolumeRequest - 27, // 37: filer_pb.SeaweedFiler.LookupVolume:input_type -> filer_pb.LookupVolumeRequest - 32, // 38: filer_pb.SeaweedFiler.CollectionList:input_type -> filer_pb.CollectionListRequest - 34, // 39: filer_pb.SeaweedFiler.DeleteCollection:input_type -> filer_pb.DeleteCollectionRequest - 36, // 40: filer_pb.SeaweedFiler.Statistics:input_type -> filer_pb.StatisticsRequest - 38, // 41: filer_pb.SeaweedFiler.Ping:input_type -> filer_pb.PingRequest - 40, // 42: filer_pb.SeaweedFiler.GetFilerConfiguration:input_type -> filer_pb.GetFilerConfigurationRequest - 44, // 43: filer_pb.SeaweedFiler.TraverseBfsMetadata:input_type -> filer_pb.TraverseBfsMetadataRequest - 42, // 44: filer_pb.SeaweedFiler.SubscribeMetadata:input_type -> filer_pb.SubscribeMetadataRequest - 42, // 45: filer_pb.SeaweedFiler.SubscribeLocalMetadata:input_type -> filer_pb.SubscribeMetadataRequest - 51, // 46: filer_pb.SeaweedFiler.KvGet:input_type -> filer_pb.KvGetRequest - 53, // 47: filer_pb.SeaweedFiler.KvPut:input_type -> filer_pb.KvPutRequest - 56, // 48: filer_pb.SeaweedFiler.CacheRemoteObjectToLocalCluster:input_type -> filer_pb.CacheRemoteObjectToLocalClusterRequest - 58, // 49: filer_pb.SeaweedFiler.DistributedLock:input_type -> filer_pb.LockRequest - 60, // 50: filer_pb.SeaweedFiler.DistributedUnlock:input_type -> filer_pb.UnlockRequest - 62, // 51: filer_pb.SeaweedFiler.FindLockOwner:input_type -> filer_pb.FindLockOwnerRequest - 65, // 52: filer_pb.SeaweedFiler.TransferLocks:input_type -> filer_pb.TransferLocksRequest - 2, // 53: filer_pb.SeaweedFiler.LookupDirectoryEntry:output_type -> filer_pb.LookupDirectoryEntryResponse - 4, // 54: filer_pb.SeaweedFiler.ListEntries:output_type -> filer_pb.ListEntriesResponse - 14, // 55: filer_pb.SeaweedFiler.CreateEntry:output_type -> filer_pb.CreateEntryResponse - 16, // 56: 
filer_pb.SeaweedFiler.UpdateEntry:output_type -> filer_pb.UpdateEntryResponse - 18, // 57: filer_pb.SeaweedFiler.AppendToEntry:output_type -> filer_pb.AppendToEntryResponse - 20, // 58: filer_pb.SeaweedFiler.DeleteEntry:output_type -> filer_pb.DeleteEntryResponse - 22, // 59: filer_pb.SeaweedFiler.AtomicRenameEntry:output_type -> filer_pb.AtomicRenameEntryResponse - 24, // 60: filer_pb.SeaweedFiler.StreamRenameEntry:output_type -> filer_pb.StreamRenameEntryResponse - 26, // 61: filer_pb.SeaweedFiler.AssignVolume:output_type -> filer_pb.AssignVolumeResponse - 30, // 62: filer_pb.SeaweedFiler.LookupVolume:output_type -> filer_pb.LookupVolumeResponse - 33, // 63: filer_pb.SeaweedFiler.CollectionList:output_type -> filer_pb.CollectionListResponse - 35, // 64: filer_pb.SeaweedFiler.DeleteCollection:output_type -> filer_pb.DeleteCollectionResponse - 37, // 65: filer_pb.SeaweedFiler.Statistics:output_type -> filer_pb.StatisticsResponse - 39, // 66: filer_pb.SeaweedFiler.Ping:output_type -> filer_pb.PingResponse - 41, // 67: filer_pb.SeaweedFiler.GetFilerConfiguration:output_type -> filer_pb.GetFilerConfigurationResponse - 45, // 68: filer_pb.SeaweedFiler.TraverseBfsMetadata:output_type -> filer_pb.TraverseBfsMetadataResponse - 43, // 69: filer_pb.SeaweedFiler.SubscribeMetadata:output_type -> filer_pb.SubscribeMetadataResponse - 43, // 70: filer_pb.SeaweedFiler.SubscribeLocalMetadata:output_type -> filer_pb.SubscribeMetadataResponse - 52, // 71: filer_pb.SeaweedFiler.KvGet:output_type -> filer_pb.KvGetResponse - 54, // 72: filer_pb.SeaweedFiler.KvPut:output_type -> filer_pb.KvPutResponse - 57, // 73: filer_pb.SeaweedFiler.CacheRemoteObjectToLocalCluster:output_type -> filer_pb.CacheRemoteObjectToLocalClusterResponse - 59, // 74: filer_pb.SeaweedFiler.DistributedLock:output_type -> filer_pb.LockResponse - 61, // 75: filer_pb.SeaweedFiler.DistributedUnlock:output_type -> filer_pb.UnlockResponse - 63, // 76: filer_pb.SeaweedFiler.FindLockOwner:output_type -> 
filer_pb.FindLockOwnerResponse - 66, // 77: filer_pb.SeaweedFiler.TransferLocks:output_type -> filer_pb.TransferLocksResponse - 53, // [53:78] is the sub-list for method output_type - 28, // [28:53] is the sub-list for method input_type - 28, // [28:28] is the sub-list for extension type_name - 28, // [28:28] is the sub-list for extension extendee - 0, // [0:28] is the sub-list for field type_name + 43, // 14: filer_pb.CreateEntryResponse.metadata_event:type_name -> filer_pb.SubscribeMetadataResponse + 6, // 15: filer_pb.UpdateEntryRequest.entry:type_name -> filer_pb.Entry + 43, // 16: filer_pb.UpdateEntryResponse.metadata_event:type_name -> filer_pb.SubscribeMetadataResponse + 9, // 17: filer_pb.AppendToEntryRequest.chunks:type_name -> filer_pb.FileChunk + 43, // 18: filer_pb.DeleteEntryResponse.metadata_event:type_name -> filer_pb.SubscribeMetadataResponse + 8, // 19: filer_pb.StreamRenameEntryResponse.event_notification:type_name -> filer_pb.EventNotification + 29, // 20: filer_pb.AssignVolumeResponse.location:type_name -> filer_pb.Location + 29, // 21: filer_pb.Locations.locations:type_name -> filer_pb.Location + 68, // 22: filer_pb.LookupVolumeResponse.locations_map:type_name -> filer_pb.LookupVolumeResponse.LocationsMapEntry + 31, // 23: filer_pb.CollectionListResponse.collections:type_name -> filer_pb.Collection + 8, // 24: filer_pb.SubscribeMetadataResponse.event_notification:type_name -> filer_pb.EventNotification + 6, // 25: filer_pb.TraverseBfsMetadataResponse.entry:type_name -> filer_pb.Entry + 69, // 26: filer_pb.LocateBrokerResponse.resources:type_name -> filer_pb.LocateBrokerResponse.Resource + 70, // 27: filer_pb.FilerConf.locations:type_name -> filer_pb.FilerConf.PathConf + 6, // 28: filer_pb.CacheRemoteObjectToLocalClusterResponse.entry:type_name -> filer_pb.Entry + 43, // 29: filer_pb.CacheRemoteObjectToLocalClusterResponse.metadata_event:type_name -> filer_pb.SubscribeMetadataResponse + 64, // 30: filer_pb.TransferLocksRequest.locks:type_name -> 
filer_pb.Lock + 28, // 31: filer_pb.LookupVolumeResponse.LocationsMapEntry.value:type_name -> filer_pb.Locations + 1, // 32: filer_pb.SeaweedFiler.LookupDirectoryEntry:input_type -> filer_pb.LookupDirectoryEntryRequest + 3, // 33: filer_pb.SeaweedFiler.ListEntries:input_type -> filer_pb.ListEntriesRequest + 13, // 34: filer_pb.SeaweedFiler.CreateEntry:input_type -> filer_pb.CreateEntryRequest + 15, // 35: filer_pb.SeaweedFiler.UpdateEntry:input_type -> filer_pb.UpdateEntryRequest + 17, // 36: filer_pb.SeaweedFiler.AppendToEntry:input_type -> filer_pb.AppendToEntryRequest + 19, // 37: filer_pb.SeaweedFiler.DeleteEntry:input_type -> filer_pb.DeleteEntryRequest + 21, // 38: filer_pb.SeaweedFiler.AtomicRenameEntry:input_type -> filer_pb.AtomicRenameEntryRequest + 23, // 39: filer_pb.SeaweedFiler.StreamRenameEntry:input_type -> filer_pb.StreamRenameEntryRequest + 25, // 40: filer_pb.SeaweedFiler.AssignVolume:input_type -> filer_pb.AssignVolumeRequest + 27, // 41: filer_pb.SeaweedFiler.LookupVolume:input_type -> filer_pb.LookupVolumeRequest + 32, // 42: filer_pb.SeaweedFiler.CollectionList:input_type -> filer_pb.CollectionListRequest + 34, // 43: filer_pb.SeaweedFiler.DeleteCollection:input_type -> filer_pb.DeleteCollectionRequest + 36, // 44: filer_pb.SeaweedFiler.Statistics:input_type -> filer_pb.StatisticsRequest + 38, // 45: filer_pb.SeaweedFiler.Ping:input_type -> filer_pb.PingRequest + 40, // 46: filer_pb.SeaweedFiler.GetFilerConfiguration:input_type -> filer_pb.GetFilerConfigurationRequest + 44, // 47: filer_pb.SeaweedFiler.TraverseBfsMetadata:input_type -> filer_pb.TraverseBfsMetadataRequest + 42, // 48: filer_pb.SeaweedFiler.SubscribeMetadata:input_type -> filer_pb.SubscribeMetadataRequest + 42, // 49: filer_pb.SeaweedFiler.SubscribeLocalMetadata:input_type -> filer_pb.SubscribeMetadataRequest + 51, // 50: filer_pb.SeaweedFiler.KvGet:input_type -> filer_pb.KvGetRequest + 53, // 51: filer_pb.SeaweedFiler.KvPut:input_type -> filer_pb.KvPutRequest + 56, // 52: 
filer_pb.SeaweedFiler.CacheRemoteObjectToLocalCluster:input_type -> filer_pb.CacheRemoteObjectToLocalClusterRequest + 58, // 53: filer_pb.SeaweedFiler.DistributedLock:input_type -> filer_pb.LockRequest + 60, // 54: filer_pb.SeaweedFiler.DistributedUnlock:input_type -> filer_pb.UnlockRequest + 62, // 55: filer_pb.SeaweedFiler.FindLockOwner:input_type -> filer_pb.FindLockOwnerRequest + 65, // 56: filer_pb.SeaweedFiler.TransferLocks:input_type -> filer_pb.TransferLocksRequest + 2, // 57: filer_pb.SeaweedFiler.LookupDirectoryEntry:output_type -> filer_pb.LookupDirectoryEntryResponse + 4, // 58: filer_pb.SeaweedFiler.ListEntries:output_type -> filer_pb.ListEntriesResponse + 14, // 59: filer_pb.SeaweedFiler.CreateEntry:output_type -> filer_pb.CreateEntryResponse + 16, // 60: filer_pb.SeaweedFiler.UpdateEntry:output_type -> filer_pb.UpdateEntryResponse + 18, // 61: filer_pb.SeaweedFiler.AppendToEntry:output_type -> filer_pb.AppendToEntryResponse + 20, // 62: filer_pb.SeaweedFiler.DeleteEntry:output_type -> filer_pb.DeleteEntryResponse + 22, // 63: filer_pb.SeaweedFiler.AtomicRenameEntry:output_type -> filer_pb.AtomicRenameEntryResponse + 24, // 64: filer_pb.SeaweedFiler.StreamRenameEntry:output_type -> filer_pb.StreamRenameEntryResponse + 26, // 65: filer_pb.SeaweedFiler.AssignVolume:output_type -> filer_pb.AssignVolumeResponse + 30, // 66: filer_pb.SeaweedFiler.LookupVolume:output_type -> filer_pb.LookupVolumeResponse + 33, // 67: filer_pb.SeaweedFiler.CollectionList:output_type -> filer_pb.CollectionListResponse + 35, // 68: filer_pb.SeaweedFiler.DeleteCollection:output_type -> filer_pb.DeleteCollectionResponse + 37, // 69: filer_pb.SeaweedFiler.Statistics:output_type -> filer_pb.StatisticsResponse + 39, // 70: filer_pb.SeaweedFiler.Ping:output_type -> filer_pb.PingResponse + 41, // 71: filer_pb.SeaweedFiler.GetFilerConfiguration:output_type -> filer_pb.GetFilerConfigurationResponse + 45, // 72: filer_pb.SeaweedFiler.TraverseBfsMetadata:output_type -> 
filer_pb.TraverseBfsMetadataResponse + 43, // 73: filer_pb.SeaweedFiler.SubscribeMetadata:output_type -> filer_pb.SubscribeMetadataResponse + 43, // 74: filer_pb.SeaweedFiler.SubscribeLocalMetadata:output_type -> filer_pb.SubscribeMetadataResponse + 52, // 75: filer_pb.SeaweedFiler.KvGet:output_type -> filer_pb.KvGetResponse + 54, // 76: filer_pb.SeaweedFiler.KvPut:output_type -> filer_pb.KvPutResponse + 57, // 77: filer_pb.SeaweedFiler.CacheRemoteObjectToLocalCluster:output_type -> filer_pb.CacheRemoteObjectToLocalClusterResponse + 59, // 78: filer_pb.SeaweedFiler.DistributedLock:output_type -> filer_pb.LockResponse + 61, // 79: filer_pb.SeaweedFiler.DistributedUnlock:output_type -> filer_pb.UnlockResponse + 63, // 80: filer_pb.SeaweedFiler.FindLockOwner:output_type -> filer_pb.FindLockOwnerResponse + 66, // 81: filer_pb.SeaweedFiler.TransferLocks:output_type -> filer_pb.TransferLocksResponse + 57, // [57:82] is the sub-list for method output_type + 32, // [32:57] is the sub-list for method input_type + 32, // [32:32] is the sub-list for extension type_name + 32, // [32:32] is the sub-list for extension extendee + 0, // [0:32] is the sub-list for field type_name } func init() { file_filer_proto_init() } diff --git a/weed/pb/filer_pb/filer_client.go b/weed/pb/filer_pb/filer_client.go index 17953c67d..c93417eee 100644 --- a/weed/pb/filer_pb/filer_client.go +++ b/weed/pb/filer_pb/filer_client.go @@ -58,7 +58,11 @@ func GetEntry(ctx context.Context, filerClient FilerClient, fullFilePath util.Fu type EachEntryFunction func(entry *Entry, isLast bool) error func ReadDirAllEntries(ctx context.Context, filerClient FilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction) (err error) { + _, err = ReadDirAllEntriesWithSnapshot(ctx, filerClient, fullDirPath, prefix, fn) + return err +} +func ReadDirAllEntriesWithSnapshot(ctx context.Context, filerClient FilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction) (snapshotTsNs int64, err 
error) { var counter uint32 var startFrom string var counterFunc = func(entry *Entry, isLast bool) error { @@ -69,18 +73,18 @@ func ReadDirAllEntries(ctx context.Context, filerClient FilerClient, fullDirPath var paginationLimit uint32 = 10000 - if err = doList(ctx, filerClient, fullDirPath, prefix, counterFunc, "", false, paginationLimit); err != nil { - return err + if snapshotTsNs, err = doListWithSnapshot(ctx, filerClient, fullDirPath, prefix, counterFunc, "", false, paginationLimit, 0); err != nil { + return snapshotTsNs, err } for counter == paginationLimit { counter = 0 - if err = doList(ctx, filerClient, fullDirPath, prefix, counterFunc, startFrom, false, paginationLimit); err != nil { - return err + if _, err = doListWithSnapshot(ctx, filerClient, fullDirPath, prefix, counterFunc, startFrom, false, paginationLimit, snapshotTsNs); err != nil { + return snapshotTsNs, err } } - return nil + return snapshotTsNs, nil } func List(ctx context.Context, filerClient FilerClient, parentDirectoryPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32) (err error) { @@ -90,9 +94,16 @@ func List(ctx context.Context, filerClient FilerClient, parentDirectoryPath, pre } func doList(ctx context.Context, filerClient FilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32) (err error) { - return filerClient.WithFilerClient(false, func(client SeaweedFilerClient) error { - return doSeaweedList(ctx, client, fullDirPath, prefix, fn, startFrom, inclusive, limit) + _, err = doListWithSnapshot(ctx, filerClient, fullDirPath, prefix, fn, startFrom, inclusive, limit, 0) + return err +} + +func doListWithSnapshot(ctx context.Context, filerClient FilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32, snapshotTsNs int64) (actualSnapshotTsNs int64, err error) { + err = filerClient.WithFilerClient(false, func(client 
SeaweedFilerClient) error { + actualSnapshotTsNs, err = DoSeaweedListWithSnapshot(ctx, client, fullDirPath, prefix, fn, startFrom, inclusive, limit, snapshotTsNs) + return err }) + return actualSnapshotTsNs, err } func SeaweedList(ctx context.Context, client SeaweedFilerClient, parentDirectoryPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32) (err error) { @@ -100,6 +111,11 @@ func SeaweedList(ctx context.Context, client SeaweedFilerClient, parentDirectory } func doSeaweedList(ctx context.Context, client SeaweedFilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32) (err error) { + _, err = DoSeaweedListWithSnapshot(ctx, client, fullDirPath, prefix, fn, startFrom, inclusive, limit, 0) + return err +} + +func DoSeaweedListWithSnapshot(ctx context.Context, client SeaweedFilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32, snapshotTsNs int64) (actualSnapshotTsNs int64, err error) { // Redundancy limit to make it correctly judge whether it is the last file. redLimit := limit @@ -115,14 +131,23 @@ func doSeaweedList(ctx context.Context, client SeaweedFilerClient, fullDirPath u StartFromFileName: startFrom, Limit: redLimit, InclusiveStartFrom: inclusive, + SnapshotTsNs: snapshotTsNs, } + // Preserve the caller-requested snapshot so pagination uses the same + // boundary across pages. For first requests (snapshotTsNs==0) we do NOT + // synthesize a client-side timestamp — if the server returns no entries, + // we return 0 so callers like CompleteDirectoryBuild know no server + // snapshot was received and can replay all buffered events without + // clock-skew-sensitive filtering. 
+ actualSnapshotTsNs = snapshotTsNs + glog.V(4).InfofCtx(ctx, "read directory: %v", request) ctx, cancel := context.WithCancel(ctx) defer cancel() stream, err := client.ListEntries(ctx, request) if err != nil { - return fmt.Errorf("list %s: %v", fullDirPath, err) + return actualSnapshotTsNs, fmt.Errorf("list %s: %v", fullDirPath, err) } var prevEntry *Entry @@ -133,17 +158,20 @@ func doSeaweedList(ctx context.Context, client SeaweedFilerClient, fullDirPath u if recvErr == io.EOF { if prevEntry != nil { if err := fn(prevEntry, true); err != nil { - return err + return actualSnapshotTsNs, err } } break } else { - return recvErr + return actualSnapshotTsNs, recvErr } } + if resp.SnapshotTsNs != 0 { + actualSnapshotTsNs = resp.SnapshotTsNs + } if prevEntry != nil { if err := fn(prevEntry, false); err != nil { - return err + return actualSnapshotTsNs, err } } prevEntry = resp.Entry @@ -153,7 +181,7 @@ func doSeaweedList(ctx context.Context, client SeaweedFilerClient, fullDirPath u } } - return nil + return actualSnapshotTsNs, nil } func Exists(ctx context.Context, filerClient FilerClient, parentDirectoryPath string, entryName string, isDirectory bool) (exists bool, err error) { @@ -277,12 +305,26 @@ func MkFile(ctx context.Context, filerClient FilerClient, parentDirectoryPath st } func Remove(ctx context.Context, filerClient FilerClient, parentDirectoryPath, name string, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster bool, signatures []int32) error { - return filerClient.WithFilerClient(false, func(client SeaweedFilerClient) error { - return DoRemove(ctx, client, parentDirectoryPath, name, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster, signatures) - }) + _, err := RemoveWithResponse(ctx, filerClient, parentDirectoryPath, name, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster, signatures) + return err } func DoRemove(ctx context.Context, client SeaweedFilerClient, parentDirectoryPath string, name string, 
isDeleteData bool, isRecursive bool, ignoreRecursiveErr bool, isFromOtherCluster bool, signatures []int32) error { + _, err := DoRemoveWithResponse(ctx, client, parentDirectoryPath, name, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster, signatures) + return err +} + +func RemoveWithResponse(ctx context.Context, filerClient FilerClient, parentDirectoryPath, name string, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster bool, signatures []int32) (*DeleteEntryResponse, error) { + var resp *DeleteEntryResponse + err := filerClient.WithFilerClient(false, func(client SeaweedFilerClient) error { + var innerErr error + resp, innerErr = DoRemoveWithResponse(ctx, client, parentDirectoryPath, name, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster, signatures) + return innerErr + }) + return resp, err +} + +func DoRemoveWithResponse(ctx context.Context, client SeaweedFilerClient, parentDirectoryPath string, name string, isDeleteData bool, isRecursive bool, ignoreRecursiveErr bool, isFromOtherCluster bool, signatures []int32) (*DeleteEntryResponse, error) { deleteEntryRequest := &DeleteEntryRequest{ Directory: parentDirectoryPath, Name: name, @@ -294,19 +336,18 @@ func DoRemove(ctx context.Context, client SeaweedFilerClient, parentDirectoryPat } if resp, err := client.DeleteEntry(ctx, deleteEntryRequest); err != nil { if strings.Contains(err.Error(), ErrNotFound.Error()) { - return nil + return nil, nil } - return err + return nil, err } else { if resp.Error != "" { if strings.Contains(resp.Error, ErrNotFound.Error()) { - return nil + return nil, nil } - return errors.New(resp.Error) + return nil, errors.New(resp.Error) } + return resp, nil } - - return nil } // DoDeleteEmptyParentDirectories recursively deletes empty parent directories. 
diff --git a/weed/pb/filer_pb/filer_client_snapshot_test.go b/weed/pb/filer_pb/filer_client_snapshot_test.go new file mode 100644 index 000000000..a6521bd4d --- /dev/null +++ b/weed/pb/filer_pb/filer_client_snapshot_test.go @@ -0,0 +1,165 @@ +package filer_pb + +import ( + "context" + "fmt" + "io" + "testing" + + "github.com/seaweedfs/seaweedfs/weed/util" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" + "google.golang.org/protobuf/proto" +) + +type snapshotListStream struct { + responses []*ListEntriesResponse + index int +} + +func (s *snapshotListStream) Recv() (*ListEntriesResponse, error) { + if s.index >= len(s.responses) { + return nil, io.EOF + } + resp := s.responses[s.index] + s.index++ + return resp, nil +} + +func (s *snapshotListStream) Header() (metadata.MD, error) { return metadata.MD{}, nil } +func (s *snapshotListStream) Trailer() metadata.MD { return metadata.MD{} } +func (s *snapshotListStream) CloseSend() error { return nil } +func (s *snapshotListStream) Context() context.Context { return context.Background() } +func (s *snapshotListStream) SendMsg(any) error { return nil } +func (s *snapshotListStream) RecvMsg(any) error { return nil } + +type snapshotListClient struct { + SeaweedFilerClient + entries []*Entry + requests []*ListEntriesRequest + snapshotTs int64 + listCalled bool +} + +func (c *snapshotListClient) ListEntries(ctx context.Context, in *ListEntriesRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[ListEntriesResponse], error) { + c.listCalled = true + c.requests = append(c.requests, proto.Clone(in).(*ListEntriesRequest)) + + start := 0 + if in.StartFromFileName != "" { + start = len(c.entries) + for i, entry := range c.entries { + if entry.Name == in.StartFromFileName { + start = i + if !in.InclusiveStartFrom { + start++ + } + break + } + } + } + + end := len(c.entries) + if in.Limit > 0 && start+int(in.Limit) < end { + end = start + int(in.Limit) + } + + snapshotTs := in.SnapshotTsNs + if snapshotTs == 0 
{ + snapshotTs = c.snapshotTs + } + + responses := make([]*ListEntriesResponse, 0, end-start) + for i, entry := range c.entries[start:end] { + resp := &ListEntriesResponse{ + Entry: entry, + } + if i == 0 { + resp.SnapshotTsNs = snapshotTs + } + responses = append(responses, resp) + } + + return &snapshotListStream{responses: responses}, nil +} + +type snapshotFilerAccessor struct { + client SeaweedFilerClient +} + +func (a *snapshotFilerAccessor) WithFilerClient(_ bool, fn func(SeaweedFilerClient) error) error { + return fn(a.client) +} + +func (a *snapshotFilerAccessor) AdjustedUrl(*Location) string { return "" } +func (a *snapshotFilerAccessor) GetDataCenter() string { return "" } + +func TestReadDirAllEntriesWithSnapshotCarriesSnapshotAcrossPages(t *testing.T) { + entries := make([]*Entry, 0, 10001) + for i := 0; i < 10001; i++ { + entries = append(entries, &Entry{Name: fmt.Sprintf("entry-%05d", i), Attributes: &FuseAttributes{}}) + } + + client := &snapshotListClient{ + entries: entries, + snapshotTs: 123456789, + } + accessor := &snapshotFilerAccessor{client: client} + + var listed []string + snapshotTs, err := ReadDirAllEntriesWithSnapshot(context.Background(), accessor, util.FullPath("/dir"), "", func(entry *Entry, isLast bool) error { + listed = append(listed, entry.Name) + return nil + }) + if err != nil { + t.Fatalf("ReadDirAllEntriesWithSnapshot: %v", err) + } + + if got := len(listed); got != len(entries) { + t.Fatalf("listed %d entries, want %d", got, len(entries)) + } + if snapshotTs != client.snapshotTs { + t.Fatalf("snapshotTs = %d, want %d", snapshotTs, client.snapshotTs) + } + if got := len(client.requests); got != 2 { + t.Fatalf("request count = %d, want 2", got) + } + if client.requests[0].SnapshotTsNs != 0 { + t.Fatalf("first request snapshot = %d, want 0", client.requests[0].SnapshotTsNs) + } + if client.requests[1].SnapshotTsNs != client.snapshotTs { + t.Fatalf("second request snapshot = %d, want %d", client.requests[1].SnapshotTsNs, 
client.snapshotTs) + } + if client.requests[1].StartFromFileName != entries[9999].Name { + t.Fatalf("second request marker = %q, want %q", client.requests[1].StartFromFileName, entries[9999].Name) + } +} + +func TestReadDirAllEntriesWithSnapshotEmptyDirectory(t *testing.T) { + client := &snapshotListClient{ + entries: nil, // empty directory + snapshotTs: 999888777, + } + accessor := &snapshotFilerAccessor{client: client} + + var listed []string + snapshotTs, err := ReadDirAllEntriesWithSnapshot(context.Background(), accessor, util.FullPath("/empty"), "", func(entry *Entry, isLast bool) error { + listed = append(listed, entry.Name) + return nil + }) + if err != nil { + t.Fatalf("ReadDirAllEntriesWithSnapshot: %v", err) + } + if len(listed) != 0 { + t.Fatalf("listed %d entries, want 0", len(listed)) + } + // When the server sends no entries (empty directory), no snapshot is + // received. The client returns 0 so callers like CompleteDirectoryBuild + // know to replay all buffered events without clock-skew filtering. 
+ if snapshotTs != 0 { + t.Fatalf("snapshotTs = %d, want 0 for empty directory", snapshotTs) + } + if !client.listCalled { + t.Fatal("ListEntries was not invoked for the empty directory") + } +} diff --git a/weed/pb/filer_pb/filer_pb_helper.go b/weed/pb/filer_pb/filer_pb_helper.go index c776f83d7..fed902824 100644 --- a/weed/pb/filer_pb/filer_pb_helper.go +++ b/weed/pb/filer_pb/filer_pb_helper.go @@ -135,25 +135,35 @@ func AfterEntryDeserialization(chunks []*FileChunk) { } func CreateEntry(ctx context.Context, client SeaweedFilerClient, request *CreateEntryRequest) error { + _, err := CreateEntryWithResponse(ctx, client, request) + return err +} + +func CreateEntryWithResponse(ctx context.Context, client SeaweedFilerClient, request *CreateEntryRequest) (*CreateEntryResponse, error) { resp, err := client.CreateEntry(ctx, request) if err != nil { glog.V(1).InfofCtx(ctx, "create entry %s/%s %v: %v", request.Directory, request.Entry.Name, request.OExcl, err) - return fmt.Errorf("CreateEntry: %w", err) + return nil, fmt.Errorf("CreateEntry: %w", err) } if resp.Error != "" { glog.V(1).InfofCtx(ctx, "create entry %s/%s %v: %v", request.Directory, request.Entry.Name, request.OExcl, resp.Error) - return fmt.Errorf("CreateEntry : %v", resp.Error) + return nil, fmt.Errorf("CreateEntry: %w", errors.New(resp.Error)) } - return nil + return resp, nil } func UpdateEntry(ctx context.Context, client SeaweedFilerClient, request *UpdateEntryRequest) error { - _, err := client.UpdateEntry(ctx, request) + _, err := UpdateEntryWithResponse(ctx, client, request) + return err +} + +func UpdateEntryWithResponse(ctx context.Context, client SeaweedFilerClient, request *UpdateEntryRequest) (*UpdateEntryResponse, error) { + resp, err := client.UpdateEntry(ctx, request) if err != nil { glog.V(1).InfofCtx(ctx, "update entry %s/%s :%v", request.Directory, request.Entry.Name, err) - return fmt.Errorf("UpdateEntry: %w", err) + return nil, fmt.Errorf("UpdateEntry: %w", err) } - return nil + return 
resp, nil } func LookupEntry(ctx context.Context, client SeaweedFilerClient, request *LookupDirectoryEntryRequest) (*LookupDirectoryEntryResponse, error) { diff --git a/weed/query/engine/hybrid_message_scanner.go b/weed/query/engine/hybrid_message_scanner.go index 8fa9f4381..b8477acfb 100644 --- a/weed/query/engine/hybrid_message_scanner.go +++ b/weed/query/engine/hybrid_message_scanner.go @@ -625,6 +625,9 @@ func (hms *HybridMessageScanner) countLiveLogFiles(partition topic.Partition) (i return err } + if resp.Entry == nil { + continue + } // Count files that are not .parquet files (live log files) // Live log files typically have timestamps or are named like log files fileName := resp.Entry.Name diff --git a/weed/s3api/s3_objectlock/object_lock_check.go b/weed/s3api/s3_objectlock/object_lock_check.go index 2cd79b340..c1c8c0b18 100644 --- a/weed/s3api/s3_objectlock/object_lock_check.go +++ b/weed/s3api/s3_objectlock/object_lock_check.go @@ -99,8 +99,11 @@ func paginateEntries(ctx context.Context, client filer_pb.SeaweedFilerClient, di } return fmt.Errorf("failed to receive entry from %s: %w", dir, recvErr) } - entriesReceived = true entry := entryResp.Entry + if entry == nil { + continue + } + entriesReceived = true lastFileName = entry.Name // Skip invalid entry names to prevent path traversal diff --git a/weed/s3api/s3api_object_handlers_list.go b/weed/s3api/s3api_object_handlers_list.go index d32cbb415..5f647ae83 100644 --- a/weed/s3api/s3api_object_handlers_list.go +++ b/weed/s3api/s3api_object_handlers_list.go @@ -615,6 +615,9 @@ func (s3a *S3ApiServer) doListFilerEntries(client filer_pb.SeaweedFilerClient, d } } entry := resp.Entry + if entry == nil { + continue + } // listFilerEntries always calls doListFilerEntries with inclusiveStartFrom=false // (S3 marker semantics are exclusive), but keep the guard explicit to preserve // behavior if inclusive callers are introduced in the future. 
diff --git a/weed/server/filer_grpc_server.go b/weed/server/filer_grpc_server.go index 4510a0397..219703ad5 100644 --- a/weed/server/filer_grpc_server.go +++ b/weed/server/filer_grpc_server.go @@ -53,14 +53,24 @@ func (fs *FilerServer) ListEntries(req *filer_pb.ListEntriesRequest, stream file lastFileName := req.StartFromFileName includeLastFile := req.InclusiveStartFrom + snapshotTsNs := req.SnapshotTsNs + if snapshotTsNs == 0 { + snapshotTsNs = time.Now().UnixNano() + } + sentSnapshot := false var listErr error for limit > 0 { var hasEntries bool lastFileName, listErr = fs.filer.StreamListDirectoryEntries(stream.Context(), util.FullPath(req.Directory), lastFileName, includeLastFile, int64(paginationLimit), req.Prefix, "", "", func(entry *filer.Entry) (bool, error) { hasEntries = true - if err = stream.Send(&filer_pb.ListEntriesResponse{ + resp := &filer_pb.ListEntriesResponse{ Entry: entry.ToProtoEntry(), - }); err != nil { + } + if !sentSnapshot { + resp.SnapshotTsNs = snapshotTsNs + sentSnapshot = true + } + if err = stream.Send(resp); err != nil { return false, err } @@ -78,13 +88,20 @@ func (fs *FilerServer) ListEntries(req *filer_pb.ListEntriesRequest, stream file return err } if !hasEntries { - return nil + break } includeLastFile = false } + // For empty directories we intentionally do NOT send a snapshot-only + // response (Entry == nil). Many consumers (Java FilerClient, S3 listing, + // etc.) treat any received response as an entry. The Go client-side + // DoSeaweedListWithSnapshot returns 0 when the server sends no snapshot, + // so callers like CompleteDirectoryBuild know no server snapshot was + // received and can replay all buffered events without + // clock-skew-sensitive filtering. 
+ return nil } @@ -162,10 +179,12 @@ func (fs *FilerServer) CreateEntry(ctx context.Context, req *filer_pb.CreateEntr newEntry.TtlSec = 0 } + ctx, eventSink := filer.WithMetadataEventSink(ctx) createErr := fs.filer.CreateEntry(ctx, newEntry, req.OExcl, req.IsFromOtherCluster, req.Signatures, req.SkipCheckParentDirectory, so.MaxFileNameLength) if createErr == nil { fs.filer.DeleteChunksNotRecursive(garbage) + resp.MetadataEvent = eventSink.Last() } else { glog.V(3).InfofCtx(ctx, "CreateEntry %s: %v", filepath.Join(req.Directory, req.Entry.Name), createErr) resp.Error = createErr.Error() @@ -201,16 +220,19 @@ func (fs *FilerServer) UpdateEntry(ctx context.Context, req *filer_pb.UpdateEntr return &filer_pb.UpdateEntryResponse{}, err } + ctx, eventSink := filer.WithMetadataEventSink(ctx) + resp := &filer_pb.UpdateEntryResponse{} if err = fs.filer.UpdateEntry(ctx, entry, newEntry); err == nil { fs.filer.DeleteChunksNotRecursive(garbage) fs.filer.NotifyUpdateEvent(ctx, entry, newEntry, true, req.IsFromOtherCluster, req.Signatures) + resp.MetadataEvent = eventSink.Last() } else { glog.V(3).InfofCtx(ctx, "UpdateEntry %s: %v", filepath.Join(req.Directory, req.Entry.Name), err) } - return &filer_pb.UpdateEntryResponse{}, err + return resp, err } func (fs *FilerServer) cleanupChunks(ctx context.Context, fullpath string, existingEntry *filer.Entry, newEntry *filer_pb.Entry) (chunks, garbage []*filer_pb.FileChunk, err error) { @@ -303,10 +325,13 @@ func (fs *FilerServer) DeleteEntry(ctx context.Context, req *filer_pb.DeleteEntr glog.V(4).InfofCtx(ctx, "DeleteEntry %v", req) + ctx, eventSink := filer.WithMetadataEventSink(ctx) err = fs.filer.DeleteEntryMetaAndData(ctx, util.JoinPath(req.Directory, req.Name), req.IsRecursive, req.IgnoreRecursiveError, req.IsDeleteData, req.IsFromOtherCluster, req.Signatures, req.IfNotModifiedAfter) resp = &filer_pb.DeleteEntryResponse{} if err != nil && err != filer_pb.ErrNotFound { resp.Error = err.Error() + } else { + resp.MetadataEvent = 
eventSink.Last() } return resp, nil } diff --git a/weed/server/filer_grpc_server_remote.go b/weed/server/filer_grpc_server_remote.go index 7aafe0799..e790a12ef 100644 --- a/weed/server/filer_grpc_server_remote.go +++ b/weed/server/filer_grpc_server_remote.go @@ -259,9 +259,11 @@ func (fs *FilerServer) doCacheRemoteObjectToLocalCluster(ctx context.Context, re } fs.filer.DeleteChunks(ctx, entry.FullPath, garbage) + ctx, eventSink := filer.WithMetadataEventSink(ctx) fs.filer.NotifyUpdateEvent(ctx, entry, newEntry, true, false, nil) resp.Entry = newEntry.ToProtoEntry() + resp.MetadataEvent = eventSink.Last() return resp, nil