From 3f946fc0c0bda0c1831adbca9fa401caa4076cdb Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 7 Mar 2026 09:19:40 -0800 Subject: [PATCH] mount: make metadata cache rebuilds snapshot-consistent (#8531) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * filer: expose metadata events and list snapshots * mount: invalidate hot directory caches * mount: read hot directories directly from filer * mount: add sequenced metadata cache applier * mount: apply metadata responses through cache applier * mount: replay snapshot-consistent directory builds * mount: dedupe self metadata events * mount: factor directory build cleanup * mount: replace proto marshal dedup with composite key and ring buffer The dedup logic was doing a full deterministic proto.Marshal on every metadata event just to produce a dedup key. Replace with a cheap composite string key (TsNs|Directory|OldName|NewName). Also replace the sliding-window slice (which leaked the backing array unboundedly) with a fixed-size ring buffer that reuses the same array. * filer: remove mutex and proto.Clone from request-scoped MetadataEventSink MetadataEventSink is created per-request and only accessed by the goroutine handling the gRPC call. The mutex and double proto.Clone (once in Record, once in Last) were unnecessary overhead on every filer write operation. Store the pointer directly instead. * mount: skip proto.Clone for caller-owned metadata events Add ApplyMetadataResponseOwned that takes ownership of the response without cloning. Local metadata events (mkdir, create, flush, etc.) are freshly constructed and never shared, so the clone is unnecessary. * filer: only populate MetadataEvent on successful DeleteEntry Avoid calling eventSink.Last() on error paths where the sink may contain a partial event from an intermediate child deletion during recursive deletes. 
* mount: avoid map allocation in collectDirectoryNotifications Replace the map with a fixed-size array and linear dedup. There are at most 3 directories to notify (old parent, new parent, new child if directory), so a 3-element array avoids the heap allocation on every metadata event. * mount: fix potential deadlock in enqueueApplyRequest Release applyStateMu before the blocking channel send. Previously, if the channel was full (cap 128), the send would block while holding the mutex, preventing Shutdown from acquiring it to set applyClosed. * mount: restore signature-based self-event filtering as fast path Re-add the signature check that was removed when content-based dedup was introduced. Checking signatures is O(1) on a small slice and avoids enqueuing and processing events that originated from this mount instance. The content-based dedup remains as a fallback. * filer: send snapshotTsNs only in first ListEntries response The snapshot timestamp is identical for every entry in a single ListEntries stream. Sending it in every response message wastes wire bandwidth for large directories. The client already reads it only from the first response. * mount: exit read-through mode after successful full directory listing MarkDirectoryRefreshed was defined but never called, so directories that entered read-through mode (hot invalidation threshold) stayed there permanently, hitting the filer on every readdir even when cold. Call it after a complete read-through listing finishes. * mount: include event shape and full paths in dedup key The previous dedup key only used Names, which could collapse distinct rename targets. Include the event shape (C/D/U/R), source directory, new parent path, and both entry names so structurally different events are never treated as duplicates. 
* mount: drain pending requests on shutdown in runApplyLoop After receiving the shutdown sentinel, drain any remaining requests from applyCh non-blockingly and signal each with errMetaCacheClosed so callers waiting on req.done are released. * mount: include IsDirectory in synthetic delete events metadataDeleteEvent now accepts an isDirectory parameter so the applier can distinguish directory deletes from file deletes. Rmdir passes true, Unlink passes false. * mount: fall back to synthetic event when MetadataEvent is nil In mknod and mkdir, if the filer response omits MetadataEvent (e.g. older filer without the field), synthesize an equivalent local metadata event so the cache is always updated. * mount: make Flush metadata apply best-effort after successful commit After filer_pb.CreateEntryWithResponse succeeds, the entry is persisted. Don't fail the Flush syscall if the local metadata cache apply fails — log and invalidate the directory cache instead. Also fall back to a synthetic event when MetadataEvent is nil. * mount: make Rename metadata apply best-effort The rename has already succeeded on the filer by the time we apply the local metadata event. Log failures instead of returning errors that would be dropped by the caller anyway. * mount: make saveEntry metadata apply best-effort with fallback After UpdateEntryWithResponse succeeds, treat local metadata apply as non-fatal. Log and invalidate the directory cache on failure. Also fall back to a synthetic event when MetadataEvent is nil. * filer_pb: preserve snapshotTsNs on error in ReadDirAllEntriesWithSnapshot Return the snapshot timestamp even when the first page fails, so callers receive the snapshot boundary when partial data was received. * filer: send snapshot token for empty directory listings When no entries are streamed, send a final ListEntriesResponse with only SnapshotTsNs so clients always receive the snapshot boundary. 
* mount: distinguish not-found vs transient errors in lookupEntry Return fuse.EIO for non-not-found filer errors instead of unconditionally returning ENOENT, so transient failures don't masquerade as missing entries. * mount: make CacheRemoteObject metadata apply best-effort The file content has already been cached successfully. Don't fail the read if the local metadata cache update fails. * mount: use consistent snapshot for readdir in direct mode Capture the SnapshotTsNs from the first loadDirectoryEntriesDirect call and store it on the DirectoryHandle. Subsequent batch loads pass this stored timestamp so all batches use the same snapshot. Also export DoSeaweedListWithSnapshot so mount can use it directly with snapshot passthrough. * filer_pb: fix test fake to send SnapshotTsNs only on first response Match the server behavior: only the first ListEntriesResponse in a page carries the snapshot timestamp, subsequent entries leave it zero. * Fix nil pointer dereference in ListEntries stream consumers Remove the empty-directory snapshot-only response from ListEntries that sent a ListEntriesResponse with Entry==nil, which crashed every raw stream consumer that assumed resp.Entry is always non-nil. Also add defensive nil checks for resp.Entry in all raw ListEntries stream consumers across: S3 listing, broker topic lookup, broker topic config, admin dashboard, topic retention, hybrid message scanner, Kafka integration, and consumer offset storage. * Add nil guards for resp.Entry in remaining ListEntries stream consumers Covers: S3 object lock check, MQ management dashboard (version/partition/offset loops), and topic retention version loop. * Make applyLocalMetadataEvent best-effort in Link and Symlink The filer operations already succeeded; failing the syscall because the local cache apply failed is wrong. Log a warning and invalidate the parent directory cache instead. 
* Make applyLocalMetadataEvent best-effort in Mkdir/Rmdir/Mknod/Unlink The filer RPC already committed; don't fail the syscall when the local metadata cache apply fails. Log a warning and invalidate the parent directory cache to force a re-fetch on next access. * flushFileMetadata: add nil-fallback for metadata event and best-effort apply Synthesize a metadata event when resp.GetMetadataEvent() is nil (matching doFlush), and make the apply best-effort with cache invalidation on failure. * Prevent double-invocation of cleanupBuild in doEnsureVisited Add a cleanupDone guard so the deferred cleanup and inline error-path cleanup don't both call DeleteFolderChildren/AbortDirectoryBuild. * Fix comment: signature check is O(n) not O(1) * Prevent deferred cleanup after successful CompleteDirectoryBuild Set cleanupDone before returning from the success path so the deferred context-cancellation check cannot undo a published build. * Invalidate parent directory caches on rename metadata apply failure When applyLocalMetadataEvent fails during rename, invalidate the source and destination parent directory caches so subsequent accesses trigger a re-fetch from the filer. * Add event nil-fallback and cache invalidation to Link and Symlink Synthesize metadata events when the server doesn't return one, and invalidate parent directory caches on apply failure. * Match requested partition when scanning partition directories Parse the partition range format (NNNN-NNNN) and match against the requested partition parameter instead of using the first directory. * Preserve snapshot timestamp across empty directory listings Initialize actualSnapshotTsNs from the caller-requested value so it isn't lost when the server returns no entries. Re-add the server-side snapshot-only response for empty directories (all raw stream consumers now have nil guards for Entry). 
* Fix CreateEntry error wrapping to support errors.Is/errors.As Use errors.New + %w instead of %v for resp.Error so callers can unwrap the underlying error. * Fix object lock pagination: only advance on non-nil entries Move entriesReceived inside the nil check so nil entries don't cause repeated ListEntries calls with the same lastFileName. * Guard Attributes nil check before accessing Mtime in MQ management * Do not send nil-Entry response for empty directory listings The snapshot-only ListEntriesResponse (with Entry == nil) for empty directories breaks consumers that treat any received response as an entry (Java FilerClient, S3 listing). The Go client-side DoSeaweedListWithSnapshot already preserves the caller-requested snapshot via actualSnapshotTsNs initialization, so the server-side send is unnecessary. * Fix review findings: subscriber dedup, invalidation normalization, nil guards, shutdown race - Remove self-signature early-return in processEventFn so all events flow through the applier (directory-build buffering sees self-originated events that arrive after a snapshot) - Normalize NewParentPath in collectEntryInvalidations to avoid duplicate invalidations when NewParentPath is empty (same-directory update) - Guard resp.Entry.Attributes for nil in admin_server.go and topic_retention.go to prevent panics on entries without attributes - Fix enqueueApplyRequest race with shutdown by using select on both applyCh and applyDone, preventing sends after the apply loop exits - Add cleanupDone check to deferred cleanup in meta_cache_init.go for clarity alongside the existing guard in cleanupBuild - Add empty directory test case for snapshot consistency * Propagate authoritative metadata event from CacheRemoteObjectToLocalCluster and generate client-side snapshot for empty directories - Add metadata_event field to CacheRemoteObjectToLocalClusterResponse proto so the filer-emitted event is available to callers - Use WithMetadataEventSink in the server handler to capture 
the event from NotifyUpdateEvent and return it on the response - Update filehandle_read.go to prefer the RPC's metadata event over a locally fabricated one, falling back to metadataUpdateEvent when the server doesn't provide one (e.g., older filers) - Generate a client-side snapshot cutoff in DoSeaweedListWithSnapshot when the server sends no snapshot (empty directory), so callers like CompleteDirectoryBuild get a meaningful boundary for filtering buffered events * Skip directory notifications for dirs being built to prevent mid-build cache wipe When a metadata event is buffered during a directory build, applyMetadataSideEffects was still firing noteDirectoryUpdate for the building directory. If the directory accumulated enough updates to become "hot", markDirectoryReadThrough would call DeleteFolderChildren, wiping entries that EnsureVisited had already inserted. The build would then complete and mark the directory cached with incomplete data. Fix by using applyMetadataSideEffectsSkippingBuildingDirs for buffered events, which suppresses directory notifications for dirs currently in buildingDirs while still applying entry invalidations. * Add test for directory notification suppression during active build TestDirectoryNotificationsSuppressedDuringBuild verifies that metadata events targeting a directory under active EnsureVisited build do NOT fire onDirectoryUpdate for that directory. In production, this prevents markDirectoryReadThrough from calling DeleteFolderChildren mid-build, which would wipe entries already inserted by the listing. The test inserts an entry during a build, sends multiple metadata events for the building directory, asserts no notifications fired for it, verifies the entry survives, and confirms buffered events are replayed after CompleteDirectoryBuild. 
* Fix create invalidations, build guard, event shape, context, and snapshot error path - collectEntryInvalidations: invalidate FUSE kernel cache on pure create events (OldEntry==nil && NewEntry!=nil), not just updates and deletes - completeDirectoryBuildNow: only call markCachedFn when an active build existed (state != nil), preventing an unpopulated directory from being marked as cached - Add metadataCreateEvent helper that produces a create-shaped event (NewEntry only, no OldEntry) and use it in mkdir, mknod, symlink, and hardlink create fallback paths instead of metadataUpdateEvent which incorrectly set both OldEntry and NewEntry - applyMetadataResponseEnqueue: use context.Background() for the queued mutation so a cancelled caller context cannot abort the apply loop mid-write - DoSeaweedListWithSnapshot: move snapshot initialization before ListEntries call so the error path returns the preserved snapshot instead of 0 * Fix review findings: test loop, cache race, context safety, snapshot consistency - Fix build test loop starting at i=1 instead of i=0, missing new-0.txt verification - Re-check IsDirectoryCached after cache miss to avoid ENOENT race with markDirectoryReadThrough - Use context.Background() in enqueueAndWait so caller cancellation can't abort build/complete mid-way - Pass dh.snapshotTsNs in skip-batch loadDirectoryEntriesDirect for snapshot consistency - Prefer resp.MetadataEvent over fallback in Unlink event derivation - Add comment on MetadataEventSink.Record single-event assumption * Fix empty-directory snapshot clock skew and build cancellation race Empty-directory snapshot: Remove client-side time.Now() synthesis when the server returns no entries. Instead return snapshotTsNs=0, and in completeDirectoryBuildNow replay ALL buffered events when snapshot is 0. This eliminates the clock-skew bug where a client ahead of the filer would filter out legitimate post-list events. 
Build cancellation: Use context.Background() for BeginDirectoryBuild and CompleteDirectoryBuild calls in doEnsureVisited, so errgroup cancellation doesn't cause enqueueAndWait to return early and trigger cleanupBuild while the operation is still queued. * Add tests for empty-directory build replay and cancellation resilience TestEmptyDirectoryBuildReplaysAllBufferedEvents: verifies that when CompleteDirectoryBuild receives snapshotTsNs=0 (empty directory, no server snapshot), ALL buffered events are replayed regardless of their TsNs values — no clock-skew-sensitive filtering occurs. TestBuildCompletionSurvivesCallerCancellation: verifies that once CompleteDirectoryBuild is enqueued, a cancelled caller context does not prevent the build from completing. The apply loop runs with context.Background(), so the directory becomes cached and buffered events are replayed even when the caller gives up waiting. * Fix directory subtree cleanup, Link rollback, test robustness - applyMetadataResponseLocked: when a directory entry is deleted or moved, call DeleteFolderChildren on the old path so cached descendants don't leak as stale entries. - Link: save original HardLinkId/Counter before mutation. If CreateEntryWithResponse fails after the source was already updated, rollback the source entry to its original state via UpdateEntry. - TestBuildCompletionSurvivesCallerCancellation: replace fixed time.Sleep(50ms) with a deadline-based poll that checks IsDirectoryCached in a loop, failing only after 2s timeout. - TestReadDirAllEntriesWithSnapshotEmptyDirectory: assert that ListEntries was actually invoked on the mock client so the test exercises the RPC path. - newMetadataEvent: add early return when both oldEntry and newEntry are nil to avoid emitting events with empty Directory. 
--------- Co-authored-by: Copilot --- other/java/client/src/main/proto/filer.proto | 6 + weed/admin/dash/admin_server.go | 11 +- weed/admin/dash/mq_management.go | 11 +- weed/admin/dash/topic_retention.go | 11 +- weed/filer/filer_notify.go | 70 +- weed/filer/metadata_event_sink.go | 47 ++ weed/filer/metadata_event_sink_test.go | 43 ++ weed/mount/filehandle_read.go | 12 +- weed/mount/inode_to_path.go | 41 +- weed/mount/inode_to_path_test.go | 41 ++ weed/mount/meta_cache/meta_cache.go | 641 +++++++++++++++++- .../mount/meta_cache/meta_cache_apply_test.go | 361 ++++++++++ .../mount/meta_cache/meta_cache_build_test.go | 459 +++++++++++++ weed/mount/meta_cache/meta_cache_init.go | 42 +- weed/mount/meta_cache/meta_cache_subscribe.go | 68 +- weed/mount/metadata_events.go | 66 ++ weed/mount/weedfs.go | 76 +-- weed/mount/weedfs_dir_mkrm.go | 30 +- weed/mount/weedfs_dir_read.go | 96 ++- weed/mount/weedfs_dir_read_test.go | 100 +++ weed/mount/weedfs_file_mkrm.go | 35 +- weed/mount/weedfs_file_sync.go | 15 +- weed/mount/weedfs_link.go | 44 +- weed/mount/weedfs_metadata_flush.go | 16 +- weed/mount/weedfs_rename.go | 24 +- weed/mount/weedfs_rename_test.go | 13 +- weed/mount/weedfs_symlink.go | 16 +- weed/mount/wfs_save.go | 12 +- weed/mq/broker/broker_grpc_lookup.go | 4 +- .../mq/broker/broker_topic_conf_read_write.go | 6 +- .../mq/kafka/consumer_offset/filer_storage.go | 2 +- weed/mq/kafka/integration/broker_client.go | 16 +- weed/pb/filer.proto | 6 + weed/pb/filer_pb/filer.pb.go | 232 ++++--- weed/pb/filer_pb/filer_client.go | 83 ++- .../pb/filer_pb/filer_client_snapshot_test.go | 165 +++++ weed/pb/filer_pb/filer_pb_helper.go | 22 +- weed/query/engine/hybrid_message_scanner.go | 3 + weed/s3api/s3_objectlock/object_lock_check.go | 5 +- weed/s3api/s3api_object_handlers_list.go | 3 + weed/server/filer_grpc_server.go | 33 +- weed/server/filer_grpc_server_remote.go | 2 + 42 files changed, 2615 insertions(+), 374 deletions(-) create mode 100644 weed/filer/metadata_event_sink.go 
create mode 100644 weed/filer/metadata_event_sink_test.go create mode 100644 weed/mount/meta_cache/meta_cache_apply_test.go create mode 100644 weed/mount/meta_cache/meta_cache_build_test.go create mode 100644 weed/mount/metadata_events.go create mode 100644 weed/mount/weedfs_dir_read_test.go create mode 100644 weed/pb/filer_pb/filer_client_snapshot_test.go diff --git a/other/java/client/src/main/proto/filer.proto b/other/java/client/src/main/proto/filer.proto index 9257996ed..78dd58b1f 100644 --- a/other/java/client/src/main/proto/filer.proto +++ b/other/java/client/src/main/proto/filer.proto @@ -100,10 +100,12 @@ message ListEntriesRequest { string startFromFileName = 3; bool inclusiveStartFrom = 4; uint32 limit = 5; + int64 snapshot_ts_ns = 6; } message ListEntriesResponse { Entry entry = 1; + int64 snapshot_ts_ns = 2; } message RemoteEntry { @@ -203,6 +205,7 @@ message CreateEntryRequest { message CreateEntryResponse { string error = 1; + SubscribeMetadataResponse metadata_event = 2; } message UpdateEntryRequest { @@ -212,6 +215,7 @@ message UpdateEntryRequest { repeated int32 signatures = 4; } message UpdateEntryResponse { + SubscribeMetadataResponse metadata_event = 1; } message AppendToEntryRequest { @@ -236,6 +240,7 @@ message DeleteEntryRequest { message DeleteEntryResponse { string error = 1; + SubscribeMetadataResponse metadata_event = 2; } message AtomicRenameEntryRequest { @@ -469,6 +474,7 @@ message CacheRemoteObjectToLocalClusterRequest { } message CacheRemoteObjectToLocalClusterResponse { Entry entry = 1; + SubscribeMetadataResponse metadata_event = 2; } ///////////////////////// diff --git a/weed/admin/dash/admin_server.go b/weed/admin/dash/admin_server.go index 2c129c671..deb1a9215 100644 --- a/weed/admin/dash/admin_server.go +++ b/weed/admin/dash/admin_server.go @@ -427,7 +427,7 @@ func (s *AdminServer) GetS3Buckets() ([]S3Bucket, error) { return err } - if resp.Entry.IsDirectory { + if resp.Entry != nil && resp.Entry.IsDirectory { bucketName := 
resp.Entry.Name if strings.HasPrefix(bucketName, ".") { // Skip internal/system directories from Object Store bucket listing. @@ -480,13 +480,18 @@ func (s *AdminServer) GetS3Buckets() ([]S3Bucket, error) { } } + var createdAt, lastModified time.Time + if resp.Entry.Attributes != nil { + createdAt = time.Unix(resp.Entry.Attributes.Crtime, 0) + lastModified = time.Unix(resp.Entry.Attributes.Mtime, 0) + } bucket := S3Bucket{ Name: bucketName, - CreatedAt: time.Unix(resp.Entry.Attributes.Crtime, 0), + CreatedAt: createdAt, LogicalSize: logicalSize, PhysicalSize: physicalSize, ObjectCount: objectCount, - LastModified: time.Unix(resp.Entry.Attributes.Mtime, 0), + LastModified: lastModified, Quota: quota, QuotaEnabled: quotaEnabled, VersioningStatus: versioningStatus, diff --git a/weed/admin/dash/mq_management.go b/weed/admin/dash/mq_management.go index ba9c1cd18..3f6407b18 100644 --- a/weed/admin/dash/mq_management.go +++ b/weed/admin/dash/mq_management.go @@ -324,7 +324,7 @@ func (s *AdminServer) GetConsumerGroupOffsets(namespace, topicName string) ([]Co } // Only process directories that are versions (start with "v") - if versionResp.Entry.IsDirectory && strings.HasPrefix(versionResp.Entry.Name, "v") { + if versionResp.Entry != nil && versionResp.Entry.IsDirectory && strings.HasPrefix(versionResp.Entry.Name, "v") { versionDir := filepath.Join(topicDir, versionResp.Entry.Name) // List all partition directories under the version directory (e.g., 0315-0630) @@ -352,7 +352,7 @@ func (s *AdminServer) GetConsumerGroupOffsets(namespace, topicName string) ([]Co } // Only process directories that are partitions (format: NNNN-NNNN) - if partitionResp.Entry.IsDirectory { + if partitionResp.Entry != nil && partitionResp.Entry.IsDirectory { // Parse partition range to get partition start ID (e.g., "0315-0630" -> 315) var partitionStart, partitionStop int32 if n, err := fmt.Sscanf(partitionResp.Entry.Name, "%04d-%04d", &partitionStart, &partitionStop); n != 2 || err != nil { @@ 
-387,7 +387,7 @@ func (s *AdminServer) GetConsumerGroupOffsets(namespace, topicName string) ([]Co } // Only process .offset files - if !offsetResp.Entry.IsDirectory && strings.HasSuffix(offsetResp.Entry.Name, ".offset") { + if offsetResp.Entry != nil && !offsetResp.Entry.IsDirectory && strings.HasSuffix(offsetResp.Entry.Name, ".offset") { consumerGroup := strings.TrimSuffix(offsetResp.Entry.Name, ".offset") // Read the offset value from the file @@ -401,7 +401,10 @@ func (s *AdminServer) GetConsumerGroupOffsets(namespace, topicName string) ([]Co offset := int64(util.BytesToUint64(offsetData)) // Get the file modification time - lastUpdated := time.Unix(offsetResp.Entry.Attributes.Mtime, 0) + var lastUpdated time.Time + if offsetResp.Entry.Attributes != nil { + lastUpdated = time.Unix(offsetResp.Entry.Attributes.Mtime, 0) + } offsets = append(offsets, ConsumerGroupOffsetInfo{ ConsumerGroup: consumerGroup, diff --git a/weed/admin/dash/topic_retention.go b/weed/admin/dash/topic_retention.go index fed4893a4..cc66f9035 100644 --- a/weed/admin/dash/topic_retention.go +++ b/weed/admin/dash/topic_retention.go @@ -151,17 +151,21 @@ func (p *TopicRetentionPurger) purgeTopicData(topicRetention TopicRetentionConfi } // Only process directories that are versions (start with "v") - if versionResp.Entry.IsDirectory && strings.HasPrefix(versionResp.Entry.Name, "v") { + if versionResp.Entry != nil && versionResp.Entry.IsDirectory && strings.HasPrefix(versionResp.Entry.Name, "v") { versionTime, err := p.parseVersionTime(versionResp.Entry.Name) if err != nil { glog.Warningf("Failed to parse version time from %s: %v", versionResp.Entry.Name, err) continue } + var modTime time.Time + if versionResp.Entry.Attributes != nil { + modTime = time.Unix(versionResp.Entry.Attributes.Mtime, 0) + } versionDirs = append(versionDirs, VersionDirInfo{ Name: versionResp.Entry.Name, VersionTime: versionTime, - ModTime: time.Unix(versionResp.Entry.Attributes.Mtime, 0), + ModTime: modTime, }) } } @@ 
-260,6 +264,9 @@ func (p *TopicRetentionPurger) deleteDirectoryRecursively(client filer_pb.Seawee return fmt.Errorf("failed to receive entries: %w", err) } + if resp.Entry == nil { + continue + } entryPath := filepath.Join(dirPath, resp.Entry.Name) if resp.Entry.IsDirectory { diff --git a/weed/filer/filer_notify.go b/weed/filer/filer_notify.go index 6fd595f87..48e1b163c 100644 --- a/weed/filer/filer_notify.go +++ b/weed/filer/filer_notify.go @@ -19,19 +19,23 @@ import ( ) func (f *Filer) NotifyUpdateEvent(ctx context.Context, oldEntry, newEntry *Entry, deleteChunks, isFromOtherCluster bool, signatures []int32) { + f.notifyUpdateEvent(ctx, oldEntry, newEntry, deleteChunks, isFromOtherCluster, signatures) +} + +func (f *Filer) notifyUpdateEvent(ctx context.Context, oldEntry, newEntry *Entry, deleteChunks, isFromOtherCluster bool, signatures []int32) *filer_pb.SubscribeMetadataResponse { var fullpath string if oldEntry != nil { fullpath = string(oldEntry.FullPath) } else if newEntry != nil { fullpath = string(newEntry.FullPath) } else { - return + return nil } // println("fullpath:", fullpath) if strings.HasPrefix(fullpath, SystemLogDir) { - return + return nil } foundSelf := false for _, sig := range signatures { @@ -43,18 +47,8 @@ func (f *Filer) NotifyUpdateEvent(ctx context.Context, oldEntry, newEntry *Entry signatures = append(signatures, f.Signature) } - newParentPath := "" - if newEntry != nil { - newParentPath, _ = newEntry.FullPath.DirAndName() - } - eventNotification := &filer_pb.EventNotification{ - OldEntry: oldEntry.ToProtoEntry(), - NewEntry: newEntry.ToProtoEntry(), - DeleteChunks: deleteChunks, - NewParentPath: newParentPath, - IsFromOtherCluster: isFromOtherCluster, - Signatures: signatures, - } + event := f.newMetadataEvent(oldEntry, newEntry, deleteChunks, isFromOtherCluster, signatures) + eventNotification := event.EventNotification if notification.Queue != nil { glog.V(3).Infof("notifying entry update %v", fullpath) @@ -64,31 +58,57 @@ func (f 
*Filer) NotifyUpdateEvent(ctx context.Context, oldEntry, newEntry *Entry } } - f.logMetaEvent(ctx, fullpath, eventNotification) + f.logMetaEvent(ctx, event) + if sink := metadataEventSinkFromContext(ctx); sink != nil { + sink.Record(event) + } // Trigger empty folder cleanup for local events // Remote events are handled via MetaAggregator.onMetadataChangeEvent f.triggerLocalEmptyFolderCleanup(oldEntry, newEntry) + return event } -func (f *Filer) logMetaEvent(ctx context.Context, fullpath string, eventNotification *filer_pb.EventNotification) { - - dir, _ := util.FullPath(fullpath).DirAndName() - - event := &filer_pb.SubscribeMetadataResponse{ - Directory: dir, - EventNotification: eventNotification, - TsNs: time.Now().UnixNano(), +func (f *Filer) newMetadataEvent(oldEntry, newEntry *Entry, deleteChunks, isFromOtherCluster bool, signatures []int32) *filer_pb.SubscribeMetadataResponse { + if oldEntry == nil && newEntry == nil { + return nil + } + var fullpath util.FullPath + if oldEntry != nil { + fullpath = oldEntry.FullPath } + if fullpath == "" && newEntry != nil { + fullpath = newEntry.FullPath + } + dir, _ := fullpath.DirAndName() + newParentPath := "" + if newEntry != nil { + newParentPath, _ = newEntry.FullPath.DirAndName() + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: dir, + EventNotification: &filer_pb.EventNotification{ + OldEntry: oldEntry.ToProtoEntry(), + NewEntry: newEntry.ToProtoEntry(), + DeleteChunks: deleteChunks, + NewParentPath: newParentPath, + IsFromOtherCluster: isFromOtherCluster, + Signatures: signatures, + }, + TsNs: time.Now().UnixNano(), + } +} + +func (f *Filer) logMetaEvent(ctx context.Context, event *filer_pb.SubscribeMetadataResponse) { data, err := proto.Marshal(event) if err != nil { glog.Errorf("failed to marshal filer_pb.SubscribeMetadataResponse %+v: %v", event, err) return } - if err := f.LocalMetaLogBuffer.AddDataToBuffer([]byte(dir), data, event.TsNs); err != nil { - glog.Errorf("failed to add data to log 
buffer for %s: %v", dir, err) + if err := f.LocalMetaLogBuffer.AddDataToBuffer([]byte(event.Directory), data, event.TsNs); err != nil { + glog.Errorf("failed to add data to log buffer for %s: %v", event.Directory, err) } } diff --git a/weed/filer/metadata_event_sink.go b/weed/filer/metadata_event_sink.go new file mode 100644 index 000000000..79e959c9f --- /dev/null +++ b/weed/filer/metadata_event_sink.go @@ -0,0 +1,47 @@ +package filer + +import ( + "context" + + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" +) + +type metadataEventSinkKey struct{} + +// MetadataEventSink captures the last metadata event emitted while serving a +// request. It is request-scoped and accessed only by the goroutine handling +// the gRPC call, so no mutex is needed. +type MetadataEventSink struct { + last *filer_pb.SubscribeMetadataResponse +} + +func WithMetadataEventSink(ctx context.Context) (context.Context, *MetadataEventSink) { + sink := &MetadataEventSink{} + return context.WithValue(ctx, metadataEventSinkKey{}, sink), sink +} + +func metadataEventSinkFromContext(ctx context.Context) *MetadataEventSink { + if ctx == nil { + return nil + } + sink, _ := ctx.Value(metadataEventSinkKey{}).(*MetadataEventSink) + return sink +} + +// Record stores the event, replacing any previously recorded one. +// Each filer RPC emits at most one NotifyUpdateEvent, so only the last +// event is retained. If an RPC were to emit multiple events, only the +// final one would be returned to the caller. 
+func (s *MetadataEventSink) Record(event *filer_pb.SubscribeMetadataResponse) { + if s == nil || event == nil { + return + } + s.last = event +} + +func (s *MetadataEventSink) Last() *filer_pb.SubscribeMetadataResponse { + if s == nil { + return nil + } + return s.last +} diff --git a/weed/filer/metadata_event_sink_test.go b/weed/filer/metadata_event_sink_test.go new file mode 100644 index 000000000..19edad1c7 --- /dev/null +++ b/weed/filer/metadata_event_sink_test.go @@ -0,0 +1,43 @@ +package filer + +import ( + "context" + "testing" + "time" + + "github.com/seaweedfs/seaweedfs/weed/util" + "github.com/seaweedfs/seaweedfs/weed/util/log_buffer" +) + +func TestNotifyUpdateEventRecordsRequestMetadataEvent(t *testing.T) { + f := &Filer{ + Signature: 42, + LocalMetaLogBuffer: log_buffer.NewLogBuffer( + "test", + time.Hour, + func(*log_buffer.LogBuffer, time.Time, time.Time, []byte, int64, int64) {}, + nil, + nil, + ), + } + + ctx, sink := WithMetadataEventSink(context.Background()) + f.NotifyUpdateEvent(ctx, &Entry{FullPath: util.FullPath("/dir/file.txt")}, nil, true, false, []int32{7}) + + event := sink.Last() + if event == nil { + t.Fatal("expected metadata event to be recorded") + } + if event.Directory != "/dir" { + t.Fatalf("directory = %q, want /dir", event.Directory) + } + if event.EventNotification.OldEntry == nil || event.EventNotification.OldEntry.Name != "file.txt" { + t.Fatalf("old entry = %+v, want file.txt", event.EventNotification.OldEntry) + } + if got := event.EventNotification.Signatures; len(got) != 2 || got[0] != 7 || got[1] != 42 { + t.Fatalf("signatures = %v, want [7 42]", got) + } + if event.TsNs == 0 { + t.Fatal("expected event timestamp to be set") + } +} diff --git a/weed/mount/filehandle_read.go b/weed/mount/filehandle_read.go index db4647eba..48805b60b 100644 --- a/weed/mount/filehandle_read.go +++ b/weed/mount/filehandle_read.go @@ -9,7 +9,6 @@ import ( "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" 
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "github.com/seaweedfs/seaweedfs/weed/util" ) func (fh *FileHandle) lockForRead(startOffset int64, size int) { @@ -163,11 +162,12 @@ func (fh *FileHandle) downloadRemoteEntry(entry *LockedEntry) error { fh.SetEntry(resp.Entry) - // Only update cache if the parent directory is cached - if fh.wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) { - if err := fh.wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, resp.Entry)); err != nil { - return fmt.Errorf("update meta cache for %s: %w", fileFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataUpdateEvent(request.Directory, resp.Entry) + } + if applyErr := fh.wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("CacheRemoteObject %s: best-effort metadata apply failed: %v", fileFullPath, applyErr) } return nil diff --git a/weed/mount/inode_to_path.go b/weed/mount/inode_to_path.go index e23a9c1e9..053d66484 100644 --- a/weed/mount/inode_to_path.go +++ b/weed/mount/inode_to_path.go @@ -21,18 +21,18 @@ type InodeEntry struct { nlookup uint64 isDirectory bool isChildrenCached bool + readDirDirect bool cachedExpiresTime time.Time lastAccess time.Time lastRefresh time.Time updateWindowStart time.Time updateCount int - needsRefresh bool } func (ie *InodeEntry) resetCacheState() { ie.isChildrenCached = false + ie.readDirDirect = false ie.cachedExpiresTime = time.Time{} - ie.needsRefresh = false ie.updateCount = 0 ie.updateWindowStart = time.Time{} } @@ -188,11 +188,11 @@ func (i *InodeToPath) MarkChildrenCached(fullpath util.FullPath) { return } path.isChildrenCached = true + path.readDirDirect = false now := time.Now() path.lastAccess = now path.lastRefresh = now path.updateCount = 0 - path.needsRefresh = false path.updateWindowStart = time.Time{} if i.cacheMetaTtlSec > 0 { path.cachedExpiresTime = now.Add(i.cacheMetaTtlSec) @@ -264,6 +264,27 @@ func (i 
*InodeToPath) TouchDirectory(fullpath util.FullPath) { entry.lastAccess = time.Now() } +func (i *InodeToPath) MarkDirectoryReadThrough(fullpath util.FullPath, now time.Time) bool { + i.Lock() + defer i.Unlock() + inode, found := i.path2inode[fullpath] + if !found { + return false + } + entry, found := i.inode2path[inode] + if !found || !entry.isDirectory { + return false + } + entry.isChildrenCached = false + entry.readDirDirect = true + entry.cachedExpiresTime = time.Time{} + entry.lastAccess = now + entry.lastRefresh = time.Time{} + entry.updateCount = 0 + entry.updateWindowStart = time.Time{} + return true +} + func (i *InodeToPath) RecordDirectoryUpdate(fullpath util.FullPath, now time.Time, window time.Duration, threshold int) bool { if threshold <= 0 || window <= 0 { return false @@ -284,13 +305,19 @@ func (i *InodeToPath) RecordDirectoryUpdate(fullpath util.FullPath, now time.Tim } entry.updateCount++ if entry.updateCount >= threshold { - entry.needsRefresh = true + entry.isChildrenCached = false + entry.readDirDirect = true + entry.cachedExpiresTime = time.Time{} + entry.lastAccess = now + entry.lastRefresh = time.Time{} + entry.updateCount = 0 + entry.updateWindowStart = time.Time{} return true } return false } -func (i *InodeToPath) NeedsRefresh(fullpath util.FullPath) bool { +func (i *InodeToPath) ShouldReadDirectoryDirect(fullpath util.FullPath) bool { i.RLock() defer i.RUnlock() inode, found := i.path2inode[fullpath] @@ -301,7 +328,7 @@ func (i *InodeToPath) NeedsRefresh(fullpath util.FullPath) bool { if !found || !entry.isDirectory { return false } - return entry.isChildrenCached && entry.needsRefresh + return entry.readDirDirect } func (i *InodeToPath) MarkDirectoryRefreshed(fullpath util.FullPath, now time.Time) { @@ -317,8 +344,8 @@ func (i *InodeToPath) MarkDirectoryRefreshed(fullpath util.FullPath, now time.Ti } entry.lastRefresh = now entry.lastAccess = now + entry.readDirDirect = false entry.updateCount = 0 - entry.needsRefresh = false 
entry.updateWindowStart = time.Time{} if i.cacheMetaTtlSec > 0 { entry.cachedExpiresTime = now.Add(i.cacheMetaTtlSec) diff --git a/weed/mount/inode_to_path_test.go b/weed/mount/inode_to_path_test.go index 63da42fee..f5f3e1a9f 100644 --- a/weed/mount/inode_to_path_test.go +++ b/weed/mount/inode_to_path_test.go @@ -2,6 +2,7 @@ package mount import ( "testing" + "time" "github.com/seaweedfs/seaweedfs/weed/util" ) @@ -92,3 +93,43 @@ func TestInodeEntry_removeOnePath(t *testing.T) { }) } } + +func TestRecordDirectoryUpdateSwitchesDirectoryToReadThrough(t *testing.T) { + root := util.FullPath("/") + dir := util.FullPath("/data") + + inodeToPath := NewInodeToPath(root, 60) + inodeToPath.Lookup(dir, time.Now().Unix(), true, false, 0, true) + inodeToPath.MarkChildrenCached(dir) + + now := time.Now() + if !inodeToPath.RecordDirectoryUpdate(dir, now, time.Second, 1) { + t.Fatal("expected directory to switch to read-through mode") + } + if inodeToPath.IsChildrenCached(dir) { + t.Fatal("directory should no longer be marked cached") + } + if !inodeToPath.ShouldReadDirectoryDirect(dir) { + t.Fatal("directory should be served via direct reads after hot invalidation") + } +} + +func TestMarkChildrenCachedClearsReadThroughMode(t *testing.T) { + root := util.FullPath("/") + dir := util.FullPath("/data") + + inodeToPath := NewInodeToPath(root, 60) + inodeToPath.Lookup(dir, time.Now().Unix(), true, false, 0, true) + + if !inodeToPath.MarkDirectoryReadThrough(dir, time.Now()) { + t.Fatal("expected read-through flag to be set") + } + inodeToPath.MarkChildrenCached(dir) + + if !inodeToPath.IsChildrenCached(dir) { + t.Fatal("directory should be cached after MarkChildrenCached") + } + if inodeToPath.ShouldReadDirectoryDirect(dir) { + t.Fatal("directory should leave read-through mode after caching") + } +} diff --git a/weed/mount/meta_cache/meta_cache.go b/weed/mount/meta_cache/meta_cache.go index e08ba5c2d..a03959cc0 100644 --- a/weed/mount/meta_cache/meta_cache.go +++ 
b/weed/mount/meta_cache/meta_cache.go @@ -2,17 +2,21 @@ package meta_cache import ( "context" + "errors" "os" "sync" "time" "golang.org/x/sync/singleflight" + "fmt" + "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/filer/leveldb" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/util" + "google.golang.org/protobuf/proto" ) // need to have logic similar to FilerStoreWrapper @@ -29,12 +33,61 @@ type MetaCache struct { invalidateFunc func(fullpath util.FullPath, entry *filer_pb.Entry) onDirectoryUpdate func(dir util.FullPath) visitGroup singleflight.Group // deduplicates concurrent EnsureVisited calls for the same path + applyCh chan metadataApplyRequest + applyDone chan struct{} + applyStateMu sync.Mutex + applyClosed bool + buildingDirs map[util.FullPath]*directoryBuildState + dedupRing dedupRingBuffer +} + +var errMetaCacheClosed = errors.New("metadata cache is shut down") + +type MetadataResponseApplyOptions struct { + NotifyDirectories bool + InvalidateEntries bool +} + +var ( + LocalMetadataResponseApplyOptions = MetadataResponseApplyOptions{ + NotifyDirectories: true, + } + SubscriberMetadataResponseApplyOptions = MetadataResponseApplyOptions{ + NotifyDirectories: true, + InvalidateEntries: true, + } +) + +type directoryBuildState struct { + bufferedEvents []*filer_pb.SubscribeMetadataResponse +} + +const recentEventDedupWindow = 4096 + +type metadataApplyRequestKind int + +const ( + metadataApplyEvent metadataApplyRequestKind = iota + metadataBeginBuild + metadataCompleteBuild + metadataAbortBuild + metadataShutdown +) + +type metadataApplyRequest struct { + ctx context.Context + kind metadataApplyRequestKind + resp *filer_pb.SubscribeMetadataResponse + options MetadataResponseApplyOptions + buildPath util.FullPath + snapshotTsNs int64 + done chan error } func NewMetaCache(dbFolder string, uidGidMapper *UidGidMapper, root util.FullPath, markCachedFn 
func(path util.FullPath), isCachedFn func(path util.FullPath) bool, invalidateFunc func(util.FullPath, *filer_pb.Entry), onDirectoryUpdate func(dir util.FullPath)) *MetaCache { leveldbStore, virtualStore := openMetaStore(dbFolder) - return &MetaCache{ + mc := &MetaCache{ root: root, localStore: virtualStore, leveldbStore: leveldbStore, @@ -45,7 +98,13 @@ func NewMetaCache(dbFolder string, uidGidMapper *UidGidMapper, root util.FullPat invalidateFunc: func(fullpath util.FullPath, entry *filer_pb.Entry) { invalidateFunc(fullpath, entry) }, + applyCh: make(chan metadataApplyRequest, 128), + applyDone: make(chan struct{}), + buildingDirs: make(map[util.FullPath]*directoryBuildState), + dedupRing: newDedupRingBuffer(), } + go mc.runApplyLoop() + return mc } func openMetaStore(dbFolder string) (*leveldb.LevelDBStore, filer.VirtualFilerStore) { @@ -85,7 +144,10 @@ func (mc *MetaCache) doBatchInsertEntries(ctx context.Context, entries []*filer. func (mc *MetaCache) AtomicUpdateEntryFromFiler(ctx context.Context, oldPath util.FullPath, newEntry *filer.Entry) error { mc.Lock() defer mc.Unlock() + return mc.atomicUpdateEntryFromFilerLocked(ctx, oldPath, newEntry, false) +} +func (mc *MetaCache) atomicUpdateEntryFromFilerLocked(ctx context.Context, oldPath util.FullPath, newEntry *filer.Entry, allowUncachedInsert bool) error { entry, err := mc.localStore.FindEntry(ctx, oldPath) if err != nil && err != filer_pb.ErrNotFound { glog.Errorf("Metacache: find entry error: %v", err) @@ -110,7 +172,7 @@ func (mc *MetaCache) AtomicUpdateEntryFromFiler(ctx context.Context, oldPath uti if newEntry != nil { newDir, _ := newEntry.DirAndName() - if mc.isCachedFn(util.FullPath(newDir)) { + if allowUncachedInsert || mc.isCachedFn(util.FullPath(newDir)) { glog.V(3).Infof("InsertEntry %s/%s", newDir, newEntry.Name()) if err := mc.localStore.InsertEntry(ctx, newEntry); err != nil { return err @@ -120,6 +182,71 @@ func (mc *MetaCache) AtomicUpdateEntryFromFiler(ctx context.Context, oldPath uti 
return nil } +func (mc *MetaCache) ApplyMetadataResponse(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) error { + if resp == nil || resp.EventNotification == nil { + return nil + } + clonedResp := proto.Clone(resp).(*filer_pb.SubscribeMetadataResponse) + return mc.applyMetadataResponseEnqueue(ctx, clonedResp, options) +} + +// ApplyMetadataResponseOwned is like ApplyMetadataResponse but takes ownership +// of resp without cloning. The caller must not use resp after this call. +func (mc *MetaCache) ApplyMetadataResponseOwned(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) error { + if resp == nil || resp.EventNotification == nil { + return nil + } + return mc.applyMetadataResponseEnqueue(ctx, resp, options) +} + +func (mc *MetaCache) applyMetadataResponseEnqueue(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) error { + if ctx == nil { + ctx = context.Background() + } + req := metadataApplyRequest{ + // Use a non-cancellable context for the queued mutation so a + // cancelled caller doesn't abort the apply loop mid-write. 
+ ctx: context.Background(), + kind: metadataApplyEvent, + resp: resp, + options: options, + done: make(chan error, 1), + } + + if err := mc.enqueueApplyRequest(req); err != nil { + return err + } + + select { + case err := <-req.done: + return err + case <-ctx.Done(): + return ctx.Err() + } +} + +func (mc *MetaCache) BeginDirectoryBuild(ctx context.Context, dirPath util.FullPath) error { + return mc.enqueueAndWait(ctx, metadataApplyRequest{ + kind: metadataBeginBuild, + buildPath: dirPath, + }) +} + +func (mc *MetaCache) CompleteDirectoryBuild(ctx context.Context, dirPath util.FullPath, snapshotTsNs int64) error { + return mc.enqueueAndWait(ctx, metadataApplyRequest{ + kind: metadataCompleteBuild, + buildPath: dirPath, + snapshotTsNs: snapshotTsNs, + }) +} + +func (mc *MetaCache) AbortDirectoryBuild(ctx context.Context, dirPath util.FullPath) error { + return mc.enqueueAndWait(ctx, metadataApplyRequest{ + kind: metadataAbortBuild, + buildPath: dirPath, + }) +} + func (mc *MetaCache) UpdateEntry(ctx context.Context, entry *filer.Entry) error { mc.Lock() defer mc.Unlock() @@ -174,6 +301,25 @@ func (mc *MetaCache) ListDirectoryEntries(ctx context.Context, dirPath util.Full } func (mc *MetaCache) Shutdown() { + done := make(chan error, 1) + + mc.applyStateMu.Lock() + if !mc.applyClosed { + mc.applyClosed = true + mc.applyCh <- metadataApplyRequest{ + kind: metadataShutdown, + done: done, + } + } + mc.applyStateMu.Unlock() + + select { + case <-done: + case <-mc.applyDone: + } + + <-mc.applyDone + mc.Lock() defer mc.Unlock() mc.localStore.Shutdown() @@ -201,3 +347,494 @@ func (mc *MetaCache) noteDirectoryUpdate(dirPath util.FullPath) { mc.onDirectoryUpdate(dirPath) } } + +func (mc *MetaCache) enqueueAndWait(ctx context.Context, req metadataApplyRequest) error { + if ctx == nil { + ctx = context.Background() + } + // Use a non-cancellable context for the queued operation so a + // cancelled caller doesn't abort a build/complete mid-way. 
+ req.ctx = context.Background() + req.done = make(chan error, 1) + if err := mc.enqueueApplyRequest(req); err != nil { + return err + } + select { + case err := <-req.done: + return err + case <-ctx.Done(): + return ctx.Err() + } +} + +func (mc *MetaCache) enqueueApplyRequest(req metadataApplyRequest) error { + mc.applyStateMu.Lock() + if mc.applyClosed { + mc.applyStateMu.Unlock() + return errMetaCacheClosed + } + // Release the mutex before the potentially-blocking channel send so that + // Shutdown can still acquire it to set applyClosed when the channel is full. + mc.applyStateMu.Unlock() + select { + case mc.applyCh <- req: + return nil + case <-mc.applyDone: + return errMetaCacheClosed + } +} + +func (mc *MetaCache) runApplyLoop() { + defer close(mc.applyDone) + + for req := range mc.applyCh { + req.done <- mc.handleApplyRequest(req) + close(req.done) + if req.kind == metadataShutdown { + mc.drainApplyCh() + return + } + } +} + +// drainApplyCh non-blockingly drains any remaining requests from applyCh +// after a shutdown sentinel, signalling each caller so they don't block. 
+func (mc *MetaCache) drainApplyCh() { + for { + select { + case req := <-mc.applyCh: + req.done <- errMetaCacheClosed + close(req.done) + default: + return + } + } +} + +func (mc *MetaCache) handleApplyRequest(req metadataApplyRequest) error { + switch req.kind { + case metadataApplyEvent: + return mc.applyMetadataResponseNow(req.ctx, req.resp, req.options) + case metadataBeginBuild: + return mc.beginDirectoryBuildNow(req.buildPath) + case metadataCompleteBuild: + return mc.completeDirectoryBuildNow(req.ctx, req.buildPath, req.snapshotTsNs) + case metadataAbortBuild: + return mc.abortDirectoryBuildNow(req.buildPath) + case metadataShutdown: + return nil + default: + return nil + } +} + +type metadataInvalidation struct { + path util.FullPath + entry *filer_pb.Entry +} + +type metadataResponseSideEffects struct { + dirsToNotify []util.FullPath + invalidations []metadataInvalidation +} + +func (mc *MetaCache) applyMetadataResponseNow(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) error { + if mc.shouldSkipDuplicateEvent(resp) { + return nil + } + + immediateEvents, bufferedEvents := mc.routeMetadataResponse(resp) + if len(bufferedEvents) == 0 { + return mc.applyMetadataResponseDirect(ctx, resp, options, false) + } + + // Apply side effects but skip directory notifications for dirs that are + // currently being built. Notifying a building dir can trigger + // markDirectoryReadThrough → DeleteFolderChildren, wiping entries that + // EnsureVisited already inserted, leaving an incomplete cache. + mc.applyMetadataSideEffectsSkippingBuildingDirs(resp, options) + for buildDir, events := range bufferedEvents { + state := mc.buildingDirs[buildDir] + if state == nil { + continue + } + state.bufferedEvents = append(state.bufferedEvents, events...) 
+ } + for _, immediateEvent := range immediateEvents { + if err := mc.applyMetadataResponseDirect(ctx, immediateEvent, MetadataResponseApplyOptions{}, false); err != nil { + return err + } + } + return nil +} + +func (mc *MetaCache) applyMetadataResponseDirect(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions, allowUncachedInsert bool) error { + if _, err := mc.applyMetadataResponseLocked(ctx, resp, options, allowUncachedInsert); err != nil { + return err + } + mc.applyMetadataSideEffects(resp, options) + return nil +} + +func (mc *MetaCache) applyMetadataSideEffects(resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) { + sideEffects := metadataResponseSideEffects{} + if options.NotifyDirectories { + sideEffects.dirsToNotify = collectDirectoryNotifications(resp) + } + if options.InvalidateEntries { + sideEffects.invalidations = collectEntryInvalidations(resp) + } + for _, dirPath := range sideEffects.dirsToNotify { + mc.noteDirectoryUpdate(dirPath) + } + for _, invalidation := range sideEffects.invalidations { + mc.invalidateFunc(invalidation.path, invalidation.entry) + } +} + +// applyMetadataSideEffectsSkippingBuildingDirs is like applyMetadataSideEffects +// but suppresses directory notifications for dirs currently in buildingDirs. +// This prevents markDirectoryReadThrough from wiping entries mid-build. 
+func (mc *MetaCache) applyMetadataSideEffectsSkippingBuildingDirs(resp *filer_pb.SubscribeMetadataResponse, options MetadataResponseApplyOptions) { + sideEffects := metadataResponseSideEffects{} + if options.NotifyDirectories { + sideEffects.dirsToNotify = collectDirectoryNotifications(resp) + } + if options.InvalidateEntries { + sideEffects.invalidations = collectEntryInvalidations(resp) + } + for _, dirPath := range sideEffects.dirsToNotify { + if _, building := mc.buildingDirs[dirPath]; !building { + mc.noteDirectoryUpdate(dirPath) + } + } + for _, invalidation := range sideEffects.invalidations { + mc.invalidateFunc(invalidation.path, invalidation.entry) + } +} + +func (mc *MetaCache) applyMetadataResponseLocked(ctx context.Context, resp *filer_pb.SubscribeMetadataResponse, _ MetadataResponseApplyOptions, allowUncachedInsert bool) (metadataResponseSideEffects, error) { + message := resp.GetEventNotification() + if message == nil { + return metadataResponseSideEffects{}, nil + } + + var oldPath util.FullPath + var newEntry *filer.Entry + if message.OldEntry != nil { + oldPath = util.NewFullPath(resp.Directory, message.OldEntry.Name) + } + + if message.NewEntry != nil { + dir := resp.Directory + if message.NewParentPath != "" { + dir = message.NewParentPath + } + newEntry = filer.FromPbEntry(dir, message.NewEntry) + } + + mc.Lock() + err := mc.atomicUpdateEntryFromFilerLocked(ctx, oldPath, newEntry, allowUncachedInsert) + // When a directory is deleted or moved, remove its cached descendants + // so stale children cannot be served from the local cache. 
+ if err == nil && oldPath != "" && message.OldEntry != nil && message.OldEntry.IsDirectory { + isDelete := message.NewEntry == nil + isMove := message.NewEntry != nil && (message.NewParentPath != resp.Directory || message.NewEntry.Name != message.OldEntry.Name) + if isDelete || isMove { + if deleteErr := mc.localStore.DeleteFolderChildren(ctx, oldPath); deleteErr != nil { + glog.V(2).Infof("delete descendants of %s: %v", oldPath, deleteErr) + } + } + } + mc.Unlock() + if err != nil { + return metadataResponseSideEffects{}, err + } + return metadataResponseSideEffects{}, nil +} + +func (mc *MetaCache) beginDirectoryBuildNow(dirPath util.FullPath) error { + if _, found := mc.buildingDirs[dirPath]; found { + return nil + } + mc.buildingDirs[dirPath] = &directoryBuildState{} + return nil +} + +func (mc *MetaCache) abortDirectoryBuildNow(dirPath util.FullPath) error { + delete(mc.buildingDirs, dirPath) + return nil +} + +func (mc *MetaCache) completeDirectoryBuildNow(ctx context.Context, dirPath util.FullPath, snapshotTsNs int64) error { + state := mc.buildingDirs[dirPath] + delete(mc.buildingDirs, dirPath) + + if state == nil { + return nil + } + + for _, event := range state.bufferedEvents { + // When the server provided a snapshot timestamp, skip events that + // the listing already included. When snapshotTsNs == 0 (empty + // directory — server returned no entries and no snapshot), replay + // ALL buffered events to avoid dropping mutations due to + // client/server clock skew. 
+ if snapshotTsNs != 0 && event.TsNs != 0 && event.TsNs <= snapshotTsNs { + continue + } + if err := mc.applyMetadataResponseDirect(ctx, event, MetadataResponseApplyOptions{}, true); err != nil { + return err + } + } + + mc.markCachedFn(dirPath) + return nil +} + +func (mc *MetaCache) routeMetadataResponse(resp *filer_pb.SubscribeMetadataResponse) ([]*filer_pb.SubscribeMetadataResponse, map[util.FullPath][]*filer_pb.SubscribeMetadataResponse) { + message := resp.GetEventNotification() + if message == nil { + return []*filer_pb.SubscribeMetadataResponse{resp}, nil + } + + oldDir, hasOld := metadataOldParentDir(resp) + newDir, hasNew := metadataNewParentDir(resp) + oldBuilding := hasOld && mc.isBuildingDir(oldDir) + newBuilding := hasNew && mc.isBuildingDir(newDir) + if !oldBuilding && !newBuilding { + return []*filer_pb.SubscribeMetadataResponse{resp}, nil + } + + bufferedEvents := make(map[util.FullPath][]*filer_pb.SubscribeMetadataResponse) + var immediateEvents []*filer_pb.SubscribeMetadataResponse + + if hasOld && hasNew && oldDir != newDir { + deleteEvent := metadataDeleteFragment(resp) + createEvent := metadataCreateFragment(resp) + if oldBuilding { + bufferedEvents[oldDir] = append(bufferedEvents[oldDir], deleteEvent) + } else { + immediateEvents = append(immediateEvents, deleteEvent) + } + if newBuilding { + bufferedEvents[newDir] = append(bufferedEvents[newDir], createEvent) + } else { + immediateEvents = append(immediateEvents, createEvent) + } + return immediateEvents, bufferedEvents + } + + targetDir := newDir + if hasOld { + targetDir = oldDir + } + if mc.isBuildingDir(targetDir) { + bufferedEvents[targetDir] = append(bufferedEvents[targetDir], resp) + return nil, bufferedEvents + } + return []*filer_pb.SubscribeMetadataResponse{resp}, nil +} + +func (mc *MetaCache) isBuildingDir(dirPath util.FullPath) bool { + _, found := mc.buildingDirs[dirPath] + return found +} + +func metadataOldParentDir(resp *filer_pb.SubscribeMetadataResponse) (util.FullPath, 
bool) { + if resp.GetEventNotification() == nil || resp.EventNotification.OldEntry == nil { + return "", false + } + return util.FullPath(resp.Directory), true +} + +func metadataNewParentDir(resp *filer_pb.SubscribeMetadataResponse) (util.FullPath, bool) { + if resp.GetEventNotification() == nil || resp.EventNotification.NewEntry == nil { + return "", false + } + newDir := resp.Directory + if resp.EventNotification.NewParentPath != "" { + newDir = resp.EventNotification.NewParentPath + } + return util.FullPath(newDir), true +} + +func metadataDeleteFragment(resp *filer_pb.SubscribeMetadataResponse) *filer_pb.SubscribeMetadataResponse { + if resp.GetEventNotification() == nil || resp.EventNotification.OldEntry == nil { + return nil + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: resp.Directory, + EventNotification: &filer_pb.EventNotification{ + OldEntry: proto.Clone(resp.EventNotification.OldEntry).(*filer_pb.Entry), + }, + TsNs: resp.TsNs, + } +} + +func metadataCreateFragment(resp *filer_pb.SubscribeMetadataResponse) *filer_pb.SubscribeMetadataResponse { + if resp.GetEventNotification() == nil || resp.EventNotification.NewEntry == nil { + return nil + } + newDir := resp.Directory + if resp.EventNotification.NewParentPath != "" { + newDir = resp.EventNotification.NewParentPath + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: newDir, + EventNotification: &filer_pb.EventNotification{ + NewEntry: proto.Clone(resp.EventNotification.NewEntry).(*filer_pb.Entry), + NewParentPath: newDir, + }, + TsNs: resp.TsNs, + } +} + +func metadataEventDedupKey(resp *filer_pb.SubscribeMetadataResponse) string { + var oldName, newName, newParent string + hasOld, hasNew := false, false + if msg := resp.GetEventNotification(); msg != nil { + if msg.OldEntry != nil { + oldName = msg.OldEntry.Name + hasOld = true + } + if msg.NewEntry != nil { + newName = msg.NewEntry.Name + hasNew = true + newParent = msg.NewParentPath + } + } + // Encode event shape 
(create/delete/update/rename) so structurally + // different events with the same names are not collapsed. + var shape byte + switch { + case hasOld && hasNew: + if resp.Directory != newParent && newParent != "" { + shape = 'R' // rename across directories + } else { + shape = 'U' // update in place + } + case hasOld: + shape = 'D' // delete + case hasNew: + shape = 'C' // create + } + return fmt.Sprintf("%d|%c|%s|%s|%s|%s", resp.TsNs, shape, resp.Directory, oldName, newParent, newName) +} + +func (mc *MetaCache) shouldSkipDuplicateEvent(resp *filer_pb.SubscribeMetadataResponse) bool { + if resp == nil || resp.TsNs == 0 { + return false + } + key := metadataEventDedupKey(resp) + return !mc.dedupRing.Add(key) +} + +type dedupRingBuffer struct { + keys [recentEventDedupWindow]string + head int + size int + set map[string]struct{} +} + +func newDedupRingBuffer() dedupRingBuffer { + return dedupRingBuffer{ + set: make(map[string]struct{}, recentEventDedupWindow), + } +} + +func (r *dedupRingBuffer) Add(key string) bool { + if _, found := r.set[key]; found { + return false // duplicate + } + if r.size == recentEventDedupWindow { + evicted := r.keys[r.head] + delete(r.set, evicted) + } else { + r.size++ + } + r.keys[r.head] = key + r.set[key] = struct{}{} + r.head = (r.head + 1) % recentEventDedupWindow + return true // new entry +} + +func collectDirectoryNotifications(resp *filer_pb.SubscribeMetadataResponse) []util.FullPath { + message := resp.GetEventNotification() + if message == nil { + return nil + } + + // At most 3 dirs: old parent, new parent, new child (if directory). + // Use a fixed slice with linear dedup to avoid map allocation. 
+ var dirs [3]util.FullPath + n := 0 + addUnique := func(p util.FullPath) { + for i := 0; i < n; i++ { + if dirs[i] == p { + return + } + } + dirs[n] = p + n++ + } + + if message.OldEntry != nil { + oldPath := util.NewFullPath(resp.Directory, message.OldEntry.Name) + parent, _ := oldPath.DirAndName() + addUnique(util.FullPath(parent)) + } + if message.NewEntry != nil { + newDir := resp.Directory + if message.NewParentPath != "" { + newDir = message.NewParentPath + } + newPath := util.NewFullPath(newDir, message.NewEntry.Name) + parent, _ := newPath.DirAndName() + addUnique(util.FullPath(parent)) + if message.NewEntry.IsDirectory { + addUnique(newPath) + } + } + + return dirs[:n] +} + +func collectEntryInvalidations(resp *filer_pb.SubscribeMetadataResponse) []metadataInvalidation { + message := resp.GetEventNotification() + if message == nil { + return nil + } + + var invalidations []metadataInvalidation + if message.OldEntry != nil && message.NewEntry != nil { + oldKey := util.NewFullPath(resp.Directory, message.OldEntry.Name) + invalidations = append(invalidations, metadataInvalidation{path: oldKey, entry: message.OldEntry}) + // Normalize NewParentPath: empty means same directory as resp.Directory + newDir := resp.Directory + if message.NewParentPath != "" { + newDir = message.NewParentPath + } + if message.OldEntry.Name != message.NewEntry.Name || resp.Directory != newDir { + newKey := util.NewFullPath(newDir, message.NewEntry.Name) + invalidations = append(invalidations, metadataInvalidation{path: newKey, entry: message.NewEntry}) + } + return invalidations + } + + if filer_pb.IsCreate(resp) && message.NewEntry != nil { + newDir := resp.Directory + if message.NewParentPath != "" { + newDir = message.NewParentPath + } + newKey := util.NewFullPath(newDir, message.NewEntry.Name) + invalidations = append(invalidations, metadataInvalidation{path: newKey, entry: message.NewEntry}) + } + + if filer_pb.IsDelete(resp) && message.OldEntry != nil { + oldKey := 
util.NewFullPath(resp.Directory, message.OldEntry.Name) + invalidations = append(invalidations, metadataInvalidation{path: oldKey, entry: message.OldEntry}) + } + + return invalidations +} diff --git a/weed/mount/meta_cache/meta_cache_apply_test.go b/weed/mount/meta_cache/meta_cache_apply_test.go new file mode 100644 index 000000000..d30fab90d --- /dev/null +++ b/weed/mount/meta_cache/meta_cache_apply_test.go @@ -0,0 +1,361 @@ +package meta_cache + +import ( + "context" + "path/filepath" + "sync" + "testing" + "time" + + "github.com/seaweedfs/seaweedfs/weed/filer" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/seaweedfs/weed/util" +) + +func TestApplyMetadataResponseAppliesEventsInOrder(t *testing.T) { + mc, _, notifications, invalidations := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + "/dir": true, + }) + defer mc.Shutdown() + + createResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + NewEntry: &filer_pb.Entry{ + Name: "file.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 1, + FileMode: 0100644, + FileSize: 11, + }, + }, + }, + } + updateResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{ + Name: "file.txt", + }, + NewEntry: &filer_pb.Entry{ + Name: "file.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 2, + FileMode: 0100644, + FileSize: 29, + }, + }, + NewParentPath: "/dir", + }, + } + deleteResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{ + Name: "file.txt", + }, + }, + } + + if err := mc.ApplyMetadataResponse(context.Background(), createResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply create: %v", err) + } + + entry, err := mc.FindEntry(context.Background(), util.FullPath("/dir/file.txt")) + if err 
!= nil { + t.Fatalf("find created entry: %v", err) + } + if entry.FileSize != 11 { + t.Fatalf("created file size = %d, want 11", entry.FileSize) + } + + if err := mc.ApplyMetadataResponse(context.Background(), updateResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply update: %v", err) + } + + entry, err = mc.FindEntry(context.Background(), util.FullPath("/dir/file.txt")) + if err != nil { + t.Fatalf("find updated entry: %v", err) + } + if entry.FileSize != 29 { + t.Fatalf("updated file size = %d, want 29", entry.FileSize) + } + + if err := mc.ApplyMetadataResponse(context.Background(), deleteResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply delete: %v", err) + } + + entry, err = mc.FindEntry(context.Background(), util.FullPath("/dir/file.txt")) + if err != filer_pb.ErrNotFound { + t.Fatalf("find deleted entry error = %v, want %v", err, filer_pb.ErrNotFound) + } + if entry != nil { + t.Fatalf("deleted entry still cached: %+v", entry) + } + + if got := countPath(notifications.paths(), util.FullPath("/dir")); got != 3 { + t.Fatalf("directory notifications for /dir = %d, want 3", got) + } + if got := countPath(invalidations.paths(), util.FullPath("/dir/file.txt")); got != 3 { + t.Fatalf("invalidations for /dir/file.txt = %d, want 3 (create + update + delete)", got) + } +} + +func TestApplyMetadataResponseRenamesAcrossCachedDirectories(t *testing.T) { + mc, _, notifications, invalidations := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + "/src": true, + "/dst": true, + }) + defer mc.Shutdown() + + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/src/file.tmp", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 7, + }, + }); err != nil { + t.Fatalf("insert source entry: %v", err) + } + + renameResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/src", + EventNotification: &filer_pb.EventNotification{ + OldEntry: 
&filer_pb.Entry{ + Name: "file.tmp", + }, + NewEntry: &filer_pb.Entry{ + Name: "file.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 2, + FileMode: 0100644, + FileSize: 41, + }, + }, + NewParentPath: "/dst", + }, + } + + if err := mc.ApplyMetadataResponse(context.Background(), renameResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply rename: %v", err) + } + + oldEntry, err := mc.FindEntry(context.Background(), util.FullPath("/src/file.tmp")) + if err != filer_pb.ErrNotFound { + t.Fatalf("find old path error = %v, want %v", err, filer_pb.ErrNotFound) + } + if oldEntry != nil { + t.Fatalf("old path still cached: %+v", oldEntry) + } + + newEntry, err := mc.FindEntry(context.Background(), util.FullPath("/dst/file.txt")) + if err != nil { + t.Fatalf("find new path: %v", err) + } + if newEntry.FileSize != 41 { + t.Fatalf("renamed file size = %d, want 41", newEntry.FileSize) + } + + if got := countPath(notifications.paths(), util.FullPath("/src")); got != 1 { + t.Fatalf("directory notifications for /src = %d, want 1", got) + } + if got := countPath(notifications.paths(), util.FullPath("/dst")); got != 1 { + t.Fatalf("directory notifications for /dst = %d, want 1", got) + } + if got := countPath(invalidations.paths(), util.FullPath("/src/file.tmp")); got != 1 { + t.Fatalf("invalidations for /src/file.tmp = %d, want 1", got) + } + if got := countPath(invalidations.paths(), util.FullPath("/dst/file.txt")); got != 1 { + t.Fatalf("invalidations for /dst/file.txt = %d, want 1", got) + } +} + +func TestApplyMetadataResponseLocalOptionsSkipInvalidations(t *testing.T) { + mc, _, notifications, invalidations := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + "/dir": true, + }) + defer mc.Shutdown() + + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/dir/file.txt", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 7, + }, + }); err != nil { + 
t.Fatalf("insert source entry: %v", err) + } + + updateResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{ + Name: "file.txt", + }, + NewEntry: &filer_pb.Entry{ + Name: "file.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 2, + FileMode: 0100644, + FileSize: 17, + }, + }, + NewParentPath: "/dir", + }, + } + + if err := mc.ApplyMetadataResponse(context.Background(), updateResp, LocalMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply local update: %v", err) + } + + entry, err := mc.FindEntry(context.Background(), util.FullPath("/dir/file.txt")) + if err != nil { + t.Fatalf("find updated entry: %v", err) + } + if entry.FileSize != 17 { + t.Fatalf("updated file size = %d, want 17", entry.FileSize) + } + if got := countPath(notifications.paths(), util.FullPath("/dir")); got != 1 { + t.Fatalf("directory notifications for /dir = %d, want 1", got) + } + if got := len(invalidations.paths()); got != 0 { + t.Fatalf("invalidations = %d, want 0", got) + } +} + +func TestApplyMetadataResponseDeduplicatesRepeatedFilerEvent(t *testing.T) { + mc, _, notifications, invalidations := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + "/dir": true, + }) + defer mc.Shutdown() + + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/dir/file.txt", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 5, + }, + }); err != nil { + t.Fatalf("insert source entry: %v", err) + } + + updateResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{ + Name: "file.txt", + }, + NewEntry: &filer_pb.Entry{ + Name: "file.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 2, + FileMode: 0100644, + FileSize: 15, + }, + }, + NewParentPath: "/dir", + Signatures: []int32{7}, + }, + TsNs: 99, + } + + if err 
:= mc.ApplyMetadataResponse(context.Background(), updateResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("first apply: %v", err) + } + if err := mc.ApplyMetadataResponse(context.Background(), updateResp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("second apply: %v", err) + } + + entry, err := mc.FindEntry(context.Background(), util.FullPath("/dir/file.txt")) + if err != nil { + t.Fatalf("find updated entry: %v", err) + } + if entry.FileSize != 15 { + t.Fatalf("updated file size = %d, want 15", entry.FileSize) + } + if got := countPath(notifications.paths(), util.FullPath("/dir")); got != 1 { + t.Fatalf("directory notifications for /dir = %d, want 1", got) + } + if got := countPath(invalidations.paths(), util.FullPath("/dir/file.txt")); got != 1 { + t.Fatalf("invalidations for /dir/file.txt = %d, want 1", got) + } +} + +func newTestMetaCache(t *testing.T, cached map[util.FullPath]bool) (*MetaCache, map[util.FullPath]bool, *recordedPaths, *recordedPaths) { + t.Helper() + + mapper, err := NewUidGidMapper("", "") + if err != nil { + t.Fatalf("uid/gid mapper: %v", err) + } + + var cachedMu sync.Mutex + notifications := &recordedPaths{} + invalidations := &recordedPaths{} + + mc := NewMetaCache( + filepath.Join(t.TempDir(), "meta"), + mapper, + util.FullPath("/"), + func(path util.FullPath) { + cachedMu.Lock() + defer cachedMu.Unlock() + cached[path] = true + }, + func(path util.FullPath) bool { + cachedMu.Lock() + defer cachedMu.Unlock() + return cached[path] + }, + func(path util.FullPath, entry *filer_pb.Entry) { + invalidations.record(path) + }, + func(dir util.FullPath) { + notifications.record(dir) + }, + ) + + return mc, cached, notifications, invalidations +} + +type recordedPaths struct { + mu sync.Mutex + items []util.FullPath +} + +func (r *recordedPaths) record(path util.FullPath) { + r.mu.Lock() + defer r.mu.Unlock() + r.items = append(r.items, path) +} + +func (r *recordedPaths) paths() []util.FullPath { + 
r.mu.Lock() + defer r.mu.Unlock() + return append([]util.FullPath(nil), r.items...) +} + +func countPath(paths []util.FullPath, target util.FullPath) int { + count := 0 + for _, path := range paths { + if path == target { + count++ + } + } + return count +} diff --git a/weed/mount/meta_cache/meta_cache_build_test.go b/weed/mount/meta_cache/meta_cache_build_test.go new file mode 100644 index 000000000..61285f23a --- /dev/null +++ b/weed/mount/meta_cache/meta_cache_build_test.go @@ -0,0 +1,459 @@ +package meta_cache + +import ( + "context" + "fmt" + "io" + "sync" + "testing" + "time" + + "github.com/seaweedfs/seaweedfs/weed/filer" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/seaweedfs/weed/util" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" +) + +type buildListStream struct { + responses []*filer_pb.ListEntriesResponse + onFirstRecv func() + once sync.Once + index int +} + +func (s *buildListStream) Recv() (*filer_pb.ListEntriesResponse, error) { + s.once.Do(func() { + if s.onFirstRecv != nil { + s.onFirstRecv() + } + }) + if s.index >= len(s.responses) { + return nil, io.EOF + } + resp := s.responses[s.index] + s.index++ + return resp, nil +} + +func (s *buildListStream) Header() (metadata.MD, error) { return metadata.MD{}, nil } +func (s *buildListStream) Trailer() metadata.MD { return metadata.MD{} } +func (s *buildListStream) CloseSend() error { return nil } +func (s *buildListStream) Context() context.Context { return context.Background() } +func (s *buildListStream) SendMsg(any) error { return nil } +func (s *buildListStream) RecvMsg(any) error { return nil } + +type buildListClient struct { + filer_pb.SeaweedFilerClient + responses []*filer_pb.ListEntriesResponse + onFirstRecv func() +} + +func (c *buildListClient) ListEntries(ctx context.Context, in *filer_pb.ListEntriesRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[filer_pb.ListEntriesResponse], error) { + return &buildListStream{ + responses: 
c.responses, + onFirstRecv: c.onFirstRecv, + }, nil +} + +type buildFilerAccessor struct { + client filer_pb.SeaweedFilerClient +} + +func (a *buildFilerAccessor) WithFilerClient(_ bool, fn func(filer_pb.SeaweedFilerClient) error) error { + return fn(a.client) +} + +func (a *buildFilerAccessor) AdjustedUrl(*filer_pb.Location) string { return "" } +func (a *buildFilerAccessor) GetDataCenter() string { return "" } + +func TestEnsureVisitedReplaysBufferedEventsAfterSnapshot(t *testing.T) { + mc, _, _, _ := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + }) + defer mc.Shutdown() + + var applyErr error + accessor := &buildFilerAccessor{ + client: &buildListClient{ + responses: []*filer_pb.ListEntriesResponse{ + { + Entry: &filer_pb.Entry{ + Name: "base.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 1, + Mtime: 1, + FileMode: 0100644, + FileSize: 3, + }, + }, + SnapshotTsNs: 100, + }, + }, + onFirstRecv: func() { + applyErr = mc.ApplyMetadataResponse(context.Background(), &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + NewEntry: &filer_pb.Entry{ + Name: "after.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 2, + Mtime: 2, + FileMode: 0100644, + FileSize: 9, + }, + }, + }, + TsNs: 101, + }, SubscriberMetadataResponseApplyOptions) + }, + }, + } + + if err := EnsureVisited(mc, accessor, util.FullPath("/dir")); err != nil { + t.Fatalf("ensure visited: %v", err) + } + if applyErr != nil { + t.Fatalf("apply buffered event: %v", applyErr) + } + if !mc.IsDirectoryCached(util.FullPath("/dir")) { + t.Fatal("directory /dir should be cached after build completes") + } + + baseEntry, err := mc.FindEntry(context.Background(), util.FullPath("/dir/base.txt")) + if err != nil { + t.Fatalf("find base entry: %v", err) + } + if baseEntry.FileSize != 3 { + t.Fatalf("base entry size = %d, want 3", baseEntry.FileSize) + } + + afterEntry, err := mc.FindEntry(context.Background(), 
util.FullPath("/dir/after.txt")) + if err != nil { + t.Fatalf("find replayed entry: %v", err) + } + if afterEntry.FileSize != 9 { + t.Fatalf("replayed entry size = %d, want 9", afterEntry.FileSize) + } +} + +// TestDirectoryNotificationsSuppressedDuringBuild verifies that metadata events +// targeting a directory under active build do NOT fire onDirectoryUpdate for +// that directory. In production, onDirectoryUpdate can trigger +// markDirectoryReadThrough → DeleteFolderChildren, which would wipe entries +// that EnsureVisited already inserted mid-build. +func TestDirectoryNotificationsSuppressedDuringBuild(t *testing.T) { + mc, _, notifications, _ := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + }) + defer mc.Shutdown() + + // Start building /dir (simulates the beginning of EnsureVisited) + if err := mc.BeginDirectoryBuild(context.Background(), util.FullPath("/dir")); err != nil { + t.Fatalf("begin build: %v", err) + } + + // Insert an entry as EnsureVisited would during the filer listing + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/dir/existing.txt", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 100, + }, + }); err != nil { + t.Fatalf("insert entry during build: %v", err) + } + + // Simulate multiple metadata events arriving for /dir while the build + // is in progress. Each event would normally call noteDirectoryUpdate, + // which in production can trigger markDirectoryReadThrough and wipe entries. 
+ for i := 0; i < 5; i++ { + resp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + NewEntry: &filer_pb.Entry{ + Name: fmt.Sprintf("new-%d.txt", i), + Attributes: &filer_pb.FuseAttributes{ + Crtime: int64(10 + i), + Mtime: int64(10 + i), + FileMode: 0100644, + FileSize: uint64(i + 1), + }, + }, + }, + TsNs: int64(200 + i), + } + if err := mc.ApplyMetadataResponse(context.Background(), resp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply event %d: %v", i, err) + } + } + + // The building directory /dir must NOT have received any notifications. + // If it did, markDirectoryReadThrough would wipe the cache mid-build. + for _, p := range notifications.paths() { + if p == util.FullPath("/dir") { + t.Fatal("onDirectoryUpdate was called for /dir during build; this would cause markDirectoryReadThrough to wipe entries mid-build") + } + } + + // The entry inserted during the build must still be present + entry, err := mc.FindEntry(context.Background(), util.FullPath("/dir/existing.txt")) + if err != nil { + t.Fatalf("entry wiped during build: %v", err) + } + if entry.FileSize != 100 { + t.Fatalf("entry size = %d, want 100", entry.FileSize) + } + + // Complete the build — buffered events should be replayed + if err := mc.CompleteDirectoryBuild(context.Background(), util.FullPath("/dir"), 150); err != nil { + t.Fatalf("complete build: %v", err) + } + + // After build completes, the entry from the listing should still exist + entry, err = mc.FindEntry(context.Background(), util.FullPath("/dir/existing.txt")) + if err != nil { + t.Fatalf("entry lost after build completion: %v", err) + } + if entry.FileSize != 100 { + t.Fatalf("entry size after build = %d, want 100", entry.FileSize) + } + + // Buffered events with TsNs > snapshotTsNs (150) should have been replayed + for i := 0; i < 5; i++ { + name := fmt.Sprintf("new-%d.txt", i) + e, err := mc.FindEntry(context.Background(), 
util.FullPath("/dir/"+name)) + if err != nil { + t.Fatalf("replayed entry %s not found: %v", name, err) + } + if e.FileSize != uint64(i+1) { + t.Fatalf("replayed entry %s size = %d, want %d", name, e.FileSize, i+1) + } + } +} + +// TestEmptyDirectoryBuildReplaysAllBufferedEvents verifies that when a +// directory build completes with snapshotTsNs=0 (empty directory — server +// returned no entries and no snapshot), ALL buffered events are replayed +// without any TsNs filtering. This prevents clock-skew between client and +// filer from dropping legitimate mutations. +func TestEmptyDirectoryBuildReplaysAllBufferedEvents(t *testing.T) { + mc, _, _, _ := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + }) + defer mc.Shutdown() + + if err := mc.BeginDirectoryBuild(context.Background(), util.FullPath("/empty")); err != nil { + t.Fatalf("begin build: %v", err) + } + + // Buffer events with a range of TsNs values — some very old, some recent. + // With a client-synthesized snapshot, old events could be incorrectly filtered. 
+ tsValues := []int64{1, 50, 500, 5000, 50000} + for i, ts := range tsValues { + resp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/empty", + EventNotification: &filer_pb.EventNotification{ + NewEntry: &filer_pb.Entry{ + Name: fmt.Sprintf("file-%d.txt", i), + Attributes: &filer_pb.FuseAttributes{ + Crtime: ts, + Mtime: ts, + FileMode: 0100644, + FileSize: uint64(i + 10), + }, + }, + }, + TsNs: ts, + } + if err := mc.ApplyMetadataResponse(context.Background(), resp, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply event %d: %v", i, err) + } + } + + // Complete with snapshotTsNs=0 — simulates empty directory listing + if err := mc.CompleteDirectoryBuild(context.Background(), util.FullPath("/empty"), 0); err != nil { + t.Fatalf("complete build: %v", err) + } + + // Every buffered event must have been replayed, regardless of TsNs + for i := range tsValues { + name := fmt.Sprintf("file-%d.txt", i) + e, err := mc.FindEntry(context.Background(), util.FullPath("/empty/"+name)) + if err != nil { + t.Fatalf("replayed entry %s not found: %v", name, err) + } + if e.FileSize != uint64(i+10) { + t.Fatalf("replayed entry %s size = %d, want %d", name, e.FileSize, i+10) + } + } + + if !mc.IsDirectoryCached(util.FullPath("/empty")) { + t.Fatal("/empty should be marked cached after build completes") + } +} + +// TestBuildCompletionSurvivesCallerCancellation verifies that once +// CompleteDirectoryBuild is enqueued, a cancelled caller context does not +// prevent the build from completing. The apply loop uses context.Background() +// internally, so the operation finishes even if the caller gives up waiting. 
+func TestBuildCompletionSurvivesCallerCancellation(t *testing.T) { + mc, _, _, _ := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + }) + defer mc.Shutdown() + + if err := mc.BeginDirectoryBuild(context.Background(), util.FullPath("/dir")); err != nil { + t.Fatalf("begin build: %v", err) + } + + // Insert an entry during the build (as EnsureVisited would) + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/dir/kept.txt", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 42, + }, + }); err != nil { + t.Fatalf("insert entry: %v", err) + } + + // Buffer an event that should be replayed + if err := mc.ApplyMetadataResponse(context.Background(), &filer_pb.SubscribeMetadataResponse{ + Directory: "/dir", + EventNotification: &filer_pb.EventNotification{ + NewEntry: &filer_pb.Entry{ + Name: "buffered.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 5, + Mtime: 5, + FileMode: 0100644, + FileSize: 77, + }, + }, + }, + TsNs: 200, + }, SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply event: %v", err) + } + + // Complete with an already-cancelled context. The operation should still + // succeed because enqueueAndWait sets req.ctx = context.Background(). + cancelledCtx, cancel := context.WithCancel(context.Background()) + cancel() // cancel immediately + + // CompleteDirectoryBuild may return ctx.Err() if the select picks + // ctx.Done() first, but the operation itself still completes in the + // apply loop. Poll for the observable side effect instead of using + // a fixed sleep. + _ = mc.CompleteDirectoryBuild(cancelledCtx, util.FullPath("/dir"), 100) + + // Poll until the build completes or a deadline elapses. 
+ deadline := time.After(2 * time.Second) + for !mc.IsDirectoryCached(util.FullPath("/dir")) { + select { + case <-deadline: + t.Fatal("/dir should be cached — CompleteDirectoryBuild must have executed despite cancelled context") + default: + time.Sleep(5 * time.Millisecond) + } + } + + // The pre-existing entry must survive + entry, findErr := mc.FindEntry(context.Background(), util.FullPath("/dir/kept.txt")) + if findErr != nil { + t.Fatalf("find kept entry: %v", findErr) + } + if entry.FileSize != 42 { + t.Fatalf("kept entry size = %d, want 42", entry.FileSize) + } + + // The buffered event (TsNs 200 > snapshot 100) must have been replayed + buffered, findErr := mc.FindEntry(context.Background(), util.FullPath("/dir/buffered.txt")) + if findErr != nil { + t.Fatalf("find buffered entry: %v", findErr) + } + if buffered.FileSize != 77 { + t.Fatalf("buffered entry size = %d, want 77", buffered.FileSize) + } +} + +func TestBufferedRenameUpdatesOtherDirectoryBeforeBuildCompletes(t *testing.T) { + mc, _, _, _ := newTestMetaCache(t, map[util.FullPath]bool{ + "/": true, + "/src": true, + }) + defer mc.Shutdown() + + if err := mc.InsertEntry(context.Background(), &filer.Entry{ + FullPath: "/src/from.txt", + Attr: filer.Attr{ + Crtime: time.Unix(1, 0), + Mtime: time.Unix(1, 0), + Mode: 0100644, + FileSize: 7, + }, + }); err != nil { + t.Fatalf("insert source entry: %v", err) + } + + if err := mc.BeginDirectoryBuild(context.Background(), util.FullPath("/dst")); err != nil { + t.Fatalf("begin build: %v", err) + } + + renameResp := &filer_pb.SubscribeMetadataResponse{ + Directory: "/src", + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{ + Name: "from.txt", + }, + NewEntry: &filer_pb.Entry{ + Name: "to.txt", + Attributes: &filer_pb.FuseAttributes{ + Crtime: 2, + Mtime: 2, + FileMode: 0100644, + FileSize: 12, + }, + }, + NewParentPath: "/dst", + }, + TsNs: 101, + } + + if err := mc.ApplyMetadataResponse(context.Background(), renameResp, 
SubscriberMetadataResponseApplyOptions); err != nil { + t.Fatalf("apply rename: %v", err) + } + + oldEntry, err := mc.FindEntry(context.Background(), util.FullPath("/src/from.txt")) + if err != filer_pb.ErrNotFound { + t.Fatalf("find old path error = %v, want %v", err, filer_pb.ErrNotFound) + } + if oldEntry != nil { + t.Fatalf("old path should be removed before build completes: %+v", oldEntry) + } + + newEntry, err := mc.FindEntry(context.Background(), util.FullPath("/dst/to.txt")) + if err != filer_pb.ErrNotFound { + t.Fatalf("find buffered new path error = %v, want %v", err, filer_pb.ErrNotFound) + } + if newEntry != nil { + t.Fatalf("new path should stay hidden until build completes: %+v", newEntry) + } + + if err := mc.CompleteDirectoryBuild(context.Background(), util.FullPath("/dst"), 100); err != nil { + t.Fatalf("complete build: %v", err) + } + + newEntry, err = mc.FindEntry(context.Background(), util.FullPath("/dst/to.txt")) + if err != nil { + t.Fatalf("find replayed new path: %v", err) + } + if newEntry.FileSize != 12 { + t.Fatalf("replayed new path size = %d, want 12", newEntry.FileSize) + } +} diff --git a/weed/mount/meta_cache/meta_cache_init.go b/weed/mount/meta_cache/meta_cache_init.go index 10ec9dad7..81aad780f 100644 --- a/weed/mount/meta_cache/meta_cache_init.go +++ b/weed/mount/meta_cache/meta_cache_init.go @@ -69,12 +69,43 @@ func doEnsureVisited(ctx context.Context, mc *MetaCache, client filer_pb.FilerCl glog.V(4).Infof("ReadDirAllEntries %s ...", path) + // Use context.Background() for build lifecycle calls so that + // errgroup cancellation of ctx doesn't cause enqueueAndWait to + // return early, which would trigger cleanupBuild while the + // operation is still queued. 
+ if err := mc.BeginDirectoryBuild(context.Background(), path); err != nil { + return nil, fmt.Errorf("begin build %s: %w", path, err) + } + cleanupDone := false + cleanupBuild := func(reason string) { + if cleanupDone { + return + } + cleanupDone = true + if deleteErr := mc.DeleteFolderChildren(context.Background(), path); deleteErr != nil { + glog.V(2).Infof("clear %s build %s: %v", reason, path, deleteErr) + } + if abortErr := mc.AbortDirectoryBuild(context.Background(), path); abortErr != nil { + glog.V(2).Infof("abort %s build %s: %v", reason, path, abortErr) + } + } + defer func() { + if !cleanupDone && ctx.Err() != nil { + cleanupBuild("canceled") + } + }() + // Collect entries in batches for efficient LevelDB writes var batch []*filer.Entry + var snapshotTsNs int64 fetchErr := util.Retry("ReadDirAllEntries", func() error { batch = nil // Reset batch on retry, allow GC of previous entries - return filer_pb.ReadDirAllEntries(ctx, client, path, "", func(pbEntry *filer_pb.Entry, isLast bool) error { + if err := mc.DeleteFolderChildren(ctx, path); err != nil { + return fmt.Errorf("clear existing entries for %s: %w", path, err) + } + var err error + snapshotTsNs, err = filer_pb.ReadDirAllEntriesWithSnapshot(ctx, client, path, "", func(pbEntry *filer_pb.Entry, isLast bool) error { entry := filer.FromPbEntry(string(path), pbEntry) if IsHiddenSystemEntry(string(path), entry.Name()) { return nil @@ -94,19 +125,26 @@ func doEnsureVisited(ctx context.Context, mc *MetaCache, client filer_pb.FilerCl } return nil }) + return err }) if fetchErr != nil { + cleanupBuild("failed") return nil, fmt.Errorf("list %s: %w", path, fetchErr) } // Flush any remaining entries in the batch if len(batch) > 0 { if err := mc.doBatchInsertEntries(ctx, batch); err != nil { + cleanupBuild("incomplete") return nil, fmt.Errorf("batch insert remaining for %s: %w", path, err) } } - mc.markCachedFn(path) + if err := mc.CompleteDirectoryBuild(context.Background(), path, snapshotTsNs); err != nil { 
+ cleanupBuild("unreplayed") + return nil, fmt.Errorf("complete build for %s: %w", path, err) + } + cleanupDone = true // Prevent deferred cleanup after successful publish return nil, nil }) return err diff --git a/weed/mount/meta_cache/meta_cache_subscribe.go b/weed/mount/meta_cache/meta_cache_subscribe.go index fe5f75ba9..12e9d4a77 100644 --- a/weed/mount/meta_cache/meta_cache_subscribe.go +++ b/weed/mount/meta_cache/meta_cache_subscribe.go @@ -4,7 +4,6 @@ import ( "context" "strings" - "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" @@ -51,67 +50,12 @@ func SubscribeMetaEvents(mc *MetaCache, selfSignature int32, client filer_pb.Fil } processEventFn := func(resp *filer_pb.SubscribeMetadataResponse) error { - message := resp.EventNotification - - for _, sig := range message.Signatures { - if sig == selfSignature && selfSignature != 0 { - return nil - } - } - - dir := resp.Directory - var oldPath util.FullPath - var newEntry *filer.Entry - if message.OldEntry != nil { - oldPath = util.NewFullPath(dir, message.OldEntry.Name) - glog.V(4).Infof("deleting %v", oldPath) - } - - if message.NewEntry != nil { - if message.NewParentPath != "" { - dir = message.NewParentPath - } - key := util.NewFullPath(dir, message.NewEntry.Name) - glog.V(4).Infof("creating %v", key) - newEntry = filer.FromPbEntry(dir, message.NewEntry) - } - err := mc.AtomicUpdateEntryFromFiler(context.Background(), oldPath, newEntry) - if err == nil { - if message.NewEntry != nil || message.OldEntry != nil { - dirsToNotify := make(map[util.FullPath]struct{}) - if oldPath != "" { - parent, _ := oldPath.DirAndName() - dirsToNotify[util.FullPath(parent)] = struct{}{} - } - if newEntry != nil { - newParent, _ := newEntry.DirAndName() - dirsToNotify[util.FullPath(newParent)] = struct{}{} - } - if message.NewEntry != nil && message.NewEntry.IsDirectory { - childPath := 
util.NewFullPath(dir, message.NewEntry.Name) - dirsToNotify[childPath] = struct{}{} - } - for dirPath := range dirsToNotify { - mc.noteDirectoryUpdate(dirPath) - } - } - if message.OldEntry != nil && message.NewEntry != nil { - oldKey := util.NewFullPath(resp.Directory, message.OldEntry.Name) - mc.invalidateFunc(oldKey, message.OldEntry) - if message.OldEntry.Name != message.NewEntry.Name { - newKey := util.NewFullPath(dir, message.NewEntry.Name) - mc.invalidateFunc(newKey, message.NewEntry) - } - } else if filer_pb.IsCreate(resp) { - // no need to invalidate - } else if filer_pb.IsDelete(resp) { - oldKey := util.NewFullPath(resp.Directory, message.OldEntry.Name) - mc.invalidateFunc(oldKey, message.OldEntry) - } - } - - return err - + // Let all events (including self-originated ones) flow through the + // applier so that the directory-build buffering and dedup logic + // can handle them consistently. The dedupRing in + // applyMetadataResponseNow catches duplicates that were already + // applied locally via applyLocalMetadataEvent. 
+ return mc.ApplyMetadataResponse(context.Background(), resp, SubscriberMetadataResponseApplyOptions) } prefix := dir diff --git a/weed/mount/metadata_events.go b/weed/mount/metadata_events.go new file mode 100644 index 000000000..15d18df0d --- /dev/null +++ b/weed/mount/metadata_events.go @@ -0,0 +1,66 @@ +package mount + +import ( + "context" + + "github.com/seaweedfs/seaweedfs/weed/mount/meta_cache" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "google.golang.org/protobuf/proto" +) + +func (wfs *WFS) applyLocalMetadataEvent(ctx context.Context, event *filer_pb.SubscribeMetadataResponse) error { + if ctx == nil { + ctx = context.Background() + } + return wfs.metaCache.ApplyMetadataResponseOwned(ctx, event, meta_cache.LocalMetadataResponseApplyOptions) +} + +func metadataDeleteEvent(directory, name string, isDirectory bool) *filer_pb.SubscribeMetadataResponse { + if name == "" { + return nil + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: directory, + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{Name: name, IsDirectory: isDirectory}, + }, + } +} + +func metadataCreateEvent(directory string, entry *filer_pb.Entry) *filer_pb.SubscribeMetadataResponse { + if entry == nil { + return nil + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: directory, + EventNotification: &filer_pb.EventNotification{ + NewEntry: proto.Clone(entry).(*filer_pb.Entry), + NewParentPath: directory, + }, + } +} + +func metadataUpdateEvent(directory string, entry *filer_pb.Entry) *filer_pb.SubscribeMetadataResponse { + if entry == nil { + return nil + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: directory, + EventNotification: &filer_pb.EventNotification{ + OldEntry: &filer_pb.Entry{Name: entry.Name}, + NewEntry: proto.Clone(entry).(*filer_pb.Entry), + NewParentPath: directory, + }, + } +} + +func metadataEventFromRenameResponse(resp *filer_pb.StreamRenameEntryResponse) *filer_pb.SubscribeMetadataResponse { + if 
resp == nil || resp.EventNotification == nil { + return nil + } + return &filer_pb.SubscribeMetadataResponse{ + Directory: resp.Directory, + EventNotification: proto.Clone(resp.EventNotification).(*filer_pb.EventNotification), + TsNs: resp.TsNs, + } +} diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index db60c9deb..9b2341ca3 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -200,7 +200,7 @@ func NewSeaweedFileSystem(option *Option) *WFS { } }, func(dirPath util.FullPath) { if wfs.inodeToPath.RecordDirectoryUpdate(dirPath, time.Now(), wfs.dirHotWindow, wfs.dirHotThreshold) { - wfs.maybeRefreshDirectory(dirPath) + wfs.markDirectoryReadThrough(dirPath) } }) grace.OnInterrupt(func() { @@ -313,36 +313,42 @@ func (wfs *WFS) maybeLoadEntry(fullpath util.FullPath) (*filer_pb.Entry, fuse.St } // lookupEntry looks up an entry by path, checking the local cache first. -// If the directory is cached, it trusts the cache. Otherwise, it fetches -// directly from the filer without caching the entire directory. -// This avoids the performance issue of listing millions of files just to open one. +// Cached metadata is only authoritative when the parent directory itself is cached. +// For uncached/read-through directories, always consult the filer directly so stale +// local entries do not leak back into lookup results. func (wfs *WFS) lookupEntry(fullpath util.FullPath) (*filer.Entry, fuse.Status) { dir, _ := fullpath.DirAndName() + dirPath := util.FullPath(dir) - // Try to find the entry in the local cache first. - cachedEntry, cacheErr := wfs.metaCache.FindEntry(context.Background(), fullpath) - if cacheErr != nil && cacheErr != filer_pb.ErrNotFound { - glog.Errorf("lookupEntry: cache lookup for %s failed: %v", fullpath, cacheErr) - return nil, fuse.EIO - } - if cachedEntry != nil { - glog.V(4).Infof("lookupEntry cache hit %s", fullpath) - return cachedEntry, fuse.OK - } - - // If the directory is cached but entry not found, file doesn't exist. 
- // No need to query the filer again. - if wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) { - glog.V(4).Infof("lookupEntry cache miss (dir cached) %s", fullpath) - return nil, fuse.ENOENT + if wfs.metaCache.IsDirectoryCached(dirPath) { + cachedEntry, cacheErr := wfs.metaCache.FindEntry(context.Background(), fullpath) + if cacheErr != nil && cacheErr != filer_pb.ErrNotFound { + glog.Errorf("lookupEntry: cache lookup for %s failed: %v", fullpath, cacheErr) + return nil, fuse.EIO + } + if cachedEntry != nil { + glog.V(4).Infof("lookupEntry cache hit %s", fullpath) + return cachedEntry, fuse.OK + } + // Re-check: the directory may have been evicted from cache between + // our IsDirectoryCached check and FindEntry (e.g. markDirectoryReadThrough). + // If it's no longer cached, fall through to the filer lookup below. + if wfs.metaCache.IsDirectoryCached(dirPath) { + glog.V(4).Infof("lookupEntry cache miss (dir cached) %s", fullpath) + return nil, fuse.ENOENT + } } // Directory not cached - fetch directly from filer without caching the entire directory. 
glog.V(4).Infof("lookupEntry fetching from filer %s", fullpath) entry, err := filer_pb.GetEntry(context.Background(), wfs, fullpath) if err != nil { - glog.V(1).Infof("lookupEntry GetEntry %s: %v", fullpath, err) - return nil, fuse.ENOENT + if err == filer_pb.ErrNotFound { + glog.V(4).Infof("lookupEntry not found %s", fullpath) + return nil, fuse.ENOENT + } + glog.Warningf("lookupEntry GetEntry %s: %v", fullpath, err) + return nil, fuse.EIO } if entry != nil && entry.Attributes != nil && wfs.option.UidGidMapper != nil { entry.Attributes.Uid, entry.Attributes.Gid = wfs.option.UidGidMapper.FilerToLocal(entry.Attributes.Uid, entry.Attributes.Gid) @@ -371,31 +377,13 @@ func (wfs *WFS) ClearCacheDir() { os.RemoveAll(wfs.option.getUniqueCacheDirForRead()) } -func (wfs *WFS) maybeRefreshDirectory(dirPath util.FullPath) { - if !wfs.inodeToPath.NeedsRefresh(dirPath) { +func (wfs *WFS) markDirectoryReadThrough(dirPath util.FullPath) { + if !wfs.inodeToPath.MarkDirectoryReadThrough(dirPath, time.Now()) { return } - wfs.refreshMu.Lock() - if _, exists := wfs.refreshingDirs[dirPath]; exists { - wfs.refreshMu.Unlock() - return + if err := wfs.metaCache.DeleteFolderChildren(context.Background(), dirPath); err != nil { + glog.V(2).Infof("clear dir cache %s: %v", dirPath, err) } - wfs.refreshingDirs[dirPath] = struct{}{} - wfs.refreshMu.Unlock() - - go func() { - defer func() { - wfs.refreshMu.Lock() - delete(wfs.refreshingDirs, dirPath) - wfs.refreshMu.Unlock() - }() - wfs.inodeToPath.InvalidateChildrenCache(dirPath) - if err := meta_cache.EnsureVisited(wfs.metaCache, wfs, dirPath); err != nil { - glog.Warningf("refresh dir cache %s: %v", dirPath, err) - return - } - wfs.inodeToPath.MarkDirectoryRefreshed(dirPath, time.Now()) - }() } func (wfs *WFS) loopEvictIdleDirCache() { diff --git a/weed/mount/weedfs_dir_mkrm.go b/weed/mount/weedfs_dir_mkrm.go index 0ede18397..f1381234d 100644 --- a/weed/mount/weedfs_dir_mkrm.go +++ b/weed/mount/weedfs_dir_mkrm.go @@ -2,7 +2,6 @@ package 
mount import ( "context" - "fmt" "os" "strings" "syscall" @@ -63,19 +62,21 @@ func (wfs *WFS) Mkdir(cancel <-chan struct{}, in *fuse.MkdirIn, name string, out } glog.V(1).Infof("mkdir: %v", request) - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + resp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { glog.V(0).Infof("mkdir %s: %v", entryFullPath, err) return err } - // Only cache the entry if the parent directory is already cached. - // This avoids polluting the cache with partial directory data. - if wfs.metaCache.IsDirectoryCached(dirFullPath) { - wfs.inodeToPath.TouchDirectory(dirFullPath) - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("local mkdir dir %s: %w", entryFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataCreateEvent(string(dirFullPath), newEntry) } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("mkdir %s: best-effort metadata apply failed: %v", entryFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(dirFullPath) + } + wfs.inodeToPath.TouchDirectory(dirFullPath) return nil }) @@ -112,7 +113,7 @@ func (wfs *WFS) Rmdir(cancel <-chan struct{}, header *fuse.InHeader, name string glog.V(3).Infof("remove directory: %v", entryFullPath) ignoreRecursiveErr := true // ignore recursion error since the OS should manage it - err := filer_pb.Remove(context.Background(), wfs, string(dirFullPath), name, true, false, ignoreRecursiveErr, false, []int32{wfs.signature}) + resp, err := filer_pb.RemoveWithResponse(context.Background(), wfs, string(dirFullPath), name, true, false, ignoreRecursiveErr, false, []int32{wfs.signature}) if err != nil { glog.V(0).Infof("remove %s: %v", entryFullPath, err) if strings.Contains(err.Error(), filer.MsgFailDelNonEmptyFolder) { @@ -121,7 
+122,14 @@ func (wfs *WFS) Rmdir(cancel <-chan struct{}, header *fuse.InHeader, name string return fuse.ENOENT } - wfs.metaCache.DeleteEntry(context.Background(), entryFullPath) + event := metadataDeleteEvent(string(dirFullPath), name, true) + if resp != nil && resp.MetadataEvent != nil { + event = resp.MetadataEvent + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("rmdir %s: best-effort metadata apply failed: %v", entryFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(dirFullPath) + } wfs.inodeToPath.RemovePath(entryFullPath) wfs.inodeToPath.TouchDirectory(dirFullPath) diff --git a/weed/mount/weedfs_dir_read.go b/weed/mount/weedfs_dir_read.go index 274f2c185..9488f9aff 100644 --- a/weed/mount/weedfs_dir_read.go +++ b/weed/mount/weedfs_dir_read.go @@ -3,11 +3,13 @@ package mount import ( "context" "sync" + "time" "github.com/seaweedfs/go-fuse/v2/fuse" "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/mount/meta_cache" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/util" ) @@ -26,10 +28,12 @@ type DirectoryHandle struct { isFinished bool entryStream []*filer.Entry entryStreamOffset uint64 + snapshotTsNs int64 // snapshot timestamp for consistent readdir in direct mode } func (dh *DirectoryHandle) reset() { dh.isFinished = false + dh.snapshotTsNs = 0 // Nil out pointers to allow garbage collection of old entries, // then reuse the slice's capacity to avoid re-allocations. 
for i := range dh.entryStream { @@ -164,7 +168,6 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl return code } wfs.inodeToPath.TouchDirectory(dirPath) - wfs.maybeRefreshDirectory(dirPath) var dirEntry fuse.DirEntry @@ -214,6 +217,10 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl var lastEntryName string + if wfs.inodeToPath.ShouldReadDirectoryDirect(dirPath) { + return wfs.readDirectoryDirect(input, out, dh, dirPath, processEachEntryFn) + } + // Read from cache first, then load next batch if needed if input.Offset >= dh.entryStreamOffset { // Handle case: new handle with non-zero offset but empty cache @@ -288,3 +295,90 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl return fuse.OK } + +func (wfs *WFS) readDirectoryDirect(input *fuse.ReadIn, out *fuse.DirEntryList, dh *DirectoryHandle, dirPath util.FullPath, processEachEntryFn func(entry *filer.Entry, index int64) bool) fuse.Status { + var lastEntryName string + + if input.Offset >= dh.entryStreamOffset { + if len(dh.entryStream) == 0 && input.Offset > dh.entryStreamOffset { + skipCount := uint32(input.Offset-dh.entryStreamOffset) + batchSize + entries, snapshotTs, err := loadDirectoryEntriesDirect(context.Background(), wfs, wfs.option.UidGidMapper, dirPath, "", false, skipCount, dh.snapshotTsNs) + if err != nil { + glog.Errorf("list filer directory: %v", err) + return fuse.EIO + } + dh.entryStream = append(dh.entryStream, entries...) 
+ if dh.snapshotTsNs == 0 { + dh.snapshotTsNs = snapshotTs + } + } + + if input.Offset > dh.entryStreamOffset { + entryPreviousIndex := (input.Offset - dh.entryStreamOffset) - 1 + if uint64(len(dh.entryStream)) > entryPreviousIndex { + lastEntryName = dh.entryStream[entryPreviousIndex].Name() + } + } + + entryCurrentIndex := int64(input.Offset - dh.entryStreamOffset) + for int64(len(dh.entryStream)) > entryCurrentIndex { + entry := dh.entryStream[entryCurrentIndex] + if processEachEntryFn(entry, entryCurrentIndex) { + lastEntryName = entry.Name() + entryCurrentIndex++ + } else { + return fuse.OK + } + } + + entries, snapshotTs, err := loadDirectoryEntriesDirect(context.Background(), wfs, wfs.option.UidGidMapper, dirPath, lastEntryName, false, batchSize, dh.snapshotTsNs) + if err != nil { + glog.Errorf("list filer directory: %v", err) + return fuse.EIO + } + if dh.snapshotTsNs == 0 { + dh.snapshotTsNs = snapshotTs + } + + bufferFull := false + for _, entry := range entries { + currentIndex := int64(len(dh.entryStream)) + dh.entryStream = append(dh.entryStream, entry) + if !processEachEntryFn(entry, currentIndex) { + bufferFull = true + break + } + } + if !bufferFull && len(entries) < int(batchSize) { + dh.isFinished = true + // After a full successful read-through listing, exit direct mode + // so subsequent reads can use the cache instead of hitting the filer. 
+ wfs.inodeToPath.MarkDirectoryRefreshed(dirPath, time.Now()) + } + } + + return fuse.OK +} + +func loadDirectoryEntriesDirect(ctx context.Context, client filer_pb.FilerClient, uidGidMapper *meta_cache.UidGidMapper, dirPath util.FullPath, startFileName string, includeStart bool, limit uint32, snapshotTsNs int64) ([]*filer.Entry, int64, error) { + entries := make([]*filer.Entry, 0, limit) + var actualSnapshotTsNs int64 + err := client.WithFilerClient(false, func(sc filer_pb.SeaweedFilerClient) error { + var innerErr error + actualSnapshotTsNs, innerErr = filer_pb.DoSeaweedListWithSnapshot(ctx, sc, dirPath, "", func(entry *filer_pb.Entry, isLast bool) error { + if meta_cache.IsHiddenSystemEntry(string(dirPath), entry.Name) { + return nil + } + if uidGidMapper != nil && entry.Attributes != nil { + entry.Attributes.Uid, entry.Attributes.Gid = uidGidMapper.FilerToLocal(entry.Attributes.Uid, entry.Attributes.Gid) + } + entries = append(entries, filer.FromPbEntry(string(dirPath), entry)) + return nil + }, startFileName, includeStart, limit, snapshotTsNs) + return innerErr + }) + if err != nil { + return nil, actualSnapshotTsNs, err + } + return entries, actualSnapshotTsNs, nil +} diff --git a/weed/mount/weedfs_dir_read_test.go b/weed/mount/weedfs_dir_read_test.go new file mode 100644 index 000000000..e8e8e7d79 --- /dev/null +++ b/weed/mount/weedfs_dir_read_test.go @@ -0,0 +1,100 @@ +package mount + +import ( + "context" + "io" + "testing" + + "github.com/seaweedfs/seaweedfs/weed/mount/meta_cache" + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/seaweedfs/weed/util" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" +) + +type directoryListStream struct { + responses []*filer_pb.ListEntriesResponse + index int +} + +func (s *directoryListStream) Recv() (*filer_pb.ListEntriesResponse, error) { + if s.index >= len(s.responses) { + return nil, io.EOF + } + resp := s.responses[s.index] + s.index++ + return resp, nil +} + +func (s 
*directoryListStream) Header() (metadata.MD, error) { return metadata.MD{}, nil } +func (s *directoryListStream) Trailer() metadata.MD { return metadata.MD{} } +func (s *directoryListStream) CloseSend() error { return nil } +func (s *directoryListStream) Context() context.Context { return context.Background() } +func (s *directoryListStream) SendMsg(any) error { return nil } +func (s *directoryListStream) RecvMsg(any) error { return nil } + +type directoryListClient struct { + filer_pb.SeaweedFilerClient + responses []*filer_pb.ListEntriesResponse +} + +func (c *directoryListClient) ListEntries(ctx context.Context, in *filer_pb.ListEntriesRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[filer_pb.ListEntriesResponse], error) { + return &directoryListStream{responses: c.responses}, nil +} + +type directoryFilerAccessor struct { + client filer_pb.SeaweedFilerClient +} + +func (a *directoryFilerAccessor) WithFilerClient(_ bool, fn func(filer_pb.SeaweedFilerClient) error) error { + return fn(a.client) +} + +func (a *directoryFilerAccessor) AdjustedUrl(*filer_pb.Location) string { return "" } +func (a *directoryFilerAccessor) GetDataCenter() string { return "" } + +func TestLoadDirectoryEntriesDirectFiltersHiddenEntriesAndMapsIds(t *testing.T) { + mapper, err := meta_cache.NewUidGidMapper("10:1000", "20:2000") + if err != nil { + t.Fatalf("uid/gid mapper: %v", err) + } + + client := &directoryFilerAccessor{ + client: &directoryListClient{ + responses: []*filer_pb.ListEntriesResponse{ + { + Entry: &filer_pb.Entry{ + Name: "topics", + Attributes: &filer_pb.FuseAttributes{ + Uid: 1000, + Gid: 2000, + }, + }, + }, + { + Entry: &filer_pb.Entry{ + Name: "visible", + Attributes: &filer_pb.FuseAttributes{ + Uid: 1000, + Gid: 2000, + }, + }, + }, + }, + }, + } + + entries, _, err := loadDirectoryEntriesDirect(context.Background(), client, mapper, util.FullPath("/"), "", false, 10, 0) + if err != nil { + t.Fatalf("loadDirectoryEntriesDirect: %v", err) + } + if got := 
len(entries); got != 1 { + t.Fatalf("entry count = %d, want 1", got) + } + if entries[0].Name() != "visible" { + t.Fatalf("entry name = %q, want visible", entries[0].Name()) + } + if entries[0].Attr.Uid != 10 || entries[0].Attr.Gid != 20 { + t.Fatalf("mapped uid/gid = %d/%d, want 10/20", entries[0].Attr.Uid, entries[0].Attr.Gid) + } +} diff --git a/weed/mount/weedfs_file_mkrm.go b/weed/mount/weedfs_file_mkrm.go index f7306f7a3..302e908bb 100644 --- a/weed/mount/weedfs_file_mkrm.go +++ b/weed/mount/weedfs_file_mkrm.go @@ -2,12 +2,10 @@ package mount import ( "context" - "fmt" "syscall" "time" "github.com/seaweedfs/go-fuse/v2/fuse" - "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" ) @@ -83,19 +81,21 @@ func (wfs *WFS) Mknod(cancel <-chan struct{}, in *fuse.MknodIn, name string, out } glog.V(1).Infof("mknod: %v", request) - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + resp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { glog.V(0).Infof("mknod %s: %v", entryFullPath, err) return err } - // Only cache the entry if the parent directory is already cached. - // This avoids polluting the cache with partial directory data. 
- if wfs.metaCache.IsDirectoryCached(dirFullPath) { - wfs.inodeToPath.TouchDirectory(dirFullPath) - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("local mknod %s: %w", entryFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataCreateEvent(string(dirFullPath), newEntry) } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("mknod %s: best-effort metadata apply failed: %v", entryFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(dirFullPath) + } + wfs.inodeToPath.TouchDirectory(dirFullPath) return nil }) @@ -143,16 +143,21 @@ func (wfs *WFS) Unlink(cancel <-chan struct{}, header *fuse.InHeader, name strin glog.V(3).Infof("remove file: %v", entryFullPath) // Always let the filer decide whether to delete chunks based on its authoritative data. // The filer has the correct hard link count and will only delete chunks when appropriate. 
- err := filer_pb.Remove(context.Background(), wfs, string(dirFullPath), name, true, false, false, false, []int32{wfs.signature}) + resp, err := filer_pb.RemoveWithResponse(context.Background(), wfs, string(dirFullPath), name, true, false, false, false, []int32{wfs.signature}) if err != nil { glog.V(0).Infof("remove %s: %v", entryFullPath, err) return fuse.OK } - // then, delete meta cache - if err = wfs.metaCache.DeleteEntry(context.Background(), entryFullPath); err != nil { - glog.V(3).Infof("local DeleteEntry %s: %v", entryFullPath, err) - return fuse.EIO + var event *filer_pb.SubscribeMetadataResponse + if resp != nil && resp.MetadataEvent != nil { + event = resp.MetadataEvent + } else { + event = metadataDeleteEvent(string(dirFullPath), name, false) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("unlink %s: best-effort metadata apply failed: %v", entryFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(dirFullPath) } wfs.inodeToPath.TouchDirectory(dirFullPath) diff --git a/weed/mount/weedfs_file_sync.go b/weed/mount/weedfs_file_sync.go index e8fff04a7..42064a099 100644 --- a/weed/mount/weedfs_file_sync.go +++ b/weed/mount/weedfs_file_sync.go @@ -161,16 +161,19 @@ func (wfs *WFS) doFlush(fh *FileHandle, uid, gid uint32) fuse.Status { wfs.mapPbIdFromLocalToFiler(request.Entry) defer wfs.mapPbIdFromFilerToLocal(request.Entry) - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + resp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { glog.Errorf("fh flush create %s: %v", fileFullPath, err) return fmt.Errorf("fh flush create %s: %v", fileFullPath, err) } - // Only update cache if the parent directory is cached - if wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) { - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return 
fmt.Errorf("update meta cache for %s: %w", fileFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataUpdateEvent(string(dir), request.Entry) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("flush %s: best-effort metadata apply failed: %v", fileFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(dir)) } return nil diff --git a/weed/mount/weedfs_link.go b/weed/mount/weedfs_link.go index 0960b7e47..d39aa73ee 100644 --- a/weed/mount/weedfs_link.go +++ b/weed/mount/weedfs_link.go @@ -2,7 +2,6 @@ package mount import ( "context" - "fmt" "syscall" "time" @@ -56,6 +55,8 @@ func (wfs *WFS) Link(cancel <-chan struct{}, in *fuse.LinkIn, name string, out * } // update old file to hardlink mode + origHardLinkId := oldEntry.HardLinkId + origHardLinkCounter := oldEntry.HardLinkCounter if len(oldEntry.HardLinkId) == 0 { oldEntry.HardLinkId = filer.NewHardLinkId() oldEntry.HardLinkCounter = 1 @@ -90,25 +91,42 @@ func (wfs *WFS) Link(cancel <-chan struct{}, in *fuse.LinkIn, name string, out * wfs.mapPbIdFromLocalToFiler(request.Entry) defer wfs.mapPbIdFromFilerToLocal(request.Entry) - if err := filer_pb.UpdateEntry(context.Background(), client, updateOldEntryRequest); err != nil { + updateResp, err := filer_pb.UpdateEntryWithResponse(context.Background(), client, updateOldEntryRequest) + if err != nil { return err } - // Only update cache if the directory is cached - if wfs.metaCache.IsDirectoryCached(util.FullPath(updateOldEntryRequest.Directory)) { - if err := wfs.metaCache.UpdateEntry(context.Background(), filer.FromPbEntry(updateOldEntryRequest.Directory, updateOldEntryRequest.Entry)); err != nil { - return fmt.Errorf("update meta cache for %s: %w", oldEntryPath, err) - } + updateEvent := updateResp.GetMetadataEvent() + if updateEvent == nil { + updateEvent = metadataUpdateEvent(oldParentPath, updateOldEntryRequest.Entry) + } + if applyErr := 
wfs.applyLocalMetadataEvent(context.Background(), updateEvent); applyErr != nil { + glog.Warningf("link %s: best-effort metadata apply failed: %v", oldEntryPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(oldParentPath)) } - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + createResp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { + // Rollback: restore original HardLinkId/Counter on the source entry + oldEntry.HardLinkId = origHardLinkId + oldEntry.HardLinkCounter = origHardLinkCounter + rollbackReq := &filer_pb.UpdateEntryRequest{ + Directory: oldParentPath, + Entry: oldEntry, + Signatures: []int32{wfs.signature}, + } + if _, rollbackErr := filer_pb.UpdateEntryWithResponse(context.Background(), client, rollbackReq); rollbackErr != nil { + glog.Warningf("link rollback %s: %v", oldEntryPath, rollbackErr) + } return err } - // Only cache the entry if the parent directory is already cached. 
- if wfs.metaCache.IsDirectoryCached(newParentPath) { - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("insert meta cache for %s: %w", newParentPath.Child(name), err) - } + createEvent := createResp.GetMetadataEvent() + if createEvent == nil { + createEvent = metadataCreateEvent(string(newParentPath), request.Entry) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), createEvent); applyErr != nil { + glog.Warningf("link %s: best-effort metadata apply failed: %v", newParentPath.Child(name), applyErr) + wfs.inodeToPath.InvalidateChildrenCache(newParentPath) } return nil diff --git a/weed/mount/weedfs_metadata_flush.go b/weed/mount/weedfs_metadata_flush.go index 28145d089..fe3ace2ec 100644 --- a/weed/mount/weedfs_metadata_flush.go +++ b/weed/mount/weedfs_metadata_flush.go @@ -2,7 +2,6 @@ package mount import ( "context" - "fmt" "sync" "time" @@ -142,15 +141,18 @@ func (wfs *WFS) flushFileMetadata(fh *FileHandle) error { wfs.mapPbIdFromLocalToFiler(request.Entry) defer wfs.mapPbIdFromFilerToLocal(request.Entry) - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + resp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { return err } - // Only update cache if the parent directory is cached - if wfs.metaCache.IsDirectoryCached(util.FullPath(dir)) { - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("update meta cache for %s: %w", fileFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataUpdateEvent(string(dir), request.Entry) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("flushFileMetadata %s: best-effort metadata apply failed: %v", fileFullPath, applyErr) + 
wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(dir)) } glog.V(3).Infof("flushed metadata for %s with %d chunks", fileFullPath, len(entry.GetChunks())) diff --git a/weed/mount/weedfs_rename.go b/weed/mount/weedfs_rename.go index 70d152ae4..cd1acd79f 100644 --- a/weed/mount/weedfs_rename.go +++ b/weed/mount/weedfs_rename.go @@ -9,7 +9,6 @@ import ( "github.com/seaweedfs/go-fuse/v2/fs" "github.com/seaweedfs/go-fuse/v2/fuse" - "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/util" @@ -233,10 +232,12 @@ func (wfs *WFS) handleRenameResponse(ctx context.Context, resp *filer_pb.StreamR glog.V(4).Infof("dir Rename %+v", resp.EventNotification) if resp.EventNotification.NewEntry != nil { - // with new entry, the old entry name also exists. This is the first step to create new entry - newEntry := filer.FromPbEntry(resp.EventNotification.NewParentPath, resp.EventNotification.NewEntry) - if err := wfs.metaCache.AtomicUpdateEntryFromFiler(ctx, "", newEntry); err != nil { - return err + if err := wfs.applyLocalMetadataEvent(ctx, metadataEventFromRenameResponse(resp)); err != nil { + glog.Warningf("rename apply metadata event: %v", err) + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(resp.Directory)) + if resp.EventNotification.NewParentPath != "" { + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(resp.EventNotification.NewParentPath)) + } } oldParent, newParent := util.FullPath(resp.Directory), util.FullPath(resp.EventNotification.NewParentPath) @@ -245,14 +246,6 @@ func (wfs *WFS) handleRenameResponse(ctx context.Context, resp *filer_pb.StreamR oldPath := oldParent.Child(oldName) newPath := newParent.Child(newName) - // Keep the renamed destination immediately readable even when the directory - // itself is not marked as fully cached. 
- if !wfs.metaCache.IsDirectoryCached(newParent) { - if err := wfs.metaCache.InsertEntry(ctx, newEntry); err != nil { - return err - } - } - sourceInode, targetInode := wfs.inodeToPath.MovePath(oldPath, newPath) if sourceInode != 0 { fh, foundFh := wfs.fhMap.FindFileHandle(sourceInode) @@ -271,8 +264,9 @@ func (wfs *WFS) handleRenameResponse(ctx context.Context, resp *filer_pb.StreamR } else if resp.EventNotification.OldEntry != nil { // without new entry, only old entry name exists. This is the second step to delete old entry - if err := wfs.metaCache.AtomicUpdateEntryFromFiler(ctx, util.NewFullPath(resp.Directory, resp.EventNotification.OldEntry.Name), nil); err != nil { - return err + if err := wfs.applyLocalMetadataEvent(ctx, metadataEventFromRenameResponse(resp)); err != nil { + glog.Warningf("rename apply delete event: %v", err) + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(resp.Directory)) } } diff --git a/weed/mount/weedfs_rename_test.go b/weed/mount/weedfs_rename_test.go index b6f4bf33b..4b79cc709 100644 --- a/weed/mount/weedfs_rename_test.go +++ b/weed/mount/weedfs_rename_test.go @@ -10,7 +10,7 @@ import ( "github.com/seaweedfs/seaweedfs/weed/util" ) -func TestHandleRenameResponseCachesTargetForUncachedDirectory(t *testing.T) { +func TestHandleRenameResponseLeavesUncachedTargetOutOfCache(t *testing.T) { uidGidMapper, err := meta_cache.NewUidGidMapper("", "") if err != nil { t.Fatalf("create uid/gid mapper: %v", err) @@ -73,14 +73,11 @@ func TestHandleRenameResponseCachesTargetForUncachedDirectory(t *testing.T) { } entry, findErr := mc.FindEntry(context.Background(), targetPath) - if findErr != nil { - t.Fatalf("find target entry: %v", findErr) + if findErr != filer_pb.ErrNotFound { + t.Fatalf("find target entry error = %v, want %v", findErr, filer_pb.ErrNotFound) } - if entry == nil { - t.Fatalf("target entry %s not cached", targetPath) - } - if entry.FileSize != 53 { - t.Fatalf("cached file size = %d, want 53", entry.FileSize) + if entry != nil 
{ + t.Fatalf("target entry %s should not be cached for an uncached directory", targetPath) } updatedInode, found := inodeToPath.GetInode(targetPath) diff --git a/weed/mount/weedfs_symlink.go b/weed/mount/weedfs_symlink.go index a53312984..0505c8bed 100644 --- a/weed/mount/weedfs_symlink.go +++ b/weed/mount/weedfs_symlink.go @@ -9,7 +9,6 @@ import ( "github.com/seaweedfs/go-fuse/v2/fuse" - "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" ) @@ -53,15 +52,18 @@ func (wfs *WFS) Symlink(cancel <-chan struct{}, header *fuse.InHeader, target st wfs.mapPbIdFromLocalToFiler(request.Entry) defer wfs.mapPbIdFromFilerToLocal(request.Entry) - if err := filer_pb.CreateEntry(context.Background(), client, request); err != nil { + resp, err := filer_pb.CreateEntryWithResponse(context.Background(), client, request) + if err != nil { return fmt.Errorf("symlink %s: %v", entryFullPath, err) } - // Only cache the entry if the parent directory is already cached. 
- if wfs.metaCache.IsDirectoryCached(dirPath) { - if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("insert meta cache for symlink %s: %w", entryFullPath, err) - } + event := resp.GetMetadataEvent() + if event == nil { + event = metadataCreateEvent(string(dirPath), request.Entry) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("symlink %s: best-effort metadata apply failed: %v", entryFullPath, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(dirPath) } return nil diff --git a/weed/mount/wfs_save.go b/weed/mount/wfs_save.go index f3a4694f6..84318ff41 100644 --- a/weed/mount/wfs_save.go +++ b/weed/mount/wfs_save.go @@ -6,7 +6,6 @@ import ( "syscall" "github.com/seaweedfs/go-fuse/v2/fuse" - "github.com/seaweedfs/seaweedfs/weed/filer" "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/util" @@ -28,13 +27,18 @@ func (wfs *WFS) saveEntry(path util.FullPath, entry *filer_pb.Entry) (code fuse. 
} glog.V(1).Infof("save entry: %v", request) - _, err := client.UpdateEntry(context.Background(), request) + resp, err := filer_pb.UpdateEntryWithResponse(context.Background(), client, request) if err != nil { return fmt.Errorf("UpdateEntry dir %s: %v", path, err) } - if err := wfs.metaCache.UpdateEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { - return fmt.Errorf("metaCache.UpdateEntry dir %s: %w", path, err) + event := resp.GetMetadataEvent() + if event == nil { + event = metadataUpdateEvent(parentDir, entry) + } + if applyErr := wfs.applyLocalMetadataEvent(context.Background(), event); applyErr != nil { + glog.Warningf("saveEntry %s: best-effort metadata apply failed: %v", path, applyErr) + wfs.inodeToPath.InvalidateChildrenCache(util.FullPath(parentDir)) } return nil diff --git a/weed/mq/broker/broker_grpc_lookup.go b/weed/mq/broker/broker_grpc_lookup.go index 5eec21b69..fad10f599 100644 --- a/weed/mq/broker/broker_grpc_lookup.go +++ b/weed/mq/broker/broker_grpc_lookup.go @@ -107,7 +107,7 @@ func (b *MessageQueueBroker) ListTopics(ctx context.Context, request *mq_pb.List return err } - if !resp.Entry.IsDirectory { + if resp.Entry == nil || !resp.Entry.IsDirectory { continue } @@ -135,7 +135,7 @@ func (b *MessageQueueBroker) ListTopics(ctx context.Context, request *mq_pb.List break } - if !topicResp.Entry.IsDirectory { + if topicResp.Entry == nil || !topicResp.Entry.IsDirectory { continue } diff --git a/weed/mq/broker/broker_topic_conf_read_write.go b/weed/mq/broker/broker_topic_conf_read_write.go index 138d1023e..976efb36c 100644 --- a/weed/mq/broker/broker_topic_conf_read_write.go +++ b/weed/mq/broker/broker_topic_conf_read_write.go @@ -252,7 +252,7 @@ func (b *MessageQueueBroker) getOffsetRangeFromChunkMetadata(t topic.Topic, part if err != nil { return err } - if resp.Entry.IsDirectory && strings.HasPrefix(resp.Entry.Name, "v") { + if resp.Entry != nil && resp.Entry.IsDirectory && 
strings.HasPrefix(resp.Entry.Name, "v") { if latestVersion == "" || resp.Entry.Name > latestVersion { latestVersion = resp.Entry.Name } @@ -290,7 +290,7 @@ func (b *MessageQueueBroker) getOffsetRangeFromChunkMetadata(t topic.Topic, part if err != nil { return err } - if resp.Entry.IsDirectory && resp.Entry.Name == targetPartitionName { + if resp.Entry != nil && resp.Entry.IsDirectory && resp.Entry.Name == targetPartitionName { partitionDir = resp.Entry.Name break } @@ -327,7 +327,7 @@ func (b *MessageQueueBroker) getOffsetRangeFromChunkMetadata(t topic.Topic, part if err != nil { return err } - if !resp.Entry.IsDirectory && resp.Entry.Name != "checkpoint.offset" { + if resp.Entry != nil && !resp.Entry.IsDirectory && resp.Entry.Name != "checkpoint.offset" { // Check for offset ranges in Extended attributes (both log files and parquet files) if resp.Entry.Extended != nil { fileType := "log" diff --git a/weed/mq/kafka/consumer_offset/filer_storage.go b/weed/mq/kafka/consumer_offset/filer_storage.go index 8eeceb660..9d92ad730 100644 --- a/weed/mq/kafka/consumer_offset/filer_storage.go +++ b/weed/mq/kafka/consumer_offset/filer_storage.go @@ -285,7 +285,7 @@ func (f *FilerStorage) listDirectory(path string) ([]string, error) { return err } - if resp.Entry.IsDirectory { + if resp.Entry != nil && resp.Entry.IsDirectory { entries = append(entries, resp.Entry.Name) } } diff --git a/weed/mq/kafka/integration/broker_client.go b/weed/mq/kafka/integration/broker_client.go index c1f743f0b..158f9e6e5 100644 --- a/weed/mq/kafka/integration/broker_client.go +++ b/weed/mq/kafka/integration/broker_client.go @@ -232,7 +232,7 @@ func (bc *BrokerClient) getOffsetRangeFromChunkMetadata(topic string, partition if err != nil { return err } - if resp.Entry.IsDirectory && strings.HasPrefix(resp.Entry.Name, "v") { + if resp.Entry != nil && resp.Entry.IsDirectory && strings.HasPrefix(resp.Entry.Name, "v") { if latestVersion == "" || resp.Entry.Name > latestVersion { latestVersion = 
resp.Entry.Name } @@ -267,9 +267,15 @@ func (bc *BrokerClient) getOffsetRangeFromChunkMetadata(topic string, partition if err != nil { return err } - if resp.Entry.IsDirectory && strings.Contains(resp.Entry.Name, "-") { - partitionDir = resp.Entry.Name - break // Use the first partition directory we find + if resp.Entry != nil && resp.Entry.IsDirectory && strings.Contains(resp.Entry.Name, "-") { + // Parse partition range (format: NNNN-NNNN) and match requested partition + var pStart, pStop int32 + if n, scanErr := fmt.Sscanf(resp.Entry.Name, "%04d-%04d", &pStart, &pStop); n == 2 && scanErr == nil { + if partition >= pStart && partition < pStop { + partitionDir = resp.Entry.Name + break + } + } } } return nil @@ -303,7 +309,7 @@ func (bc *BrokerClient) getOffsetRangeFromChunkMetadata(topic string, partition if err != nil { return err } - if !resp.Entry.IsDirectory && resp.Entry.Name != "checkpoint.offset" { + if resp.Entry != nil && !resp.Entry.IsDirectory && resp.Entry.Name != "checkpoint.offset" { // Check for offset ranges in Extended attributes (both log files and parquet files) if resp.Entry.Extended != nil { // Track maximum offset for high water mark diff --git a/weed/pb/filer.proto b/weed/pb/filer.proto index 9257996ed..78dd58b1f 100644 --- a/weed/pb/filer.proto +++ b/weed/pb/filer.proto @@ -100,10 +100,12 @@ message ListEntriesRequest { string startFromFileName = 3; bool inclusiveStartFrom = 4; uint32 limit = 5; + int64 snapshot_ts_ns = 6; } message ListEntriesResponse { Entry entry = 1; + int64 snapshot_ts_ns = 2; } message RemoteEntry { @@ -203,6 +205,7 @@ message CreateEntryRequest { message CreateEntryResponse { string error = 1; + SubscribeMetadataResponse metadata_event = 2; } message UpdateEntryRequest { @@ -212,6 +215,7 @@ message UpdateEntryRequest { repeated int32 signatures = 4; } message UpdateEntryResponse { + SubscribeMetadataResponse metadata_event = 1; } message AppendToEntryRequest { @@ -236,6 +240,7 @@ message DeleteEntryRequest { message 
DeleteEntryResponse { string error = 1; + SubscribeMetadataResponse metadata_event = 2; } message AtomicRenameEntryRequest { @@ -469,6 +474,7 @@ message CacheRemoteObjectToLocalClusterRequest { } message CacheRemoteObjectToLocalClusterResponse { Entry entry = 1; + SubscribeMetadataResponse metadata_event = 2; } ///////////////////////// diff --git a/weed/pb/filer_pb/filer.pb.go b/weed/pb/filer_pb/filer.pb.go index 7b96f095d..e86fc9d2a 100644 --- a/weed/pb/filer_pb/filer.pb.go +++ b/weed/pb/filer_pb/filer.pb.go @@ -176,6 +176,7 @@ type ListEntriesRequest struct { StartFromFileName string `protobuf:"bytes,3,opt,name=startFromFileName,proto3" json:"startFromFileName,omitempty"` InclusiveStartFrom bool `protobuf:"varint,4,opt,name=inclusiveStartFrom,proto3" json:"inclusiveStartFrom,omitempty"` Limit uint32 `protobuf:"varint,5,opt,name=limit,proto3" json:"limit,omitempty"` + SnapshotTsNs int64 `protobuf:"varint,6,opt,name=snapshot_ts_ns,json=snapshotTsNs,proto3" json:"snapshot_ts_ns,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -245,9 +246,17 @@ func (x *ListEntriesRequest) GetLimit() uint32 { return 0 } +func (x *ListEntriesRequest) GetSnapshotTsNs() int64 { + if x != nil { + return x.SnapshotTsNs + } + return 0 +} + type ListEntriesResponse struct { state protoimpl.MessageState `protogen:"open.v1"` Entry *Entry `protobuf:"bytes,1,opt,name=entry,proto3" json:"entry,omitempty"` + SnapshotTsNs int64 `protobuf:"varint,2,opt,name=snapshot_ts_ns,json=snapshotTsNs,proto3" json:"snapshot_ts_ns,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -289,6 +298,13 @@ func (x *ListEntriesResponse) GetEntry() *Entry { return nil } +func (x *ListEntriesResponse) GetSnapshotTsNs() int64 { + if x != nil { + return x.SnapshotTsNs + } + return 0 +} + type RemoteEntry struct { state protoimpl.MessageState `protogen:"open.v1"` StorageName string `protobuf:"bytes,1,opt,name=storage_name,json=storageName,proto3" 
json:"storage_name,omitempty"` @@ -1102,8 +1118,9 @@ func (x *CreateEntryRequest) GetSkipCheckParentDirectory() bool { } type CreateEntryResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` - Error string `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` + state protoimpl.MessageState `protogen:"open.v1"` + Error string `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` + MetadataEvent *SubscribeMetadataResponse `protobuf:"bytes,2,opt,name=metadata_event,json=metadataEvent,proto3" json:"metadata_event,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -1145,6 +1162,13 @@ func (x *CreateEntryResponse) GetError() string { return "" } +func (x *CreateEntryResponse) GetMetadataEvent() *SubscribeMetadataResponse { + if x != nil { + return x.MetadataEvent + } + return nil +} + type UpdateEntryRequest struct { state protoimpl.MessageState `protogen:"open.v1"` Directory string `protobuf:"bytes,1,opt,name=directory,proto3" json:"directory,omitempty"` @@ -1214,7 +1238,8 @@ func (x *UpdateEntryRequest) GetSignatures() []int32 { } type UpdateEntryResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` + state protoimpl.MessageState `protogen:"open.v1"` + MetadataEvent *SubscribeMetadataResponse `protobuf:"bytes,1,opt,name=metadata_event,json=metadataEvent,proto3" json:"metadata_event,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -1249,6 +1274,13 @@ func (*UpdateEntryResponse) Descriptor() ([]byte, []int) { return file_filer_proto_rawDescGZIP(), []int{15} } +func (x *UpdateEntryResponse) GetMetadataEvent() *SubscribeMetadataResponse { + if x != nil { + return x.MetadataEvent + } + return nil +} + type AppendToEntryRequest struct { state protoimpl.MessageState `protogen:"open.v1"` Directory string `protobuf:"bytes,1,opt,name=directory,proto3" json:"directory,omitempty"` @@ -1447,8 +1479,9 @@ func (x *DeleteEntryRequest) GetIfNotModifiedAfter() 
int64 { } type DeleteEntryResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` - Error string `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` + state protoimpl.MessageState `protogen:"open.v1"` + Error string `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` + MetadataEvent *SubscribeMetadataResponse `protobuf:"bytes,2,opt,name=metadata_event,json=metadataEvent,proto3" json:"metadata_event,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -1490,6 +1523,13 @@ func (x *DeleteEntryResponse) GetError() string { return "" } +func (x *DeleteEntryResponse) GetMetadataEvent() *SubscribeMetadataResponse { + if x != nil { + return x.MetadataEvent + } + return nil +} + type AtomicRenameEntryRequest struct { state protoimpl.MessageState `protogen:"open.v1"` OldDirectory string `protobuf:"bytes,1,opt,name=old_directory,json=oldDirectory,proto3" json:"old_directory,omitempty"` @@ -3628,8 +3668,9 @@ func (x *CacheRemoteObjectToLocalClusterRequest) GetName() string { } type CacheRemoteObjectToLocalClusterResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` - Entry *Entry `protobuf:"bytes,1,opt,name=entry,proto3" json:"entry,omitempty"` + state protoimpl.MessageState `protogen:"open.v1"` + Entry *Entry `protobuf:"bytes,1,opt,name=entry,proto3" json:"entry,omitempty"` + MetadataEvent *SubscribeMetadataResponse `protobuf:"bytes,2,opt,name=metadata_event,json=metadataEvent,proto3" json:"metadata_event,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -3671,6 +3712,13 @@ func (x *CacheRemoteObjectToLocalClusterResponse) GetEntry() *Entry { return nil } +func (x *CacheRemoteObjectToLocalClusterResponse) GetMetadataEvent() *SubscribeMetadataResponse { + if x != nil { + return x.MetadataEvent + } + return nil +} + // /////////////////////// // distributed lock management // /////////////////////// @@ -4401,15 +4449,17 @@ const file_filer_proto_rawDesc = 
"" + "\tdirectory\x18\x01 \x01(\tR\tdirectory\x12\x12\n" + "\x04name\x18\x02 \x01(\tR\x04name\"E\n" + "\x1cLookupDirectoryEntryResponse\x12%\n" + - "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\"\xbe\x01\n" + + "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\"\xe4\x01\n" + "\x12ListEntriesRequest\x12\x1c\n" + "\tdirectory\x18\x01 \x01(\tR\tdirectory\x12\x16\n" + "\x06prefix\x18\x02 \x01(\tR\x06prefix\x12,\n" + "\x11startFromFileName\x18\x03 \x01(\tR\x11startFromFileName\x12.\n" + "\x12inclusiveStartFrom\x18\x04 \x01(\bR\x12inclusiveStartFrom\x12\x14\n" + - "\x05limit\x18\x05 \x01(\rR\x05limit\"<\n" + + "\x05limit\x18\x05 \x01(\rR\x05limit\x12$\n" + + "\x0esnapshot_ts_ns\x18\x06 \x01(\x03R\fsnapshotTsNs\"b\n" + "\x13ListEntriesResponse\x12%\n" + - "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\"\xc8\x01\n" + + "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\x12$\n" + + "\x0esnapshot_ts_ns\x18\x02 \x01(\x03R\fsnapshotTsNs\"\xc8\x01\n" + "\vRemoteEntry\x12!\n" + "\fstorage_name\x18\x01 \x01(\tR\vstorageName\x120\n" + "\x15last_local_sync_ts_ns\x18\x02 \x01(\x03R\x11lastLocalSyncTsNs\x12 \n" + @@ -4497,17 +4547,19 @@ const file_filer_proto_rawDesc = "" + "\n" + "signatures\x18\x05 \x03(\x05R\n" + "signatures\x12=\n" + - "\x1bskip_check_parent_directory\x18\x06 \x01(\bR\x18skipCheckParentDirectory\"+\n" + + "\x1bskip_check_parent_directory\x18\x06 \x01(\bR\x18skipCheckParentDirectory\"w\n" + "\x13CreateEntryResponse\x12\x14\n" + - "\x05error\x18\x01 \x01(\tR\x05error\"\xac\x01\n" + + "\x05error\x18\x01 \x01(\tR\x05error\x12J\n" + + "\x0emetadata_event\x18\x02 \x01(\v2#.filer_pb.SubscribeMetadataResponseR\rmetadataEvent\"\xac\x01\n" + "\x12UpdateEntryRequest\x12\x1c\n" + "\tdirectory\x18\x01 \x01(\tR\tdirectory\x12%\n" + "\x05entry\x18\x02 \x01(\v2\x0f.filer_pb.EntryR\x05entry\x121\n" + "\x15is_from_other_cluster\x18\x03 \x01(\bR\x12isFromOtherCluster\x12\x1e\n" + "\n" + "signatures\x18\x04 \x03(\x05R\n" + - "signatures\"\x15\n" + - 
"\x13UpdateEntryResponse\"\x80\x01\n" + + "signatures\"a\n" + + "\x13UpdateEntryResponse\x12J\n" + + "\x0emetadata_event\x18\x01 \x01(\v2#.filer_pb.SubscribeMetadataResponseR\rmetadataEvent\"\x80\x01\n" + "\x14AppendToEntryRequest\x12\x1c\n" + "\tdirectory\x18\x01 \x01(\tR\tdirectory\x12\x1d\n" + "\n" + @@ -4524,9 +4576,10 @@ const file_filer_proto_rawDesc = "" + "\n" + "signatures\x18\b \x03(\x05R\n" + "signatures\x121\n" + - "\x15if_not_modified_after\x18\t \x01(\x03R\x12ifNotModifiedAfter\"+\n" + + "\x15if_not_modified_after\x18\t \x01(\x03R\x12ifNotModifiedAfter\"w\n" + "\x13DeleteEntryResponse\x12\x14\n" + - "\x05error\x18\x01 \x01(\tR\x05error\"\xba\x01\n" + + "\x05error\x18\x01 \x01(\tR\x05error\x12J\n" + + "\x0emetadata_event\x18\x02 \x01(\v2#.filer_pb.SubscribeMetadataResponseR\rmetadataEvent\"\xba\x01\n" + "\x18AtomicRenameEntryRequest\x12#\n" + "\rold_directory\x18\x01 \x01(\tR\foldDirectory\x12\x19\n" + "\bold_name\x18\x02 \x01(\tR\aoldName\x12#\n" + @@ -4723,9 +4776,10 @@ const file_filer_proto_rawDesc = "" + "\x1bworm_retention_time_seconds\x18\x10 \x01(\x04R\x18wormRetentionTimeSeconds\"Z\n" + "&CacheRemoteObjectToLocalClusterRequest\x12\x1c\n" + "\tdirectory\x18\x01 \x01(\tR\tdirectory\x12\x12\n" + - "\x04name\x18\x02 \x01(\tR\x04name\"P\n" + + "\x04name\x18\x02 \x01(\tR\x04name\"\x9c\x01\n" + "'CacheRemoteObjectToLocalClusterResponse\x12%\n" + - "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\"\x9b\x01\n" + + "\x05entry\x18\x01 \x01(\v2\x0f.filer_pb.EntryR\x05entry\x12J\n" + + "\x0emetadata_event\x18\x02 \x01(\v2#.filer_pb.SubscribeMetadataResponseR\rmetadataEvent\"\x9b\x01\n" + "\vLockRequest\x12\x12\n" + "\x04name\x18\x01 \x01(\tR\x04name\x12&\n" + "\x0fseconds_to_lock\x18\x02 \x01(\x03R\rsecondsToLock\x12\x1f\n" + @@ -4900,75 +4954,79 @@ var file_filer_proto_depIdxs = []int32{ 0, // 11: filer_pb.FileChunk.sse_type:type_name -> filer_pb.SSEType 9, // 12: filer_pb.FileChunkManifest.chunks:type_name -> filer_pb.FileChunk 6, // 13: 
filer_pb.CreateEntryRequest.entry:type_name -> filer_pb.Entry - 6, // 14: filer_pb.UpdateEntryRequest.entry:type_name -> filer_pb.Entry - 9, // 15: filer_pb.AppendToEntryRequest.chunks:type_name -> filer_pb.FileChunk - 8, // 16: filer_pb.StreamRenameEntryResponse.event_notification:type_name -> filer_pb.EventNotification - 29, // 17: filer_pb.AssignVolumeResponse.location:type_name -> filer_pb.Location - 29, // 18: filer_pb.Locations.locations:type_name -> filer_pb.Location - 68, // 19: filer_pb.LookupVolumeResponse.locations_map:type_name -> filer_pb.LookupVolumeResponse.LocationsMapEntry - 31, // 20: filer_pb.CollectionListResponse.collections:type_name -> filer_pb.Collection - 8, // 21: filer_pb.SubscribeMetadataResponse.event_notification:type_name -> filer_pb.EventNotification - 6, // 22: filer_pb.TraverseBfsMetadataResponse.entry:type_name -> filer_pb.Entry - 69, // 23: filer_pb.LocateBrokerResponse.resources:type_name -> filer_pb.LocateBrokerResponse.Resource - 70, // 24: filer_pb.FilerConf.locations:type_name -> filer_pb.FilerConf.PathConf - 6, // 25: filer_pb.CacheRemoteObjectToLocalClusterResponse.entry:type_name -> filer_pb.Entry - 64, // 26: filer_pb.TransferLocksRequest.locks:type_name -> filer_pb.Lock - 28, // 27: filer_pb.LookupVolumeResponse.LocationsMapEntry.value:type_name -> filer_pb.Locations - 1, // 28: filer_pb.SeaweedFiler.LookupDirectoryEntry:input_type -> filer_pb.LookupDirectoryEntryRequest - 3, // 29: filer_pb.SeaweedFiler.ListEntries:input_type -> filer_pb.ListEntriesRequest - 13, // 30: filer_pb.SeaweedFiler.CreateEntry:input_type -> filer_pb.CreateEntryRequest - 15, // 31: filer_pb.SeaweedFiler.UpdateEntry:input_type -> filer_pb.UpdateEntryRequest - 17, // 32: filer_pb.SeaweedFiler.AppendToEntry:input_type -> filer_pb.AppendToEntryRequest - 19, // 33: filer_pb.SeaweedFiler.DeleteEntry:input_type -> filer_pb.DeleteEntryRequest - 21, // 34: filer_pb.SeaweedFiler.AtomicRenameEntry:input_type -> filer_pb.AtomicRenameEntryRequest - 23, // 
35: filer_pb.SeaweedFiler.StreamRenameEntry:input_type -> filer_pb.StreamRenameEntryRequest - 25, // 36: filer_pb.SeaweedFiler.AssignVolume:input_type -> filer_pb.AssignVolumeRequest - 27, // 37: filer_pb.SeaweedFiler.LookupVolume:input_type -> filer_pb.LookupVolumeRequest - 32, // 38: filer_pb.SeaweedFiler.CollectionList:input_type -> filer_pb.CollectionListRequest - 34, // 39: filer_pb.SeaweedFiler.DeleteCollection:input_type -> filer_pb.DeleteCollectionRequest - 36, // 40: filer_pb.SeaweedFiler.Statistics:input_type -> filer_pb.StatisticsRequest - 38, // 41: filer_pb.SeaweedFiler.Ping:input_type -> filer_pb.PingRequest - 40, // 42: filer_pb.SeaweedFiler.GetFilerConfiguration:input_type -> filer_pb.GetFilerConfigurationRequest - 44, // 43: filer_pb.SeaweedFiler.TraverseBfsMetadata:input_type -> filer_pb.TraverseBfsMetadataRequest - 42, // 44: filer_pb.SeaweedFiler.SubscribeMetadata:input_type -> filer_pb.SubscribeMetadataRequest - 42, // 45: filer_pb.SeaweedFiler.SubscribeLocalMetadata:input_type -> filer_pb.SubscribeMetadataRequest - 51, // 46: filer_pb.SeaweedFiler.KvGet:input_type -> filer_pb.KvGetRequest - 53, // 47: filer_pb.SeaweedFiler.KvPut:input_type -> filer_pb.KvPutRequest - 56, // 48: filer_pb.SeaweedFiler.CacheRemoteObjectToLocalCluster:input_type -> filer_pb.CacheRemoteObjectToLocalClusterRequest - 58, // 49: filer_pb.SeaweedFiler.DistributedLock:input_type -> filer_pb.LockRequest - 60, // 50: filer_pb.SeaweedFiler.DistributedUnlock:input_type -> filer_pb.UnlockRequest - 62, // 51: filer_pb.SeaweedFiler.FindLockOwner:input_type -> filer_pb.FindLockOwnerRequest - 65, // 52: filer_pb.SeaweedFiler.TransferLocks:input_type -> filer_pb.TransferLocksRequest - 2, // 53: filer_pb.SeaweedFiler.LookupDirectoryEntry:output_type -> filer_pb.LookupDirectoryEntryResponse - 4, // 54: filer_pb.SeaweedFiler.ListEntries:output_type -> filer_pb.ListEntriesResponse - 14, // 55: filer_pb.SeaweedFiler.CreateEntry:output_type -> filer_pb.CreateEntryResponse - 16, // 56: 
filer_pb.SeaweedFiler.UpdateEntry:output_type -> filer_pb.UpdateEntryResponse - 18, // 57: filer_pb.SeaweedFiler.AppendToEntry:output_type -> filer_pb.AppendToEntryResponse - 20, // 58: filer_pb.SeaweedFiler.DeleteEntry:output_type -> filer_pb.DeleteEntryResponse - 22, // 59: filer_pb.SeaweedFiler.AtomicRenameEntry:output_type -> filer_pb.AtomicRenameEntryResponse - 24, // 60: filer_pb.SeaweedFiler.StreamRenameEntry:output_type -> filer_pb.StreamRenameEntryResponse - 26, // 61: filer_pb.SeaweedFiler.AssignVolume:output_type -> filer_pb.AssignVolumeResponse - 30, // 62: filer_pb.SeaweedFiler.LookupVolume:output_type -> filer_pb.LookupVolumeResponse - 33, // 63: filer_pb.SeaweedFiler.CollectionList:output_type -> filer_pb.CollectionListResponse - 35, // 64: filer_pb.SeaweedFiler.DeleteCollection:output_type -> filer_pb.DeleteCollectionResponse - 37, // 65: filer_pb.SeaweedFiler.Statistics:output_type -> filer_pb.StatisticsResponse - 39, // 66: filer_pb.SeaweedFiler.Ping:output_type -> filer_pb.PingResponse - 41, // 67: filer_pb.SeaweedFiler.GetFilerConfiguration:output_type -> filer_pb.GetFilerConfigurationResponse - 45, // 68: filer_pb.SeaweedFiler.TraverseBfsMetadata:output_type -> filer_pb.TraverseBfsMetadataResponse - 43, // 69: filer_pb.SeaweedFiler.SubscribeMetadata:output_type -> filer_pb.SubscribeMetadataResponse - 43, // 70: filer_pb.SeaweedFiler.SubscribeLocalMetadata:output_type -> filer_pb.SubscribeMetadataResponse - 52, // 71: filer_pb.SeaweedFiler.KvGet:output_type -> filer_pb.KvGetResponse - 54, // 72: filer_pb.SeaweedFiler.KvPut:output_type -> filer_pb.KvPutResponse - 57, // 73: filer_pb.SeaweedFiler.CacheRemoteObjectToLocalCluster:output_type -> filer_pb.CacheRemoteObjectToLocalClusterResponse - 59, // 74: filer_pb.SeaweedFiler.DistributedLock:output_type -> filer_pb.LockResponse - 61, // 75: filer_pb.SeaweedFiler.DistributedUnlock:output_type -> filer_pb.UnlockResponse - 63, // 76: filer_pb.SeaweedFiler.FindLockOwner:output_type -> 
filer_pb.FindLockOwnerResponse - 66, // 77: filer_pb.SeaweedFiler.TransferLocks:output_type -> filer_pb.TransferLocksResponse - 53, // [53:78] is the sub-list for method output_type - 28, // [28:53] is the sub-list for method input_type - 28, // [28:28] is the sub-list for extension type_name - 28, // [28:28] is the sub-list for extension extendee - 0, // [0:28] is the sub-list for field type_name + 43, // 14: filer_pb.CreateEntryResponse.metadata_event:type_name -> filer_pb.SubscribeMetadataResponse + 6, // 15: filer_pb.UpdateEntryRequest.entry:type_name -> filer_pb.Entry + 43, // 16: filer_pb.UpdateEntryResponse.metadata_event:type_name -> filer_pb.SubscribeMetadataResponse + 9, // 17: filer_pb.AppendToEntryRequest.chunks:type_name -> filer_pb.FileChunk + 43, // 18: filer_pb.DeleteEntryResponse.metadata_event:type_name -> filer_pb.SubscribeMetadataResponse + 8, // 19: filer_pb.StreamRenameEntryResponse.event_notification:type_name -> filer_pb.EventNotification + 29, // 20: filer_pb.AssignVolumeResponse.location:type_name -> filer_pb.Location + 29, // 21: filer_pb.Locations.locations:type_name -> filer_pb.Location + 68, // 22: filer_pb.LookupVolumeResponse.locations_map:type_name -> filer_pb.LookupVolumeResponse.LocationsMapEntry + 31, // 23: filer_pb.CollectionListResponse.collections:type_name -> filer_pb.Collection + 8, // 24: filer_pb.SubscribeMetadataResponse.event_notification:type_name -> filer_pb.EventNotification + 6, // 25: filer_pb.TraverseBfsMetadataResponse.entry:type_name -> filer_pb.Entry + 69, // 26: filer_pb.LocateBrokerResponse.resources:type_name -> filer_pb.LocateBrokerResponse.Resource + 70, // 27: filer_pb.FilerConf.locations:type_name -> filer_pb.FilerConf.PathConf + 6, // 28: filer_pb.CacheRemoteObjectToLocalClusterResponse.entry:type_name -> filer_pb.Entry + 43, // 29: filer_pb.CacheRemoteObjectToLocalClusterResponse.metadata_event:type_name -> filer_pb.SubscribeMetadataResponse + 64, // 30: filer_pb.TransferLocksRequest.locks:type_name -> 
filer_pb.Lock + 28, // 31: filer_pb.LookupVolumeResponse.LocationsMapEntry.value:type_name -> filer_pb.Locations + 1, // 32: filer_pb.SeaweedFiler.LookupDirectoryEntry:input_type -> filer_pb.LookupDirectoryEntryRequest + 3, // 33: filer_pb.SeaweedFiler.ListEntries:input_type -> filer_pb.ListEntriesRequest + 13, // 34: filer_pb.SeaweedFiler.CreateEntry:input_type -> filer_pb.CreateEntryRequest + 15, // 35: filer_pb.SeaweedFiler.UpdateEntry:input_type -> filer_pb.UpdateEntryRequest + 17, // 36: filer_pb.SeaweedFiler.AppendToEntry:input_type -> filer_pb.AppendToEntryRequest + 19, // 37: filer_pb.SeaweedFiler.DeleteEntry:input_type -> filer_pb.DeleteEntryRequest + 21, // 38: filer_pb.SeaweedFiler.AtomicRenameEntry:input_type -> filer_pb.AtomicRenameEntryRequest + 23, // 39: filer_pb.SeaweedFiler.StreamRenameEntry:input_type -> filer_pb.StreamRenameEntryRequest + 25, // 40: filer_pb.SeaweedFiler.AssignVolume:input_type -> filer_pb.AssignVolumeRequest + 27, // 41: filer_pb.SeaweedFiler.LookupVolume:input_type -> filer_pb.LookupVolumeRequest + 32, // 42: filer_pb.SeaweedFiler.CollectionList:input_type -> filer_pb.CollectionListRequest + 34, // 43: filer_pb.SeaweedFiler.DeleteCollection:input_type -> filer_pb.DeleteCollectionRequest + 36, // 44: filer_pb.SeaweedFiler.Statistics:input_type -> filer_pb.StatisticsRequest + 38, // 45: filer_pb.SeaweedFiler.Ping:input_type -> filer_pb.PingRequest + 40, // 46: filer_pb.SeaweedFiler.GetFilerConfiguration:input_type -> filer_pb.GetFilerConfigurationRequest + 44, // 47: filer_pb.SeaweedFiler.TraverseBfsMetadata:input_type -> filer_pb.TraverseBfsMetadataRequest + 42, // 48: filer_pb.SeaweedFiler.SubscribeMetadata:input_type -> filer_pb.SubscribeMetadataRequest + 42, // 49: filer_pb.SeaweedFiler.SubscribeLocalMetadata:input_type -> filer_pb.SubscribeMetadataRequest + 51, // 50: filer_pb.SeaweedFiler.KvGet:input_type -> filer_pb.KvGetRequest + 53, // 51: filer_pb.SeaweedFiler.KvPut:input_type -> filer_pb.KvPutRequest + 56, // 52: 
filer_pb.SeaweedFiler.CacheRemoteObjectToLocalCluster:input_type -> filer_pb.CacheRemoteObjectToLocalClusterRequest + 58, // 53: filer_pb.SeaweedFiler.DistributedLock:input_type -> filer_pb.LockRequest + 60, // 54: filer_pb.SeaweedFiler.DistributedUnlock:input_type -> filer_pb.UnlockRequest + 62, // 55: filer_pb.SeaweedFiler.FindLockOwner:input_type -> filer_pb.FindLockOwnerRequest + 65, // 56: filer_pb.SeaweedFiler.TransferLocks:input_type -> filer_pb.TransferLocksRequest + 2, // 57: filer_pb.SeaweedFiler.LookupDirectoryEntry:output_type -> filer_pb.LookupDirectoryEntryResponse + 4, // 58: filer_pb.SeaweedFiler.ListEntries:output_type -> filer_pb.ListEntriesResponse + 14, // 59: filer_pb.SeaweedFiler.CreateEntry:output_type -> filer_pb.CreateEntryResponse + 16, // 60: filer_pb.SeaweedFiler.UpdateEntry:output_type -> filer_pb.UpdateEntryResponse + 18, // 61: filer_pb.SeaweedFiler.AppendToEntry:output_type -> filer_pb.AppendToEntryResponse + 20, // 62: filer_pb.SeaweedFiler.DeleteEntry:output_type -> filer_pb.DeleteEntryResponse + 22, // 63: filer_pb.SeaweedFiler.AtomicRenameEntry:output_type -> filer_pb.AtomicRenameEntryResponse + 24, // 64: filer_pb.SeaweedFiler.StreamRenameEntry:output_type -> filer_pb.StreamRenameEntryResponse + 26, // 65: filer_pb.SeaweedFiler.AssignVolume:output_type -> filer_pb.AssignVolumeResponse + 30, // 66: filer_pb.SeaweedFiler.LookupVolume:output_type -> filer_pb.LookupVolumeResponse + 33, // 67: filer_pb.SeaweedFiler.CollectionList:output_type -> filer_pb.CollectionListResponse + 35, // 68: filer_pb.SeaweedFiler.DeleteCollection:output_type -> filer_pb.DeleteCollectionResponse + 37, // 69: filer_pb.SeaweedFiler.Statistics:output_type -> filer_pb.StatisticsResponse + 39, // 70: filer_pb.SeaweedFiler.Ping:output_type -> filer_pb.PingResponse + 41, // 71: filer_pb.SeaweedFiler.GetFilerConfiguration:output_type -> filer_pb.GetFilerConfigurationResponse + 45, // 72: filer_pb.SeaweedFiler.TraverseBfsMetadata:output_type -> 
filer_pb.TraverseBfsMetadataResponse + 43, // 73: filer_pb.SeaweedFiler.SubscribeMetadata:output_type -> filer_pb.SubscribeMetadataResponse + 43, // 74: filer_pb.SeaweedFiler.SubscribeLocalMetadata:output_type -> filer_pb.SubscribeMetadataResponse + 52, // 75: filer_pb.SeaweedFiler.KvGet:output_type -> filer_pb.KvGetResponse + 54, // 76: filer_pb.SeaweedFiler.KvPut:output_type -> filer_pb.KvPutResponse + 57, // 77: filer_pb.SeaweedFiler.CacheRemoteObjectToLocalCluster:output_type -> filer_pb.CacheRemoteObjectToLocalClusterResponse + 59, // 78: filer_pb.SeaweedFiler.DistributedLock:output_type -> filer_pb.LockResponse + 61, // 79: filer_pb.SeaweedFiler.DistributedUnlock:output_type -> filer_pb.UnlockResponse + 63, // 80: filer_pb.SeaweedFiler.FindLockOwner:output_type -> filer_pb.FindLockOwnerResponse + 66, // 81: filer_pb.SeaweedFiler.TransferLocks:output_type -> filer_pb.TransferLocksResponse + 57, // [57:82] is the sub-list for method output_type + 32, // [32:57] is the sub-list for method input_type + 32, // [32:32] is the sub-list for extension type_name + 32, // [32:32] is the sub-list for extension extendee + 0, // [0:32] is the sub-list for field type_name } func init() { file_filer_proto_init() } diff --git a/weed/pb/filer_pb/filer_client.go b/weed/pb/filer_pb/filer_client.go index 17953c67d..c93417eee 100644 --- a/weed/pb/filer_pb/filer_client.go +++ b/weed/pb/filer_pb/filer_client.go @@ -58,7 +58,11 @@ func GetEntry(ctx context.Context, filerClient FilerClient, fullFilePath util.Fu type EachEntryFunction func(entry *Entry, isLast bool) error func ReadDirAllEntries(ctx context.Context, filerClient FilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction) (err error) { + _, err = ReadDirAllEntriesWithSnapshot(ctx, filerClient, fullDirPath, prefix, fn) + return err +} +func ReadDirAllEntriesWithSnapshot(ctx context.Context, filerClient FilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction) (snapshotTsNs int64, err 
error) { var counter uint32 var startFrom string var counterFunc = func(entry *Entry, isLast bool) error { @@ -69,18 +73,18 @@ func ReadDirAllEntries(ctx context.Context, filerClient FilerClient, fullDirPath var paginationLimit uint32 = 10000 - if err = doList(ctx, filerClient, fullDirPath, prefix, counterFunc, "", false, paginationLimit); err != nil { - return err + if snapshotTsNs, err = doListWithSnapshot(ctx, filerClient, fullDirPath, prefix, counterFunc, "", false, paginationLimit, 0); err != nil { + return snapshotTsNs, err } for counter == paginationLimit { counter = 0 - if err = doList(ctx, filerClient, fullDirPath, prefix, counterFunc, startFrom, false, paginationLimit); err != nil { - return err + if _, err = doListWithSnapshot(ctx, filerClient, fullDirPath, prefix, counterFunc, startFrom, false, paginationLimit, snapshotTsNs); err != nil { + return snapshotTsNs, err } } - return nil + return snapshotTsNs, nil } func List(ctx context.Context, filerClient FilerClient, parentDirectoryPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32) (err error) { @@ -90,9 +94,16 @@ func List(ctx context.Context, filerClient FilerClient, parentDirectoryPath, pre } func doList(ctx context.Context, filerClient FilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32) (err error) { - return filerClient.WithFilerClient(false, func(client SeaweedFilerClient) error { - return doSeaweedList(ctx, client, fullDirPath, prefix, fn, startFrom, inclusive, limit) + _, err = doListWithSnapshot(ctx, filerClient, fullDirPath, prefix, fn, startFrom, inclusive, limit, 0) + return err +} + +func doListWithSnapshot(ctx context.Context, filerClient FilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32, snapshotTsNs int64) (actualSnapshotTsNs int64, err error) { + err = filerClient.WithFilerClient(false, func(client 
SeaweedFilerClient) error { + actualSnapshotTsNs, err = DoSeaweedListWithSnapshot(ctx, client, fullDirPath, prefix, fn, startFrom, inclusive, limit, snapshotTsNs) + return err }) + return actualSnapshotTsNs, err } func SeaweedList(ctx context.Context, client SeaweedFilerClient, parentDirectoryPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32) (err error) { @@ -100,6 +111,11 @@ func SeaweedList(ctx context.Context, client SeaweedFilerClient, parentDirectory } func doSeaweedList(ctx context.Context, client SeaweedFilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32) (err error) { + _, err = DoSeaweedListWithSnapshot(ctx, client, fullDirPath, prefix, fn, startFrom, inclusive, limit, 0) + return err +} + +func DoSeaweedListWithSnapshot(ctx context.Context, client SeaweedFilerClient, fullDirPath util.FullPath, prefix string, fn EachEntryFunction, startFrom string, inclusive bool, limit uint32, snapshotTsNs int64) (actualSnapshotTsNs int64, err error) { // Redundancy limit to make it correctly judge whether it is the last file. redLimit := limit @@ -115,14 +131,23 @@ func doSeaweedList(ctx context.Context, client SeaweedFilerClient, fullDirPath u StartFromFileName: startFrom, Limit: redLimit, InclusiveStartFrom: inclusive, + SnapshotTsNs: snapshotTsNs, } + // Preserve the caller-requested snapshot so pagination uses the same + // boundary across pages. For first requests (snapshotTsNs==0) we do NOT + // synthesize a client-side timestamp — if the server returns no entries, + // we return 0 so callers like CompleteDirectoryBuild know no server + // snapshot was received and can replay all buffered events without + // clock-skew-sensitive filtering. 
+ actualSnapshotTsNs = snapshotTsNs + glog.V(4).InfofCtx(ctx, "read directory: %v", request) ctx, cancel := context.WithCancel(ctx) defer cancel() stream, err := client.ListEntries(ctx, request) if err != nil { - return fmt.Errorf("list %s: %v", fullDirPath, err) + return actualSnapshotTsNs, fmt.Errorf("list %s: %v", fullDirPath, err) } var prevEntry *Entry @@ -133,17 +158,20 @@ func doSeaweedList(ctx context.Context, client SeaweedFilerClient, fullDirPath u if recvErr == io.EOF { if prevEntry != nil { if err := fn(prevEntry, true); err != nil { - return err + return actualSnapshotTsNs, err } } break } else { - return recvErr + return actualSnapshotTsNs, recvErr } } + if resp.SnapshotTsNs != 0 { + actualSnapshotTsNs = resp.SnapshotTsNs + } if prevEntry != nil { if err := fn(prevEntry, false); err != nil { - return err + return actualSnapshotTsNs, err } } prevEntry = resp.Entry @@ -153,7 +181,7 @@ func doSeaweedList(ctx context.Context, client SeaweedFilerClient, fullDirPath u } } - return nil + return actualSnapshotTsNs, nil } func Exists(ctx context.Context, filerClient FilerClient, parentDirectoryPath string, entryName string, isDirectory bool) (exists bool, err error) { @@ -277,12 +305,26 @@ func MkFile(ctx context.Context, filerClient FilerClient, parentDirectoryPath st } func Remove(ctx context.Context, filerClient FilerClient, parentDirectoryPath, name string, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster bool, signatures []int32) error { - return filerClient.WithFilerClient(false, func(client SeaweedFilerClient) error { - return DoRemove(ctx, client, parentDirectoryPath, name, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster, signatures) - }) + _, err := RemoveWithResponse(ctx, filerClient, parentDirectoryPath, name, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster, signatures) + return err } func DoRemove(ctx context.Context, client SeaweedFilerClient, parentDirectoryPath string, name string, 
isDeleteData bool, isRecursive bool, ignoreRecursiveErr bool, isFromOtherCluster bool, signatures []int32) error { + _, err := DoRemoveWithResponse(ctx, client, parentDirectoryPath, name, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster, signatures) + return err +} + +func RemoveWithResponse(ctx context.Context, filerClient FilerClient, parentDirectoryPath, name string, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster bool, signatures []int32) (*DeleteEntryResponse, error) { + var resp *DeleteEntryResponse + err := filerClient.WithFilerClient(false, func(client SeaweedFilerClient) error { + var innerErr error + resp, innerErr = DoRemoveWithResponse(ctx, client, parentDirectoryPath, name, isDeleteData, isRecursive, ignoreRecursiveErr, isFromOtherCluster, signatures) + return innerErr + }) + return resp, err +} + +func DoRemoveWithResponse(ctx context.Context, client SeaweedFilerClient, parentDirectoryPath string, name string, isDeleteData bool, isRecursive bool, ignoreRecursiveErr bool, isFromOtherCluster bool, signatures []int32) (*DeleteEntryResponse, error) { deleteEntryRequest := &DeleteEntryRequest{ Directory: parentDirectoryPath, Name: name, @@ -294,19 +336,18 @@ func DoRemove(ctx context.Context, client SeaweedFilerClient, parentDirectoryPat } if resp, err := client.DeleteEntry(ctx, deleteEntryRequest); err != nil { if strings.Contains(err.Error(), ErrNotFound.Error()) { - return nil + return nil, nil } - return err + return nil, err } else { if resp.Error != "" { if strings.Contains(resp.Error, ErrNotFound.Error()) { - return nil + return nil, nil } - return errors.New(resp.Error) + return nil, errors.New(resp.Error) } + return resp, nil } - - return nil } // DoDeleteEmptyParentDirectories recursively deletes empty parent directories. 
diff --git a/weed/pb/filer_pb/filer_client_snapshot_test.go b/weed/pb/filer_pb/filer_client_snapshot_test.go new file mode 100644 index 000000000..a6521bd4d --- /dev/null +++ b/weed/pb/filer_pb/filer_client_snapshot_test.go @@ -0,0 +1,165 @@ +package filer_pb + +import ( + "context" + "fmt" + "io" + "testing" + + "github.com/seaweedfs/seaweedfs/weed/util" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" + "google.golang.org/protobuf/proto" +) + +type snapshotListStream struct { + responses []*ListEntriesResponse + index int +} + +func (s *snapshotListStream) Recv() (*ListEntriesResponse, error) { + if s.index >= len(s.responses) { + return nil, io.EOF + } + resp := s.responses[s.index] + s.index++ + return resp, nil +} + +func (s *snapshotListStream) Header() (metadata.MD, error) { return metadata.MD{}, nil } +func (s *snapshotListStream) Trailer() metadata.MD { return metadata.MD{} } +func (s *snapshotListStream) CloseSend() error { return nil } +func (s *snapshotListStream) Context() context.Context { return context.Background() } +func (s *snapshotListStream) SendMsg(any) error { return nil } +func (s *snapshotListStream) RecvMsg(any) error { return nil } + +type snapshotListClient struct { + SeaweedFilerClient + entries []*Entry + requests []*ListEntriesRequest + snapshotTs int64 + listCalled bool +} + +func (c *snapshotListClient) ListEntries(ctx context.Context, in *ListEntriesRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[ListEntriesResponse], error) { + c.listCalled = true + c.requests = append(c.requests, proto.Clone(in).(*ListEntriesRequest)) + + start := 0 + if in.StartFromFileName != "" { + start = len(c.entries) + for i, entry := range c.entries { + if entry.Name == in.StartFromFileName { + start = i + if !in.InclusiveStartFrom { + start++ + } + break + } + } + } + + end := len(c.entries) + if in.Limit > 0 && start+int(in.Limit) < end { + end = start + int(in.Limit) + } + + snapshotTs := in.SnapshotTsNs + if snapshotTs == 0 
{ + snapshotTs = c.snapshotTs + } + + responses := make([]*ListEntriesResponse, 0, end-start) + for i, entry := range c.entries[start:end] { + resp := &ListEntriesResponse{ + Entry: entry, + } + if i == 0 { + resp.SnapshotTsNs = snapshotTs + } + responses = append(responses, resp) + } + + return &snapshotListStream{responses: responses}, nil +} + +type snapshotFilerAccessor struct { + client SeaweedFilerClient +} + +func (a *snapshotFilerAccessor) WithFilerClient(_ bool, fn func(SeaweedFilerClient) error) error { + return fn(a.client) +} + +func (a *snapshotFilerAccessor) AdjustedUrl(*Location) string { return "" } +func (a *snapshotFilerAccessor) GetDataCenter() string { return "" } + +func TestReadDirAllEntriesWithSnapshotCarriesSnapshotAcrossPages(t *testing.T) { + entries := make([]*Entry, 0, 10001) + for i := 0; i < 10001; i++ { + entries = append(entries, &Entry{Name: fmt.Sprintf("entry-%05d", i), Attributes: &FuseAttributes{}}) + } + + client := &snapshotListClient{ + entries: entries, + snapshotTs: 123456789, + } + accessor := &snapshotFilerAccessor{client: client} + + var listed []string + snapshotTs, err := ReadDirAllEntriesWithSnapshot(context.Background(), accessor, util.FullPath("/dir"), "", func(entry *Entry, isLast bool) error { + listed = append(listed, entry.Name) + return nil + }) + if err != nil { + t.Fatalf("ReadDirAllEntriesWithSnapshot: %v", err) + } + + if got := len(listed); got != len(entries) { + t.Fatalf("listed %d entries, want %d", got, len(entries)) + } + if snapshotTs != client.snapshotTs { + t.Fatalf("snapshotTs = %d, want %d", snapshotTs, client.snapshotTs) + } + if got := len(client.requests); got != 2 { + t.Fatalf("request count = %d, want 2", got) + } + if client.requests[0].SnapshotTsNs != 0 { + t.Fatalf("first request snapshot = %d, want 0", client.requests[0].SnapshotTsNs) + } + if client.requests[1].SnapshotTsNs != client.snapshotTs { + t.Fatalf("second request snapshot = %d, want %d", client.requests[1].SnapshotTsNs, 
client.snapshotTs) + } + if client.requests[1].StartFromFileName != entries[9999].Name { + t.Fatalf("second request marker = %q, want %q", client.requests[1].StartFromFileName, entries[9999].Name) + } +} + +func TestReadDirAllEntriesWithSnapshotEmptyDirectory(t *testing.T) { + client := &snapshotListClient{ + entries: nil, // empty directory + snapshotTs: 999888777, + } + accessor := &snapshotFilerAccessor{client: client} + + var listed []string + snapshotTs, err := ReadDirAllEntriesWithSnapshot(context.Background(), accessor, util.FullPath("/empty"), "", func(entry *Entry, isLast bool) error { + listed = append(listed, entry.Name) + return nil + }) + if err != nil { + t.Fatalf("ReadDirAllEntriesWithSnapshot: %v", err) + } + if len(listed) != 0 { + t.Fatalf("listed %d entries, want 0", len(listed)) + } + // When the server sends no entries (empty directory), no snapshot is + // received. The client returns 0 so callers like CompleteDirectoryBuild + // know to replay all buffered events without clock-skew filtering. 
+ if snapshotTs != 0 { + t.Fatalf("snapshotTs = %d, want 0 for empty directory", snapshotTs) + } + if !client.listCalled { + t.Fatal("ListEntries was not invoked for the empty directory") + } +} diff --git a/weed/pb/filer_pb/filer_pb_helper.go b/weed/pb/filer_pb/filer_pb_helper.go index c776f83d7..fed902824 100644 --- a/weed/pb/filer_pb/filer_pb_helper.go +++ b/weed/pb/filer_pb/filer_pb_helper.go @@ -135,25 +135,35 @@ func AfterEntryDeserialization(chunks []*FileChunk) { } func CreateEntry(ctx context.Context, client SeaweedFilerClient, request *CreateEntryRequest) error { + _, err := CreateEntryWithResponse(ctx, client, request) + return err +} + +func CreateEntryWithResponse(ctx context.Context, client SeaweedFilerClient, request *CreateEntryRequest) (*CreateEntryResponse, error) { resp, err := client.CreateEntry(ctx, request) if err != nil { glog.V(1).InfofCtx(ctx, "create entry %s/%s %v: %v", request.Directory, request.Entry.Name, request.OExcl, err) - return fmt.Errorf("CreateEntry: %w", err) + return nil, fmt.Errorf("CreateEntry: %w", err) } if resp.Error != "" { glog.V(1).InfofCtx(ctx, "create entry %s/%s %v: %v", request.Directory, request.Entry.Name, request.OExcl, resp.Error) - return fmt.Errorf("CreateEntry : %v", resp.Error) + return nil, fmt.Errorf("CreateEntry: %w", errors.New(resp.Error)) } - return nil + return resp, nil } func UpdateEntry(ctx context.Context, client SeaweedFilerClient, request *UpdateEntryRequest) error { - _, err := client.UpdateEntry(ctx, request) + _, err := UpdateEntryWithResponse(ctx, client, request) + return err +} + +func UpdateEntryWithResponse(ctx context.Context, client SeaweedFilerClient, request *UpdateEntryRequest) (*UpdateEntryResponse, error) { + resp, err := client.UpdateEntry(ctx, request) if err != nil { glog.V(1).InfofCtx(ctx, "update entry %s/%s :%v", request.Directory, request.Entry.Name, err) - return fmt.Errorf("UpdateEntry: %w", err) + return nil, fmt.Errorf("UpdateEntry: %w", err) } - return nil + return 
resp, nil } func LookupEntry(ctx context.Context, client SeaweedFilerClient, request *LookupDirectoryEntryRequest) (*LookupDirectoryEntryResponse, error) { diff --git a/weed/query/engine/hybrid_message_scanner.go b/weed/query/engine/hybrid_message_scanner.go index 8fa9f4381..b8477acfb 100644 --- a/weed/query/engine/hybrid_message_scanner.go +++ b/weed/query/engine/hybrid_message_scanner.go @@ -625,6 +625,9 @@ func (hms *HybridMessageScanner) countLiveLogFiles(partition topic.Partition) (i return err } + if resp.Entry == nil { + continue + } // Count files that are not .parquet files (live log files) // Live log files typically have timestamps or are named like log files fileName := resp.Entry.Name diff --git a/weed/s3api/s3_objectlock/object_lock_check.go b/weed/s3api/s3_objectlock/object_lock_check.go index 2cd79b340..c1c8c0b18 100644 --- a/weed/s3api/s3_objectlock/object_lock_check.go +++ b/weed/s3api/s3_objectlock/object_lock_check.go @@ -99,8 +99,11 @@ func paginateEntries(ctx context.Context, client filer_pb.SeaweedFilerClient, di } return fmt.Errorf("failed to receive entry from %s: %w", dir, recvErr) } - entriesReceived = true entry := entryResp.Entry + if entry == nil { + continue + } + entriesReceived = true lastFileName = entry.Name // Skip invalid entry names to prevent path traversal diff --git a/weed/s3api/s3api_object_handlers_list.go b/weed/s3api/s3api_object_handlers_list.go index d32cbb415..5f647ae83 100644 --- a/weed/s3api/s3api_object_handlers_list.go +++ b/weed/s3api/s3api_object_handlers_list.go @@ -615,6 +615,9 @@ func (s3a *S3ApiServer) doListFilerEntries(client filer_pb.SeaweedFilerClient, d } } entry := resp.Entry + if entry == nil { + continue + } // listFilerEntries always calls doListFilerEntries with inclusiveStartFrom=false // (S3 marker semantics are exclusive), but keep the guard explicit to preserve // behavior if inclusive callers are introduced in the future. 
diff --git a/weed/server/filer_grpc_server.go b/weed/server/filer_grpc_server.go index 4510a0397..219703ad5 100644 --- a/weed/server/filer_grpc_server.go +++ b/weed/server/filer_grpc_server.go @@ -53,14 +53,24 @@ func (fs *FilerServer) ListEntries(req *filer_pb.ListEntriesRequest, stream file lastFileName := req.StartFromFileName includeLastFile := req.InclusiveStartFrom + snapshotTsNs := req.SnapshotTsNs + if snapshotTsNs == 0 { + snapshotTsNs = time.Now().UnixNano() + } + sentSnapshot := false var listErr error for limit > 0 { var hasEntries bool lastFileName, listErr = fs.filer.StreamListDirectoryEntries(stream.Context(), util.FullPath(req.Directory), lastFileName, includeLastFile, int64(paginationLimit), req.Prefix, "", "", func(entry *filer.Entry) (bool, error) { hasEntries = true - if err = stream.Send(&filer_pb.ListEntriesResponse{ + resp := &filer_pb.ListEntriesResponse{ Entry: entry.ToProtoEntry(), - }); err != nil { + } + if !sentSnapshot { + resp.SnapshotTsNs = snapshotTsNs + sentSnapshot = true + } + if err = stream.Send(resp); err != nil { return false, err } @@ -78,13 +88,20 @@ func (fs *FilerServer) ListEntries(req *filer_pb.ListEntriesRequest, stream file return err } if !hasEntries { - return nil + break } includeLastFile = false } + // For empty directories we intentionally do NOT send a snapshot-only + // response (Entry == nil). Many consumers (Java FilerClient, S3 listing, + // etc.) treat any received response as an entry. The Go client-side + // DoSeaweedListWithSnapshot generates a client-side cutoff when the + // server sends no snapshot, so snapshot consistency is preserved + // without a server-side send. 
+ return nil } @@ -162,10 +179,12 @@ func (fs *FilerServer) CreateEntry(ctx context.Context, req *filer_pb.CreateEntr newEntry.TtlSec = 0 } + ctx, eventSink := filer.WithMetadataEventSink(ctx) createErr := fs.filer.CreateEntry(ctx, newEntry, req.OExcl, req.IsFromOtherCluster, req.Signatures, req.SkipCheckParentDirectory, so.MaxFileNameLength) if createErr == nil { fs.filer.DeleteChunksNotRecursive(garbage) + resp.MetadataEvent = eventSink.Last() } else { glog.V(3).InfofCtx(ctx, "CreateEntry %s: %v", filepath.Join(req.Directory, req.Entry.Name), createErr) resp.Error = createErr.Error() @@ -201,16 +220,19 @@ func (fs *FilerServer) UpdateEntry(ctx context.Context, req *filer_pb.UpdateEntr return &filer_pb.UpdateEntryResponse{}, err } + ctx, eventSink := filer.WithMetadataEventSink(ctx) + resp := &filer_pb.UpdateEntryResponse{} if err = fs.filer.UpdateEntry(ctx, entry, newEntry); err == nil { fs.filer.DeleteChunksNotRecursive(garbage) fs.filer.NotifyUpdateEvent(ctx, entry, newEntry, true, req.IsFromOtherCluster, req.Signatures) + resp.MetadataEvent = eventSink.Last() } else { glog.V(3).InfofCtx(ctx, "UpdateEntry %s: %v", filepath.Join(req.Directory, req.Entry.Name), err) } - return &filer_pb.UpdateEntryResponse{}, err + return resp, err } func (fs *FilerServer) cleanupChunks(ctx context.Context, fullpath string, existingEntry *filer.Entry, newEntry *filer_pb.Entry) (chunks, garbage []*filer_pb.FileChunk, err error) { @@ -303,10 +325,13 @@ func (fs *FilerServer) DeleteEntry(ctx context.Context, req *filer_pb.DeleteEntr glog.V(4).InfofCtx(ctx, "DeleteEntry %v", req) + ctx, eventSink := filer.WithMetadataEventSink(ctx) err = fs.filer.DeleteEntryMetaAndData(ctx, util.JoinPath(req.Directory, req.Name), req.IsRecursive, req.IgnoreRecursiveError, req.IsDeleteData, req.IsFromOtherCluster, req.Signatures, req.IfNotModifiedAfter) resp = &filer_pb.DeleteEntryResponse{} if err != nil && err != filer_pb.ErrNotFound { resp.Error = err.Error() + } else { + resp.MetadataEvent = 
eventSink.Last() } return resp, nil } diff --git a/weed/server/filer_grpc_server_remote.go b/weed/server/filer_grpc_server_remote.go index 7aafe0799..e790a12ef 100644 --- a/weed/server/filer_grpc_server_remote.go +++ b/weed/server/filer_grpc_server_remote.go @@ -259,9 +259,11 @@ func (fs *FilerServer) doCacheRemoteObjectToLocalCluster(ctx context.Context, re } fs.filer.DeleteChunks(ctx, entry.FullPath, garbage) + ctx, eventSink := filer.WithMetadataEventSink(ctx) fs.filer.NotifyUpdateEvent(ctx, entry, newEntry, true, false, nil) resp.Entry = newEntry.ToProtoEntry() + resp.MetadataEvent = eventSink.Last() return resp, nil