// FilerOperations lists the filer calls that the empty folder cleaner depends on.
type FilerOperations interface {
	// CountDirectoryEntries returns a count of entries for dirPath.
	// NOTE(review): limit presumably caps how many entries are counted — confirm against the implementation.
	CountDirectoryEntries(ctx context.Context, dirPath util.FullPath, limit int) (count int, err error)
	// DeleteEntryMetaAndData deletes the entry at p.
	// NOTE(review): the exact effect of each flag is defined by the implementation, which is not visible here.
	DeleteEntryMetaAndData(ctx context.Context, p util.FullPath, isRecursive, ignoreRecursiveError, shouldDeleteChunks, isFromOtherCluster bool, signatures []int32, ifNotModifiedAfter int64) error
	// GetEntryAttributes returns the attribute map stored for the entry at p.
	// The cleaner reads the s3_constants.ExtS3ImplicitDir key from the result
	// and treats a filer_pb.ErrNotFound error as "folder is gone".
	// The returned map may be nil.
	GetEntryAttributes(ctx context.Context, p util.FullPath) (attributes map[string][]byte, err error)
}
Not cached, check filer + attrs, err := efc.filer.GetEntryAttributes(ctx, util.FullPath(folder)) + if err != nil { + if err == filer_pb.ErrNotFound { + return + } + glog.V(2).Infof("EmptyFolderCleaner: error getting attributes for %s: %v", folder, err) + return + } + + isImplicit = attrs != nil && string(attrs[s3_constants.ExtS3ImplicitDir]) == "true" + + // Update cache + efc.mu.Lock() + if _, exists := efc.folderCounts[folder]; !exists { + efc.folderCounts[folder] = &folderState{} + } + efc.folderCounts[folder].isImplicit = &isImplicit + efc.mu.Unlock() + } + + if !isImplicit { + glog.V(4).Infof("EmptyFolderCleaner: folder %s is not marked as implicit, skipping", folder) + return + } + + // Check if folder is actually empty (count up to maxCountCheck) count, err := efc.countItems(ctx, folder) if err != nil { glog.V(2).Infof("EmptyFolderCleaner: error counting items in %s: %v", folder, err) diff --git a/weed/filer/filer.go b/weed/filer/filer.go index b3114d112..1b6cbe769 100644 --- a/weed/filer/filer.go +++ b/weed/filer/filer.go @@ -9,6 +9,7 @@ import ( "strings" "time" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" "github.com/seaweedfs/seaweedfs/weed/s3api/s3bucket" "github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager" @@ -273,7 +274,8 @@ func (f *Filer) ensureParentDirectoryEntry(ctx context.Context, entry *Entry, di // fmt.Printf("dirParts: %v %v %v\n", dirParts[0], dirParts[1], dirParts[2]) // dirParts[0] == "" and dirParts[1] == "buckets" - if len(dirParts) >= 3 && dirParts[1] == "buckets" { + isUnderBuckets := len(dirParts) >= 3 && dirParts[1] == "buckets" + if isUnderBuckets { if err := s3bucket.VerifyS3BucketName(dirParts[2]); err != nil { return fmt.Errorf("invalid bucket name %s: %v", dirParts[2], err) } @@ -299,6 +301,13 @@ func (f *Filer) ensureParentDirectoryEntry(ctx context.Context, entry *Entry, di GroupNames: entry.GroupNames, }, } + // level > 3 corresponds to a path depth greater than "/buckets/", + // ensuring we only mark 
subdirectories within a bucket as implicit. + if isUnderBuckets && level > 3 { + dirEntry.Extended = map[string][]byte{ + s3_constants.ExtS3ImplicitDir: []byte("true"), + } + } glog.V(2).InfofCtx(ctx, "create directory: %s %v", dirPath, dirEntry.Mode) mkdirErr := f.Store.InsertEntry(ctx, dirEntry) @@ -521,3 +530,14 @@ func (f *Filer) Shutdown() { f.LocalMetaLogBuffer.ShutdownLogBuffer() f.Store.Shutdown() } + +func (f *Filer) GetEntryAttributes(ctx context.Context, p util.FullPath) (map[string][]byte, error) { + entry, err := f.FindEntry(ctx, p) + if err != nil { + return nil, err + } + if entry == nil { + return nil, nil + } + return entry.Extended, nil +} diff --git a/weed/s3api/s3_constants/extend_key.go b/weed/s3api/s3_constants/extend_key.go index 56f2ac1ea..b4c030d49 100644 --- a/weed/s3api/s3_constants/extend_key.go +++ b/weed/s3api/s3_constants/extend_key.go @@ -11,6 +11,7 @@ const ( ExtETagKey = "Seaweed-X-Amz-ETag" ExtLatestVersionIdKey = "Seaweed-X-Amz-Latest-Version-Id" ExtLatestVersionFileNameKey = "Seaweed-X-Amz-Latest-Version-File-Name" + ExtS3ImplicitDir = "Seaweed-X-Amz-Implicit-Dir" // Cached list metadata in .versions directory for single-scan efficiency ExtLatestVersionSizeKey = "Seaweed-X-Amz-Latest-Version-Size" ExtLatestVersionETagKey = "Seaweed-X-Amz-Latest-Version-ETag"