Browse Source

filer: auto clean empty implicit s3 folders (#8051)

* filer: auto clean empty s3 implicit folders

Explicitly tag implicitly created S3 folders (parent directories from object uploads) with 'Seaweed-X-Amz-Implicit-Dir'.

Update EmptyFolderCleaner to check for this attribute and cache the result efficiently.

* filer: correctly handle nil attributes in empty folder cleaner cache

* filer: refine implicit tagging logic

Prevent tagging buckets as implicit directories. Reduce code duplication.

* filer: safeguard GetEntryAttributes against nil entry and not found error

* filer: move ErrNotFound handling to EmptyFolderCleaner

* filer: add comment to explain level > 3 check for implicit directories
pull/8052/head
Chris Lu 1 day ago
committed by GitHub
parent
commit
8880f9932f
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 45
      weed/filer/empty_folder_cleanup/empty_folder_cleaner.go
  2. 22
      weed/filer/filer.go
  3. 1
      weed/s3api/s3_constants/extend_key.go

45
weed/filer/empty_folder_cleanup/empty_folder_cleaner.go

@ -9,6 +9,8 @@ import (
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager" "github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb" "github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/util" "github.com/seaweedfs/seaweedfs/weed/util"
) )
@ -24,11 +26,13 @@ const (
type FilerOperations interface { type FilerOperations interface {
CountDirectoryEntries(ctx context.Context, dirPath util.FullPath, limit int) (count int, err error) CountDirectoryEntries(ctx context.Context, dirPath util.FullPath, limit int) (count int, err error)
DeleteEntryMetaAndData(ctx context.Context, p util.FullPath, isRecursive, ignoreRecursiveError, shouldDeleteChunks, isFromOtherCluster bool, signatures []int32, ifNotModifiedAfter int64) error DeleteEntryMetaAndData(ctx context.Context, p util.FullPath, isRecursive, ignoreRecursiveError, shouldDeleteChunks, isFromOtherCluster bool, signatures []int32, ifNotModifiedAfter int64) error
GetEntryAttributes(ctx context.Context, p util.FullPath) (attributes map[string][]byte, err error)
} }
// folderState tracks the state of a folder for empty folder cleanup // folderState tracks the state of a folder for empty folder cleanup
type folderState struct { type folderState struct {
roughCount int // Cached rough count (up to maxCountCheck) roughCount int // Cached rough count (up to maxCountCheck)
isImplicit *bool // Tri-state boolean: nil (unknown), true (implicit), false (explicit)
lastAddTime time.Time // Last time an item was added lastAddTime time.Time // Last time an item was added
lastDelTime time.Time // Last time an item was deleted lastDelTime time.Time // Last time an item was deleted
lastCheck time.Time // Last time we checked the actual count lastCheck time.Time // Last time we checked the actual count
@ -265,8 +269,47 @@ func (efc *EmptyFolderCleaner) executeCleanup(folder string) {
return return
} }
// Check if folder is actually empty (count up to maxCountCheck)
// Check whether the folder carries the implicit-directory attribute (s3_constants.ExtS3ImplicitDir)
// First check the cache
ctx := context.Background() ctx := context.Background()
efc.mu.RLock()
var cachedImplicit *bool
if state, exists := efc.folderCounts[folder]; exists {
cachedImplicit = state.isImplicit
}
efc.mu.RUnlock()
var isImplicit bool
if cachedImplicit != nil {
isImplicit = *cachedImplicit
} else {
// Not cached, check filer
attrs, err := efc.filer.GetEntryAttributes(ctx, util.FullPath(folder))
if err != nil {
if err == filer_pb.ErrNotFound {
return
}
glog.V(2).Infof("EmptyFolderCleaner: error getting attributes for %s: %v", folder, err)
return
}
isImplicit = attrs != nil && string(attrs[s3_constants.ExtS3ImplicitDir]) == "true"
// Update cache
efc.mu.Lock()
if _, exists := efc.folderCounts[folder]; !exists {
efc.folderCounts[folder] = &folderState{}
}
efc.folderCounts[folder].isImplicit = &isImplicit
efc.mu.Unlock()
}
if !isImplicit {
glog.V(4).Infof("EmptyFolderCleaner: folder %s is not marked as implicit, skipping", folder)
return
}
// Check if folder is actually empty (count up to maxCountCheck)
count, err := efc.countItems(ctx, folder) count, err := efc.countItems(ctx, folder)
if err != nil { if err != nil {
glog.V(2).Infof("EmptyFolderCleaner: error counting items in %s: %v", folder, err) glog.V(2).Infof("EmptyFolderCleaner: error counting items in %s: %v", folder, err)

22
weed/filer/filer.go

@ -9,6 +9,7 @@ import (
"strings" "strings"
"time" "time"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3bucket" "github.com/seaweedfs/seaweedfs/weed/s3api/s3bucket"
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager" "github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
@ -273,7 +274,8 @@ func (f *Filer) ensureParentDirectoryEntry(ctx context.Context, entry *Entry, di
// fmt.Printf("dirParts: %v %v %v\n", dirParts[0], dirParts[1], dirParts[2]) // fmt.Printf("dirParts: %v %v %v\n", dirParts[0], dirParts[1], dirParts[2])
// dirParts[0] == "" and dirParts[1] == "buckets" // dirParts[0] == "" and dirParts[1] == "buckets"
if len(dirParts) >= 3 && dirParts[1] == "buckets" {
isUnderBuckets := len(dirParts) >= 3 && dirParts[1] == "buckets"
if isUnderBuckets {
if err := s3bucket.VerifyS3BucketName(dirParts[2]); err != nil { if err := s3bucket.VerifyS3BucketName(dirParts[2]); err != nil {
return fmt.Errorf("invalid bucket name %s: %v", dirParts[2], err) return fmt.Errorf("invalid bucket name %s: %v", dirParts[2], err)
} }
@ -299,6 +301,13 @@ func (f *Filer) ensureParentDirectoryEntry(ctx context.Context, entry *Entry, di
GroupNames: entry.GroupNames, GroupNames: entry.GroupNames,
}, },
} }
// level > 3 corresponds to a path depth greater than "/buckets/<bucket_name>",
// ensuring we only mark subdirectories within a bucket as implicit.
if isUnderBuckets && level > 3 {
dirEntry.Extended = map[string][]byte{
s3_constants.ExtS3ImplicitDir: []byte("true"),
}
}
glog.V(2).InfofCtx(ctx, "create directory: %s %v", dirPath, dirEntry.Mode) glog.V(2).InfofCtx(ctx, "create directory: %s %v", dirPath, dirEntry.Mode)
mkdirErr := f.Store.InsertEntry(ctx, dirEntry) mkdirErr := f.Store.InsertEntry(ctx, dirEntry)
@ -521,3 +530,14 @@ func (f *Filer) Shutdown() {
f.LocalMetaLogBuffer.ShutdownLogBuffer() f.LocalMetaLogBuffer.ShutdownLogBuffer()
f.Store.Shutdown() f.Store.Shutdown()
} }
// GetEntryAttributes looks up the entry stored at p with FindEntry and
// returns that entry's Extended attribute map.
//
// An error from FindEntry is returned unchanged together with a nil map.
// If FindEntry reports no error but yields a nil entry, the result is
// (nil, nil), so callers must be prepared for a nil map.
func (f *Filer) GetEntryAttributes(ctx context.Context, p util.FullPath) (map[string][]byte, error) {
	found, findErr := f.FindEntry(ctx, p)
	switch {
	case findErr != nil:
		// Propagate the lookup failure as-is; the caller decides how to treat it.
		return nil, findErr
	case found == nil:
		// No entry and no error: nothing to report.
		return nil, nil
	}
	return found.Extended, nil
}

1
weed/s3api/s3_constants/extend_key.go

@ -11,6 +11,7 @@ const (
ExtETagKey = "Seaweed-X-Amz-ETag" ExtETagKey = "Seaweed-X-Amz-ETag"
ExtLatestVersionIdKey = "Seaweed-X-Amz-Latest-Version-Id" ExtLatestVersionIdKey = "Seaweed-X-Amz-Latest-Version-Id"
ExtLatestVersionFileNameKey = "Seaweed-X-Amz-Latest-Version-File-Name" ExtLatestVersionFileNameKey = "Seaweed-X-Amz-Latest-Version-File-Name"
ExtS3ImplicitDir = "Seaweed-X-Amz-Implicit-Dir"
// Cached list metadata in .versions directory for single-scan efficiency // Cached list metadata in .versions directory for single-scan efficiency
ExtLatestVersionSizeKey = "Seaweed-X-Amz-Latest-Version-Size" ExtLatestVersionSizeKey = "Seaweed-X-Amz-Latest-Version-Size"
ExtLatestVersionETagKey = "Seaweed-X-Amz-Latest-Version-ETag" ExtLatestVersionETagKey = "Seaweed-X-Amz-Latest-Version-ETag"

Loading…
Cancel
Save