Browse Source

Fix S3 delete for non-empty directory markers (#8740)

* Fix S3 delete for non-empty directory markers

* Address review feedback on directory marker deletes

* Stabilize FUSE concurrent directory operations
fix/subscribe-metadata-slow-consumer-blocked
Chris Lu 18 hours ago
committed by GitHub
parent
commit
d5ee35c8df
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 25
      test/fuse_integration/framework_test.go
  2. 45
      test/s3/normal/s3_integration_test.go
  3. 21
      weed/mount/filehandle.go
  4. 52
      weed/mount/filehandle_test.go
  5. 6
      weed/mount/weedfs_async_flush.go
  6. 3
      weed/mount/weedfs_file_sync.go
  7. 1
      weed/mount/weedfs_filehandle.go
  8. 7
      weed/mount/weedfs_rename.go
  9. 93
      weed/s3api/filer_util.go
  10. 152
      weed/s3api/filer_util_delete_test.go
  11. 2
      weed/s3api/s3api_object_handlers_copy.go
  12. 7
      weed/s3api/s3api_object_handlers_delete.go
  13. 2
      weed/s3api/s3api_object_versioning.go

25
test/fuse_integration/framework_test.go

@ -97,11 +97,26 @@ func NewFuseTestFramework(t *testing.T, config *TestConfig) *FuseTestFramework {
// freePort asks the OS for a free TCP port.
func freePort(t *testing.T) int {
t.Helper()
l, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
port := l.Addr().(*net.TCPAddr).Port
l.Close()
return port
const (
minServicePort = 20000
maxServicePort = 55535 // SeaweedFS gRPC service uses httpPort + 10000.
)
portCount := maxServicePort - minServicePort + 1
start := minServicePort + int(time.Now().UnixNano()%int64(portCount))
for attempt := 0; attempt < 512; attempt++ {
port := minServicePort + (start-minServicePort+attempt)%portCount
l, err := net.Listen("tcp", net.JoinHostPort("127.0.0.1", strconv.Itoa(port)))
if err != nil {
continue
}
l.Close()
return port
}
t.Fatalf("failed to allocate port <= %d after repeated attempts", maxServicePort)
return 0
}
// Setup starts SeaweedFS cluster and mounts FUSE filesystem

45
test/s3/normal/s3_integration_test.go

@ -99,6 +99,10 @@ func TestS3Integration(t *testing.T) {
testDeleteObject(t, cluster)
})
t.Run("DeleteDirectoryMarkerWithChildren", func(t *testing.T) {
testDeleteDirectoryMarkerWithChildren(t, cluster)
})
t.Run("DeleteBucket", func(t *testing.T) {
testDeleteBucket(t, cluster)
})
@ -754,6 +758,47 @@ func testDeleteObject(t *testing.T, cluster *TestCluster) {
t.Logf("✓ Deleted object: %s/%s", bucketName, objectKey)
}
func testDeleteDirectoryMarkerWithChildren(t *testing.T, cluster *TestCluster) {
bucketName := createTestBucket(t, cluster, "test-delete-dir-marker-")
childKey := "test-content/file1.txt"
directoryMarkerKey := "test-content/"
_, err := cluster.s3Client.PutObject(&s3.PutObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(childKey),
Body: bytes.NewReader([]byte("child")),
})
require.NoError(t, err)
_, err = cluster.s3Client.PutObject(&s3.PutObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(directoryMarkerKey),
Body: bytes.NewReader(nil),
ContentType: aws.String("application/octet-stream"),
})
require.NoError(t, err)
_, err = cluster.s3Client.DeleteObject(&s3.DeleteObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(directoryMarkerKey),
})
require.NoError(t, err, "Deleting a directory marker should succeed even when children exist")
listResp, err := cluster.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{
Bucket: aws.String(bucketName),
Prefix: aws.String("test-content/"),
})
require.NoError(t, err)
foundKeys := make(map[string]bool)
for _, obj := range listResp.Contents {
foundKeys[aws.StringValue(obj.Key)] = true
}
assert.True(t, foundKeys[childKey], "Child object should remain after deleting the directory marker")
assert.False(t, foundKeys[directoryMarkerKey], "Directory marker should no longer be listed after deletion")
}
func testDeleteBucket(t *testing.T, cluster *TestCluster) {
bucketName := "test-delete-bucket-" + randomString(8)

21
weed/mount/filehandle.go

@ -4,6 +4,7 @@ import (
"os"
"sync"
"github.com/seaweedfs/go-fuse/v2/fuse"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
@ -31,8 +32,8 @@ type FileHandle struct {
asyncFlushPending bool // set in writebackCache mode to defer flush to Release
asyncFlushUid uint32 // saved uid for deferred metadata flush
asyncFlushGid uint32 // saved gid for deferred metadata flush
asyncFlushDir string // saved directory at defer time (fallback if inode forgotten)
asyncFlushName string // saved file name at defer time (fallback if inode forgotten)
savedDir string // last known parent path if inode-to-path state is forgotten
savedName string // last known file name if inode-to-path state is forgotten
isDeleted bool
@ -73,8 +74,20 @@ func newFileHandle(wfs *WFS, handleId FileHandleId, inode uint64, entry *filer_p
}
func (fh *FileHandle) FullPath() util.FullPath {
fp, _ := fh.wfs.inodeToPath.GetPath(fh.inode)
return fp
if fp, status := fh.wfs.inodeToPath.GetPath(fh.inode); status == fuse.OK {
return fp
}
if fh.savedName != "" {
return util.FullPath(fh.savedDir).Child(fh.savedName)
}
return ""
}
func (fh *FileHandle) RememberPath(fullPath util.FullPath) {
if fullPath == "" {
return
}
fh.savedDir, fh.savedName = fullPath.DirAndName()
}
func (fh *FileHandle) GetEntry() *LockedEntry {

52
weed/mount/filehandle_test.go

@ -0,0 +1,52 @@
package mount
import (
"testing"
"github.com/seaweedfs/seaweedfs/weed/util"
)
func TestFileHandleFullPathFallsBackAfterForget(t *testing.T) {
wfs := &WFS{
inodeToPath: NewInodeToPath(util.FullPath("/"), 0),
}
fullPath := util.FullPath("/worker_0/subdir_0/test.txt")
inode := wfs.inodeToPath.Lookup(fullPath, 1, false, false, 0, true)
fh := &FileHandle{
inode: inode,
wfs: wfs,
}
fh.RememberPath(fullPath)
wfs.inodeToPath.Forget(inode, 1, nil)
if got := fh.FullPath(); got != fullPath {
t.Fatalf("FullPath() after forget = %q, want %q", got, fullPath)
}
}
func TestFileHandleFullPathUsesSavedRenamePathAfterForget(t *testing.T) {
wfs := &WFS{
inodeToPath: NewInodeToPath(util.FullPath("/"), 0),
}
oldPath := util.FullPath("/worker_0/subdir_0/test.txt")
newPath := util.FullPath("/worker_0/subdir_1/test.txt")
inode := wfs.inodeToPath.Lookup(oldPath, 1, false, false, 0, true)
fh := &FileHandle{
inode: inode,
wfs: wfs,
}
fh.RememberPath(oldPath)
wfs.inodeToPath.MovePath(oldPath, newPath)
fh.RememberPath(newPath)
wfs.inodeToPath.Forget(inode, 1, nil)
if got := fh.FullPath(); got != newPath {
t.Fatalf("FullPath() after rename+forget = %q, want %q", got, newPath)
}
}

6
weed/mount/weedfs_async_flush.go

@ -41,12 +41,12 @@ func (wfs *WFS) completeAsyncFlush(fh *FileHandle) {
// Try GetPath first — it reflects any rename that happened
// after close(). If the inode mapping is gone (Forget
// dropped it after the kernel's lookup count hit zero), fall
// back to the dir/name saved at doFlush time. Rename also
// updates the saved path, so the fallback is always current.
// back to the last path saved on the handle. Rename keeps
// that fallback current, so it is always the newest known path.
//
// Forget does NOT mean the file was deleted — it only means
// the kernel evicted its cache entry.
dir, name := fh.asyncFlushDir, fh.asyncFlushName
dir, name := fh.savedDir, fh.savedName
fileFullPath := util.FullPath(dir).Child(name)
if resolvedPath, status := wfs.inodeToPath.GetPath(fh.inode); status == fuse.OK {

3
weed/mount/weedfs_file_sync.go

@ -97,6 +97,7 @@ func (wfs *WFS) doFlush(fh *FileHandle, uid, gid uint32, allowAsync bool) fuse.S
// flush works at fh level
fileFullPath := fh.FullPath()
fh.RememberPath(fileFullPath)
dir, name := fileFullPath.DirAndName()
// send the data to the OS
glog.V(4).Infof("doFlush %s fh %d", fileFullPath, fh.fh)
@ -112,8 +113,6 @@ func (wfs *WFS) doFlush(fh *FileHandle, uid, gid uint32, allowAsync bool) fuse.S
fh.asyncFlushPending = true
fh.asyncFlushUid = uid
fh.asyncFlushGid = gid
fh.asyncFlushDir = dir
fh.asyncFlushName = name
glog.V(3).Infof("doFlush async deferred %s fh %d", fileFullPath, fh.fh)
return fuse.OK
}

1
weed/mount/weedfs_filehandle.go

@ -23,6 +23,7 @@ func (wfs *WFS) AcquireHandle(inode uint64, flags, uid, gid uint32) (fileHandle
}
// need to AcquireFileHandle again to ensure correct handle counter
fileHandle = wfs.fhMap.AcquireFileHandle(wfs, inode, entry)
fileHandle.RememberPath(path)
}
return
}

7
weed/mount/weedfs_rename.go

@ -253,12 +253,9 @@ func (wfs *WFS) handleRenameResponse(ctx context.Context, resp *filer_pb.StreamR
if entry := fh.GetEntry(); entry != nil {
entry.Name = newName
}
// Keep the saved async-flush path current so the fallback
// Keep the saved handle path current so any flush fallback
// after Forget uses the post-rename location, not the old one.
if fh.asyncFlushPending {
fh.asyncFlushDir = string(newParent)
fh.asyncFlushName = newName
}
fh.RememberPath(newPath)
}
// invalidate attr and data
// wfs.fuseServer.InodeNotify(sourceInode, 0, -1)

93
weed/s3api/filer_util.go

@ -11,6 +11,8 @@ import (
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/seaweedfs/seaweedfs/weed/util"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
func (s3a *S3ApiServer) mkdir(parentDirectoryPath string, dirName string, fn func(entry *filer_pb.Entry)) error {
@ -47,16 +49,32 @@ func (s3a *S3ApiServer) rm(parentDirectoryPath, entryName string, isDeleteData,
return s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
err := doDeleteEntry(client, parentDirectoryPath, entryName, isDeleteData, isRecursive)
if err != nil {
return err
}
return doDeleteEntry(client, parentDirectoryPath, entryName, isDeleteData, isRecursive)
})
return nil
}
func (s3a *S3ApiServer) rmObject(parentDirectoryPath, entryName string, isDeleteData, isRecursive bool) error {
return s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
return deleteObjectEntry(client, parentDirectoryPath, entryName, isDeleteData, isRecursive)
})
}
func deleteObjectEntry(client filer_pb.SeaweedFilerClient, parentDirectoryPath, entryName string, isDeleteData, isRecursive bool) error {
err := doDeleteEntry(client, parentDirectoryPath, entryName, isDeleteData, isRecursive)
if err == nil {
return nil
}
if !strings.Contains(err.Error(), filer.MsgFailDelNonEmptyFolder) {
return err
}
return demoteDirectoryMarkerToImplicitDirectory(client, parentDirectoryPath, entryName)
}
func doDeleteEntry(client filer_pb.SeaweedFilerClient, parentDirectoryPath string, entryName string, isDeleteData bool, isRecursive bool) error {
request := &filer_pb.DeleteEntryRequest{
Directory: parentDirectoryPath,
@ -78,6 +96,71 @@ func doDeleteEntry(client filer_pb.SeaweedFilerClient, parentDirectoryPath strin
return nil
}
func demoteDirectoryMarkerToImplicitDirectory(client filer_pb.SeaweedFilerClient, parentDirectoryPath, entryName string) error {
resp, err := filer_pb.LookupEntry(context.Background(), client, &filer_pb.LookupDirectoryEntryRequest{
Directory: parentDirectoryPath,
Name: entryName,
})
if err != nil {
if errors.Is(err, filer_pb.ErrNotFound) {
return nil
}
return fmt.Errorf("lookup entry %s/%s: %w", parentDirectoryPath, entryName, err)
}
if resp.Entry == nil || !resp.Entry.IsDirectory {
return nil
}
if !resp.Entry.IsDirectoryKeyObject() {
return nil
}
clearDirectoryMarkerMetadata(resp.Entry)
if err := filer_pb.UpdateEntry(context.Background(), client, &filer_pb.UpdateEntryRequest{
Directory: parentDirectoryPath,
Entry: resp.Entry,
}); err != nil {
if errors.Is(err, filer_pb.ErrNotFound) || status.Code(err) == codes.NotFound {
return nil
}
return fmt.Errorf("update entry %s/%s: %w", parentDirectoryPath, entryName, err)
}
return nil
}
func clearDirectoryMarkerMetadata(entry *filer_pb.Entry) {
if entry == nil {
return
}
if entry.Attributes == nil {
entry.Attributes = &filer_pb.FuseAttributes{}
}
entry.Attributes.Mime = ""
entry.Attributes.Md5 = nil
entry.Attributes.FileSize = 0
entry.Content = nil
entry.Chunks = nil
if len(entry.Extended) == 0 {
return
}
filtered := make(map[string][]byte)
for k, v := range entry.Extended {
lowerKey := strings.ToLower(k)
if strings.HasPrefix(lowerKey, "xattr-") || strings.HasPrefix(lowerKey, s3_constants.SeaweedFSInternalPrefix) {
filtered[k] = v
}
}
if len(filtered) == 0 {
entry.Extended = nil
return
}
entry.Extended = filtered
}
func (s3a *S3ApiServer) exists(parentDirectoryPath string, entryName string, isDirectory bool) (exists bool, err error) {
return filer_pb.Exists(context.Background(), s3a, parentDirectoryPath, entryName, isDirectory)

152
weed/s3api/filer_util_delete_test.go

@ -0,0 +1,152 @@
package s3api
import (
"context"
"errors"
"testing"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
grpc "google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
type deleteObjectEntryTestClient struct {
filer_pb.SeaweedFilerClient
deleteResp *filer_pb.DeleteEntryResponse
deleteErr error
lookupResp *filer_pb.LookupDirectoryEntryResponse
lookupErr error
updateErr error
deleteReq *filer_pb.DeleteEntryRequest
lookupReq *filer_pb.LookupDirectoryEntryRequest
updateReq *filer_pb.UpdateEntryRequest
}
func (c *deleteObjectEntryTestClient) DeleteEntry(_ context.Context, req *filer_pb.DeleteEntryRequest, _ ...grpc.CallOption) (*filer_pb.DeleteEntryResponse, error) {
c.deleteReq = req
if c.deleteResp == nil {
return &filer_pb.DeleteEntryResponse{}, c.deleteErr
}
return c.deleteResp, c.deleteErr
}
func (c *deleteObjectEntryTestClient) LookupDirectoryEntry(_ context.Context, req *filer_pb.LookupDirectoryEntryRequest, _ ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error) {
c.lookupReq = req
if c.lookupResp == nil {
return &filer_pb.LookupDirectoryEntryResponse{}, c.lookupErr
}
return c.lookupResp, c.lookupErr
}
func (c *deleteObjectEntryTestClient) UpdateEntry(_ context.Context, req *filer_pb.UpdateEntryRequest, _ ...grpc.CallOption) (*filer_pb.UpdateEntryResponse, error) {
c.updateReq = req
return &filer_pb.UpdateEntryResponse{}, c.updateErr
}
func TestDeleteObjectEntryDemotesNonEmptyDirectoryMarker(t *testing.T) {
client := &deleteObjectEntryTestClient{
deleteResp: &filer_pb.DeleteEntryResponse{
Error: filer.MsgFailDelNonEmptyFolder + ": /buckets/test/photos",
},
lookupResp: &filer_pb.LookupDirectoryEntryResponse{
Entry: &filer_pb.Entry{
Name: "photos",
IsDirectory: true,
Attributes: &filer_pb.FuseAttributes{
Mime: "application/octet-stream",
Md5: []byte{1, 2, 3, 4},
FileSize: 4,
},
Content: []byte("test"),
Extended: map[string][]byte{
s3_constants.ExtETagKey: []byte("etag"),
s3_constants.ExtAmzOwnerKey: []byte("owner"),
s3_constants.AmzUserMetaPrefix + "Color": []byte("blue"),
s3_constants.AmzObjectTaggingPrefix + "k": []byte("v"),
"xattr-keep": []byte("keep-me"),
"x-seaweedfs-internal": []byte("keep-me-too"),
},
},
},
}
err := deleteObjectEntry(client, "/buckets/test", "photos", true, false)
require.NoError(t, err)
require.NotNil(t, client.lookupReq)
require.NotNil(t, client.updateReq)
updated := client.updateReq.Entry
require.NotNil(t, updated)
assert.False(t, updated.IsDirectoryKeyObject())
assert.Equal(t, "", updated.Attributes.Mime)
assert.Empty(t, updated.Attributes.Md5)
assert.Zero(t, updated.Attributes.FileSize)
assert.Nil(t, updated.Content)
assert.Nil(t, updated.Chunks)
assert.Equal(t, map[string][]byte{
"xattr-keep": []byte("keep-me"),
"x-seaweedfs-internal": []byte("keep-me-too"),
}, updated.Extended)
}
func TestDeleteObjectEntryTreatsImplicitDirectoryAsSuccessfulNoop(t *testing.T) {
client := &deleteObjectEntryTestClient{
deleteResp: &filer_pb.DeleteEntryResponse{
Error: filer.MsgFailDelNonEmptyFolder + ": /buckets/test/photos",
},
lookupResp: &filer_pb.LookupDirectoryEntryResponse{
Entry: &filer_pb.Entry{
Name: "photos",
IsDirectory: true,
Attributes: &filer_pb.FuseAttributes{},
},
},
}
err := deleteObjectEntry(client, "/buckets/test", "photos", true, false)
require.NoError(t, err)
require.NotNil(t, client.lookupReq)
assert.Nil(t, client.updateReq)
}
func TestDeleteObjectEntryIgnoresConcurrentUpdateNotFound(t *testing.T) {
client := &deleteObjectEntryTestClient{
deleteResp: &filer_pb.DeleteEntryResponse{
Error: filer.MsgFailDelNonEmptyFolder + ": /buckets/test/photos",
},
lookupResp: &filer_pb.LookupDirectoryEntryResponse{
Entry: &filer_pb.Entry{
Name: "photos",
IsDirectory: true,
Attributes: &filer_pb.FuseAttributes{
Mime: "application/octet-stream",
},
},
},
updateErr: status.Error(codes.NotFound, "already removed"),
}
err := deleteObjectEntry(client, "/buckets/test", "photos", true, false)
require.NoError(t, err)
require.NotNil(t, client.lookupReq)
require.NotNil(t, client.updateReq)
}
func TestDeleteObjectEntryPropagatesNonDirectoryDeleteErrors(t *testing.T) {
client := &deleteObjectEntryTestClient{
deleteErr: errors.New("boom"),
}
err := deleteObjectEntry(client, "/buckets/test", "photos", true, false)
require.Error(t, err)
assert.Contains(t, err.Error(), "boom")
assert.Nil(t, client.lookupReq)
assert.Nil(t, client.updateReq)
}

2
weed/s3api/s3api_object_handlers_copy.go

@ -372,7 +372,7 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request
// Check if destination exists and remove it first (S3 copy overwrites)
if exists, _ := s3a.exists(dstDir, dstName, false); exists {
if err := s3a.rm(dstDir, dstName, false, false); err != nil {
if err := s3a.rmObject(dstDir, dstName, false, false); err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
return
}

7
weed/s3api/s3api_object_handlers_delete.go

@ -6,7 +6,6 @@ import (
"net/http"
"strings"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
@ -130,7 +129,7 @@ func (s3a *S3ApiServer) DeleteObjectHandler(w http.ResponseWriter, r *http.Reque
dir, name := target.DirAndName()
err := s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
return doDeleteEntry(client, dir, name, true, false)
return deleteObjectEntry(client, dir, name, true, false)
// Note: Empty folder cleanup is now handled asynchronously by EmptyFolderCleaner
// which listens to metadata events and uses consistent hashing for coordination
})
@ -345,11 +344,9 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
parentDirectoryPath, entryName := target.DirAndName()
isDeleteData, isRecursive := true, false
err := doDeleteEntry(client, parentDirectoryPath, entryName, isDeleteData, isRecursive)
err := deleteObjectEntry(client, parentDirectoryPath, entryName, isDeleteData, isRecursive)
if err == nil {
deletedObjects = append(deletedObjects, object)
} else if strings.Contains(err.Error(), filer.MsgFailDelNonEmptyFolder) {
deletedObjects = append(deletedObjects, object)
} else {
deleteErrors = append(deleteErrors, DeleteError{
Code: "",

2
weed/s3api/s3api_object_versioning.go

@ -949,7 +949,7 @@ func (s3a *S3ApiServer) deleteSpecificObjectVersion(bucket, object, versionId st
}
// Delete the regular file
deleteErr := s3a.rm(bucketDir, normalizedObject, true, false)
deleteErr := s3a.rmObject(bucketDir, normalizedObject, true, false)
if deleteErr != nil {
// Check if file was already deleted by another process
if _, checkErr := s3a.getEntry(bucketDir, normalizedObject); checkErr != nil {

Loading…
Cancel
Save