You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
285 lines
8.8 KiB
285 lines
8.8 KiB
package s3api
|
|
|
|
import (
|
|
"io"
|
|
"testing"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
)
|
|
|
|
// TestImplicitDirectoryBehaviorLogic checks the implicit-directory decision for
// HEAD requests as a pure truth table, so no S3 server setup is required.
//
// The decision being modelled (quoted from HeadObjectHandler) is:
//
//	if !versioningConfigured && !strings.HasSuffix(object, "/") {
//		if isZeroByteFile || isActualDirectory {
//			if hasChildren {
//				return 404
//			}
//		}
//	}
//
// NOTE(review): the decision is re-implemented locally instead of calling the
// handler, so this test pins the intended truth table rather than the
// production code path — confirm the handler stays in sync.
func TestImplicitDirectoryBehaviorLogic(t *testing.T) {
	tests := []struct {
		name              string
		objectPath        string
		hasTrailingSlash  bool
		fileSize          uint64
		isDirectory       bool
		hasChildren       bool
		versioningEnabled bool
		shouldReturn404   bool
		description       string
	}{
		{
			name:              "Implicit directory: 0-byte file with children, no trailing slash",
			objectPath:        "dataset",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       false,
			hasChildren:       true,
			versioningEnabled: false,
			shouldReturn404:   true,
			description:       "Should return 404 to force s3fs LIST-based discovery",
		},
		{
			name:              "Implicit directory: actual directory with children, no trailing slash",
			objectPath:        "dataset",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       true,
			hasChildren:       true,
			versioningEnabled: false,
			shouldReturn404:   true,
			description:       "Should return 404 for directory with children",
		},
		{
			name:              "Explicit directory request: trailing slash",
			objectPath:        "dataset/",
			hasTrailingSlash:  true,
			fileSize:          0,
			isDirectory:       true,
			hasChildren:       true,
			versioningEnabled: false,
			shouldReturn404:   false,
			description:       "Should return 200 for explicit directory request (trailing slash)",
		},
		{
			name:              "Empty file: 0-byte file without children",
			objectPath:        "empty.txt",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       false,
			hasChildren:       false,
			versioningEnabled: false,
			shouldReturn404:   false,
			description:       "Should return 200 for legitimate empty file",
		},
		{
			name:              "Empty directory: 0-byte directory without children",
			objectPath:        "empty-dir",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       true,
			hasChildren:       false,
			versioningEnabled: false,
			shouldReturn404:   false,
			description:       "Should return 200 for empty directory",
		},
		{
			name:              "Regular file: non-zero size",
			objectPath:        "file.txt",
			hasTrailingSlash:  false,
			fileSize:          100,
			isDirectory:       false,
			hasChildren:       false,
			versioningEnabled: false,
			shouldReturn404:   false,
			description:       "Should return 200 for regular file with content",
		},
		{
			// Added row: the only combination where hasChildren is true but the
			// entry is neither a 0-byte file nor a directory. Without it the
			// size/type guard is never the deciding factor in any row.
			name:              "Regular file with children: non-zero size",
			objectPath:        "dataset",
			hasTrailingSlash:  false,
			fileSize:          100,
			isDirectory:       false,
			hasChildren:       true,
			versioningEnabled: false,
			shouldReturn404:   false,
			description:       "Should return 200 for a non-empty object even when children exist",
		},
		{
			name:              "Versioned bucket: implicit directory should return 200",
			objectPath:        "dataset",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       false,
			hasChildren:       true,
			versioningEnabled: true,
			shouldReturn404:   false,
			description:       "Should return 200 for versioned buckets (skip implicit dir check)",
		},
		{
			name:              "PyArrow directory marker: 0-byte with children",
			objectPath:        "dataset",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       false,
			hasChildren:       true,
			versioningEnabled: false,
			shouldReturn404:   true,
			description:       "Should return 404 for PyArrow-created directory markers",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			isZeroByteFile := tt.fileSize == 0 && !tt.isDirectory
			isActualDirectory := tt.isDirectory

			// Flattened form of the nested conditions quoted in the doc comment:
			// every guard must hold for the 404 to be produced.
			shouldReturn404 := !tt.versioningEnabled &&
				!tt.hasTrailingSlash &&
				(isZeroByteFile || isActualDirectory) &&
				tt.hasChildren

			if shouldReturn404 != tt.shouldReturn404 {
				t.Errorf("Logic mismatch for %s:\n Expected shouldReturn404=%v\n Got shouldReturn404=%v\n Description: %s",
					tt.name, tt.shouldReturn404, shouldReturn404, tt.description)
				return
			}

			status := 200
			if shouldReturn404 {
				status = 404
			}
			t.Logf("✓ %s: correctly returns %d", tt.name, status)
		})
	}
}
|
|
|
|
// TestHasChildrenLogic tests the hasChildren helper function logic
|
|
func TestHasChildrenLogic(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
bucket string
|
|
prefix string
|
|
listResponse *filer_pb.ListEntriesResponse
|
|
listError error
|
|
expectedResult bool
|
|
description string
|
|
}{
|
|
{
|
|
name: "Directory with children",
|
|
bucket: "test-bucket",
|
|
prefix: "dataset",
|
|
listResponse: &filer_pb.ListEntriesResponse{
|
|
Entry: &filer_pb.Entry{
|
|
Name: "file.parquet",
|
|
IsDirectory: false,
|
|
},
|
|
},
|
|
listError: nil,
|
|
expectedResult: true,
|
|
description: "Should return true when at least one child exists",
|
|
},
|
|
{
|
|
name: "Empty directory",
|
|
bucket: "test-bucket",
|
|
prefix: "empty-dir",
|
|
listResponse: nil,
|
|
listError: io.EOF,
|
|
expectedResult: false,
|
|
description: "Should return false when no children exist (EOF)",
|
|
},
|
|
{
|
|
name: "Directory with leading slash in prefix",
|
|
bucket: "test-bucket",
|
|
prefix: "/dataset",
|
|
listResponse: &filer_pb.ListEntriesResponse{
|
|
Entry: &filer_pb.Entry{
|
|
Name: "file.parquet",
|
|
IsDirectory: false,
|
|
},
|
|
},
|
|
listError: nil,
|
|
expectedResult: true,
|
|
description: "Should handle leading slashes correctly",
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// Test the hasChildren logic:
|
|
// 1. It should trim leading slashes from prefix
|
|
// 2. It should list with Limit=1
|
|
// 3. It should return true if any entry is received
|
|
// 4. It should return false if EOF is received
|
|
|
|
hasChildren := false
|
|
if tt.listError == nil && tt.listResponse != nil {
|
|
hasChildren = true
|
|
} else if tt.listError == io.EOF {
|
|
hasChildren = false
|
|
}
|
|
|
|
if hasChildren != tt.expectedResult {
|
|
t.Errorf("hasChildren logic mismatch for %s:\n Expected: %v\n Got: %v\n Description: %s",
|
|
tt.name, tt.expectedResult, hasChildren, tt.description)
|
|
} else {
|
|
t.Logf("✓ %s: correctly returns %v", tt.name, hasChildren)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestImplicitDirectoryEdgeCases records the edge-case scenarios for implicit
// directory detection together with the behaviour expected for each.
//
// Every subtest only logs its scenario and expectation; nothing is asserted,
// so the table serves as executable documentation.
func TestImplicitDirectoryEdgeCases(t *testing.T) {
	type edgeCase struct {
		name, scenario, expectation string
	}

	cases := []edgeCase{
		{
			name:        "PyArrow write_dataset creates 0-byte files",
			scenario:    "PyArrow creates 'dataset' as 0-byte file, then writes 'dataset/file.parquet'",
			expectation: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory",
		},
		{
			name:        "Filer creates actual directories",
			scenario:    "Filer creates 'dataset' as actual directory with IsDirectory=true",
			expectation: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory",
		},
		{
			name:        "Empty file edge case",
			scenario:    "User creates 'empty.txt' as 0-byte file with no children",
			expectation: "HEAD empty.txt → 200 (no children), s3fs correctly reports as file",
		},
		{
			name:        "Explicit directory request",
			scenario:    "User requests 'dataset/' with trailing slash",
			expectation: "HEAD dataset/ → 200 (explicit directory request), normal directory behavior",
		},
		{
			name:        "Versioned bucket",
			scenario:    "Bucket has versioning enabled",
			expectation: "HEAD dataset → 200 (skip implicit dir check), versioned semantics apply",
		},
		{
			name:        "AWS S3 compatibility",
			scenario:    "Only 'dataset/file.txt' exists, no marker at 'dataset'",
			expectation: "HEAD dataset → 404 (object doesn't exist), matches AWS S3 behavior",
		},
	}

	for i := range cases {
		ec := cases[i]
		t.Run(ec.name, func(t *testing.T) {
			t.Logf("Scenario: %s", ec.scenario)
			t.Logf("Expected: %s", ec.expectation)
		})
	}
}
|
|
|
|
// TestImplicitDirectoryIntegration is a placeholder for the integration test.
// It always skips; only the reported reason depends on whether -short is set.
//
// Run with: cd test/s3/parquet && make test-implicit-dir-with-server
func TestImplicitDirectoryIntegration(t *testing.T) {
	reason := "Integration test - run manually with: cd test/s3/parquet && make test-implicit-dir-with-server"
	if testing.Short() {
		reason = "Skipping integration test in short mode"
	}
	t.Skip(reason)
}
|
|
|
|
// BenchmarkHasChildrenCheck is a placeholder for measuring the cost of the
// hasChildren check. It is skipped unconditionally because it needs a full
// filer setup.
//
// Expected cost per the original note: ~1-5ms per call (one gRPC LIST request
// with Limit=1).
func BenchmarkHasChildrenCheck(b *testing.B) {
	const reason = "Benchmark - requires full filer setup"
	b.Skip(reason)
}
|