You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

285 lines
8.8 KiB

package s3api
import (
	"errors"
	"io"
	"testing"

	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
)
// TestImplicitDirectoryBehaviorLogic checks the 404-versus-200 decision used for
// implicit directory detection on HEAD requests. The decision is re-implemented
// inline from the table inputs, so no S3 server or filer is required.
func TestImplicitDirectoryBehaviorLogic(t *testing.T) {
	type headCase struct {
		name          string
		key           string // requested object key; informational, not read by the check
		trailingSlash bool   // request key ends with "/"
		size          uint64 // size of the entry found for the key
		isDir         bool   // entry is an actual directory
		hasChildren   bool   // at least one entry exists below the key
		versioned     bool   // bucket has versioning configured
		want404       bool   // expected decision
		note          string // human-readable rationale, printed on mismatch
	}

	cases := []headCase{
		{
			name:          "Implicit directory: 0-byte file with children, no trailing slash",
			key:           "dataset",
			trailingSlash: false,
			size:          0,
			isDir:         false,
			hasChildren:   true,
			versioned:     false,
			want404:       true,
			note:          "Should return 404 to force s3fs LIST-based discovery",
		},
		{
			name:          "Implicit directory: actual directory with children, no trailing slash",
			key:           "dataset",
			trailingSlash: false,
			size:          0,
			isDir:         true,
			hasChildren:   true,
			versioned:     false,
			want404:       true,
			note:          "Should return 404 for directory with children",
		},
		{
			name:          "Explicit directory request: trailing slash",
			key:           "dataset/",
			trailingSlash: true,
			size:          0,
			isDir:         true,
			hasChildren:   true,
			versioned:     false,
			want404:       false,
			note:          "Should return 200 for explicit directory request (trailing slash)",
		},
		{
			name:          "Empty file: 0-byte file without children",
			key:           "empty.txt",
			trailingSlash: false,
			size:          0,
			isDir:         false,
			hasChildren:   false,
			versioned:     false,
			want404:       false,
			note:          "Should return 200 for legitimate empty file",
		},
		{
			name:          "Empty directory: 0-byte directory without children",
			key:           "empty-dir",
			trailingSlash: false,
			size:          0,
			isDir:         true,
			hasChildren:   false,
			versioned:     false,
			want404:       false,
			note:          "Should return 200 for empty directory",
		},
		{
			name:          "Regular file: non-zero size",
			key:           "file.txt",
			trailingSlash: false,
			size:          100,
			isDir:         false,
			hasChildren:   false,
			versioned:     false,
			want404:       false,
			note:          "Should return 200 for regular file with content",
		},
		{
			name:          "Versioned bucket: implicit directory should return 200",
			key:           "dataset",
			trailingSlash: false,
			size:          0,
			isDir:         false,
			hasChildren:   true,
			versioned:     true,
			want404:       false,
			note:          "Should return 200 for versioned buckets (skip implicit dir check)",
		},
		{
			name:          "PyArrow directory marker: 0-byte with children",
			key:           "dataset",
			trailingSlash: false,
			size:          0,
			isDir:         false,
			hasChildren:   true,
			versioned:     false,
			want404:       true,
			note:          "Should return 404 for PyArrow-created directory markers",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Decision under test (described by the original author as the
			// HeadObjectHandler logic): on a non-versioned bucket, a key
			// without a trailing slash that resolves to either a 0-byte file
			// or a directory entry yields 404 when it has children.
			zeroByteFile := tc.size == 0 && !tc.isDir
			dirLike := zeroByteFile || tc.isDir
			got404 := !tc.versioned && !tc.trailingSlash && dirLike && tc.hasChildren

			if got404 != tc.want404 {
				t.Errorf("Logic mismatch for %s:\n Expected shouldReturn404=%v\n Got shouldReturn404=%v\n Description: %s",
					tc.name, tc.want404, got404, tc.note)
				return
			}

			status := 200
			if got404 {
				status = 404
			}
			t.Logf("✓ %s: correctly returns %d", tc.name, status)
		})
	}
}
// TestHasChildrenLogic checks how the outcome of a single list call is turned
// into a "has children" answer. It does not call a helper or a filer: each
// case supplies the response/error pair a list call would produce, and the
// mapping to a boolean is re-implemented inline.
//
// NOTE(review): the bucket and prefix fields describe the request a case stands
// for but are not read by the check below, so prefix trimming and the list
// limit are not exercised here.
func TestHasChildrenLogic(t *testing.T) {
	tests := []struct {
		name           string
		bucket         string
		prefix         string
		listResponse   *filer_pb.ListEntriesResponse // simulated first list result, nil when none
		listError      error                         // simulated list error, io.EOF for an empty listing
		expectedResult bool
		description    string
	}{
		{
			name:   "Directory with children",
			bucket: "test-bucket",
			prefix: "dataset",
			listResponse: &filer_pb.ListEntriesResponse{
				Entry: &filer_pb.Entry{
					Name:        "file.parquet",
					IsDirectory: false,
				},
			},
			listError:      nil,
			expectedResult: true,
			description:    "Should return true when at least one child exists",
		},
		{
			name:           "Empty directory",
			bucket:         "test-bucket",
			prefix:         "empty-dir",
			listResponse:   nil,
			listError:      io.EOF,
			expectedResult: false,
			description:    "Should return false when no children exist (EOF)",
		},
		{
			name:   "Directory with leading slash in prefix",
			bucket: "test-bucket",
			prefix: "/dataset",
			listResponse: &filer_pb.ListEntriesResponse{
				Entry: &filer_pb.Entry{
					Name:        "file.parquet",
					IsDirectory: false,
				},
			},
			listError:      nil,
			expectedResult: true,
			description:    "Should handle leading slashes correctly",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Intended hasChildren behavior, per the original notes:
			//  1. trim leading slashes from the prefix
			//  2. list with Limit=1
			//  3. return true if any entry is received
			//  4. return false if EOF is received
			// Only steps 3 and 4 are modelled by this test.
			var hasChildren bool
			switch {
			case errors.Is(tt.listError, io.EOF):
				// errors.Is also recognizes an EOF that has been wrapped with
				// extra context, which a plain == comparison would miss.
				hasChildren = false
			case tt.listError == nil && tt.listResponse != nil:
				hasChildren = true
			}
			// Any other combination (nil response without error, or a
			// non-EOF error) leaves hasChildren false.

			if hasChildren != tt.expectedResult {
				t.Errorf("hasChildren logic mismatch for %s:\n Expected: %v\n Got: %v\n Description: %s",
					tt.name, tt.expectedResult, hasChildren, tt.description)
				return
			}
			t.Logf("✓ %s: correctly returns %v", tt.name, hasChildren)
		})
	}
}
// TestImplicitDirectoryEdgeCases records the edge-case scenarios for implicit
// directory detection as named subtests. Each subtest only logs its scenario
// and the expected outcome; nothing is asserted.
func TestImplicitDirectoryEdgeCases(t *testing.T) {
	type edgeCase struct {
		name        string // subtest name
		scenario    string // situation being described
		expectation string // outcome expected for that situation
	}

	cases := []edgeCase{
		{
			name:        "PyArrow write_dataset creates 0-byte files",
			scenario:    "PyArrow creates 'dataset' as 0-byte file, then writes 'dataset/file.parquet'",
			expectation: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory",
		},
		{
			name:        "Filer creates actual directories",
			scenario:    "Filer creates 'dataset' as actual directory with IsDirectory=true",
			expectation: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory",
		},
		{
			name:        "Empty file edge case",
			scenario:    "User creates 'empty.txt' as 0-byte file with no children",
			expectation: "HEAD empty.txt → 200 (no children), s3fs correctly reports as file",
		},
		{
			name:        "Explicit directory request",
			scenario:    "User requests 'dataset/' with trailing slash",
			expectation: "HEAD dataset/ → 200 (explicit directory request), normal directory behavior",
		},
		{
			name:        "Versioned bucket",
			scenario:    "Bucket has versioning enabled",
			expectation: "HEAD dataset → 200 (skip implicit dir check), versioned semantics apply",
		},
		{
			name:        "AWS S3 compatibility",
			scenario:    "Only 'dataset/file.txt' exists, no marker at 'dataset'",
			expectation: "HEAD dataset → 404 (object doesn't exist), matches AWS S3 behavior",
		},
	}

	for i := range cases {
		ec := cases[i]
		t.Run(ec.name, func(t *testing.T) {
			t.Logf("Scenario: %s", ec.scenario)
			t.Logf("Expected: %s", ec.expectation)
		})
	}
}
// TestImplicitDirectoryIntegration is a placeholder for the integration test.
// It always skips; only the skip message depends on whether -short is set.
// Run the real test with: cd test/s3/parquet && make test-implicit-dir-with-server
func TestImplicitDirectoryIntegration(t *testing.T) {
	reason := "Integration test - run manually with: cd test/s3/parquet && make test-implicit-dir-with-server"
	if testing.Short() {
		reason = "Skipping integration test in short mode"
	}
	t.Skip(reason)
}
// BenchmarkHasChildrenCheck is a placeholder for measuring the cost of the
// hasChildren check. It skips immediately because it needs a full filer setup.
func BenchmarkHasChildrenCheck(b *testing.B) {
	// Intended measurement: the per-call overhead of the hasChildren check.
	// The original author's estimate is ~1-5ms per call (one gRPC LIST
	// request with Limit=1).
	b.Log("Benchmark - requires full filer setup")
	b.SkipNow()
}