package s3api

import (
	"errors"
	"io"
	"testing"

	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
)

// TestImplicitDirectoryBehaviorLogic tests the core logic for implicit directory detection.
// This tests the decision logic without requiring a full S3 server setup.
//
// The decision is re-implemented inline from the pseudo-code quoted in the
// subtest body; the real handler is not invoked, so this test only pins the
// expected truth table.
func TestImplicitDirectoryBehaviorLogic(t *testing.T) {
	tests := []struct {
		name              string
		objectPath        string
		hasTrailingSlash  bool
		fileSize          uint64
		isDirectory       bool
		hasChildren       bool
		versioningEnabled bool
		shouldReturn404   bool
		description       string
	}{
		{
			name:              "Implicit directory: 0-byte file with children, no trailing slash",
			objectPath:        "dataset",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       false,
			hasChildren:       true,
			versioningEnabled: false,
			shouldReturn404:   true,
			description:       "Should return 404 to force s3fs LIST-based discovery",
		},
		{
			name:              "Implicit directory: actual directory with children, no trailing slash",
			objectPath:        "dataset",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       true,
			hasChildren:       true,
			versioningEnabled: false,
			shouldReturn404:   true,
			description:       "Should return 404 for directory with children",
		},
		{
			name:              "Explicit directory request: trailing slash",
			objectPath:        "dataset/",
			hasTrailingSlash:  true,
			fileSize:          0,
			isDirectory:       true,
			hasChildren:       true,
			versioningEnabled: false,
			shouldReturn404:   false,
			description:       "Should return 200 for explicit directory request (trailing slash)",
		},
		{
			name:              "Empty file: 0-byte file without children",
			objectPath:        "empty.txt",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       false,
			hasChildren:       false,
			versioningEnabled: false,
			shouldReturn404:   false,
			description:       "Should return 200 for legitimate empty file",
		},
		{
			name:              "Empty directory: 0-byte directory without children",
			objectPath:        "empty-dir",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       true,
			hasChildren:       false,
			versioningEnabled: false,
			shouldReturn404:   false,
			description:       "Should return 200 for empty directory",
		},
		{
			name:              "Regular file: non-zero size",
			objectPath:        "file.txt",
			hasTrailingSlash:  false,
			fileSize:          100,
			isDirectory:       false,
			hasChildren:       false,
			versioningEnabled: false,
			shouldReturn404:   false,
			description:       "Should return 200 for regular file with content",
		},
		{
			name:              "Versioned bucket: implicit directory should return 200",
			objectPath:        "dataset",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       false,
			hasChildren:       true,
			versioningEnabled: true,
			shouldReturn404:   false,
			description:       "Should return 200 for versioned buckets (skip implicit dir check)",
		},
		{
			name:              "PyArrow directory marker: 0-byte with children",
			objectPath:        "dataset",
			hasTrailingSlash:  false,
			fileSize:          0,
			isDirectory:       false,
			hasChildren:       true,
			versioningEnabled: false,
			shouldReturn404:   true,
			description:       "Should return 404 for PyArrow-created directory markers",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Test the logic: should we return 404?
			// Logic from HeadObjectHandler:
			//   if !versioningConfigured && !strings.HasSuffix(object, "/") {
			//       if isZeroByteFile || isActualDirectory {
			//           if hasChildren {
			//               return 404
			//           }
			//       }
			//   }
			isZeroByteFile := tt.fileSize == 0 && !tt.isDirectory
			isActualDirectory := tt.isDirectory

			// The three nested conditions above collapse into a single conjunction.
			checkApplies := !tt.versioningEnabled && !tt.hasTrailingSlash
			looksLikeDirectory := isZeroByteFile || isActualDirectory
			shouldReturn404 := checkApplies && looksLikeDirectory && tt.hasChildren

			if shouldReturn404 != tt.shouldReturn404 {
				t.Errorf("Logic mismatch for %s:\n Expected shouldReturn404=%v\n Got shouldReturn404=%v\n Description: %s",
					tt.name, tt.shouldReturn404, shouldReturn404, tt.description)
				return
			}

			status := 200
			if shouldReturn404 {
				status = 404
			}
			t.Logf("✓ %s: correctly returns %d", tt.name, status)
		})
	}
}

// TestHasChildrenLogic tests the hasChildren helper function logic.
//
// The helper itself is not called; each row supplies a canned list response and
// error, and the subtest derives the expected boolean from them. The bucket and
// prefix columns describe the scenario but are not read by the subtest.
func TestHasChildrenLogic(t *testing.T) {
	tests := []struct {
		name           string
		bucket         string
		prefix         string
		listResponse   *filer_pb.ListEntriesResponse
		listError      error
		expectedResult bool
		description    string
	}{
		{
			name:   "Directory with children",
			bucket: "test-bucket",
			prefix: "dataset",
			listResponse: &filer_pb.ListEntriesResponse{
				Entry: &filer_pb.Entry{
					Name:        "file.parquet",
					IsDirectory: false,
				},
			},
			listError:      nil,
			expectedResult: true,
			description:    "Should return true when at least one child exists",
		},
		{
			name:           "Empty directory",
			bucket:         "test-bucket",
			prefix:         "empty-dir",
			listResponse:   nil,
			listError:      io.EOF,
			expectedResult: false,
			description:    "Should return false when no children exist (EOF)",
		},
		{
			name:   "Directory with leading slash in prefix",
			bucket: "test-bucket",
			prefix: "/dataset",
			listResponse: &filer_pb.ListEntriesResponse{
				Entry: &filer_pb.Entry{
					Name:        "file.parquet",
					IsDirectory: false,
				},
			},
			listError:      nil,
			expectedResult: true,
			description:    "Should handle leading slashes correctly",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Test the hasChildren logic:
			// 1. It should trim leading slashes from prefix
			// 2. It should list with Limit=1
			// 3. It should return true if any entry is received
			// 4. It should return false if EOF is received
			var hasChildren bool
			switch {
			case tt.listError == nil && tt.listResponse != nil:
				hasChildren = true
			case errors.Is(tt.listError, io.EOF):
				// errors.Is also matches a wrapped io.EOF, which a plain == would miss.
				// The assignment restates the zero value to keep rule 4 visible.
				hasChildren = false
			}

			if hasChildren != tt.expectedResult {
				t.Errorf("hasChildren logic mismatch for %s:\n Expected: %v\n Got: %v\n Description: %s",
					tt.name, tt.expectedResult, hasChildren, tt.description)
				return
			}
			t.Logf("✓ %s: correctly returns %v", tt.name, hasChildren)
		})
	}
}

// TestImplicitDirectoryEdgeCases tests edge cases in the implicit directory detection.
//
// Each subtest only logs its scenario and expectation; nothing is asserted, so
// this function serves as executable documentation of the intended behavior.
func TestImplicitDirectoryEdgeCases(t *testing.T) {
	tests := []struct {
		name        string
		scenario    string
		expectation string
	}{
		{
			name:        "PyArrow write_dataset creates 0-byte files",
			scenario:    "PyArrow creates 'dataset' as 0-byte file, then writes 'dataset/file.parquet'",
			expectation: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory",
		},
		{
			name:        "Filer creates actual directories",
			scenario:    "Filer creates 'dataset' as actual directory with IsDirectory=true",
			expectation: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory",
		},
		{
			name:        "Empty file edge case",
			scenario:    "User creates 'empty.txt' as 0-byte file with no children",
			expectation: "HEAD empty.txt → 200 (no children), s3fs correctly reports as file",
		},
		{
			name:        "Explicit directory request",
			scenario:    "User requests 'dataset/' with trailing slash",
			expectation: "HEAD dataset/ → 200 (explicit directory request), normal directory behavior",
		},
		{
			name:        "Versioned bucket",
			scenario:    "Bucket has versioning enabled",
			expectation: "HEAD dataset → 200 (skip implicit dir check), versioned semantics apply",
		},
		{
			name:        "AWS S3 compatibility",
			scenario:    "Only 'dataset/file.txt' exists, no marker at 'dataset'",
			expectation: "HEAD dataset → 404 (object doesn't exist), matches AWS S3 behavior",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			t.Logf("Scenario: %s", tt.scenario)
			t.Logf("Expected: %s", tt.expectation)
		})
	}
}

// TestImplicitDirectoryIntegration is an integration test placeholder.
// Run with: cd test/s3/parquet && make test-implicit-dir-with-server
//
// The test always skips; only the skip message depends on whether -short is set.
func TestImplicitDirectoryIntegration(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integration test in short mode")
	}

	t.Skip("Integration test - run manually with: cd test/s3/parquet && make test-implicit-dir-with-server")
}

// BenchmarkHasChildrenCheck is a placeholder benchmark for hasChildren performance.
// It skips unconditionally.
func BenchmarkHasChildrenCheck(b *testing.B) {
	// This benchmark would measure the performance impact of the hasChildren check
	// Expected: ~1-5ms per call (one gRPC LIST request with Limit=1)
	b.Skip("Benchmark - requires full filer setup")
}