You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

185 lines
6.8 KiB

package s3tables
import (
"testing"
)
// TestIcebergLayoutValidator_ValidateFilePath feeds a table of table-relative
// paths to ValidateFilePath and checks, per case, only whether an error is
// returned (the error's content is not inspected).
func TestIcebergLayoutValidator_ValidateFilePath(t *testing.T) {
	type testCase struct {
		name    string
		path    string
		wantErr bool
	}

	cases := []testCase{
		// Files under metadata/ that are expected to be accepted.
		{"valid metadata v1", "metadata/v1.metadata.json", false},
		{"valid metadata v123", "metadata/v123.metadata.json", false},
		{"valid snapshot manifest", "metadata/snap-123-1-abc12345-1234-5678-9abc-def012345678.avro", false},
		{"valid manifest file", "metadata/abc12345-1234-5678-9abc-def012345678-m0.avro", false},
		{"valid general manifest", "metadata/abc12345-1234-5678-9abc-def012345678.avro", false},
		{"valid version hint", "metadata/version-hint.text", false},
		{"valid uuid metadata", "metadata/abc12345-1234-5678-9abc-def012345678.metadata.json", false},

		// Files directly under data/ that are expected to be accepted.
		{"valid parquet file", "data/file.parquet", false},
		{"valid orc file", "data/file.orc", false},
		{"valid avro data file", "data/file.avro", false},
		{"valid parquet with path", "data/00000-0-abc12345.parquet", false},

		// Data files nested below partition or plain subdirectories.
		{"valid partitioned parquet", "data/year=2024/file.parquet", false},
		{"valid multi-partition", "data/year=2024/month=01/file.parquet", false},
		{"valid bucket subdirectory", "data/bucket0/file.parquet", false},

		// Top-level directory names alone: rejected bare, accepted with a trailing slash.
		{"metadata directory bare", "metadata", true},
		{"data directory bare", "data", true},
		{"metadata directory with slash", "metadata/", false},
		{"data directory with slash", "data/", false},

		// Paths that are expected to be rejected.
		{"empty path", "", true},
		{"invalid top dir", "invalid/file.parquet", true},
		{"root file", "file.parquet", true},
		{"invalid metadata file", "metadata/random.txt", true},
		{"nested metadata directory", "metadata/nested/v1.metadata.json", true},
		{"nested metadata directory no file", "metadata/nested/", true},
		{"metadata subdir no slash", "metadata/nested", true},
		{"invalid data file", "data/file.csv", true},
		{"invalid data file json", "data/file.json", true},

		// Partition and subdirectory paths under data/, with and without a trailing slash.
		{"partition directory no slash", "data/year=2024", false},
		{"data subdirectory no slash", "data/my_subdir", false},
		{"multi-level partition", "data/event_date=2025-01-01/hour=00/file.parquet", false},
		{"multi-level partition directory", "data/event_date=2025-01-01/hour=00/", false},
		{"multi-level partition directory no slash", "data/event_date=2025-01-01/hour=00", false},

		// Consecutive slashes (an empty path segment) are expected to be rejected.
		{"data double slash", "data//file.parquet", true},
		{"data redundant slash", "data/year=2024//file.parquet", true},
		{"metadata redundant slash", "metadata//v1.metadata.json", true},
	}

	validator := NewIcebergLayoutValidator()
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			err := validator.ValidateFilePath(tc.path)
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Errorf("ValidateFilePath(%q) error = %v, wantErr %v", tc.path, err, tc.wantErr)
			}
		})
	}
}
// TestIcebergLayoutValidator_PartitionPaths checks that ValidateFilePath returns
// no error for parquet files placed under key=value partition directories,
// including values that contain dashes and several nested partition levels.
//
// Each path runs as its own subtest, like the other table-driven tests in this
// file, so a failure is reported under the offending path and a single path can
// be selected with -run.
func TestIcebergLayoutValidator_PartitionPaths(t *testing.T) {
	v := NewIcebergLayoutValidator()
	validPaths := []string{
		"data/year=2024/file.parquet",
		"data/date=2024-01-15/file.parquet",
		"data/category=electronics/file.parquet",
		"data/user_id=12345/file.parquet",
		"data/region=us-east-1/file.parquet",
		"data/year=2024/month=01/day=15/file.parquet",
	}
	for _, path := range validPaths {
		t.Run(path, func(t *testing.T) {
			if err := v.ValidateFilePath(path); err != nil {
				t.Errorf("ValidateFilePath(%q) should be valid, got error: %v", path, err)
			}
		})
	}
}
// TestTableBucketFileValidator_ValidateTableBucketUpload feeds a table of
// absolute filer paths to ValidateTableBucketUpload and checks, per case, only
// whether an error is returned.
func TestTableBucketFileValidator_ValidateTableBucketUpload(t *testing.T) {
	type testCase struct {
		name    string
		path    string
		wantErr bool
	}

	cases := []testCase{
		// Paths outside /table-buckets are expected to pass.
		{"regular bucket path", "/buckets/mybucket/file.txt", false},
		{"filer path", "/home/user/file.txt", false},

		// Bucket, namespace, and table directory paths themselves.
		{"table bucket root", "/table-buckets/mybucket", false},
		{"namespace dir", "/table-buckets/mybucket/myns", false},
		{"table dir", "/table-buckets/mybucket/myns/mytable", false},
		{"table dir trailing slash", "/table-buckets/mybucket/myns/mytable/", false},

		// File uploads inside a table that are expected to be accepted.
		{"valid parquet upload", "/table-buckets/mybucket/myns/mytable/data/file.parquet", false},
		{"valid metadata upload", "/table-buckets/mybucket/myns/mytable/metadata/v1.metadata.json", false},
		{"valid partitioned data", "/table-buckets/mybucket/myns/mytable/data/year=2024/file.parquet", false},

		// File uploads inside a table that are expected to be rejected.
		{"invalid file type", "/table-buckets/mybucket/myns/mytable/data/file.csv", true},
		{"invalid top-level dir", "/table-buckets/mybucket/myns/mytable/invalid/file.parquet", true},
		{"root file in table", "/table-buckets/mybucket/myns/mytable/file.parquet", true},

		// Paths with an empty segment (consecutive slashes) are expected to be rejected.
		{"empty bucket", "/table-buckets//myns/mytable/data/file.parquet", true},
		{"empty namespace", "/table-buckets/mybucket//mytable/data/file.parquet", true},
		{"empty table", "/table-buckets/mybucket/myns//data/file.parquet", true},
		{"empty bucket dir", "/table-buckets//", true},
		{"empty namespace dir", "/table-buckets/mybucket//", true},
		{"table double slash bypass", "/table-buckets/mybucket/myns/mytable//data/file.parquet", true},
	}

	validator := NewTableBucketFileValidator()
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			err := validator.ValidateTableBucketUpload(tc.path)
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Errorf("ValidateTableBucketUpload(%q) error = %v, wantErr %v", tc.path, err, tc.wantErr)
			}
		})
	}
}
// TestIsTableBucketPath checks the boolean returned by IsTableBucketPath for
// paths under /table-buckets, paths elsewhere, and a path without the leading
// slash. Each path doubles as its subtest name.
func TestIsTableBucketPath(t *testing.T) {
	type testCase struct {
		path string
		want bool
	}

	cases := []testCase{
		{"/table-buckets/mybucket", true},
		{"/table-buckets/mybucket/ns/table/data/file.parquet", true},
		{"/buckets/mybucket", false},
		{"/home/user/file.txt", false},
		{"table-buckets/mybucket", false}, // no leading slash
	}

	for _, tc := range cases {
		t.Run(tc.path, func(t *testing.T) {
			got := IsTableBucketPath(tc.path)
			if got == tc.want {
				return
			}
			t.Errorf("IsTableBucketPath(%q) = %v, want %v", tc.path, got, tc.want)
		})
	}
}
// TestGetTableInfoFromPath checks the (bucket, namespace, table) triple that
// GetTableInfoFromPath returns for paths of decreasing depth under
// /table-buckets, plus one path outside it, for which all three values are
// expected to be empty. Each path doubles as its subtest name.
func TestGetTableInfoFromPath(t *testing.T) {
	type testCase struct {
		path          string
		wantBucket    string
		wantNamespace string
		wantTable     string
	}

	cases := []testCase{
		{"/table-buckets/mybucket/myns/mytable/data/file.parquet", "mybucket", "myns", "mytable"},
		{"/table-buckets/mybucket/myns/mytable", "mybucket", "myns", "mytable"},
		{"/table-buckets/mybucket/myns", "mybucket", "myns", ""},
		{"/table-buckets/mybucket", "mybucket", "", ""},
		{"/buckets/mybucket", "", "", ""},
	}

	for _, tc := range cases {
		t.Run(tc.path, func(t *testing.T) {
			bucket, namespace, table := GetTableInfoFromPath(tc.path)

			// Arrays are comparable, so a single != covers all three components.
			got := [3]string{bucket, namespace, table}
			want := [3]string{tc.wantBucket, tc.wantNamespace, tc.wantTable}
			if got != want {
				t.Errorf("GetTableInfoFromPath(%q) = (%q, %q, %q), want (%q, %q, %q)",
					tc.path, bucket, namespace, table, tc.wantBucket, tc.wantNamespace, tc.wantTable)
			}
		})
	}
}