You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
185 lines
6.8 KiB
185 lines
6.8 KiB
package s3tables
|
|
|
|
import (
|
|
"testing"
|
|
)
|
|
|
|
func TestIcebergLayoutValidator_ValidateFilePath(t *testing.T) {
|
|
v := NewIcebergLayoutValidator()
|
|
|
|
tests := []struct {
|
|
name string
|
|
path string
|
|
wantErr bool
|
|
}{
|
|
// Valid metadata files
|
|
{"valid metadata v1", "metadata/v1.metadata.json", false},
|
|
{"valid metadata v123", "metadata/v123.metadata.json", false},
|
|
{"valid snapshot manifest", "metadata/snap-123-1-abc12345-1234-5678-9abc-def012345678.avro", false},
|
|
{"valid manifest file", "metadata/abc12345-1234-5678-9abc-def012345678-m0.avro", false},
|
|
{"valid general manifest", "metadata/abc12345-1234-5678-9abc-def012345678.avro", false},
|
|
{"valid version hint", "metadata/version-hint.text", false},
|
|
{"valid uuid metadata", "metadata/abc12345-1234-5678-9abc-def012345678.metadata.json", false},
|
|
|
|
// Valid data files
|
|
{"valid parquet file", "data/file.parquet", false},
|
|
{"valid orc file", "data/file.orc", false},
|
|
{"valid avro data file", "data/file.avro", false},
|
|
{"valid parquet with path", "data/00000-0-abc12345.parquet", false},
|
|
|
|
// Valid partitioned data
|
|
{"valid partitioned parquet", "data/year=2024/file.parquet", false},
|
|
{"valid multi-partition", "data/year=2024/month=01/file.parquet", false},
|
|
{"valid bucket subdirectory", "data/bucket0/file.parquet", false},
|
|
|
|
// Directories only
|
|
{"metadata directory bare", "metadata", true},
|
|
{"data directory bare", "data", true},
|
|
{"metadata directory with slash", "metadata/", false},
|
|
{"data directory with slash", "data/", false},
|
|
|
|
// Invalid paths
|
|
{"empty path", "", true},
|
|
{"invalid top dir", "invalid/file.parquet", true},
|
|
{"root file", "file.parquet", true},
|
|
{"invalid metadata file", "metadata/random.txt", true},
|
|
{"nested metadata directory", "metadata/nested/v1.metadata.json", true},
|
|
{"nested metadata directory no file", "metadata/nested/", true},
|
|
{"metadata subdir no slash", "metadata/nested", true},
|
|
{"invalid data file", "data/file.csv", true},
|
|
{"invalid data file json", "data/file.json", true},
|
|
|
|
// Partition/subdirectory without trailing slashes
|
|
{"partition directory no slash", "data/year=2024", false},
|
|
{"data subdirectory no slash", "data/my_subdir", false},
|
|
{"multi-level partition", "data/event_date=2025-01-01/hour=00/file.parquet", false},
|
|
{"multi-level partition directory", "data/event_date=2025-01-01/hour=00/", false},
|
|
{"multi-level partition directory no slash", "data/event_date=2025-01-01/hour=00", false},
|
|
|
|
// Double slashes
|
|
{"data double slash", "data//file.parquet", true},
|
|
{"data redundant slash", "data/year=2024//file.parquet", true},
|
|
{"metadata redundant slash", "metadata//v1.metadata.json", true},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
err := v.ValidateFilePath(tt.path)
|
|
if (err != nil) != tt.wantErr {
|
|
t.Errorf("ValidateFilePath(%q) error = %v, wantErr %v", tt.path, err, tt.wantErr)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestIcebergLayoutValidator_PartitionPaths(t *testing.T) {
|
|
v := NewIcebergLayoutValidator()
|
|
|
|
validPaths := []string{
|
|
"data/year=2024/file.parquet",
|
|
"data/date=2024-01-15/file.parquet",
|
|
"data/category=electronics/file.parquet",
|
|
"data/user_id=12345/file.parquet",
|
|
"data/region=us-east-1/file.parquet",
|
|
"data/year=2024/month=01/day=15/file.parquet",
|
|
}
|
|
|
|
for _, path := range validPaths {
|
|
if err := v.ValidateFilePath(path); err != nil {
|
|
t.Errorf("ValidateFilePath(%q) should be valid, got error: %v", path, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestTableBucketFileValidator_ValidateTableBucketUpload(t *testing.T) {
|
|
v := NewTableBucketFileValidator()
|
|
|
|
tests := []struct {
|
|
name string
|
|
path string
|
|
wantErr bool
|
|
}{
|
|
// Non-table bucket paths should pass (no validation)
|
|
{"regular bucket path", "/buckets/mybucket/file.txt", false},
|
|
{"filer path", "/home/user/file.txt", false},
|
|
|
|
// Table bucket structure paths (creating directories)
|
|
{"table bucket root", "/table-buckets/mybucket", false},
|
|
{"namespace dir", "/table-buckets/mybucket/myns", false},
|
|
{"table dir", "/table-buckets/mybucket/myns/mytable", false},
|
|
{"table dir trailing slash", "/table-buckets/mybucket/myns/mytable/", false},
|
|
|
|
// Valid table bucket file uploads
|
|
{"valid parquet upload", "/table-buckets/mybucket/myns/mytable/data/file.parquet", false},
|
|
{"valid metadata upload", "/table-buckets/mybucket/myns/mytable/metadata/v1.metadata.json", false},
|
|
{"valid partitioned data", "/table-buckets/mybucket/myns/mytable/data/year=2024/file.parquet", false},
|
|
|
|
// Invalid table bucket file uploads
|
|
{"invalid file type", "/table-buckets/mybucket/myns/mytable/data/file.csv", true},
|
|
{"invalid top-level dir", "/table-buckets/mybucket/myns/mytable/invalid/file.parquet", true},
|
|
{"root file in table", "/table-buckets/mybucket/myns/mytable/file.parquet", true},
|
|
|
|
// Empty segment cases
|
|
{"empty bucket", "/table-buckets//myns/mytable/data/file.parquet", true},
|
|
{"empty namespace", "/table-buckets/mybucket//mytable/data/file.parquet", true},
|
|
{"empty table", "/table-buckets/mybucket/myns//data/file.parquet", true},
|
|
{"empty bucket dir", "/table-buckets//", true},
|
|
{"empty namespace dir", "/table-buckets/mybucket//", true},
|
|
{"table double slash bypass", "/table-buckets/mybucket/myns/mytable//data/file.parquet", true},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
err := v.ValidateTableBucketUpload(tt.path)
|
|
if (err != nil) != tt.wantErr {
|
|
t.Errorf("ValidateTableBucketUpload(%q) error = %v, wantErr %v", tt.path, err, tt.wantErr)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestIsTableBucketPath(t *testing.T) {
|
|
tests := []struct {
|
|
path string
|
|
want bool
|
|
}{
|
|
{"/table-buckets/mybucket", true},
|
|
{"/table-buckets/mybucket/ns/table/data/file.parquet", true},
|
|
{"/buckets/mybucket", false},
|
|
{"/home/user/file.txt", false},
|
|
{"table-buckets/mybucket", false}, // missing leading slash
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.path, func(t *testing.T) {
|
|
if got := IsTableBucketPath(tt.path); got != tt.want {
|
|
t.Errorf("IsTableBucketPath(%q) = %v, want %v", tt.path, got, tt.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestGetTableInfoFromPath(t *testing.T) {
|
|
tests := []struct {
|
|
path string
|
|
wantBucket string
|
|
wantNamespace string
|
|
wantTable string
|
|
}{
|
|
{"/table-buckets/mybucket/myns/mytable/data/file.parquet", "mybucket", "myns", "mytable"},
|
|
{"/table-buckets/mybucket/myns/mytable", "mybucket", "myns", "mytable"},
|
|
{"/table-buckets/mybucket/myns", "mybucket", "myns", ""},
|
|
{"/table-buckets/mybucket", "mybucket", "", ""},
|
|
{"/buckets/mybucket", "", "", ""},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.path, func(t *testing.T) {
|
|
bucket, namespace, table := GetTableInfoFromPath(tt.path)
|
|
if bucket != tt.wantBucket || namespace != tt.wantNamespace || table != tt.wantTable {
|
|
t.Errorf("GetTableInfoFromPath(%q) = (%q, %q, %q), want (%q, %q, %q)",
|
|
tt.path, bucket, namespace, table, tt.wantBucket, tt.wantNamespace, tt.wantTable)
|
|
}
|
|
})
|
|
}
|
|
}
|