Browse Source
iceberg: add delete file rewrite maintenance (#8664)
iceberg: add delete file rewrite maintenance (#8664)
* iceberg: add delete file rewrite maintenance * iceberg: preserve untouched delete files during rewrites * iceberg: share detection threshold defaults * iceberg: add partition-scoped maintenance filters (#8665) * iceberg: add partition-scoped maintenance filters * iceberg: tighten where-filter partition matching
committed by
GitHub
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 2140 additions and 117 deletions
-
59weed/plugin/worker/iceberg/compact.go
-
139weed/plugin/worker/iceberg/config.go
-
595weed/plugin/worker/iceberg/delete_rewrite.go
-
126weed/plugin/worker/iceberg/detection.go
-
512weed/plugin/worker/iceberg/exec_test.go
-
144weed/plugin/worker/iceberg/handler.go
-
35weed/plugin/worker/iceberg/handler_test.go
-
47weed/plugin/worker/iceberg/operations.go
-
2weed/plugin/worker/iceberg/planning_index.go
-
311weed/plugin/worker/iceberg/where_filter.go
-
287weed/plugin/worker/iceberg/where_filter_test.go
@ -0,0 +1,595 @@ |
|||||
|
package iceberg |
||||
|
|
||||
|
import ( |
||||
|
"bytes" |
||||
|
"context" |
||||
|
"fmt" |
||||
|
"math" |
||||
|
"path" |
||||
|
"sort" |
||||
|
"time" |
||||
|
|
||||
|
"github.com/apache/iceberg-go" |
||||
|
"github.com/apache/iceberg-go/table" |
||||
|
"github.com/parquet-go/parquet-go" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/glog" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3tables" |
||||
|
) |
||||
|
|
||||
|
// deleteRewriteInput is one position-delete file selected for rewriting:
// its manifest entry, the single data file its deletes reference, and the
// row positions deleted in that file.
type deleteRewriteInput struct {
	Entry          iceberg.ManifestEntry // manifest entry of the position-delete file
	ReferencedPath string                // normalized path of the data file the deletes apply to
	Positions      []int64               // row positions deleted in that data file (sorted by the collector)
}
||||
|
|
||||
|
// deleteRewriteGroup collects position-delete files that share a partition
// spec, partition tuple, and referenced data file, so they can be merged
// into fewer, larger delete files.
type deleteRewriteGroup struct {
	SpecID         int32       // partition spec ID shared by all inputs
	Partition      map[int]any // partition values keyed by partition field ID
	PartitionKey   string      // canonical string form of Partition used in the group key
	ReferencedPath string      // data file all inputs' deletes point at
	Inputs         []deleteRewriteInput
	TotalSize      int64 // sum of the input delete files' sizes in bytes
}
||||
|
|
||||
|
// positionDeleteRow is the Iceberg position-delete record (file_path, pos)
// as serialized to Parquet; the tags fix the Parquet column names.
type positionDeleteRow struct {
	FilePath string `parquet:"file_path"`
	Pos      int64  `parquet:"pos"`
}
||||
|
|
||||
|
func hasEligibleDeleteRewrite( |
||||
|
ctx context.Context, |
||||
|
filerClient filer_pb.SeaweedFilerClient, |
||||
|
bucketName, tablePath string, |
||||
|
manifests []iceberg.ManifestFile, |
||||
|
config Config, |
||||
|
meta table.Metadata, |
||||
|
predicate *partitionPredicate, |
||||
|
) (bool, error) { |
||||
|
groups, _, err := collectDeleteRewriteGroups(ctx, filerClient, bucketName, tablePath, manifests) |
||||
|
if err != nil { |
||||
|
return false, err |
||||
|
} |
||||
|
for _, group := range groups { |
||||
|
if predicate != nil { |
||||
|
spec, ok := specByID(meta)[int(group.SpecID)] |
||||
|
if !ok { |
||||
|
continue |
||||
|
} |
||||
|
match, err := predicate.Matches(spec, group.Partition) |
||||
|
if err != nil { |
||||
|
return false, err |
||||
|
} |
||||
|
if !match { |
||||
|
continue |
||||
|
} |
||||
|
} |
||||
|
if groupEligibleForRewrite(group, config) { |
||||
|
return true, nil |
||||
|
} |
||||
|
} |
||||
|
return false, nil |
||||
|
} |
||||
|
|
||||
|
// collectDeleteRewriteGroups scans every delete manifest of a snapshot and
// groups position-delete files by (spec ID, partition, referenced data
// file). It returns the groups plus every position-delete entry seen, so
// the caller can carry untouched delete files forward unchanged.
//
// Position-delete files that reference more than one data file are included
// in the returned entries but never grouped (Phase 1 limitation, see below).
func collectDeleteRewriteGroups(
	ctx context.Context,
	filerClient filer_pb.SeaweedFilerClient,
	bucketName, tablePath string,
	manifests []iceberg.ManifestFile,
) (map[string]*deleteRewriteGroup, []iceberg.ManifestEntry, error) {
	groups := make(map[string]*deleteRewriteGroup)
	var allPositionEntries []iceberg.ManifestEntry

	for _, mf := range manifests {
		// Only delete manifests can contain position-delete entries.
		if mf.ManifestContent() != iceberg.ManifestContentDeletes {
			continue
		}

		manifestData, err := loadFileByIcebergPath(ctx, filerClient, bucketName, tablePath, mf.FilePath())
		if err != nil {
			return nil, nil, fmt.Errorf("read delete manifest %s: %w", mf.FilePath(), err)
		}
		entries, err := iceberg.ReadManifest(mf, bytes.NewReader(manifestData), true)
		if err != nil {
			return nil, nil, fmt.Errorf("parse delete manifest %s: %w", mf.FilePath(), err)
		}

		for _, entry := range entries {
			// Equality deletes are handled separately by the caller.
			if entry.DataFile().ContentType() != iceberg.EntryContentPosDeletes {
				continue
			}

			allPositionEntries = append(allPositionEntries, entry)

			fileDeletes, err := readPositionDeleteFile(ctx, filerClient, bucketName, tablePath, entry.DataFile().FilePath())
			if err != nil {
				return nil, nil, fmt.Errorf("read position delete file %s: %w", entry.DataFile().FilePath(), err)
			}
			if len(fileDeletes) != 1 {
				// Phase 1 only rewrites files that target a single data file.
				continue
			}

			// Single-iteration loop: extract the lone (path, positions) pair
			// from the one-element map.
			var referencedPath string
			var positions []int64
			for fp, pos := range fileDeletes {
				referencedPath = normalizeIcebergPath(fp, bucketName, tablePath)
				positions = append(positions, pos...)
			}
			sort.Slice(positions, func(i, j int) bool { return positions[i] < positions[j] })

			// Group key: spec ID + partition tuple + referenced data file,
			// NUL-separated to avoid ambiguous concatenations.
			partKey := partitionKey(entry.DataFile().Partition())
			groupKey := fmt.Sprintf("spec%d\x00%s\x00%s", entry.DataFile().SpecID(), partKey, referencedPath)
			group, ok := groups[groupKey]
			if !ok {
				group = &deleteRewriteGroup{
					SpecID:         entry.DataFile().SpecID(),
					Partition:      entry.DataFile().Partition(),
					PartitionKey:   partKey,
					ReferencedPath: referencedPath,
				}
				groups[groupKey] = group
			}
			group.Inputs = append(group.Inputs, deleteRewriteInput{
				Entry:          entry,
				ReferencedPath: referencedPath,
				Positions:      positions,
			})
			group.TotalSize += entry.DataFile().FileSizeBytes()
		}
	}

	return groups, allPositionEntries, nil
}
||||
|
|
||||
|
func groupEligibleForRewrite(group *deleteRewriteGroup, config Config) bool { |
||||
|
if group == nil { |
||||
|
return false |
||||
|
} |
||||
|
if len(group.Inputs) < 2 { |
||||
|
return false |
||||
|
} |
||||
|
if group.TotalSize > config.DeleteMaxFileGroupSizeBytes { |
||||
|
return false |
||||
|
} |
||||
|
target := config.DeleteTargetFileSizeBytes |
||||
|
if target <= 0 { |
||||
|
target = defaultDeleteTargetFileSizeMB * 1024 * 1024 |
||||
|
} |
||||
|
outputFiles := int64(estimatedDeleteOutputFiles(group.TotalSize, target)) |
||||
|
if config.DeleteMaxOutputFiles > 0 && outputFiles > config.DeleteMaxOutputFiles { |
||||
|
return false |
||||
|
} |
||||
|
return int64(len(group.Inputs)) >= config.DeleteMinInputFiles |
||||
|
} |
||||
|
|
||||
|
// estimatedDeleteOutputFiles returns how many files a rewrite of totalSize
// bytes would produce at the given target size (ceiling division). It never
// returns less than 1, and non-positive inputs also yield 1.
func estimatedDeleteOutputFiles(totalSize, targetSize int64) int {
	if totalSize <= 0 || targetSize <= 0 {
		return 1
	}
	estimated := int(math.Ceil(float64(totalSize) / float64(targetSize)))
	if estimated > 1 {
		return estimated
	}
	return 1
}
||||
|
|
||||
|
func manifestEntrySeqNum(entry iceberg.ManifestEntry) *int64 { |
||||
|
seqNum := entry.SequenceNum() |
||||
|
if seqNum < 0 { |
||||
|
return nil |
||||
|
} |
||||
|
return &seqNum |
||||
|
} |
||||
|
|
||||
|
func manifestEntryFileSeqNum(entry iceberg.ManifestEntry) *int64 { |
||||
|
if fileSeqNum := entry.FileSequenceNum(); fileSeqNum != nil { |
||||
|
value := *fileSeqNum |
||||
|
return &value |
||||
|
} |
||||
|
return manifestEntrySeqNum(entry) |
||||
|
} |
||||
|
|
||||
|
// writeManifestWithContent serializes a manifest and returns a descriptor
// carrying the requested content type plus the final serialized bytes.
// iceberg.WriteManifest apparently always stamps the Avro "content"
// property as "data" (TODO confirm against the library version in go.mod),
// so for delete manifests the bytes are patched afterwards and the
// ManifestFile descriptor is rebuilt with the patched length and the
// caller's content type, carrying over all row/file counters.
func writeManifestWithContent(
	filename string,
	version int,
	spec iceberg.PartitionSpec,
	schema *iceberg.Schema,
	snapshotID int64,
	entries []iceberg.ManifestEntry,
	content iceberg.ManifestContent,
) (iceberg.ManifestFile, []byte, error) {
	var manifestBuf bytes.Buffer
	mf, err := iceberg.WriteManifest(filename, &manifestBuf, version, spec, schema, snapshotID, entries)
	if err != nil {
		return nil, nil, err
	}

	manifestBytes := manifestBuf.Bytes()
	if content == iceberg.ManifestContentDeletes {
		// Flip the embedded Avro "content" property from "data" to "deletes".
		manifestBytes, err = patchManifestContentBytesToDeletes(manifestBytes)
		if err != nil {
			return nil, nil, err
		}
	}

	// Rebuild the descriptor: the patched byte length and requested content
	// type replace what WriteManifest produced; everything else is copied.
	rebuilt := iceberg.NewManifestFile(version, filename, int64(len(manifestBytes)), int32(spec.ID()), snapshotID).
		Content(content).
		AddedFiles(mf.AddedDataFiles()).
		ExistingFiles(mf.ExistingDataFiles()).
		DeletedFiles(mf.DeletedDataFiles()).
		AddedRows(mf.AddedRows()).
		ExistingRows(mf.ExistingRows()).
		DeletedRows(mf.DeletedRows()).
		Partitions(mf.Partitions()).
		Build()
	return rebuilt, manifestBytes, nil
}
||||
|
|
||||
|
// patchManifestContentBytesToDeletes rewrites the first occurrence of the
// manifest's Avro "content" metadata value from "data" to "deletes" and
// returns the patched bytes. It errors when the expected byte pattern is
// not found, so a library change cannot silently produce an unpatched
// manifest.
//
// Avro encodes strings as a zig-zag varint length followed by the bytes:
// 0x0e is length 7 ("content" / "deletes"), 0x08 is length 4 ("data").
func patchManifestContentBytesToDeletes(manifestBytes []byte) ([]byte, error) {
	// Renamed from `old`/`new`: `new` shadowed the predeclared Go builtin.
	dataMarker := append([]byte{0x0e}, []byte("content")...)
	dataMarker = append(dataMarker, 0x08)
	dataMarker = append(dataMarker, []byte("data")...)

	deletesMarker := append([]byte{0x0e}, []byte("content")...)
	deletesMarker = append(deletesMarker, 0x0e)
	deletesMarker = append(deletesMarker, []byte("deletes")...)

	// Replace only the first occurrence; bytes.Replace returns a copy, so a
	// byte-equal result means the marker was absent.
	patched := bytes.Replace(manifestBytes, dataMarker, deletesMarker, 1)
	if bytes.Equal(patched, manifestBytes) {
		return nil, fmt.Errorf("delete manifest content patch failed")
	}
	return patched, nil
}
||||
|
|
||||
|
func writePositionDeleteFile(rows []positionDeleteRow) ([]byte, error) { |
||||
|
var buf bytes.Buffer |
||||
|
writer := parquet.NewWriter(&buf, parquet.SchemaOf(new(positionDeleteRow))) |
||||
|
for _, row := range rows { |
||||
|
if err := writer.Write(&row); err != nil { |
||||
|
return nil, fmt.Errorf("write position delete row: %w", err) |
||||
|
} |
||||
|
} |
||||
|
if err := writer.Close(); err != nil { |
||||
|
return nil, fmt.Errorf("close position delete file: %w", err) |
||||
|
} |
||||
|
return buf.Bytes(), nil |
||||
|
} |
||||
|
|
||||
|
// rewritePositionDeleteFiles merges eligible groups of small position-delete
// files into fewer, larger ones and commits the result as a new replace
// snapshot. Untouched position deletes and all equality deletes are carried
// forward as EXISTING entries; data manifests are reused as-is. It returns a
// human-readable summary, a metrics map, and an error. Artifacts written
// before a failed commit are cleaned up best-effort.
func (h *Handler) rewritePositionDeleteFiles(
	ctx context.Context,
	filerClient filer_pb.SeaweedFilerClient,
	bucketName, tablePath string,
	config Config,
) (string, map[string]int64, error) {
	start := time.Now()
	meta, metadataFileName, err := loadCurrentMetadata(ctx, filerClient, bucketName, tablePath)
	if err != nil {
		return "", nil, fmt.Errorf("load metadata: %w", err)
	}

	// Nothing to rewrite without a snapshot to base the plan on.
	currentSnap := meta.CurrentSnapshot()
	if currentSnap == nil || currentSnap.ManifestList == "" {
		return "no current snapshot", nil, nil
	}

	manifestListData, err := loadFileByIcebergPath(ctx, filerClient, bucketName, tablePath, currentSnap.ManifestList)
	if err != nil {
		return "", nil, fmt.Errorf("read manifest list: %w", err)
	}
	manifests, err := iceberg.ReadManifestList(bytes.NewReader(manifestListData))
	if err != nil {
		return "", nil, fmt.Errorf("parse manifest list: %w", err)
	}

	// Partition existing manifests: data manifests are reused untouched in
	// the new manifest list; equality-delete entries are gathered so they can
	// be re-emitted as EXISTING in the rewritten delete manifests.
	var dataManifests []iceberg.ManifestFile
	var allEqualityEntries []iceberg.ManifestEntry
	for _, mf := range manifests {
		switch mf.ManifestContent() {
		case iceberg.ManifestContentData:
			dataManifests = append(dataManifests, mf)
		case iceberg.ManifestContentDeletes:
			manifestData, readErr := loadFileByIcebergPath(ctx, filerClient, bucketName, tablePath, mf.FilePath())
			if readErr != nil {
				return "", nil, fmt.Errorf("read delete manifest %s: %w", mf.FilePath(), readErr)
			}
			entries, parseErr := iceberg.ReadManifest(mf, bytes.NewReader(manifestData), true)
			if parseErr != nil {
				return "", nil, fmt.Errorf("parse delete manifest %s: %w", mf.FilePath(), parseErr)
			}
			for _, entry := range entries {
				if entry.DataFile().ContentType() == iceberg.EntryContentEqDeletes {
					allEqualityEntries = append(allEqualityEntries, entry)
				}
			}
		}
	}

	groupMap, allPositionEntries, err := collectDeleteRewriteGroups(ctx, filerClient, bucketName, tablePath, manifests)
	if err != nil {
		return "", nil, err
	}
	if len(groupMap) == 0 {
		return "no position delete files eligible for rewrite", nil, nil
	}

	// Every file written before the commit is tracked so that, if the commit
	// never lands, the orphaned artifacts can be removed.
	type artifact struct {
		dir, fileName string
	}
	var writtenArtifacts []artifact
	committed := false
	defer func() {
		if committed || len(writtenArtifacts) == 0 {
			return
		}
		// Cleanup uses a fresh context: ctx may already be canceled by the
		// time the deferred function runs.
		cleanupCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancel()
		for _, a := range writtenArtifacts {
			if err := deleteFilerFile(cleanupCtx, filerClient, a.dir, a.fileName); err != nil {
				glog.Warningf("iceberg delete rewrite: failed to clean up artifact %s/%s: %v", a.dir, a.fileName, err)
			}
		}
	}()

	specByID := specByID(meta)
	predicate, err := parsePartitionPredicate(config.Where, meta)
	if err != nil {
		return "", nil, err
	}

	// Entries for the new snapshot, bucketed by partition spec so each spec
	// gets its own rewritten delete manifest.
	type specEntries struct {
		specID  int32
		entries []iceberg.ManifestEntry
	}
	specEntriesMap := make(map[int32]*specEntries)
	addToSpec := func(specID int32, entry iceberg.ManifestEntry) {
		se, ok := specEntriesMap[specID]
		if !ok {
			se = &specEntries{specID: specID}
			specEntriesMap[specID] = se
		}
		se.entries = append(se.entries, entry)
	}

	// NOTE(review): the new snapshot ID is the wall-clock millisecond
	// timestamp (also reused as TimestampMs below) — presumably matching the
	// other maintenance operations' convention; confirm collision safety.
	newSnapID := time.Now().UnixMilli()
	version := meta.Version()
	snapshotID := currentSnap.SnapshotID
	seqNum := currentSnap.SequenceNumber + 1
	metaDir := path.Join(s3tables.TablesPath, bucketName, tablePath, "metadata")
	dataDir := path.Join(s3tables.TablesPath, bucketName, tablePath, "data")
	artifactSuffix := compactRandomSuffix()

	replacedPaths := make(map[string]struct{}) // delete-file paths superseded by this rewrite
	var rewrittenGroups int64
	var skippedGroups int64
	var deleteFilesRewritten int64
	var deleteFilesWritten int64
	var deleteBytesRewritten int64

	// Iterate groups in sorted key order so output file naming and manifest
	// contents are deterministic.
	sortedKeys := make([]string, 0, len(groupMap))
	for key := range groupMap {
		sortedKeys = append(sortedKeys, key)
	}
	sort.Strings(sortedKeys)

	for _, key := range sortedKeys {
		group := groupMap[key]
		// Apply the optional partition filter first; groups outside the
		// filter (or with unknown specs) count as skipped.
		if predicate != nil {
			spec, ok := specByID[int(group.SpecID)]
			if !ok {
				continue
			}
			match, err := predicate.Matches(spec, group.Partition)
			if err != nil {
				return "", nil, err
			}
			if !match {
				skippedGroups++
				continue
			}
		}
		if !groupEligibleForRewrite(group, config) {
			skippedGroups++
			continue
		}
		// Merge every input file's positions into one row set; all inputs in
		// a group reference the same data file by construction.
		rows := make([]positionDeleteRow, 0)
		for _, input := range group.Inputs {
			for _, pos := range input.Positions {
				rows = append(rows, positionDeleteRow{FilePath: input.ReferencedPath, Pos: pos})
			}
			replacedPaths[input.Entry.DataFile().FilePath()] = struct{}{}
			deleteFilesRewritten++
			deleteBytesRewritten += input.Entry.DataFile().FileSizeBytes()
		}
		// Sort by (file_path, pos) — the ordering position-delete files use.
		sort.Slice(rows, func(i, j int) bool {
			if rows[i].FilePath != rows[j].FilePath {
				return rows[i].FilePath < rows[j].FilePath
			}
			return rows[i].Pos < rows[j].Pos
		})

		// Spread the rows evenly over the estimated output file count.
		outputFiles := estimatedDeleteOutputFiles(group.TotalSize, config.DeleteTargetFileSizeBytes)
		rowsPerFile := (len(rows) + outputFiles - 1) / outputFiles
		if rowsPerFile < 1 {
			rowsPerFile = len(rows)
		}

		for startIdx, fileIdx := 0, 0; startIdx < len(rows); startIdx, fileIdx = startIdx+rowsPerFile, fileIdx+1 {
			endIdx := startIdx + rowsPerFile
			if endIdx > len(rows) {
				endIdx = len(rows)
			}
			outputRows := rows[startIdx:endIdx]
			deleteBytes, err := writePositionDeleteFile(outputRows)
			if err != nil {
				return "", nil, err
			}
			fileName := fmt.Sprintf("rewrite-delete-%d-%s-%d.parquet", newSnapID, artifactSuffix, deleteFilesWritten)
			if err := ensureFilerDir(ctx, filerClient, dataDir); err != nil {
				return "", nil, fmt.Errorf("ensure data dir: %w", err)
			}
			if err := saveFilerFile(ctx, filerClient, dataDir, fileName, deleteBytes); err != nil {
				return "", nil, fmt.Errorf("save rewritten delete file: %w", err)
			}
			writtenArtifacts = append(writtenArtifacts, artifact{dir: dataDir, fileName: fileName})

			spec, ok := specByID[int(group.SpecID)]
			if !ok {
				return "", nil, fmt.Errorf("partition spec %d not found", group.SpecID)
			}
			dfBuilder, err := iceberg.NewDataFileBuilder(
				spec,
				iceberg.EntryContentPosDeletes,
				path.Join("data", fileName),
				iceberg.ParquetFile,
				group.Partition,
				nil, nil,
				int64(len(outputRows)),
				int64(len(deleteBytes)),
			)
			if err != nil {
				return "", nil, fmt.Errorf("build rewritten delete file: %w", err)
			}
			entry := iceberg.NewManifestEntry(iceberg.EntryStatusADDED, &newSnapID, nil, nil, dfBuilder.Build())
			addToSpec(group.SpecID, entry)
			deleteFilesWritten++
		}

		// Mark each replaced input as DELETED, preserving its original
		// sequence numbers so delete applicability is unchanged.
		for _, input := range group.Inputs {
			delEntry := iceberg.NewManifestEntry(
				iceberg.EntryStatusDELETED,
				&newSnapID,
				manifestEntrySeqNum(input.Entry),
				manifestEntryFileSeqNum(input.Entry),
				input.Entry.DataFile(),
			)
			addToSpec(group.SpecID, delEntry)
		}
		rewrittenGroups++
	}

	if rewrittenGroups == 0 {
		return "no position delete files eligible for rewrite", nil, nil
	}

	// Carry forward all equality deletes, untouched, as EXISTING entries.
	for _, entry := range allEqualityEntries {
		existingEntry := iceberg.NewManifestEntry(
			iceberg.EntryStatusEXISTING,
			func() *int64 { id := entry.SnapshotID(); return &id }(),
			manifestEntrySeqNum(entry),
			manifestEntryFileSeqNum(entry),
			entry.DataFile(),
		)
		addToSpec(entry.DataFile().SpecID(), existingEntry)
	}

	// Carry forward position deletes that were NOT replaced by this rewrite.
	for _, entry := range allPositionEntries {
		if _, replaced := replacedPaths[entry.DataFile().FilePath()]; replaced {
			continue
		}
		existingEntry := iceberg.NewManifestEntry(
			iceberg.EntryStatusEXISTING,
			func() *int64 { id := entry.SnapshotID(); return &id }(),
			manifestEntrySeqNum(entry),
			manifestEntryFileSeqNum(entry),
			entry.DataFile(),
		)
		addToSpec(entry.DataFile().SpecID(), existingEntry)
	}

	// Emit one delete manifest per spec, in ascending spec-ID order for
	// deterministic output.
	sortedSpecIDs := make([]int32, 0, len(specEntriesMap))
	for specID := range specEntriesMap {
		sortedSpecIDs = append(sortedSpecIDs, specID)
	}
	sort.Slice(sortedSpecIDs, func(i, j int) bool { return sortedSpecIDs[i] < sortedSpecIDs[j] })

	allManifests := make([]iceberg.ManifestFile, 0, len(dataManifests)+len(sortedSpecIDs))
	allManifests = append(allManifests, dataManifests...)

	for _, specID := range sortedSpecIDs {
		spec, ok := specByID[int(specID)]
		if !ok {
			return "", nil, fmt.Errorf("partition spec %d not found", specID)
		}
		manifestName := fmt.Sprintf("rewrite-delete-%d-%s-spec%d.avro", newSnapID, artifactSuffix, specID)
		manifestPath := path.Join("metadata", manifestName)
		mf, manifestBytes, err := writeManifestWithContent(
			manifestPath,
			version,
			spec,
			meta.CurrentSchema(),
			newSnapID,
			specEntriesMap[specID].entries,
			iceberg.ManifestContentDeletes,
		)
		if err != nil {
			return "", nil, fmt.Errorf("write delete manifest for spec %d: %w", specID, err)
		}
		if err := saveFilerFile(ctx, filerClient, metaDir, manifestName, manifestBytes); err != nil {
			return "", nil, fmt.Errorf("save delete manifest for spec %d: %w", specID, err)
		}
		writtenArtifacts = append(writtenArtifacts, artifact{dir: metaDir, fileName: manifestName})
		allManifests = append(allManifests, mf)
	}

	var manifestListBuf bytes.Buffer
	if err := iceberg.WriteManifestList(version, &manifestListBuf, newSnapID, &snapshotID, &seqNum, 0, allManifests); err != nil {
		return "", nil, fmt.Errorf("write delete manifest list: %w", err)
	}
	manifestListName := fmt.Sprintf("snap-%d-%s.avro", newSnapID, artifactSuffix)
	if err := saveFilerFile(ctx, filerClient, metaDir, manifestListName, manifestListBuf.Bytes()); err != nil {
		return "", nil, fmt.Errorf("save delete manifest list: %w", err)
	}
	writtenArtifacts = append(writtenArtifacts, artifact{dir: metaDir, fileName: manifestListName})

	manifestListLocation := path.Join("metadata", manifestListName)
	// Commit with optimistic retry; the callback re-checks that the table
	// still points at the snapshot this plan was built from, otherwise the
	// plan is stale and the commit aborts.
	err = h.commitWithRetry(ctx, filerClient, bucketName, tablePath, metadataFileName, config, func(currentMeta table.Metadata, builder *table.MetadataBuilder) error {
		cs := currentMeta.CurrentSnapshot()
		if cs == nil || cs.SnapshotID != snapshotID {
			return errStalePlan
		}
		newSnapshot := &table.Snapshot{
			SnapshotID:       newSnapID,
			ParentSnapshotID: &snapshotID,
			SequenceNumber:   seqNum,
			TimestampMs:      newSnapID,
			ManifestList:     manifestListLocation,
			Summary: &table.Summary{
				Operation: table.OpReplace,
				Properties: map[string]string{
					"maintenance":            "rewrite_position_delete_files",
					"delete-files-rewritten": fmt.Sprintf("%d", deleteFilesRewritten),
					"delete-files-written":   fmt.Sprintf("%d", deleteFilesWritten),
					"delete-groups":          fmt.Sprintf("%d", rewrittenGroups),
				},
			},
			SchemaID: func() *int {
				id := meta.CurrentSchema().ID
				return &id
			}(),
		}
		if err := builder.AddSnapshot(newSnapshot); err != nil {
			return err
		}
		return builder.SetSnapshotRef(table.MainBranch, newSnapID, table.BranchRef)
	})
	if err != nil {
		return "", nil, fmt.Errorf("commit delete rewrite: %w", err)
	}

	// Mark success so the deferred cleanup leaves the artifacts in place.
	committed = true
	metrics := map[string]int64{
		MetricDeleteFilesRewritten: deleteFilesRewritten,
		MetricDeleteFilesWritten:   deleteFilesWritten,
		MetricDeleteBytesRewritten: deleteBytesRewritten,
		MetricDeleteGroupsPlanned:  rewrittenGroups,
		MetricDeleteGroupsSkipped:  skippedGroups,
		MetricDurationMs:           time.Since(start).Milliseconds(),
	}
	return fmt.Sprintf(
		"rewrote %d position delete files into %d across %d group(s)",
		deleteFilesRewritten,
		deleteFilesWritten,
		rewrittenGroups,
	), metrics, nil
}
||||
@ -0,0 +1,311 @@ |
|||||
|
package iceberg |
||||
|
|
||||
|
import ( |
||||
|
"fmt" |
||||
|
"regexp" |
||||
|
"strconv" |
||||
|
"strings" |
||||
|
|
||||
|
"github.com/apache/iceberg-go" |
||||
|
"github.com/apache/iceberg-go/table" |
||||
|
) |
||||
|
|
||||
|
var (
	// whereEqualsPattern matches a single "field = literal" clause; the
	// literal (group 2) is captured verbatim, quotes included.
	whereEqualsPattern = regexp.MustCompile(`^([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.+)$`)
	// whereInPattern matches a case-insensitive "field IN (...)" clause; the
	// raw list body (group 2) is split later by splitLiteralList.
	whereInPattern = regexp.MustCompile(`^(?i)([A-Za-z_][A-Za-z0-9_]*)\s+IN\s*\((.*)\)$`)
)
||||
|
|
||||
|
// whereClause is one parsed conjunct of a where filter: the partition field
// name and the literal(s) it may equal — one literal for "=", several for
// "IN".
type whereClause struct {
	Field    string
	Literals []string
}
||||
|
|
||||
|
// partitionPredicate is the conjunction (AND) of parsed where clauses; a
// partition matches only if every clause matches (see Matches).
type partitionPredicate struct {
	Clauses []whereClause
}
||||
|
|
||||
|
// validateWhereOperations rejects a non-empty where filter when any of the
// requested operations does not support partition filtering. An empty or
// whitespace-only filter is always valid.
func validateWhereOperations(where string, ops []string) error {
	if strings.TrimSpace(where) == "" {
		return nil
	}
	supported := func(op string) bool {
		return op == "compact" || op == "rewrite_manifests" || op == "rewrite_position_delete_files"
	}
	for _, op := range ops {
		if !supported(op) {
			return fmt.Errorf("where filter is only supported for compact, rewrite_position_delete_files, and rewrite_manifests")
		}
	}
	return nil
}
||||
|
|
||||
|
// parsePartitionPredicate parses a where filter into a partition predicate.
// An empty filter yields a nil predicate (meaning "match everything"). It
// errors when metadata is missing, the table is unpartitioned, a clause is
// malformed, or a referenced field is absent from the current partition
// spec.
func parsePartitionPredicate(where string, meta table.Metadata) (*partitionPredicate, error) {
	where = strings.TrimSpace(where)
	if where == "" {
		return nil, nil
	}
	if meta == nil {
		return nil, fmt.Errorf("where filter requires table metadata")
	}

	// Filtering is meaningless without partitions.
	specs := meta.PartitionSpecs()
	if len(specs) == 0 || meta.PartitionSpec().IsUnpartitioned() {
		return nil, fmt.Errorf("where filter is not supported for unpartitioned tables")
	}

	// Split on top-level ANDs (quote-aware), then parse each conjunct.
	rawClauses := splitWhereConjunction(where)
	clauses := make([]whereClause, 0, len(rawClauses))
	for _, raw := range rawClauses {
		clause, err := parseWhereClause(raw)
		if err != nil {
			return nil, err
		}
		clauses = append(clauses, clause)
	}

	// Validate against the current partition spec only. Historical specs may
	// lack fields added during schema evolution; per-entry matching in Matches()
	// handles those gracefully.
	currentSpec := meta.PartitionSpec()
	for _, clause := range clauses {
		if !specHasFieldByName(currentSpec, clause.Field) {
			return nil, fmt.Errorf("where field %q is not present in current partition spec %d", clause.Field, currentSpec.ID())
		}
	}

	return &partitionPredicate{Clauses: clauses}, nil
}
||||
|
|
||||
|
// splitWhereConjunction splits a where expression on top-level,
// case-insensitive AND keywords, ignoring ANDs inside single- or
// double-quoted literals. Empty fragments are dropped; each returned part
// is whitespace-trimmed.
func splitWhereConjunction(where string) []string {
	isSpace := func(r rune) bool {
		return r == ' ' || r == '\t' || r == '\n' || r == '\r'
	}

	var (
		clauses []string
		buf     strings.Builder
		inQuote rune // active quote character, 0 when outside quotes
	)
	flush := func() {
		if p := strings.TrimSpace(buf.String()); p != "" {
			clauses = append(clauses, p)
		}
		buf.Reset()
	}

	rs := []rune(where)
	for i := 0; i < len(rs); i++ {
		r := rs[i]
		switch {
		case inQuote != 0:
			// Inside a quoted literal: copy through until the closing quote.
			buf.WriteRune(r)
			if r == inQuote {
				inQuote = 0
			}
		case r == '\'' || r == '"':
			inQuote = r
			buf.WriteRune(r)
		case (r == 'A' || r == 'a') && i+3 < len(rs) &&
			strings.EqualFold(string(rs[i:i+3]), "AND") &&
			i > 0 && isSpace(rs[i-1]) && isSpace(rs[i+3]):
			// A whitespace-delimited AND keyword: end the current clause and
			// jump past the keyword (the trailing space is dropped by the
			// loop increment; trimming handles any extra whitespace).
			flush()
			i += 3
		default:
			buf.WriteRune(r)
		}
	}
	flush()
	return clauses
}
||||
|
|
||||
|
// isWhitespace reports whether r is one of the four ASCII whitespace
// characters recognized by the where-filter tokenizer.
func isWhitespace(r rune) bool {
	switch r {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
||||
|
|
||||
|
func parseWhereClause(raw string) (whereClause, error) { |
||||
|
raw = strings.TrimSpace(raw) |
||||
|
if raw == "" { |
||||
|
return whereClause{}, fmt.Errorf("empty where clause") |
||||
|
} |
||||
|
if matches := whereInPattern.FindStringSubmatch(raw); matches != nil { |
||||
|
literals, err := splitLiteralList(matches[2]) |
||||
|
if err != nil { |
||||
|
return whereClause{}, err |
||||
|
} |
||||
|
if len(literals) == 0 { |
||||
|
return whereClause{}, fmt.Errorf("empty IN list in where clause %q", raw) |
||||
|
} |
||||
|
return whereClause{Field: matches[1], Literals: literals}, nil |
||||
|
} |
||||
|
if matches := whereEqualsPattern.FindStringSubmatch(raw); matches != nil { |
||||
|
return whereClause{Field: matches[1], Literals: []string{strings.TrimSpace(matches[2])}}, nil |
||||
|
} |
||||
|
return whereClause{}, fmt.Errorf("unsupported where clause %q", raw) |
||||
|
} |
||||
|
|
||||
|
// splitLiteralList splits the body of an IN (...) list on commas, keeping
// commas inside single- or double-quoted literals intact. Quotes are
// preserved in the returned literals; empty fragments are dropped. An
// unterminated quote is an error.
func splitLiteralList(raw string) ([]string, error) {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil, nil
	}

	var (
		out   []string
		piece strings.Builder
		open  rune // active quote character, 0 when outside quotes
	)
	emit := func() {
		if lit := strings.TrimSpace(piece.String()); lit != "" {
			out = append(out, lit)
		}
		piece.Reset()
	}

	for _, r := range raw {
		if open != 0 {
			piece.WriteRune(r)
			if r == open {
				open = 0
			}
			continue
		}
		switch r {
		case '\'', '"':
			open = r
			piece.WriteRune(r)
		case ',':
			emit()
		default:
			piece.WriteRune(r)
		}
	}
	if open != 0 {
		return nil, fmt.Errorf("unterminated quoted literal in IN list")
	}
	emit()
	return out, nil
}
||||
|
|
||||
|
func specHasFieldByName(spec iceberg.PartitionSpec, fieldName string) bool { |
||||
|
for field := range spec.Fields() { |
||||
|
if field.Name == fieldName { |
||||
|
return true |
||||
|
} |
||||
|
} |
||||
|
return false |
||||
|
} |
||||
|
|
||||
|
func specByID(meta table.Metadata) map[int]iceberg.PartitionSpec { |
||||
|
result := make(map[int]iceberg.PartitionSpec) |
||||
|
if meta == nil { |
||||
|
return result |
||||
|
} |
||||
|
for _, spec := range meta.PartitionSpecs() { |
||||
|
result[spec.ID()] = spec |
||||
|
} |
||||
|
return result |
||||
|
} |
||||
|
|
||||
|
func (p *partitionPredicate) Matches(spec iceberg.PartitionSpec, partition map[int]any) (bool, error) { |
||||
|
if p == nil { |
||||
|
return true, nil |
||||
|
} |
||||
|
|
||||
|
valuesByName := make(map[string]any) |
||||
|
for field := range spec.Fields() { |
||||
|
if value, ok := partition[field.FieldID]; ok { |
||||
|
valuesByName[field.Name] = value |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
for _, clause := range p.Clauses { |
||||
|
actual, ok := valuesByName[clause.Field] |
||||
|
if !ok { |
||||
|
// Field not present in this spec (e.g. older spec before schema
|
||||
|
// evolution). Skip this entry rather than erroring.
|
||||
|
return false, nil |
||||
|
} |
||||
|
matched := false |
||||
|
for _, literal := range clause.Literals { |
||||
|
ok, err := literalMatchesActual(literal, actual) |
||||
|
if err != nil { |
||||
|
return false, fmt.Errorf("where field %q: %w", clause.Field, err) |
||||
|
} |
||||
|
if ok { |
||||
|
matched = true |
||||
|
break |
||||
|
} |
||||
|
} |
||||
|
if !matched { |
||||
|
return false, nil |
||||
|
} |
||||
|
} |
||||
|
return true, nil |
||||
|
} |
||||
|
|
||||
|
func literalMatchesActual(raw string, actual any) (bool, error) { |
||||
|
raw = strings.TrimSpace(raw) |
||||
|
if raw == "" { |
||||
|
return false, fmt.Errorf("empty literal") |
||||
|
} |
||||
|
|
||||
|
switch v := actual.(type) { |
||||
|
case string: |
||||
|
value, err := unquoteLiteral(raw) |
||||
|
if err != nil { |
||||
|
return false, err |
||||
|
} |
||||
|
return v == value, nil |
||||
|
case bool: |
||||
|
value, err := strconv.ParseBool(strings.ToLower(strings.TrimSpace(raw))) |
||||
|
if err != nil { |
||||
|
return false, fmt.Errorf("parse bool literal %q: %w", raw, err) |
||||
|
} |
||||
|
return v == value, nil |
||||
|
case int: |
||||
|
value, err := strconv.ParseInt(raw, 10, 64) |
||||
|
if err != nil { |
||||
|
return false, fmt.Errorf("parse int literal %q: %w", raw, err) |
||||
|
} |
||||
|
return int64(v) == value, nil |
||||
|
case int32: |
||||
|
value, err := strconv.ParseInt(raw, 10, 32) |
||||
|
if err != nil { |
||||
|
return false, fmt.Errorf("parse int32 literal %q: %w", raw, err) |
||||
|
} |
||||
|
return v == int32(value), nil |
||||
|
case int64: |
||||
|
value, err := strconv.ParseInt(raw, 10, 64) |
||||
|
if err != nil { |
||||
|
return false, fmt.Errorf("parse int64 literal %q: %w", raw, err) |
||||
|
} |
||||
|
return v == value, nil |
||||
|
case float32: |
||||
|
value, err := strconv.ParseFloat(raw, 32) |
||||
|
if err != nil { |
||||
|
return false, fmt.Errorf("parse float32 literal %q: %w", raw, err) |
||||
|
} |
||||
|
return v == float32(value), nil |
||||
|
case float64: |
||||
|
value, err := strconv.ParseFloat(raw, 64) |
||||
|
if err != nil { |
||||
|
return false, fmt.Errorf("parse float64 literal %q: %w", raw, err) |
||||
|
} |
||||
|
return v == value, nil |
||||
|
default: |
||||
|
value, err := unquoteLiteral(raw) |
||||
|
if err != nil { |
||||
|
return false, err |
||||
|
} |
||||
|
return fmt.Sprint(actual) == value, nil |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// unquoteLiteral strips one matching pair of surrounding single or double
// quotes from the whitespace-trimmed literal. Unquoted or mismatched input
// is returned as-is (trimmed). The error result is always nil and exists
// only for call-site symmetry with other literal parsers.
func unquoteLiteral(raw string) (string, error) {
	trimmed := strings.TrimSpace(raw)
	if len(trimmed) < 2 {
		return trimmed, nil
	}
	first, last := trimmed[0], trimmed[len(trimmed)-1]
	if first == last && (first == '\'' || first == '"') {
		return trimmed[1 : len(trimmed)-1], nil
	}
	return trimmed, nil
}
||||
@ -0,0 +1,287 @@ |
|||||
|
package iceberg |
||||
|
|
||||
|
import ( |
||||
|
"bytes" |
||||
|
"context" |
||||
|
"encoding/json" |
||||
|
"fmt" |
||||
|
"path" |
||||
|
"strings" |
||||
|
"testing" |
||||
|
"time" |
||||
|
|
||||
|
"github.com/apache/iceberg-go" |
||||
|
"github.com/apache/iceberg-go/table" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3tables" |
||||
|
) |
||||
|
|
||||
|
// partitionedTestFile describes one parquet data file to synthesize for a
// partition-filter test: its file name, the partition values keyed by
// partition field ID, and the rows to write into it.
type partitionedTestFile struct {
	// Name is the bare file name placed under the table's data/ directory.
	Name string
	// Partition maps partition field ID -> partition value for this file.
	Partition map[int]any
	// Rows are the (ID, Name) records written into the parquet file.
	Rows []struct {
		ID   int64
		Name string
	}
}
||||
|
|
||||
|
// populatePartitionedDataTable seeds the fake filer with a complete Iceberg
// table: one parquet data file per partitionedTestFile, one manifest per
// group in manifestGroups, a manifest list, a single snapshot (ID 1) set as
// the main branch, and the bucket/namespace/table directory entries with the
// internal metadata xattrs the handler reads. It returns the built table
// metadata.
func populatePartitionedDataTable(
	t *testing.T,
	fs *fakeFilerServer,
	setup tableSetup,
	partitionSpec iceberg.PartitionSpec,
	manifestGroups [][]partitionedTestFile,
) table.Metadata {
	t.Helper()

	// Fresh table metadata rooted at the s3 location the handler expects.
	schema := newTestSchema()
	meta, err := table.NewMetadata(schema, &partitionSpec, table.UnsortedSortOrder, "s3://"+setup.BucketName+"/"+setup.tablePath(), nil)
	if err != nil {
		t.Fatalf("create metadata: %v", err)
	}

	// Filer directory layout: buckets/<bucket>/<namespace>/<table>/{metadata,data}.
	bucketsPath := s3tables.TablesPath
	bucketPath := path.Join(bucketsPath, setup.BucketName)
	nsPath := path.Join(bucketPath, setup.Namespace)
	tablePath := path.Join(nsPath, setup.TableName)
	metaDir := path.Join(tablePath, "metadata")
	dataDir := path.Join(tablePath, "data")

	// Write one manifest per group; each group's files become ADDED entries
	// of that manifest, all attributed to snapshot ID 1.
	version := meta.Version()
	var manifestFiles []iceberg.ManifestFile
	for idx, group := range manifestGroups {
		entries := make([]iceberg.ManifestEntry, 0, len(group))
		for _, file := range group {
			data := writeTestParquetFile(t, fs, dataDir, file.Name, file.Rows)
			dfBuilder, err := iceberg.NewDataFileBuilder(
				partitionSpec,
				iceberg.EntryContentData,
				path.Join("data", file.Name),
				iceberg.ParquetFile,
				file.Partition,
				nil, nil,
				int64(len(file.Rows)),
				int64(len(data)),
			)
			if err != nil {
				t.Fatalf("build data file %s: %v", file.Name, err)
			}
			snapID := int64(1)
			entries = append(entries, iceberg.NewManifestEntry(iceberg.EntryStatusADDED, &snapID, nil, nil, dfBuilder.Build()))
		}

		// Serialize the manifest and store it under metadata/ in the fake filer.
		manifestName := fmt.Sprintf("where-manifest-%d.avro", idx+1)
		var manifestBuf bytes.Buffer
		mf, err := iceberg.WriteManifest(path.Join("metadata", manifestName), &manifestBuf, version, partitionSpec, schema, 1, entries)
		if err != nil {
			t.Fatalf("write manifest %d: %v", idx+1, err)
		}
		fs.putEntry(metaDir, manifestName, &filer_pb.Entry{
			Name:       manifestName,
			Content:    manifestBuf.Bytes(),
			Attributes: &filer_pb.FuseAttributes{Mtime: time.Now().Unix(), FileSize: uint64(manifestBuf.Len())},
		})
		manifestFiles = append(manifestFiles, mf)
	}

	// Manifest list for snapshot 1 referencing every manifest written above.
	var manifestListBuf bytes.Buffer
	seqNum := int64(1)
	if err := iceberg.WriteManifestList(version, &manifestListBuf, 1, nil, &seqNum, 0, manifestFiles); err != nil {
		t.Fatalf("write manifest list: %v", err)
	}
	fs.putEntry(metaDir, "snap-1.avro", &filer_pb.Entry{
		Name:       "snap-1.avro",
		Content:    manifestListBuf.Bytes(),
		Attributes: &filer_pb.FuseAttributes{Mtime: time.Now().Unix(), FileSize: uint64(manifestListBuf.Len())},
	})

	// Attach snapshot 1 to the metadata and point the main branch at it.
	builder, err := table.MetadataBuilderFromBase(meta, "s3://"+setup.BucketName+"/"+setup.tablePath())
	if err != nil {
		t.Fatalf("metadata builder: %v", err)
	}
	snapshot := table.Snapshot{SnapshotID: 1, TimestampMs: time.Now().UnixMilli(), ManifestList: "metadata/snap-1.avro", SequenceNumber: 1}
	if err := builder.AddSnapshot(&snapshot); err != nil {
		t.Fatalf("add snapshot: %v", err)
	}
	if err := builder.SetSnapshotRef(table.MainBranch, 1, table.BranchRef); err != nil {
		t.Fatalf("set snapshot ref: %v", err)
	}
	meta, err = builder.Build()
	if err != nil {
		t.Fatalf("build metadata: %v", err)
	}

	// Internal metadata xattr carrying the full metadata JSON, mirroring the
	// structure the table handler stores on real tables.
	fullMetadataJSON, _ := json.Marshal(meta)
	internalMeta := map[string]interface{}{
		"metadataVersion":  1,
		"metadataLocation": "metadata/v1.metadata.json",
		"metadata":         map[string]interface{}{"fullMetadata": json.RawMessage(fullMetadataJSON)},
	}
	xattr, _ := json.Marshal(internalMeta)

	// Directory entries: bucket (flagged as a table bucket), namespace, and
	// the table itself carrying the metadata xattrs.
	fs.putEntry(bucketsPath, setup.BucketName, &filer_pb.Entry{
		Name:        setup.BucketName,
		IsDirectory: true,
		Extended:    map[string][]byte{s3tables.ExtendedKeyTableBucket: []byte("true")},
	})
	fs.putEntry(bucketPath, setup.Namespace, &filer_pb.Entry{Name: setup.Namespace, IsDirectory: true})
	fs.putEntry(nsPath, setup.TableName, &filer_pb.Entry{
		Name:        setup.TableName,
		IsDirectory: true,
		Extended: map[string][]byte{
			s3tables.ExtendedKeyMetadata:        xattr,
			s3tables.ExtendedKeyMetadataVersion: metadataVersionXattr(1),
		},
	})

	return meta
}
||||
|
|
||||
|
func TestValidateWhereOperations(t *testing.T) { |
||||
|
if err := validateWhereOperations("name = 'us'", []string{"compact", "rewrite_manifests"}); err != nil { |
||||
|
t.Fatalf("unexpected validation error: %v", err) |
||||
|
} |
||||
|
if err := validateWhereOperations("name = 'us'", []string{"expire_snapshots"}); err == nil { |
||||
|
t.Fatal("expected where validation to reject expire_snapshots") |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func TestSplitWhereConjunctionQuoteAware(t *testing.T) { |
||||
|
cases := []struct { |
||||
|
input string |
||||
|
expected []string |
||||
|
}{ |
||||
|
{"a = 1 AND b = 2", []string{"a = 1", "b = 2"}}, |
||||
|
{"a = 'research AND dev'", []string{"a = 'research AND dev'"}}, |
||||
|
{"a IN ('sales AND marketing', 'eng') AND b = 2", []string{"a IN ('sales AND marketing', 'eng')", "b = 2"}}, |
||||
|
{"a = 1 and b = 2", []string{"a = 1", "b = 2"}}, |
||||
|
{"a = 'x' AND b = \"y AND z\"", []string{"a = 'x'", "b = \"y AND z\""}}, |
||||
|
} |
||||
|
for _, tc := range cases { |
||||
|
got := splitWhereConjunction(tc.input) |
||||
|
if len(got) != len(tc.expected) { |
||||
|
t.Errorf("splitWhereConjunction(%q) = %v, want %v", tc.input, got, tc.expected) |
||||
|
continue |
||||
|
} |
||||
|
for i := range got { |
||||
|
if got[i] != tc.expected[i] { |
||||
|
t.Errorf("splitWhereConjunction(%q)[%d] = %q, want %q", tc.input, i, got[i], tc.expected[i]) |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func TestPartitionPredicateMatchesUsesPartitionFieldIDs(t *testing.T) { |
||||
|
spec := iceberg.NewPartitionSpec(iceberg.PartitionField{ |
||||
|
SourceID: 2, |
||||
|
FieldID: 1000, |
||||
|
Name: "name", |
||||
|
Transform: iceberg.IdentityTransform{}, |
||||
|
}) |
||||
|
predicate := &partitionPredicate{Clauses: []whereClause{{Field: "name", Literals: []string{"'us'"}}}} |
||||
|
|
||||
|
match, err := predicate.Matches(spec, map[int]any{2: "us"}) |
||||
|
if err != nil { |
||||
|
t.Fatalf("unexpected error: %v", err) |
||||
|
} |
||||
|
if match { |
||||
|
t.Fatal("expected source-column key to not match partition predicate") |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// TestCompactDataFilesWhereFilter runs compaction with a where filter of
// "name = 'us'" over a table holding two "us" files (one per manifest) and
// two "eu" files, then asserts that only the matching "us" files were
// compacted into a single new file while both "eu" files survive untouched.
func TestCompactDataFilesWhereFilter(t *testing.T) {
	fs, client := startFakeFiler(t)

	// Identity-partition on the "name" column; partition field ID is 1000.
	partitionSpec := iceberg.NewPartitionSpec(iceberg.PartitionField{
		SourceID:  2,
		FieldID:   1000,
		Name:      "name",
		Transform: iceberg.IdentityTransform{},
	})

	// Three manifests: us-1 alone, us-2 alone, and eu-1 + eu-2 together.
	setup := tableSetup{BucketName: "tb", Namespace: "ns", TableName: "tbl"}
	populatePartitionedDataTable(t, fs, setup, partitionSpec, [][]partitionedTestFile{
		{
			{Name: "us-1.parquet", Partition: map[int]any{1000: "us"}, Rows: []struct {
				ID   int64
				Name string
			}{{1, "us"}}},
		},
		{
			{Name: "us-2.parquet", Partition: map[int]any{1000: "us"}, Rows: []struct {
				ID   int64
				Name string
			}{{2, "us"}}},
		},
		{
			{Name: "eu-1.parquet", Partition: map[int]any{1000: "eu"}, Rows: []struct {
				ID   int64
				Name string
			}{{3, "eu"}}},
			{Name: "eu-2.parquet", Partition: map[int]any{1000: "eu"}, Rows: []struct {
				ID   int64
				Name string
			}{{4, "eu"}}},
		},
	})

	handler := NewHandler(nil)
	config := Config{
		TargetFileSizeBytes: 256 * 1024 * 1024,
		MinInputFiles:       2,
		MaxCommitRetries:    3,
		Where:               "name = 'us'",
	}

	// Only the two "us" files match the filter, so exactly those compact.
	result, _, err := handler.compactDataFiles(context.Background(), client, setup.BucketName, setup.tablePath(), config, nil)
	if err != nil {
		t.Fatalf("compactDataFiles: %v", err)
	}
	if !strings.Contains(result, "compacted 2 files into 1") {
		t.Fatalf("unexpected result: %q", result)
	}

	// Reload the committed metadata and its current manifests.
	meta, _, err := loadCurrentMetadata(context.Background(), client, setup.BucketName, setup.tablePath())
	if err != nil {
		t.Fatalf("loadCurrentMetadata: %v", err)
	}
	manifests, err := loadCurrentManifests(context.Background(), client, setup.BucketName, setup.tablePath(), meta)
	if err != nil {
		t.Fatalf("loadCurrentManifests: %v", err)
	}

	// Collect every live data-file path from the data manifests.
	var liveDataPaths []string
	for _, mf := range manifests {
		if mf.ManifestContent() != iceberg.ManifestContentData {
			continue
		}
		manifestData, err := loadFileByIcebergPath(context.Background(), client, setup.BucketName, setup.tablePath(), mf.FilePath())
		if err != nil {
			t.Fatalf("load data manifest: %v", err)
		}
		entries, err := iceberg.ReadManifest(mf, bytes.NewReader(manifestData), true)
		if err != nil {
			t.Fatalf("read data manifest: %v", err)
		}
		for _, entry := range entries {
			liveDataPaths = append(liveDataPaths, entry.DataFile().FilePath())
		}
	}

	// Expect the compacted "us" output plus the two untouched "eu" files.
	if len(liveDataPaths) != 3 {
		t.Fatalf("expected 3 live data files after filtered compaction, got %v", liveDataPaths)
	}
	var compactedCount int
	for _, p := range liveDataPaths {
		switch {
		case strings.HasPrefix(p, "data/compact-"):
			compactedCount++
		case p == "data/eu-1.parquet", p == "data/eu-2.parquet":
			// Non-matching partition files must survive unchanged.
		default:
			t.Fatalf("unexpected live data file %q", p)
		}
	}
	if compactedCount != 1 {
		t.Fatalf("expected exactly one compacted file, got %d in %v", compactedCount, liveDataPaths)
	}
}
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue