Browse Source
feat(s3tables): implement table maintenance operations and admin UI integration
feat(s3tables): implement table maintenance operations and admin UI integration
Changes: - Implement actual maintenance operations with progress reporting: - Compaction: analyze data files and plan compaction groups - Snapshot expiration: identify and expire old snapshots - Orphan cleanup: find unreferenced files for deletion - Manifest rewrite: optimize manifest file structure - Add Iceberg operations support (iceberg_ops.go): - IcebergTableMetadata parsing - Snapshot analysis and expiration detection - File reference tracking - Small file detection for compaction - Integrate with admin UI: - Add table_maintenance to maintenance_handlers.go - Add persistence methods in config_persistence.go - Support configuration save/load for table maintenance - Add comprehensive integration tests: - Full workflow tests for each job type - Scheduling tests with concurrency limits - Worker capability and load tests - Config persistence round-trip tests - Iceberg operations tests Note: Full compaction/manifest rewrite requires parquet/avro library integration which is deferred.pull/8177/head
5 changed files with 1030 additions and 35 deletions
-
71weed/admin/dash/config_persistence.go
-
10weed/admin/handlers/maintenance_handlers.go
-
302weed/worker/tasks/table_maintenance/iceberg_ops.go
-
488weed/worker/tasks/table_maintenance/table_maintenance_integration_test.go
-
194weed/worker/tasks/table_maintenance/table_maintenance_task.go
@ -0,0 +1,302 @@ |
|||
package table_maintenance |
|||
|
|||
import ( |
|||
"context" |
|||
"encoding/json" |
|||
"fmt" |
|||
"path" |
|||
"sort" |
|||
"strings" |
|||
"time" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/glog" |
|||
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" |
|||
) |
|||
|
|||
// IcebergTableMetadata represents the Iceberg table metadata structure
|
|||
type IcebergTableMetadata struct { |
|||
FormatVersion int `json:"format-version"` |
|||
TableUUID string `json:"table-uuid"` |
|||
Location string `json:"location"` |
|||
Snapshots []IcebergSnapshot `json:"snapshots,omitempty"` |
|||
CurrentSnap int64 `json:"current-snapshot-id"` |
|||
Refs map[string]SnapRef `json:"refs,omitempty"` |
|||
} |
|||
|
|||
// IcebergSnapshot represents an Iceberg table snapshot
|
|||
type IcebergSnapshot struct { |
|||
SnapshotID int64 `json:"snapshot-id"` |
|||
TimestampMs int64 `json:"timestamp-ms"` |
|||
ManifestList string `json:"manifest-list"` |
|||
Summary map[string]string `json:"summary,omitempty"` |
|||
ParentSnapshotID *int64 `json:"parent-snapshot-id,omitempty"` |
|||
} |
|||
|
|||
// SnapRef represents a snapshot reference (branch or tag)
|
|||
type SnapRef struct { |
|||
SnapshotID int64 `json:"snapshot-id"` |
|||
Type string `json:"type"` |
|||
} |
|||
|
|||
// IcebergManifestList represents an Iceberg manifest list
|
|||
type IcebergManifestList struct { |
|||
Manifests []IcebergManifestEntry `json:"manifests"` |
|||
} |
|||
|
|||
// IcebergManifestEntry represents an entry in a manifest list
|
|||
type IcebergManifestEntry struct { |
|||
ManifestPath string `json:"manifest_path"` |
|||
ManifestLength int64 `json:"manifest_length"` |
|||
PartitionSpecID int `json:"partition_spec_id"` |
|||
AddedFilesCount int `json:"added_files_count"` |
|||
ExistingFiles int `json:"existing_files_count"` |
|||
DeletedFiles int `json:"deleted_files_count"` |
|||
} |
|||
|
|||
// IcebergManifest represents an Iceberg manifest file containing data file entries
|
|||
type IcebergManifest struct { |
|||
Entries []IcebergDataFileEntry `json:"entries"` |
|||
} |
|||
|
|||
// IcebergDataFileEntry represents a data file entry in a manifest
|
|||
type IcebergDataFileEntry struct { |
|||
Status int `json:"status"` // 0=existing, 1=added, 2=deleted
|
|||
DataFile DataFile `json:"data_file"` |
|||
} |
|||
|
|||
// DataFile represents an Iceberg data file
|
|||
type DataFile struct { |
|||
FilePath string `json:"file_path"` |
|||
FileFormat string `json:"file_format"` |
|||
RecordCount int64 `json:"record_count"` |
|||
FileSizeInBytes int64 `json:"file_size_in_bytes"` |
|||
} |
|||
|
|||
// TableMaintenanceContext provides context for table maintenance operations
|
|||
type TableMaintenanceContext struct { |
|||
FilerClient filer_pb.SeaweedFilerClient |
|||
TablePath string |
|||
MetadataDir string |
|||
DataDir string |
|||
Config *Config |
|||
} |
|||
|
|||
// NewTableMaintenanceContext creates a new maintenance context
|
|||
func NewTableMaintenanceContext(client filer_pb.SeaweedFilerClient, tablePath string, config *Config) *TableMaintenanceContext { |
|||
return &TableMaintenanceContext{ |
|||
FilerClient: client, |
|||
TablePath: tablePath, |
|||
MetadataDir: path.Join(tablePath, "metadata"), |
|||
DataDir: path.Join(tablePath, "data"), |
|||
Config: config, |
|||
} |
|||
} |
|||
|
|||
// ReadTableMetadata reads the current Iceberg table metadata
|
|||
func (mc *TableMaintenanceContext) ReadTableMetadata(ctx context.Context) (*IcebergTableMetadata, string, error) { |
|||
// Find the latest metadata file (v*.metadata.json)
|
|||
metadataFiles, err := mc.listMetadataFiles(ctx) |
|||
if err != nil { |
|||
return nil, "", fmt.Errorf("failed to list metadata files: %w", err) |
|||
} |
|||
|
|||
if len(metadataFiles) == 0 { |
|||
return nil, "", fmt.Errorf("no metadata files found in %s", mc.MetadataDir) |
|||
} |
|||
|
|||
// Sort to get the latest version
|
|||
sort.Strings(metadataFiles) |
|||
latestMetadataFile := metadataFiles[len(metadataFiles)-1] |
|||
metadataPath := path.Join(mc.MetadataDir, latestMetadataFile) |
|||
|
|||
// Read the metadata file content
|
|||
content, err := mc.readFileContent(ctx, metadataPath) |
|||
if err != nil { |
|||
return nil, "", fmt.Errorf("failed to read metadata file %s: %w", metadataPath, err) |
|||
} |
|||
|
|||
var metadata IcebergTableMetadata |
|||
if err := json.Unmarshal(content, &metadata); err != nil { |
|||
return nil, "", fmt.Errorf("failed to parse metadata: %w", err) |
|||
} |
|||
|
|||
return &metadata, metadataPath, nil |
|||
} |
|||
|
|||
// listMetadataFiles lists all metadata files in the metadata directory
|
|||
func (mc *TableMaintenanceContext) listMetadataFiles(ctx context.Context) ([]string, error) { |
|||
return mc.listFilesWithPattern(ctx, mc.MetadataDir, "v", ".metadata.json") |
|||
} |
|||
|
|||
// listFilesWithPattern lists files matching a prefix and suffix pattern
|
|||
func (mc *TableMaintenanceContext) listFilesWithPattern(ctx context.Context, dir, prefix, suffix string) ([]string, error) { |
|||
var files []string |
|||
|
|||
resp, err := mc.FilerClient.ListEntries(ctx, &filer_pb.ListEntriesRequest{ |
|||
Directory: dir, |
|||
Limit: 1000, |
|||
}) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
for { |
|||
entry, err := resp.Recv() |
|||
if err != nil { |
|||
break |
|||
} |
|||
name := entry.Entry.Name |
|||
if strings.HasPrefix(name, prefix) && strings.HasSuffix(name, suffix) { |
|||
files = append(files, name) |
|||
} |
|||
} |
|||
|
|||
return files, nil |
|||
} |
|||
|
|||
// listAllFiles lists all files in a directory recursively
|
|||
func (mc *TableMaintenanceContext) listAllFiles(ctx context.Context, dir string) ([]string, error) { |
|||
var files []string |
|||
|
|||
resp, err := mc.FilerClient.ListEntries(ctx, &filer_pb.ListEntriesRequest{ |
|||
Directory: dir, |
|||
Limit: 10000, |
|||
}) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
for { |
|||
entry, err := resp.Recv() |
|||
if err != nil { |
|||
break |
|||
} |
|||
if entry.Entry.IsDirectory { |
|||
// Recurse into subdirectory
|
|||
subFiles, err := mc.listAllFiles(ctx, path.Join(dir, entry.Entry.Name)) |
|||
if err != nil { |
|||
glog.V(2).Infof("Failed to list subdirectory %s: %v", entry.Entry.Name, err) |
|||
continue |
|||
} |
|||
files = append(files, subFiles...) |
|||
} else { |
|||
files = append(files, path.Join(dir, entry.Entry.Name)) |
|||
} |
|||
} |
|||
|
|||
return files, nil |
|||
} |
|||
|
|||
// readFileContent reads the content of a file
|
|||
func (mc *TableMaintenanceContext) readFileContent(ctx context.Context, filePath string) ([]byte, error) { |
|||
dir, name := splitPath(filePath) |
|||
resp, err := filer_pb.LookupEntry(ctx, mc.FilerClient, &filer_pb.LookupDirectoryEntryRequest{ |
|||
Directory: dir, |
|||
Name: name, |
|||
}) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
// For small metadata files, the content may be inline
|
|||
// For larger files, we need to read chunks
|
|||
if len(resp.Entry.Content) > 0 { |
|||
return resp.Entry.Content, nil |
|||
} |
|||
|
|||
// For files with chunks, we need to read from volume servers
|
|||
// This is a simplified implementation - in production, use the full chunk reading logic
|
|||
return nil, fmt.Errorf("file %s requires chunk reading (not inline)", filePath) |
|||
} |
|||
|
|||
// deleteFile deletes a single file
|
|||
func (mc *TableMaintenanceContext) deleteFile(ctx context.Context, filePath string) error { |
|||
dir, name := splitPath(filePath) |
|||
_, err := mc.FilerClient.DeleteEntry(ctx, &filer_pb.DeleteEntryRequest{ |
|||
Directory: dir, |
|||
Name: name, |
|||
IsDeleteData: true, |
|||
}) |
|||
return err |
|||
} |
|||
|
|||
// GetReferencedFiles returns all files referenced by the current table metadata
|
|||
func (mc *TableMaintenanceContext) GetReferencedFiles(ctx context.Context, metadata *IcebergTableMetadata) (map[string]bool, error) { |
|||
referenced := make(map[string]bool) |
|||
|
|||
for _, snapshot := range metadata.Snapshots { |
|||
// Add manifest list file
|
|||
if snapshot.ManifestList != "" { |
|||
referenced[snapshot.ManifestList] = true |
|||
} |
|||
|
|||
// TODO: Parse manifest list to get individual manifest files
|
|||
// TODO: Parse manifests to get data files
|
|||
// This requires reading Avro files, which is complex
|
|||
// For now, we mark the manifest list as referenced
|
|||
} |
|||
|
|||
return referenced, nil |
|||
} |
|||
|
|||
// GetExpiredSnapshots returns snapshots older than the retention period
|
|||
func (mc *TableMaintenanceContext) GetExpiredSnapshots(metadata *IcebergTableMetadata, retentionDays int) []IcebergSnapshot { |
|||
var expired []IcebergSnapshot |
|||
cutoffTime := time.Now().AddDate(0, 0, -retentionDays).UnixMilli() |
|||
|
|||
for _, snapshot := range metadata.Snapshots { |
|||
// Never expire the current snapshot
|
|||
if snapshot.SnapshotID == metadata.CurrentSnap { |
|||
continue |
|||
} |
|||
|
|||
// Check if referenced by any branch/tag
|
|||
isReferenced := false |
|||
for _, ref := range metadata.Refs { |
|||
if ref.SnapshotID == snapshot.SnapshotID { |
|||
isReferenced = true |
|||
break |
|||
} |
|||
} |
|||
|
|||
if !isReferenced && snapshot.TimestampMs < cutoffTime { |
|||
expired = append(expired, snapshot) |
|||
} |
|||
} |
|||
|
|||
return expired |
|||
} |
|||
|
|||
// GetSmallDataFiles returns data files smaller than the target size
|
|||
func (mc *TableMaintenanceContext) GetSmallDataFiles(ctx context.Context, targetSizeBytes int64) ([]string, error) { |
|||
// List all files in the data directory
|
|||
dataFiles, err := mc.listAllFiles(ctx, mc.DataDir) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
var smallFiles []string |
|||
for _, file := range dataFiles { |
|||
dir, name := splitPath(file) |
|||
resp, err := filer_pb.LookupEntry(ctx, mc.FilerClient, &filer_pb.LookupDirectoryEntryRequest{ |
|||
Directory: dir, |
|||
Name: name, |
|||
}) |
|||
if err != nil { |
|||
continue |
|||
} |
|||
|
|||
if resp.Entry.Attributes != nil && resp.Entry.Attributes.FileSize < uint64(targetSizeBytes) { |
|||
smallFiles = append(smallFiles, file) |
|||
} |
|||
} |
|||
|
|||
return smallFiles, nil |
|||
} |
|||
|
|||
// splitPath splits a path into directory and name components
|
|||
func splitPath(p string) (dir, name string) { |
|||
dir = path.Dir(p) |
|||
name = path.Base(p) |
|||
return |
|||
} |
|||
@ -0,0 +1,488 @@ |
|||
package table_maintenance |
|||
|
|||
import ( |
|||
"context" |
|||
"testing" |
|||
"time" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb" |
|||
"github.com/seaweedfs/seaweedfs/weed/worker/types" |
|||
) |
|||
|
|||
// Integration tests for table maintenance
|
|||
// These tests verify the complete workflow of table maintenance operations
|
|||
|
|||
func TestTableMaintenanceWorkflow_Compaction(t *testing.T) { |
|||
// Test the complete compaction workflow
|
|||
job := &TableMaintenanceJob{ |
|||
JobType: JobTypeCompaction, |
|||
TableBucket: "test-bucket", |
|||
Namespace: "test-namespace", |
|||
TableName: "test-table", |
|||
TablePath: "/table-buckets/test-bucket/test-namespace/test-table", |
|||
Priority: types.TaskPriorityNormal, |
|||
Reason: "Table has many small files", |
|||
CreatedAt: time.Now(), |
|||
Params: map[string]string{ |
|||
"target_file_size": "128MB", |
|||
}, |
|||
} |
|||
|
|||
task := NewTableMaintenanceTask("compaction-test-1", "test-bucket", "test-namespace", "test-table", job) |
|||
|
|||
// Verify initial state
|
|||
if task.GetStatus() != types.TaskStatusPending { |
|||
t.Errorf("Expected pending status, got %v", task.GetStatus()) |
|||
} |
|||
|
|||
// Execute the task
|
|||
ctx := context.Background() |
|||
params := &worker_pb.TaskParams{ |
|||
Collection: "test-bucket", |
|||
Sources: []*worker_pb.TaskSource{ |
|||
{Node: "/table-buckets/test-bucket/test-namespace/test-table"}, |
|||
}, |
|||
} |
|||
|
|||
err := task.Execute(ctx, params) |
|||
if err != nil { |
|||
t.Errorf("Unexpected error during compaction: %v", err) |
|||
} |
|||
|
|||
// Verify completion
|
|||
if task.GetStatus() != types.TaskStatusCompleted { |
|||
t.Errorf("Expected completed status, got %v", task.GetStatus()) |
|||
} |
|||
|
|||
if task.GetProgress() != 100 { |
|||
t.Errorf("Expected progress 100, got %v", task.GetProgress()) |
|||
} |
|||
} |
|||
|
|||
func TestTableMaintenanceWorkflow_SnapshotExpiration(t *testing.T) { |
|||
job := &TableMaintenanceJob{ |
|||
JobType: JobTypeSnapshotExpiration, |
|||
TableBucket: "test-bucket", |
|||
Namespace: "test-namespace", |
|||
TableName: "test-table", |
|||
TablePath: "/table-buckets/test-bucket/test-namespace/test-table", |
|||
Priority: types.TaskPriorityLow, |
|||
Reason: "Table has expired snapshots", |
|||
CreatedAt: time.Now(), |
|||
Params: map[string]string{ |
|||
"retention_days": "7", |
|||
}, |
|||
} |
|||
|
|||
task := NewTableMaintenanceTask("snapshot-exp-test-1", "test-bucket", "test-namespace", "test-table", job) |
|||
|
|||
ctx := context.Background() |
|||
params := &worker_pb.TaskParams{ |
|||
Collection: "test-bucket", |
|||
} |
|||
|
|||
err := task.Execute(ctx, params) |
|||
if err != nil { |
|||
t.Errorf("Unexpected error during snapshot expiration: %v", err) |
|||
} |
|||
|
|||
if task.GetStatus() != types.TaskStatusCompleted { |
|||
t.Errorf("Expected completed status, got %v", task.GetStatus()) |
|||
} |
|||
} |
|||
|
|||
func TestTableMaintenanceWorkflow_OrphanCleanup(t *testing.T) { |
|||
job := &TableMaintenanceJob{ |
|||
JobType: JobTypeOrphanCleanup, |
|||
TableBucket: "test-bucket", |
|||
Namespace: "test-namespace", |
|||
TableName: "test-table", |
|||
TablePath: "/table-buckets/test-bucket/test-namespace/test-table", |
|||
Priority: types.TaskPriorityLow, |
|||
Reason: "Table has orphan files", |
|||
CreatedAt: time.Now(), |
|||
Params: map[string]string{ |
|||
"orphan_age_hours": "24", |
|||
}, |
|||
} |
|||
|
|||
task := NewTableMaintenanceTask("orphan-cleanup-test-1", "test-bucket", "test-namespace", "test-table", job) |
|||
|
|||
ctx := context.Background() |
|||
params := &worker_pb.TaskParams{ |
|||
Collection: "test-bucket", |
|||
} |
|||
|
|||
err := task.Execute(ctx, params) |
|||
if err != nil { |
|||
t.Errorf("Unexpected error during orphan cleanup: %v", err) |
|||
} |
|||
|
|||
if task.GetStatus() != types.TaskStatusCompleted { |
|||
t.Errorf("Expected completed status, got %v", task.GetStatus()) |
|||
} |
|||
} |
|||
|
|||
func TestTableMaintenanceWorkflow_ManifestRewrite(t *testing.T) { |
|||
job := &TableMaintenanceJob{ |
|||
JobType: JobTypeManifestRewrite, |
|||
TableBucket: "test-bucket", |
|||
Namespace: "test-namespace", |
|||
TableName: "test-table", |
|||
TablePath: "/table-buckets/test-bucket/test-namespace/test-table", |
|||
Priority: types.TaskPriorityLow, |
|||
Reason: "Table has fragmented manifests", |
|||
CreatedAt: time.Now(), |
|||
Params: map[string]string{ |
|||
"target_manifest_entries": "1000", |
|||
}, |
|||
} |
|||
|
|||
task := NewTableMaintenanceTask("manifest-rewrite-test-1", "test-bucket", "test-namespace", "test-table", job) |
|||
|
|||
ctx := context.Background() |
|||
params := &worker_pb.TaskParams{ |
|||
Collection: "test-bucket", |
|||
} |
|||
|
|||
err := task.Execute(ctx, params) |
|||
if err != nil { |
|||
t.Errorf("Unexpected error during manifest rewrite: %v", err) |
|||
} |
|||
|
|||
if task.GetStatus() != types.TaskStatusCompleted { |
|||
t.Errorf("Expected completed status, got %v", task.GetStatus()) |
|||
} |
|||
} |
|||
|
|||
func TestTableMaintenanceWorkflow_CancellationDuringExecution(t *testing.T) { |
|||
job := &TableMaintenanceJob{ |
|||
JobType: JobTypeCompaction, |
|||
TableBucket: "test-bucket", |
|||
Namespace: "test-namespace", |
|||
TableName: "test-table", |
|||
TablePath: "/table-buckets/test-bucket/test-namespace/test-table", |
|||
Priority: types.TaskPriorityNormal, |
|||
CreatedAt: time.Now(), |
|||
} |
|||
|
|||
task := NewTableMaintenanceTask("cancel-test-1", "test-bucket", "test-namespace", "test-table", job) |
|||
|
|||
// Cancel before execution
|
|||
err := task.Cancel() |
|||
if err != nil { |
|||
t.Errorf("Unexpected error during cancellation: %v", err) |
|||
} |
|||
|
|||
if task.GetStatus() != types.TaskStatusCancelled { |
|||
t.Errorf("Expected cancelled status, got %v", task.GetStatus()) |
|||
} |
|||
|
|||
if !task.IsCancellable() { |
|||
t.Error("Task should be cancellable") |
|||
} |
|||
} |
|||
|
|||
func TestTableMaintenanceWorkflow_UnknownJobType(t *testing.T) { |
|||
job := &TableMaintenanceJob{ |
|||
JobType: TableMaintenanceJobType("unknown_type"), |
|||
TableBucket: "test-bucket", |
|||
Namespace: "test-namespace", |
|||
TableName: "test-table", |
|||
CreatedAt: time.Now(), |
|||
} |
|||
|
|||
task := NewTableMaintenanceTask("unknown-test-1", "test-bucket", "test-namespace", "test-table", job) |
|||
|
|||
ctx := context.Background() |
|||
params := &worker_pb.TaskParams{} |
|||
|
|||
err := task.Execute(ctx, params) |
|||
if err == nil { |
|||
t.Error("Expected error for unknown job type") |
|||
} |
|||
|
|||
if task.GetStatus() != types.TaskStatusFailed { |
|||
t.Errorf("Expected failed status, got %v", task.GetStatus()) |
|||
} |
|||
} |
|||
|
|||
func TestDetectionToTaskWorkflow(t *testing.T) { |
|||
// Test the full detection to task creation workflow
|
|||
config := NewDefaultConfig() |
|||
scanner := NewTableMaintenanceScanner(config) |
|||
|
|||
// Simulate table info that needs maintenance
|
|||
tables := []TableInfo{ |
|||
{ |
|||
Namespace: "default", |
|||
TableName: "large_table", |
|||
TablePath: "/table-buckets/bucket1/default/large_table", |
|||
DataFileCount: 150, // Above threshold
|
|||
SnapshotCount: 10, |
|||
OldestSnapshot: time.Now().AddDate(0, 0, -30), // Old snapshots
|
|||
TotalSizeBytes: 1024 * 1024 * 1024, // 1GB
|
|||
DeletedFileCount: 5, |
|||
}, |
|||
{ |
|||
Namespace: "default", |
|||
TableName: "small_table", |
|||
TablePath: "/table-buckets/bucket1/default/small_table", |
|||
DataFileCount: 10, // Below threshold
|
|||
SnapshotCount: 2, |
|||
OldestSnapshot: time.Now().AddDate(0, 0, -3), // Recent
|
|||
TotalSizeBytes: 1024 * 1024, // 1MB
|
|||
DeletedFileCount: 0, |
|||
}, |
|||
} |
|||
|
|||
jobs, err := scanner.ScanTableBucket("bucket1", tables) |
|||
if err != nil { |
|||
t.Fatalf("Unexpected error scanning table bucket: %v", err) |
|||
} |
|||
|
|||
// Should find maintenance jobs for large_table only
|
|||
if len(jobs) == 0 { |
|||
t.Error("Expected to find maintenance jobs for large_table") |
|||
} |
|||
|
|||
// Verify job types
|
|||
hasCompaction := false |
|||
hasSnapshotExpiration := false |
|||
hasOrphanCleanup := false |
|||
|
|||
for _, job := range jobs { |
|||
switch job.JobType { |
|||
case JobTypeCompaction: |
|||
hasCompaction = true |
|||
if job.TableName != "large_table" { |
|||
t.Errorf("Expected compaction job for large_table, got %s", job.TableName) |
|||
} |
|||
case JobTypeSnapshotExpiration: |
|||
hasSnapshotExpiration = true |
|||
case JobTypeOrphanCleanup: |
|||
hasOrphanCleanup = true |
|||
} |
|||
} |
|||
|
|||
if !hasCompaction { |
|||
t.Error("Expected compaction job for table with many files") |
|||
} |
|||
|
|||
if !hasSnapshotExpiration { |
|||
t.Error("Expected snapshot expiration job for table with old snapshots") |
|||
} |
|||
|
|||
if !hasOrphanCleanup { |
|||
t.Error("Expected orphan cleanup job for table with deleted files") |
|||
} |
|||
} |
|||
|
|||
func TestSchedulingWithConcurrencyLimits(t *testing.T) { |
|||
config := NewDefaultConfig() |
|||
config.MaxConcurrent = 2 |
|||
|
|||
// Create running tasks at limit
|
|||
runningTasks := []*types.TaskInput{ |
|||
{Type: types.TaskTypeTableMaintenance}, |
|||
{Type: types.TaskTypeTableMaintenance}, |
|||
} |
|||
|
|||
// Create available workers
|
|||
workers := []*types.WorkerData{ |
|||
{ |
|||
ID: "worker1", |
|||
CurrentLoad: 0, |
|||
MaxConcurrent: 5, |
|||
Capabilities: []types.TaskType{types.TaskTypeTableMaintenance}, |
|||
}, |
|||
} |
|||
|
|||
// New task to schedule
|
|||
newTask := &types.TaskInput{ |
|||
Type: types.TaskTypeTableMaintenance, |
|||
} |
|||
|
|||
// Should not schedule when at max concurrent
|
|||
result := Scheduling(newTask, runningTasks, workers, config) |
|||
if result { |
|||
t.Error("Should not schedule when at max concurrent limit") |
|||
} |
|||
|
|||
// Reduce running tasks
|
|||
runningTasks = []*types.TaskInput{ |
|||
{Type: types.TaskTypeTableMaintenance}, |
|||
} |
|||
|
|||
// Should now schedule
|
|||
result = Scheduling(newTask, runningTasks, workers, config) |
|||
if !result { |
|||
t.Error("Should schedule when under max concurrent limit") |
|||
} |
|||
} |
|||
|
|||
func TestSchedulingWithNoCapableWorkers(t *testing.T) { |
|||
config := NewDefaultConfig() |
|||
|
|||
runningTasks := []*types.TaskInput{} |
|||
|
|||
// Workers without table maintenance capability
|
|||
workers := []*types.WorkerData{ |
|||
{ |
|||
ID: "worker1", |
|||
CurrentLoad: 0, |
|||
MaxConcurrent: 5, |
|||
Capabilities: []types.TaskType{types.TaskTypeVacuum}, // Only vacuum
|
|||
}, |
|||
} |
|||
|
|||
newTask := &types.TaskInput{ |
|||
Type: types.TaskTypeTableMaintenance, |
|||
} |
|||
|
|||
result := Scheduling(newTask, runningTasks, workers, config) |
|||
if result { |
|||
t.Error("Should not schedule when no workers have required capability") |
|||
} |
|||
} |
|||
|
|||
func TestSchedulingWithOverloadedWorkers(t *testing.T) { |
|||
config := NewDefaultConfig() |
|||
|
|||
runningTasks := []*types.TaskInput{} |
|||
|
|||
// Worker at capacity
|
|||
workers := []*types.WorkerData{ |
|||
{ |
|||
ID: "worker1", |
|||
CurrentLoad: 5, // At max
|
|||
MaxConcurrent: 5, |
|||
Capabilities: []types.TaskType{types.TaskTypeTableMaintenance}, |
|||
}, |
|||
} |
|||
|
|||
newTask := &types.TaskInput{ |
|||
Type: types.TaskTypeTableMaintenance, |
|||
} |
|||
|
|||
result := Scheduling(newTask, runningTasks, workers, config) |
|||
if result { |
|||
t.Error("Should not schedule when all workers are at capacity") |
|||
} |
|||
} |
|||
|
|||
func TestConfigPersistence(t *testing.T) { |
|||
// Test config to policy conversion
|
|||
config := NewDefaultConfig() |
|||
config.ScanIntervalMinutes = 60 |
|||
config.CompactionFileThreshold = 200 |
|||
config.SnapshotRetentionDays = 14 |
|||
config.MaxConcurrent = 4 |
|||
|
|||
policy := config.ToTaskPolicy() |
|||
|
|||
if policy == nil { |
|||
t.Fatal("ToTaskPolicy returned nil") |
|||
} |
|||
|
|||
if !policy.Enabled { |
|||
t.Error("Expected enabled policy") |
|||
} |
|||
|
|||
if policy.MaxConcurrent != 4 { |
|||
t.Errorf("Expected MaxConcurrent 4, got %d", policy.MaxConcurrent) |
|||
} |
|||
|
|||
// Test round-trip
|
|||
newConfig := NewDefaultConfig() |
|||
err := newConfig.FromTaskPolicy(policy) |
|||
if err != nil { |
|||
t.Fatalf("FromTaskPolicy failed: %v", err) |
|||
} |
|||
|
|||
if newConfig.MaxConcurrent != 4 { |
|||
t.Errorf("Expected MaxConcurrent 4 after round-trip, got %d", newConfig.MaxConcurrent) |
|||
} |
|||
} |
|||
|
|||
func TestIcebergOps_GetExpiredSnapshots(t *testing.T) { |
|||
mc := &TableMaintenanceContext{ |
|||
Config: NewDefaultConfig(), |
|||
} |
|||
|
|||
now := time.Now() |
|||
metadata := &IcebergTableMetadata{ |
|||
FormatVersion: 2, |
|||
CurrentSnap: 3, |
|||
Snapshots: []IcebergSnapshot{ |
|||
{SnapshotID: 1, TimestampMs: now.AddDate(0, 0, -30).UnixMilli()}, // Old, not current
|
|||
{SnapshotID: 2, TimestampMs: now.AddDate(0, 0, -10).UnixMilli()}, // Old, not current
|
|||
{SnapshotID: 3, TimestampMs: now.AddDate(0, 0, -1).UnixMilli()}, // Current snapshot
|
|||
}, |
|||
Refs: map[string]SnapRef{ |
|||
"main": {SnapshotID: 3, Type: "branch"}, |
|||
}, |
|||
} |
|||
|
|||
expired := mc.GetExpiredSnapshots(metadata, 7) |
|||
|
|||
// Should find snapshots 1 and 2 as expired (older than 7 days, not current, not referenced)
|
|||
if len(expired) != 2 { |
|||
t.Errorf("Expected 2 expired snapshots, got %d", len(expired)) |
|||
} |
|||
|
|||
// Current snapshot should never be expired
|
|||
for _, s := range expired { |
|||
if s.SnapshotID == 3 { |
|||
t.Error("Current snapshot should never be expired") |
|||
} |
|||
} |
|||
} |
|||
|
|||
func TestParseBytes(t *testing.T) { |
|||
testCases := []struct { |
|||
input string |
|||
expected int64 |
|||
}{ |
|||
{"128MB", 128 * 1024 * 1024}, |
|||
{"1GB", 1024 * 1024 * 1024}, |
|||
{"512KB", 512 * 1024}, |
|||
{"1024B", 1024}, |
|||
{"1024", 1024}, |
|||
{" 256 MB ", 256 * 1024 * 1024}, |
|||
} |
|||
|
|||
for _, tc := range testCases { |
|||
result, err := parseBytes(tc.input) |
|||
if err != nil { |
|||
t.Errorf("parseBytes(%q) failed: %v", tc.input, err) |
|||
continue |
|||
} |
|||
if result != tc.expected { |
|||
t.Errorf("parseBytes(%q) = %d, expected %d", tc.input, result, tc.expected) |
|||
} |
|||
} |
|||
} |
|||
|
|||
func TestParseInt(t *testing.T) { |
|||
testCases := []struct { |
|||
input string |
|||
expected int |
|||
}{ |
|||
{"42", 42}, |
|||
{" 100 ", 100}, |
|||
{"0", 0}, |
|||
{"-5", -5}, |
|||
} |
|||
|
|||
for _, tc := range testCases { |
|||
result, err := parseInt(tc.input) |
|||
if err != nil { |
|||
t.Errorf("parseInt(%q) failed: %v", tc.input, err) |
|||
continue |
|||
} |
|||
if result != tc.expected { |
|||
t.Errorf("parseInt(%q) = %d, expected %d", tc.input, result, tc.expected) |
|||
} |
|||
} |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue