You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

178 lines
5.7 KiB

package iceberg
import (
"fmt"
"strconv"
"strings"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
)
const (
jobType = "iceberg_maintenance"
defaultSnapshotRetentionHours = 168 // 7 days
defaultMaxSnapshotsToKeep = 5
defaultOrphanOlderThanHours = 72
defaultMaxCommitRetries = 5
defaultTargetFileSizeBytes = 256 * 1024 * 1024
defaultMinInputFiles = 5
defaultOperations = "all"
)
// Config holds parsed worker config values.
type Config struct {
SnapshotRetentionHours int64
MaxSnapshotsToKeep int64
OrphanOlderThanHours int64
MaxCommitRetries int64
TargetFileSizeBytes int64
MinInputFiles int64
Operations string
}
// ParseConfig extracts an iceberg maintenance Config from plugin config values.
// Values are clamped to safe minimums to prevent misconfiguration.
func ParseConfig(values map[string]*plugin_pb.ConfigValue) Config {
cfg := Config{
SnapshotRetentionHours: readInt64Config(values, "snapshot_retention_hours", defaultSnapshotRetentionHours),
MaxSnapshotsToKeep: readInt64Config(values, "max_snapshots_to_keep", defaultMaxSnapshotsToKeep),
OrphanOlderThanHours: readInt64Config(values, "orphan_older_than_hours", defaultOrphanOlderThanHours),
MaxCommitRetries: readInt64Config(values, "max_commit_retries", defaultMaxCommitRetries),
TargetFileSizeBytes: readInt64Config(values, "target_file_size_bytes", defaultTargetFileSizeBytes),
MinInputFiles: readInt64Config(values, "min_input_files", defaultMinInputFiles),
Operations: readStringConfig(values, "operations", defaultOperations),
}
// Clamp to safe minimums using the default constants
if cfg.SnapshotRetentionHours <= 0 {
cfg.SnapshotRetentionHours = defaultSnapshotRetentionHours
}
if cfg.MaxSnapshotsToKeep <= 0 {
cfg.MaxSnapshotsToKeep = defaultMaxSnapshotsToKeep
}
if cfg.OrphanOlderThanHours <= 0 {
cfg.OrphanOlderThanHours = defaultOrphanOlderThanHours
}
if cfg.MaxCommitRetries <= 0 {
cfg.MaxCommitRetries = defaultMaxCommitRetries
}
if cfg.TargetFileSizeBytes <= 0 {
cfg.TargetFileSizeBytes = defaultTargetFileSizeBytes
}
if cfg.MinInputFiles < 2 {
cfg.MinInputFiles = defaultMinInputFiles
}
return cfg
}
// parseOperations returns the ordered list of maintenance operations to execute.
// Order follows Iceberg best practices: compact → expire_snapshots → remove_orphans → rewrite_manifests.
// Returns an error if any unknown operation is specified or the result would be empty.
func parseOperations(ops string) ([]string, error) {
ops = strings.TrimSpace(strings.ToLower(ops))
if ops == "" || ops == "all" {
return []string{"compact", "expire_snapshots", "remove_orphans", "rewrite_manifests"}, nil
}
validOps := map[string]struct{}{
"compact": {},
"expire_snapshots": {},
"remove_orphans": {},
"rewrite_manifests": {},
}
requested := make(map[string]struct{})
for _, op := range strings.Split(ops, ",") {
op = strings.TrimSpace(op)
if op == "" {
continue
}
if _, ok := validOps[op]; !ok {
return nil, fmt.Errorf("unknown maintenance operation %q (valid: compact, expire_snapshots, remove_orphans, rewrite_manifests)", op)
}
requested[op] = struct{}{}
}
// Return in canonical order: compact → expire_snapshots → remove_orphans → rewrite_manifests
canonicalOrder := []string{"compact", "expire_snapshots", "remove_orphans", "rewrite_manifests"}
var result []string
for _, op := range canonicalOrder {
if _, ok := requested[op]; ok {
result = append(result, op)
}
}
if len(result) == 0 {
return nil, fmt.Errorf("no valid maintenance operations specified")
}
return result, nil
}
func extractMetadataVersion(metadataFileName string) int {
// Parse "v3.metadata.json" or "v3-{nonce}.metadata.json" → 3
name := strings.TrimPrefix(metadataFileName, "v")
name = strings.TrimSuffix(name, ".metadata.json")
// Strip any nonce suffix (e.g. "3-1709766000" → "3")
if dashIdx := strings.Index(name, "-"); dashIdx > 0 {
name = name[:dashIdx]
}
version, _ := strconv.Atoi(name)
return version
}
// readStringConfig reads a string value from plugin config, with fallback.
func readStringConfig(values map[string]*plugin_pb.ConfigValue, field string, fallback string) string {
if values == nil {
return fallback
}
value := values[field]
if value == nil {
return fallback
}
switch kind := value.Kind.(type) {
case *plugin_pb.ConfigValue_StringValue:
return kind.StringValue
case *plugin_pb.ConfigValue_Int64Value:
return strconv.FormatInt(kind.Int64Value, 10)
case *plugin_pb.ConfigValue_DoubleValue:
return strconv.FormatFloat(kind.DoubleValue, 'f', -1, 64)
case *plugin_pb.ConfigValue_BoolValue:
return strconv.FormatBool(kind.BoolValue)
default:
glog.V(1).Infof("readStringConfig: unexpected config value type %T for field %q, using fallback", value.Kind, field)
}
return fallback
}
// readInt64Config reads an int64 value from plugin config, with fallback.
func readInt64Config(values map[string]*plugin_pb.ConfigValue, field string, fallback int64) int64 {
if values == nil {
return fallback
}
value := values[field]
if value == nil {
return fallback
}
switch kind := value.Kind.(type) {
case *plugin_pb.ConfigValue_Int64Value:
return kind.Int64Value
case *plugin_pb.ConfigValue_DoubleValue:
return int64(kind.DoubleValue)
case *plugin_pb.ConfigValue_StringValue:
parsed, err := strconv.ParseInt(strings.TrimSpace(kind.StringValue), 10, 64)
if err == nil {
return parsed
}
case *plugin_pb.ConfigValue_BoolValue:
if kind.BoolValue {
return 1
}
return 0
default:
glog.V(1).Infof("readInt64Config: unexpected config value type %T for field %q, using fallback", value.Kind, field)
}
return fallback
}