You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
178 lines
5.7 KiB
178 lines
5.7 KiB
package iceberg
|
|
|
|
import (
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
|
|
)
|
|
|
|
const (
	// jobType is the job type name for iceberg maintenance.
	jobType = "iceberg_maintenance"

	// Defaults that ParseConfig applies when a setting is absent or rejected.
	// The two *Hours values are written as days * 24; the file size is 256 MiB.
	defaultSnapshotRetentionHours = 7 * 24
	defaultMaxSnapshotsToKeep     = 5
	defaultOrphanOlderThanHours   = 3 * 24
	defaultMaxCommitRetries       = 5
	defaultTargetFileSizeBytes    = 256 << 20
	defaultMinInputFiles          = 5
	defaultOperations             = "all"
)
|
|
|
|
// Config carries the worker settings produced by ParseConfig. Each field
// mirrors one plugin config key, named in the field's comment.
type Config struct {
	// SnapshotRetentionHours comes from "snapshot_retention_hours".
	SnapshotRetentionHours int64

	// MaxSnapshotsToKeep comes from "max_snapshots_to_keep".
	MaxSnapshotsToKeep int64

	// OrphanOlderThanHours comes from "orphan_older_than_hours".
	OrphanOlderThanHours int64

	// MaxCommitRetries comes from "max_commit_retries".
	MaxCommitRetries int64

	// TargetFileSizeBytes comes from "target_file_size_bytes".
	TargetFileSizeBytes int64

	// MinInputFiles comes from "min_input_files".
	MinInputFiles int64

	// Operations comes from "operations" and is stored unvalidated.
	Operations string
}
|
|
|
|
// ParseConfig extracts an iceberg maintenance Config from plugin config values.
|
|
// Values are clamped to safe minimums to prevent misconfiguration.
|
|
func ParseConfig(values map[string]*plugin_pb.ConfigValue) Config {
|
|
cfg := Config{
|
|
SnapshotRetentionHours: readInt64Config(values, "snapshot_retention_hours", defaultSnapshotRetentionHours),
|
|
MaxSnapshotsToKeep: readInt64Config(values, "max_snapshots_to_keep", defaultMaxSnapshotsToKeep),
|
|
OrphanOlderThanHours: readInt64Config(values, "orphan_older_than_hours", defaultOrphanOlderThanHours),
|
|
MaxCommitRetries: readInt64Config(values, "max_commit_retries", defaultMaxCommitRetries),
|
|
TargetFileSizeBytes: readInt64Config(values, "target_file_size_bytes", defaultTargetFileSizeBytes),
|
|
MinInputFiles: readInt64Config(values, "min_input_files", defaultMinInputFiles),
|
|
Operations: readStringConfig(values, "operations", defaultOperations),
|
|
}
|
|
|
|
// Clamp to safe minimums using the default constants
|
|
if cfg.SnapshotRetentionHours <= 0 {
|
|
cfg.SnapshotRetentionHours = defaultSnapshotRetentionHours
|
|
}
|
|
if cfg.MaxSnapshotsToKeep <= 0 {
|
|
cfg.MaxSnapshotsToKeep = defaultMaxSnapshotsToKeep
|
|
}
|
|
if cfg.OrphanOlderThanHours <= 0 {
|
|
cfg.OrphanOlderThanHours = defaultOrphanOlderThanHours
|
|
}
|
|
if cfg.MaxCommitRetries <= 0 {
|
|
cfg.MaxCommitRetries = defaultMaxCommitRetries
|
|
}
|
|
if cfg.TargetFileSizeBytes <= 0 {
|
|
cfg.TargetFileSizeBytes = defaultTargetFileSizeBytes
|
|
}
|
|
if cfg.MinInputFiles < 2 {
|
|
cfg.MinInputFiles = defaultMinInputFiles
|
|
}
|
|
|
|
return cfg
|
|
}
|
|
|
|
// parseOperations turns a comma-separated operations setting into the list of
// maintenance operations to run.
//
// Input is lower-cased and trimmed; an empty string or "all" selects every
// operation. Otherwise each comma-separated entry is trimmed, empty entries
// are skipped and duplicates collapse. The result is always in canonical
// order (compact → expire_snapshots → remove_orphans → rewrite_manifests),
// regardless of the order given.
//
// An error is returned for an unknown operation name, or when no operation
// remains after skipping empty entries.
func parseOperations(ops string) ([]string, error) {
	canonical := []string{"compact", "expire_snapshots", "remove_orphans", "rewrite_manifests"}

	normalized := strings.TrimSpace(strings.ToLower(ops))
	switch normalized {
	case "", "all":
		return canonical, nil
	}

	wanted := make(map[string]bool, len(canonical))
	for _, token := range strings.Split(normalized, ",") {
		switch name := strings.TrimSpace(token); name {
		case "":
			// Stray commas and blank entries are skipped.
		case "compact", "expire_snapshots", "remove_orphans", "rewrite_manifests":
			wanted[name] = true
		default:
			return nil, fmt.Errorf("unknown maintenance operation %q (valid: compact, expire_snapshots, remove_orphans, rewrite_manifests)", name)
		}
	}

	// Emit the requested operations in canonical order.
	var ordered []string
	for _, name := range canonical {
		if wanted[name] {
			ordered = append(ordered, name)
		}
	}
	if len(ordered) == 0 {
		return nil, fmt.Errorf("no valid maintenance operations specified")
	}
	return ordered, nil
}
|
|
|
|
// extractMetadataVersion parses the numeric version out of a metadata file
// name of the form "v3.metadata.json" or "v3-{nonce}.metadata.json" (→ 3).
//
// It returns 0 when the name does not yield a non-negative integer version:
// for example an empty or unrelated name, or a name whose version part is
// missing or negative such as "v-3.metadata.json".
func extractMetadataVersion(metadataFileName string) int {
	name := strings.TrimPrefix(metadataFileName, "v")
	name = strings.TrimSuffix(name, ".metadata.json")
	// Strip any nonce suffix (e.g. "3-1709766000" → "3"). A dash in the first
	// position is not a separator; it leaves the version part empty or signed
	// and is rejected below.
	if dashIdx := strings.Index(name, "-"); dashIdx > 0 {
		name = name[:dashIdx]
	}
	version, err := strconv.Atoi(name)
	if err != nil || version < 0 {
		// Unparsable or negative: report "no version" rather than letting a
		// bogus number reach the caller.
		return 0
	}
	return version
}
|
|
|
|
// readStringConfig reads a string value from plugin config, with fallback.
|
|
func readStringConfig(values map[string]*plugin_pb.ConfigValue, field string, fallback string) string {
|
|
if values == nil {
|
|
return fallback
|
|
}
|
|
value := values[field]
|
|
if value == nil {
|
|
return fallback
|
|
}
|
|
switch kind := value.Kind.(type) {
|
|
case *plugin_pb.ConfigValue_StringValue:
|
|
return kind.StringValue
|
|
case *plugin_pb.ConfigValue_Int64Value:
|
|
return strconv.FormatInt(kind.Int64Value, 10)
|
|
case *plugin_pb.ConfigValue_DoubleValue:
|
|
return strconv.FormatFloat(kind.DoubleValue, 'f', -1, 64)
|
|
case *plugin_pb.ConfigValue_BoolValue:
|
|
return strconv.FormatBool(kind.BoolValue)
|
|
default:
|
|
glog.V(1).Infof("readStringConfig: unexpected config value type %T for field %q, using fallback", value.Kind, field)
|
|
}
|
|
return fallback
|
|
}
|
|
|
|
// readInt64Config reads an int64 value from plugin config, with fallback.
|
|
func readInt64Config(values map[string]*plugin_pb.ConfigValue, field string, fallback int64) int64 {
|
|
if values == nil {
|
|
return fallback
|
|
}
|
|
value := values[field]
|
|
if value == nil {
|
|
return fallback
|
|
}
|
|
switch kind := value.Kind.(type) {
|
|
case *plugin_pb.ConfigValue_Int64Value:
|
|
return kind.Int64Value
|
|
case *plugin_pb.ConfigValue_DoubleValue:
|
|
return int64(kind.DoubleValue)
|
|
case *plugin_pb.ConfigValue_StringValue:
|
|
parsed, err := strconv.ParseInt(strings.TrimSpace(kind.StringValue), 10, 64)
|
|
if err == nil {
|
|
return parsed
|
|
}
|
|
case *plugin_pb.ConfigValue_BoolValue:
|
|
if kind.BoolValue {
|
|
return 1
|
|
}
|
|
return 0
|
|
default:
|
|
glog.V(1).Infof("readInt64Config: unexpected config value type %T for field %q, using fallback", value.Kind, field)
|
|
}
|
|
return fallback
|
|
}
|