You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

692 lines
24 KiB

package pluginworker
import (
"context"
"fmt"
"math"
"strings"
"time"
"github.com/seaweedfs/seaweedfs/weed/admin/topology"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
ecbalancetask "github.com/seaweedfs/seaweedfs/weed/worker/tasks/ec_balance"
workertypes "github.com/seaweedfs/seaweedfs/weed/worker/types"
"google.golang.org/grpc"
"google.golang.org/protobuf/proto"
)
// Hard bounds applied to worker-supplied EC balance settings. Values outside
// these ranges are clamped in deriveECBalanceWorkerConfig rather than rejected.
const (
	ecBalanceMinImbalanceThreshold = 0.05 // lower clamp for imbalance_threshold
	ecBalanceMaxImbalanceThreshold = 0.5  // upper clamp for imbalance_threshold
	ecBalanceMinServerCount        = 2    // lower clamp for min_server_count
)
func init() {
RegisterHandler(HandlerFactory{
JobType: "ec_balance",
Category: CategoryDefault,
Aliases: []string{"ec-balance", "ec.balance", "ec_shard_balance"},
Build: func(opts HandlerBuildOptions) (JobHandler, error) {
return NewECBalanceHandler(opts.GrpcDialOption), nil
},
})
}
// ecBalanceWorkerConfig bundles the derived task configuration with the
// worker-side detection pacing setting (see deriveECBalanceWorkerConfig).
type ecBalanceWorkerConfig struct {
	TaskConfig         *ecbalancetask.Config // detection thresholds and scope filters
	MinIntervalSeconds int                   // minimum seconds between successful detection runs
}
// ECBalanceHandler is the plugin job handler for EC shard balancing.
type ECBalanceHandler struct {
	grpcDialOption grpc.DialOption // dial option used for master and volume server gRPC calls
}
// NewECBalanceHandler constructs an ECBalanceHandler that uses the supplied
// gRPC dial option for all cluster communication.
func NewECBalanceHandler(grpcDialOption grpc.DialOption) *ECBalanceHandler {
	handler := &ECBalanceHandler{
		grpcDialOption: grpcDialOption,
	}
	return handler
}
// Capability advertises what this handler can do to the plugin host: it both
// detects imbalance and executes moves, with detection limited to one
// concurrent run and execution to three.
func (h *ECBalanceHandler) Capability() *plugin_pb.JobTypeCapability {
	return &plugin_pb.JobTypeCapability{
		JobType:                 "ec_balance",
		CanDetect:               true,
		CanExecute:              true,
		MaxDetectionConcurrency: 1,
		MaxExecutionConcurrency: 3,
		DisplayName:             "EC Shard Balance",
		Description:             "Balance EC shard distribution across racks and nodes",
		Weight:                  60,
	}
}
// Descriptor returns the full job-type descriptor for ec_balance: UI metadata,
// the admin-side scope-filter form, the worker-side threshold form, default
// values for both, and admin runtime defaults. Field names in the forms must
// match the keys read in Detect and deriveECBalanceWorkerConfig.
func (h *ECBalanceHandler) Descriptor() *plugin_pb.JobTypeDescriptor {
	return &plugin_pb.JobTypeDescriptor{
		JobType:           "ec_balance",
		DisplayName:       "EC Shard Balance",
		Description:       "Detect and execute EC shard rebalancing across the cluster",
		Icon:              "fas fa-balance-scale-left",
		DescriptorVersion: 1,
		// Admin-side form: optional scope filters applied before detection.
		AdminConfigForm: &plugin_pb.ConfigForm{
			FormId:      "ec-balance-admin",
			Title:       "EC Shard Balance Admin Config",
			Description: "Admin-side controls for EC shard balance detection scope.",
			Sections: []*plugin_pb.ConfigSection{
				{
					SectionId:   "scope",
					Title:       "Scope",
					Description: "Optional filters applied before EC shard balance detection.",
					Fields: []*plugin_pb.ConfigField{
						{
							Name:        "collection_filter",
							Label:       "Collection Filter",
							Description: "Only balance EC shards in matching collections (wildcard supported).",
							Placeholder: "all collections",
							FieldType:   plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
							Widget:      plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
						},
						{
							Name:        "data_center_filter",
							Label:       "Data Center Filter",
							Description: "Only balance within matching data centers (wildcard supported).",
							Placeholder: "all data centers",
							FieldType:   plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
							Widget:      plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
						},
						{
							Name:        "disk_type",
							Label:       "Disk Type",
							Description: "Only balance EC shards on this disk type (hdd, ssd, or empty for all).",
							Placeholder: "all disk types",
							FieldType:   plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
							Widget:      plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
						},
					},
				},
			},
			// Empty string means "no filter" for every scope field.
			DefaultValues: map[string]*plugin_pb.ConfigValue{
				"collection_filter":  {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
				"data_center_filter": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
				"disk_type":          {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
			},
		},
		// Worker-side form: detection thresholds consumed by
		// deriveECBalanceWorkerConfig. Min/max bounds mirror the clamping
		// constants declared at the top of this file.
		WorkerConfigForm: &plugin_pb.ConfigForm{
			FormId:      "ec-balance-worker",
			Title:       "EC Shard Balance Worker Config",
			Description: "Worker-side detection thresholds and execution controls.",
			Sections: []*plugin_pb.ConfigSection{
				{
					SectionId:   "thresholds",
					Title:       "Detection Thresholds",
					Description: "Controls for when EC shard balance jobs should be proposed.",
					Fields: []*plugin_pb.ConfigField{
						{
							Name:        "imbalance_threshold",
							Label:       "Imbalance Threshold",
							Description: "Minimum shard count imbalance ratio to trigger balancing (0.0-1.0).",
							FieldType:   plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_DOUBLE,
							Widget:      plugin_pb.ConfigWidget_CONFIG_WIDGET_NUMBER,
							Required:    true,
							MinValue:    &plugin_pb.ConfigValue{Kind: &plugin_pb.ConfigValue_DoubleValue{DoubleValue: ecBalanceMinImbalanceThreshold}},
							MaxValue:    &plugin_pb.ConfigValue{Kind: &plugin_pb.ConfigValue_DoubleValue{DoubleValue: ecBalanceMaxImbalanceThreshold}},
						},
						{
							Name:        "min_server_count",
							Label:       "Minimum Server Count",
							Description: "Minimum servers required for EC shard balancing.",
							FieldType:   plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_INT64,
							Widget:      plugin_pb.ConfigWidget_CONFIG_WIDGET_NUMBER,
							Required:    true,
							MinValue:    &plugin_pb.ConfigValue{Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: ecBalanceMinServerCount}},
						},
						{
							Name:        "min_interval_seconds",
							Label:       "Minimum Detection Interval (s)",
							Description: "Skip detection if the last successful run is more recent than this interval.",
							FieldType:   plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_INT64,
							Widget:      plugin_pb.ConfigWidget_CONFIG_WIDGET_NUMBER,
							Required:    true,
							MinValue:    &plugin_pb.ConfigValue{Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 0}},
						},
						{
							Name:        "preferred_tags",
							Label:       "Preferred Tags",
							Description: "Comma-separated disk tags to prioritize for shard placement, ordered by preference.",
							Placeholder: "fast,ssd",
							FieldType:   plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
							Widget:      plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
						},
					},
				},
			},
			DefaultValues: map[string]*plugin_pb.ConfigValue{
				"imbalance_threshold":  {Kind: &plugin_pb.ConfigValue_DoubleValue{DoubleValue: 0.2}},
				"min_server_count":     {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 3}},
				"min_interval_seconds": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 60 * 60}},
				"preferred_tags":       {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
			},
		},
		// Admin-side scheduling/retry defaults applied when the admin has not
		// configured this job type yet.
		AdminRuntimeDefaults: &plugin_pb.AdminRuntimeDefaults{
			Enabled:                       true,
			DetectionIntervalSeconds:      60 * 30,
			DetectionTimeoutSeconds:       300,
			MaxJobsPerDetection:           500,
			GlobalExecutionConcurrency:    16,
			PerWorkerExecutionConcurrency: 4,
			RetryLimit:                    1,
			RetryBackoffSeconds:           30,
			JobTypeMaxRuntimeSeconds:      1800,
		},
		// Duplicated from WorkerConfigForm.DefaultValues; keep the two maps in sync.
		WorkerDefaultValues: map[string]*plugin_pb.ConfigValue{
			"imbalance_threshold":  {Kind: &plugin_pb.ConfigValue_DoubleValue{DoubleValue: 0.2}},
			"min_server_count":     {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 3}},
			"min_interval_seconds": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 60 * 60}},
			"preferred_tags":       {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
		},
	}
}
// Detect runs one EC shard balance detection round: it honors the worker-side
// minimum interval, applies admin scope filters, collects volume metrics from
// the masters, runs the ec_balance detection algorithm, and streams the
// resulting proposals followed by a completion message through sender.
func (h *ECBalanceHandler) Detect(
	ctx context.Context,
	request *plugin_pb.RunDetectionRequest,
	sender DetectionSender,
) error {
	if request == nil {
		return fmt.Errorf("run detection request is nil")
	}
	if sender == nil {
		return fmt.Errorf("detection sender is nil")
	}
	// An empty JobType is accepted for backward compatibility; anything else
	// must match exactly.
	if request.JobType != "" && request.JobType != "ec_balance" {
		return fmt.Errorf("job type %q is not handled by ec_balance worker", request.JobType)
	}
	workerConfig := deriveECBalanceWorkerConfig(request.GetWorkerConfigValues())
	// When the last successful run is too recent, still emit a well-formed
	// round (activity + empty proposals + complete) so the admin side records
	// the skip instead of seeing a stalled detection.
	if ShouldSkipDetectionByInterval(request.GetLastSuccessfulRun(), workerConfig.MinIntervalSeconds) {
		minInterval := time.Duration(workerConfig.MinIntervalSeconds) * time.Second
		// Best-effort activity: a send failure must not abort the skip path.
		_ = sender.SendActivity(BuildDetectorActivity(
			"skipped_by_interval",
			fmt.Sprintf("EC BALANCE: Detection skipped due to min interval (%s)", minInterval),
			map[string]*plugin_pb.ConfigValue{
				"min_interval_seconds": {
					Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: int64(workerConfig.MinIntervalSeconds)},
				},
			},
		))
		if err := sender.SendProposals(&plugin_pb.DetectionProposals{
			JobType:   "ec_balance",
			Proposals: []*plugin_pb.JobProposal{},
			HasMore:   false,
		}); err != nil {
			return err
		}
		return sender.SendComplete(&plugin_pb.DetectionComplete{
			JobType:        "ec_balance",
			Success:        true,
			TotalProposals: 0,
		})
	}
	// Apply admin-side scope filters; empty values leave the task-config
	// defaults untouched.
	collectionFilter := strings.TrimSpace(readStringConfig(request.GetAdminConfigValues(), "collection_filter", ""))
	if collectionFilter != "" {
		workerConfig.TaskConfig.CollectionFilter = collectionFilter
	}
	dcFilter := strings.TrimSpace(readStringConfig(request.GetAdminConfigValues(), "data_center_filter", ""))
	if dcFilter != "" {
		workerConfig.TaskConfig.DataCenterFilter = dcFilter
	}
	diskType := strings.TrimSpace(readStringConfig(request.GetAdminConfigValues(), "disk_type", ""))
	if diskType != "" {
		workerConfig.TaskConfig.DiskType = diskType
	}
	masters := make([]string, 0)
	if request.ClusterContext != nil {
		masters = append(masters, request.ClusterContext.MasterGrpcAddresses...)
	}
	metrics, activeTopology, err := h.collectVolumeMetrics(ctx, masters, collectionFilter)
	if err != nil {
		return err
	}
	clusterInfo := &workertypes.ClusterInfo{ActiveTopology: activeTopology}
	// MaxResults <= 0 is normalized to 0, which the detection treats as "no limit".
	maxResults := int(request.MaxResults)
	if maxResults < 0 {
		maxResults = 0
	}
	results, hasMore, err := ecbalancetask.Detection(ctx, metrics, clusterInfo, workerConfig.TaskConfig, maxResults)
	if err != nil {
		return err
	}
	// The decision trace is diagnostic only; log and continue on failure.
	if traceErr := emitECBalanceDecisionTrace(sender, workerConfig.TaskConfig, results, maxResults, hasMore); traceErr != nil {
		glog.Warningf("Plugin worker failed to emit ec_balance detection trace: %v", traceErr)
	}
	proposals := make([]*plugin_pb.JobProposal, 0, len(results))
	for _, result := range results {
		proposal, proposalErr := buildECBalanceProposal(result)
		if proposalErr != nil {
			// Skip malformed results rather than failing the whole round.
			glog.Warningf("Plugin worker skip invalid ec_balance proposal: %v", proposalErr)
			continue
		}
		proposals = append(proposals, proposal)
	}
	if err := sender.SendProposals(&plugin_pb.DetectionProposals{
		JobType:   "ec_balance",
		Proposals: proposals,
		HasMore:   hasMore,
	}); err != nil {
		return err
	}
	return sender.SendComplete(&plugin_pb.DetectionComplete{
		JobType:        "ec_balance",
		Success:        true,
		TotalProposals: int32(len(proposals)),
	})
}
// Execute runs a single EC shard balance job: it decodes and validates the
// task parameters, streams progress updates through sender while the task
// runs, and reports either a failure progress update or a completion message.
// If a progress send fails, the execution context is cancelled so the task
// does not keep running for a lost caller.
func (h *ECBalanceHandler) Execute(
	ctx context.Context,
	request *plugin_pb.ExecuteJobRequest,
	sender ExecutionSender,
) error {
	if request == nil || request.Job == nil {
		return fmt.Errorf("execute request/job is nil")
	}
	if sender == nil {
		return fmt.Errorf("execution sender is nil")
	}
	if request.Job.JobType != "" && request.Job.JobType != "ec_balance" {
		return fmt.Errorf("job type %q is not handled by ec_balance worker", request.Job.JobType)
	}
	params, err := decodeECBalanceTaskParams(request.Job)
	if err != nil {
		return err
	}
	// Defensive re-validation: decodeECBalanceTaskParams already checks these
	// on both decode paths, but the cost of checking again is trivial.
	if len(params.Sources) == 0 || strings.TrimSpace(params.Sources[0].Node) == "" {
		return fmt.Errorf("ec balance source node is required")
	}
	if len(params.Targets) == 0 || strings.TrimSpace(params.Targets[0].Node) == "" {
		return fmt.Errorf("ec balance target node is required")
	}
	task := ecbalancetask.NewECBalanceTask(
		request.Job.JobId,
		params.VolumeId,
		params.Collection,
		h.grpcDialOption,
	)
	// Derived context lets the progress callback abort the task when the
	// control stream is broken.
	execCtx, execCancel := context.WithCancel(ctx)
	defer execCancel()
	task.SetProgressCallback(func(progress float64, stage string) {
		// Prefer the stage text as the human-readable message when present.
		message := fmt.Sprintf("ec balance progress %.0f%%", progress)
		if strings.TrimSpace(stage) != "" {
			message = stage
		}
		if err := sender.SendProgress(&plugin_pb.JobProgressUpdate{
			JobId:           request.Job.JobId,
			JobType:         request.Job.JobType,
			State:           plugin_pb.JobState_JOB_STATE_RUNNING,
			ProgressPercent: progress,
			Stage:           stage,
			Message:         message,
			Activities: []*plugin_pb.ActivityEvent{
				BuildExecutorActivity(stage, message),
			},
		}); err != nil {
			glog.Warningf("EC balance job %s (%s): failed to send progress (%.0f%%, stage=%q): %v, cancelling execution",
				request.Job.JobId, request.Job.JobType, progress, stage, err)
			// Caller can no longer hear us; stop the task.
			execCancel()
		}
	})
	// Initial "assigned" update; unlike later progress sends, a failure here
	// aborts before any work has started.
	if err := sender.SendProgress(&plugin_pb.JobProgressUpdate{
		JobId:           request.Job.JobId,
		JobType:         request.Job.JobType,
		State:           plugin_pb.JobState_JOB_STATE_ASSIGNED,
		ProgressPercent: 0,
		Stage:           "assigned",
		Message:         "ec balance job accepted",
		Activities: []*plugin_pb.ActivityEvent{
			BuildExecutorActivity("assigned", "ec balance job accepted"),
		},
	}); err != nil {
		return err
	}
	if err := task.Execute(execCtx, params); err != nil {
		// Best-effort failure report; the original error is returned either way.
		_ = sender.SendProgress(&plugin_pb.JobProgressUpdate{
			JobId:           request.Job.JobId,
			JobType:         request.Job.JobType,
			State:           plugin_pb.JobState_JOB_STATE_FAILED,
			ProgressPercent: 100,
			Stage:           "failed",
			Message:         err.Error(),
			Activities: []*plugin_pb.ActivityEvent{
				BuildExecutorActivity("failed", err.Error()),
			},
		})
		return err
	}
	sourceNode := params.Sources[0].Node
	targetNode := params.Targets[0].Node
	resultSummary := fmt.Sprintf("EC shard balance completed: volume %d shards moved from %s to %s",
		params.VolumeId, sourceNode, targetNode)
	return sender.SendCompleted(&plugin_pb.JobCompleted{
		JobId:   request.Job.JobId,
		JobType: request.Job.JobType,
		Success: true,
		Result: &plugin_pb.JobResult{
			Summary: resultSummary,
			OutputValues: map[string]*plugin_pb.ConfigValue{
				"volume_id": {
					Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: int64(params.VolumeId)},
				},
				"source_server": {
					Kind: &plugin_pb.ConfigValue_StringValue{StringValue: sourceNode},
				},
				"target_server": {
					Kind: &plugin_pb.ConfigValue_StringValue{StringValue: targetNode},
				},
			},
		},
		Activities: []*plugin_pb.ActivityEvent{
			BuildExecutorActivity("completed", resultSummary),
		},
	})
}
// collectVolumeMetrics gathers volume health metrics and the active topology
// from the given masters, dropping the extra return value of the shared
// collector that this handler does not need.
func (h *ECBalanceHandler) collectVolumeMetrics(
	ctx context.Context,
	masterAddresses []string,
	collectionFilter string,
) ([]*workertypes.VolumeHealthMetrics, *topology.ActiveTopology, error) {
	volumeMetrics, activeTopo, _, collectErr := collectVolumeMetricsFromMasters(ctx, masterAddresses, collectionFilter, h.grpcDialOption)
	return volumeMetrics, activeTopo, collectErr
}
// deriveECBalanceWorkerConfig builds the effective worker-side configuration
// from the supplied config values. Each setting falls back to the task default
// when absent and is clamped into its valid range when out of bounds.
func deriveECBalanceWorkerConfig(values map[string]*plugin_pb.ConfigValue) *ecBalanceWorkerConfig {
	cfg := ecbalancetask.NewDefaultConfig()

	// imbalance_threshold: clamp into [min, max].
	threshold := readDoubleConfig(values, "imbalance_threshold", cfg.ImbalanceThreshold)
	switch {
	case threshold < ecBalanceMinImbalanceThreshold:
		threshold = ecBalanceMinImbalanceThreshold
	case threshold > ecBalanceMaxImbalanceThreshold:
		threshold = ecBalanceMaxImbalanceThreshold
	}
	cfg.ImbalanceThreshold = threshold

	// min_server_count: clamp into [ecBalanceMinServerCount, MaxInt32] so the
	// int64 value is safe to narrow to int on 32-bit platforms.
	serverCount := readInt64Config(values, "min_server_count", int64(cfg.MinServerCount))
	if serverCount < int64(ecBalanceMinServerCount) {
		serverCount = int64(ecBalanceMinServerCount)
	}
	if serverCount > math.MaxInt32 {
		serverCount = math.MaxInt32
	}
	cfg.MinServerCount = int(serverCount)

	// min_interval_seconds: clamp into [0, MaxInt32]; default is one hour.
	interval := readInt64Config(values, "min_interval_seconds", 60*60)
	if interval < 0 {
		interval = 0
	}
	if interval > math.MaxInt32 {
		interval = math.MaxInt32
	}

	cfg.PreferredTags = util.NormalizeTagList(readStringListConfig(values, "preferred_tags"))

	return &ecBalanceWorkerConfig{
		TaskConfig:         cfg,
		MinIntervalSeconds: int(interval),
	}
}
// buildECBalanceProposal converts one detection result into a job proposal,
// embedding the serialized TaskParams and convenience scalar parameters for
// the admin UI. It returns an error for nil results or results without typed
// params, which the caller logs and skips.
func buildECBalanceProposal(result *workertypes.TaskDetectionResult) (*plugin_pb.JobProposal, error) {
	if result == nil {
		return nil, fmt.Errorf("task detection result is nil")
	}
	if result.TypedParams == nil {
		return nil, fmt.Errorf("missing typed params for volume %d", result.VolumeID)
	}
	paramsPayload, err := proto.Marshal(result.TypedParams)
	if err != nil {
		return nil, fmt.Errorf("marshal task params: %w", err)
	}
	proposalID := strings.TrimSpace(result.TaskID)
	if proposalID == "" {
		// Synthesize a unique ID when detection did not assign one.
		proposalID = fmt.Sprintf("ec-balance-%d-%d", result.VolumeID, time.Now().UnixNano())
	}
	// Dedupe key: volume ID, plus shard ID and source node when available,
	// plus collection when set — so moves of the same shard from different
	// source nodes (e.g. dedup moves) are distinguished. When the result has
	// no sources or no shard IDs, the key degrades to volume(+collection).
	dedupeKey := fmt.Sprintf("ec_balance:%d", result.VolumeID)
	if len(result.TypedParams.Sources) > 0 {
		src := result.TypedParams.Sources[0]
		if len(src.ShardIds) > 0 {
			dedupeKey = fmt.Sprintf("ec_balance:%d:%d:%s", result.VolumeID, src.ShardIds[0], src.Node)
		}
	}
	if result.Collection != "" {
		dedupeKey += ":" + result.Collection
	}
	// Only the first source/target are surfaced in the summary and labels.
	sourceNode := ""
	targetNode := ""
	if len(result.TypedParams.Sources) > 0 {
		sourceNode = strings.TrimSpace(result.TypedParams.Sources[0].Node)
	}
	if len(result.TypedParams.Targets) > 0 {
		targetNode = strings.TrimSpace(result.TypedParams.Targets[0].Node)
	}
	summary := fmt.Sprintf("Balance EC shard of volume %d", result.VolumeID)
	if sourceNode != "" && targetNode != "" {
		summary = fmt.Sprintf("Move EC shard of volume %d: %s → %s", result.VolumeID, sourceNode, targetNode)
	}
	return &plugin_pb.JobProposal{
		ProposalId: proposalID,
		DedupeKey:  dedupeKey,
		JobType:    "ec_balance",
		Priority:   mapTaskPriority(result.Priority),
		Summary:    summary,
		Detail:     strings.TrimSpace(result.Reason),
		Parameters: map[string]*plugin_pb.ConfigValue{
			// Preferred execution path: the full protobuf-encoded TaskParams.
			"task_params_pb": {
				Kind: &plugin_pb.ConfigValue_BytesValue{BytesValue: paramsPayload},
			},
			"volume_id": {
				Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: int64(result.VolumeID)},
			},
			"source_server": {
				Kind: &plugin_pb.ConfigValue_StringValue{StringValue: sourceNode},
			},
			"target_server": {
				Kind: &plugin_pb.ConfigValue_StringValue{StringValue: targetNode},
			},
			"collection": {
				Kind: &plugin_pb.ConfigValue_StringValue{StringValue: result.Collection},
			},
		},
		Labels: map[string]string{
			"task_type":   "ec_balance",
			"volume_id":   fmt.Sprintf("%d", result.VolumeID),
			"collection":  result.Collection,
			"source_node": sourceNode,
			"target_node": targetNode,
		},
	}, nil
}
// decodeECBalanceTaskParams extracts TaskParams from a job spec. It prefers
// the protobuf-encoded "task_params_pb" parameter and falls back to rebuilding
// params from individual scalar parameters for legacy jobs. On both paths all
// execution-critical fields (source/target nodes and shard IDs) are validated.
func decodeECBalanceTaskParams(job *plugin_pb.JobSpec) (*worker_pb.TaskParams, error) {
	if job == nil {
		return nil, fmt.Errorf("job spec is nil")
	}
	// Try protobuf-encoded params first (preferred path)
	if payload := readBytesConfig(job.Parameters, "task_params_pb"); len(payload) > 0 {
		params := &worker_pb.TaskParams{}
		if err := proto.Unmarshal(payload, params); err != nil {
			return nil, fmt.Errorf("decodeECBalanceTaskParams: unmarshal task_params_pb: %w", err)
		}
		if params.TaskId == "" {
			// Fall back to the job ID when detection left the task ID empty.
			params.TaskId = job.JobId
		}
		// Validate execution-critical fields in the deserialized TaskParams
		if len(params.Sources) == 0 || strings.TrimSpace(params.Sources[0].Node) == "" {
			return nil, fmt.Errorf("decodeECBalanceTaskParams: TaskParams missing Sources[0].Node")
		}
		if len(params.Sources[0].ShardIds) == 0 {
			return nil, fmt.Errorf("decodeECBalanceTaskParams: TaskParams missing Sources[0].ShardIds")
		}
		if len(params.Targets) == 0 || strings.TrimSpace(params.Targets[0].Node) == "" {
			return nil, fmt.Errorf("decodeECBalanceTaskParams: TaskParams missing Targets[0].Node")
		}
		if len(params.Targets[0].ShardIds) == 0 {
			return nil, fmt.Errorf("decodeECBalanceTaskParams: TaskParams missing Targets[0].ShardIds")
		}
		return params, nil
	}
	// Legacy fallback: construct TaskParams from individual scalar parameters.
	// All execution-critical fields are required.
	volumeID := readInt64Config(job.Parameters, "volume_id", 0)
	sourceNode := strings.TrimSpace(readStringConfig(job.Parameters, "source_server", ""))
	targetNode := strings.TrimSpace(readStringConfig(job.Parameters, "target_server", ""))
	collection := readStringConfig(job.Parameters, "collection", "")
	// Volume IDs are uint32 on the wire; reject zero and out-of-range values.
	if volumeID <= 0 || volumeID > math.MaxUint32 {
		return nil, fmt.Errorf("decodeECBalanceTaskParams: invalid or missing volume_id: %d", volumeID)
	}
	if sourceNode == "" {
		return nil, fmt.Errorf("decodeECBalanceTaskParams: missing source_server")
	}
	if targetNode == "" {
		return nil, fmt.Errorf("decodeECBalanceTaskParams: missing target_server")
	}
	// shard_id must be explicitly present: 0 is a valid shard ID, so absence
	// cannot be detected from the read default alone.
	shardIDVal, hasShardID := job.Parameters["shard_id"]
	if !hasShardID || shardIDVal == nil {
		return nil, fmt.Errorf("decodeECBalanceTaskParams: missing shard_id (required for EcBalanceTaskParams)")
	}
	shardID := readInt64Config(job.Parameters, "shard_id", -1)
	if shardID < 0 || shardID > math.MaxUint32 {
		return nil, fmt.Errorf("decodeECBalanceTaskParams: invalid shard_id: %d", shardID)
	}
	// Disk IDs default to 0 when absent; only range is validated.
	sourceDiskID := readInt64Config(job.Parameters, "source_disk_id", 0)
	if sourceDiskID < 0 || sourceDiskID > math.MaxUint32 {
		return nil, fmt.Errorf("decodeECBalanceTaskParams: invalid source_disk_id: %d", sourceDiskID)
	}
	targetDiskID := readInt64Config(job.Parameters, "target_disk_id", 0)
	if targetDiskID < 0 || targetDiskID > math.MaxUint32 {
		return nil, fmt.Errorf("decodeECBalanceTaskParams: invalid target_disk_id: %d", targetDiskID)
	}
	return &worker_pb.TaskParams{
		TaskId:     job.JobId,
		VolumeId:   uint32(volumeID),
		Collection: collection,
		Sources: []*worker_pb.TaskSource{{
			Node:     sourceNode,
			DiskId:   uint32(sourceDiskID),
			ShardIds: []uint32{uint32(shardID)},
		}},
		Targets: []*worker_pb.TaskTarget{{
			Node:     targetNode,
			DiskId:   uint32(targetDiskID),
			ShardIds: []uint32{uint32(shardID)},
		}},
		TaskParams: &worker_pb.TaskParams_EcBalanceParams{
			EcBalanceParams: &worker_pb.EcBalanceTaskParams{
				TimeoutSeconds: 600,
			},
		},
	}, nil
}
// emitECBalanceDecisionTrace sends a detector activity summarizing one
// detection round: how many moves were proposed, whether the result set was
// truncated by maxResults, a per-phase breakdown inferred from each result's
// reason text, and the thresholds that were in effect. It is a no-op when
// sender or taskConfig is nil.
func emitECBalanceDecisionTrace(
	sender DetectionSender,
	taskConfig *ecbalancetask.Config,
	results []*workertypes.TaskDetectionResult,
	maxResults int,
	hasMore bool,
) error {
	if sender == nil || taskConfig == nil {
		return nil
	}
	// Count moves by phase, inferred from the reason string. The first
	// matching phase wins, so "dedup" takes precedence over the others.
	// Nil results are skipped here for the same reason buildECBalanceProposal
	// guards against them: a nil element must not panic the trace.
	phaseCounts := make(map[string]int)
	for _, result := range results {
		if result == nil || result.Reason == "" {
			continue
		}
		for _, phase := range []string{"dedup", "cross_rack", "within_rack", "global"} {
			if strings.Contains(result.Reason, phase) {
				phaseCounts[phase]++
				break
			}
		}
	}
	summarySuffix := ""
	if hasMore {
		summarySuffix = fmt.Sprintf(" (max_results=%d reached)", maxResults)
	}
	summaryMessage := fmt.Sprintf(
		"EC balance detection: %d moves proposed%s (dedup=%d, cross_rack=%d, within_rack=%d, global=%d)",
		len(results),
		summarySuffix,
		phaseCounts["dedup"],
		phaseCounts["cross_rack"],
		phaseCounts["within_rack"],
		phaseCounts["global"],
	)
	return sender.SendActivity(BuildDetectorActivity("decision_summary", summaryMessage, map[string]*plugin_pb.ConfigValue{
		"total_moves": {
			Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: int64(len(results))},
		},
		"has_more": {
			Kind: &plugin_pb.ConfigValue_BoolValue{BoolValue: hasMore},
		},
		"dedup_moves": {
			Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: int64(phaseCounts["dedup"])},
		},
		"cross_rack_moves": {
			Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: int64(phaseCounts["cross_rack"])},
		},
		"within_rack_moves": {
			Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: int64(phaseCounts["within_rack"])},
		},
		"global_moves": {
			Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: int64(phaseCounts["global"])},
		},
		"imbalance_threshold": {
			Kind: &plugin_pb.ConfigValue_DoubleValue{DoubleValue: taskConfig.ImbalanceThreshold},
		},
		"min_server_count": {
			Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: int64(taskConfig.MinServerCount)},
		},
	}))
}