package balance

import (
	"fmt"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/admin/topology"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
	"github.com/seaweedfs/seaweedfs/weed/worker/tasks/base"
	"github.com/seaweedfs/seaweedfs/weed/worker/types"
)

// Detection implements the detection logic for balance tasks
func Detection(metrics []*types.VolumeHealthMetrics, clusterInfo *types.ClusterInfo, config base.TaskConfig) ([]*types.TaskDetectionResult, error) {
	if !config.IsEnabled() {
		return nil, nil
	}

	balanceConfig, ok := config.(*Config)
	if !ok {
		return nil, fmt.Errorf("BALANCE: unexpected config type %T, expected *Config", config)
	}

	// Skip if the cluster is too small to benefit from balancing
	minVolumeCount := 2 // kept low so small clusters are still considered
	if len(metrics) < minVolumeCount {
		glog.Infof("BALANCE: No tasks created - cluster too small (%d volumes, need ≥%d)", len(metrics), minVolumeCount)
		return nil, nil
	}

	// Analyze volume distribution across servers
	serverVolumeCounts := make(map[string]int)
	for _, metric := range metrics {
		serverVolumeCounts[metric.Server]++
	}

	if len(serverVolumeCounts) < balanceConfig.MinServerCount {
		glog.Infof("BALANCE: No tasks created - too few servers (%d servers, need ≥%d)", len(serverVolumeCounts), balanceConfig.MinServerCount)
		return nil, nil
	}

	// Calculate balance metrics
	totalVolumes := len(metrics)
	avgVolumesPerServer := float64(totalVolumes) / float64(len(serverVolumeCounts))

	maxVolumes := 0
	minVolumes := totalVolumes
	maxServer := ""
	minServer := ""

	for server, count := range serverVolumeCounts {
		if count > maxVolumes {
			maxVolumes = count
			maxServer = server
		}
		if count < minVolumes {
			minVolumes = count
			minServer = server
		}
	}

	// Check if imbalance exceeds threshold
	imbalanceRatio := float64(maxVolumes-minVolumes) / avgVolumesPerServer
	if imbalanceRatio <= balanceConfig.ImbalanceThreshold {
		glog.Infof("BALANCE: No tasks created - cluster well balanced. Imbalance=%.1f%% (threshold=%.1f%%). Max=%d volumes on %s, Min=%d on %s, Avg=%.1f",
			imbalanceRatio*100, balanceConfig.ImbalanceThreshold*100, maxVolumes, maxServer, minVolumes, minServer, avgVolumesPerServer)
		return nil, nil
	}
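
	// Worked example (illustrative numbers): three servers holding 10, 6,
	// and 2 volumes give avgVolumesPerServer = 6.0, so
	// imbalanceRatio = (10-2)/6.0 ≈ 1.33 (133%), which would exceed any
	// configured threshold below 1.33 and trigger a balance task.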

	// Select a volume from the overloaded server for balance
	var selectedVolume *types.VolumeHealthMetrics
	for _, metric := range metrics {
		if metric.Server == maxServer {
			selectedVolume = metric
			break
		}
	}

	if selectedVolume == nil {
		glog.Warningf("BALANCE: Could not find volume on overloaded server %s", maxServer)
		return nil, nil
	}

	// Create balance task with volume and destination planning info
	reason := fmt.Sprintf("Cluster imbalance detected: %.1f%% (max: %d on %s, min: %d on %s, avg: %.1f)",
		imbalanceRatio*100, maxVolumes, maxServer, minVolumes, minServer, avgVolumesPerServer)

	// Generate task ID for ActiveTopology integration
	taskID := fmt.Sprintf("balance_vol_%d_%d", selectedVolume.VolumeID, time.Now().Unix())
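	// e.g. "balance_vol_42_1700000000" for volume 42 detected at Unix time 1700000000 (illustrative values)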

	task := &types.TaskDetectionResult{
		TaskID:     taskID, // Link to ActiveTopology pending task
		TaskType:   types.TaskTypeBalance,
		VolumeID:   selectedVolume.VolumeID,
		Server:     selectedVolume.Server,
		Collection: selectedVolume.Collection,
		Priority:   types.TaskPriorityNormal,
		Reason:     reason,
		ScheduleAt: time.Now(),
	}

	// Plan destination if ActiveTopology is available
	if clusterInfo.ActiveTopology != nil {
		destinationPlan, err := planBalanceDestination(clusterInfo.ActiveTopology, selectedVolume)
		if err != nil {
			glog.Warningf("Failed to plan balance destination for volume %d: %v", selectedVolume.VolumeID, err)
			return nil, nil // Skip this task if destination planning fails
		}

		// Find the actual disk containing the volume on the source server
		sourceDisk, found := base.FindVolumeDisk(clusterInfo.ActiveTopology, selectedVolume.VolumeID, selectedVolume.Collection, selectedVolume.Server)
		if !found {
			return nil, fmt.Errorf("BALANCE: Could not find volume %d (collection: %s) on source server %s - unable to create balance task",
				selectedVolume.VolumeID, selectedVolume.Collection, selectedVolume.Server)
		}

		// Create typed parameters with unified source and target information
		task.TypedParams = &worker_pb.TaskParams{
			TaskId:     taskID, // Link to ActiveTopology pending task
			VolumeId:   selectedVolume.VolumeID,
			Collection: selectedVolume.Collection,
			VolumeSize: selectedVolume.Size, // Store original volume size for tracking changes

			// Unified sources and targets - the only way to specify locations
			Sources: []*worker_pb.TaskSource{
				{
					Node:          selectedVolume.Server,
					DiskId:        sourceDisk,
					VolumeId:      selectedVolume.VolumeID,
					EstimatedSize: selectedVolume.Size,
					DataCenter:    selectedVolume.DataCenter,
					Rack:          selectedVolume.Rack,
				},
			},
			Targets: []*worker_pb.TaskTarget{
				{
					Node:          destinationPlan.TargetNode,
					DiskId:        destinationPlan.TargetDisk,
					VolumeId:      selectedVolume.VolumeID,
					EstimatedSize: destinationPlan.ExpectedSize,
					DataCenter:    destinationPlan.TargetDC,
					Rack:          destinationPlan.TargetRack,
				},
			},

			TaskParams: &worker_pb.TaskParams_BalanceParams{
				BalanceParams: &worker_pb.BalanceTaskParams{
					ForceMove:      false,
					TimeoutSeconds: 600, // 10 minutes default
				},
			},
		}

		glog.V(1).Infof("Planned balance destination for volume %d: %s -> %s",
			selectedVolume.VolumeID, selectedVolume.Server, destinationPlan.TargetNode)

		// Add pending balance task to ActiveTopology for capacity management
		targetDisk := destinationPlan.TargetDisk

		err = clusterInfo.ActiveTopology.AddPendingTask(topology.TaskSpec{
			TaskID:     taskID,
			TaskType:   topology.TaskTypeBalance,
			VolumeID:   selectedVolume.VolumeID,
			VolumeSize: int64(selectedVolume.Size),
			Sources: []topology.TaskSourceSpec{
				{ServerID: selectedVolume.Server, DiskID: sourceDisk},
			},
			Destinations: []topology.TaskDestinationSpec{
				{ServerID: destinationPlan.TargetNode, DiskID: targetDisk},
			},
		})
		if err != nil {
			return nil, fmt.Errorf("BALANCE: Failed to add pending task for volume %d: %v", selectedVolume.VolumeID, err)
		}

		glog.V(2).Infof("Added pending balance task %s to ActiveTopology for volume %d: %s:%d -> %s:%d",
			taskID, selectedVolume.VolumeID, selectedVolume.Server, sourceDisk, destinationPlan.TargetNode, targetDisk)
	} else {
		glog.Warningf("No ActiveTopology available for destination planning in balance detection")
		return nil, nil
	}

	return []*types.TaskDetectionResult{task}, nil
}
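
// A minimal usage sketch, assuming caller-side wiring that is not part of
// this package (collectVolumeHealthMetrics and enqueue are hypothetical
// helpers; balanceConfig is a *Config):
//
//	metrics := collectVolumeHealthMetrics()
//	clusterInfo := &types.ClusterInfo{ActiveTopology: activeTopology}
//	tasks, err := Detection(metrics, clusterInfo, balanceConfig)
//	if err != nil {
//		glog.Errorf("balance detection failed: %v", err)
//	}
//	for _, task := range tasks {
//		enqueue(task)
//	}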

// planBalanceDestination plans the destination for a balance operation
// This function implements destination planning logic directly in the detection phase
func planBalanceDestination(activeTopology *topology.ActiveTopology, selectedVolume *types.VolumeHealthMetrics) (*topology.DestinationPlan, error) {
	// Get source node information from topology
	var sourceRack, sourceDC string

	// Extract rack and DC from topology info
	topologyInfo := activeTopology.GetTopologyInfo()
	if topologyInfo != nil {
		for _, dc := range topologyInfo.DataCenterInfos {
			for _, rack := range dc.RackInfos {
				for _, dataNodeInfo := range rack.DataNodeInfos {
					if dataNodeInfo.Id == selectedVolume.Server {
						sourceDC = dc.Id
						sourceRack = rack.Id
						break
					}
				}
				if sourceRack != "" {
					break
				}
			}
			if sourceDC != "" {
				break
			}
		}
	}

	// Get available disks, excluding the source node
	availableDisks := activeTopology.GetAvailableDisks(topology.TaskTypeBalance, selectedVolume.Server)
	if len(availableDisks) == 0 {
		return nil, fmt.Errorf("no available disks for balance operation")
	}

	// Find the best destination disk based on balance criteria
	var bestDisk *topology.DiskInfo
	bestScore := -1.0

	for _, disk := range availableDisks {
		score := calculateBalanceScore(disk, sourceRack, sourceDC, selectedVolume.Size)
		if score > bestScore {
			bestScore = score
			bestDisk = disk
		}
	}

	if bestDisk == nil {
		return nil, fmt.Errorf("no suitable destination found for balance operation")
	}

	return &topology.DestinationPlan{
		TargetNode:     bestDisk.NodeID,
		TargetDisk:     bestDisk.DiskID,
		TargetRack:     bestDisk.Rack,
		TargetDC:       bestDisk.DataCenter,
		ExpectedSize:   selectedVolume.Size,
		PlacementScore: bestScore,
	}, nil
}

// calculateBalanceScore calculates placement score for balance operations
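// The weights, as implemented below: up to 40 points for low utilization,
// 30 for a different rack, 20 for a different data center, and up to 10
// for low load (the volumeSize parameter is currently unused). Worked
// example with illustrative numbers: a disk at 25% utilization in a
// different rack but the same DC, with LoadCount 3, scores
// 0.75*40 + 30 + 0 + (10-3) = 67.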
func calculateBalanceScore(disk *topology.DiskInfo, sourceRack, sourceDC string, volumeSize uint64) float64 {
	if disk.DiskInfo == nil {
		return 0.0
	}

	score := 0.0

	// Prefer disks with lower current volume count (better for balance)
	if disk.DiskInfo.MaxVolumeCount > 0 {
		utilization := float64(disk.DiskInfo.VolumeCount) / float64(disk.DiskInfo.MaxVolumeCount)
		score += (1.0 - utilization) * 40.0 // Up to 40 points for low utilization
	}

	// Prefer different racks for better distribution
	if disk.Rack != sourceRack {
		score += 30.0
	}

	// Prefer different data centers for better distribution
	if disk.DataCenter != sourceDC {
		score += 20.0
	}

	// Prefer disks with lower current load (up to 10 points; contribution goes negative when LoadCount exceeds 10)
	score += 10.0 - float64(disk.LoadCount)

	return score
}