You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							311 lines
						
					
					
						
							9.5 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							311 lines
						
					
					
						
							9.5 KiB
						
					
					
				
								package maintenance
							 | 
						|
								
							 | 
						|
								import (
							 | 
						|
									"sync"
							 | 
						|
									"time"
							 | 
						|
								
							 | 
						|
									"github.com/seaweedfs/seaweedfs/weed/glog"
							 | 
						|
									"github.com/seaweedfs/seaweedfs/weed/worker/types"
							 | 
						|
								)
							 | 
						|
								
							 | 
						|
								// PendingOperationType names the kind of work a pending operation performs.
								// It is a plain string type, so each value below is also its serialized form.
								type PendingOperationType string

								// Known PendingOperationType values.
								const (
									OpTypeVolumeMove    PendingOperationType = "volume_move"
									OpTypeVolumeBalance PendingOperationType = "volume_balance"
									OpTypeErasureCoding PendingOperationType = "erasure_coding"
									OpTypeVacuum        PendingOperationType = "vacuum"
									OpTypeReplication   PendingOperationType = "replication"
								)
							 | 
						|
								
							 | 
						|
								// PendingOperation represents a pending volume/shard operation
							 | 
						|
								type PendingOperation struct {
							 | 
						|
									VolumeID      uint32               `json:"volume_id"`
							 | 
						|
									OperationType PendingOperationType `json:"operation_type"`
							 | 
						|
									SourceNode    string               `json:"source_node"`
							 | 
						|
									DestNode      string               `json:"dest_node,omitempty"` // Empty for non-movement operations
							 | 
						|
									TaskID        string               `json:"task_id"`
							 | 
						|
									StartTime     time.Time            `json:"start_time"`
							 | 
						|
									EstimatedSize uint64               `json:"estimated_size"` // Bytes
							 | 
						|
									Collection    string               `json:"collection"`
							 | 
						|
									Status        string               `json:"status"` // "assigned", "in_progress", "completing"
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// PendingOperations tracks all pending volume/shard operations
							 | 
						|
								type PendingOperations struct {
							 | 
						|
									// Operations by volume ID for conflict detection
							 | 
						|
									byVolumeID map[uint32]*PendingOperation
							 | 
						|
								
							 | 
						|
									// Operations by task ID for updates
							 | 
						|
									byTaskID map[string]*PendingOperation
							 | 
						|
								
							 | 
						|
									// Operations by node for capacity calculations
							 | 
						|
									bySourceNode map[string][]*PendingOperation
							 | 
						|
									byDestNode   map[string][]*PendingOperation
							 | 
						|
								
							 | 
						|
									mutex sync.RWMutex
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// NewPendingOperations creates a new pending operations tracker
							 | 
						|
								func NewPendingOperations() *PendingOperations {
							 | 
						|
									return &PendingOperations{
							 | 
						|
										byVolumeID:   make(map[uint32]*PendingOperation),
							 | 
						|
										byTaskID:     make(map[string]*PendingOperation),
							 | 
						|
										bySourceNode: make(map[string][]*PendingOperation),
							 | 
						|
										byDestNode:   make(map[string][]*PendingOperation),
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// AddOperation adds a pending operation
							 | 
						|
								func (po *PendingOperations) AddOperation(op *PendingOperation) {
							 | 
						|
									po.mutex.Lock()
							 | 
						|
									defer po.mutex.Unlock()
							 | 
						|
								
							 | 
						|
									// Check for existing operation on this volume
							 | 
						|
									if existing, exists := po.byVolumeID[op.VolumeID]; exists {
							 | 
						|
										glog.V(1).Infof("Replacing existing pending operation on volume %d: %s -> %s",
							 | 
						|
											op.VolumeID, existing.TaskID, op.TaskID)
							 | 
						|
										po.removeOperationUnlocked(existing)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Add new operation
							 | 
						|
									po.byVolumeID[op.VolumeID] = op
							 | 
						|
									po.byTaskID[op.TaskID] = op
							 | 
						|
								
							 | 
						|
									// Add to node indexes
							 | 
						|
									po.bySourceNode[op.SourceNode] = append(po.bySourceNode[op.SourceNode], op)
							 | 
						|
									if op.DestNode != "" {
							 | 
						|
										po.byDestNode[op.DestNode] = append(po.byDestNode[op.DestNode], op)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									glog.V(2).Infof("Added pending operation: volume %d, type %s, task %s, %s -> %s",
							 | 
						|
										op.VolumeID, op.OperationType, op.TaskID, op.SourceNode, op.DestNode)
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// RemoveOperation removes a completed operation
							 | 
						|
								func (po *PendingOperations) RemoveOperation(taskID string) {
							 | 
						|
									po.mutex.Lock()
							 | 
						|
									defer po.mutex.Unlock()
							 | 
						|
								
							 | 
						|
									if op, exists := po.byTaskID[taskID]; exists {
							 | 
						|
										po.removeOperationUnlocked(op)
							 | 
						|
										glog.V(2).Infof("Removed completed operation: volume %d, task %s", op.VolumeID, taskID)
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// removeOperationUnlocked removes an operation (must hold lock)
							 | 
						|
								func (po *PendingOperations) removeOperationUnlocked(op *PendingOperation) {
							 | 
						|
									delete(po.byVolumeID, op.VolumeID)
							 | 
						|
									delete(po.byTaskID, op.TaskID)
							 | 
						|
								
							 | 
						|
									// Remove from source node list
							 | 
						|
									if ops, exists := po.bySourceNode[op.SourceNode]; exists {
							 | 
						|
										for i, other := range ops {
							 | 
						|
											if other.TaskID == op.TaskID {
							 | 
						|
												po.bySourceNode[op.SourceNode] = append(ops[:i], ops[i+1:]...)
							 | 
						|
												break
							 | 
						|
											}
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Remove from dest node list
							 | 
						|
									if op.DestNode != "" {
							 | 
						|
										if ops, exists := po.byDestNode[op.DestNode]; exists {
							 | 
						|
											for i, other := range ops {
							 | 
						|
												if other.TaskID == op.TaskID {
							 | 
						|
													po.byDestNode[op.DestNode] = append(ops[:i], ops[i+1:]...)
							 | 
						|
													break
							 | 
						|
												}
							 | 
						|
											}
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// HasPendingOperationOnVolume checks if a volume has a pending operation
							 | 
						|
								func (po *PendingOperations) HasPendingOperationOnVolume(volumeID uint32) bool {
							 | 
						|
									po.mutex.RLock()
							 | 
						|
									defer po.mutex.RUnlock()
							 | 
						|
								
							 | 
						|
									_, exists := po.byVolumeID[volumeID]
							 | 
						|
									return exists
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// GetPendingOperationOnVolume returns the pending operation on a volume
							 | 
						|
								func (po *PendingOperations) GetPendingOperationOnVolume(volumeID uint32) *PendingOperation {
							 | 
						|
									po.mutex.RLock()
							 | 
						|
									defer po.mutex.RUnlock()
							 | 
						|
								
							 | 
						|
									return po.byVolumeID[volumeID]
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// WouldConflictWithPending checks if a new operation would conflict with pending ones
							 | 
						|
								func (po *PendingOperations) WouldConflictWithPending(volumeID uint32, opType PendingOperationType) bool {
							 | 
						|
									po.mutex.RLock()
							 | 
						|
									defer po.mutex.RUnlock()
							 | 
						|
								
							 | 
						|
									if existing, exists := po.byVolumeID[volumeID]; exists {
							 | 
						|
										// Volume already has a pending operation
							 | 
						|
										glog.V(3).Infof("Volume %d conflict: already has %s operation (task %s)",
							 | 
						|
											volumeID, existing.OperationType, existing.TaskID)
							 | 
						|
										return true
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return false
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// GetPendingCapacityImpactForNode calculates pending capacity changes for a node
							 | 
						|
								func (po *PendingOperations) GetPendingCapacityImpactForNode(nodeID string) (incoming uint64, outgoing uint64) {
							 | 
						|
									po.mutex.RLock()
							 | 
						|
									defer po.mutex.RUnlock()
							 | 
						|
								
							 | 
						|
									// Calculate outgoing capacity (volumes leaving this node)
							 | 
						|
									if ops, exists := po.bySourceNode[nodeID]; exists {
							 | 
						|
										for _, op := range ops {
							 | 
						|
											// Only count movement operations
							 | 
						|
											if op.DestNode != "" {
							 | 
						|
												outgoing += op.EstimatedSize
							 | 
						|
											}
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Calculate incoming capacity (volumes coming to this node)
							 | 
						|
									if ops, exists := po.byDestNode[nodeID]; exists {
							 | 
						|
										for _, op := range ops {
							 | 
						|
											incoming += op.EstimatedSize
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return incoming, outgoing
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// FilterVolumeMetricsExcludingPending filters out volumes with pending operations
							 | 
						|
								func (po *PendingOperations) FilterVolumeMetricsExcludingPending(metrics []*types.VolumeHealthMetrics) []*types.VolumeHealthMetrics {
							 | 
						|
									po.mutex.RLock()
							 | 
						|
									defer po.mutex.RUnlock()
							 | 
						|
								
							 | 
						|
									var filtered []*types.VolumeHealthMetrics
							 | 
						|
									excludedCount := 0
							 | 
						|
								
							 | 
						|
									for _, metric := range metrics {
							 | 
						|
										if _, hasPending := po.byVolumeID[metric.VolumeID]; !hasPending {
							 | 
						|
											filtered = append(filtered, metric)
							 | 
						|
										} else {
							 | 
						|
											excludedCount++
							 | 
						|
											glog.V(3).Infof("Excluding volume %d from scan due to pending operation", metric.VolumeID)
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									if excludedCount > 0 {
							 | 
						|
										glog.V(1).Infof("Filtered out %d volumes with pending operations from %d total volumes",
							 | 
						|
											excludedCount, len(metrics))
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return filtered
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// GetNodeCapacityProjection calculates projected capacity for a node
							 | 
						|
								func (po *PendingOperations) GetNodeCapacityProjection(nodeID string, currentUsed uint64, totalCapacity uint64) NodeCapacityProjection {
							 | 
						|
									incoming, outgoing := po.GetPendingCapacityImpactForNode(nodeID)
							 | 
						|
								
							 | 
						|
									projectedUsed := currentUsed + incoming - outgoing
							 | 
						|
									projectedFree := totalCapacity - projectedUsed
							 | 
						|
								
							 | 
						|
									return NodeCapacityProjection{
							 | 
						|
										NodeID:          nodeID,
							 | 
						|
										CurrentUsed:     currentUsed,
							 | 
						|
										TotalCapacity:   totalCapacity,
							 | 
						|
										PendingIncoming: incoming,
							 | 
						|
										PendingOutgoing: outgoing,
							 | 
						|
										ProjectedUsed:   projectedUsed,
							 | 
						|
										ProjectedFree:   projectedFree,
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// GetAllPendingOperations returns all pending operations
							 | 
						|
								func (po *PendingOperations) GetAllPendingOperations() []*PendingOperation {
							 | 
						|
									po.mutex.RLock()
							 | 
						|
									defer po.mutex.RUnlock()
							 | 
						|
								
							 | 
						|
									var operations []*PendingOperation
							 | 
						|
									for _, op := range po.byVolumeID {
							 | 
						|
										operations = append(operations, op)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return operations
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// UpdateOperationStatus updates the status of a pending operation
							 | 
						|
								func (po *PendingOperations) UpdateOperationStatus(taskID string, status string) {
							 | 
						|
									po.mutex.Lock()
							 | 
						|
									defer po.mutex.Unlock()
							 | 
						|
								
							 | 
						|
									if op, exists := po.byTaskID[taskID]; exists {
							 | 
						|
										op.Status = status
							 | 
						|
										glog.V(3).Infof("Updated operation status: task %s, volume %d -> %s", taskID, op.VolumeID, status)
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// CleanupStaleOperations removes operations that have been running too long
							 | 
						|
								func (po *PendingOperations) CleanupStaleOperations(maxAge time.Duration) int {
							 | 
						|
									po.mutex.Lock()
							 | 
						|
									defer po.mutex.Unlock()
							 | 
						|
								
							 | 
						|
									cutoff := time.Now().Add(-maxAge)
							 | 
						|
									var staleOps []*PendingOperation
							 | 
						|
								
							 | 
						|
									for _, op := range po.byVolumeID {
							 | 
						|
										if op.StartTime.Before(cutoff) {
							 | 
						|
											staleOps = append(staleOps, op)
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									for _, op := range staleOps {
							 | 
						|
										po.removeOperationUnlocked(op)
							 | 
						|
										glog.Warningf("Removed stale pending operation: volume %d, task %s, age %v",
							 | 
						|
											op.VolumeID, op.TaskID, time.Since(op.StartTime))
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return len(staleOps)
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// NodeCapacityProjection is the result of GetNodeCapacityProjection: a
								// node's current figures, its pending incoming/outgoing totals, and the
								// usage and free space derived from them.
								type NodeCapacityProjection struct {
									NodeID string `json:"node_id"`

									// Inputs supplied by the caller.
									CurrentUsed   uint64 `json:"current_used"`
									TotalCapacity uint64 `json:"total_capacity"`

									// Totals taken from the pending operations that touch the node.
									PendingIncoming uint64 `json:"pending_incoming"`
									PendingOutgoing uint64 `json:"pending_outgoing"`

									// Values derived from the fields above.
									ProjectedUsed uint64 `json:"projected_used"`
									ProjectedFree uint64 `json:"projected_free"`
								}
							 | 
						|
								
							 | 
						|
								// GetStats returns statistics about pending operations
							 | 
						|
								func (po *PendingOperations) GetStats() PendingOperationsStats {
							 | 
						|
									po.mutex.RLock()
							 | 
						|
									defer po.mutex.RUnlock()
							 | 
						|
								
							 | 
						|
									stats := PendingOperationsStats{
							 | 
						|
										TotalOperations: len(po.byVolumeID),
							 | 
						|
										ByType:          make(map[PendingOperationType]int),
							 | 
						|
										ByStatus:        make(map[string]int),
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									var totalSize uint64
							 | 
						|
									for _, op := range po.byVolumeID {
							 | 
						|
										stats.ByType[op.OperationType]++
							 | 
						|
										stats.ByStatus[op.Status]++
							 | 
						|
										totalSize += op.EstimatedSize
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									stats.TotalEstimatedSize = totalSize
							 | 
						|
									return stats
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// PendingOperationsStats provides statistics about pending operations
							 | 
						|
								type PendingOperationsStats struct {
							 | 
						|
									TotalOperations    int                          `json:"total_operations"`
							 | 
						|
									ByType             map[PendingOperationType]int `json:"by_type"`
							 | 
						|
									ByStatus           map[string]int               `json:"by_status"`
							 | 
						|
									TotalEstimatedSize uint64                       `json:"total_estimated_size"`
							 | 
						|
								}
							 |