You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							238 lines
						
					
					
						
							7.8 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							238 lines
						
					
					
						
							7.8 KiB
						
					
					
				
								package command
							 | 
						|
								
							 | 
						|
								import (
							 | 
						|
									"os"
							 | 
						|
									"os/signal"
							 | 
						|
									"path/filepath"
							 | 
						|
									"strings"
							 | 
						|
									"syscall"
							 | 
						|
									"time"
							 | 
						|
								
							 | 
						|
									"github.com/seaweedfs/seaweedfs/weed/glog"
							 | 
						|
									"github.com/seaweedfs/seaweedfs/weed/security"
							 | 
						|
									"github.com/seaweedfs/seaweedfs/weed/util"
							 | 
						|
									"github.com/seaweedfs/seaweedfs/weed/worker"
							 | 
						|
									"github.com/seaweedfs/seaweedfs/weed/worker/tasks"
							 | 
						|
									"github.com/seaweedfs/seaweedfs/weed/worker/types"
							 | 
						|
								
							 | 
						|
									// Import task packages to trigger their auto-registration
							 | 
						|
									_ "github.com/seaweedfs/seaweedfs/weed/worker/tasks/balance"
							 | 
						|
									_ "github.com/seaweedfs/seaweedfs/weed/worker/tasks/erasure_coding"
							 | 
						|
									_ "github.com/seaweedfs/seaweedfs/weed/worker/tasks/vacuum"
							 | 
						|
								)
							 | 
						|
								
							 | 
						|
								var cmdWorker = &Command{
							 | 
						|
									UsageLine: "worker -admin=<admin_server> [-capabilities=<task_types>] [-maxConcurrent=<num>] [-workingDir=<path>]",
							 | 
						|
									Short:     "start a maintenance worker to process cluster maintenance tasks",
							 | 
						|
									Long: `Start a maintenance worker that connects to an admin server to process
							 | 
						|
								maintenance tasks like vacuum, erasure coding, remote upload, and replication fixes.
							 | 
						|
								
							 | 
						|
								The worker ID and address are automatically generated.
							 | 
						|
								The worker connects to the admin server via gRPC (admin HTTP port + 10000).
							 | 
						|
								
							 | 
						|
								Examples:
							 | 
						|
								  weed worker -admin=localhost:23646
							 | 
						|
								  weed worker -admin=admin.example.com:23646
							 | 
						|
								  weed worker -admin=localhost:23646 -capabilities=vacuum,replication
							 | 
						|
								  weed worker -admin=localhost:23646 -maxConcurrent=4
							 | 
						|
								  weed worker -admin=localhost:23646 -workingDir=/tmp/worker
							 | 
						|
								`,
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								var (
							 | 
						|
									workerAdminServer         = cmdWorker.Flag.String("admin", "localhost:23646", "admin server address")
							 | 
						|
									workerCapabilities        = cmdWorker.Flag.String("capabilities", "vacuum,ec,remote,replication,balance", "comma-separated list of task types this worker can handle")
							 | 
						|
									workerMaxConcurrent       = cmdWorker.Flag.Int("maxConcurrent", 2, "maximum number of concurrent tasks")
							 | 
						|
									workerHeartbeatInterval   = cmdWorker.Flag.Duration("heartbeat", 30*time.Second, "heartbeat interval")
							 | 
						|
									workerTaskRequestInterval = cmdWorker.Flag.Duration("taskInterval", 5*time.Second, "task request interval")
							 | 
						|
									workerWorkingDir          = cmdWorker.Flag.String("workingDir", "", "working directory for the worker")
							 | 
						|
								)
							 | 
						|
								
							 | 
						|
								func init() {
							 | 
						|
									cmdWorker.Run = runWorker
							 | 
						|
								
							 | 
						|
									// Set default capabilities from registered task types
							 | 
						|
									// This happens after package imports have triggered auto-registration
							 | 
						|
									tasks.SetDefaultCapabilitiesFromRegistry()
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								func runWorker(cmd *Command, args []string) bool {
							 | 
						|
									util.LoadConfiguration("security", false)
							 | 
						|
								
							 | 
						|
									glog.Infof("Starting maintenance worker")
							 | 
						|
									glog.Infof("Admin server: %s", *workerAdminServer)
							 | 
						|
									glog.Infof("Capabilities: %s", *workerCapabilities)
							 | 
						|
								
							 | 
						|
									// Parse capabilities
							 | 
						|
									capabilities := parseCapabilities(*workerCapabilities)
							 | 
						|
									if len(capabilities) == 0 {
							 | 
						|
										glog.Fatalf("No valid capabilities specified")
							 | 
						|
										return false
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Set working directory and create task-specific subdirectories
							 | 
						|
									var baseWorkingDir string
							 | 
						|
									if *workerWorkingDir != "" {
							 | 
						|
										glog.Infof("Setting working directory to: %s", *workerWorkingDir)
							 | 
						|
										if err := os.Chdir(*workerWorkingDir); err != nil {
							 | 
						|
											glog.Fatalf("Failed to change working directory: %v", err)
							 | 
						|
											return false
							 | 
						|
										}
							 | 
						|
										wd, err := os.Getwd()
							 | 
						|
										if err != nil {
							 | 
						|
											glog.Fatalf("Failed to get working directory: %v", err)
							 | 
						|
											return false
							 | 
						|
										}
							 | 
						|
										baseWorkingDir = wd
							 | 
						|
										glog.Infof("Current working directory: %s", baseWorkingDir)
							 | 
						|
									} else {
							 | 
						|
										// Use default working directory when not specified
							 | 
						|
										wd, err := os.Getwd()
							 | 
						|
										if err != nil {
							 | 
						|
											glog.Fatalf("Failed to get current working directory: %v", err)
							 | 
						|
											return false
							 | 
						|
										}
							 | 
						|
										baseWorkingDir = wd
							 | 
						|
										glog.Infof("Using current working directory: %s", baseWorkingDir)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Create task-specific subdirectories
							 | 
						|
									for _, capability := range capabilities {
							 | 
						|
										taskDir := filepath.Join(baseWorkingDir, string(capability))
							 | 
						|
										if err := os.MkdirAll(taskDir, 0755); err != nil {
							 | 
						|
											glog.Fatalf("Failed to create task directory %s: %v", taskDir, err)
							 | 
						|
											return false
							 | 
						|
										}
							 | 
						|
										glog.Infof("Created task directory: %s", taskDir)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Create gRPC dial option using TLS configuration
							 | 
						|
									grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.worker")
							 | 
						|
								
							 | 
						|
									// Create worker configuration
							 | 
						|
									config := &types.WorkerConfig{
							 | 
						|
										AdminServer:         *workerAdminServer,
							 | 
						|
										Capabilities:        capabilities,
							 | 
						|
										MaxConcurrent:       *workerMaxConcurrent,
							 | 
						|
										HeartbeatInterval:   *workerHeartbeatInterval,
							 | 
						|
										TaskRequestInterval: *workerTaskRequestInterval,
							 | 
						|
										BaseWorkingDir:      baseWorkingDir,
							 | 
						|
										GrpcDialOption:      grpcDialOption,
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Create worker instance
							 | 
						|
									workerInstance, err := worker.NewWorker(config)
							 | 
						|
									if err != nil {
							 | 
						|
										glog.Fatalf("Failed to create worker: %v", err)
							 | 
						|
										return false
							 | 
						|
									}
							 | 
						|
									adminClient, err := worker.CreateAdminClient(*workerAdminServer, workerInstance.ID(), grpcDialOption)
							 | 
						|
									if err != nil {
							 | 
						|
										glog.Fatalf("Failed to create admin client: %v", err)
							 | 
						|
										return false
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Set admin client
							 | 
						|
									workerInstance.SetAdminClient(adminClient)
							 | 
						|
								
							 | 
						|
									// Set working directory
							 | 
						|
									if *workerWorkingDir != "" {
							 | 
						|
										glog.Infof("Setting working directory to: %s", *workerWorkingDir)
							 | 
						|
										if err := os.Chdir(*workerWorkingDir); err != nil {
							 | 
						|
											glog.Fatalf("Failed to change working directory: %v", err)
							 | 
						|
											return false
							 | 
						|
										}
							 | 
						|
										wd, err := os.Getwd()
							 | 
						|
										if err != nil {
							 | 
						|
											glog.Fatalf("Failed to get working directory: %v", err)
							 | 
						|
											return false
							 | 
						|
										}
							 | 
						|
										glog.Infof("Current working directory: %s", wd)
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Start the worker
							 | 
						|
									err = workerInstance.Start()
							 | 
						|
									if err != nil {
							 | 
						|
										glog.Errorf("Failed to start worker: %v", err)
							 | 
						|
										return false
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Set up signal handling
							 | 
						|
									sigChan := make(chan os.Signal, 1)
							 | 
						|
									signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
							 | 
						|
								
							 | 
						|
									glog.Infof("Maintenance worker %s started successfully", workerInstance.ID())
							 | 
						|
									glog.Infof("Press Ctrl+C to stop the worker")
							 | 
						|
								
							 | 
						|
									// Wait for shutdown signal
							 | 
						|
									<-sigChan
							 | 
						|
									glog.Infof("Shutdown signal received, stopping worker...")
							 | 
						|
								
							 | 
						|
									// Gracefully stop the worker
							 | 
						|
									err = workerInstance.Stop()
							 | 
						|
									if err != nil {
							 | 
						|
										glog.Errorf("Error stopping worker: %v", err)
							 | 
						|
									}
							 | 
						|
									glog.Infof("Worker stopped")
							 | 
						|
								
							 | 
						|
									return true
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// parseCapabilities converts comma-separated capability string to task types
							 | 
						|
								func parseCapabilities(capabilityStr string) []types.TaskType {
							 | 
						|
									if capabilityStr == "" {
							 | 
						|
										return nil
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									capabilityMap := map[string]types.TaskType{}
							 | 
						|
								
							 | 
						|
									// Populate capabilityMap with registered task types
							 | 
						|
									typesRegistry := tasks.GetGlobalTypesRegistry()
							 | 
						|
									for taskType := range typesRegistry.GetAllDetectors() {
							 | 
						|
										// Use the task type string directly as the key
							 | 
						|
										capabilityMap[strings.ToLower(string(taskType))] = taskType
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									// Add common aliases for convenience
							 | 
						|
									if taskType, exists := capabilityMap["erasure_coding"]; exists {
							 | 
						|
										capabilityMap["ec"] = taskType
							 | 
						|
									}
							 | 
						|
									if taskType, exists := capabilityMap["remote_upload"]; exists {
							 | 
						|
										capabilityMap["remote"] = taskType
							 | 
						|
									}
							 | 
						|
									if taskType, exists := capabilityMap["fix_replication"]; exists {
							 | 
						|
										capabilityMap["replication"] = taskType
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									var capabilities []types.TaskType
							 | 
						|
									parts := strings.Split(capabilityStr, ",")
							 | 
						|
								
							 | 
						|
									for _, part := range parts {
							 | 
						|
										part = strings.TrimSpace(part)
							 | 
						|
										if taskType, exists := capabilityMap[part]; exists {
							 | 
						|
											capabilities = append(capabilities, taskType)
							 | 
						|
										} else {
							 | 
						|
											glog.Warningf("Unknown capability: %s", part)
							 | 
						|
										}
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return capabilities
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								// Legacy compatibility types for backward compatibility
							 | 
						|
								// These will be deprecated in future versions
							 | 
						|
								
							 | 
						|
								// WorkerStatus represents the current status of a worker (deprecated)
							 | 
						|
								type WorkerStatus struct {
							 | 
						|
									WorkerID       string           `json:"worker_id"`
							 | 
						|
									Address        string           `json:"address"`
							 | 
						|
									Status         string           `json:"status"`
							 | 
						|
									Capabilities   []types.TaskType `json:"capabilities"`
							 | 
						|
									MaxConcurrent  int              `json:"max_concurrent"`
							 | 
						|
									CurrentLoad    int              `json:"current_load"`
							 | 
						|
									LastHeartbeat  time.Time        `json:"last_heartbeat"`
							 | 
						|
									CurrentTasks   []types.Task     `json:"current_tasks"`
							 | 
						|
									Uptime         time.Duration    `json:"uptime"`
							 | 
						|
									TasksCompleted int              `json:"tasks_completed"`
							 | 
						|
									TasksFailed    int              `json:"tasks_failed"`
							 | 
						|
								}
							 |