2 changed files with 300 additions and 751 deletions
@ -1,698 +0,0 @@ |
|||
package worker |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"net" |
|||
"strconv" |
|||
"sync" |
|||
"time" |
|||
|
|||
"google.golang.org/grpc" |
|||
"google.golang.org/grpc/credentials/insecure" |
|||
|
|||
"github.com/seaweedfs/seaweedfs/weed/glog" |
|||
"github.com/seaweedfs/seaweedfs/weed/pb" |
|||
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb" |
|||
"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb" |
|||
) |
|||
|
|||
// ECWorker implements maintenance worker with actual EC functionality
|
|||
type ECWorker struct { |
|||
workerID string |
|||
adminAddress string |
|||
grpcAddress string |
|||
capabilities []string |
|||
maxConcurrent int |
|||
|
|||
// gRPC server and client
|
|||
server *grpc.Server |
|||
adminConn *grpc.ClientConn |
|||
adminClient worker_pb.WorkerServiceClient |
|||
adminStream worker_pb.WorkerService_WorkerStreamClient |
|||
|
|||
// Task management
|
|||
currentTasks map[string]*ActiveTask |
|||
taskMutex sync.RWMutex |
|||
|
|||
// Control
|
|||
running bool |
|||
stopCh chan struct{} |
|||
mutex sync.RWMutex |
|||
} |
|||
|
|||
// ActiveTask represents a task currently being executed
|
|||
type ActiveTask struct { |
|||
ID string |
|||
Type string |
|||
VolumeID uint32 |
|||
Server string |
|||
Parameters map[string]string |
|||
StartedAt time.Time |
|||
Progress float32 |
|||
Status string |
|||
Context context.Context |
|||
Cancel context.CancelFunc |
|||
} |
|||
|
|||
// NewECWorker creates a new EC worker
|
|||
func NewECWorker(workerID, adminAddress, grpcAddress string) *ECWorker { |
|||
return &ECWorker{ |
|||
workerID: workerID, |
|||
adminAddress: adminAddress, |
|||
grpcAddress: grpcAddress, |
|||
capabilities: []string{"ec_encode", "ec_rebuild", "vacuum"}, |
|||
maxConcurrent: 2, // Can handle 2 concurrent tasks
|
|||
currentTasks: make(map[string]*ActiveTask), |
|||
stopCh: make(chan struct{}), |
|||
} |
|||
} |
|||
|
|||
// Start starts the worker
|
|||
func (w *ECWorker) Start() error { |
|||
w.mutex.Lock() |
|||
defer w.mutex.Unlock() |
|||
|
|||
if w.running { |
|||
return fmt.Errorf("worker already running") |
|||
} |
|||
|
|||
glog.Infof("Starting EC worker %s", w.workerID) |
|||
|
|||
// Start gRPC server
|
|||
err := w.startGRPCServer() |
|||
if err != nil { |
|||
return fmt.Errorf("failed to start gRPC server: %v", err) |
|||
} |
|||
|
|||
// Connect to admin server
|
|||
err = w.connectToAdmin() |
|||
if err != nil { |
|||
return fmt.Errorf("failed to connect to admin: %v", err) |
|||
} |
|||
|
|||
w.running = true |
|||
|
|||
// Start background goroutines
|
|||
go w.adminCommunicationLoop() |
|||
go w.heartbeatLoop() |
|||
go w.taskRequestLoop() |
|||
|
|||
glog.Infof("EC worker %s started successfully", w.workerID) |
|||
return nil |
|||
} |
|||
|
|||
// Stop stops the worker
|
|||
func (w *ECWorker) Stop() { |
|||
w.mutex.Lock() |
|||
defer w.mutex.Unlock() |
|||
|
|||
if !w.running { |
|||
return |
|||
} |
|||
|
|||
glog.Infof("Stopping EC worker %s", w.workerID) |
|||
|
|||
close(w.stopCh) |
|||
|
|||
// Cancel all active tasks
|
|||
w.taskMutex.Lock() |
|||
for _, task := range w.currentTasks { |
|||
task.Cancel() |
|||
} |
|||
w.taskMutex.Unlock() |
|||
|
|||
// Close connections
|
|||
if w.adminConn != nil { |
|||
w.adminConn.Close() |
|||
} |
|||
|
|||
if w.server != nil { |
|||
w.server.Stop() |
|||
} |
|||
|
|||
w.running = false |
|||
glog.Infof("EC worker %s stopped", w.workerID) |
|||
} |
|||
|
|||
// startGRPCServer starts the worker's gRPC server
|
|||
func (w *ECWorker) startGRPCServer() error { |
|||
listener, err := net.Listen("tcp", w.grpcAddress) |
|||
if err != nil { |
|||
return fmt.Errorf("failed to listen on %s: %v", w.grpcAddress, err) |
|||
} |
|||
|
|||
w.server = grpc.NewServer() |
|||
// Register any worker-specific services here
|
|||
|
|||
go func() { |
|||
err := w.server.Serve(listener) |
|||
if err != nil { |
|||
glog.Errorf("gRPC server error: %v", err) |
|||
} |
|||
}() |
|||
|
|||
glog.Infof("Worker gRPC server listening on %s", w.grpcAddress) |
|||
return nil |
|||
} |
|||
|
|||
// connectToAdmin establishes connection to admin server
|
|||
func (w *ECWorker) connectToAdmin() error { |
|||
// Convert to gRPC address (HTTP port + 10000)
|
|||
grpcAddress := pb.ServerToGrpcAddress(w.adminAddress) |
|||
conn, err := grpc.NewClient(grpcAddress, grpc.WithTransportCredentials(insecure.NewCredentials())) |
|||
if err != nil { |
|||
return fmt.Errorf("failed to connect to admin at %s: %v", w.adminAddress, err) |
|||
} |
|||
|
|||
w.adminConn = conn |
|||
w.adminClient = worker_pb.NewWorkerServiceClient(conn) |
|||
|
|||
// Create bidirectional stream
|
|||
stream, err := w.adminClient.WorkerStream(context.Background()) |
|||
if err != nil { |
|||
return fmt.Errorf("failed to create admin stream: %v", err) |
|||
} |
|||
|
|||
w.adminStream = stream |
|||
|
|||
// Send registration message
|
|||
err = w.sendRegistration() |
|||
if err != nil { |
|||
return fmt.Errorf("failed to register with admin: %v", err) |
|||
} |
|||
|
|||
glog.Infof("Connected to admin server at %s", w.adminAddress) |
|||
return nil |
|||
} |
|||
|
|||
// sendRegistration sends worker registration to admin
|
|||
func (w *ECWorker) sendRegistration() error { |
|||
registration := &worker_pb.WorkerMessage{ |
|||
WorkerId: w.workerID, |
|||
Timestamp: time.Now().Unix(), |
|||
Message: &worker_pb.WorkerMessage_Registration{ |
|||
Registration: &worker_pb.WorkerRegistration{ |
|||
WorkerId: w.workerID, |
|||
Address: w.grpcAddress, |
|||
Capabilities: w.capabilities, |
|||
MaxConcurrent: int32(w.maxConcurrent), |
|||
Metadata: map[string]string{ |
|||
"version": "1.0", |
|||
"type": "ec_worker", |
|||
}, |
|||
}, |
|||
}, |
|||
} |
|||
|
|||
return w.adminStream.Send(registration) |
|||
} |
|||
|
|||
// adminCommunicationLoop handles messages from admin server
|
|||
func (w *ECWorker) adminCommunicationLoop() { |
|||
for { |
|||
select { |
|||
case <-w.stopCh: |
|||
return |
|||
default: |
|||
} |
|||
|
|||
msg, err := w.adminStream.Recv() |
|||
if err != nil { |
|||
glog.Errorf("Error receiving from admin: %v", err) |
|||
time.Sleep(5 * time.Second) // Retry connection
|
|||
continue |
|||
} |
|||
|
|||
w.handleAdminMessage(msg) |
|||
} |
|||
} |
|||
|
|||
// handleAdminMessage processes messages from admin server
|
|||
func (w *ECWorker) handleAdminMessage(msg *worker_pb.AdminMessage) { |
|||
switch message := msg.Message.(type) { |
|||
case *worker_pb.AdminMessage_RegistrationResponse: |
|||
w.handleRegistrationResponse(message.RegistrationResponse) |
|||
case *worker_pb.AdminMessage_TaskAssignment: |
|||
w.handleTaskAssignment(message.TaskAssignment) |
|||
case *worker_pb.AdminMessage_TaskCancellation: |
|||
w.handleTaskCancellation(message.TaskCancellation) |
|||
case *worker_pb.AdminMessage_AdminShutdown: |
|||
w.handleAdminShutdown(message.AdminShutdown) |
|||
default: |
|||
glog.Warningf("Unknown message type from admin") |
|||
} |
|||
} |
|||
|
|||
// handleRegistrationResponse processes registration response
|
|||
func (w *ECWorker) handleRegistrationResponse(resp *worker_pb.RegistrationResponse) { |
|||
if resp.Success { |
|||
glog.Infof("Worker %s registered successfully with admin", w.workerID) |
|||
} else { |
|||
glog.Errorf("Worker registration failed: %s", resp.Message) |
|||
} |
|||
} |
|||
|
|||
// handleTaskAssignment processes task assignment from admin
|
|||
func (w *ECWorker) handleTaskAssignment(assignment *worker_pb.TaskAssignment) { |
|||
glog.Infof("Received task assignment: %s (%s) for volume %d", |
|||
assignment.TaskId, assignment.TaskType, assignment.Params.VolumeId) |
|||
|
|||
// Check if we can accept the task
|
|||
w.taskMutex.RLock() |
|||
currentLoad := len(w.currentTasks) |
|||
w.taskMutex.RUnlock() |
|||
|
|||
if currentLoad >= w.maxConcurrent { |
|||
glog.Warningf("Worker at capacity, cannot accept task %s", assignment.TaskId) |
|||
return |
|||
} |
|||
|
|||
// Create active task
|
|||
ctx, cancel := context.WithCancel(context.Background()) |
|||
task := &ActiveTask{ |
|||
ID: assignment.TaskId, |
|||
Type: assignment.TaskType, |
|||
VolumeID: assignment.Params.VolumeId, |
|||
Server: assignment.Params.Server, |
|||
Parameters: assignment.Params.Parameters, |
|||
StartedAt: time.Now(), |
|||
Progress: 0.0, |
|||
Status: "started", |
|||
Context: ctx, |
|||
Cancel: cancel, |
|||
} |
|||
|
|||
w.taskMutex.Lock() |
|||
w.currentTasks[assignment.TaskId] = task |
|||
w.taskMutex.Unlock() |
|||
|
|||
// Start task execution
|
|||
go w.executeTask(task) |
|||
} |
|||
|
|||
// handleTaskCancellation processes task cancellation
|
|||
func (w *ECWorker) handleTaskCancellation(cancellation *worker_pb.TaskCancellation) { |
|||
glog.Infof("Received task cancellation: %s", cancellation.TaskId) |
|||
|
|||
w.taskMutex.Lock() |
|||
defer w.taskMutex.Unlock() |
|||
|
|||
if task, exists := w.currentTasks[cancellation.TaskId]; exists { |
|||
task.Cancel() |
|||
delete(w.currentTasks, cancellation.TaskId) |
|||
glog.Infof("Cancelled task %s", cancellation.TaskId) |
|||
} |
|||
} |
|||
|
|||
// handleAdminShutdown processes admin shutdown notification
|
|||
func (w *ECWorker) handleAdminShutdown(shutdown *worker_pb.AdminShutdown) { |
|||
glog.Infof("Admin server shutting down: %s", shutdown.Reason) |
|||
w.Stop() |
|||
} |
|||
|
|||
// heartbeatLoop sends periodic heartbeats to admin
|
|||
func (w *ECWorker) heartbeatLoop() { |
|||
ticker := time.NewTicker(30 * time.Second) |
|||
defer ticker.Stop() |
|||
|
|||
for { |
|||
select { |
|||
case <-ticker.C: |
|||
w.sendHeartbeat() |
|||
case <-w.stopCh: |
|||
return |
|||
} |
|||
} |
|||
} |
|||
|
|||
// sendHeartbeat sends heartbeat to admin server
|
|||
func (w *ECWorker) sendHeartbeat() { |
|||
w.taskMutex.RLock() |
|||
currentLoad := len(w.currentTasks) |
|||
taskIDs := make([]string, 0, len(w.currentTasks)) |
|||
for taskID := range w.currentTasks { |
|||
taskIDs = append(taskIDs, taskID) |
|||
} |
|||
w.taskMutex.RUnlock() |
|||
|
|||
heartbeat := &worker_pb.WorkerMessage{ |
|||
WorkerId: w.workerID, |
|||
Timestamp: time.Now().Unix(), |
|||
Message: &worker_pb.WorkerMessage_Heartbeat{ |
|||
Heartbeat: &worker_pb.WorkerHeartbeat{ |
|||
WorkerId: w.workerID, |
|||
Status: "active", |
|||
CurrentLoad: int32(currentLoad), |
|||
MaxConcurrent: int32(w.maxConcurrent), |
|||
CurrentTaskIds: taskIDs, |
|||
TasksCompleted: 0, // TODO: Track completed tasks
|
|||
TasksFailed: 0, // TODO: Track failed tasks
|
|||
UptimeSeconds: int64(time.Since(time.Now()).Seconds()), // TODO: Track actual uptime
|
|||
}, |
|||
}, |
|||
} |
|||
|
|||
if err := w.adminStream.Send(heartbeat); err != nil { |
|||
glog.Errorf("Failed to send heartbeat: %v", err) |
|||
} |
|||
} |
|||
|
|||
// taskRequestLoop periodically requests new tasks from admin
|
|||
func (w *ECWorker) taskRequestLoop() { |
|||
ticker := time.NewTicker(10 * time.Second) |
|||
defer ticker.Stop() |
|||
|
|||
for { |
|||
select { |
|||
case <-ticker.C: |
|||
w.requestTasks() |
|||
case <-w.stopCh: |
|||
return |
|||
} |
|||
} |
|||
} |
|||
|
|||
// requestTasks requests new tasks from admin if we have capacity
|
|||
func (w *ECWorker) requestTasks() { |
|||
w.taskMutex.RLock() |
|||
currentLoad := len(w.currentTasks) |
|||
w.taskMutex.RUnlock() |
|||
|
|||
availableSlots := w.maxConcurrent - currentLoad |
|||
if availableSlots <= 0 { |
|||
return // No capacity
|
|||
} |
|||
|
|||
request := &worker_pb.WorkerMessage{ |
|||
WorkerId: w.workerID, |
|||
Timestamp: time.Now().Unix(), |
|||
Message: &worker_pb.WorkerMessage_TaskRequest{ |
|||
TaskRequest: &worker_pb.TaskRequest{ |
|||
WorkerId: w.workerID, |
|||
Capabilities: w.capabilities, |
|||
AvailableSlots: int32(availableSlots), |
|||
}, |
|||
}, |
|||
} |
|||
|
|||
if err := w.adminStream.Send(request); err != nil { |
|||
glog.Errorf("Failed to request tasks: %v", err) |
|||
} |
|||
} |
|||
|
|||
// executeTask executes a task based on its type
|
|||
func (w *ECWorker) executeTask(task *ActiveTask) { |
|||
defer func() { |
|||
w.taskMutex.Lock() |
|||
delete(w.currentTasks, task.ID) |
|||
w.taskMutex.Unlock() |
|||
}() |
|||
|
|||
glog.Infof("Starting execution of task %s (%s) for volume %d", |
|||
task.ID, task.Type, task.VolumeID) |
|||
|
|||
var err error |
|||
var success bool |
|||
|
|||
switch task.Type { |
|||
case "ec_encode": |
|||
success, err = w.executeECEncode(task) |
|||
case "ec_rebuild": |
|||
success, err = w.executeECRebuild(task) |
|||
case "vacuum": |
|||
success, err = w.executeVacuum(task) |
|||
default: |
|||
err = fmt.Errorf("unknown task type: %s", task.Type) |
|||
success = false |
|||
} |
|||
|
|||
// Send completion message
|
|||
w.sendTaskCompletion(task, success, err) |
|||
|
|||
if success { |
|||
glog.Infof("Task %s completed successfully", task.ID) |
|||
} else { |
|||
glog.Errorf("Task %s failed: %v", task.ID, err) |
|||
} |
|||
} |
|||
|
|||
// executeECEncode performs actual EC encoding on a volume
|
|||
func (w *ECWorker) executeECEncode(task *ActiveTask) (bool, error) { |
|||
glog.Infof("Performing EC encoding on volume %d", task.VolumeID) |
|||
|
|||
// Update progress
|
|||
w.sendTaskUpdate(task, 0.1, "Initializing EC encoding") |
|||
|
|||
// Connect to volume server
|
|||
volumeServerAddress := task.Server |
|||
if volumeServerAddress == "" { |
|||
return false, fmt.Errorf("no volume server address provided") |
|||
} |
|||
|
|||
// Convert to gRPC address (HTTP port + 10000)
|
|||
grpcAddress := pb.ServerToGrpcAddress(volumeServerAddress) |
|||
conn, err := grpc.NewClient(grpcAddress, grpc.WithTransportCredentials(insecure.NewCredentials())) |
|||
if err != nil { |
|||
return false, fmt.Errorf("failed to connect to volume server %s: %v", volumeServerAddress, err) |
|||
} |
|||
defer conn.Close() |
|||
|
|||
client := volume_server_pb.NewVolumeServerClient(conn) |
|||
|
|||
// Step 1: Generate EC shards
|
|||
w.sendTaskUpdate(task, 0.2, "Generating EC shards") |
|||
|
|||
generateReq := &volume_server_pb.VolumeEcShardsGenerateRequest{ |
|||
VolumeId: task.VolumeID, |
|||
Collection: task.Parameters["collection"], |
|||
} |
|||
|
|||
_, err = client.VolumeEcShardsGenerate(task.Context, generateReq) |
|||
if err != nil { |
|||
return false, fmt.Errorf("EC shard generation failed: %v", err) |
|||
} |
|||
|
|||
w.sendTaskUpdate(task, 0.6, "EC shards generated successfully") |
|||
|
|||
// Step 2: Mount EC volume
|
|||
w.sendTaskUpdate(task, 0.8, "Mounting EC volume") |
|||
|
|||
mountReq := &volume_server_pb.VolumeEcShardsMountRequest{ |
|||
VolumeId: task.VolumeID, |
|||
Collection: task.Parameters["collection"], |
|||
ShardIds: []uint32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, // All EC shards
|
|||
} |
|||
|
|||
_, err = client.VolumeEcShardsMount(task.Context, mountReq) |
|||
if err != nil { |
|||
return false, fmt.Errorf("EC shard mount failed: %v", err) |
|||
} |
|||
|
|||
// Step 3: Mark original volume as read-only
|
|||
w.sendTaskUpdate(task, 0.9, "Marking volume read-only") |
|||
|
|||
readOnlyReq := &volume_server_pb.VolumeMarkReadonlyRequest{ |
|||
VolumeId: task.VolumeID, |
|||
} |
|||
|
|||
_, err = client.VolumeMarkReadonly(task.Context, readOnlyReq) |
|||
if err != nil { |
|||
glog.Warningf("Failed to mark volume %d read-only: %v", task.VolumeID, err) |
|||
// This is not a critical failure for EC encoding
|
|||
} |
|||
|
|||
w.sendTaskUpdate(task, 1.0, "EC encoding completed") |
|||
|
|||
return true, nil |
|||
} |
|||
|
|||
// executeECRebuild performs EC shard rebuilding
|
|||
func (w *ECWorker) executeECRebuild(task *ActiveTask) (bool, error) { |
|||
glog.Infof("Performing EC rebuild on volume %d", task.VolumeID) |
|||
|
|||
w.sendTaskUpdate(task, 0.1, "Initializing EC rebuild") |
|||
|
|||
// Connect to volume server
|
|||
grpcAddress := pb.ServerToGrpcAddress(task.Server) |
|||
conn, err := grpc.NewClient(grpcAddress, grpc.WithTransportCredentials(insecure.NewCredentials())) |
|||
if err != nil { |
|||
return false, fmt.Errorf("failed to connect to volume server: %v", err) |
|||
} |
|||
defer conn.Close() |
|||
|
|||
client := volume_server_pb.NewVolumeServerClient(conn) |
|||
|
|||
// Rebuild missing/corrupted shards
|
|||
w.sendTaskUpdate(task, 0.5, "Rebuilding EC shards") |
|||
|
|||
rebuildReq := &volume_server_pb.VolumeEcShardsRebuildRequest{ |
|||
VolumeId: task.VolumeID, |
|||
Collection: task.Parameters["collection"], |
|||
} |
|||
|
|||
_, err = client.VolumeEcShardsRebuild(task.Context, rebuildReq) |
|||
if err != nil { |
|||
return false, fmt.Errorf("EC rebuild failed: %v", err) |
|||
} |
|||
|
|||
w.sendTaskUpdate(task, 1.0, "EC rebuild completed") |
|||
|
|||
return true, nil |
|||
} |
|||
|
|||
// executeVacuum performs volume vacuum operation
|
|||
func (w *ECWorker) executeVacuum(task *ActiveTask) (bool, error) { |
|||
glog.Infof("Performing vacuum on volume %d", task.VolumeID) |
|||
|
|||
w.sendTaskUpdate(task, 0.1, "Initializing vacuum") |
|||
|
|||
// Parse garbage threshold
|
|||
thresholdStr := task.Parameters["garbage_threshold"] |
|||
if thresholdStr == "" { |
|||
thresholdStr = "0.3" // Default 30%
|
|||
} |
|||
|
|||
threshold, err := strconv.ParseFloat(thresholdStr, 32) |
|||
if err != nil { |
|||
return false, fmt.Errorf("invalid garbage threshold: %v", err) |
|||
} |
|||
|
|||
// Connect to volume server
|
|||
grpcAddress := pb.ServerToGrpcAddress(task.Server) |
|||
conn, err := grpc.NewClient(grpcAddress, grpc.WithTransportCredentials(insecure.NewCredentials())) |
|||
if err != nil { |
|||
return false, fmt.Errorf("failed to connect to volume server: %v", err) |
|||
} |
|||
defer conn.Close() |
|||
|
|||
client := volume_server_pb.NewVolumeServerClient(conn) |
|||
|
|||
// Step 1: Check vacuum eligibility
|
|||
w.sendTaskUpdate(task, 0.2, "Checking vacuum eligibility") |
|||
|
|||
checkReq := &volume_server_pb.VacuumVolumeCheckRequest{ |
|||
VolumeId: task.VolumeID, |
|||
} |
|||
|
|||
checkResp, err := client.VacuumVolumeCheck(task.Context, checkReq) |
|||
if err != nil { |
|||
return false, fmt.Errorf("vacuum check failed: %v", err) |
|||
} |
|||
|
|||
if checkResp.GarbageRatio < float64(threshold) { |
|||
return true, fmt.Errorf("volume %d garbage ratio %.2f%% below threshold %.2f%%", |
|||
task.VolumeID, checkResp.GarbageRatio*100, threshold*100) |
|||
} |
|||
|
|||
// Step 2: Compact volume
|
|||
w.sendTaskUpdate(task, 0.4, "Compacting volume") |
|||
|
|||
compactReq := &volume_server_pb.VacuumVolumeCompactRequest{ |
|||
VolumeId: task.VolumeID, |
|||
} |
|||
|
|||
compactStream, err := client.VacuumVolumeCompact(task.Context, compactReq) |
|||
if err != nil { |
|||
return false, fmt.Errorf("vacuum compact failed: %v", err) |
|||
} |
|||
|
|||
// Process compact stream
|
|||
for { |
|||
resp, err := compactStream.Recv() |
|||
if err != nil { |
|||
if err.Error() == "EOF" { |
|||
break |
|||
} |
|||
return false, fmt.Errorf("vacuum compact stream error: %v", err) |
|||
} |
|||
|
|||
progress := 0.4 + 0.4*(float64(resp.ProcessedBytes)/float64(resp.LoadAvg_1M)) // Rough progress estimate
|
|||
w.sendTaskUpdate(task, float32(progress), "Compacting volume") |
|||
} |
|||
|
|||
// Step 3: Commit vacuum
|
|||
w.sendTaskUpdate(task, 0.9, "Committing vacuum") |
|||
|
|||
commitReq := &volume_server_pb.VacuumVolumeCommitRequest{ |
|||
VolumeId: task.VolumeID, |
|||
} |
|||
|
|||
_, err = client.VacuumVolumeCommit(task.Context, commitReq) |
|||
if err != nil { |
|||
return false, fmt.Errorf("vacuum commit failed: %v", err) |
|||
} |
|||
|
|||
// Step 4: Cleanup
|
|||
w.sendTaskUpdate(task, 0.95, "Cleaning up") |
|||
|
|||
cleanupReq := &volume_server_pb.VacuumVolumeCleanupRequest{ |
|||
VolumeId: task.VolumeID, |
|||
} |
|||
|
|||
_, err = client.VacuumVolumeCleanup(task.Context, cleanupReq) |
|||
if err != nil { |
|||
glog.Warningf("Vacuum cleanup warning: %v", err) |
|||
// Non-critical error
|
|||
} |
|||
|
|||
w.sendTaskUpdate(task, 1.0, "Vacuum completed successfully") |
|||
|
|||
return true, nil |
|||
} |
|||
|
|||
// sendTaskUpdate sends task progress update to admin
|
|||
func (w *ECWorker) sendTaskUpdate(task *ActiveTask, progress float32, message string) { |
|||
task.Progress = progress |
|||
task.Status = message |
|||
|
|||
update := &worker_pb.WorkerMessage{ |
|||
WorkerId: w.workerID, |
|||
Timestamp: time.Now().Unix(), |
|||
Message: &worker_pb.WorkerMessage_TaskUpdate{ |
|||
TaskUpdate: &worker_pb.TaskUpdate{ |
|||
TaskId: task.ID, |
|||
WorkerId: w.workerID, |
|||
Status: task.Status, |
|||
Progress: progress, |
|||
Message: message, |
|||
Metadata: map[string]string{ |
|||
"updated_at": time.Now().Format(time.RFC3339), |
|||
}, |
|||
}, |
|||
}, |
|||
} |
|||
|
|||
if err := w.adminStream.Send(update); err != nil { |
|||
glog.Errorf("Failed to send task update: %v", err) |
|||
} |
|||
} |
|||
|
|||
// sendTaskCompletion sends task completion to admin
|
|||
func (w *ECWorker) sendTaskCompletion(task *ActiveTask, success bool, taskErr error) { |
|||
var errorMessage string |
|||
if taskErr != nil { |
|||
errorMessage = taskErr.Error() |
|||
} |
|||
|
|||
completion := &worker_pb.WorkerMessage{ |
|||
WorkerId: w.workerID, |
|||
Timestamp: time.Now().Unix(), |
|||
Message: &worker_pb.WorkerMessage_TaskComplete{ |
|||
TaskComplete: &worker_pb.TaskComplete{ |
|||
TaskId: task.ID, |
|||
WorkerId: w.workerID, |
|||
Success: success, |
|||
ErrorMessage: errorMessage, |
|||
CompletionTime: time.Now().Unix(), |
|||
ResultMetadata: map[string]string{ |
|||
"duration": time.Since(task.StartedAt).String(), |
|||
}, |
|||
}, |
|||
}, |
|||
} |
|||
|
|||
if err := w.adminStream.Send(completion); err != nil { |
|||
glog.Errorf("Failed to send task completion: %v", err) |
|||
} |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue