
fix hanging task detail page

Branch: add-ec-vacuum
Author: chrislu, 4 months ago
Commit: c8c758e639
Changed files:
  weed/admin/dash/admin_server.go (37 changed lines)
  weed/admin/handlers/maintenance_handlers.go (39 changed lines)

weed/admin/dash/admin_server.go

@@ -20,6 +20,7 @@ import (
 	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
 	"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
 	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
+	"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
 	"github.com/seaweedfs/seaweedfs/weed/security"
 	"github.com/seaweedfs/seaweedfs/weed/util"
 	"github.com/seaweedfs/seaweedfs/weed/wdclient"

@@ -1198,8 +1199,23 @@ func (as *AdminServer) GetMaintenanceTaskDetail(taskID string) (*maintenance.TaskDetailData, error) {
 	// Get execution logs from worker if task is active/completed and worker is connected
 	if task.Status == maintenance.TaskStatusInProgress || task.Status == maintenance.TaskStatusCompleted {
 		if as.workerGrpcServer != nil && task.WorkerID != "" {
+			// Add additional timeout protection for worker log requests
+			type logResult struct {
+				logs []*worker_pb.TaskLogEntry
+				err  error
+			}
+			logChan := make(chan logResult, 1)
+			go func() {
 				workerLogs, err := as.workerGrpcServer.RequestTaskLogs(task.WorkerID, taskID, 100, "")
-			if err == nil && len(workerLogs) > 0 {
+				logChan <- logResult{logs: workerLogs, err: err}
+			}()
+			// Wait for logs with timeout
+			select {
+			case result := <-logChan:
+				if result.err == nil && len(result.logs) > 0 {
+					workerLogs := result.logs
 					// Convert worker logs to maintenance logs
 					for _, workerLog := range workerLogs {
 						maintenanceLog := &maintenance.TaskExecutionLog{

@@ -1227,18 +1243,31 @@ func (as *AdminServer) GetMaintenanceTaskDetail(taskID string) (*maintenance.TaskDetailData, error) {
 						}
 						taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, maintenanceLog)
 					}
-			} else if err != nil {
+				} else if result.err != nil {
 					// Add a diagnostic log entry when worker logs cannot be retrieved
 					diagnosticLog := &maintenance.TaskExecutionLog{
 						Timestamp: time.Now(),
 						Level:     "WARNING",
-					Message:   fmt.Sprintf("Failed to retrieve worker logs: %v", err),
+						Message:   fmt.Sprintf("Failed to retrieve worker logs: %v", result.err),
 						Source:    "admin",
 						TaskID:    taskID,
 						WorkerID:  task.WorkerID,
 					}
 					taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, diagnosticLog)
-				glog.V(1).Infof("Failed to get worker logs for task %s from worker %s: %v", taskID, task.WorkerID, err)
+					glog.V(1).Infof("Failed to get worker logs for task %s from worker %s: %v", taskID, task.WorkerID, result.err)
+				}
+			case <-time.After(8 * time.Second):
+				// Timeout getting logs from worker
+				timeoutLog := &maintenance.TaskExecutionLog{
+					Timestamp: time.Now(),
+					Level:     "WARNING",
+					Message:   "Timeout retrieving worker logs - worker may be unresponsive or busy",
+					Source:    "admin",
+					TaskID:    taskID,
+					WorkerID:  task.WorkerID,
+				}
+				taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, timeoutLog)
+				glog.Warningf("Timeout getting worker logs for task %s from worker %s", taskID, task.WorkerID)
+			}
 		} else {
 			// Add diagnostic information when worker is not available
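The change in admin_server.go is a bounded wait around a blocking call: the gRPC request runs in a goroutine that reports through a buffered channel, and the caller selects between that channel and time.After. Below is a minimal, self-contained sketch of the same pattern; fetchLogs is a hypothetical stand-in for RequestTaskLogs, not the actual SeaweedFS API.

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// fetchLogs is a hypothetical stand-in for the blocking RPC
// (RequestTaskLogs in the diff above).
func fetchLogs(workerID string) ([]string, error) {
	time.Sleep(10 * time.Second) // simulate an unresponsive worker
	return []string{"log line"}, nil
}

func fetchLogsWithTimeout(workerID string, timeout time.Duration) ([]string, error) {
	type logResult struct {
		logs []string
		err  error
	}
	// Buffer of 1 lets the goroutine complete its send even if the
	// caller has already timed out, so the goroutine does not leak
	// blocked on the channel.
	logChan := make(chan logResult, 1)
	go func() {
		logs, err := fetchLogs(workerID)
		logChan <- logResult{logs: logs, err: err}
	}()
	select {
	case r := <-logChan:
		return r.logs, r.err
	case <-time.After(timeout):
		// Only the wait is bounded; the RPC itself keeps running
		// until it finishes on its own.
		return nil, errors.New("timeout waiting for worker logs")
	}
}

func main() {
	logs, err := fetchLogsWithTimeout("worker-1", 8*time.Second)
	fmt.Println(logs, err)
}
```

The channel buffer of 1 is what makes the early return safe, and it is why the committed code can abandon the wait after 8 seconds without leaking a goroutine stuck on a send.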

weed/admin/handlers/maintenance_handlers.go

@@ -38,19 +38,36 @@ func (h *MaintenanceHandlers) ShowTaskDetail(c *gin.Context) {
 	taskID := c.Param("id")
 	glog.Infof("DEBUG ShowTaskDetail: Starting for task ID: %s", taskID)
+	// Add timeout to prevent indefinite hangs when worker is unresponsive
+	ctx, cancel := context.WithTimeout(c.Request.Context(), 15*time.Second)
+	defer cancel()
+	// Use a channel to handle timeout for task detail retrieval
+	type result struct {
+		taskDetail *maintenance.TaskDetailData
+		err        error
+	}
+	resultChan := make(chan result, 1)
+	go func() {
 		taskDetail, err := h.adminServer.GetMaintenanceTaskDetail(taskID)
-	if err != nil {
-		glog.Errorf("DEBUG ShowTaskDetail: error getting task detail for %s: %v", taskID, err)
-		c.String(http.StatusNotFound, "Task not found: %s (Error: %v)", taskID, err)
+		resultChan <- result{taskDetail: taskDetail, err: err}
+	}()
+	select {
+	case res := <-resultChan:
+		if res.err != nil {
+			glog.Errorf("DEBUG ShowTaskDetail: error getting task detail for %s: %v", taskID, res.err)
+			c.String(http.StatusNotFound, "Task not found: %s (Error: %v)", taskID, res.err)
 			return
 		}
-	glog.Infof("DEBUG ShowTaskDetail: got task detail for %s, task type: %s, status: %s", taskID, taskDetail.Task.Type, taskDetail.Task.Status)
+		glog.Infof("DEBUG ShowTaskDetail: got task detail for %s, task type: %s, status: %s", taskID, res.taskDetail.Task.Type, res.taskDetail.Task.Status)
 		c.Header("Content-Type", "text/html")
-	taskDetailComponent := app.TaskDetail(taskDetail)
+		taskDetailComponent := app.TaskDetail(res.taskDetail)
 		layoutComponent := layout.Layout(c, taskDetailComponent)
-	err = layoutComponent.Render(c.Request.Context(), c.Writer)
+		err := layoutComponent.Render(ctx, c.Writer)
 		if err != nil {
 			glog.Errorf("DEBUG ShowTaskDetail: render error: %v", err)
 			c.String(http.StatusInternalServerError, "Failed to render template: %v", err)

@@ -58,6 +75,16 @@ func (h *MaintenanceHandlers) ShowTaskDetail(c *gin.Context) {
 		}
 		glog.Infof("DEBUG ShowTaskDetail: template rendered successfully for task %s", taskID)
+	case <-ctx.Done():
+		glog.Warningf("ShowTaskDetail: timeout waiting for task detail data for task %s", taskID)
+		c.JSON(http.StatusRequestTimeout, gin.H{
+			"error": "Request timeout - task detail retrieval took too long. This may indicate the worker is unresponsive or stuck.",
+			"suggestion": "Try refreshing the page or check if the worker executing this task is responsive. If the task is stuck, it may need to be cancelled manually.",
+			"task_id": taskID,
+		})
+		return
+	}
 }

 // ShowMaintenanceQueue displays the maintenance queue page
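The handler applies the same idea one level up, bounding the whole page load with context.WithTimeout so the HTTP handler can respond even when the backend call hangs. A stripped-down sketch under those assumptions follows; loadDetail is an illustrative placeholder for GetMaintenanceTaskDetail, not the real admin-server method.

```go
package main

import (
	"context"
	"net/http"
	"time"

	"github.com/gin-gonic/gin"
)

// loadDetail is a hypothetical stand-in for the slow backend call.
func loadDetail(taskID string) (string, error) {
	time.Sleep(20 * time.Second) // simulate a stuck worker-log fetch
	return "detail for " + taskID, nil
}

func showTaskDetail(c *gin.Context) {
	taskID := c.Param("id")
	// Derive the deadline from the request context so a client
	// disconnect cancels the wait too.
	ctx, cancel := context.WithTimeout(c.Request.Context(), 15*time.Second)
	defer cancel()

	type result struct {
		detail string
		err    error
	}
	resultChan := make(chan result, 1) // buffered: a late send cannot leak the goroutine
	go func() {
		detail, err := loadDetail(taskID)
		resultChan <- result{detail: detail, err: err}
	}()

	select {
	case res := <-resultChan:
		if res.err != nil {
			c.String(http.StatusNotFound, "Task not found: %s", taskID)
			return
		}
		c.String(http.StatusOK, res.detail)
	case <-ctx.Done():
		// Fires on the 15s deadline or when the client goes away.
		c.JSON(http.StatusRequestTimeout, gin.H{
			"error":   "task detail retrieval took too long",
			"task_id": taskID,
		})
	}
}

func main() {
	r := gin.Default()
	r.GET("/maintenance/tasks/:id", showTaskDetail)
	r.Run(":8080")
}
```

Selecting on ctx.Done() rather than a bare time.After is what makes the two timeouts compose: the inner 8-second worker-log wait in admin_server.go normally resolves first, and the outer 15-second handler deadline is the backstop if GetMaintenanceTaskDetail blocks anywhere else.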
