Browse Source

Fix Maintenance Task Sorting and Refactor Log Persistence (#8199)

* fix float stepping

* do not auto refresh

* only log when status is non-200

* fix maintenance task sorting and cleanup redundant handler logic

* Refactor log retrieval to persist to disk and fix slowness

- Move log retrieval to disk-based persistence in GetMaintenanceTaskDetail
- Implement background log fetching on task completion in worker_grpc_server.go
- Implement async background refresh for in-progress tasks
- Completely remove blocking gRPC calls from the UI path to fix 10s timeouts
- Cleanup debug logs and performance profiling code

* Ensure consistent deterministic sorting in config_persistence cleanup

* Replace magic numbers with constants and remove debug logs

- Added descriptive constants for truncation limits and timeouts in admin_server.go and worker_grpc_server.go
- Replaced magic numbers with these constants throughout the codebase
- Verified removal of stdout debug printing
- Ensured consistent truncation logic during log persistence

* Address code review feedback on history truncation and logging logic

- Fix AssignmentHistory double-serialization by copying task in GetMaintenanceTaskDetail
- Fix handleTaskCompletion logging logic (mutually exclusive success/failure logs)
- Remove unused Timeout field from LogRequestContext and sync select timeouts with constants
- Ensure AssignmentHistory is only provided in the top-level field for better JSON structure

* Implement goroutine leak protection and request deduplication

- Add request deduplication in RequestTaskLogs to prevent multiple concurrent fetches for the same task
- Implement safe cleanup in timeout handlers to avoid race conditions in pendingLogRequests map
- Add a 10s cooldown for background log refreshes in GetMaintenanceTaskDetail to prevent spamming
- Ensure all persistent log-fetching goroutines are bounded and efficiently managed

* Fix potential nil pointer panics in maintenance handlers

- Add nil checks for adminServer in ShowTaskDetail, ShowMaintenanceWorkers, and UpdateTaskConfig
- Update getMaintenanceQueueData to return a descriptive error instead of nil when adminServer is uninitialized
- Ensure internal helper methods consistently check for adminServer initialization before use

* Strictly enforce disk-only log reading

- Remove background log fetching from GetMaintenanceTaskDetail to prevent timeouts and network calls during page view
- Remove unused lastLogFetch tracking fields to clean up dead code
- Ensure logs are only updated upon task completion via handleTaskCompletion

* Refactor GetWorkerLogs to read from disk

- Update /api/maintenance/workers/:id/logs endpoint to use configPersistence.LoadTaskExecutionLogs
- Remove synchronous gRPC call RequestTaskLogs to prevent timeouts and bad gateway errors
- Ensure consistent log retrieval behavior across the application (disk-only)

* Fix timestamp parsing in log viewer

- Update task_detail.templ JS to handle both ISO 8601 strings and Unix timestamps
- Fix "Invalid time value" error when displaying logs fetched from disk
- Regenerate templates

* master: fallback to HDD if SSD volumes are full in Assign

* worker: improve EC detection logging and fix skip counters

* worker: add Sync method to TaskLogger interface

* worker: implement Sync and ensure logs are flushed before task completion

* admin: improve task log retrieval with retries and better timeouts

* admin: robust timestamp parsing in task detail view
pull/8208/head
Chris Lu 3 weeks ago
committed by GitHub
parent
commit
72a8f598f2
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 241
      weed/admin/dash/admin_server.go
  2. 31
      weed/admin/dash/config_persistence.go
  3. 147
      weed/admin/dash/worker_grpc_server.go
  4. 76
      weed/admin/handlers/maintenance_handlers.go
  5. 34
      weed/admin/maintenance/maintenance_queue.go
  6. 2
      weed/admin/view/app/admin_templ.go
  7. 2
      weed/admin/view/app/cluster_brokers_templ.go
  8. 2
      weed/admin/view/app/cluster_collections_templ.go
  9. 2
      weed/admin/view/app/cluster_ec_shards_templ.go
  10. 2
      weed/admin/view/app/cluster_ec_volumes_templ.go
  11. 2
      weed/admin/view/app/cluster_filers_templ.go
  12. 2
      weed/admin/view/app/cluster_masters_templ.go
  13. 2
      weed/admin/view/app/cluster_volume_servers_templ.go
  14. 2
      weed/admin/view/app/cluster_volumes_templ.go
  15. 2
      weed/admin/view/app/collection_details_templ.go
  16. 2
      weed/admin/view/app/ec_volume_details_templ.go
  17. 2
      weed/admin/view/app/file_browser_templ.go
  18. 2
      weed/admin/view/app/iceberg_catalog_templ.go
  19. 2
      weed/admin/view/app/iceberg_namespaces_templ.go
  20. 2
      weed/admin/view/app/iceberg_tables_templ.go
  21. 2
      weed/admin/view/app/maintenance_config_schema_templ.go
  22. 2
      weed/admin/view/app/maintenance_config_templ.go
  23. 7
      weed/admin/view/app/maintenance_queue.templ
  24. 8
      weed/admin/view/app/maintenance_queue_templ.go
  25. 2
      weed/admin/view/app/maintenance_workers_templ.go
  26. 2
      weed/admin/view/app/object_store_users_templ.go
  27. 2
      weed/admin/view/app/policies_templ.go
  28. 2
      weed/admin/view/app/s3_buckets_templ.go
  29. 2
      weed/admin/view/app/s3tables_buckets_templ.go
  30. 2
      weed/admin/view/app/s3tables_namespaces_templ.go
  31. 2
      weed/admin/view/app/s3tables_tables_templ.go
  32. 2
      weed/admin/view/app/service_accounts_templ.go
  33. 2
      weed/admin/view/app/subscribers_templ.go
  34. 2
      weed/admin/view/app/task_config_schema.templ
  35. 4
      weed/admin/view/app/task_config_schema_templ.go
  36. 2
      weed/admin/view/app/task_config_templ.go
  37. 2
      weed/admin/view/app/task_config_templ_templ.go
  38. 47
      weed/admin/view/app/task_detail.templ
  39. 4
      weed/admin/view/app/task_detail_templ.go
  40. 2
      weed/admin/view/app/topic_details_templ.go
  41. 2
      weed/admin/view/app/topics_templ.go
  42. 2
      weed/admin/view/app/volume_details_templ.go
  43. 2
      weed/admin/view/components/config_sections_templ.go
  44. 2
      weed/admin/view/components/form_fields_templ.go
  45. 2
      weed/admin/view/layout/layout_templ.go
  46. 15
      weed/command/admin.go
  47. 10
      weed/server/master_grpc_server_assign.go
  48. 16
      weed/worker/tasks/erasure_coding/detection.go
  49. 17
      weed/worker/tasks/task_logger.go
  50. 1
      weed/worker/types/typed_task_interface.go
  51. 10
      weed/worker/worker.go

241
weed/admin/dash/admin_server.go

@ -5,7 +5,6 @@ import (
"fmt"
"net/http"
"sort"
"strconv"
"strings"
"time"
@ -33,6 +32,17 @@ import (
_ "github.com/seaweedfs/seaweedfs/weed/credential/grpc" // Register gRPC credential store
)
const (
maxAssignmentHistoryDisplay = 50
maxLogMessageLength = 2000
maxLogFields = 20
maxRelatedTasksDisplay = 50
maxRecentTasksDisplay = 10
defaultCacheTimeout = 10 * time.Second
defaultFilerCacheTimeout = 30 * time.Second
defaultStatsCacheTimeout = 30 * time.Second
)
// FilerConfig holds filer configuration needed for bucket operations
type FilerConfig struct {
BucketsPath string
@ -132,10 +142,10 @@ func NewAdminServer(masters string, templateFS http.FileSystem, dataDir string,
templateFS: templateFS,
dataDir: dataDir,
grpcDialOption: grpcDialOption,
cacheExpiration: 10 * time.Second,
filerCacheExpiration: 30 * time.Second, // Cache filers for 30 seconds
cacheExpiration: defaultCacheTimeout,
filerCacheExpiration: defaultFilerCacheTimeout,
configPersistence: NewConfigPersistence(dataDir),
collectionStatsCacheThreshold: 30 * time.Second,
collectionStatsCacheThreshold: defaultStatsCacheTimeout,
s3TablesManager: newS3TablesManager(),
icebergPort: icebergPort,
}
@ -779,7 +789,7 @@ func (s *AdminServer) GetClusterBrokers() (*ClusterBrokersData, error) {
// ShowMaintenanceQueue displays the maintenance queue page
func (as *AdminServer) ShowMaintenanceQueue(c *gin.Context) {
data, err := as.getMaintenanceQueueData()
data, err := as.GetMaintenanceQueueData()
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
@ -868,7 +878,7 @@ func (as *AdminServer) TriggerMaintenanceScan(c *gin.Context) {
// GetMaintenanceTasks returns all maintenance tasks
func (as *AdminServer) GetMaintenanceTasks(c *gin.Context) {
tasks, err := as.getMaintenanceTasks()
tasks, err := as.GetAllMaintenanceTasks()
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
@ -1032,9 +1042,9 @@ func (as *AdminServer) UpdateMaintenanceConfigData(config *maintenance.Maintenan
// Helper methods for maintenance operations
// getMaintenanceQueueData returns data for the maintenance queue UI
func (as *AdminServer) getMaintenanceQueueData() (*maintenance.MaintenanceQueueData, error) {
tasks, err := as.getMaintenanceTasks()
// GetMaintenanceQueueData returns data for the maintenance queue UI
func (as *AdminServer) GetMaintenanceQueueData() (*maintenance.MaintenanceQueueData, error) {
tasks, err := as.GetAllMaintenanceTasks()
if err != nil {
return nil, err
}
@ -1089,14 +1099,16 @@ func (as *AdminServer) getMaintenanceQueueStats() (*maintenance.QueueStats, erro
return queueStats, nil
}
// getMaintenanceTasks returns all maintenance tasks
func (as *AdminServer) getMaintenanceTasks() ([]*maintenance.MaintenanceTask, error) {
// GetAllMaintenanceTasks returns all maintenance tasks
func (as *AdminServer) GetAllMaintenanceTasks() ([]*maintenance.MaintenanceTask, error) {
if as.maintenanceManager == nil {
return []*maintenance.MaintenanceTask{}, nil
}
// Collect all tasks from memory across all statuses
allTasks := []*maintenance.MaintenanceTask{}
// 1. Collect all tasks from memory
tasksMap := make(map[string]*maintenance.MaintenanceTask)
// Collect from memory via GetTasks loop to ensure we catch everything
statuses := []maintenance.MaintenanceTaskStatus{
maintenance.TaskStatusPending,
maintenance.TaskStatusAssigned,
@ -1108,29 +1120,92 @@ func (as *AdminServer) getMaintenanceTasks() ([]*maintenance.MaintenanceTask, er
for _, status := range statuses {
tasks := as.maintenanceManager.GetTasks(status, "", 0)
allTasks = append(allTasks, tasks...)
for _, t := range tasks {
tasksMap[t.ID] = t
}
}
// Also load any persisted tasks that might not be in memory
// 2. Merge persisted tasks
if as.configPersistence != nil {
persistedTasks, err := as.configPersistence.LoadAllTaskStates()
if err == nil {
// Add any persisted tasks not already in memory
for _, persistedTask := range persistedTasks {
found := false
for _, memoryTask := range allTasks {
if memoryTask.ID == persistedTask.ID {
found = true
break
}
}
if !found {
allTasks = append(allTasks, persistedTask)
for _, t := range persistedTasks {
if _, exists := tasksMap[t.ID]; !exists {
tasksMap[t.ID] = t
}
}
}
}
// 3. Bucketize buckets
var pendingTasks, activeTasks, finishedTasks []*maintenance.MaintenanceTask
for _, t := range tasksMap {
switch t.Status {
case maintenance.TaskStatusPending:
pendingTasks = append(pendingTasks, t)
case maintenance.TaskStatusAssigned, maintenance.TaskStatusInProgress:
activeTasks = append(activeTasks, t)
case maintenance.TaskStatusCompleted, maintenance.TaskStatusFailed, maintenance.TaskStatusCancelled:
finishedTasks = append(finishedTasks, t)
default:
// Treat unknown as finished/archived? Or pending?
// Safest to add to finished so they appear somewhere
finishedTasks = append(finishedTasks, t)
}
}
// 4. Sort buckets
// Pending: Newest Created First
sort.Slice(pendingTasks, func(i, j int) bool {
return pendingTasks[i].CreatedAt.After(pendingTasks[j].CreatedAt)
})
// Active: Newest Created First (or StartedAt?)
sort.Slice(activeTasks, func(i, j int) bool {
return activeTasks[i].CreatedAt.After(activeTasks[j].CreatedAt)
})
// Finished: Newest Completed First
sort.Slice(finishedTasks, func(i, j int) bool {
t1 := finishedTasks[i].CompletedAt
t2 := finishedTasks[j].CompletedAt
// Handle nil completion times
if t1 == nil && t2 == nil {
// Both nil, fallback to CreatedAt
if !finishedTasks[i].CreatedAt.Equal(finishedTasks[j].CreatedAt) {
return finishedTasks[i].CreatedAt.After(finishedTasks[j].CreatedAt)
}
return finishedTasks[i].ID > finishedTasks[j].ID
}
if t1 == nil {
return false // t1 (nil) goes to bottom
}
if t2 == nil {
return true // t2 (nil) goes to bottom
}
// Compare completion times
if !t1.Equal(*t2) {
return t1.After(*t2)
}
// Fallback to CreatedAt if completion times are identical
if !finishedTasks[i].CreatedAt.Equal(finishedTasks[j].CreatedAt) {
return finishedTasks[i].CreatedAt.After(finishedTasks[j].CreatedAt)
}
// Final tie-breaker: ID
return finishedTasks[i].ID > finishedTasks[j].ID
})
// 5. Recombine
allTasks := make([]*maintenance.MaintenanceTask, 0, len(tasksMap))
allTasks = append(allTasks, pendingTasks...)
allTasks = append(allTasks, activeTasks...)
allTasks = append(allTasks, finishedTasks...)
return allTasks, nil
}
@ -1181,15 +1256,25 @@ func (as *AdminServer) GetMaintenanceTaskDetail(taskID string) (*maintenance.Tas
return nil, err
}
// Copy task and truncate assignment history for display
displayTask := *task
displayTask.AssignmentHistory = nil // History is provided separately in taskDetail
// Create task detail structure from the loaded task
taskDetail := &maintenance.TaskDetailData{
Task: task,
Task: &displayTask,
AssignmentHistory: task.AssignmentHistory, // Use assignment history from persisted task
ExecutionLogs: []*maintenance.TaskExecutionLog{},
RelatedTasks: []*maintenance.MaintenanceTask{},
LastUpdated: time.Now(),
}
// Truncate assignment history if it's too long (display last N only)
if len(taskDetail.AssignmentHistory) > maxAssignmentHistoryDisplay {
startIdx := len(taskDetail.AssignmentHistory) - maxAssignmentHistoryDisplay
taskDetail.AssignmentHistory = taskDetail.AssignmentHistory[startIdx:]
}
if taskDetail.AssignmentHistory == nil {
taskDetail.AssignmentHistory = []*maintenance.TaskAssignmentRecord{}
}
@ -1205,72 +1290,19 @@ func (as *AdminServer) GetMaintenanceTaskDetail(taskID string) (*maintenance.Tas
}
}
// Get execution logs from worker if task is active/completed and worker is connected
if task.Status == maintenance.TaskStatusInProgress || task.Status == maintenance.TaskStatusCompleted {
if as.workerGrpcServer != nil && task.WorkerID != "" {
workerLogs, err := as.workerGrpcServer.RequestTaskLogs(task.WorkerID, taskID, 100, "")
if err == nil && len(workerLogs) > 0 {
// Convert worker logs to maintenance logs
for _, workerLog := range workerLogs {
maintenanceLog := &maintenance.TaskExecutionLog{
Timestamp: time.Unix(workerLog.Timestamp, 0),
Level: workerLog.Level,
Message: workerLog.Message,
Source: "worker",
TaskID: taskID,
WorkerID: task.WorkerID,
}
// carry structured fields if present
if len(workerLog.Fields) > 0 {
maintenanceLog.Fields = make(map[string]string, len(workerLog.Fields))
for k, v := range workerLog.Fields {
maintenanceLog.Fields[k] = v
}
}
// carry optional progress/status
if workerLog.Progress != 0 {
p := float64(workerLog.Progress)
maintenanceLog.Progress = &p
}
if workerLog.Status != "" {
maintenanceLog.Status = workerLog.Status
}
taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, maintenanceLog)
}
} else if err != nil {
// Add a diagnostic log entry when worker logs cannot be retrieved
diagnosticLog := &maintenance.TaskExecutionLog{
Timestamp: time.Now(),
Level: "WARNING",
Message: fmt.Sprintf("Failed to retrieve worker logs: %v", err),
Source: "admin",
TaskID: taskID,
WorkerID: task.WorkerID,
}
taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, diagnosticLog)
glog.V(1).Infof("Failed to get worker logs for task %s from worker %s: %v", taskID, task.WorkerID, err)
}
// Load execution logs from disk
if as.configPersistence != nil {
logs, err := as.configPersistence.LoadTaskExecutionLogs(taskID)
if err == nil {
taskDetail.ExecutionLogs = logs
} else {
// Add diagnostic information when worker is not available
reason := "worker gRPC server not available"
if task.WorkerID == "" {
reason = "no worker assigned to task"
}
diagnosticLog := &maintenance.TaskExecutionLog{
Timestamp: time.Now(),
Level: "INFO",
Message: fmt.Sprintf("Worker logs not available: %s", reason),
Source: "admin",
TaskID: taskID,
WorkerID: task.WorkerID,
}
taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, diagnosticLog)
glog.V(2).Infof("No execution logs found on disk for task %s", taskID)
}
}
// Get related tasks (other tasks on same volume/server)
if task.VolumeID != 0 || task.Server != "" {
allTasks := as.maintenanceManager.GetTasks("", "", 50) // Get recent tasks
allTasks := as.maintenanceManager.GetTasks("", "", maxRelatedTasksDisplay) // Get recent tasks
for _, relatedTask := range allTasks {
if relatedTask.ID != taskID &&
(relatedTask.VolumeID == task.VolumeID || relatedTask.Server == task.Server) {
@ -1324,7 +1356,7 @@ func (as *AdminServer) getMaintenanceWorkerDetails(workerID string) (*WorkerDeta
}
// Get recent tasks for this worker
recentTasks := as.maintenanceManager.GetTasks(TaskStatusCompleted, "", 10)
recentTasks := as.maintenanceManager.GetTasks(TaskStatusCompleted, "", maxRecentTasksDisplay)
var workerRecentTasks []*MaintenanceTask
for _, task := range recentTasks {
if task.WorkerID == workerID {
@ -1336,12 +1368,13 @@ func (as *AdminServer) getMaintenanceWorkerDetails(workerID string) (*WorkerDeta
var totalDuration time.Duration
var completedTasks, failedTasks int
for _, task := range workerRecentTasks {
if task.Status == TaskStatusCompleted {
switch task.Status {
case TaskStatusCompleted:
completedTasks++
if task.StartedAt != nil && task.CompletedAt != nil {
totalDuration += task.CompletedAt.Sub(*task.StartedAt)
}
} else if task.Status == TaskStatusFailed {
case TaskStatusFailed:
failedTasks++
}
}
@ -1370,31 +1403,29 @@ func (as *AdminServer) getMaintenanceWorkerDetails(workerID string) (*WorkerDeta
}, nil
}
// GetWorkerLogs fetches logs from a specific worker for a task
// GetWorkerLogs fetches logs from a specific worker for a task (now reads from disk)
func (as *AdminServer) GetWorkerLogs(c *gin.Context) {
workerID := c.Param("id")
taskID := c.Query("taskId")
maxEntriesStr := c.DefaultQuery("maxEntries", "100")
logLevel := c.DefaultQuery("logLevel", "")
maxEntries := int32(100)
if maxEntriesStr != "" {
if parsed, err := strconv.ParseInt(maxEntriesStr, 10, 32); err == nil {
maxEntries = int32(parsed)
}
}
if as.workerGrpcServer == nil {
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "Worker gRPC server not available"})
// Check config persistence first
if as.configPersistence == nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Config persistence not available"})
return
}
logs, err := as.workerGrpcServer.RequestTaskLogs(workerID, taskID, maxEntries, logLevel)
// Load logs strictly from disk to avoid timeouts and network dependency
// This matches the behavior of the Task Detail page
logs, err := as.configPersistence.LoadTaskExecutionLogs(taskID)
if err != nil {
c.JSON(http.StatusBadGateway, gin.H{"error": fmt.Sprintf("Failed to get logs from worker: %v", err)})
return
glog.V(2).Infof("No execution logs found on disk for task %s: %v", taskID, err)
logs = []*maintenance.TaskExecutionLog{}
}
// Filter logs by workerID if strictly needed, but usually task logs are what we want
// The persistent logs struct (TaskExecutionLog) matches what the frontend expects for the detail view
// ensuring consistent display.
c.JSON(http.StatusOK, gin.H{"worker_id": workerID, "task_id": taskID, "logs": logs, "count": len(logs)})
}

31
weed/admin/dash/config_persistence.go

@ -962,7 +962,36 @@ func (cp *ConfigPersistence) CleanupCompletedTasks() error {
// Sort by completion time (most recent first)
sort.Slice(completedTasks, func(i, j int) bool {
return completedTasks[i].CompletedAt.After(*completedTasks[j].CompletedAt)
t1 := completedTasks[i].CompletedAt
t2 := completedTasks[j].CompletedAt
// Handle nil completion times
if t1 == nil && t2 == nil {
// Both nil, fallback to CreatedAt
if !completedTasks[i].CreatedAt.Equal(completedTasks[j].CreatedAt) {
return completedTasks[i].CreatedAt.After(completedTasks[j].CreatedAt)
}
return completedTasks[i].ID > completedTasks[j].ID
}
if t1 == nil {
return false // t1 (nil) goes to bottom
}
if t2 == nil {
return true // t2 (nil) goes to bottom
}
// Compare completion times
if !t1.Equal(*t2) {
return t1.After(*t2)
}
// Fallback to CreatedAt if completion times are identical
if !completedTasks[i].CreatedAt.Equal(completedTasks[j].CreatedAt) {
return completedTasks[i].CreatedAt.After(completedTasks[j].CreatedAt)
}
// Final tie-breaker: ID
return completedTasks[i].ID > completedTasks[j].ID
})
// Keep only the most recent MaxCompletedTasks, delete the rest

147
weed/admin/dash/worker_grpc_server.go

@ -8,6 +8,7 @@ import (
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/admin/maintenance"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
@ -17,6 +18,15 @@ import (
"google.golang.org/grpc/peer"
)
const (
maxLogFetchLimit = 1000
maxLogMessageSize = 2000
maxLogFieldsCount = 20
logRequestTimeout = 10 * time.Second
logResponseTimeout = 30 * time.Second
logSendTimeout = 10 * time.Second
)
// WorkerGrpcServer implements the WorkerService gRPC interface
type WorkerGrpcServer struct {
worker_pb.UnimplementedWorkerServiceServer
@ -42,7 +52,6 @@ type LogRequestContext struct {
TaskID string
WorkerID string
ResponseCh chan *worker_pb.TaskLogResponse
Timeout time.Time
}
// WorkerConnection represents an active worker connection
@ -89,8 +98,9 @@ func (s *WorkerGrpcServer) StartWithTLS(port int) error {
s.listener = listener
s.running = true
// Start cleanup routine
// Start background routines
go s.cleanupRoutine()
go s.activeLogFetchLoop()
// Start serving in a goroutine
go func() {
@ -437,9 +447,90 @@ func (s *WorkerGrpcServer) handleTaskCompletion(conn *WorkerConnection, completi
} else {
glog.Errorf("Worker %s failed task %s: %s", conn.workerID, completion.TaskId, completion.ErrorMessage)
}
// Fetch and persist logs
go s.FetchAndSaveLogs(conn.workerID, completion.TaskId)
}
}
// FetchAndSaveLogs retrieves logs from a worker and saves them to disk
func (s *WorkerGrpcServer) FetchAndSaveLogs(workerID, taskID string) error {
// Add a small initial delay to allow worker to finalize and sync logs
// especially when this is called immediately after TaskComplete
time.Sleep(300 * time.Millisecond)
var workerLogs []*worker_pb.TaskLogEntry
var err error
// Retry a few times if fetch fails, as logs might be in the middle of a terminal sync
for attempt := 1; attempt <= 3; attempt++ {
workerLogs, err = s.RequestTaskLogs(workerID, taskID, maxLogFetchLimit, "")
if err == nil {
break
}
if attempt < 3 {
glog.V(1).Infof("Fetch logs attempt %d failed for task %s: %v. Retrying in 1s...", attempt, taskID, err)
time.Sleep(1 * time.Second)
}
}
if err != nil {
glog.Warningf("Failed to fetch logs for task %s after 3 attempts: %v", taskID, err)
return err
}
// Convert logs
var maintenanceLogs []*maintenance.TaskExecutionLog
for _, workerLog := range workerLogs {
maintenanceLog := &maintenance.TaskExecutionLog{
Timestamp: time.Unix(workerLog.Timestamp, 0),
Level: workerLog.Level,
Message: workerLog.Message,
Source: "worker",
TaskID: taskID,
WorkerID: workerID,
}
// Truncate very long messages to prevent rendering issues and disk bloat
if len(maintenanceLog.Message) > maxLogMessageSize {
maintenanceLog.Message = maintenanceLog.Message[:maxLogMessageSize] + "... (truncated)"
}
// carry structured fields if present
if len(workerLog.Fields) > 0 {
maintenanceLog.Fields = make(map[string]string)
fieldCount := 0
for k, v := range workerLog.Fields {
if fieldCount >= maxLogFieldsCount {
maintenanceLog.Fields["..."] = fmt.Sprintf("(%d more fields truncated)", len(workerLog.Fields)-maxLogFieldsCount)
break
}
maintenanceLog.Fields[k] = v
fieldCount++
}
}
// carry optional progress/status
if workerLog.Progress != 0 {
p := float64(workerLog.Progress)
maintenanceLog.Progress = &p
}
if workerLog.Status != "" {
maintenanceLog.Status = workerLog.Status
}
maintenanceLogs = append(maintenanceLogs, maintenanceLog)
}
// Persist logs
if s.adminServer.configPersistence != nil {
if err := s.adminServer.configPersistence.SaveTaskExecutionLogs(taskID, maintenanceLogs); err != nil {
glog.Errorf("Failed to persist logs for task %s: %v", taskID, err)
return err
}
}
return nil
}
// handleTaskLogResponse processes task log responses from workers
func (s *WorkerGrpcServer) handleTaskLogResponse(conn *WorkerConnection, response *worker_pb.TaskLogResponse) {
requestKey := fmt.Sprintf("%s:%s", response.WorkerId, response.TaskId)
@ -575,10 +666,13 @@ func (s *WorkerGrpcServer) RequestTaskLogs(workerID, taskID string, maxEntries i
TaskID: taskID,
WorkerID: workerID,
ResponseCh: responseCh,
Timeout: time.Now().Add(10 * time.Second),
}
s.logRequestsMutex.Lock()
if _, exists := s.pendingLogRequests[requestKey]; exists {
s.logRequestsMutex.Unlock()
return nil, fmt.Errorf("a log request for task %s is already in progress", taskID)
}
s.pendingLogRequests[requestKey] = requestContext
s.logRequestsMutex.Unlock()
@ -601,10 +695,12 @@ func (s *WorkerGrpcServer) RequestTaskLogs(workerID, taskID string, maxEntries i
select {
case conn.outgoing <- logRequest:
glog.V(1).Infof("Log request sent to worker %s for task %s", workerID, taskID)
case <-time.After(5 * time.Second):
case <-time.After(logSendTimeout):
// Clean up pending request on timeout
s.logRequestsMutex.Lock()
delete(s.pendingLogRequests, requestKey)
if s.pendingLogRequests[requestKey] == requestContext {
delete(s.pendingLogRequests, requestKey)
}
s.logRequestsMutex.Unlock()
return nil, fmt.Errorf("timeout sending log request to worker %s", workerID)
}
@ -617,10 +713,12 @@ func (s *WorkerGrpcServer) RequestTaskLogs(workerID, taskID string, maxEntries i
}
glog.V(1).Infof("Received %d log entries for task %s from worker %s", len(response.LogEntries), taskID, workerID)
return response.LogEntries, nil
case <-time.After(10 * time.Second):
case <-time.After(logResponseTimeout):
// Clean up pending request on timeout
s.logRequestsMutex.Lock()
delete(s.pendingLogRequests, requestKey)
if s.pendingLogRequests[requestKey] == requestContext {
delete(s.pendingLogRequests, requestKey)
}
s.logRequestsMutex.Unlock()
return nil, fmt.Errorf("timeout waiting for log response from worker %s", workerID)
}
@ -684,3 +782,38 @@ func findClientAddress(ctx context.Context) string {
}
return pr.Addr.String()
}
// activeLogFetchLoop periodically fetches logs for all in-progress tasks
func (s *WorkerGrpcServer) activeLogFetchLoop() {
ticker := time.NewTicker(30 * time.Second)
defer ticker.Stop()
for {
select {
case <-s.stopChan:
return
case <-ticker.C:
if !s.running || s.adminServer == nil || s.adminServer.maintenanceManager == nil {
continue
}
// Get all in-progress tasks
tasks := s.adminServer.maintenanceManager.GetTasks(maintenance.TaskStatusInProgress, "", 0)
if len(tasks) == 0 {
continue
}
glog.V(2).Infof("Background log fetcher: found %d in-progress tasks", len(tasks))
for _, task := range tasks {
if task.WorkerID != "" {
// Use a goroutine to avoid blocking the loop
go func(wID, tID string) {
if err := s.FetchAndSaveLogs(wID, tID); err != nil {
glog.V(2).Infof("Background log fetch failed for task %s on worker %s: %v", tID, wID, err)
}
}(task.WorkerID, task.ID)
}
}
}
}
}

76
weed/admin/handlers/maintenance_handlers.go

@ -39,6 +39,11 @@ func NewMaintenanceHandlers(adminServer *dash.AdminServer) *MaintenanceHandlers
func (h *MaintenanceHandlers) ShowTaskDetail(c *gin.Context) {
taskID := c.Param("id")
if h.adminServer == nil {
c.String(http.StatusInternalServerError, "Admin server not initialized")
return
}
taskDetail, err := h.adminServer.GetMaintenanceTaskDetail(taskID)
if err != nil {
glog.Errorf("DEBUG ShowTaskDetail: error getting task detail for %s: %v", taskID, err)
@ -111,6 +116,10 @@ func (h *MaintenanceHandlers) ShowMaintenanceQueue(c *gin.Context) {
// ShowMaintenanceWorkers displays the maintenance workers page
func (h *MaintenanceHandlers) ShowMaintenanceWorkers(c *gin.Context) {
if h.adminServer == nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Admin server not initialized"})
return
}
workersData, err := h.adminServer.GetMaintenanceWorkersData()
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@ -339,6 +348,8 @@ func (h *MaintenanceHandlers) UpdateTaskConfig(c *gin.Context) {
glog.Warningf("Failed to save task config to protobuf file: %v", err)
// Don't fail the request, just log the warning
}
} else if h.adminServer == nil {
glog.Warningf("Failed to save task config: admin server not initialized")
}
// Trigger a configuration reload in the maintenance manager
@ -492,74 +503,25 @@ func (h *MaintenanceHandlers) UpdateMaintenanceConfig(c *gin.Context) {
// Helper methods that delegate to AdminServer
func (h *MaintenanceHandlers) getMaintenanceQueueData() (*maintenance.MaintenanceQueueData, error) {
tasks, err := h.getMaintenanceTasks()
if err != nil {
return nil, err
}
workers, err := h.getMaintenanceWorkers()
if err != nil {
return nil, err
}
stats, err := h.getMaintenanceQueueStats()
if err != nil {
return nil, err
}
data := &maintenance.MaintenanceQueueData{
Tasks: tasks,
Workers: workers,
Stats: stats,
LastUpdated: time.Now(),
}
return data, nil
}
func (h *MaintenanceHandlers) getMaintenanceQueueStats() (*maintenance.QueueStats, error) {
// Use the exported method from AdminServer
return h.adminServer.GetMaintenanceQueueStats()
}
func (h *MaintenanceHandlers) getMaintenanceTasks() ([]*maintenance.MaintenanceTask, error) {
// Call the maintenance manager directly to get recent tasks (limit for performance)
if h.adminServer == nil {
return []*maintenance.MaintenanceTask{}, nil
return nil, fmt.Errorf("admin server not initialized")
}
manager := h.adminServer.GetMaintenanceManager()
if manager == nil {
return []*maintenance.MaintenanceTask{}, nil
}
// Get recent tasks only (last 100) to prevent slow page loads
// Users can view more tasks via pagination if needed
allTasks := manager.GetTasks("", "", 100)
return allTasks, nil
// Use the exported method from AdminServer used by the JSON API
return h.adminServer.GetMaintenanceQueueData()
}
func (h *MaintenanceHandlers) getMaintenanceWorkers() ([]*maintenance.MaintenanceWorker, error) {
// Get workers from the admin server's maintenance manager
func (h *MaintenanceHandlers) getMaintenanceConfig() (*maintenance.MaintenanceConfigData, error) {
if h.adminServer == nil {
return []*maintenance.MaintenanceWorker{}, nil
return nil, fmt.Errorf("admin server not initialized")
}
if h.adminServer.GetMaintenanceManager() == nil {
return []*maintenance.MaintenanceWorker{}, nil
}
// Get workers from the maintenance manager
workers := h.adminServer.GetMaintenanceManager().GetWorkers()
return workers, nil
}
func (h *MaintenanceHandlers) getMaintenanceConfig() (*maintenance.MaintenanceConfigData, error) {
// Delegate to AdminServer's real persistence method
return h.adminServer.GetMaintenanceConfigData()
}
func (h *MaintenanceHandlers) updateMaintenanceConfig(config *maintenance.MaintenanceConfig) error {
if h.adminServer == nil {
return fmt.Errorf("admin server not initialized")
}
// Delegate to AdminServer's real persistence method
return h.adminServer.UpdateMaintenanceConfigData(config)
}

34
weed/admin/maintenance/maintenance_queue.go

@ -587,15 +587,35 @@ func (mq *MaintenanceQueue) GetTasks(status MaintenanceTaskStatus, taskType Main
continue
}
tasks = append(tasks, task)
if limit > 0 && len(tasks) >= limit {
break
}
}
// Sort by creation time (newest first)
sort.Slice(tasks, func(i, j int) bool {
return tasks[i].CreatedAt.After(tasks[j].CreatedAt)
})
// Sort based on status
if status == TaskStatusCompleted || status == TaskStatusFailed || status == TaskStatusCancelled {
sort.Slice(tasks, func(i, j int) bool {
t1 := tasks[i].CompletedAt
t2 := tasks[j].CompletedAt
if t1 == nil && t2 == nil {
return tasks[i].CreatedAt.After(tasks[j].CreatedAt)
}
if t1 == nil {
return false
}
if t2 == nil {
return true
}
return t1.After(*t2)
})
} else {
// Default to creation time (newest first)
sort.Slice(tasks, func(i, j int) bool {
return tasks[i].CreatedAt.After(tasks[j].CreatedAt)
})
}
// Apply limit after sorting
if limit > 0 && len(tasks) > limit {
tasks = tasks[:limit]
}
return tasks
}

2
weed/admin/view/app/admin_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/cluster_brokers_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/cluster_collections_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/cluster_ec_shards_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/cluster_ec_volumes_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/cluster_filers_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/cluster_masters_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/cluster_volume_servers_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/cluster_volumes_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/collection_details_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/ec_volume_details_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/file_browser_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/iceberg_catalog_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/iceberg_namespaces_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/iceberg_tables_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/maintenance_config_schema_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/maintenance_config_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

7
weed/admin/view/app/maintenance_queue.templ

@ -302,12 +302,7 @@ templ MaintenanceQueue(data *maintenance.MaintenanceQueueData) {
// Debug output to browser console
console.log("DEBUG: Maintenance Queue Template loaded");
// Auto-refresh every 10 seconds
setInterval(function() {
if (!document.hidden) {
window.location.reload();
}
}, 10000);
window.triggerScan = function() {
console.log("triggerScan called");

8
weed/admin/view/app/maintenance_queue_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.
@ -610,7 +610,7 @@ func MaintenanceQueue(data *maintenance.MaintenanceQueueData) templ.Component {
return templ_7745c5c3_Err
}
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 63, "</div></div></div></div></div><script>\n // Debug output to browser console\n console.log(\"DEBUG: Maintenance Queue Template loaded\");\n \n // Auto-refresh every 10 seconds\n setInterval(function() {\n if (!document.hidden) {\n window.location.reload();\n }\n }, 10000);\n\n window.triggerScan = function() {\n console.log(\"triggerScan called\");\n fetch('/api/maintenance/scan', {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n }\n })\n .then(response => response.json())\n .then(data => {\n if (data.success) {\n showToast('Success', 'Maintenance scan triggered successfully', 'success');\n setTimeout(() => window.location.reload(), 2000);\n } else {\n showToast('Error', 'Failed to trigger scan: ' + (data.error || 'Unknown error'), 'danger');\n }\n })\n .catch(error => {\n showToast('Error', 'Error: ' + error.message, 'danger');\n });\n };\n\n window.refreshPage = function() {\n console.log(\"refreshPage called\");\n window.location.reload();\n };\n\n window.navigateToTask = function(element) {\n const taskId = element.getAttribute('data-task-id');\n if (taskId) {\n window.location.href = '/maintenance/tasks/' + taskId;\n }\n };\n </script>")
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 63, "</div></div></div></div></div><script>\n // Debug output to browser console\n console.log(\"DEBUG: Maintenance Queue Template loaded\");\n \n\n\n window.triggerScan = function() {\n console.log(\"triggerScan called\");\n fetch('/api/maintenance/scan', {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n }\n })\n .then(response => response.json())\n .then(data => {\n if (data.success) {\n showToast('Success', 'Maintenance scan triggered successfully', 'success');\n setTimeout(() => window.location.reload(), 2000);\n } else {\n showToast('Error', 'Failed to trigger scan: ' + (data.error || 'Unknown error'), 'danger');\n }\n })\n .catch(error => {\n showToast('Error', 'Error: ' + error.message, 'danger');\n });\n };\n\n window.refreshPage = function() {\n console.log(\"refreshPage called\");\n window.location.reload();\n };\n\n window.navigateToTask = function(element) {\n const taskId = element.getAttribute('data-task-id');\n if (taskId) {\n window.location.href = '/maintenance/tasks/' + taskId;\n }\n };\n </script>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
@ -809,7 +809,7 @@ func ProgressBar(progress float64, status maintenance.MaintenanceTaskStatus) tem
var templ_7745c5c3_Var35 string
templ_7745c5c3_Var35, templ_7745c5c3_Err = templruntime.SanitizeStyleAttributeValues(fmt.Sprintf("width: %.1f%%", progress))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 390, Col: 102}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 385, Col: 102}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var35))
if templ_7745c5c3_Err != nil {
@ -822,7 +822,7 @@ func ProgressBar(progress float64, status maintenance.MaintenanceTaskStatus) tem
var templ_7745c5c3_Var36 string
templ_7745c5c3_Var36, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%.1f%%", progress))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 393, Col: 66}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 388, Col: 66}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var36))
if templ_7745c5c3_Err != nil {

2
weed/admin/view/app/maintenance_workers_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/object_store_users_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/policies_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/s3_buckets_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/s3tables_buckets_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/s3tables_namespaces_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/s3tables_tables_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/service_accounts_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/subscribers_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/task_config_schema.templ

@ -430,7 +430,7 @@ func getTaskConfigStringField(config interface{}, fieldName string) string {
// getTaskNumberStep returns the HTML number-input "step" attribute for a
// config field: "any" for float fields (so arbitrary decimal values pass
// browser validation, rather than being constrained to 0.01 increments) and
// "1" for integer fields.
func getTaskNumberStep(field *config.Field) string {
	// NOTE: the previous hard-coded "0.01" step rejected finer-grained float
	// values; "any" delegates precision entirely to the user.
	if field.Type == config.FieldTypeFloat {
		return "any"
	}
	return "1"
}

4
weed/admin/view/app/task_config_schema_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.
@ -891,7 +891,7 @@ func getTaskConfigStringField(config interface{}, fieldName string) string {
// getTaskNumberStep returns the HTML number-input "step" attribute for a
// config field: "any" for float fields (accepts arbitrary decimals) and "1"
// for integer fields. This is generated templ output and must stay in sync
// with the definition in task_config_schema.templ.
func getTaskNumberStep(field *config.Field) string {
	if field.Type == config.FieldTypeFloat {
		return "any"
	}
	return "1"
}

2
weed/admin/view/app/task_config_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/task_config_templ_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

47
weed/admin/view/app/task_detail.templ

@ -942,10 +942,48 @@ templ TaskDetail(data *maintenance.TaskDetailData) {
return;
}
// Format and display logs with structured fields
let logText = '';
// Helper function to format timestamps robustly.
// Accepts a numeric string ("1738652668"), an ISO date string, or a number
// (seconds or milliseconds since epoch) and returns an ISO-8601 string,
// or 'N/A' for anything missing or unparseable.
function formatTimestamp(timestamp) {
if (!timestamp) {
return 'N/A';
}
let date;
// Check if timestamp is a numeric string (e.g., "1738652668")
if (typeof timestamp === 'string' && /^\d+$/.test(timestamp)) {
const numericTimestamp = parseInt(timestamp, 10);
// Treat values > 10^10 as milliseconds, otherwise as seconds
// (10^10 seconds is year ~2286, so any plausible seconds value is below it)
date = numericTimestamp > 10000000000
? new Date(numericTimestamp)
: new Date(numericTimestamp * 1000);
} else if (typeof timestamp === 'string') {
// ISO date string
date = new Date(timestamp);
} else if (typeof timestamp === 'number') {
// Numeric timestamp (seconds or milliseconds) — same 10^10 heuristic as above
date = timestamp > 10000000000
? new Date(timestamp)
: new Date(timestamp * 1000);
} else {
return 'N/A';
}
// Validate the date (new Date() yields an Invalid Date for bad input)
if (isNaN(date.getTime())) {
return 'N/A';
}
return date.toISOString();
}
logs.forEach(entry => {
const timestamp = entry.timestamp ? new Date(entry.timestamp * 1000).toISOString() : 'N/A';
const timestamp = formatTimestamp(entry.timestamp);
const level = entry.level || 'INFO';
const message = entry.message || '';
@ -1011,7 +1049,12 @@ templ TaskDetail(data *maintenance.TaskDetailData) {
let logContent = '';
if (data.logs && data.logs.length > 0) {
data.logs.forEach(entry => {
const timestamp = entry.timestamp ? new Date(entry.timestamp * 1000).toISOString() : 'N/A';
let timestamp;
if (typeof entry.timestamp === 'string') {
timestamp = new Date(entry.timestamp).toISOString();
} else {
timestamp = entry.timestamp ? new Date(entry.timestamp * 1000).toISOString() : 'N/A';
}
const level = entry.level || 'INFO';
const message = entry.message || '';

4
weed/admin/view/app/task_detail_templ.go
File diff suppressed because it is too large
View File

2
weed/admin/view/app/topic_details_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/topics_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/app/volume_details_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/components/config_sections_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package components
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/components/form_fields_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package components
//lint:file-ignore SA4006 This context is only used if a nested component is present.

2
weed/admin/view/layout/layout_templ.go

@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package layout
//lint:file-ignore SA4006 This context is only used if a nested component is present.

15
weed/command/admin.go

@ -230,7 +230,20 @@ func startAdminServer(ctx context.Context, options AdminOptions, enableUI bool,
// Create router
r := gin.New()
r.Use(gin.Logger(), gin.Recovery())
r.Use(gin.LoggerWithFormatter(func(param gin.LogFormatterParams) string {
if param.StatusCode == 200 {
return ""
}
return fmt.Sprintf("[GIN] %v | %3d | %13v | %15s | %-7s %s\n%s",
param.TimeStamp.Format("2006/01/02 - 15:04:05"),
param.StatusCode,
param.Latency,
param.ClientIP,
param.Method,
param.Path,
param.ErrorMessage,
)
}), gin.Recovery())
// Create data directory first if specified (needed for session key storage)
var dataDir string

10
weed/server/master_grpc_server_assign.go

@ -79,6 +79,16 @@ func (ms *MasterServer) Assign(ctx context.Context, req *master_pb.AssignRequest
}
vl := ms.Topo.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType)
if req.DiskType == "" {
if writable, _ := vl.GetWritableVolumeCount(); writable == 0 {
if hddVl := ms.Topo.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, types.ToDiskType(types.HddType)); hddVl != nil {
if writable, _ := hddVl.GetWritableVolumeCount(); writable > 0 {
option.DiskType = types.ToDiskType(types.HddType)
vl = hddVl
}
}
}
}
vl.SetLastGrowCount(req.WritableVolumeCount)
var (

16
weed/worker/tasks/erasure_coding/detection.go

@ -241,13 +241,15 @@ func Detection(metrics []*types.VolumeHealthMetrics, clusterInfo *types.ClusterI
results = append(results, result)
} else {
// Count debug reasons
if metric.Age < quietThreshold {
skippedQuietTime++
}
if metric.FullnessRatio < ecConfig.FullnessRatio {
skippedFullness++
}
if debugCount < 5 { // Limit to avoid spam
if metric.Age < quietThreshold {
skippedQuietTime++
}
if metric.FullnessRatio < ecConfig.FullnessRatio {
skippedFullness++
}
// Logic moved outside
}
debugCount++
}
@ -256,7 +258,7 @@ func Detection(metrics []*types.VolumeHealthMetrics, clusterInfo *types.ClusterI
// Log debug summary if no tasks were created
if len(results) == 0 && len(metrics) > 0 {
totalVolumes := len(metrics)
glog.V(1).Infof("EC detection: No tasks created for %d volumes (skipped: %d already EC, %d too small, %d filtered, %d not quiet, %d not full)",
glog.Infof("EC detection: No tasks created for %d volumes (skipped: %d already EC, %d too small, %d filtered, %d not quiet, %d not full)",
totalVolumes, skippedAlreadyEC, skippedTooSmall, skippedCollectionFilter, skippedQuietTime, skippedFullness)
// Show details for first few volumes

17
weed/worker/tasks/task_logger.go

@ -30,6 +30,7 @@ type TaskLogger interface {
LogWithFields(level string, message string, fields map[string]interface{})
// Lifecycle
Sync() error
Close() error
GetLogDir() string
}
@ -230,6 +231,17 @@ func (l *FileTaskLogger) LogWithFields(level string, message string, fields map[
l.writeLogEntry(entry)
}
// Sync flushes any buffered log data to disk without closing the logger.
// It is a no-op when no log file is currently open.
func (l *FileTaskLogger) Sync() error {
	l.mutex.Lock()
	defer l.mutex.Unlock()
	if l.logFile == nil {
		return nil
	}
	return l.logFile.Sync()
}
// Close closes the logger and finalizes metadata
func (l *FileTaskLogger) Close() error {
l.Info("Task logger closed for %s", l.taskID)
@ -423,7 +435,10 @@ func ReadTaskLogs(logDir string) ([]TaskLogEntry, error) {
if err == io.EOF {
break
}
return nil, fmt.Errorf("failed to decode log entry: %w", err)
// If we fail to decode an entry, it might be a partial write at the end of the file
// Return what we have so far instead of failing the entire request
glog.V(1).Infof("Failed to decode log entry in %s: %v (returning %d partial logs)", logPath, err, len(entries))
break
}
entries = append(entries, entry)
}

1
weed/worker/types/typed_task_interface.go

@ -19,6 +19,7 @@ type TaskLogger interface {
Error(message string, args ...interface{})
Debug(message string, args ...interface{})
LogWithFields(level string, message string, fields map[string]interface{})
Sync() error
Close() error
}

10
weed/worker/worker.go

@ -707,6 +707,9 @@ func (w *Worker) executeTask(task *types.TaskInput) {
err = taskInstance.Execute(ctx, task.TypedParams)
// Report completion
if fileLogger != nil {
fileLogger.Sync()
}
if err != nil {
w.completeTask(task.ID, false, err.Error())
w.cmds <- workerCommand{
@ -718,14 +721,15 @@ func (w *Worker) executeTask(task *types.TaskInput) {
fileLogger.Error("Task %s failed: %v", task.ID, err)
}
} else {
if fileLogger != nil {
fileLogger.Info("Task %s completed successfully", task.ID)
fileLogger.Sync()
}
w.completeTask(task.ID, true, "")
w.cmds <- workerCommand{
action: ActionIncTaskComplete,
}
glog.Infof("Worker %s completed task %s successfully", w.id, task.ID)
if fileLogger != nil {
fileLogger.Info("Task %s completed successfully", task.ID)
}
}
}

Loading…
Cancel
Save