Browse Source

Expire stuck plugin jobs (#8492)

* Add stale job expiry and expire API

* Add expire job button

* Add test hook and coverage for ExpirePluginJobAPI

* Document scheduler filtering side effect and reuse helper

* Restore job spec proposal test

* Regenerate plugin template output

---------

Co-authored-by: Copilot <copilot@github.com>
pull/4306/merge
Chris Lu 22 hours ago
committed by GitHub
parent
commit
a61a2affe3
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 12
      weed/admin/dash/admin_server.go
  2. 42
      weed/admin/dash/plugin_api.go
  3. 109
      weed/admin/dash/plugin_api_test.go
  4. 1
      weed/admin/handlers/admin_handlers.go
  5. 6
      weed/admin/handlers/admin_handlers_routes_test.go
  6. 2
      weed/admin/plugin/plugin.go
  7. 281
      weed/admin/plugin/plugin_monitor.go
  8. 7
      weed/admin/plugin/plugin_scheduler.go
  9. 7
      weed/admin/plugin/types.go
  10. 85
      weed/admin/view/app/plugin.templ
  11. 2
      weed/admin/view/app/plugin_templ.go

12
weed/admin/dash/admin_server.go

@ -98,6 +98,7 @@ type AdminServer struct {
// Maintenance system // Maintenance system
maintenanceManager *maintenance.MaintenanceManager maintenanceManager *maintenance.MaintenanceManager
plugin *adminplugin.Plugin plugin *adminplugin.Plugin
expireJobHandler func(jobID string, reason string) (*adminplugin.TrackedJob, bool, error)
// Topic retention purger // Topic retention purger
topicRetentionPurger *TopicRetentionPurger topicRetentionPurger *TopicRetentionPurger
@ -1020,6 +1021,17 @@ func (s *AdminServer) GetPluginJobDetail(jobID string, activityLimit, relatedLim
return s.plugin.BuildJobDetail(jobID, activityLimit, relatedLimit) return s.plugin.BuildJobDetail(jobID, activityLimit, relatedLimit)
} }
// ExpirePluginJob marks an active plugin job as failed so it no longer blocks scheduling.
//
// When expireJobHandler is set it takes precedence and receives the arguments
// untouched; otherwise the call is forwarded to the plugin. An error is
// returned if neither a handler nor a plugin is configured.
func (s *AdminServer) ExpirePluginJob(jobID, reason string) (*adminplugin.TrackedJob, bool, error) {
	override := s.expireJobHandler
	switch {
	case override != nil:
		return override(jobID, reason)
	case s.plugin != nil:
		return s.plugin.ExpireJob(jobID, reason)
	default:
		return nil, false, fmt.Errorf("plugin is not enabled")
	}
}
// ListPluginActivities returns plugin job activities for monitoring. // ListPluginActivities returns plugin job activities for monitoring.
func (s *AdminServer) ListPluginActivities(jobType string, limit int) []adminplugin.JobActivity { func (s *AdminServer) ListPluginActivities(jobType string, limit int) []adminplugin.JobActivity {
if s.plugin == nil { if s.plugin == nil {

42
weed/admin/dash/plugin_api.go

@ -5,6 +5,7 @@ import (
"crypto/rand" "crypto/rand"
"encoding/hex" "encoding/hex"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
@ -130,6 +131,47 @@ func (s *AdminServer) GetPluginJobDetailAPI(w http.ResponseWriter, r *http.Reque
writeJSON(w, http.StatusOK, detail) writeJSON(w, http.StatusOK, detail)
} }
// ExpirePluginJobAPI marks a job as failed so it no longer blocks scheduling.
//
// The job ID is read from the "jobId" route variable. The JSON body is
// optional and may carry a "reason" string; an io.EOF from the decoder is
// tolerated so that a request without a body is accepted.
//
// Responses:
//   - 400 when the job ID is blank or the body cannot be decoded;
//   - 404 when ExpirePluginJob reports plugin.ErrJobNotFound;
//   - 500 for any other error from ExpirePluginJob;
//   - 200 with "job_id" and "expired", plus "job" when one was returned and
//     "message" when the job was not active.
func (s *AdminServer) ExpirePluginJobAPI(w http.ResponseWriter, r *http.Request) {
	jobID := strings.TrimSpace(mux.Vars(r)["jobId"])
	if jobID == "" {
		writeJSONError(w, http.StatusBadRequest, "jobId is required")
		return
	}

	var req struct {
		Reason string `json:"reason"`
	}
	// errors.Is (rather than ==) also matches an io.EOF that was wrapped on
	// its way up from the decoder.
	if err := decodeJSONBody(newJSONMaxReader(w, r), &req); err != nil && !errors.Is(err, io.EOF) {
		writeJSONError(w, http.StatusBadRequest, "invalid request body: "+err.Error())
		return
	}

	job, expired, err := s.ExpirePluginJob(jobID, req.Reason)
	if err != nil {
		status := http.StatusInternalServerError
		if errors.Is(err, plugin.ErrJobNotFound) {
			status = http.StatusNotFound
		}
		writeJSONError(w, status, err.Error())
		return
	}

	response := map[string]any{
		"job_id":  jobID,
		"expired": expired,
	}
	if job != nil {
		response["job"] = job
	}
	if !expired {
		response["message"] = "job is not active"
	}
	writeJSON(w, http.StatusOK, response)
}
// GetPluginActivitiesAPI returns recent plugin activities. // GetPluginActivitiesAPI returns recent plugin activities.
func (s *AdminServer) GetPluginActivitiesAPI(w http.ResponseWriter, r *http.Request) { func (s *AdminServer) GetPluginActivitiesAPI(w http.ResponseWriter, r *http.Request) {
query := r.URL.Query() query := r.URL.Query()

109
weed/admin/dash/plugin_api_test.go

@ -1,11 +1,120 @@
package dash package dash
import ( import (
"encoding/json"
"errors"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing" "testing"
"github.com/gorilla/mux"
"github.com/seaweedfs/seaweedfs/weed/admin/plugin"
"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb" "github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
) )
// TestExpirePluginJobAPI exercises the expire handler's status codes and JSON
// payloads, using the expireJobHandler hook in place of a real plugin.
func TestExpirePluginJobAPI(t *testing.T) {
	// invoke calls the handler directly with the jobId route variable set
	// and returns the recorded response.
	invoke := func(srv *AdminServer, id string, body io.Reader) *httptest.ResponseRecorder {
		request := mux.SetURLVars(
			httptest.NewRequest(http.MethodPost, "/api/plugin/jobs/"+id+"/expire", body),
			map[string]string{"jobId": id},
		)
		rec := httptest.NewRecorder()
		srv.ExpirePluginJobAPI(rec, request)
		return rec
	}

	t.Run("empty job id", func(t *testing.T) {
		if rec := invoke(&AdminServer{}, "", nil); rec.Code != http.StatusBadRequest {
			t.Fatalf("expected 400, got %d", rec.Code)
		}
	})

	t.Run("invalid json", func(t *testing.T) {
		if rec := invoke(&AdminServer{}, "job-id", strings.NewReader("{")); rec.Code != http.StatusBadRequest {
			t.Fatalf("expected 400, got %d", rec.Code)
		}
	})

	t.Run("job not found", func(t *testing.T) {
		srv := &AdminServer{}
		srv.expireJobHandler = func(jobID, reason string) (*plugin.TrackedJob, bool, error) {
			return nil, false, plugin.ErrJobNotFound
		}

		rec := invoke(srv, "missing", strings.NewReader(`{"reason":"nope"}`))
		if rec.Code != http.StatusNotFound {
			t.Fatalf("expected 404, got %d", rec.Code)
		}

		var body map[string]any
		if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
			t.Fatalf("failed to unmarshal body: %v", err)
		}
		if body["error"] == nil {
			t.Fatalf("expected error payload, got %v", body)
		}
	})

	t.Run("successful expire", func(t *testing.T) {
		want := &plugin.TrackedJob{JobID: "foo", State: "assigned"}
		srv := &AdminServer{}
		srv.expireJobHandler = func(jobID, reason string) (*plugin.TrackedJob, bool, error) {
			if jobID == "foo" {
				return want, true, nil
			}
			return nil, false, errors.New("unexpected")
		}

		rec := invoke(srv, "foo", strings.NewReader(`{"reason":"cleanup"}`))
		if rec.Code != http.StatusOK {
			t.Fatalf("expected 200, got %d", rec.Code)
		}

		var body map[string]any
		if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
			t.Fatalf("failed to decode payload: %v", err)
		}
		if body["job_id"] != "foo" {
			t.Fatalf("expected job_id foo, got %v", body["job_id"])
		}
		if flag, isBool := body["expired"].(bool); !isBool || !flag {
			t.Fatalf("expected expired=true, got %v", body["expired"])
		}
		if jobFields, isMap := body["job"].(map[string]any); !isMap || jobFields["job_id"] != "foo" {
			t.Fatalf("expected job info with job_id, got %v", body["job"])
		}
	})

	t.Run("non-active job", func(t *testing.T) {
		srv := &AdminServer{}
		srv.expireJobHandler = func(jobID, reason string) (*plugin.TrackedJob, bool, error) {
			return nil, false, nil
		}

		rec := invoke(srv, "bar", strings.NewReader(`{"reason":"ignore"}`))
		if rec.Code != http.StatusOK {
			t.Fatalf("expected 200, got %d", rec.Code)
		}

		var body map[string]any
		if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
			t.Fatalf("failed to decode payload: %v", err)
		}
		if body["job_id"] != "bar" {
			t.Fatalf("expected job_id bar, got %v", body["job_id"])
		}
		if flag, isBool := body["expired"].(bool); !isBool || flag {
			t.Fatalf("expected expired=false, got %v", body["expired"])
		}
		if body["message"] != "job is not active" {
			t.Fatalf("expected message job is not active, got %v", body["message"])
		}
		if _, present := body["job"]; present {
			t.Fatalf("expected no job payload for non-active job, got %v", body["job"])
		}
	})
}
func TestBuildJobSpecFromProposalDoesNotReuseProposalID(t *testing.T) { func TestBuildJobSpecFromProposalDoesNotReuseProposalID(t *testing.T) {
t.Parallel() t.Parallel()

1
weed/admin/handlers/admin_handlers.go

@ -242,6 +242,7 @@ func (h *AdminHandlers) registerAPIRoutes(api *mux.Router, enforceWrite bool) {
pluginApi.Handle("/job-types/{jobType}/detect", wrapWrite(h.adminServer.TriggerPluginDetectionAPI)).Methods(http.MethodPost) pluginApi.Handle("/job-types/{jobType}/detect", wrapWrite(h.adminServer.TriggerPluginDetectionAPI)).Methods(http.MethodPost)
pluginApi.Handle("/job-types/{jobType}/run", wrapWrite(h.adminServer.RunPluginJobTypeAPI)).Methods(http.MethodPost) pluginApi.Handle("/job-types/{jobType}/run", wrapWrite(h.adminServer.RunPluginJobTypeAPI)).Methods(http.MethodPost)
pluginApi.Handle("/jobs/execute", wrapWrite(h.adminServer.ExecutePluginJobAPI)).Methods(http.MethodPost) pluginApi.Handle("/jobs/execute", wrapWrite(h.adminServer.ExecutePluginJobAPI)).Methods(http.MethodPost)
pluginApi.Handle("/jobs/{jobId}/expire", wrapWrite(h.adminServer.ExpirePluginJobAPI)).Methods(http.MethodPost)
mqApi := api.PathPrefix("/mq").Subrouter() mqApi := api.PathPrefix("/mq").Subrouter()
mqApi.HandleFunc("/topics/{namespace}/{topic}", h.mqHandlers.GetTopicDetailsAPI).Methods(http.MethodGet) mqApi.HandleFunc("/topics/{namespace}/{topic}", h.mqHandlers.GetTopicDetailsAPI).Methods(http.MethodGet)

6
weed/admin/handlers/admin_handlers_routes_test.go

@ -21,6 +21,9 @@ func TestSetupRoutes_RegistersPluginSchedulerStatesAPI_NoAuth(t *testing.T) {
if !hasRoute(router, http.MethodGet, "/api/plugin/jobs/example/detail") { if !hasRoute(router, http.MethodGet, "/api/plugin/jobs/example/detail") {
t.Fatalf("expected GET /api/plugin/jobs/:jobId/detail to be registered in no-auth mode") t.Fatalf("expected GET /api/plugin/jobs/:jobId/detail to be registered in no-auth mode")
} }
if !hasRoute(router, http.MethodPost, "/api/plugin/jobs/example/expire") {
t.Fatalf("expected POST /api/plugin/jobs/:jobId/expire to be registered in no-auth mode")
}
} }
func TestSetupRoutes_RegistersPluginSchedulerStatesAPI_WithAuth(t *testing.T) { func TestSetupRoutes_RegistersPluginSchedulerStatesAPI_WithAuth(t *testing.T) {
@ -34,6 +37,9 @@ func TestSetupRoutes_RegistersPluginSchedulerStatesAPI_WithAuth(t *testing.T) {
if !hasRoute(router, http.MethodGet, "/api/plugin/jobs/example/detail") { if !hasRoute(router, http.MethodGet, "/api/plugin/jobs/example/detail") {
t.Fatalf("expected GET /api/plugin/jobs/:jobId/detail to be registered in auth mode") t.Fatalf("expected GET /api/plugin/jobs/:jobId/detail to be registered in auth mode")
} }
if !hasRoute(router, http.MethodPost, "/api/plugin/jobs/example/expire") {
t.Fatalf("expected POST /api/plugin/jobs/:jobId/expire to be registered in auth mode")
}
} }
func TestSetupRoutes_RegistersPluginPages_NoAuth(t *testing.T) { func TestSetupRoutes_RegistersPluginPages_NoAuth(t *testing.T) {

2
weed/admin/plugin/plugin.go

@ -80,6 +80,8 @@ type Plugin struct {
jobsMu sync.RWMutex jobsMu sync.RWMutex
jobs map[string]*TrackedJob jobs map[string]*TrackedJob
// serialize stale job cleanup to avoid duplicate expirations
staleJobsMu sync.Mutex
jobDetailsMu sync.Mutex jobDetailsMu sync.Mutex

281
weed/admin/plugin/plugin_monitor.go

@ -2,6 +2,7 @@ package plugin
import ( import (
"encoding/json" "encoding/json"
"fmt"
"sort" "sort"
"strings" "strings"
"time" "time"
@ -9,12 +10,18 @@ import (
"github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb" "github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
"google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/types/known/timestamppb"
) )
const ( const (
maxTrackedJobsTotal = 1000 maxTrackedJobsTotal = 1000
maxActivityRecords = 4000 maxActivityRecords = 4000
maxRelatedJobs = 100 maxRelatedJobs = 100
// stale active jobs block dedupe and scheduling; use generous defaults to
// avoid expiring legitimate long-running tasks.
defaultStaleActiveJobTimeout = 24 * time.Hour
defaultOrphanedActiveJobTimeout = 15 * time.Minute
) )
var ( var (
@ -23,6 +30,14 @@ var (
StateCanceled = strings.ToLower(plugin_pb.JobState_JOB_STATE_CANCELED.String()) StateCanceled = strings.ToLower(plugin_pb.JobState_JOB_STATE_CANCELED.String())
) )
// activeJobSnapshot is a copy of the tracked-job fields that stale-job expiry
// needs. snapshotActiveJobs fills it while holding the jobs read lock so the
// expiry pass can work on the values after the lock is released.
type activeJobSnapshot struct {
// jobID, jobType and requestID are passed through to the JobCompleted
// message when the job is expired.
jobID string
jobType string
// workerID is the worker recorded on the tracked job; it may be empty.
workerID string
requestID string
// lastUpdate is the result of jobLastUpdated for the job; the zero time
// means no usable timestamp was recorded.
lastUpdate time.Time
}
// activityLess reports whether activity a occurred after activity b (newest-first order). // activityLess reports whether activity a occurred after activity b (newest-first order).
// A nil OccurredAt is treated as the zero time. // A nil OccurredAt is treated as the zero time.
func activityLess(a, b JobActivity) bool { func activityLess(a, b JobActivity) bool {
@ -54,6 +69,13 @@ func (r *Plugin) loadPersistedMonitorState() error {
if strings.TrimSpace(job.JobID) == "" { if strings.TrimSpace(job.JobID) == "" {
continue continue
} }
if isActiveTrackedJobState(job.State) {
if detail, detailErr := r.store.LoadJobDetail(job.JobID); detailErr != nil {
glog.Warningf("Plugin failed to load detail snapshot for job %s: %v", job.JobID, detailErr)
} else if detail != nil {
mergeTerminalDetailIntoTracked(&job, detail)
}
}
// Backward compatibility: migrate older inline detail payloads // Backward compatibility: migrate older inline detail payloads
// out of tracked_jobs.json into dedicated per-job detail files. // out of tracked_jobs.json into dedicated per-job detail files.
if hasTrackedJobRichDetails(job) { if hasTrackedJobRichDetails(job) {
@ -81,6 +103,265 @@ func (r *Plugin) loadPersistedMonitorState() error {
return nil return nil
} }
// ExpireJob marks an active job as failed so it no longer blocks scheduling.
//
// jobID and reason are trimmed first; a blank reason is replaced with a
// default message. The job is looked up in the in-memory tracked set, and the
// persisted detail snapshot is consulted whenever the job type or request ID
// is missing or the in-memory job is not active.
//
// Returns:
//   - (nil, false, ErrJobNotFound) when jobID is blank or no job type can be
//     resolved from either source;
//   - (nil, false, err) when loading the detail snapshot fails;
//   - (job, false, nil) when the job is known but not active — job is the
//     tracked job, or a clone of the detail snapshot, and may be nil;
//   - (job, true, nil) after the job has been completed as failed and an
//     "admin_expire" activity has been recorded.
func (r *Plugin) ExpireJob(jobID, reason string) (*TrackedJob, bool, error) {
	normalizedJobID := strings.TrimSpace(jobID)
	if normalizedJobID == "" {
		return nil, false, ErrJobNotFound
	}
	reason = strings.TrimSpace(reason)
	if reason == "" {
		reason = "job expired by admin request"
	}

	var (
		jobType   string
		requestID string
		active    bool
	)
	r.jobsMu.RLock()
	if tracked := r.jobs[normalizedJobID]; tracked != nil {
		jobType = tracked.JobType
		requestID = tracked.RequestID
		active = isActiveTrackedJobState(tracked.State)
	}
	r.jobsMu.RUnlock()

	// detail is kept so the non-active path below can reuse it instead of
	// reading the store a second time.
	var detail *TrackedJob
	if jobType == "" || requestID == "" || !active {
		loaded, err := r.store.LoadJobDetail(normalizedJobID)
		if err != nil {
			return nil, false, err
		}
		detail = loaded
		if detail != nil {
			if jobType == "" {
				jobType = detail.JobType
			}
			if requestID == "" {
				requestID = detail.RequestID
			}
			// NOTE(review): a persisted active state also overrides an
			// in-memory job that is already terminal — confirm that is intended.
			if !active && isActiveTrackedJobState(detail.State) {
				active = true
			}
		}
	}
	if jobType == "" {
		return nil, false, ErrJobNotFound
	}

	if !active {
		// Whenever this branch is reached the detail lookup above has run
		// (the job was not active in memory), so detail is current.
		current, _ := r.GetTrackedJob(normalizedJobID)
		if current == nil && detail != nil {
			clone := cloneTrackedJob(*detail)
			current = &clone
		}
		return current, false, nil
	}

	now := time.Now().UTC()
	r.handleJobCompleted(&plugin_pb.JobCompleted{
		JobId:        normalizedJobID,
		JobType:      jobType,
		RequestId:    requestID,
		Success:      false,
		ErrorMessage: reason,
		CompletedAt:  timestamppb.New(now),
	})
	r.appendActivity(JobActivity{
		JobID:      normalizedJobID,
		JobType:    jobType,
		RequestID:  requestID,
		Source:     "admin_expire",
		Message:    reason,
		Stage:      "expired",
		OccurredAt: timeToPtr(now),
	})

	updated, _ := r.GetTrackedJob(normalizedJobID)
	return updated, true, nil
}
// expireStaleJobs marks stale active jobs as failed so they stop blocking new work.
//
// A zero now is replaced with the current UTC time. Passes are serialized by
// staleJobsMu. Each candidate from the snapshot is re-checked against the
// live job map before it is completed as failed. The return value is the
// number of jobs expired in this pass.
func (r *Plugin) expireStaleJobs(now time.Time) int {
	if now.IsZero() {
		now = time.Now().UTC()
	}

	r.staleJobsMu.Lock()
	defer r.staleJobsMu.Unlock()

	count := 0
	for _, candidate := range r.snapshotActiveJobs() {
		// Jobs without any recorded timestamp are never expired here.
		if candidate.lastUpdate.IsZero() {
			continue
		}
		stale, _, _ := r.evaluateStaleJob(now, candidate.workerID, candidate.lastUpdate)
		if !stale {
			continue
		}
		// Confirm against the current state; the job may have progressed or
		// finished since the snapshot was taken.
		reason := r.confirmStaleReason(now, candidate.jobID)
		if reason == "" {
			continue
		}

		r.handleJobCompleted(&plugin_pb.JobCompleted{
			JobId:        candidate.jobID,
			JobType:      candidate.jobType,
			RequestId:    candidate.requestID,
			Success:      false,
			ErrorMessage: reason,
			CompletedAt:  timestamppb.New(now),
		})
		count++
	}
	return count
}
// snapshotActiveJobs copies the identifying fields and last-update time of
// every tracked job in an active state, holding the jobs read lock for the
// duration. It returns nil when no jobs are tracked at all.
func (r *Plugin) snapshotActiveJobs() []activeJobSnapshot {
	r.jobsMu.RLock()
	defer r.jobsMu.RUnlock()

	total := len(r.jobs)
	if total == 0 {
		return nil
	}

	snapshots := make([]activeJobSnapshot, 0, total)
	for _, tracked := range r.jobs {
		if tracked == nil || !isActiveTrackedJobState(tracked.State) {
			continue
		}
		snap := activeJobSnapshot{
			jobID:      tracked.JobID,
			jobType:    tracked.JobType,
			workerID:   tracked.WorkerID,
			requestID:  tracked.RequestID,
			lastUpdate: jobLastUpdated(tracked),
		}
		snapshots = append(snapshots, snap)
	}
	return snapshots
}
// jobLastUpdated returns the most relevant timestamp recorded on job: the
// update time when it is set and non-zero, otherwise the creation time under
// the same condition. A nil job or a job with neither yields the zero time.
func jobLastUpdated(job *TrackedJob) time.Time {
	if job == nil {
		return time.Time{}
	}
	// Ordered by preference; the first usable timestamp wins.
	for _, stamp := range [...]*time.Time{job.UpdatedAt, job.CreatedAt} {
		if stamp != nil && !stamp.IsZero() {
			return *stamp
		}
	}
	return time.Time{}
}
// evaluateStaleJob decides whether a job last updated at lastUpdate counts as
// stale at time now.
//
// It returns the verdict, the timeout that was applied and the reason text
// for that timeout. The default stale timeout applies unless the job names a
// worker that isWorkerAvailable reports as unavailable, in which case the
// shorter orphaned timeout is used. A zero lastUpdate is never stale and
// yields (false, 0, "").
func (r *Plugin) evaluateStaleJob(now time.Time, workerID string, lastUpdate time.Time) (bool, time.Duration, string) {
	if lastUpdate.IsZero() {
		return false, 0, ""
	}

	limit := defaultStaleActiveJobTimeout
	var why string
	switch id := strings.TrimSpace(workerID); {
	case id == "":
		why = fmt.Sprintf("job expired after %s without executor assignment", limit)
	case !r.isWorkerAvailable(id):
		limit = defaultOrphanedActiveJobTimeout
		why = fmt.Sprintf("job expired after %s without worker heartbeat (worker=%s)", limit, id)
	default:
		why = fmt.Sprintf("job expired after %s without progress", limit)
	}

	return now.Sub(lastUpdate) >= limit, limit, why
}
// confirmStaleReason re-reads the job from the live map and returns the
// expiry reason if it is still active and still stale at time now. It returns
// "" when the job is gone, no longer active, or no longer stale.
func (r *Plugin) confirmStaleReason(now time.Time, jobID string) string {
	var (
		stillActive bool
		assignedTo  string
		touchedAt   time.Time
	)

	// Copy what is needed and release the lock before evaluating staleness.
	r.jobsMu.RLock()
	if current := r.jobs[jobID]; current != nil && isActiveTrackedJobState(current.State) {
		stillActive = true
		touchedAt = jobLastUpdated(current)
		assignedTo = current.WorkerID
	}
	r.jobsMu.RUnlock()

	if !stillActive {
		return ""
	}
	if stale, _, reason := r.evaluateStaleJob(now, assignedTo, touchedAt); stale {
		return reason
	}
	return ""
}
// isWorkerAvailable reports whether the registry has an entry for workerID
// (after trimming whitespace). A blank ID is never available.
func (r *Plugin) isWorkerAvailable(workerID string) bool {
	id := strings.TrimSpace(workerID)
	if id == "" {
		return false
	}
	_, registered := r.registry.Get(id)
	return registered
}
// isTerminalTrackedJobState reports whether state, compared after trimming
// whitespace and lower-casing, is one of the succeeded, failed or canceled
// states.
func isTerminalTrackedJobState(state string) bool {
	switch strings.ToLower(strings.TrimSpace(state)) {
	case StateSucceeded, StateFailed, StateCanceled:
		return true
	}
	return false
}
// mergeTerminalDetailIntoTracked copies the terminal outcome from a detail
// snapshot onto a tracked job that is still recorded as active.
//
// It does nothing unless both pointers are non-nil, detail is in a terminal
// state and tracked is in an active state. Only non-empty / non-zero detail
// fields overwrite the tracked values. If tracked ends up without an update
// time, its completion time is used instead.
func mergeTerminalDetailIntoTracked(tracked *TrackedJob, detail *TrackedJob) {
	if tracked == nil || detail == nil {
		return
	}
	if !isTerminalTrackedJobState(detail.State) || !isActiveTrackedJobState(tracked.State) {
		return
	}

	// Outcome fields.
	if detail.State != "" {
		tracked.State = detail.State
	}
	if detail.Stage != "" {
		tracked.Stage = detail.Stage
	}
	if detail.Progress != 0 {
		tracked.Progress = detail.Progress
	}

	// Human-readable fields.
	if detail.Message != "" {
		tracked.Message = detail.Message
	}
	if detail.ErrorMessage != "" {
		tracked.ErrorMessage = detail.ErrorMessage
	}
	if detail.ResultSummary != "" {
		tracked.ResultSummary = detail.ResultSummary
	}

	// Timestamps; the fallback below must run after both have been merged.
	if ts := detail.CompletedAt; ts != nil && !ts.IsZero() {
		tracked.CompletedAt = ts
	}
	if ts := detail.UpdatedAt; ts != nil && !ts.IsZero() {
		tracked.UpdatedAt = ts
	}
	if tracked.UpdatedAt == nil && tracked.CompletedAt != nil {
		tracked.UpdatedAt = tracked.CompletedAt
	}
}
func (r *Plugin) ListTrackedJobs(jobType string, state string, limit int) []TrackedJob { func (r *Plugin) ListTrackedJobs(jobType string, state string, limit int) []TrackedJob {
r.jobsMu.RLock() r.jobsMu.RLock()
defer r.jobsMu.RUnlock() defer r.jobsMu.RUnlock()

7
weed/admin/plugin/plugin_scheduler.go

@ -61,6 +61,8 @@ func (r *Plugin) schedulerLoop() {
} }
func (r *Plugin) runSchedulerTick() { func (r *Plugin) runSchedulerTick() {
r.expireStaleJobs(time.Now().UTC())
jobTypes := r.registry.DetectableJobTypes() jobTypes := r.registry.DetectableJobTypes()
if len(jobTypes) == 0 { if len(jobTypes) == 0 {
return return
@ -839,11 +841,16 @@ func waitForShutdownOrTimer(shutdown <-chan struct{}, duration time.Duration) bo
} }
} }
// filterProposalsWithActiveJobs removes proposals whose dedupe keys already have active jobs.
// It first expires stale tracked jobs via expireStaleJobs, which can mutate scheduler state,
// so callers should treat this method as a stateful operation.
func (r *Plugin) filterProposalsWithActiveJobs(jobType string, proposals []*plugin_pb.JobProposal) ([]*plugin_pb.JobProposal, int) { func (r *Plugin) filterProposalsWithActiveJobs(jobType string, proposals []*plugin_pb.JobProposal) ([]*plugin_pb.JobProposal, int) {
if len(proposals) == 0 { if len(proposals) == 0 {
return proposals, 0 return proposals, 0
} }
r.expireStaleJobs(time.Now().UTC())
activeKeys := make(map[string]struct{}) activeKeys := make(map[string]struct{})
r.jobsMu.RLock() r.jobsMu.RLock()
for _, job := range r.jobs { for _, job := range r.jobs {

7
weed/admin/plugin/types.go

@ -1,6 +1,9 @@
package plugin package plugin
import "time"
import (
"errors"
"time"
)
const ( const (
// Keep exactly the last 10 successful and last 10 error runs per job type. // Keep exactly the last 10 successful and last 10 error runs per job type.
@ -8,6 +11,8 @@ const (
MaxErrorRunHistory = 10 MaxErrorRunHistory = 10
) )
var ErrJobNotFound = errors.New("job not found")
type RunOutcome string type RunOutcome string
const ( const (

85
weed/admin/view/app/plugin.templ

@ -485,6 +485,11 @@ templ Plugin(page string) {
<div class="modal-content"> <div class="modal-content">
<div class="modal-header"> <div class="modal-header">
<h5 class="modal-title" id="plugin-job-detail-modal-label"><i class="fas fa-file-alt me-2"></i>Job Detail</h5> <h5 class="modal-title" id="plugin-job-detail-modal-label"><i class="fas fa-file-alt me-2"></i>Job Detail</h5>
<div class="ms-auto me-2">
<button type="button" class="btn btn-outline-danger btn-sm" id="plugin-expire-job-btn" disabled>
<i class="fas fa-stop-circle me-1"></i>Expire Job
</button>
</div>
<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button> <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
</div> </div>
<div class="modal-body" id="plugin-job-detail-content"> <div class="modal-body" id="plugin-job-detail-content">
@ -1073,6 +1078,66 @@ templ Plugin(page string) {
return html; return html;
} }
// Reports whether a job state counts as active (pending, assigned, running
// or in progress). Accepts either a state string or an object carrying a
// `state` property; matching is case-insensitive.
function isActiveJobState(candidateState) {
    var raw = candidateState;
    if (candidateState && typeof candidateState === 'object' && candidateState.state !== undefined) {
        raw = candidateState.state;
    }
    var activeStates = [
        'job_state_pending', 'job_state_assigned', 'job_state_running',
        'pending', 'assigned', 'running', 'in_progress'
    ];
    return activeStates.indexOf(String(raw || '').toLowerCase()) !== -1;
}
// Syncs the "Expire Job" button with the given job: stores the job id on the
// button, enables it only for an active job that has an id, and sets a
// tooltip explaining the current state. Does nothing if the button is absent.
function setExpireButtonState(job) {
    var btn = document.getElementById('plugin-expire-job-btn');
    if (!btn) {
        return;
    }

    var jobID = (job && job.job_id) ? String(job.job_id) : '';
    var active = isActiveJobState(job);

    var tooltip = 'Expire job to unblock scheduling.';
    if (!jobID) {
        tooltip = 'Select a job to expire.';
    } else if (!active) {
        tooltip = 'Job is not active.';
    }

    btn.setAttribute('data-job-id', jobID);
    btn.disabled = !(jobID && active);
    btn.title = tooltip;
}
// Asks the user for an optional reason, then requests expiry of the given job.
//
// Cancelling the prompt aborts without any request. The expire button is
// disabled while the request is in flight. On success the job list and
// activities are refreshed and the job detail is reloaded, which recomputes
// the button state. On failure an error is shown and the button is enabled
// again so the user can retry.
async function expireJob(jobID) {
    var normalizedJobID = String(jobID || '').trim();
    if (!normalizedJobID) {
        return;
    }

    var reason = window.prompt('Expire job ' + normalizedJobID + '? Optional reason:', 'job expired by admin request');
    if (reason === null) {
        // The user cancelled the prompt.
        return;
    }

    var expireBtn = document.getElementById('plugin-expire-job-btn');
    if (expireBtn) {
        // Guard against a double submit while the request is pending.
        expireBtn.disabled = true;
    }

    try {
        var response = await pluginRequest('POST', '/api/plugin/jobs/' + encodePath(normalizedJobID) + '/expire', {
            reason: reason,
        });
        if (response && response.expired === false) {
            notify(response.message || 'Job is not active.', 'info');
        } else {
            notify('Job expired: ' + normalizedJobID, 'success');
        }
        await refreshJobsAndActivities();
        await openJobDetail(normalizedJobID);
    } catch (e) {
        notify('Failed to expire job: ' + e.message, 'error');
        // The detail view is not reloaded on this path, so nothing else
        // would restore the button; re-enable it to allow a retry.
        if (expireBtn) {
            expireBtn.disabled = false;
        }
    }
}
async function openJobDetail(jobID) { async function openJobDetail(jobID) {
var normalizedJobID = String(jobID || '').trim(); var normalizedJobID = String(jobID || '').trim();
if (!normalizedJobID) { if (!normalizedJobID) {
@ -1093,10 +1158,12 @@ templ Plugin(page string) {
modal.show(); modal.show();
} }
setExpireButtonState(null);
contentRoot.innerHTML = '<div class="text-muted">Loading job detail...</div>'; contentRoot.innerHTML = '<div class="text-muted">Loading job detail...</div>';
try { try {
var detail = await pluginRequest('GET', '/api/plugin/jobs/' + encodePath(normalizedJobID) + '/detail?activity_limit=500&related_limit=20'); var detail = await pluginRequest('GET', '/api/plugin/jobs/' + encodePath(normalizedJobID) + '/detail?activity_limit=500&related_limit=20');
var job = (detail && detail.job) ? detail.job : {}; var job = (detail && detail.job) ? detail.job : {};
setExpireButtonState(job);
var runRecord = detail && detail.run_record ? detail.run_record : null; var runRecord = detail && detail.run_record ? detail.run_record : null;
var activities = (detail && Array.isArray(detail.activities)) ? detail.activities : []; var activities = (detail && Array.isArray(detail.activities)) ? detail.activities : [];
var relatedJobs = (detail && Array.isArray(detail.related_jobs)) ? detail.related_jobs : []; var relatedJobs = (detail && Array.isArray(detail.related_jobs)) ? detail.related_jobs : [];
@ -1197,6 +1264,7 @@ templ Plugin(page string) {
contentRoot.innerHTML = html; contentRoot.innerHTML = html;
} catch (e) { } catch (e) {
setExpireButtonState(null);
contentRoot.innerHTML = '<div class="alert alert-danger mb-0">Failed to load job detail: ' + escapeHtml(e.message) + '</div>'; contentRoot.innerHTML = '<div class="alert alert-danger mb-0">Failed to load job detail: ' + escapeHtml(e.message) + '</div>';
} }
} }
@ -1238,8 +1306,7 @@ templ Plugin(page string) {
var allActivities = Array.isArray(state.allActivities) ? state.allActivities : []; var allActivities = Array.isArray(state.allActivities) ? state.allActivities : [];
var activeCount = allJobs.filter(function(job) { var activeCount = allJobs.filter(function(job) {
var st = String(job.state || '').toLowerCase();
return st === 'job_state_pending' || st === 'job_state_assigned' || st === 'job_state_running' || st === 'pending' || st === 'assigned' || st === 'running' || st === 'in_progress';
return isActiveJobState(job);
}).length; }).length;
document.getElementById('plugin-status-workers').textContent = String(state.workers.length); document.getElementById('plugin-status-workers').textContent = String(state.workers.length);
@ -1265,8 +1332,7 @@ templ Plugin(page string) {
if (!jobType) { if (!jobType) {
continue; continue;
} }
var st = String(job.state || '').toLowerCase();
var isActive = st === 'job_state_pending' || st === 'job_state_assigned' || st === 'job_state_running' || st === 'pending' || st === 'assigned' || st === 'running' || st === 'in_progress';
var isActive = isActiveJobState(job);
if (!isActive) { if (!isActive) {
continue; continue;
} }
@ -2778,6 +2844,17 @@ templ Plugin(page string) {
}); });
} }
// Wire the "Expire Job" button in the job detail modal header, if present.
var expireBtn = document.getElementById('plugin-expire-job-btn');
if (expireBtn) {
expireBtn.addEventListener('click', function() {
// The target job id is read from the data-job-id attribute that
// setExpireButtonState stores on the button.
var jobID = String(expireBtn.getAttribute('data-job-id') || '').trim();
if (!jobID) {
return;
}
expireJob(jobID);
});
}
document.getElementById('plugin-refresh-all-btn').addEventListener('click', function() { document.getElementById('plugin-refresh-all-btn').addEventListener('click', function() {
refreshAll(); refreshAll();
}); });

2
weed/admin/view/app/plugin_templ.go
File diff suppressed because it is too large
View File

Loading…
Cancel
Save