Browse Source

avoid deadlock

add-ec-vacuum
chrislu 4 months ago
parent
commit
91d641e685
  1. 1
      .gitignore
  2. 37
      weed/admin/maintenance/maintenance_manager.go
  3. 25
      weed/admin/maintenance/maintenance_scanner.go

1
.gitignore

@@ -115,3 +115,4 @@ test/s3/versioning/weed-test.log
/docker/admin_integration/data /docker/admin_integration/data
docker/agent_pub_record docker/agent_pub_record
docker/admin_integration/weed-local docker/admin_integration/weed-local
docker/admin_integration/ec_test_files.json

37
weed/admin/maintenance/maintenance_manager.go

@@ -381,6 +381,43 @@ func (mm *MaintenanceManager) GetConfig() *MaintenanceConfig {
// GetStats returns maintenance statistics // GetStats returns maintenance statistics
func (mm *MaintenanceManager) GetStats() *MaintenanceStats { func (mm *MaintenanceManager) GetStats() *MaintenanceStats {
// Quick check if scan is in progress - return cached/fast stats to prevent hanging
mm.mutex.RLock()
scanInProgress := mm.scanInProgress
mm.mutex.RUnlock()
if scanInProgress {
glog.V(2).Infof("Scan in progress, returning fast stats to prevent hanging")
// Return basic stats without calling potentially blocking operations
stats := &MaintenanceStats{
TotalTasks: 0,
TasksByStatus: make(map[MaintenanceTaskStatus]int),
TasksByType: make(map[MaintenanceTaskType]int),
ActiveWorkers: 0,
CompletedToday: 0,
FailedToday: 0,
AverageTaskTime: 0,
LastScanTime: time.Now().Add(-time.Minute), // Assume recent scan
}
mm.mutex.RLock()
// Calculate next scan time based on current error state
scanInterval := time.Duration(mm.config.ScanIntervalSeconds) * time.Second
nextScanInterval := scanInterval
if mm.errorCount > 0 {
nextScanInterval = mm.backoffDelay
maxInterval := scanInterval * 10
if nextScanInterval > maxInterval {
nextScanInterval = maxInterval
}
}
stats.NextScanTime = time.Now().Add(nextScanInterval)
mm.mutex.RUnlock()
return stats
}
// Normal path - get full stats from queue
stats := mm.queue.GetStats() stats := mm.queue.GetStats()
mm.mutex.RLock() mm.mutex.RLock()

25
weed/admin/maintenance/maintenance_scanner.go

@@ -76,9 +76,14 @@ func (ms *MaintenanceScanner) getVolumeHealthMetrics() ([]*VolumeHealthMetrics,
var metrics []*VolumeHealthMetrics var metrics []*VolumeHealthMetrics
glog.V(1).Infof("Collecting volume health metrics from master") glog.V(1).Infof("Collecting volume health metrics from master")
// Add timeout protection to prevent hanging
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel()
err := ms.adminClient.WithMasterClient(func(client master_pb.SeaweedClient) error { err := ms.adminClient.WithMasterClient(func(client master_pb.SeaweedClient) error {
resp, err := client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
resp, err := client.VolumeList(ctx, &master_pb.VolumeListRequest{})
if err != nil { if err != nil {
return err return err
} }
@@ -229,8 +234,12 @@ func (ms *MaintenanceScanner) enrichVolumeMetrics(metrics *[]*VolumeHealthMetric
func (ms *MaintenanceScanner) getECVolumeSet() map[uint32]bool { func (ms *MaintenanceScanner) getECVolumeSet() map[uint32]bool {
ecVolumeSet := make(map[uint32]bool) ecVolumeSet := make(map[uint32]bool)
// Add timeout protection to prevent hanging
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
err := ms.adminClient.WithMasterClient(func(client master_pb.SeaweedClient) error { err := ms.adminClient.WithMasterClient(func(client master_pb.SeaweedClient) error {
resp, err := client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
resp, err := client.VolumeList(ctx, &master_pb.VolumeListRequest{})
if err != nil { if err != nil {
return err return err
} }
@@ -267,8 +276,12 @@ func (ms *MaintenanceScanner) createECVolumeMetric(volumeID uint32) *VolumeHealt
var metric *VolumeHealthMetrics var metric *VolumeHealthMetrics
var serverWithShards string var serverWithShards string
// Add timeout protection to prevent hanging
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
err := ms.adminClient.WithMasterClient(func(client master_pb.SeaweedClient) error { err := ms.adminClient.WithMasterClient(func(client master_pb.SeaweedClient) error {
resp, err := client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
resp, err := client.VolumeList(ctx, &master_pb.VolumeListRequest{})
if err != nil { if err != nil {
return err return err
} }
@@ -401,8 +414,12 @@ func (ms *MaintenanceScanner) enrichECVolumeWithDeletionInfo(metric *VolumeHealt
func (ms *MaintenanceScanner) findServersWithECShards(volumeId uint32) ([]string, error) { func (ms *MaintenanceScanner) findServersWithECShards(volumeId uint32) ([]string, error) {
var serversWithShards []string var serversWithShards []string
// Add timeout protection to prevent hanging
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
err := ms.adminClient.WithMasterClient(func(client master_pb.SeaweedClient) error { err := ms.adminClient.WithMasterClient(func(client master_pb.SeaweedClient) error {
resp, err := client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
resp, err := client.VolumeList(ctx, &master_pb.VolumeListRequest{})
if err != nil { if err != nil {
return err return err
} }

Loading…
Cancel
Save