From 1de251d5752b6bc3e90dd0d8d3df9aa2a554d2ac Mon Sep 17 00:00:00 2001 From: chrislu Date: Wed, 13 Aug 2025 18:48:05 -0700 Subject: [PATCH] sum from all shards --- weed/admin/dash/ec_shard_management.go | 52 ++++++++++++++------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/weed/admin/dash/ec_shard_management.go b/weed/admin/dash/ec_shard_management.go index 9d8d63aee..3df8bb6c0 100644 --- a/weed/admin/dash/ec_shard_management.go +++ b/weed/admin/dash/ec_shard_management.go @@ -769,7 +769,7 @@ func (s *AdminServer) GetEcVolumeDetails(volumeID uint32, sortBy string, sortOrd // Get EC volume health metrics (deletion information) volumeHealth, err := s.getEcVolumeHealthMetrics(volumeID) if err != nil { - glog.V(1).Infof("Failed to get EC volume health metrics for volume %d: %v", volumeID, err) + glog.V(0).Infof("ERROR: Failed to get EC volume health metrics for volume %d: %v", volumeID, err) // Don't fail the request, just use default values volumeHealth = &EcVolumeHealthInfo{ TotalSize: 0, @@ -853,6 +853,7 @@ func (s *AdminServer) GetEcVolumeDetails(volumeID uint32, sortBy string, sortOrd // getEcVolumeHealthMetrics retrieves health metrics for an EC volume func (s *AdminServer) getEcVolumeHealthMetrics(volumeID uint32) (*EcVolumeHealthInfo, error) { + glog.V(0).Infof("DEBUG: getEcVolumeHealthMetrics called for volume %d", volumeID) // Get list of servers that have shards for this EC volume var servers []string @@ -889,16 +890,18 @@ func (s *AdminServer) getEcVolumeHealthMetrics(volumeID uint32) (*EcVolumeHealth return nil, fmt.Errorf("failed to get topology info: %v", err) } + glog.V(0).Infof("DEBUG: Found %d servers with EC shards for volume %d: %v", len(servers), volumeID, servers) if len(servers) == 0 { return nil, fmt.Errorf("no servers found with EC shards for volume %d", volumeID) } // Aggregate health metrics from ALL servers that have EC shards var aggregatedHealth *EcVolumeHealthInfo - var maxTotalSize uint64 - var maxFileCount uint64 - var maxDeletedBytes uint64 - var maxDeletedCount uint64 + var totalSize uint64 + var totalFileCount uint64 + var totalDeletedBytes uint64 + var totalDeletedCount uint64 + validServers := 0 for _, server := range servers { healthInfo, err := s.getVolumeHealthFromServer(server, volumeID) @@ -906,20 +909,16 @@ func (s *AdminServer) getEcVolumeHealthMetrics(volumeID uint32) (*EcVolumeHealth glog.V(2).Infof("Failed to get volume health from server %s for volume %d: %v", server, volumeID, err) continue // Try next server } + glog.V(0).Infof("DEBUG: getVolumeHealthFromServer returned for %s: healthInfo=%v", server, healthInfo != nil) if healthInfo != nil { - // Use the maximum values across servers - if healthInfo.TotalSize > maxTotalSize { - maxTotalSize = healthInfo.TotalSize - } - if healthInfo.FileCount > maxFileCount { - maxFileCount = healthInfo.FileCount - } - if healthInfo.DeletedByteCount > maxDeletedBytes { - maxDeletedBytes = healthInfo.DeletedByteCount - } - if healthInfo.DeleteCount > maxDeletedCount { - maxDeletedCount = healthInfo.DeleteCount - } + // Sum the values across all servers (each server contributes its shard data) + totalSize += healthInfo.TotalSize + totalFileCount += healthInfo.FileCount + totalDeletedBytes += healthInfo.DeletedByteCount + totalDeletedCount += healthInfo.DeleteCount + validServers++ + + glog.V(0).Infof("DEBUG: Added server %s data: size=%d, files=%d, deleted_bytes=%d", server, healthInfo.TotalSize, healthInfo.FileCount, healthInfo.DeletedByteCount) // Store first non-nil health info as template for aggregated result if aggregatedHealth == nil { @@ -929,17 +928,22 @@ func (s *AdminServer) getEcVolumeHealthMetrics(volumeID uint32) (*EcVolumeHealth } // If we got aggregated data, finalize it - if aggregatedHealth != nil { - aggregatedHealth.TotalSize = maxTotalSize - aggregatedHealth.FileCount = maxFileCount - aggregatedHealth.DeletedByteCount = maxDeletedBytes - aggregatedHealth.DeleteCount = maxDeletedCount + glog.V(0).Infof("DEBUG: Aggregation check - aggregatedHealth=%v, validServers=%d", aggregatedHealth != nil, validServers) + if aggregatedHealth != nil && validServers > 0 { + // Use summed totals from all servers + aggregatedHealth.TotalSize = totalSize + aggregatedHealth.FileCount = totalFileCount + aggregatedHealth.DeletedByteCount = totalDeletedBytes + aggregatedHealth.DeleteCount = totalDeletedCount // Calculate garbage ratio from aggregated data if aggregatedHealth.TotalSize > 0 { aggregatedHealth.GarbageRatio = float64(aggregatedHealth.DeletedByteCount) / float64(aggregatedHealth.TotalSize) } + glog.V(0).Infof("SUCCESS: Aggregated EC volume %d from %d servers: %d total bytes -> %d MB", + volumeID, validServers, totalSize, totalSize/1024/1024) + return aggregatedHealth, nil } @@ -1009,7 +1013,7 @@ func (s *AdminServer) getVolumeHealthFromServer(server string, volumeID uint32) volumeID, server, healthInfo.DeletedByteCount, healthInfo.DeleteCount, healthInfo.TotalSize) } - return nil + return nil // Return from WithVolumeServerClient callback - healthInfo is captured by closure }) return healthInfo, err