address comments

4 months ago · 772ee0f967
2 changed files with 74 additions and 14 deletions
--- a/docker/admin_integration/Makefile
+++ b/docker/admin_integration/Makefile
@ -376,7 +376,7 @@ ec-vacuum-status: ## Check EC volume status and garbage ratios
 ec-vacuum-continuous: ## Generate continuous EC garbage for testing
 	@echo "🔄 Generating continuous EC garbage for vacuum testing..."
 	@echo "Running 3 rounds with 60-second intervals..."
 	@for i in {1..3}; do \
 	@for i in $$(seq 1 3); do \
 		echo "Round $$i: Generating large files..."; \
 		docker-compose -f $(COMPOSE_FILE) exec vacuum-tester go run create_vacuum_test_data.go -phase=generate -files=15 -size=4000; \
 		echo "Waiting 90 seconds for EC encoding..."; \
--- a/weed/admin/maintenance/maintenance_scanner.go
+++ b/weed/admin/maintenance/maintenance_scanner.go
@ -7,6 +7,7 @@ import (
 	"github.com/seaweedfs/seaweedfs/weed/glog"
 	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
 	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
 	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
 	"github.com/seaweedfs/seaweedfs/weed/worker/types"
 )
@ -341,25 +342,84 @@ func (ms *MaintenanceScanner) createECVolumeMetric(volumeID uint32) *VolumeHealt
 }
 // enrichECVolumeWithDeletionInfo attempts to get deletion information for an EC volume
 // For now, this is a placeholder - getting actual deletion info from EC volumes
 // requires parsing .ecj files or other complex mechanisms
 // This implements basic EC deletion detection that can be enhanced over time
 func (ms *MaintenanceScanner) enrichECVolumeWithDeletionInfo(metric *VolumeHealthMetrics, server string) {
 	// TODO: Implement actual deletion info retrieval for EC volumes
 	// This could involve:
 	// 1. Parsing .ecj (EC journal) files
 	// 2. Using volume server APIs that support EC volumes
 	// 3. Maintaining deletion state during EC encoding process
 	// For testing purposes, simulate some EC volumes having deletions
 	// In a real implementation, this would query the actual deletion state
 	if metric.VolumeID%5 == 0 { // Every 5th volume has simulated deletions
 		metric.DeletedBytes = metric.Size / 3 // 33% deleted
 	// Get EC shard information to establish baseline
 	shardInfos, err := ms.getECShardInfo(metric.VolumeID, server)
 	if err != nil {
 		glog.V(1).Infof("Failed to get EC shard info for volume %d from %s: %v", metric.VolumeID, server, err)
 		return
 	}
 	// For now, use a heuristic approach based on available data
 	// This can be enhanced with proper .ecx/.ecj file analysis in the future
 	deletedBytes := ms.estimateECVolumeDeletions(metric, shardInfos)
 	if deletedBytes > 0 {
 		metric.DeletedBytes = uint64(deletedBytes)
 		metric.GarbageRatio = float64(metric.DeletedBytes) / float64(metric.Size)
 		glog.V(2).Infof("EC volume %d simulated deletion info: %d deleted bytes, garbage ratio: %.1f%%",
 		glog.V(2).Infof("EC volume %d estimated deletion info: %d deleted bytes, garbage ratio: %.1f%%",
 			metric.VolumeID, metric.DeletedBytes, metric.GarbageRatio*100)
 	}
 }
 // getECShardInfo retrieves basic shard information for an EC volume
 func (ms *MaintenanceScanner) getECShardInfo(volumeId uint32, server string) ([]*volume_server_pb.EcShardInfo, error) {
 	// For now, return empty slice since we're implementing a heuristic approach
 	// This can be enhanced later with actual volume server API calls when proper
 	// authentication and gRPC dial options are available in the AdminClient interface
 	glog.V(3).Infof("EC shard info requested for volume %d from server %s (heuristic mode)", volumeId, server)
 	return []*volume_server_pb.EcShardInfo{}, nil
 }
 // estimateECVolumeDeletions provides a conservative estimate of deleted bytes
 // TODO: Enhance this with actual .ecx/.ecj file analysis for precise deletion tracking
 func (ms *MaintenanceScanner) estimateECVolumeDeletions(metric *VolumeHealthMetrics, shardInfos []*volume_server_pb.EcShardInfo) int64 {
 	// For volumes that are older and likely to have deletions, provide conservative estimates
 	// This prevents false positives while allowing real deletion detection
 	// If the volume is relatively new (less than 1 day old), assume no significant deletions
 	if time.Since(metric.LastModified) < 24*time.Hour {
 		return 0
 	}
 	// For older volumes, use metadata signals to estimate deletion potential
 	totalShardSize := int64(0)
 	for _, shard := range shardInfos {
 		totalShardSize += shard.Size
 	}
 	// If shard sizes are significantly different from expected, there might be deletions
 	// This is a heuristic that can be refined with better deletion detection
 	if len(shardInfos) > 0 {
 		expectedShardSize := int64(metric.Size) / int64(len(shardInfos))
 		variance := calculateShardSizeVariance(shardInfos, expectedShardSize)
 		// High variance might indicate deletions (this is a rough heuristic)
 		if variance > 0.3 { // 30% variance threshold
 			return int64(metric.Size / 10) // Conservative 10% deletion estimate
 		}
 	}
 	return 0
 }
 // calculateShardSizeVariance calculates the variance in shard sizes as a deletion indicator
 func calculateShardSizeVariance(shardInfos []*volume_server_pb.EcShardInfo, expectedSize int64) float64 {
 	if len(shardInfos) == 0 || expectedSize == 0 {
 		return 0
 	}
 	var totalVariance float64
 	for _, shard := range shardInfos {
 		diff := float64(shard.Size - expectedSize)
 		totalVariance += (diff * diff)
 	}
 	variance := totalVariance / float64(len(shardInfos))
 	return variance / float64(expectedSize*expectedSize)
 }
 // convertToTaskMetrics converts existing volume metrics to task system format
 func (ms *MaintenanceScanner) convertToTaskMetrics(metrics []*VolumeHealthMetrics) []*types.VolumeHealthMetrics {
 	var simplified []*types.VolumeHealthMetrics