package topology

import (
	"fmt"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/sequence"
	"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
	"github.com/seaweedfs/seaweedfs/weed/storage/types"
)

// TestRaceConditionStress simulates the original issue scenario:
// high concurrent writes causing capacity misjudgment.
func TestRaceConditionStress(t *testing.T) {
	// Create a cluster similar to the issue description:
	// 3 volume servers, 200GB each, 5GB volume limit = 40 volumes max per server
	const (
		numServers          = 3
		volumeLimitMB       = 5000                                      // 5GB in MB
		storagePerServerGB  = 200                                       // 200GB per server
		maxVolumesPerServer = storagePerServerGB * 1024 / volumeLimitMB // 200*1024/5000 = 40
		concurrentRequests  = 50                                        // high concurrency, as in the issue
	)

	// Create the test topology
	topo := NewTopology("weedfs", sequence.NewMemorySequencer(), uint64(volumeLimitMB)*1024*1024, 5, false)

	dc := NewDataCenter("dc1")
	topo.LinkChildNode(dc)
	rack := NewRack("rack1")
	dc.LinkChildNode(rack)

	// Create 3 volume servers with realistic capacity
	servers := make([]*DataNode, numServers)
	for i := 0; i < numServers; i++ {
		dn := NewDataNode(fmt.Sprintf("server%d", i+1))
		rack.LinkChildNode(dn)

		// Set up a disk with capacity for 40 volumes
		disk := NewDisk(types.HardDriveType.String())
		disk.diskUsages.getOrCreateDisk(types.HardDriveType).maxVolumeCount = maxVolumesPerServer
		dn.LinkChildNode(disk)

		servers[i] = dn
	}

	vg := NewDefaultVolumeGrowth()
	rp, _ := super_block.NewReplicaPlacementFromString("000") // single replica, as in the issue

	option := &VolumeGrowOption{
		Collection:       "test-bucket-large", // same collection name as the issue
		ReplicaPlacement: rp,
		DiskType:         types.HardDriveType,
	}

	// Track results
	var successfulAllocations int64
	var failedAllocations int64
	var totalVolumesCreated int64

	var wg sync.WaitGroup

	// Launch concurrent volume creation requests
	startTime := time.Now()
	for i := 0; i < concurrentRequests; i++ {
		wg.Add(1)
		go func(requestId int) {
			defer wg.Done()

			// This is the critical part: multiple goroutines trying to allocate
			// simultaneously. Note that this shadows the outer `servers` slice.
			servers, reservation, err := vg.findEmptySlotsForOneVolume(topo, option, true)

			if err != nil {
				atomic.AddInt64(&failedAllocations, 1)
				t.Logf("Request %d failed: %v", requestId, err)
				return
			}

			// Simulate volume creation delay (as in a real scenario)
			time.Sleep(time.Millisecond * 50)

			// Simulate successful volume creation
			for _, server := range servers {
				disk := server.children[NodeId(types.HardDriveType.String())].(*Disk)
				deltaDiskUsage := &DiskUsageCounts{
					volumeCount: 1,
				}
				disk.UpAdjustDiskUsageDelta(types.HardDriveType, deltaDiskUsage)
				atomic.AddInt64(&totalVolumesCreated, 1)
			}

			// Release reservations (simulates successful registration)
			reservation.releaseAllReservations()
			atomic.AddInt64(&successfulAllocations, 1)
		}(i)
	}

	wg.Wait()
	duration := time.Since(startTime)

	// Report results
	t.Logf("Test completed in %v", duration)
	t.Logf("Successful allocations: %d", successfulAllocations)
	t.Logf("Failed allocations: %d", failedAllocations)
	t.Logf("Total volumes created: %d", totalVolumesCreated)

	// Check that capacity limits are respected
	totalCapacityUsed := int64(0)
	for i, server := range servers {
		disk := server.children[NodeId(types.HardDriveType.String())].(*Disk)
		volumeCount := disk.diskUsages.getOrCreateDisk(types.HardDriveType).volumeCount
		totalCapacityUsed += volumeCount

		t.Logf("Server %d: %d volumes (max: %d)", i+1, volumeCount, maxVolumesPerServer)

		// Critical check: no server should exceed its capacity
		if volumeCount > maxVolumesPerServer {
			t.Errorf("RACE CONDITION DETECTED: Server %d exceeded capacity: %d > %d",
				i+1, volumeCount, maxVolumesPerServer)
		}
	}

	// Verify that the totals make sense
	if totalVolumesCreated != totalCapacityUsed {
		t.Errorf("Volume count mismatch: created=%d, actual=%d", totalVolumesCreated, totalCapacityUsed)
	}

	// The total should never exceed the cluster capacity (120 volumes for 3 servers × 40 each)
	maxClusterCapacity := int64(numServers * maxVolumesPerServer)
	if totalCapacityUsed > maxClusterCapacity {
		t.Errorf("RACE CONDITION DETECTED: Cluster capacity exceeded: %d > %d",
			totalCapacityUsed, maxClusterCapacity)
	}

	// With reservations, allocation should be fully accounted for:
	// successful + failed requests should equal concurrentRequests
	if successfulAllocations+failedAllocations != concurrentRequests {
		t.Errorf("Request count mismatch: success=%d + failed=%d != total=%d",
			successfulAllocations, failedAllocations, concurrentRequests)
	}

	t.Logf("Race condition test passed: Capacity limits respected with %d concurrent requests",
		concurrentRequests)
}

// TestCapacityJudgmentAccuracy verifies that the capacity calculation is accurate
// under various load conditions.
func TestCapacityJudgmentAccuracy(t *testing.T) {
	// Create a single server with a known capacity
	topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 5*1024*1024*1024, 5, false)

	dc := NewDataCenter("dc1")
	topo.LinkChildNode(dc)
	rack := NewRack("rack1")
	dc.LinkChildNode(rack)

	dn := NewDataNode("server1")
	rack.LinkChildNode(dn)

	// Server with capacity for exactly 10 volumes
	disk := NewDisk(types.HardDriveType.String())
	diskUsage := disk.diskUsages.getOrCreateDisk(types.HardDriveType)
	diskUsage.maxVolumeCount = 10
	dn.LinkChildNode(disk)

	// Also set the max volume count at the DataNode level (this normally gets propagated up)
	dn.diskUsages.getOrCreateDisk(types.HardDriveType).maxVolumeCount = 10

	vg := NewDefaultVolumeGrowth()
	rp, _ := super_block.NewReplicaPlacementFromString("000")

	option := &VolumeGrowOption{
		Collection:       "test",
		ReplicaPlacement: rp,
		DiskType:         types.HardDriveType,
	}

	// Test accurate capacity reporting at each step
	for i := 0; i < 10; i++ {
		// Check available space before reservation
		availableBefore := dn.AvailableSpaceFor(option)
		availableForReservation := dn.AvailableSpaceForReservation(option)

		expectedAvailable := int64(10 - i)
		if availableBefore != expectedAvailable {
			t.Errorf("Step %d: Expected %d available, got %d", i, expectedAvailable, availableBefore)
		}

		if availableForReservation != expectedAvailable {
			t.Errorf("Step %d: Expected %d available for reservation, got %d", i, expectedAvailable, availableForReservation)
		}

		// Try to reserve and allocate
		_, reservation, err := vg.findEmptySlotsForOneVolume(topo, option, true)
		if err != nil {
			t.Fatalf("Step %d: Unexpected reservation failure: %v", i, err)
		}

		// Check that the space available for reservation decreased
		availableAfterReservation := dn.AvailableSpaceForReservation(option)
		if availableAfterReservation != expectedAvailable-1 {
			t.Errorf("Step %d: Expected %d available after reservation, got %d",
				i, expectedAvailable-1, availableAfterReservation)
		}

		// Simulate successful volume creation by properly updating the disk usage hierarchy
		disk := dn.children[NodeId(types.HardDriveType.String())].(*Disk)

		// Create a volume usage delta to simulate volume creation
		deltaDiskUsage := &DiskUsageCounts{
			volumeCount: 1,
		}

		// Properly propagate the usage up the hierarchy
		disk.UpAdjustDiskUsageDelta(types.HardDriveType, deltaDiskUsage)

		// Debug: check the volume count after the update
		diskUsageOnNode := dn.diskUsages.getOrCreateDisk(types.HardDriveType)
		currentVolumeCount := atomic.LoadInt64(&diskUsageOnNode.volumeCount)
		t.Logf("Step %d: Volume count after update: %d", i, currentVolumeCount)

		// Release the reservation
		reservation.releaseAllReservations()

		// Verify the final state
		availableAfter := dn.AvailableSpaceFor(option)
		expectedAfter := int64(10 - i - 1)
		if availableAfter != expectedAfter {
			t.Errorf("Step %d: Expected %d available after creation, got %d",
				i, expectedAfter, availableAfter)
			// More debugging
			diskUsageOnNode := dn.diskUsages.getOrCreateDisk(types.HardDriveType)
			maxVolumes := atomic.LoadInt64(&diskUsageOnNode.maxVolumeCount)
			remoteVolumes := atomic.LoadInt64(&diskUsageOnNode.remoteVolumeCount)
			actualVolumeCount := atomic.LoadInt64(&diskUsageOnNode.volumeCount)
			t.Logf("Debug Step %d: max=%d, volume=%d, remote=%d", i, maxVolumes, actualVolumeCount, remoteVolumes)
		}
	}

	// At this point, no further reservations should succeed
	_, _, err := vg.findEmptySlotsForOneVolume(topo, option, true)
	if err == nil {
		t.Error("Expected reservation to fail when at capacity")
	}

	t.Logf("Capacity judgment accuracy test passed")
}

// TestReservationSystemPerformance measures the performance impact of reservations.
func TestReservationSystemPerformance(t *testing.T) {
	// Create topology
	topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5, false)

	dc := NewDataCenter("dc1")
	topo.LinkChildNode(dc)
	rack := NewRack("rack1")
	dc.LinkChildNode(rack)

	dn := NewDataNode("server1")
	rack.LinkChildNode(dn)

	disk := NewDisk(types.HardDriveType.String())
	disk.diskUsages.getOrCreateDisk(types.HardDriveType).maxVolumeCount = 1000
	dn.LinkChildNode(disk)

	vg := NewDefaultVolumeGrowth()
	rp, _ := super_block.NewReplicaPlacementFromString("000")

	option := &VolumeGrowOption{
		Collection:       "test",
		ReplicaPlacement: rp,
		DiskType:         types.HardDriveType,
	}

	// Benchmark reservation operations
	const iterations = 1000

	startTime := time.Now()
	for i := 0; i < iterations; i++ {
		_, reservation, err := vg.findEmptySlotsForOneVolume(topo, option, true)
		if err != nil {
			t.Fatalf("Iteration %d failed: %v", i, err)
		}
		reservation.releaseAllReservations()

		// Simulate volume creation
		diskUsage := dn.diskUsages.getOrCreateDisk(types.HardDriveType)
		atomic.AddInt64(&diskUsage.volumeCount, 1)
	}
	duration := time.Since(startTime)

	avgDuration := duration / iterations
	t.Logf("Performance: %d reservations in %v (avg: %v per reservation)",
		iterations, duration, avgDuration)

	// Performance should be reasonable (less than 1ms per reservation on average)
	if avgDuration > time.Millisecond {
		t.Errorf("Reservation system performance concern: %v per reservation", avgDuration)
	} else {
		t.Logf("Performance test passed: %v per reservation", avgDuration)
	}
}
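
// Note: these tests mutate shared topology state from multiple goroutines, so
// they are most useful when run under the race detector. Assuming the standard
// SeaweedFS repository layout, an invocation would look something like:
//
//	go test -race -run 'TestRaceConditionStress|TestCapacityJudgmentAccuracy|TestReservationSystemPerformance' ./weed/topology/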