Browse Source

Fix volume repeatedly toggling between crowded and uncrowded (#7793)

* Fix volume repeatedly toggling between crowded and uncrowded

Fixes #6712

The issue was that removeFromCrowded() was called in removeFromWritable(),
which is invoked whenever a volume temporarily becomes unwritable (due to
replica count fluctuations, heartbeat issues, or read-only state changes).

This caused unnecessary toggling:
1. Volume becomes temporarily unwritable → removeFromWritable() →
   removeFromCrowded() logs 'becomes uncrowded'
2. Volume becomes writable again
3. CollectDeadNodeAndFullVolumes() runs → setVolumeCrowded() logs
   'becomes crowded'

The fix:
- Remove removeFromCrowded() call from removeFromWritable()
- Only clear crowded status when volume is fully unregistered from
  the layout (when location.Length() == 0 in UnRegisterVolume)

This ensures transient state changes don't cause log spam and the
crowded status accurately reflects the volume's size relative to
the grow threshold.

* Refactor test to use subtests for better readability

Address review feedback: use t.Run subtests to make the test's intent
clearer by giving each verification step a descriptive name.
pull/4975/merge
Chris Lu 4 days ago
committed by GitHub
parent
commit
8518f06777
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 2
      weed/topology/volume_layout.go
  2. 74
      weed/topology/volume_layout_test.go

2
weed/topology/volume_layout.go

@ -206,6 +206,7 @@ func (vl *VolumeLayout) UnRegisterVolume(v *storage.VolumeInfo, dn *DataNode) {
if location.Length() == 0 { if location.Length() == 0 {
delete(vl.vid2location, v.Id) delete(vl.vid2location, v.Id)
vl.removeFromCrowded(v.Id)
} }
} }
@ -400,7 +401,6 @@ func (vl *VolumeLayout) removeFromWritable(vid needle.VolumeId) bool {
break break
} }
} }
vl.removeFromCrowded(vid)
if toDeleteIndex >= 0 { if toDeleteIndex >= 0 {
glog.V(0).Infoln("Volume", vid, "becomes unwritable") glog.V(0).Infoln("Volume", vid, "becomes unwritable")
vl.writables = append(vl.writables[0:toDeleteIndex], vl.writables[toDeleteIndex+1:]...) vl.writables = append(vl.writables[0:toDeleteIndex], vl.writables[toDeleteIndex+1:]...)

74
weed/topology/volume_layout_test.go

@ -3,8 +3,10 @@ package topology
import ( import (
"testing" "testing"
"github.com/seaweedfs/seaweedfs/weed/storage"
"github.com/seaweedfs/seaweedfs/weed/storage/needle" "github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/storage/super_block" "github.com/seaweedfs/seaweedfs/weed/storage/super_block"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
) )
func TestVolumesBinaryState(t *testing.T) { func TestVolumesBinaryState(t *testing.T) {
@ -114,3 +116,75 @@ func TestVolumesBinaryState(t *testing.T) {
}) })
} }
} }
func TestVolumeLayoutCrowdedState(t *testing.T) {
rp, _ := super_block.NewReplicaPlacementFromString("000")
ttl, _ := needle.ReadTTL("")
diskType := types.HardDriveType
vl := NewVolumeLayout(rp, ttl, diskType, 1024*1024*1024, false)
vid := needle.VolumeId(1)
dn := &DataNode{
NodeImpl: NodeImpl{
id: "test-node",
},
Ip: "127.0.0.1",
Port: 8080,
}
// Create a volume info
volumeInfo := &storage.VolumeInfo{
Id: vid,
ReplicaPlacement: rp,
Ttl: ttl,
DiskType: string(diskType),
}
// Register the volume
vl.RegisterVolume(volumeInfo, dn)
// Add the volume to writables
vl.accessLock.Lock()
vl.setVolumeWritable(vid)
vl.accessLock.Unlock()
// Mark the volume as crowded
vl.SetVolumeCrowded(vid)
t.Run("should be crowded after being marked", func(t *testing.T) {
vl.accessLock.RLock()
_, isCrowded := vl.crowded[vid]
vl.accessLock.RUnlock()
if !isCrowded {
t.Fatal("Volume should be marked as crowded after SetVolumeCrowded")
}
})
// Remove from writable (simulating temporary unwritable state)
vl.accessLock.Lock()
vl.removeFromWritable(vid)
vl.accessLock.Unlock()
t.Run("should remain crowded after becoming unwritable", func(t *testing.T) {
// This is the fix for issue #6712 - crowded state should persist
vl.accessLock.RLock()
_, stillCrowded := vl.crowded[vid]
vl.accessLock.RUnlock()
if !stillCrowded {
t.Fatal("Volume should remain crowded after becoming unwritable (fix for issue #6712)")
}
})
// Now unregister the volume completely
vl.UnRegisterVolume(volumeInfo, dn)
t.Run("should not be crowded after unregistering", func(t *testing.T) {
vl.accessLock.RLock()
_, stillCrowdedAfterUnregister := vl.crowded[vid]
vl.accessLock.RUnlock()
if stillCrowdedAfterUnregister {
t.Fatal("Volume should be removed from crowded map after full unregistration")
}
})
}
Loading…
Cancel
Save