From c5e831a19fcecb48782d6c8853ebf135d3532393 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Thu, 30 Oct 2025 21:34:36 -0700 Subject: [PATCH] Fix cache pointer race condition with atomic.Pointer Use atomic.Pointer for vidMap cache field to prevent data races during cache trimming in resetVidMap. This addresses the race condition where concurrent GetLocations calls could read the cache pointer while resetVidMap is modifying it during cache chain trimming. Changes: - Changed cache field from *vidMap to atomic.Pointer[vidMap] - Updated all cache access to use Load() and Store() atomic operations - Updated shallowClone, GetLocations, deleteLocation, deleteEcLocation - Updated resetVidMap to use atomic operations for cache trimming --- weed/wdclient/masterclient.go | 8 ++++---- weed/wdclient/vid_map.go | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/weed/wdclient/masterclient.go b/weed/wdclient/masterclient.go index a8ff5c9c7..3a952a0d2 100644 --- a/weed/wdclient/masterclient.go +++ b/weed/wdclient/masterclient.go @@ -584,15 +584,15 @@ func (mc *MasterClient) resetVidMap() { tail := mc.vidMap.shallowClone() nvm := newVidMap(tail.DataCenter) - nvm.cache = tail + nvm.cache.Store(tail) mc.vidMap = nvm //trim - for i := 0; i < mc.vidMapCacheSize && tail.cache != nil; i++ { + for i := 0; i < mc.vidMapCacheSize && tail.cache.Load() != nil; i++ { if i == mc.vidMapCacheSize-1 { - tail.cache = nil + tail.cache.Store(nil) } else { - tail = tail.cache + tail = tail.cache.Load() } } } diff --git a/weed/wdclient/vid_map.go b/weed/wdclient/vid_map.go index 6d6ccd6e6..da41157a4 100644 --- a/weed/wdclient/vid_map.go +++ b/weed/wdclient/vid_map.go @@ -41,7 +41,7 @@ type vidMap struct { ecVid2Locations map[uint32][]Location DataCenter string cursor int32 - cache *vidMap + cache atomic.Pointer[vidMap] } func newVidMap(dataCenter string) *vidMap { @@ -56,12 +56,16 @@ func newVidMap(dataCenter string) *vidMap { // shallowClone creates a shallow copy of the vidMap for use in cache chaining. // The caller is responsible for ensuring thread safety. func (vc *vidMap) shallowClone() *vidMap { - return &vidMap{ + newMap := &vidMap{ vid2Locations: vc.vid2Locations, ecVid2Locations: vc.ecVid2Locations, DataCenter: vc.DataCenter, - cache: vc.cache, } + // Atomically copy the cache pointer + if cachedMap := vc.cache.Load(); cachedMap != nil { + newMap.cache.Store(cachedMap) + } + return newMap } func (vc *vidMap) getLocationIndex(length int) (int, error) { @@ -146,8 +150,8 @@ func (vc *vidMap) GetLocations(vid uint32) (locations []Location, found bool) { return locations, found } - if vc.cache != nil { - return vc.cache.GetLocations(vid) + if cachedMap := vc.cache.Load(); cachedMap != nil { + return cachedMap.GetLocations(vid) } return nil, false @@ -223,8 +227,8 @@ func (vc *vidMap) addEcLocation(vid uint32, location Location) { } func (vc *vidMap) deleteLocation(vid uint32, location Location) { - if vc.cache != nil { - vc.cache.deleteLocation(vid, location) + if cachedMap := vc.cache.Load(); cachedMap != nil { + cachedMap.deleteLocation(vid, location) } vc.Lock() @@ -246,8 +250,8 @@ func (vc *vidMap) deleteLocation(vid uint32, location Location) { } func (vc *vidMap) deleteEcLocation(vid uint32, location Location) { - if vc.cache != nil { - vc.cache.deleteLocation(vid, location) + if cachedMap := vc.cache.Load(); cachedMap != nil { + cachedMap.deleteLocation(vid, location) } vc.Lock()