Browse Source

fix: prevent vidMap swap race condition in LookupFileIdWithFallback

- Hold vidMapLock.RLock() during entire vm.LookupFileId() call
- Prevents resetVidMap() from swapping vidMap mid-operation
- Ensures atomic access to the current vidMap instance
- Added documentation warnings to getStableVidMap() about swap risks
- Enhanced withCurrentVidMap() documentation for clarity

This fixes a subtle race condition where:
1. Thread A: acquires lock, gets vm pointer, releases lock
2. Thread B: calls resetVidMap(), swaps vc.vidMap
3. Thread A: calls vm.LookupFileId() on old/stale vidMap

While the old vidMap remains valid (in cache chain), holding the lock
ensures we consistently use the current vidMap for the entire operation.
pull/7518/head
chrislu 2 weeks ago
parent
commit
c81bb716b5
  1. 16
      weed/wdclient/vidmap_client.go

16
weed/wdclient/vidmap_client.go

@ -57,18 +57,19 @@ func (vc *vidMapClient) GetLookupFileIdFunction() LookupFileIdFunctionType {
// LookupFileIdWithFallback looks up a file ID, checking cache first, then using provider // LookupFileIdWithFallback looks up a file ID, checking cache first, then using provider
func (vc *vidMapClient) LookupFileIdWithFallback(ctx context.Context, fileId string) (fullUrls []string, err error) { func (vc *vidMapClient) LookupFileIdWithFallback(ctx context.Context, fileId string) (fullUrls []string, err error) {
// Try cache first using the fast path
// Try cache first - hold read lock during entire vidMap access to prevent swap during operation
vc.vidMapLock.RLock() vc.vidMapLock.RLock()
vm := vc.vidMap vm := vc.vidMap
dataCenter := vm.DataCenter dataCenter := vm.DataCenter
fullUrls, err = vm.LookupFileId(ctx, fileId)
vc.vidMapLock.RUnlock() vc.vidMapLock.RUnlock()
fullUrls, err = vm.LookupFileId(ctx, fileId)
// Cache hit - return immediately
if err == nil && len(fullUrls) > 0 { if err == nil && len(fullUrls) > 0 {
return return
} }
// Extract volume ID from file ID (format: "volumeId,needle_id_cookie")
// Cache miss - extract volume ID from file ID (format: "volumeId,needle_id_cookie")
parts := strings.Split(fileId, ",") parts := strings.Split(fileId, ",")
if len(parts) != 2 { if len(parts) != 2 {
return nil, fmt.Errorf("invalid fileId %s", fileId) return nil, fmt.Errorf("invalid fileId %s", fileId)
@ -210,7 +211,10 @@ func (vc *vidMapClient) LookupVolumeIdsWithFallback(ctx context.Context, volumeI
return result, errors.Join(lookupErrors...) return result, errors.Join(lookupErrors...)
} }
// getStableVidMap gets a stable pointer to the vidMap, releasing the lock immediately
// getStableVidMap gets a stable pointer to the vidMap, releasing the lock immediately.
// WARNING: Use with caution. The returned vidMap pointer is stable (won't be garbage collected
// due to cache chain), but the vidMapClient.vidMap field may be swapped by resetVidMap().
// For operations that must use the current vidMap atomically, use withCurrentVidMap() instead.
func (vc *vidMapClient) getStableVidMap() *vidMap { func (vc *vidMapClient) getStableVidMap() *vidMap {
vc.vidMapLock.RLock() vc.vidMapLock.RLock()
vm := vc.vidMap vm := vc.vidMap
@ -218,7 +222,9 @@ func (vc *vidMapClient) getStableVidMap() *vidMap {
return vm return vm
} }
// withCurrentVidMap executes a function with the current vidMap under a read lock
// withCurrentVidMap executes a function with the current vidMap under a read lock.
// This guarantees the vidMap instance cannot be swapped during the function execution.
// Use this when you need atomic access to the current vidMap for multiple operations.
func (vc *vidMapClient) withCurrentVidMap(f func(vm *vidMap)) { func (vc *vidMapClient) withCurrentVidMap(f func(vm *vidMap)) {
vc.vidMapLock.RLock() vc.vidMapLock.RLock()
defer vc.vidMapLock.RUnlock() defer vc.vidMapLock.RUnlock()

Loading…
Cancel
Save