From c81bb716b5ec6ddc9c1569407333db785ef4ef7f Mon Sep 17 00:00:00 2001 From: chrislu Date: Thu, 20 Nov 2025 12:46:37 -0800 Subject: [PATCH] fix: prevent vidMap swap race condition in LookupFileIdWithFallback - Hold vidMapLock.RLock() during entire vm.LookupFileId() call - Prevents resetVidMap() from swapping vidMap mid-operation - Ensures atomic access to the current vidMap instance - Added documentation warnings to getStableVidMap() about swap risks - Enhanced withCurrentVidMap() documentation for clarity This fixes a subtle race condition where: 1. Thread A: acquires lock, gets vm pointer, releases lock 2. Thread B: calls resetVidMap(), swaps vc.vidMap 3. Thread A: calls vm.LookupFileId() on old/stale vidMap While the old vidMap remains valid (in cache chain), holding the lock ensures we consistently use the current vidMap for the entire operation. --- weed/wdclient/vidmap_client.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/weed/wdclient/vidmap_client.go b/weed/wdclient/vidmap_client.go index 51d9855f9..a84100ef7 100644 --- a/weed/wdclient/vidmap_client.go +++ b/weed/wdclient/vidmap_client.go @@ -57,18 +57,19 @@ func (vc *vidMapClient) GetLookupFileIdFunction() LookupFileIdFunctionType { // LookupFileIdWithFallback looks up a file ID, checking cache first, then using provider func (vc *vidMapClient) LookupFileIdWithFallback(ctx context.Context, fileId string) (fullUrls []string, err error) { - // Try cache first using the fast path + // Try cache first - hold read lock during entire vidMap access to prevent swap during operation vc.vidMapLock.RLock() vm := vc.vidMap dataCenter := vm.DataCenter + fullUrls, err = vm.LookupFileId(ctx, fileId) vc.vidMapLock.RUnlock() - fullUrls, err = vm.LookupFileId(ctx, fileId) + // Cache hit - return immediately if err == nil && len(fullUrls) > 0 { return } - // Extract volume ID from file ID (format: "volumeId,needle_id_cookie") + // Cache miss - extract volume ID from file ID (format: "volumeId,needle_id_cookie") parts := strings.Split(fileId, ",") if len(parts) != 2 { return nil, fmt.Errorf("invalid fileId %s", fileId) @@ -210,7 +211,10 @@ func (vc *vidMapClient) LookupVolumeIdsWithFallback(ctx context.Context, volumeI return result, errors.Join(lookupErrors...) } -// getStableVidMap gets a stable pointer to the vidMap, releasing the lock immediately +// getStableVidMap gets a stable pointer to the vidMap, releasing the lock immediately. +// WARNING: Use with caution. The returned vidMap pointer is stable (won't be garbage collected +// due to cache chain), but the vidMapClient.vidMap field may be swapped by resetVidMap(). +// For operations that must use the current vidMap atomically, use withCurrentVidMap() instead. func (vc *vidMapClient) getStableVidMap() *vidMap { vc.vidMapLock.RLock() vm := vc.vidMap @@ -218,7 +222,9 @@ func (vc *vidMapClient) getStableVidMap() *vidMap { return vm } -// withCurrentVidMap executes a function with the current vidMap under a read lock +// withCurrentVidMap executes a function with the current vidMap under a read lock. +// This guarantees the vidMap instance cannot be swapped during the function execution. +// Use this when you need atomic access to the current vidMap for multiple operations. func (vc *vidMapClient) withCurrentVidMap(f func(vm *vidMap)) { vc.vidMapLock.RLock() defer vc.vidMapLock.RUnlock()