From 890e5c8bd873d9a5cd298f0121c11d93da81c080 Mon Sep 17 00:00:00 2001 From: chrislu Date: Thu, 20 Nov 2025 17:28:03 -0800 Subject: [PATCH] fix: add timeout to master volume lookup to prevent indefinite blocking The masterVolumeProvider.LookupVolumeIds method was using the context directly without a timeout, which could cause it to block indefinitely if the master is slow to respond or unreachable. Problem: err := pb.WithMasterClient(false, p.masterClient.GetMaster(ctx), ...) resp, err := client.LookupVolume(ctx, &master_pb.LookupVolumeRequest{...}) - No timeout on gRPC call to master - Could block indefinitely if master is unresponsive - Inconsistent with FilerClient which uses 5s timeout - This is a fallback path (cache miss) but still needs protection Scenarios where this could hang: 1. Master server under heavy load (slow response) 2. Network issues between client and master 3. Master server hung or deadlocked 4. Master in process of shutting down Fix: timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() err := pb.WithMasterClient(false, p.masterClient.GetMaster(timeoutCtx), ...) resp, err := client.LookupVolume(timeoutCtx, &master_pb.LookupVolumeRequest{...}) Benefits: - Prevents indefinite blocking on master lookup - Consistent with FilerClient timeout pattern (5 seconds) - Faster failure detection when master is unresponsive - Caller's context still honored (timeout is in addition, not replacement) - Improves overall system resilience Note: 5 seconds is a reasonable default for volume lookups: - Long enough for normal master response (~10-50ms) - Short enough to fail fast on issues - Matches FilerClient's grpcTimeout default --- weed/wdclient/masterclient.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/weed/wdclient/masterclient.go b/weed/wdclient/masterclient.go index 496daad3b..afbcbaa4b 100644 --- a/weed/wdclient/masterclient.go +++ b/weed/wdclient/masterclient.go @@ -34,8 +34,12 @@ func (p *masterVolumeProvider) LookupVolumeIds(ctx context.Context, volumeIds [] glog.V(2).Infof("Looking up %d volumes from master: %v", len(volumeIds), volumeIds) - err := pb.WithMasterClient(false, p.masterClient.GetMaster(ctx), p.masterClient.grpcDialOption, false, func(client master_pb.SeaweedClient) error { - resp, err := client.LookupVolume(ctx, &master_pb.LookupVolumeRequest{ + // Use a timeout for the master lookup to prevent indefinite blocking + timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + err := pb.WithMasterClient(false, p.masterClient.GetMaster(timeoutCtx), p.masterClient.grpcDialOption, false, func(client master_pb.SeaweedClient) error { + resp, err := client.LookupVolume(timeoutCtx, &master_pb.LookupVolumeRequest{ VolumeOrFileIds: volumeIds, }) if err != nil {