fix: cache successful volume lookups instead of failed ones (#7698)

The condition was inverted: it cached lookups that returned errors instead of lookups that succeeded. As a result, every replicated write made a gRPC call to the master for volume location lookup, resulting in ~1 second of latency for writeToReplicas.

The bug particularly affected TTL volumes because:
- More unique volumes are created (separate pools per TTL)
- Volumes expire and get recreated frequently
- Each new volume requires a fresh lookup (cache miss)
- Higher volume churn = more cache misses = more master lookups

With this fix, successful lookups are cached for 10 minutes, reducing replication latency from ~1s to ~10ms for cached volumes.
3 months ago · ae7333d28e
1 changed file with 4 additions and 3 deletions
--- a/weed/operation/lookup.go
+++ b/weed/operation/lookup.go
@@ -4,12 +4,13 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"github.com/seaweedfs/seaweedfs/weed/pb"
-	"google.golang.org/grpc"
 	"math/rand/v2"
 	"strings"
 	"time"

+	"github.com/seaweedfs/seaweedfs/weed/pb"
+	"google.golang.org/grpc"
+
 	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
 )

@@ -101,7 +102,7 @@ func LookupVolumeIds(masterFn GetMasterFn, grpcDialOption grpc.DialOption, vids
 					GrpcPort:   int(loc.GrpcPort),
 				})
 			}
-			if vidLocations.Error != "" {
+			if vidLocations.Error == "" {
 				vc.Set(vidLocations.VolumeOrFileId, locations, 10*time.Minute)
 			}
 			ret[vidLocations.VolumeOrFileId] = &LookupResult{