17 changed files with 1175 additions and 435 deletions
12   test/s3/iam/s3_iam_framework.go
5    weed/cluster/cluster.go
5    weed/command/iam.go
4    weed/filer/filer.go
47   weed/filer/reader_at.go
43   weed/iamapi/iamapi_server.go
27   weed/mount/weedfs.go
5    weed/s3api/s3_constants/header.go
6    weed/s3api/s3api_bucket_handlers_object_lock_config.go
33   weed/s3api/s3api_object_handlers.go
39   weed/s3api/s3api_object_retention.go
76   weed/s3api/s3api_object_retention_test.go
9    weed/s3api/s3api_server.go
2    weed/server/filer_server.go
404  weed/wdclient/filer_client.go
546  weed/wdclient/masterclient.go
347  weed/wdclient/vidmap_client.go
weed/wdclient/filer_client.go
@@ -0,0 +1,404 @@
package wdclient

import (
	"context"
	"fmt"
	"math/rand"
	"strings"
	"sync/atomic"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
)

// UrlPreference controls which URL to use for volume access
type UrlPreference string

const (
	PreferUrl       UrlPreference = "url"       // Use private URL (default)
	PreferPublicUrl UrlPreference = "publicUrl" // Use public URL
)

// filerHealth tracks the health status of a filer
type filerHealth struct {
	failureCount      int32 // atomic: consecutive failures
	lastFailureTimeNs int64 // atomic: last failure time in Unix nanoseconds
}

// FilerClient provides volume location services by querying a filer.
// It uses the shared vidMap cache for efficient lookups.
// Supports multiple filer addresses with automatic failover for high availability.
// Tracks filer health to avoid repeatedly trying known-unhealthy filers.
type FilerClient struct {
	*vidMapClient
	filerAddresses     []pb.ServerAddress
	filerIndex         int32          // atomic: current filer index for round-robin
	filerHealth        []*filerHealth // health status per filer (same order as filerAddresses)
	grpcDialOption     grpc.DialOption
	urlPreference      UrlPreference
	grpcTimeout        time.Duration
	cacheSize          int   // Number of historical vidMap snapshots to keep
	clientId           int32 // Unique client identifier for gRPC metadata
	failureThreshold   int32 // Circuit breaker: consecutive failures before circuit opens
	resetTimeout       time.Duration // Circuit breaker: time before re-checking unhealthy filer
	maxRetries         int           // Retry: maximum retry attempts for transient failures
	initialRetryWait   time.Duration // Retry: initial wait time before first retry
	retryBackoffFactor float64       // Retry: backoff multiplier for wait time
}

// filerVolumeProvider implements VolumeLocationProvider by querying the filer.
// Supports multiple filer addresses with automatic failover.
type filerVolumeProvider struct {
	filerClient *FilerClient
}

// FilerClientOption holds optional configuration for FilerClient
type FilerClientOption struct {
	GrpcTimeout        time.Duration
	UrlPreference      UrlPreference
	CacheSize          int           // Number of historical vidMap snapshots (0 = use default)
	FailureThreshold   int32         // Circuit breaker: consecutive failures before skipping filer (0 = use default of 3)
	ResetTimeout       time.Duration // Circuit breaker: time before re-checking unhealthy filer (0 = use default of 30s)
	MaxRetries         int           // Retry: maximum retry attempts for transient failures (0 = use default of 3)
	InitialRetryWait   time.Duration // Retry: initial wait time before first retry (0 = use default of 1s)
	RetryBackoffFactor float64       // Retry: backoff multiplier for wait time (0 = use default of 1.5)
}

// NewFilerClient creates a new client that queries filer(s) for volume locations.
// Supports multiple filer addresses for high availability with automatic failover.
// Uses sensible defaults: 5-second gRPC timeout, PreferUrl, DefaultVidMapCacheSize.
func NewFilerClient(filerAddresses []pb.ServerAddress, grpcDialOption grpc.DialOption, dataCenter string, opts ...*FilerClientOption) *FilerClient {
	if len(filerAddresses) == 0 {
		glog.Fatal("NewFilerClient requires at least one filer address")
	}

	// Apply defaults
	grpcTimeout := 5 * time.Second
	urlPref := PreferUrl
	cacheSize := DefaultVidMapCacheSize
	failureThreshold := int32(3)     // Default: 3 consecutive failures before circuit opens
	resetTimeout := 30 * time.Second // Default: 30 seconds before re-checking unhealthy filer
	maxRetries := 3                  // Default: 3 retry attempts for transient failures
	initialRetryWait := time.Second  // Default: 1 second initial retry wait
	retryBackoffFactor := 1.5        // Default: 1.5x backoff multiplier

	// Override with provided options
	if len(opts) > 0 && opts[0] != nil {
		opt := opts[0]
		if opt.GrpcTimeout > 0 {
			grpcTimeout = opt.GrpcTimeout
		}
		if opt.UrlPreference != "" {
			urlPref = opt.UrlPreference
		}
		if opt.CacheSize > 0 {
			cacheSize = opt.CacheSize
		}
		if opt.FailureThreshold > 0 {
			failureThreshold = opt.FailureThreshold
		}
		if opt.ResetTimeout > 0 {
			resetTimeout = opt.ResetTimeout
		}
		if opt.MaxRetries > 0 {
			maxRetries = opt.MaxRetries
		}
		if opt.InitialRetryWait > 0 {
			initialRetryWait = opt.InitialRetryWait
		}
		if opt.RetryBackoffFactor > 0 {
			retryBackoffFactor = opt.RetryBackoffFactor
		}
	}

	// Initialize health tracking for each filer
	health := make([]*filerHealth, len(filerAddresses))
	for i := range health {
		health[i] = &filerHealth{}
	}

	fc := &FilerClient{
		filerAddresses:     filerAddresses,
		filerIndex:         0,
		filerHealth:        health,
		grpcDialOption:     grpcDialOption,
		urlPreference:      urlPref,
		grpcTimeout:        grpcTimeout,
		cacheSize:          cacheSize,
		clientId:           rand.Int31(), // Random client ID for gRPC metadata tracking
		failureThreshold:   failureThreshold,
		resetTimeout:       resetTimeout,
		maxRetries:         maxRetries,
		initialRetryWait:   initialRetryWait,
		retryBackoffFactor: retryBackoffFactor,
	}

	// Create provider that references this FilerClient for failover support
	provider := &filerVolumeProvider{
		filerClient: fc,
	}

	fc.vidMapClient = newVidMapClient(provider, dataCenter, cacheSize)

	return fc
}
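
For context, a minimal usage sketch from a consuming package (not part of this diff; the filer addresses, credentials, and file ID are hypothetical, and zero-valued option fields fall back to the defaults above):

import (
	"context"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/wdclient"
)

func lookupExample() error {
	fc := wdclient.NewFilerClient(
		[]pb.ServerAddress{"filer1:8888", "filer2:8888"}, // hypothetical filers
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		"dc1",
		&wdclient.FilerClientOption{
			GrpcTimeout:      10 * time.Second,
			UrlPreference:    wdclient.PreferPublicUrl,
			FailureThreshold: 5, // open the circuit after 5 consecutive failures
		},
	)
	lookup := fc.GetLookupFileIdFunction()
	urls, err := lookup(context.Background(), "3,01637037d6") // hypothetical file ID
	if err != nil {
		return err
	}
	_ = urls // try urls in order; same-DC locations come first
	return nil
}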

// GetLookupFileIdFunction returns a lookup function with URL preference handling
func (fc *FilerClient) GetLookupFileIdFunction() LookupFileIdFunctionType {
	if fc.urlPreference == PreferUrl {
		// Use the default implementation from vidMapClient
		return fc.vidMapClient.GetLookupFileIdFunction()
	}

	// Custom implementation that prefers PublicUrl
	return func(ctx context.Context, fileId string) (fullUrls []string, err error) {
		// Parse file ID to extract volume ID
		parts := strings.Split(fileId, ",")
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid fileId format: %s", fileId)
		}
		volumeIdStr := parts[0]

		// First try the cache using LookupVolumeIdsWithFallback
		vidLocations, err := fc.LookupVolumeIdsWithFallback(ctx, []string{volumeIdStr})

		// Check for partial results first (important for multi-volume batched lookups)
		locations, found := vidLocations[volumeIdStr]
		if !found || len(locations) == 0 {
			// Volume not found - return specific error with context from lookup if available
			if err != nil {
				return nil, fmt.Errorf("volume %s not found for fileId %s: %w", volumeIdStr, fileId, err)
			}
			return nil, fmt.Errorf("volume %s not found for fileId %s", volumeIdStr, fileId)
		}

		// Volume found successfully - ignore any errors about other volumes
		// (not relevant for single-volume lookup, but defensive for future batching)

		// Build URLs with publicUrl preference, and also prefer same DC
		var sameDcUrls, otherDcUrls []string
		dataCenter := fc.GetDataCenter()
		for _, loc := range locations {
			url := loc.PublicUrl
			if url == "" {
				url = loc.Url
			}
			httpUrl := "http://" + url + "/" + fileId
			if dataCenter != "" && dataCenter == loc.DataCenter {
				sameDcUrls = append(sameDcUrls, httpUrl)
			} else {
				otherDcUrls = append(otherDcUrls, httpUrl)
			}
		}
		// Shuffle to distribute load across volume servers
		rand.Shuffle(len(sameDcUrls), func(i, j int) { sameDcUrls[i], sameDcUrls[j] = sameDcUrls[j], sameDcUrls[i] })
		rand.Shuffle(len(otherDcUrls), func(i, j int) { otherDcUrls[i], otherDcUrls[j] = otherDcUrls[j], otherDcUrls[i] })
		// Prefer same data center
		fullUrls = append(sameDcUrls, otherDcUrls...)
		return fullUrls, nil
	}
}

// isRetryableGrpcError checks if a gRPC error is transient and should be retried.
//
// Note on codes.Aborted: While Aborted can indicate application-level conflicts
// (e.g., transaction failures), in the context of volume location lookups (which
// are simple read-only operations with no transactions), Aborted is more likely
// to indicate transient server issues during restart/recovery. We include it here
// for volume lookups but log it for visibility in case misclassification occurs.
func isRetryableGrpcError(err error) bool {
	if err == nil {
		return false
	}

	// Check the gRPC status code
	st, ok := status.FromError(err)
	if ok {
		switch st.Code() {
		case codes.Unavailable: // Server unavailable (temporary)
			return true
		case codes.DeadlineExceeded: // Request timeout
			return true
		case codes.ResourceExhausted: // Rate limited or overloaded
			return true
		case codes.Aborted:
			// Aborted during read-only volume lookups is likely transient
			// (e.g., filer restarting), but log for visibility
			glog.V(1).Infof("Treating Aborted as retryable for volume lookup: %v", err)
			return true
		}
	}

	// Fall back to string matching for non-gRPC errors (e.g., network errors)
	errStr := err.Error()
	return strings.Contains(errStr, "transport") ||
		strings.Contains(errStr, "connection") ||
		strings.Contains(errStr, "timeout") ||
		strings.Contains(errStr, "unavailable")
}
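
A quick classification sketch (in-package, illustrative only; it would additionally need the standard errors import, which this file does not currently pull in):

func classifyExamples() {
	// true: transient, retried with backoff
	_ = isRetryableGrpcError(status.Error(codes.Unavailable, "filer restarting"))
	// false: not in the retryable set, fails fast
	_ = isRetryableGrpcError(status.Error(codes.NotFound, "volume not found"))
	// true: non-gRPC error caught by the string-match fallback ("connection")
	_ = isRetryableGrpcError(errors.New("connection refused"))
}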

// shouldSkipUnhealthyFiler checks if we should skip a filer based on recent failures.
// Circuit breaker pattern: skip filers with multiple recent consecutive failures.
func (fc *FilerClient) shouldSkipUnhealthyFiler(index int32) bool {
	health := fc.filerHealth[index]
	failureCount := atomic.LoadInt32(&health.failureCount)

	// Check if failure count exceeds threshold
	if failureCount < fc.failureThreshold {
		return false
	}

	// Re-check unhealthy filers after reset timeout
	lastFailureNs := atomic.LoadInt64(&health.lastFailureTimeNs)
	if lastFailureNs == 0 {
		return false // Never failed, shouldn't skip
	}
	lastFailureTime := time.Unix(0, lastFailureNs)
	if time.Since(lastFailureTime) > fc.resetTimeout {
		return false // Time to re-check
	}

	return true // Skip this unhealthy filer
}

// recordFilerSuccess resets failure tracking for a successful filer
func (fc *FilerClient) recordFilerSuccess(index int32) {
	health := fc.filerHealth[index]
	atomic.StoreInt32(&health.failureCount, 0)
}

// recordFilerFailure increments failure count for an unhealthy filer
func (fc *FilerClient) recordFilerFailure(index int32) {
	health := fc.filerHealth[index]
	atomic.AddInt32(&health.failureCount, 1)
	atomic.StoreInt64(&health.lastFailureTimeNs, time.Now().UnixNano())
}
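
Lifecycle sketch for the breaker with the defaults (threshold 3, reset timeout 30s); illustrative only, indices refer to positions in filerAddresses:

// In-package sketch, not part of this diff.
fc.recordFilerFailure(0)       // failure 1 of 3: filer 0 still eligible
fc.recordFilerFailure(0)       // failure 2 of 3
fc.recordFilerFailure(0)       // failure 3: circuit opens
fc.shouldSkipUnhealthyFiler(0) // true for the next resetTimeout (30s)
// ...after 30s elapse, the filer is probed again; one success closes the circuit:
fc.recordFilerSuccess(0)
fc.shouldSkipUnhealthyFiler(0) // false: failure count reset to 0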

// LookupVolumeIds queries the filer for volume locations with automatic failover.
// Tries all configured filer addresses until one succeeds (high availability).
// Retries transient gRPC errors (Unavailable, DeadlineExceeded, etc.) with exponential backoff.
// Note: Unlike the master's VolumeIdLocation, the filer's Locations message doesn't currently
// have an Error field. This implementation handles the current structure while being prepared
// for future error reporting enhancements.
func (p *filerVolumeProvider) LookupVolumeIds(ctx context.Context, volumeIds []string) (map[string][]Location, error) {
	fc := p.filerClient
	result := make(map[string][]Location)

	// Retry transient failures with configurable backoff
	var lastErr error
	waitTime := fc.initialRetryWait
	maxRetries := fc.maxRetries

	for retry := 0; retry < maxRetries; retry++ {
		// Try all filer addresses with round-robin starting from the current index.
		// Skip known-unhealthy filers (circuit breaker pattern).
		i := atomic.LoadInt32(&fc.filerIndex)
		n := int32(len(fc.filerAddresses))

		for x := int32(0); x < n; x++ {
			// Circuit breaker: skip unhealthy filers
			if fc.shouldSkipUnhealthyFiler(i) {
				glog.V(2).Infof("FilerClient: skipping unhealthy filer %s (consecutive failures: %d)",
					fc.filerAddresses[i], atomic.LoadInt32(&fc.filerHealth[i].failureCount))
				i++
				if i >= n {
					i = 0
				}
				continue
			}

			filerAddress := fc.filerAddresses[i]

			// Use an anonymous function so defer cancel() runs once per iteration instead of accumulating
			err := func() error {
				// Create a fresh timeout context for each filer attempt.
				// This ensures each retry gets the full grpcTimeout, not a diminishing deadline.
				timeoutCtx, cancel := context.WithTimeout(ctx, fc.grpcTimeout)
				defer cancel() // Always clean up context, even on panic or early return

				return pb.WithGrpcFilerClient(false, fc.clientId, filerAddress, fc.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
					resp, err := client.LookupVolume(timeoutCtx, &filer_pb.LookupVolumeRequest{
						VolumeIds: volumeIds,
					})
					if err != nil {
						return fmt.Errorf("filer.LookupVolume failed: %w", err)
					}

					// Process each volume in the response
					for vid, locs := range resp.LocationsMap {
						// Convert locations from protobuf to internal format
						var locations []Location
						for _, loc := range locs.Locations {
							locations = append(locations, Location{
								Url:        loc.Url,
								PublicUrl:  loc.PublicUrl,
								DataCenter: loc.DataCenter,
								GrpcPort:   int(loc.GrpcPort),
							})
						}

						// Only add to the result if we have locations.
						// Empty locations with no gRPC error means "not found" (volume doesn't exist).
						if len(locations) > 0 {
							result[vid] = locations
							glog.V(4).Infof("FilerClient: volume %s found with %d location(s)", vid, len(locations))
						} else {
							glog.V(2).Infof("FilerClient: volume %s not found (no locations in response)", vid)
						}
					}

					// Check for volumes that weren't in the response at all.
					// This could indicate a problem with the filer.
					for _, vid := range volumeIds {
						if _, found := resp.LocationsMap[vid]; !found {
							glog.V(1).Infof("FilerClient: volume %s missing from filer response", vid)
						}
					}

					return nil
				})
			}()

			if err != nil {
				glog.V(1).Infof("FilerClient: filer %s lookup failed (attempt %d/%d, retry %d/%d): %v", filerAddress, x+1, n, retry+1, maxRetries, err)
				fc.recordFilerFailure(i)
				lastErr = err
				i++
				if i >= n {
					i = 0
				}
				continue
			}

			// Success - update the preferred filer index and reset health tracking
			atomic.StoreInt32(&fc.filerIndex, i)
			fc.recordFilerSuccess(i)
			glog.V(3).Infof("FilerClient: looked up %d volumes on %s, found %d", len(volumeIds), filerAddress, len(result))
			return result, nil
		}

		// All filers failed on this attempt.
		// Check whether the error is retryable (transient gRPC error).
		if !isRetryableGrpcError(lastErr) {
			// Non-retryable error (e.g., NotFound, PermissionDenied) - fail immediately
			return nil, fmt.Errorf("all %d filer(s) failed with non-retryable error: %w", n, lastErr)
		}

		// Transient error - retry if we have attempts left
		if retry < maxRetries-1 {
			glog.V(1).Infof("FilerClient: all %d filer(s) failed with retryable error (attempt %d/%d), retrying in %v: %v",
				n, retry+1, maxRetries, waitTime, lastErr)
			time.Sleep(waitTime)
			waitTime = time.Duration(float64(waitTime) * fc.retryBackoffFactor)
		}
	}

	// All retries exhausted
	return nil, fmt.Errorf("all %d filer(s) failed after %d attempts, last error: %w", len(fc.filerAddresses), maxRetries, lastErr)
}
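
With the defaults, a cluster whose filers all keep returning retryable errors produces the timeline sketched below (this follows directly from the loop above):

// retry 0: sweep all filers -> all fail -> sleep 1s   (initialRetryWait)
// retry 1: sweep all filers -> all fail -> sleep 1.5s (1s * retryBackoffFactor)
// retry 2: sweep all filers -> all fail -> return "all N filer(s) failed after 3 attempts"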
weed/wdclient/vidmap_client.go
@@ -0,0 +1,347 @@
package wdclient

import (
	"context"
	"errors"
	"fmt"
	"math/rand"
	"sort"
	"strconv"
	"strings"
	"sync"

	"golang.org/x/sync/singleflight"

	"github.com/seaweedfs/seaweedfs/weed/glog"
)

// VolumeLocationProvider is the interface for looking up volume locations.
// This allows different implementations (master subscription, filer queries, etc.).
type VolumeLocationProvider interface {
	// LookupVolumeIds looks up volume locations for the given volume IDs.
	// Returns a map of volume ID to locations.
	LookupVolumeIds(ctx context.Context, volumeIds []string) (map[string][]Location, error)
}

// vidMapClient provides volume location caching with pluggable lookup.
// It wraps the battle-tested vidMap with customizable volume lookup strategies.
type vidMapClient struct {
	vidMap          *vidMap
	vidMapLock      sync.RWMutex
	vidMapCacheSize int
	provider        VolumeLocationProvider
	vidLookupGroup  singleflight.Group
}
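
Any location source can sit behind this interface. A minimal in-package sketch (hypothetical, e.g. for tests) that serves lookups from a fixed map:

// staticVolumeProvider is a hypothetical provider backed by a fixed map.
type staticVolumeProvider struct {
	locations map[string][]Location
}

func (p *staticVolumeProvider) LookupVolumeIds(ctx context.Context, volumeIds []string) (map[string][]Location, error) {
	result := make(map[string][]Location, len(volumeIds))
	for _, vid := range volumeIds {
		if locs, ok := p.locations[vid]; ok {
			result[vid] = locs
		}
	}
	return result, nil // missing volumes are simply absent from the map
}

// wiring: vc := newVidMapClient(&staticVolumeProvider{...}, "dc1", DefaultVidMapCacheSize)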

const (
	// DefaultVidMapCacheSize is the default number of historical vidMap snapshots to keep.
	// This provides cache history when volumes move between servers.
	DefaultVidMapCacheSize = 5
)

// newVidMapClient creates a new client with the given provider and data center
func newVidMapClient(provider VolumeLocationProvider, dataCenter string, cacheSize int) *vidMapClient {
	if cacheSize <= 0 {
		cacheSize = DefaultVidMapCacheSize
	}
	return &vidMapClient{
		vidMap:          newVidMap(dataCenter),
		vidMapCacheSize: cacheSize,
		provider:        provider,
	}
}

// GetLookupFileIdFunction returns a function that can be used to look up file IDs
func (vc *vidMapClient) GetLookupFileIdFunction() LookupFileIdFunctionType {
	return vc.LookupFileIdWithFallback
}

// LookupFileIdWithFallback looks up a file ID, checking the cache first, then using the provider
func (vc *vidMapClient) LookupFileIdWithFallback(ctx context.Context, fileId string) (fullUrls []string, err error) {
	// Try cache first - hold the read lock during the entire vidMap access to prevent a swap during the operation
	vc.vidMapLock.RLock()
	vm := vc.vidMap
	dataCenter := vm.DataCenter
	fullUrls, err = vm.LookupFileId(ctx, fileId)
	vc.vidMapLock.RUnlock()

	// Cache hit - return immediately
	if err == nil && len(fullUrls) > 0 {
		return
	}

	// Cache miss - extract volume ID from file ID (format: "volumeId,needle_id_cookie")
	parts := strings.Split(fileId, ",")
	if len(parts) != 2 {
		return nil, fmt.Errorf("invalid fileId %s", fileId)
	}
	volumeId := parts[0]

	// Use shared lookup logic with batching and singleflight
	vidLocations, err := vc.LookupVolumeIdsWithFallback(ctx, []string{volumeId})

	// Check for partial results first (important for multi-volume batched lookups)
	locations, found := vidLocations[volumeId]
	if !found || len(locations) == 0 {
		// Volume not found - return a specific error with context from the lookup if available
		if err != nil {
			return nil, fmt.Errorf("volume %s not found for fileId %s: %w", volumeId, fileId, err)
		}
		return nil, fmt.Errorf("volume %s not found for fileId %s", volumeId, fileId)
	}

	// Volume found successfully - ignore any errors about other volumes
	// (not relevant for single-volume lookup, but defensive for future batching)

	// Build HTTP URLs from locations, preferring the same data center
	var sameDcUrls, otherDcUrls []string
	for _, loc := range locations {
		httpUrl := "http://" + loc.Url + "/" + fileId
		if dataCenter != "" && dataCenter == loc.DataCenter {
			sameDcUrls = append(sameDcUrls, httpUrl)
		} else {
			otherDcUrls = append(otherDcUrls, httpUrl)
		}
	}

	// Shuffle to distribute load across volume servers
	rand.Shuffle(len(sameDcUrls), func(i, j int) { sameDcUrls[i], sameDcUrls[j] = sameDcUrls[j], sameDcUrls[i] })
	rand.Shuffle(len(otherDcUrls), func(i, j int) { otherDcUrls[i], otherDcUrls[j] = otherDcUrls[j], otherDcUrls[i] })

	// Prefer same data center
	fullUrls = append(sameDcUrls, otherDcUrls...)
	return fullUrls, nil
}

// LookupVolumeIdsWithFallback looks up volume locations, querying the provider if not in cache.
// Uses singleflight to coalesce concurrent requests for the same batch of volumes.
//
// IMPORTANT: This function may return PARTIAL results with a non-nil error.
// The result map contains successfully looked up volumes, while the error aggregates
// failures for volumes that couldn't be found or had lookup errors.
//
// Callers MUST check both the result map AND the error:
//   - result != nil && err == nil: All volumes found successfully
//   - result != nil && err != nil: Some volumes found, some failed (check both)
//   - result == nil && err != nil: Complete failure (connection error, etc.)
//
// Example usage:
//
//	locs, err := mc.LookupVolumeIdsWithFallback(ctx, []string{"1", "2", "999"})
//	if len(locs) > 0 {
//		// Process successfully found volumes
//	}
//	if err != nil {
//		// Log/handle failed volumes
//	}
func (vc *vidMapClient) LookupVolumeIdsWithFallback(ctx context.Context, volumeIds []string) (map[string][]Location, error) {
	result := make(map[string][]Location)
	var needsLookup []string
	var lookupErrors []error

	// Check the cache first and parse volume IDs once
	vidStringToUint := make(map[string]uint32, len(volumeIds))

	// Get a stable pointer to the vidMap with minimal lock hold time
	vm := vc.getStableVidMap()

	for _, vidString := range volumeIds {
		vid, err := strconv.ParseUint(vidString, 10, 32)
		if err != nil {
			return nil, fmt.Errorf("invalid volume id %s: %v", vidString, err)
		}
		vidStringToUint[vidString] = uint32(vid)

		locations, found := vm.GetLocations(uint32(vid))
		if found && len(locations) > 0 {
			result[vidString] = locations
		} else {
			needsLookup = append(needsLookup, vidString)
		}
	}

	if len(needsLookup) == 0 {
		return result, nil
	}

	// Batch query all missing volumes using singleflight on the batch key.
	// Sort for a stable key so identical batches coalesce.
	sort.Strings(needsLookup)
	batchKey := strings.Join(needsLookup, ",")

	sfResult, err, _ := vc.vidLookupGroup.Do(batchKey, func() (interface{}, error) {
		// Double-check the cache for volumes that might have been populated while waiting
		stillNeedLookup := make([]string, 0, len(needsLookup))
		batchResult := make(map[string][]Location)

		// Get a stable pointer with minimal lock hold time
		vm := vc.getStableVidMap()

		for _, vidString := range needsLookup {
			vid := vidStringToUint[vidString] // Use pre-parsed value
			if locations, found := vm.GetLocations(vid); found && len(locations) > 0 {
				batchResult[vidString] = locations
			} else {
				stillNeedLookup = append(stillNeedLookup, vidString)
			}
		}

		if len(stillNeedLookup) == 0 {
			return batchResult, nil
		}

		// Query the provider with batched volume IDs
		glog.V(2).Infof("Looking up %d volumes from provider: %v", len(stillNeedLookup), stillNeedLookup)

		providerResults, err := vc.provider.LookupVolumeIds(ctx, stillNeedLookup)
		if err != nil {
			return batchResult, fmt.Errorf("provider lookup failed: %v", err)
		}

		// Update the cache with results
		for vidString, locations := range providerResults {
			vid, err := strconv.ParseUint(vidString, 10, 32)
			if err != nil {
				glog.Warningf("Failed to parse volume id '%s': %v", vidString, err)
				continue
			}

			for _, loc := range locations {
				vc.addLocation(uint32(vid), loc)
			}

			if len(locations) > 0 {
				batchResult[vidString] = locations
			}
		}

		return batchResult, nil
	})

	if err != nil {
		lookupErrors = append(lookupErrors, err)
	}

	// Merge singleflight batch results
	if batchLocations, ok := sfResult.(map[string][]Location); ok {
		for vid, locs := range batchLocations {
			result[vid] = locs
		}
	}

	// Check for volumes that still weren't found
	for _, vidString := range needsLookup {
		if _, found := result[vidString]; !found {
			lookupErrors = append(lookupErrors, fmt.Errorf("volume %s not found", vidString))
		}
	}

	// Return aggregated errors
	return result, errors.Join(lookupErrors...)
}

// getStableVidMap gets a stable pointer to the vidMap, releasing the lock immediately.
// WARNING: Use with caution. The returned vidMap pointer is stable (won't be garbage collected
// due to the cache chain), but the vidMapClient.vidMap field may be swapped by resetVidMap().
// For operations that must use the current vidMap atomically, use withCurrentVidMap() instead.
func (vc *vidMapClient) getStableVidMap() *vidMap {
	vc.vidMapLock.RLock()
	vm := vc.vidMap
	vc.vidMapLock.RUnlock()
	return vm
}

// withCurrentVidMap executes a function with the current vidMap under a read lock.
// This guarantees the vidMap instance cannot be swapped during the function execution.
// Use this when you need atomic access to the current vidMap for multiple operations.
func (vc *vidMapClient) withCurrentVidMap(f func(vm *vidMap)) {
	vc.vidMapLock.RLock()
	defer vc.vidMapLock.RUnlock()
	f(vc.vidMap)
}
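
The distinction matters for read-modify-write sequences. A sketch with a hypothetical volume ID and location:

// Under withCurrentVidMap, resetVidMap() cannot swap the map between the read
// and the write; with getStableVidMap() the write could land on a map that is
// no longer current.
vc.withCurrentVidMap(func(vm *vidMap) {
	if _, found := vm.GetLocations(42); !found {
		vm.addLocation(42, Location{Url: "volume1:8080"}) // hypothetical location
	}
})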

// Public methods for external access

// GetLocations safely retrieves volume locations
func (vc *vidMapClient) GetLocations(vid uint32) (locations []Location, found bool) {
	return vc.getStableVidMap().GetLocations(vid)
}

// GetLocationsClone safely retrieves a clone of volume locations
func (vc *vidMapClient) GetLocationsClone(vid uint32) (locations []Location, found bool) {
	return vc.getStableVidMap().GetLocationsClone(vid)
}

// GetVidLocations safely retrieves volume locations by string ID
func (vc *vidMapClient) GetVidLocations(vid string) (locations []Location, err error) {
	return vc.getStableVidMap().GetVidLocations(vid)
}

// LookupFileId safely looks up URLs for a file ID
func (vc *vidMapClient) LookupFileId(ctx context.Context, fileId string) (fullUrls []string, err error) {
	return vc.getStableVidMap().LookupFileId(ctx, fileId)
}

// LookupVolumeServerUrl safely looks up volume server URLs
func (vc *vidMapClient) LookupVolumeServerUrl(vid string) (serverUrls []string, err error) {
	return vc.getStableVidMap().LookupVolumeServerUrl(vid)
}

// GetDataCenter safely retrieves the data center
func (vc *vidMapClient) GetDataCenter() string {
	return vc.getStableVidMap().DataCenter
}

// Thread-safe helpers for vidMap operations

// addLocation adds a volume location
func (vc *vidMapClient) addLocation(vid uint32, location Location) {
	vc.withCurrentVidMap(func(vm *vidMap) {
		vm.addLocation(vid, location)
	})
}

// deleteLocation removes a volume location
func (vc *vidMapClient) deleteLocation(vid uint32, location Location) {
	vc.withCurrentVidMap(func(vm *vidMap) {
		vm.deleteLocation(vid, location)
	})
}

// addEcLocation adds an EC volume location
func (vc *vidMapClient) addEcLocation(vid uint32, location Location) {
	vc.withCurrentVidMap(func(vm *vidMap) {
		vm.addEcLocation(vid, location)
	})
}

// deleteEcLocation removes an EC volume location
func (vc *vidMapClient) deleteEcLocation(vid uint32, location Location) {
	vc.withCurrentVidMap(func(vm *vidMap) {
		vm.deleteEcLocation(vid, location)
	})
}

// resetVidMap resets the volume ID map
func (vc *vidMapClient) resetVidMap() {
	vc.vidMapLock.Lock()
	defer vc.vidMapLock.Unlock()

	// Preserve the existing vidMap in the cache chain
	tail := vc.vidMap

	nvm := newVidMap(tail.DataCenter)
	nvm.cache.Store(tail)
	vc.vidMap = nvm

	// Trim the cache chain to vidMapCacheSize
	node := tail
	for i := 0; i < vc.vidMapCacheSize-1; i++ {
		if node.cache.Load() == nil {
			return
		}
		node = node.cache.Load()
	}
	// node is guaranteed to be non-nil after the loop
	node.cache.Store(nil)
}
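
Worked instance of the trim: with the default vidMapCacheSize of 5, the loop walks four links down from the old head and severs the chain there, so after repeated resets the chain is at most current -> s1 -> s2 -> s3 -> s4 -> s5 (the fresh map plus five historical snapshots). Anything older is unlinked and becomes eligible for garbage collection, while lookups can still fall back to the retained snapshots.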