package wdclient

import (
	"context"
	"fmt"
	"math/rand"
	"strings"
	"sync/atomic"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
)
// UrlPreference controls which URL to use for volume access
type UrlPreference string

const (
	PreferUrl       UrlPreference = "url"       // Use private URL (default)
	PreferPublicUrl UrlPreference = "publicUrl" // Use public URL
)

// filerHealth tracks the health status of a filer
type filerHealth struct {
	failureCount      int32 // atomic: consecutive failures
	lastFailureTimeNs int64 // atomic: last failure time in Unix nanoseconds
}
// FilerClient provides volume location services by querying a filer
// It uses the shared vidMap cache for efficient lookups
// Supports multiple filer addresses with automatic failover for high availability
// Tracks filer health to avoid repeatedly trying known-unhealthy filers
type FilerClient struct {
	*vidMapClient
	filerAddresses     []pb.ServerAddress
	filerIndex         int32          // atomic: current filer index for round-robin
	filerHealth        []*filerHealth // health status per filer (same order as filerAddresses)
	grpcDialOption     grpc.DialOption
	urlPreference      UrlPreference
	grpcTimeout        time.Duration
	cacheSize          int           // Number of historical vidMap snapshots to keep
	clientId           int32         // Unique client identifier for gRPC metadata
	failureThreshold   int32         // Circuit breaker: consecutive failures before circuit opens
	resetTimeout       time.Duration // Circuit breaker: time before re-checking unhealthy filer
	maxRetries         int           // Retry: maximum retry attempts for transient failures
	initialRetryWait   time.Duration // Retry: initial wait time before first retry
	retryBackoffFactor float64       // Retry: backoff multiplier for wait time
}
// filerVolumeProvider implements VolumeLocationProvider by querying the filer
// Supports multiple filer addresses with automatic failover
type filerVolumeProvider struct {
	filerClient *FilerClient
}

// FilerClientOption holds optional configuration for FilerClient
type FilerClientOption struct {
	GrpcTimeout        time.Duration
	UrlPreference      UrlPreference
	CacheSize          int           // Number of historical vidMap snapshots (0 = use default)
	FailureThreshold   int32         // Circuit breaker: consecutive failures before skipping filer (0 = use default of 3)
	ResetTimeout       time.Duration // Circuit breaker: time before re-checking unhealthy filer (0 = use default of 30s)
	MaxRetries         int           // Retry: maximum retry attempts for transient failures (0 = use default of 3)
	InitialRetryWait   time.Duration // Retry: initial wait time before first retry (0 = use default of 1s)
	RetryBackoffFactor float64       // Retry: backoff multiplier for wait time (0 = use default of 1.5)
}
// NewFilerClient creates a new client that queries filer(s) for volume locations
// Supports multiple filer addresses for high availability with automatic failover
// Uses sensible defaults: 5-second gRPC timeout, PreferUrl, DefaultVidMapCacheSize
func NewFilerClient(filerAddresses []pb.ServerAddress, grpcDialOption grpc.DialOption, dataCenter string, opts ...*FilerClientOption) *FilerClient {
	if len(filerAddresses) == 0 {
		glog.Fatal("NewFilerClient requires at least one filer address")
	}

	// Apply defaults
	grpcTimeout := 5 * time.Second
	urlPref := PreferUrl
	cacheSize := DefaultVidMapCacheSize
	failureThreshold := int32(3)     // Default: 3 consecutive failures before circuit opens
	resetTimeout := 30 * time.Second // Default: 30 seconds before re-checking unhealthy filer
	maxRetries := 3                  // Default: 3 retry attempts for transient failures
	initialRetryWait := time.Second  // Default: 1 second initial retry wait
	retryBackoffFactor := 1.5        // Default: 1.5x backoff multiplier

	// Override with provided options
	if len(opts) > 0 && opts[0] != nil {
		opt := opts[0]
		if opt.GrpcTimeout > 0 {
			grpcTimeout = opt.GrpcTimeout
		}
		if opt.UrlPreference != "" {
			urlPref = opt.UrlPreference
		}
		if opt.CacheSize > 0 {
			cacheSize = opt.CacheSize
		}
		if opt.FailureThreshold > 0 {
			failureThreshold = opt.FailureThreshold
		}
		if opt.ResetTimeout > 0 {
			resetTimeout = opt.ResetTimeout
		}
		if opt.MaxRetries > 0 {
			maxRetries = opt.MaxRetries
		}
		if opt.InitialRetryWait > 0 {
			initialRetryWait = opt.InitialRetryWait
		}
		if opt.RetryBackoffFactor > 0 {
			retryBackoffFactor = opt.RetryBackoffFactor
		}
	}

	// Initialize health tracking for each filer
	health := make([]*filerHealth, len(filerAddresses))
	for i := range health {
		health[i] = &filerHealth{}
	}

	fc := &FilerClient{
		filerAddresses:     filerAddresses,
		filerIndex:         0,
		filerHealth:        health,
		grpcDialOption:     grpcDialOption,
		urlPreference:      urlPref,
		grpcTimeout:        grpcTimeout,
		cacheSize:          cacheSize,
		clientId:           rand.Int31(), // Random client ID for gRPC metadata tracking
		failureThreshold:   failureThreshold,
		resetTimeout:       resetTimeout,
		maxRetries:         maxRetries,
		initialRetryWait:   initialRetryWait,
		retryBackoffFactor: retryBackoffFactor,
	}

	// Create provider that references this FilerClient for failover support
	provider := &filerVolumeProvider{
		filerClient: fc,
	}
	fc.vidMapClient = newVidMapClient(provider, dataCenter, cacheSize)

	return fc
}
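
// The sketch below is illustrative only and not part of the upstream file: it
// shows one plausible way to construct a FilerClient with custom retry and
// circuit-breaker settings using the options defined above. The filer address,
// data center name, and option values are placeholder assumptions.
func exampleNewFilerClientUsage(grpcDialOption grpc.DialOption) *FilerClient {
	filers := []pb.ServerAddress{"localhost:8888"} // assumed filer address
	return NewFilerClient(filers, grpcDialOption, "dc1", &FilerClientOption{
		GrpcTimeout:        10 * time.Second,       // allow slower lookups
		UrlPreference:      PreferPublicUrl,        // build URLs from publicUrl
		FailureThreshold:   5,                      // open the circuit after 5 consecutive failures
		ResetTimeout:       time.Minute,            // re-check an unhealthy filer after 1 minute
		MaxRetries:         4,                      // retry transient errors up to 4 times
		InitialRetryWait:   500 * time.Millisecond, // first backoff wait
		RetryBackoffFactor: 2.0,                    // double the wait each retry
	})
}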
// GetLookupFileIdFunction returns a lookup function with URL preference handling
func (fc *FilerClient) GetLookupFileIdFunction() LookupFileIdFunctionType {
	if fc.urlPreference == PreferUrl {
		// Use the default implementation from vidMapClient
		return fc.vidMapClient.GetLookupFileIdFunction()
	}

	// Custom implementation that prefers PublicUrl
	return func(ctx context.Context, fileId string) (fullUrls []string, err error) {
		// Parse file ID to extract volume ID
		parts := strings.Split(fileId, ",")
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid fileId format: %s", fileId)
		}
		volumeIdStr := parts[0]

		// First try the cache using LookupVolumeIdsWithFallback
		vidLocations, err := fc.LookupVolumeIdsWithFallback(ctx, []string{volumeIdStr})

		// Check for partial results first (important for multi-volume batched lookups)
		locations, found := vidLocations[volumeIdStr]
		if !found || len(locations) == 0 {
			// Volume not found - return specific error with context from lookup if available
			if err != nil {
				return nil, fmt.Errorf("volume %s not found for fileId %s: %w", volumeIdStr, fileId, err)
			}
			return nil, fmt.Errorf("volume %s not found for fileId %s", volumeIdStr, fileId)
		}

		// Volume found successfully - ignore any errors about other volumes
		// (not relevant for single-volume lookup, but defensive for future batching)

		// Build URLs with publicUrl preference, and also prefer same DC
		var sameDcUrls, otherDcUrls []string
		dataCenter := fc.GetDataCenter()
		for _, loc := range locations {
			url := loc.PublicUrl
			if url == "" {
				url = loc.Url
			}
			httpUrl := "http://" + url + "/" + fileId
			if dataCenter != "" && dataCenter == loc.DataCenter {
				sameDcUrls = append(sameDcUrls, httpUrl)
			} else {
				otherDcUrls = append(otherDcUrls, httpUrl)
			}
		}

		// Shuffle to distribute load across volume servers
		rand.Shuffle(len(sameDcUrls), func(i, j int) { sameDcUrls[i], sameDcUrls[j] = sameDcUrls[j], sameDcUrls[i] })
		rand.Shuffle(len(otherDcUrls), func(i, j int) { otherDcUrls[i], otherDcUrls[j] = otherDcUrls[j], otherDcUrls[i] })

		// Prefer same data center
		fullUrls = append(sameDcUrls, otherDcUrls...)
		return fullUrls, nil
	}
}
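
// Illustrative sketch, not part of the upstream file: resolving a file id to
// candidate volume-server URLs through the returned lookup function. The file
// id literal is a placeholder assumption in the usual "volumeId,fileKey" form.
func exampleLookupFileIdUsage(ctx context.Context, fc *FilerClient) ([]string, error) {
	lookup := fc.GetLookupFileIdFunction()
	// URLs come back with same-DC locations first, shuffled within each group
	return lookup(ctx, "3,01637037d6")
}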
// isRetryableGrpcError checks if a gRPC error is transient and should be retried
//
// Note on codes.Aborted: While Aborted can indicate application-level conflicts
// (e.g., transaction failures), in the context of volume location lookups (which
// are simple read-only operations with no transactions), Aborted is more likely
// to indicate transient server issues during restart/recovery. We include it here
// for volume lookups but log it for visibility in case misclassification occurs.
func isRetryableGrpcError(err error) bool {
	if err == nil {
		return false
	}

	// Check gRPC status code
	st, ok := status.FromError(err)
	if ok {
		switch st.Code() {
		case codes.Unavailable: // Server unavailable (temporary)
			return true
		case codes.DeadlineExceeded: // Request timeout
			return true
		case codes.ResourceExhausted: // Rate limited or overloaded
			return true
		case codes.Aborted:
			// Aborted during read-only volume lookups is likely transient
			// (e.g., filer restarting), but log for visibility
			glog.V(1).Infof("Treating Aborted as retryable for volume lookup: %v", err)
			return true
		}
	}

	// Fallback to string matching for non-gRPC errors (e.g., network errors)
	errStr := err.Error()
	return strings.Contains(errStr, "transport") ||
		strings.Contains(errStr, "connection") ||
		strings.Contains(errStr, "timeout") ||
		strings.Contains(errStr, "unavailable")
}
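
// Illustrative sketch, not part of the upstream file: how the classification
// above plays out for a few representative errors.
func exampleRetryClassification() {
	_ = isRetryableGrpcError(status.Error(codes.Unavailable, "filer restarting")) // true: transient, retried with backoff
	_ = isRetryableGrpcError(status.Error(codes.PermissionDenied, "not allowed")) // false: fails immediately
	_ = isRetryableGrpcError(fmt.Errorf("connection reset by peer"))              // true: matched by the string fallback
}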
// shouldSkipUnhealthyFiler checks if we should skip a filer based on recent failures
// Circuit breaker pattern: skip filers with multiple recent consecutive failures
func (fc *FilerClient) shouldSkipUnhealthyFiler(index int32) bool {
	health := fc.filerHealth[index]
	failureCount := atomic.LoadInt32(&health.failureCount)

	// Check if failure count exceeds threshold
	if failureCount < fc.failureThreshold {
		return false
	}

	// Re-check unhealthy filers after reset timeout
	lastFailureNs := atomic.LoadInt64(&health.lastFailureTimeNs)
	if lastFailureNs == 0 {
		return false // Never failed, shouldn't skip
	}
	lastFailureTime := time.Unix(0, lastFailureNs)
	if time.Since(lastFailureTime) > fc.resetTimeout {
		return false // Time to re-check
	}

	return true // Skip this unhealthy filer
}

// recordFilerSuccess resets failure tracking for a successful filer
func (fc *FilerClient) recordFilerSuccess(index int32) {
	health := fc.filerHealth[index]
	atomic.StoreInt32(&health.failureCount, 0)
}
// recordFilerFailure increments the failure count and records the failure time for a filer
func (fc *FilerClient) recordFilerFailure(index int32) {
	health := fc.filerHealth[index]
	atomic.AddInt32(&health.failureCount, 1)
	atomic.StoreInt64(&health.lastFailureTimeNs, time.Now().UnixNano())
}
// LookupVolumeIds queries the filer for volume locations with automatic failover
// Tries all configured filer addresses until one succeeds (high availability)
// Retries transient gRPC errors (Unavailable, DeadlineExceeded, etc.) with exponential backoff
// Note: Unlike master's VolumeIdLocation, filer's Locations message doesn't currently have
// an Error field. This implementation handles the current structure while being prepared
// for future error reporting enhancements.
func (p *filerVolumeProvider) LookupVolumeIds(ctx context.Context, volumeIds []string) (map[string][]Location, error) {
	fc := p.filerClient
	result := make(map[string][]Location)

	// Retry transient failures with configurable backoff
	var lastErr error
	waitTime := fc.initialRetryWait
	maxRetries := fc.maxRetries

	for retry := 0; retry < maxRetries; retry++ {
		// Try all filer addresses with round-robin starting from current index
		// Skip known-unhealthy filers (circuit breaker pattern)
		i := atomic.LoadInt32(&fc.filerIndex)
		n := int32(len(fc.filerAddresses))

		for x := int32(0); x < n; x++ {
			// Circuit breaker: skip unhealthy filers
			if fc.shouldSkipUnhealthyFiler(i) {
				glog.V(2).Infof("FilerClient: skipping unhealthy filer %s (consecutive failures: %d)",
					fc.filerAddresses[i], atomic.LoadInt32(&fc.filerHealth[i].failureCount))
				i++
				if i >= n {
					i = 0
				}
				continue
			}

			filerAddress := fc.filerAddresses[i]

			// Use anonymous function to ensure defer cancel() is called per iteration, not accumulated
			err := func() error {
				// Create a fresh timeout context for each filer attempt
				// This ensures each retry gets the full grpcTimeout, not a diminishing deadline
				timeoutCtx, cancel := context.WithTimeout(ctx, fc.grpcTimeout)
				defer cancel() // Always clean up context, even on panic or early return

				return pb.WithGrpcFilerClient(false, fc.clientId, filerAddress, fc.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
					resp, err := client.LookupVolume(timeoutCtx, &filer_pb.LookupVolumeRequest{
						VolumeIds: volumeIds,
					})
					if err != nil {
						return fmt.Errorf("filer.LookupVolume failed: %w", err)
					}

					// Process each volume in the response
					for vid, locs := range resp.LocationsMap {
						// Convert locations from protobuf to internal format
						var locations []Location
						for _, loc := range locs.Locations {
							locations = append(locations, Location{
								Url:        loc.Url,
								PublicUrl:  loc.PublicUrl,
								DataCenter: loc.DataCenter,
								GrpcPort:   int(loc.GrpcPort),
							})
						}

						// Only add to result if we have locations
						// Empty locations with no gRPC error means "not found" (volume doesn't exist)
						if len(locations) > 0 {
							result[vid] = locations
							glog.V(4).Infof("FilerClient: volume %s found with %d location(s)", vid, len(locations))
						} else {
							glog.V(2).Infof("FilerClient: volume %s not found (no locations in response)", vid)
						}
					}

					// Check for volumes that weren't in the response at all
					// This could indicate a problem with the filer
					for _, vid := range volumeIds {
						if _, found := resp.LocationsMap[vid]; !found {
							glog.V(1).Infof("FilerClient: volume %s missing from filer response", vid)
						}
					}

					return nil
				})
			}()

			if err != nil {
				glog.V(1).Infof("FilerClient: filer %s lookup failed (attempt %d/%d, retry %d/%d): %v", filerAddress, x+1, n, retry+1, maxRetries, err)
				fc.recordFilerFailure(i)
				lastErr = err
				i++
				if i >= n {
					i = 0
				}
				continue
			}

			// Success - update the preferred filer index and reset health tracking
			atomic.StoreInt32(&fc.filerIndex, i)
			fc.recordFilerSuccess(i)
			glog.V(3).Infof("FilerClient: looked up %d volumes on %s, found %d", len(volumeIds), filerAddress, len(result))
			return result, nil
		}

		// All filers failed on this attempt
		// Check if the error is retryable (transient gRPC error)
		if !isRetryableGrpcError(lastErr) {
			// Non-retryable error (e.g., NotFound, PermissionDenied) - fail immediately
			return nil, fmt.Errorf("all %d filer(s) failed with non-retryable error: %w", n, lastErr)
		}

		// Transient error - retry if we have attempts left
		if retry < maxRetries-1 {
			glog.V(1).Infof("FilerClient: all %d filer(s) failed with retryable error (attempt %d/%d), retrying in %v: %v",
				n, retry+1, maxRetries, waitTime, lastErr)
			time.Sleep(waitTime)
			waitTime = time.Duration(float64(waitTime) * fc.retryBackoffFactor)
		}
	}

	// All retries exhausted
	return nil, fmt.Errorf("all %d filer(s) failed after %d attempts, last error: %w", len(fc.filerAddresses), maxRetries, lastErr)
}