package filer_client

import (
	"fmt"
	"math/rand"
	"sync"
	"sync/atomic"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
	"google.golang.org/grpc"
)

// filerHealth tracks the health status of a filer
type filerHealth struct {
	address      pb.ServerAddress
	failureCount int32      // updated atomically
	mu           sync.Mutex // guards lastFailure and backoffUntil
	lastFailure  time.Time
	backoffUntil time.Time
}

// isHealthy returns true if the filer is not in a backoff period
func (fh *filerHealth) isHealthy() bool {
	fh.mu.Lock()
	defer fh.mu.Unlock()
	return time.Now().After(fh.backoffUntil)
}

// recordFailure updates the failure count and sets the backoff deadline using exponential backoff
func (fh *filerHealth) recordFailure() {
	count := atomic.AddInt32(&fh.failureCount, 1)

	// Exponential backoff: 1s, 2s, 4s, 8s, 16s, then capped at 30s.
	// Clamp the shift so a large failure count cannot overflow the int.
	backoffSeconds := 30
	if count <= 5 {
		backoffSeconds = 1 << (count - 1)
	}

	fh.mu.Lock()
	fh.lastFailure = time.Now()
	fh.backoffUntil = time.Now().Add(time.Duration(backoffSeconds) * time.Second)
	fh.mu.Unlock()

	glog.V(1).Infof("Filer %v failed %d times, backing off for %ds", fh.address, count, backoffSeconds)
}

// recordSuccess resets the failure count and clears the backoff deadline
func (fh *filerHealth) recordSuccess() {
	atomic.StoreInt32(&fh.failureCount, 0)
	fh.mu.Lock()
	fh.backoffUntil = time.Time{}
	fh.mu.Unlock()
}
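
// FilerClientAccessor provides gRPC access to one or more filers, preferring
// healthy filers and trying filers in backoff only as a last resort.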
type FilerClientAccessor struct {
	GetGrpcDialOption func() grpc.DialOption
	GetFilers         func() []pb.ServerAddress // Returns multiple filer addresses for failover

	// Health tracking for smart failover
	filerHealthMap sync.Map // map[pb.ServerAddress]*filerHealth
}

// getOrCreateFilerHealth returns the health tracker for a filer, creating one if needed
func (fca *FilerClientAccessor) getOrCreateFilerHealth(address pb.ServerAddress) *filerHealth {
	if health, ok := fca.filerHealthMap.Load(address); ok {
		return health.(*filerHealth)
	}

	newHealth := &filerHealth{
		address: address,
	}

	// LoadOrStore resolves the race where another goroutine stored a tracker first
	actual, _ := fca.filerHealthMap.LoadOrStore(address, newHealth)
	return actual.(*filerHealth)
}

// partitionFilers separates filers into healthy and backoff groups
func (fca *FilerClientAccessor) partitionFilers(filers []pb.ServerAddress) (healthy, backoff []pb.ServerAddress) {
	for _, filer := range filers {
		health := fca.getOrCreateFilerHealth(filer)
		if health.isHealthy() {
			healthy = append(healthy, filer)
		} else {
			backoff = append(backoff, filer)
		}
	}
	return healthy, backoff
}

// shuffleFilers returns a copy of filers in randomized order to distribute load
func (fca *FilerClientAccessor) shuffleFilers(filers []pb.ServerAddress) []pb.ServerAddress {
	if len(filers) <= 1 {
		return filers
	}

	shuffled := make([]pb.ServerAddress, len(filers))
	copy(shuffled, filers)

	// Fisher-Yates shuffle via the standard library
	rand.Shuffle(len(shuffled), func(i, j int) {
		shuffled[i], shuffled[j] = shuffled[j], shuffled[i]
	})

	return shuffled
}
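
// WithFilerClient runs fn against one of the configured filers, with
// health-aware ordering and automatic failover between them.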
func (fca *FilerClientAccessor) WithFilerClient(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error {
	return fca.withMultipleFilers(streamingMode, fn)
}

// withMultipleFilers tries each filer with smart failover and backoff logic
func (fca *FilerClientAccessor) withMultipleFilers(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error {
	filers := fca.GetFilers()
	if len(filers) == 0 {
		return fmt.Errorf("no filer addresses available")
	}

	// Partition filers into healthy and backoff groups
	healthyFilers, backoffFilers := fca.partitionFilers(filers)

	// Shuffle healthy filers to distribute load evenly
	healthyFilers = fca.shuffleFilers(healthyFilers)

	// Try healthy filers first
	var lastErr error
	for _, filerAddress := range healthyFilers {
		health := fca.getOrCreateFilerHealth(filerAddress)

		err := pb.WithFilerClient(streamingMode, 0, filerAddress, fca.GetGrpcDialOption(), fn)
		if err == nil {
			// Success: reset the failure count and return
			health.recordSuccess()
			glog.V(2).Infof("Filer %v succeeded", filerAddress)
			return nil
		}

		// Record the failure and continue to the next filer
		health.recordFailure()
		lastErr = err
		glog.V(1).Infof("Healthy filer %v failed: %v, trying next", filerAddress, err)
	}

	// If all healthy filers failed, try backoff filers as a last resort
	if len(backoffFilers) > 0 {
		glog.V(1).Infof("All healthy filers failed, trying %d backoff filers", len(backoffFilers))

		for _, filerAddress := range backoffFilers {
			health := fca.getOrCreateFilerHealth(filerAddress)

			err := pb.WithFilerClient(streamingMode, 0, filerAddress, fca.GetGrpcDialOption(), fn)
			if err == nil {
				// Success: the filer recovered, so reset its failure count
				health.recordSuccess()
				glog.V(1).Infof("Backoff filer %v recovered and succeeded", filerAddress)
				return nil
			}

			// Still failing: extend the backoff
			health.recordFailure()
			lastErr = err
			glog.V(1).Infof("Backoff filer %v still failing: %v", filerAddress, err)
		}
	}

	return fmt.Errorf("all filer connections failed, last error: %w", lastErr)
}
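
// Caller-side sketch (an assumed usage pattern, not part of this package):
// since the aggregate error wraps the last failure via %w, errors.Unwrap
// recovers the underlying per-filer error for inspection:
//
//	if err := fca.WithFilerClient(false, fn); err != nil {
//		lastFilerErr := errors.Unwrap(err) // the last per-filer error, or nil if no filers were configured
//		glog.V(1).Infof("all filers failed: %v", lastFilerErr)
//	}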

// SaveTopicConfToFiler saves the topic configuration on the filer
func (fca *FilerClientAccessor) SaveTopicConfToFiler(t topic.Topic, conf *mq_pb.ConfigureTopicResponse) error {
	glog.V(0).Infof("save conf for topic %v to filer", t)

	return fca.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		return t.WriteConfFile(client, conf)
	})
}

// ReadTopicConfFromFiler loads the topic configuration from the filer
func (fca *FilerClientAccessor) ReadTopicConfFromFiler(t topic.Topic) (conf *mq_pb.ConfigureTopicResponse, err error) {
	glog.V(1).Infof("load conf for topic %v from filer", t)

	if err = fca.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		conf, err = t.ReadConfFile(client)
		return err
	}); err != nil {
		return nil, err
	}

	return conf, nil
}

// ReadTopicConfFromFilerWithMetadata reads the topic configuration along with the file creation and modification times
func (fca *FilerClientAccessor) ReadTopicConfFromFilerWithMetadata(t topic.Topic) (conf *mq_pb.ConfigureTopicResponse, createdAtNs, modifiedAtNs int64, err error) {
	glog.V(1).Infof("load conf with metadata for topic %v from filer", t)

	if err = fca.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
		conf, createdAtNs, modifiedAtNs, err = t.ReadConfFileWithMetadata(client)
		return err
	}); err != nil {
		return nil, 0, 0, err
	}

	return conf, createdAtNs, modifiedAtNs, nil
}

// NewFilerClientAccessor creates a FilerClientAccessor with one or more filers
func NewFilerClientAccessor(filerAddresses []pb.ServerAddress, grpcDialOption grpc.DialOption) *FilerClientAccessor {
	if len(filerAddresses) == 0 {
		panic("at least one filer address is required")
	}

	return &FilerClientAccessor{
		GetGrpcDialOption: func() grpc.DialOption {
			return grpcDialOption
		},
		GetFilers: func() []pb.ServerAddress {
			return filerAddresses
		},
	}
}
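
// Example usage (a minimal sketch; the filer address and the insecure
// credentials are illustrative assumptions, not part of this package):
//
//	dialOption := grpc.WithTransportCredentials(insecure.NewCredentials()) // google.golang.org/grpc/credentials/insecure
//	fca := NewFilerClientAccessor([]pb.ServerAddress{"localhost:8888"}, dialOption)
//	err := fca.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
//		// issue filer RPCs here; failover and backoff are handled by the accessor
//		return nil
//	})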

// AddFilerAddresses adds more filer addresses to the existing list.
// Note: it replaces the GetFilers closure without synchronization, so it must
// not be called concurrently with WithFilerClient.
func (fca *FilerClientAccessor) AddFilerAddresses(additionalFilers []pb.ServerAddress) {
	if len(additionalFilers) == 0 {
		return
	}

	// Get the current filers if available
	var allFilers []pb.ServerAddress
	if fca.GetFilers != nil {
		allFilers = append(allFilers, fca.GetFilers()...)
	}

	// Add the additional filers
	allFilers = append(allFilers, additionalFilers...)

	// Update the filers list
	fca.GetFilers = func() []pb.ServerAddress {
		return allFilers
	}
}