You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
199 lines
6.0 KiB
199 lines
6.0 KiB
package filer_client
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/cluster"
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
|
|
"google.golang.org/grpc"
|
|
)
|
|
|
|
// Timing parameters for filer discovery.
const (
	// FilerDiscoveryInterval is the steady-state period between refreshes of
	// the filer list from the masters.
	FilerDiscoveryInterval time.Duration = 30 * time.Second

	// InitialDiscoveryInterval is the shorter refresh period used for the
	// initial discovery attempts.
	InitialDiscoveryInterval time.Duration = 5 * time.Second

	// InitialDiscoveryRetries is the number of fast retries during startup
	// (6 retries * 5 seconds = 30 seconds total).
	InitialDiscoveryRetries = 6
)
|
|
|
|
// FilerDiscoveryService handles dynamic discovery and refresh of filers from masters
|
|
type FilerDiscoveryService struct {
|
|
masters []pb.ServerAddress
|
|
grpcDialOption grpc.DialOption
|
|
filers []pb.ServerAddress
|
|
filersMutex sync.RWMutex
|
|
refreshTicker *time.Ticker
|
|
stopChan chan struct{}
|
|
wg sync.WaitGroup
|
|
initialRetries int
|
|
}
|
|
|
|
// NewFilerDiscoveryService creates a new filer discovery service
|
|
func NewFilerDiscoveryService(masters []pb.ServerAddress, grpcDialOption grpc.DialOption) *FilerDiscoveryService {
|
|
return &FilerDiscoveryService{
|
|
masters: masters,
|
|
grpcDialOption: grpcDialOption,
|
|
filers: make([]pb.ServerAddress, 0),
|
|
stopChan: make(chan struct{}),
|
|
}
|
|
}
|
|
|
|
// Note: no separate convertHTTPToGRPC helper is needed here, because
// pb.ServerAddress.ToGrpcAddress() already performs the HTTP-to-gRPC address conversion.
|
|
|
|
// discoverFilersFromMaster discovers filers from a single master
|
|
func (fds *FilerDiscoveryService) discoverFilersFromMaster(masterAddr pb.ServerAddress) ([]pb.ServerAddress, error) {
|
|
// Convert HTTP master address to gRPC address (HTTP port + 10000)
|
|
grpcAddr := masterAddr.ToGrpcAddress()
|
|
glog.Infof("FILER DISCOVERY: Connecting to master gRPC at %s (converted from HTTP %s)", grpcAddr, masterAddr)
|
|
|
|
conn, err := grpc.Dial(grpcAddr, fds.grpcDialOption)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to connect to master at %s: %v", grpcAddr, err)
|
|
}
|
|
defer conn.Close()
|
|
|
|
client := master_pb.NewSeaweedClient(conn)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
|
|
resp, err := client.ListClusterNodes(ctx, &master_pb.ListClusterNodesRequest{
|
|
ClientType: cluster.FilerType,
|
|
})
|
|
if err != nil {
|
|
glog.Errorf("FILER DISCOVERY: ListClusterNodes failed for master %s: %v", masterAddr, err)
|
|
return nil, fmt.Errorf("failed to list filers from master %s: %v", masterAddr, err)
|
|
}
|
|
|
|
glog.Infof("FILER DISCOVERY: ListClusterNodes returned %d nodes from master %s", len(resp.ClusterNodes), masterAddr)
|
|
|
|
var filers []pb.ServerAddress
|
|
for _, node := range resp.ClusterNodes {
|
|
glog.Infof("FILER DISCOVERY: Found filer HTTP address %s", node.Address)
|
|
// Return HTTP address (lock client will convert to gRPC when needed)
|
|
filers = append(filers, pb.ServerAddress(node.Address))
|
|
}
|
|
|
|
glog.Infof("FILER DISCOVERY: Returning %d filers from master %s", len(filers), masterAddr)
|
|
|
|
return filers, nil
|
|
}
|
|
|
|
// refreshFilers discovers filers from all masters and updates the filer list
|
|
func (fds *FilerDiscoveryService) refreshFilers() {
|
|
glog.V(2).Info("Refreshing filer list from masters")
|
|
|
|
var allFilers []pb.ServerAddress
|
|
var discoveryErrors []error
|
|
|
|
// Try each master to discover filers
|
|
for _, masterAddr := range fds.masters {
|
|
filers, err := fds.discoverFilersFromMaster(masterAddr)
|
|
if err != nil {
|
|
discoveryErrors = append(discoveryErrors, err)
|
|
glog.V(1).Infof("Failed to discover filers from master %s: %v", masterAddr, err)
|
|
continue
|
|
}
|
|
|
|
allFilers = append(allFilers, filers...)
|
|
glog.V(2).Infof("Discovered %d filers from master %s", len(filers), masterAddr)
|
|
}
|
|
|
|
// Deduplicate filers
|
|
filerSet := make(map[pb.ServerAddress]bool)
|
|
for _, filer := range allFilers {
|
|
filerSet[filer] = true
|
|
}
|
|
|
|
uniqueFilers := make([]pb.ServerAddress, 0, len(filerSet))
|
|
for filer := range filerSet {
|
|
uniqueFilers = append(uniqueFilers, filer)
|
|
}
|
|
|
|
// Update the filer list
|
|
fds.filersMutex.Lock()
|
|
oldCount := len(fds.filers)
|
|
fds.filers = uniqueFilers
|
|
newCount := len(fds.filers)
|
|
fds.filersMutex.Unlock()
|
|
|
|
if newCount > 0 {
|
|
glog.V(1).Infof("Filer discovery successful: updated from %d to %d filers", oldCount, newCount)
|
|
} else if len(discoveryErrors) > 0 {
|
|
glog.Warningf("Failed to discover any filers from %d masters, keeping existing %d filers", len(fds.masters), oldCount)
|
|
}
|
|
}
|
|
|
|
// GetFilers returns the current list of filers
|
|
func (fds *FilerDiscoveryService) GetFilers() []pb.ServerAddress {
|
|
fds.filersMutex.RLock()
|
|
defer fds.filersMutex.RUnlock()
|
|
|
|
// Return a copy to avoid concurrent modification
|
|
filers := make([]pb.ServerAddress, len(fds.filers))
|
|
copy(filers, fds.filers)
|
|
return filers
|
|
}
|
|
|
|
// Start begins the filer discovery service
|
|
func (fds *FilerDiscoveryService) Start() error {
|
|
glog.V(1).Info("Starting filer discovery service")
|
|
|
|
// Initial discovery
|
|
fds.refreshFilers()
|
|
|
|
// Start with faster discovery during startup
|
|
fds.initialRetries = InitialDiscoveryRetries
|
|
interval := InitialDiscoveryInterval
|
|
if len(fds.GetFilers()) > 0 {
|
|
// If we found filers immediately, use normal interval
|
|
interval = FilerDiscoveryInterval
|
|
fds.initialRetries = 0
|
|
}
|
|
|
|
// Start periodic refresh
|
|
fds.refreshTicker = time.NewTicker(interval)
|
|
fds.wg.Add(1)
|
|
go func() {
|
|
defer fds.wg.Done()
|
|
for {
|
|
select {
|
|
case <-fds.refreshTicker.C:
|
|
fds.refreshFilers()
|
|
|
|
// Switch to normal interval after initial retries
|
|
if fds.initialRetries > 0 {
|
|
fds.initialRetries--
|
|
if fds.initialRetries == 0 || len(fds.GetFilers()) > 0 {
|
|
glog.V(1).Info("Switching to normal filer discovery interval")
|
|
fds.refreshTicker.Stop()
|
|
fds.refreshTicker = time.NewTicker(FilerDiscoveryInterval)
|
|
}
|
|
}
|
|
case <-fds.stopChan:
|
|
glog.V(1).Info("Filer discovery service stopping")
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
|
|
return nil
|
|
}
|
|
|
|
// Stop stops the filer discovery service
|
|
func (fds *FilerDiscoveryService) Stop() error {
|
|
glog.V(1).Info("Stopping filer discovery service")
|
|
|
|
close(fds.stopChan)
|
|
if fds.refreshTicker != nil {
|
|
fds.refreshTicker.Stop()
|
|
}
|
|
fds.wg.Wait()
|
|
|
|
return nil
|
|
}
|