package weed_server

// https://yusufs.medium.com/creating-distributed-kv-database-by-implementing-raft-consensus-using-golang-d0884eef2e28
// https://github.com/Jille/raft-grpc-example/blob/cd5bcab0218f008e044fbeee4facdd01b06018ad/application.go#L18

import (
	"fmt"
	"math/rand/v2"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strings"
	"time"

	transport "github.com/Jille/raft-grpc-transport"
	"github.com/armon/go-metrics"
	"github.com/armon/go-metrics/prometheus"
	"github.com/hashicorp/raft"
	boltdb "github.com/hashicorp/raft-boltdb/v2"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/stats"
	"google.golang.org/grpc"
)

const (
	ldbFile            = "logs.dat"   // BoltDB file backing the raft log store
	sdbFile            = "stable.dat" // BoltDB file backing the raft stable store
	updatePeersTimeout = 15 * time.Minute
)

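// getPeerIdx returns the index of self within the sorted list of peer
// addresses, or -1 if self is not among the peers. Sorting gives every
// server the same view of the ordering, so each one derives a distinct,
// deterministic index.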
func getPeerIdx(self pb.ServerAddress, mapPeers map[string]pb.ServerAddress) int {
	peers := make([]pb.ServerAddress, 0, len(mapPeers))
	for _, peer := range mapPeers {
		peers = append(peers, peer)
	}
	sort.Slice(peers, func(i, j int) bool {
		return strings.Compare(string(peers[i]), string(peers[j])) < 0
	})
	for i, peer := range peers {
		if string(peer) == string(self) {
			return i
		}
	}
	return -1
}

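// AddPeersConfiguration builds a raft.Configuration that registers every
// known peer as a voting member, using its gRPC address as the raft
// transport address.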
func (s *RaftServer) AddPeersConfiguration() (cfg raft.Configuration) {
	for _, peer := range s.peers {
		cfg.Servers = append(cfg.Servers, raft.Server{
			Suffrage: raft.Voter,
			ID:       raft.ServerID(peer),
			Address:  raft.ServerAddress(peer.ToGrpcAddress()),
		})
	}
	return cfg
}

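// monitorLeaderLoop watches the raft leadership channel. When this server
// gains leadership it reconciles the peer list once (if updatePeers is set)
// and runs the topology barrier; when it loses leadership it resets the
// barrier. Every transition is logged and counted.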
func (s *RaftServer) monitorLeaderLoop(updatePeers bool) {
	for {
		prevLeader, _ := s.RaftHashicorp.LeaderWithID()
		select {
		case isLeader := <-s.RaftHashicorp.LeaderCh():
			leader, _ := s.RaftHashicorp.LeaderWithID()
			if isLeader {

				if updatePeers {
					s.updatePeers()
					updatePeers = false
				}

				s.topo.DoBarrier()

				stats.MasterLeaderChangeCounter.WithLabelValues(fmt.Sprintf("%+v", leader)).Inc()
			} else {
				s.topo.BarrierReset()
			}
			glog.V(0).Infof("is leader %+v change event: %+v => %+v", isLeader, prevLeader, leader)
			prevLeader = leader
			s.topo.LastLeaderChangeTime = time.Now()
		}
	}
}

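// updatePeers reconciles raft membership with the configured peer list:
// configured peers that are missing from the raft configuration are added
// as voters, servers that are no longer configured are removed, and the
// leader's own entry is checked last.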
func (s *RaftServer) updatePeers() {
	peerLeader := string(s.serverAddr)
	existsPeerName := make(map[string]bool)
	for _, server := range s.RaftHashicorp.GetConfiguration().Configuration().Servers {
		if string(server.ID) == peerLeader {
			continue
		}
		existsPeerName[string(server.ID)] = true
	}
	for _, peer := range s.peers {
		peerName := string(peer)
		if peerName == peerLeader || existsPeerName[peerName] {
			continue
		}
		glog.V(0).Infof("adding new peer: %s", peerName)
		s.RaftHashicorp.AddVoter(
			raft.ServerID(peerName), raft.ServerAddress(peer.ToGrpcAddress()), 0, 0)
	}
	for peer := range existsPeerName {
		if _, found := s.peers[peer]; !found {
			glog.V(0).Infof("removing old peer: %s", peer)
			s.RaftHashicorp.RemoveServer(raft.ServerID(peer), 0, 0)
		}
	}
	if _, found := s.peers[peerLeader]; !found {
		glog.V(0).Infof("removing old leader peer: %s", peerLeader)
		s.RaftHashicorp.RemoveServer(raft.ServerID(peerLeader), 0, 0)
	}
}

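// NewHashicorpRaftServer wires up a hashicorp/raft node with BoltDB log and
// stable stores, a file-based snapshot store, and a gRPC transport. With
// RaftBootstrap set (or when no prior configuration exists) it bootstraps
// the cluster, staggering servers by peer index; otherwise the peer list is
// reconciled once leadership is first won.
//
// A minimal usage sketch; every option value below is an illustrative
// assumption, not a default of this package:
//
//	raftServer, err := NewHashicorpRaftServer(&RaftServerOption{
//		Peers:             peers, // map of all master peers, keyed by address
//		ServerAddr:        self,  // this server's pb.ServerAddress
//		DataDir:           "/data/raft",
//		Topo:              topo,
//		RaftBootstrap:     false,
//		HeartbeatInterval: 300 * time.Millisecond,
//		ElectionTimeout:   10 * time.Second,
//		GrpcDialOption:    grpcDialOption,
//	})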
func NewHashicorpRaftServer(option *RaftServerOption) (*RaftServer, error) {
	s := &RaftServer{
		peers:      option.Peers,
		serverAddr: option.ServerAddr,
		dataDir:    option.DataDir,
		topo:       option.Topo,
	}

	c := raft.DefaultConfig()
	c.LocalID = raft.ServerID(s.serverAddr) // TODO maybe the IP:port address will change
	// Add up to 25% jitter to the heartbeat timeout so peers do not time out in lockstep.
	c.HeartbeatTimeout = time.Duration(float64(option.HeartbeatInterval) * (rand.Float64()*0.25 + 1))
	c.ElectionTimeout = option.ElectionTimeout
	// raft.ValidateConfig requires LeaderLeaseTimeout <= HeartbeatTimeout.
	if c.LeaderLeaseTimeout > c.HeartbeatTimeout {
		c.LeaderLeaseTimeout = c.HeartbeatTimeout
	}
	if glog.V(4) {
		c.LogLevel = "Debug"
	} else if glog.V(2) {
		c.LogLevel = "Info"
	} else if glog.V(1) {
		c.LogLevel = "Warn"
	} else if glog.V(0) {
		c.LogLevel = "Error"
	}

	if err := raft.ValidateConfig(c); err != nil {
		return nil, fmt.Errorf("raft.ValidateConfig: %w", err)
	}

	if option.RaftBootstrap {
		// Bootstrapping starts from a clean slate: drop any existing raft state.
		os.RemoveAll(path.Join(s.dataDir, ldbFile))
		os.RemoveAll(path.Join(s.dataDir, sdbFile))
		os.RemoveAll(path.Join(s.dataDir, "snapshots"))
	}
	if err := os.MkdirAll(path.Join(s.dataDir, "snapshots"), os.ModePerm); err != nil {
		return nil, err
	}
	baseDir := s.dataDir

	ldb, err := boltdb.NewBoltStore(filepath.Join(baseDir, ldbFile))
	if err != nil {
		return nil, fmt.Errorf("boltdb.NewBoltStore(%q): %w", filepath.Join(baseDir, ldbFile), err)
	}

	sdb, err := boltdb.NewBoltStore(filepath.Join(baseDir, sdbFile))
	if err != nil {
		return nil, fmt.Errorf("boltdb.NewBoltStore(%q): %w", filepath.Join(baseDir, sdbFile), err)
	}

	fss, err := raft.NewFileSnapshotStore(baseDir, 3, os.Stderr)
	if err != nil {
		return nil, fmt.Errorf("raft.NewFileSnapshotStore(%q, ...): %w", baseDir, err)
	}

	s.TransportManager = transport.New(raft.ServerAddress(s.serverAddr), []grpc.DialOption{option.GrpcDialOption})

	stateMachine := StateMachine{topo: option.Topo}
	s.RaftHashicorp, err = raft.NewRaft(c, &stateMachine, ldb, sdb, fss, s.TransportManager.Transport())
	if err != nil {
		return nil, fmt.Errorf("raft.NewRaft: %w", err)
	}

	updatePeers := false
	if option.RaftBootstrap || len(s.RaftHashicorp.GetConfiguration().Configuration().Servers) == 0 {
		cfg := s.AddPeersConfiguration()
		// Stagger the bootstrap by peer index so that all servers do not
		// try to bootstrap the cluster at the same time.
		peerIdx := getPeerIdx(s.serverAddr, s.peers)
		timeSleep := time.Duration(float64(c.LeaderLeaseTimeout) * (rand.Float64()*0.25 + 1) * float64(peerIdx))
		glog.V(0).Infof("Bootstrapping idx: %d sleep: %v new cluster: %+v", peerIdx, timeSleep, cfg)
		time.Sleep(timeSleep)
		f := s.RaftHashicorp.BootstrapCluster(cfg)
		if err := f.Error(); err != nil {
			return nil, fmt.Errorf("raft.Raft.BootstrapCluster: %w", err)
		}
	} else {
		updatePeers = true
	}

	go s.monitorLeaderLoop(updatePeers)

	if glog.V(4) {
		// Periodically log the raft configuration known to this server.
		ticker := time.NewTicker(c.HeartbeatTimeout * 10)
		go func() {
			for range ticker.C {
				cfuture := s.RaftHashicorp.GetConfiguration()
				if err := cfuture.Error(); err != nil {
					glog.Fatalf("error getting config: %s", err)
				}
				configuration := cfuture.Configuration()
				glog.V(4).Infof("Showing peers known by %s:\n%+v", s.RaftHashicorp.String(), configuration.Servers)
			}
		}()
	}

	// Configure a prometheus sink as the raft metrics sink
	if sink, err := prometheus.NewPrometheusSinkFrom(prometheus.PrometheusOpts{
		Registerer: stats.Gather,
	}); err != nil {
		return nil, fmt.Errorf("NewPrometheusSink: %w", err)
	} else {
		metricsConf := metrics.DefaultConfig(stats.Namespace)
		metricsConf.EnableRuntimeMetrics = false
		if _, err = metrics.NewGlobal(metricsConf, sink); err != nil {
			return nil, fmt.Errorf("metrics.NewGlobal: %w", err)
		}
	}

	return s, nil
}