You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

146 lines
4.4 KiB

  1. package weed_server
  2. // https://yusufs.medium.com/creating-distributed-kv-database-by-implementing-raft-consensus-using-golang-d0884eef2e28
  3. // https://github.com/Jille/raft-grpc-example/blob/cd5bcab0218f008e044fbeee4facdd01b06018ad/application.go#L18
  4. import (
  5. "fmt"
  6. transport "github.com/Jille/raft-grpc-transport"
  7. "github.com/chrislusf/seaweedfs/weed/glog"
  8. "github.com/hashicorp/raft"
  9. boltdb "github.com/hashicorp/raft-boltdb"
  10. "google.golang.org/grpc"
  11. "math/rand"
  12. "os"
  13. "path/filepath"
  14. "time"
  15. )
  16. func (s *RaftServer) AddPeersConfiguration() (cfg raft.Configuration) {
  17. for _, peer := range s.peers {
  18. cfg.Servers = append(cfg.Servers, raft.Server{
  19. Suffrage: raft.Voter,
  20. ID: raft.ServerID(peer.String()),
  21. Address: raft.ServerAddress(peer.ToGrpcAddress()),
  22. })
  23. }
  24. return cfg
  25. }
  26. func (s *RaftServer) UpdatePeers() {
  27. for {
  28. select {
  29. case isLeader := <-s.RaftHashicorp.LeaderCh():
  30. if isLeader {
  31. peerLeader := s.serverAddr.String()
  32. existsPeerName := make(map[string]bool)
  33. for _, server := range s.RaftHashicorp.GetConfiguration().Configuration().Servers {
  34. if string(server.ID) == peerLeader {
  35. continue
  36. }
  37. existsPeerName[string(server.ID)] = true
  38. }
  39. for _, peer := range s.peers {
  40. if peer.String() == peerLeader || existsPeerName[peer.String()] {
  41. continue
  42. }
  43. glog.V(0).Infof("adding new peer: %s", peer.String())
  44. s.RaftHashicorp.AddVoter(
  45. raft.ServerID(peer.String()), raft.ServerAddress(peer.ToGrpcAddress()), 0, 0)
  46. }
  47. for peer, _ := range existsPeerName {
  48. if _, found := s.peers[peer]; !found {
  49. glog.V(0).Infof("removing old peer: %s", peer)
  50. s.RaftHashicorp.RemoveServer(raft.ServerID(peer), 0, 0)
  51. }
  52. }
  53. if _, found := s.peers[peerLeader]; !found {
  54. glog.V(0).Infof("removing old leader peer: %s", peerLeader)
  55. s.RaftHashicorp.RemoveServer(raft.ServerID(peerLeader), 0, 0)
  56. }
  57. }
  58. break
  59. }
  60. }
  61. }
  62. func NewHashicorpRaftServer(option *RaftServerOption) (*RaftServer, error) {
  63. s := &RaftServer{
  64. peers: option.Peers,
  65. serverAddr: option.ServerAddr,
  66. dataDir: option.DataDir,
  67. topo: option.Topo,
  68. }
  69. c := raft.DefaultConfig()
  70. c.LocalID = raft.ServerID(s.serverAddr.String()) // TODO maybee the IP:port address will change
  71. c.NoSnapshotRestoreOnStart = option.RaftResumeState
  72. c.HeartbeatTimeout = time.Duration(float64(option.HeartbeatInterval) * (rand.Float64()*0.25 + 1))
  73. c.ElectionTimeout = option.ElectionTimeout
  74. if c.LeaderLeaseTimeout > c.HeartbeatTimeout {
  75. c.LeaderLeaseTimeout = c.HeartbeatTimeout
  76. }
  77. if glog.V(4) {
  78. c.LogLevel = "Debug"
  79. } else if glog.V(2) {
  80. c.LogLevel = "Info"
  81. } else if glog.V(1) {
  82. c.LogLevel = "Warn"
  83. } else if glog.V(0) {
  84. c.LogLevel = "Error"
  85. }
  86. baseDir := s.dataDir
  87. ldb, err := boltdb.NewBoltStore(filepath.Join(baseDir, "logs.dat"))
  88. if err != nil {
  89. return nil, fmt.Errorf(`boltdb.NewBoltStore(%q): %v`, filepath.Join(baseDir, "logs.dat"), err)
  90. }
  91. sdb, err := boltdb.NewBoltStore(filepath.Join(baseDir, "stable.dat"))
  92. if err != nil {
  93. return nil, fmt.Errorf(`boltdb.NewBoltStore(%q): %v`, filepath.Join(baseDir, "stable.dat"), err)
  94. }
  95. fss, err := raft.NewFileSnapshotStore(baseDir, 3, os.Stderr)
  96. if err != nil {
  97. return nil, fmt.Errorf(`raft.NewFileSnapshotStore(%q, ...): %v`, baseDir, err)
  98. }
  99. s.TransportManager = transport.New(raft.ServerAddress(s.serverAddr), []grpc.DialOption{option.GrpcDialOption})
  100. stateMachine := StateMachine{topo: option.Topo}
  101. s.RaftHashicorp, err = raft.NewRaft(c, &stateMachine, ldb, sdb, fss, s.TransportManager.Transport())
  102. if err != nil {
  103. return nil, fmt.Errorf("raft.NewRaft: %v", err)
  104. }
  105. if option.RaftBootstrap || len(s.RaftHashicorp.GetConfiguration().Configuration().Servers) == 0 {
  106. cfg := s.AddPeersConfiguration()
  107. glog.V(0).Infoln("Bootstrapping new cluster %+v", cfg)
  108. f := s.RaftHashicorp.BootstrapCluster(cfg)
  109. if err := f.Error(); err != nil {
  110. return nil, fmt.Errorf("raft.Raft.BootstrapCluster: %v", err)
  111. }
  112. } else {
  113. go s.UpdatePeers()
  114. }
  115. ticker := time.NewTicker(c.HeartbeatTimeout * 10)
  116. if glog.V(4) {
  117. go func() {
  118. for {
  119. select {
  120. case <-ticker.C:
  121. cfuture := s.RaftHashicorp.GetConfiguration()
  122. if err = cfuture.Error(); err != nil {
  123. glog.Fatalf("error getting config: %s", err)
  124. }
  125. configuration := cfuture.Configuration()
  126. glog.V(4).Infof("Showing peers known by %s:\n%+v", s.RaftHashicorp.String(), configuration.Servers)
  127. }
  128. }
  129. }()
  130. }
  131. return s, nil
  132. }