You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

185 lines
4.7 KiB

11 years ago
11 years ago
11 years ago
  1. package weed_server
  2. import (
  3. "bytes"
  4. "code.google.com/p/weed-fs/go/glog"
  5. "code.google.com/p/weed-fs/go/topology"
  6. "encoding/json"
  7. "errors"
  8. "fmt"
  9. "github.com/goraft/raft"
  10. "github.com/gorilla/mux"
  11. "io/ioutil"
  12. "math/rand"
  13. "net/http"
  14. "net/url"
  15. "strings"
  16. "time"
  17. )
  18. type RaftServer struct {
  19. peers []string // initial peers to join with
  20. raftServer raft.Server
  21. dataDir string
  22. httpAddr string
  23. router *mux.Router
  24. topo *topology.Topology
  25. }
  26. func NewRaftServer(r *mux.Router, peers []string, httpAddr string, dataDir string, topo *topology.Topology, pulseSeconds int) *RaftServer {
  27. s := &RaftServer{
  28. peers: peers,
  29. httpAddr: httpAddr,
  30. dataDir: dataDir,
  31. router: r,
  32. topo: topo,
  33. }
  34. if glog.V(4) {
  35. raft.SetLogLevel(2)
  36. }
  37. raft.RegisterCommand(&topology.MaxVolumeIdCommand{})
  38. var err error
  39. transporter := raft.NewHTTPTransporter("/cluster", 0)
  40. transporter.Transport.MaxIdleConnsPerHost = 1024
  41. s.raftServer, err = raft.NewServer(s.httpAddr, s.dataDir, transporter, nil, topo, "")
  42. if err != nil {
  43. glog.V(0).Infoln(err)
  44. return nil
  45. }
  46. transporter.Install(s.raftServer, s)
  47. s.raftServer.SetHeartbeatInterval(1 * time.Second)
  48. s.raftServer.SetElectionTimeout(time.Duration(pulseSeconds) * 1150 * time.Millisecond)
  49. s.raftServer.Start()
  50. s.router.HandleFunc("/cluster/join", s.joinHandler).Methods("POST")
  51. s.router.HandleFunc("/cluster/status", s.statusHandler).Methods("GET")
  52. // Join to leader if specified.
  53. if len(s.peers) > 0 {
  54. if !s.raftServer.IsLogEmpty() {
  55. glog.V(0).Infoln("Starting cluster with existing logs.")
  56. } else {
  57. glog.V(0).Infoln("Joining cluster:", strings.Join(s.peers, ","))
  58. time.Sleep(time.Duration(rand.Intn(1000)) * time.Millisecond)
  59. firstJoinError := s.Join(s.peers)
  60. if firstJoinError != nil {
  61. glog.V(0).Infoln("No existing server found. Starting as leader in the new cluster.")
  62. _, err := s.raftServer.Do(&raft.DefaultJoinCommand{
  63. Name: s.raftServer.Name(),
  64. ConnectionString: "http://" + s.httpAddr,
  65. })
  66. if err != nil {
  67. glog.V(0).Infoln(err)
  68. return nil
  69. }
  70. }
  71. var err error
  72. for err != nil {
  73. glog.V(0).Infoln("waiting for peers on", strings.Join(s.peers, ","), "...")
  74. time.Sleep(time.Duration(1000+rand.Intn(2000)) * time.Millisecond)
  75. err = s.Join(s.peers)
  76. }
  77. glog.V(0).Infoln("Joined cluster")
  78. }
  79. // Initialize the server by joining itself.
  80. } else if s.raftServer.IsLogEmpty() {
  81. glog.V(0).Infoln("Initializing new cluster")
  82. _, err := s.raftServer.Do(&raft.DefaultJoinCommand{
  83. Name: s.raftServer.Name(),
  84. ConnectionString: "http://" + s.httpAddr,
  85. })
  86. if err != nil {
  87. glog.V(0).Infoln(err)
  88. return nil
  89. }
  90. } else {
  91. glog.V(0).Infoln("Recovered from log")
  92. }
  93. return s
  94. }
  95. func (s *RaftServer) Peers() (members []string) {
  96. peers := s.raftServer.Peers()
  97. for _, p := range peers {
  98. members = append(members, strings.TrimPrefix(p.ConnectionString, "http://"))
  99. }
  100. return
  101. }
  102. // Join joins an existing cluster.
  103. func (s *RaftServer) Join(peers []string) error {
  104. command := &raft.DefaultJoinCommand{
  105. Name: s.raftServer.Name(),
  106. ConnectionString: "http://" + s.httpAddr,
  107. }
  108. var b bytes.Buffer
  109. json.NewEncoder(&b).Encode(command)
  110. for _, m := range peers {
  111. target := fmt.Sprintf("http://%s/cluster/join", strings.TrimSpace(m))
  112. glog.V(0).Infoln("Attempting to connect to:", target)
  113. err := postFollowingOneRedirect(target, "application/json", &b)
  114. if err != nil {
  115. glog.V(0).Infoln("Post returned error: ", err.Error())
  116. if _, ok := err.(*url.Error); ok {
  117. // If we receive a network error try the next member
  118. continue
  119. }
  120. return err
  121. }
  122. return nil
  123. }
  124. return errors.New("Could not connect to any cluster peers")
  125. }
  126. // a workaround because http POST following redirection misses request body
  127. func postFollowingOneRedirect(target string, contentType string, b *bytes.Buffer) error {
  128. backupReader := bytes.NewReader(b.Bytes())
  129. resp, err := http.Post(target, contentType, b)
  130. if err != nil {
  131. return err
  132. }
  133. defer resp.Body.Close()
  134. reply, _ := ioutil.ReadAll(resp.Body)
  135. statusCode := resp.StatusCode
  136. if statusCode == http.StatusMovedPermanently {
  137. var urlStr string
  138. if urlStr = resp.Header.Get("Location"); urlStr == "" {
  139. return errors.New(fmt.Sprintf("%d response missing Location header", resp.StatusCode))
  140. }
  141. glog.V(0).Infoln("Post redirected to ", urlStr)
  142. resp2, err2 := http.Post(urlStr, contentType, backupReader)
  143. if err2 != nil {
  144. return err2
  145. }
  146. defer resp2.Body.Close()
  147. reply, _ = ioutil.ReadAll(resp2.Body)
  148. statusCode = resp2.StatusCode
  149. }
  150. glog.V(0).Infoln("Post returned status: ", statusCode, string(reply))
  151. if statusCode != http.StatusOK {
  152. return errors.New(string(reply))
  153. }
  154. return nil
  155. }