You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

379 lines
11 KiB

10 years ago
12 years ago
12 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. package storage
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "io/ioutil"
  7. "math/rand"
  8. "strconv"
  9. "strings"
  10. "github.com/chrislusf/seaweedfs/go/glog"
  11. "github.com/chrislusf/seaweedfs/go/operation"
  12. "github.com/chrislusf/seaweedfs/go/security"
  13. "github.com/chrislusf/seaweedfs/go/util"
  14. "github.com/golang/protobuf/proto"
  15. )
  // MAX_TTL_VOLUME_REMOVAL_DELAY is the delay handed to Volume.exiredLongEnough
  // before an expired TTL volume is removed. The original annotation gives the
  // unit as minutes (10 minutes).
  const MAX_TTL_VOLUME_REMOVAL_DELAY = 10
  19. type DiskLocation struct {
  20. Directory string
  21. MaxVolumeCount int
  22. volumes map[VolumeId]*Volume
  23. }
  24. func (mn *DiskLocation) reset() {
  25. }
  // MasterNodes remembers the known master addresses and which one of them is
  // currently in use.
  type MasterNodes struct {
  	// nodes holds the known master addresses.
  	nodes []string
  	// lastNode is the index into nodes of the master currently in use. Any
  	// negative value means that no master is selected and one has to be
  	// discovered again.
  	lastNode int
  }

  // String renders the node list and the current index for logging.
  func (mn *MasterNodes) String() string {
  	return fmt.Sprintf("nodes:%v, lastNode:%d", mn.nodes, mn.lastNode)
  }

  // NewMasterNodes returns a MasterNodes that knows only bootstrapNode and has
  // no master selected yet (lastNode is -1).
  func NewMasterNodes(bootstrapNode string) (mn *MasterNodes) {
  	return &MasterNodes{nodes: []string{bootstrapNode}, lastNode: -1}
  }

  // reset marks the current master as no longer selected so that the next
  // lookup has to discover a master again. Nothing happens when only one node
  // is known, because there is no alternative to fail over to.
  func (mn *MasterNodes) reset() {
  	if len(mn.nodes) > 1 && mn.lastNode >= 0 {
  		// Map index i to -i-1 so that index 0 also becomes negative; a plain
  		// negation would leave 0 unchanged and the failed master at index 0
  		// would never be dropped.
  		mn.lastNode = -mn.lastNode - 1
  	}
  }
  42. func (mn *MasterNodes) findMaster() (string, error) {
  43. if len(mn.nodes) == 0 {
  44. return "", errors.New("No master node found!")
  45. }
  46. if mn.lastNode < 0 {
  47. for _, m := range mn.nodes {
  48. if masters, e := operation.ListMasters(m); e == nil {
  49. if len(masters) == 0 {
  50. continue
  51. }
  52. mn.nodes = masters
  53. mn.lastNode = rand.Intn(len(mn.nodes))
  54. glog.V(2).Info("current master node is :", mn.nodes[mn.lastNode])
  55. break
  56. }
  57. }
  58. }
  59. if mn.lastNode < 0 {
  60. return "", errors.New("No master node available!")
  61. }
  62. return mn.nodes[mn.lastNode], nil
  63. }
  64. /*
  65. * A VolumeServer contains one Store
  66. */
  67. type Store struct {
  68. Ip string
  69. Port int
  70. PublicUrl string
  71. Locations []*DiskLocation
  72. dataCenter string //optional informaton, overwriting master setting if exists
  73. rack string //optional information, overwriting master setting if exists
  74. connected bool
  75. volumeSizeLimit uint64 //read from the master
  76. masterNodes *MasterNodes
  77. }
  78. func (s *Store) String() (str string) {
  79. str = fmt.Sprintf("Ip:%s, Port:%d, PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d, masterNodes:%s", s.Ip, s.Port, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.volumeSizeLimit, s.masterNodes)
  80. return
  81. }
  82. func NewStore(port int, ip, publicUrl string, dirnames []string, maxVolumeCounts []int, needleMapKind NeedleMapType) (s *Store) {
  83. s = &Store{Port: port, Ip: ip, PublicUrl: publicUrl}
  84. s.Locations = make([]*DiskLocation, 0)
  85. for i := 0; i < len(dirnames); i++ {
  86. location := &DiskLocation{Directory: dirnames[i], MaxVolumeCount: maxVolumeCounts[i]}
  87. location.volumes = make(map[VolumeId]*Volume)
  88. location.loadExistingVolumes(needleMapKind)
  89. s.Locations = append(s.Locations, location)
  90. }
  91. return
  92. }
  93. func (s *Store) AddVolume(volumeListString string, collection string, needleMapKind NeedleMapType, replicaPlacement string, ttlString string) error {
  94. rt, e := NewReplicaPlacementFromString(replicaPlacement)
  95. if e != nil {
  96. return e
  97. }
  98. ttl, e := ReadTTL(ttlString)
  99. if e != nil {
  100. return e
  101. }
  102. for _, range_string := range strings.Split(volumeListString, ",") {
  103. if strings.Index(range_string, "-") < 0 {
  104. id_string := range_string
  105. id, err := NewVolumeId(id_string)
  106. if err != nil {
  107. return fmt.Errorf("Volume Id %s is not a valid unsigned integer!", id_string)
  108. }
  109. e = s.addVolume(VolumeId(id), collection, needleMapKind, rt, ttl)
  110. } else {
  111. pair := strings.Split(range_string, "-")
  112. start, start_err := strconv.ParseUint(pair[0], 10, 64)
  113. if start_err != nil {
  114. return fmt.Errorf("Volume Start Id %s is not a valid unsigned integer!", pair[0])
  115. }
  116. end, end_err := strconv.ParseUint(pair[1], 10, 64)
  117. if end_err != nil {
  118. return fmt.Errorf("Volume End Id %s is not a valid unsigned integer!", pair[1])
  119. }
  120. for id := start; id <= end; id++ {
  121. if err := s.addVolume(VolumeId(id), collection, needleMapKind, rt, ttl); err != nil {
  122. e = err
  123. }
  124. }
  125. }
  126. }
  127. return e
  128. }
  129. func (s *Store) DeleteCollection(collection string) (e error) {
  130. for _, location := range s.Locations {
  131. for k, v := range location.volumes {
  132. if v.Collection == collection {
  133. e = v.Destroy()
  134. if e != nil {
  135. return
  136. }
  137. delete(location.volumes, k)
  138. }
  139. }
  140. }
  141. return
  142. }
  143. func (s *Store) DeleteVolume(volumes map[VolumeId]*Volume, v *Volume) (e error) {
  144. e = v.Destroy()
  145. if e != nil {
  146. return
  147. }
  148. delete(volumes, v.Id)
  149. return
  150. }
  151. func (s *Store) findVolume(vid VolumeId) *Volume {
  152. for _, location := range s.Locations {
  153. if v, found := location.volumes[vid]; found {
  154. return v
  155. }
  156. }
  157. return nil
  158. }
  159. func (s *Store) findFreeLocation() (ret *DiskLocation) {
  160. max := 0
  161. for _, location := range s.Locations {
  162. currentFreeCount := location.MaxVolumeCount - len(location.volumes)
  163. if currentFreeCount > max {
  164. max = currentFreeCount
  165. ret = location
  166. }
  167. }
  168. return ret
  169. }
  170. func (s *Store) addVolume(vid VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement *ReplicaPlacement, ttl *TTL) error {
  171. if s.findVolume(vid) != nil {
  172. return fmt.Errorf("Volume Id %d already exists!", vid)
  173. }
  174. if location := s.findFreeLocation(); location != nil {
  175. glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
  176. location.Directory, vid, collection, replicaPlacement, ttl)
  177. if volume, err := NewVolume(location.Directory, collection, vid, needleMapKind, replicaPlacement, ttl); err == nil {
  178. location.volumes[vid] = volume
  179. return nil
  180. } else {
  181. return err
  182. }
  183. }
  184. return fmt.Errorf("No more free space left")
  185. }
  186. func (l *DiskLocation) loadExistingVolumes(needleMapKind NeedleMapType) {
  187. if dirs, err := ioutil.ReadDir(l.Directory); err == nil {
  188. for _, dir := range dirs {
  189. name := dir.Name()
  190. if !dir.IsDir() && strings.HasSuffix(name, ".dat") {
  191. collection := ""
  192. base := name[:len(name)-len(".dat")]
  193. i := strings.Index(base, "_")
  194. if i > 0 {
  195. collection, base = base[0:i], base[i+1:]
  196. }
  197. if vid, err := NewVolumeId(base); err == nil {
  198. if l.volumes[vid] == nil {
  199. if v, e := NewVolume(l.Directory, collection, vid, needleMapKind, nil, nil); e == nil {
  200. l.volumes[vid] = v
  201. glog.V(0).Infof("data file %s, replicaPlacement=%s v=%d size=%d ttl=%s", l.Directory+"/"+name, v.ReplicaPlacement, v.Version(), v.Size(), v.Ttl.String())
  202. }
  203. }
  204. }
  205. }
  206. }
  207. }
  208. glog.V(0).Infoln("Store started on dir:", l.Directory, "with", len(l.volumes), "volumes", "max", l.MaxVolumeCount)
  209. }
  210. func (s *Store) Status() []*VolumeInfo {
  211. var stats []*VolumeInfo
  212. for _, location := range s.Locations {
  213. for k, v := range location.volumes {
  214. s := &VolumeInfo{
  215. Id: VolumeId(k),
  216. Size: v.ContentSize(),
  217. Collection: v.Collection,
  218. ReplicaPlacement: v.ReplicaPlacement,
  219. Version: v.Version(),
  220. FileCount: v.nm.FileCount(),
  221. DeleteCount: v.nm.DeletedCount(),
  222. DeletedByteCount: v.nm.DeletedSize(),
  223. ReadOnly: v.readOnly,
  224. Ttl: v.Ttl}
  225. stats = append(stats, s)
  226. }
  227. }
  228. sortVolumeInfos(stats)
  229. return stats
  230. }
  231. func (s *Store) SetDataCenter(dataCenter string) {
  232. s.dataCenter = dataCenter
  233. }
  234. func (s *Store) SetRack(rack string) {
  235. s.rack = rack
  236. }
  237. func (s *Store) SetBootstrapMaster(bootstrapMaster string) {
  238. s.masterNodes = NewMasterNodes(bootstrapMaster)
  239. }
  240. func (s *Store) SendHeartbeatToMaster() (masterNode string, secretKey security.Secret, e error) {
  241. masterNode, e = s.masterNodes.findMaster()
  242. if e != nil {
  243. return
  244. }
  245. var volumeMessages []*operation.VolumeInformationMessage
  246. maxVolumeCount := 0
  247. var maxFileKey uint64
  248. for _, location := range s.Locations {
  249. maxVolumeCount = maxVolumeCount + location.MaxVolumeCount
  250. for k, v := range location.volumes {
  251. if maxFileKey < v.nm.MaxFileKey() {
  252. maxFileKey = v.nm.MaxFileKey()
  253. }
  254. if !v.expired(s.volumeSizeLimit) {
  255. volumeMessage := &operation.VolumeInformationMessage{
  256. Id: proto.Uint32(uint32(k)),
  257. Size: proto.Uint64(uint64(v.Size())),
  258. Collection: proto.String(v.Collection),
  259. FileCount: proto.Uint64(uint64(v.nm.FileCount())),
  260. DeleteCount: proto.Uint64(uint64(v.nm.DeletedCount())),
  261. DeletedByteCount: proto.Uint64(v.nm.DeletedSize()),
  262. ReadOnly: proto.Bool(v.readOnly),
  263. ReplicaPlacement: proto.Uint32(uint32(v.ReplicaPlacement.Byte())),
  264. Version: proto.Uint32(uint32(v.Version())),
  265. Ttl: proto.Uint32(v.Ttl.ToUint32()),
  266. }
  267. volumeMessages = append(volumeMessages, volumeMessage)
  268. } else {
  269. if v.exiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
  270. s.DeleteVolume(location.volumes, v)
  271. glog.V(0).Infoln("volume", v.Id, "is deleted.")
  272. } else {
  273. glog.V(0).Infoln("volume", v.Id, "is expired.")
  274. }
  275. }
  276. }
  277. }
  278. joinMessage := &operation.JoinMessage{
  279. IsInit: proto.Bool(!s.connected),
  280. Ip: proto.String(s.Ip),
  281. Port: proto.Uint32(uint32(s.Port)),
  282. PublicUrl: proto.String(s.PublicUrl),
  283. MaxVolumeCount: proto.Uint32(uint32(maxVolumeCount)),
  284. MaxFileKey: proto.Uint64(maxFileKey),
  285. DataCenter: proto.String(s.dataCenter),
  286. Rack: proto.String(s.rack),
  287. Volumes: volumeMessages,
  288. }
  289. data, err := proto.Marshal(joinMessage)
  290. if err != nil {
  291. return "", "", err
  292. }
  293. joinUrl := "http://" + masterNode + "/dir/join"
  294. jsonBlob, err := util.PostBytes(joinUrl, data)
  295. if err != nil {
  296. s.masterNodes.reset()
  297. return "", "", err
  298. }
  299. var ret operation.JoinResult
  300. if err := json.Unmarshal(jsonBlob, &ret); err != nil {
  301. glog.V(0).Infof("Failed to join %s with response: %s", joinUrl, string(jsonBlob))
  302. return masterNode, "", err
  303. }
  304. if ret.Error != "" {
  305. return masterNode, "", errors.New(ret.Error)
  306. }
  307. s.volumeSizeLimit = ret.VolumeSizeLimit
  308. secretKey = security.Secret(ret.SecretKey)
  309. s.connected = true
  310. return
  311. }
  312. func (s *Store) Close() {
  313. for _, location := range s.Locations {
  314. for _, v := range location.volumes {
  315. v.Close()
  316. }
  317. }
  318. }
  319. func (s *Store) Write(i VolumeId, n *Needle) (size uint32, err error) {
  320. if v := s.findVolume(i); v != nil {
  321. if v.readOnly {
  322. err = fmt.Errorf("Volume %d is read only", i)
  323. return
  324. }
  325. if MaxPossibleVolumeSize >= v.ContentSize()+uint64(size) {
  326. size, err = v.write(n)
  327. } else {
  328. err = fmt.Errorf("Volume Size Limit %d Exceeded! Current size is %d", s.volumeSizeLimit, v.ContentSize())
  329. }
  330. if s.volumeSizeLimit < v.ContentSize()+3*uint64(size) {
  331. glog.V(0).Infoln("volume", i, "size", v.ContentSize(), "will exceed limit", s.volumeSizeLimit)
  332. if _, _, e := s.SendHeartbeatToMaster(); e != nil {
  333. glog.V(0).Infoln("error when reporting size:", e)
  334. }
  335. }
  336. return
  337. }
  338. glog.V(0).Infoln("volume", i, "not found!")
  339. err = fmt.Errorf("Volume %d not found!", i)
  340. return
  341. }
  342. func (s *Store) Delete(i VolumeId, n *Needle) (uint32, error) {
  343. if v := s.findVolume(i); v != nil && !v.readOnly {
  344. return v.delete(n)
  345. }
  346. return 0, nil
  347. }
  348. func (s *Store) Read(i VolumeId, n *Needle) (int, error) {
  349. if v := s.findVolume(i); v != nil {
  350. return v.read(n)
  351. }
  352. return 0, fmt.Errorf("Volume %v not found!", i)
  353. }
  354. func (s *Store) GetVolume(i VolumeId) *Volume {
  355. return s.findVolume(i)
  356. }
  357. func (s *Store) HasVolume(i VolumeId) bool {
  358. v := s.findVolume(i)
  359. return v != nil
  360. }