You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

283 lines
8.0 KiB

13 years ago
13 years ago
6 years ago
6 years ago
13 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
6 years ago
6 years ago
4 years ago
9 years ago
4 years ago
  1. package topology
  2. import (
  3. "errors"
  4. "math/rand"
  5. "strings"
  6. "sync"
  7. "sync/atomic"
  8. "github.com/seaweedfs/seaweedfs/weed/glog"
  9. "github.com/seaweedfs/seaweedfs/weed/stats"
  10. "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
  11. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  12. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  13. )
  // NodeId is the string identifier of a node. NodeImpl uses it as the key of
  // its children map and as the value returned by Id.
  type NodeId string
  // Node is the common behavior of every element of the topology tree.
  // The per-method notes below describe what NodeImpl, the implementation in
  // this file, does; other implementations may differ.
  type Node interface {
  	// Id returns the node's identifier.
  	Id() NodeId
  	// String returns the node's id, prefixed by its parent's String() and ":"
  	// when a parent is set.
  	String() string
  	// AvailableSpaceFor returns the number of free volume slots for the disk
  	// type named by option; the result can be zero or negative.
  	AvailableSpaceFor(option *VolumeGrowOption) int64
  	// ReserveOneVolume walks the children, using r as an offset into their
  	// cumulative free space, and returns the data node that was selected.
  	ReserveOneVolume(r int64, option *VolumeGrowOption) (*DataNode, error)
  	// UpAdjustDiskUsageDelta adds the delta to this node's usage counters and
  	// then to every ancestor's.
  	UpAdjustDiskUsageDelta(deltaDiskUsages *DiskUsages)
  	// UpAdjustMaxVolumeId raises the recorded maximum volume id to vid, on this
  	// node and its ancestors, when vid is larger than the current value.
  	UpAdjustMaxVolumeId(vid needle.VolumeId)

  	// GetDiskUsages returns the node's disk usage counters.
  	GetDiskUsages() *DiskUsages
  	// GetMaxVolumeId returns the largest volume id recorded on the node.
  	GetMaxVolumeId() needle.VolumeId
  	// SetParent replaces the node's parent; nil detaches it.
  	SetParent(Node)
  	// LinkChildNode registers node as a child if no child with that id exists.
  	LinkChildNode(node Node)
  	// UnlinkChildNode removes the child with the given id, if present.
  	UnlinkChildNode(nodeId NodeId)
  	// CollectDeadNodeAndFullVolumes recurses down to rack nodes, which report
  	// full and crowded volumes of their data nodes to the topology.
  	CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64, growThreshold float64)

  	// IsDataNode, IsRack and IsDataCenter report the kind of node.
  	IsDataNode() bool
  	IsRack() bool
  	IsDataCenter() bool
  	// Children returns the node's current children.
  	Children() []Node
  	// Parent returns the node's parent, or nil when it has none.
  	Parent() Node

  	GetValue() interface{} // reference to the concrete topology, data center, rack or data node
  }
  // NodeImpl is the shared implementation of Node: it stores identity, the
  // parent/children links, disk usage counters and the maximum volume id.
  type NodeImpl struct {
  	// diskUsages holds the usage counters, looked up per disk type.
  	diskUsages *DiskUsages
  	id         NodeId
  	// parent is nil for a node that is not linked under another node.
  	parent Node
  	sync.RWMutex // lock children
  	children     map[NodeId]Node
  	// maxVolumeId is only ever raised (see UpAdjustMaxVolumeId).
  	maxVolumeId needle.VolumeId

  	//for rack, data center, topology
  	// nodeType is compared against "DataNode", "Rack" and "DataCenter" by the
  	// IsDataNode / IsRack / IsDataCenter methods.
  	nodeType string
  	// value is what GetValue returns; GetTopology expects the root node's
  	// value to be a *Topology.
  	value interface{}
  }
  // GetDiskUsages returns the node's disk usage counters. The stored pointer is
  // returned as is, not a copy.
  func (n *NodeImpl) GetDiskUsages() *DiskUsages {
  	return n.diskUsages
  }
  49. // the first node must satisfy filterFirstNodeFn(), the rest nodes must have one free slot
  50. func (n *NodeImpl) PickNodesByWeight(numberOfNodes int, option *VolumeGrowOption, filterFirstNodeFn func(dn Node) error) (firstNode Node, restNodes []Node, err error) {
  51. var totalWeights int64
  52. var errs []string
  53. n.RLock()
  54. candidates := make([]Node, 0, len(n.children))
  55. candidatesWeights := make([]int64, 0, len(n.children))
  56. //pick nodes which has enough free volumes as candidates, and use free volumes number as node weight.
  57. for _, node := range n.children {
  58. if node.AvailableSpaceFor(option) <= 0 {
  59. continue
  60. }
  61. totalWeights += node.AvailableSpaceFor(option)
  62. candidates = append(candidates, node)
  63. candidatesWeights = append(candidatesWeights, node.AvailableSpaceFor(option))
  64. }
  65. n.RUnlock()
  66. if len(candidates) < numberOfNodes {
  67. glog.V(0).Infoln(n.Id(), "failed to pick", numberOfNodes, "from ", len(candidates), "node candidates")
  68. return nil, nil, errors.New("Not enough data nodes found!")
  69. }
  70. //pick nodes randomly by weights, the node picked earlier has higher final weights
  71. sortedCandidates := make([]Node, 0, len(candidates))
  72. for i := 0; i < len(candidates); i++ {
  73. weightsInterval := rand.Int63n(totalWeights)
  74. lastWeights := int64(0)
  75. for k, weights := range candidatesWeights {
  76. if (weightsInterval >= lastWeights) && (weightsInterval < lastWeights+weights) {
  77. sortedCandidates = append(sortedCandidates, candidates[k])
  78. candidatesWeights[k] = 0
  79. totalWeights -= weights
  80. break
  81. }
  82. lastWeights += weights
  83. }
  84. }
  85. restNodes = make([]Node, 0, numberOfNodes-1)
  86. ret := false
  87. n.RLock()
  88. for k, node := range sortedCandidates {
  89. if err := filterFirstNodeFn(node); err == nil {
  90. firstNode = node
  91. if k >= numberOfNodes-1 {
  92. restNodes = sortedCandidates[:numberOfNodes-1]
  93. } else {
  94. restNodes = append(restNodes, sortedCandidates[:k]...)
  95. restNodes = append(restNodes, sortedCandidates[k+1:numberOfNodes]...)
  96. }
  97. ret = true
  98. break
  99. } else {
  100. errs = append(errs, string(node.Id())+":"+err.Error())
  101. }
  102. }
  103. n.RUnlock()
  104. if !ret {
  105. return nil, nil, errors.New("No matching data node found! \n" + strings.Join(errs, "\n"))
  106. }
  107. return
  108. }
  // IsDataNode reports whether the node's type is "DataNode".
  func (n *NodeImpl) IsDataNode() bool {
  	return n.nodeType == "DataNode"
  }
  // IsRack reports whether the node's type is "Rack".
  func (n *NodeImpl) IsRack() bool {
  	return n.nodeType == "Rack"
  }
  // IsDataCenter reports whether the node's type is "DataCenter".
  func (n *NodeImpl) IsDataCenter() bool {
  	return n.nodeType == "DataCenter"
  }
  118. func (n *NodeImpl) String() string {
  119. if n.parent != nil {
  120. return n.parent.String() + ":" + string(n.id)
  121. }
  122. return string(n.id)
  123. }
  // Id returns the node's identifier.
  func (n *NodeImpl) Id() NodeId {
  	return n.id
  }
  // getOrCreateDisk returns the usage counters for diskType by delegating to
  // the node's DiskUsages.
  func (n *NodeImpl) getOrCreateDisk(diskType types.DiskType) *DiskUsageCounts {
  	return n.diskUsages.getOrCreateDisk(diskType)
  }
  130. func (n *NodeImpl) AvailableSpaceFor(option *VolumeGrowOption) int64 {
  131. t := n.getOrCreateDisk(option.DiskType)
  132. freeVolumeSlotCount := atomic.LoadInt64(&t.maxVolumeCount) + atomic.LoadInt64(&t.remoteVolumeCount) - atomic.LoadInt64(&t.volumeCount)
  133. ecShardCount := atomic.LoadInt64(&t.ecShardCount)
  134. if ecShardCount > 0 {
  135. freeVolumeSlotCount = freeVolumeSlotCount - ecShardCount/erasure_coding.DataShardsCount - 1
  136. }
  137. return freeVolumeSlotCount
  138. }
  // SetParent replaces the node's parent; passing nil detaches the node.
  // The assignment is made without taking the node's lock.
  func (n *NodeImpl) SetParent(node Node) {
  	n.parent = node
  }
  142. func (n *NodeImpl) Children() (ret []Node) {
  143. n.RLock()
  144. defer n.RUnlock()
  145. for _, c := range n.children {
  146. ret = append(ret, c)
  147. }
  148. return ret
  149. }
  // Parent returns the node's parent, or nil when none is set.
  func (n *NodeImpl) Parent() Node {
  	return n.parent
  }
  // GetValue returns the value stored on the node. GetTopology type-asserts the
  // root node's value to *Topology.
  func (n *NodeImpl) GetValue() interface{} {
  	return n.value
  }
  156. func (n *NodeImpl) ReserveOneVolume(r int64, option *VolumeGrowOption) (assignedNode *DataNode, err error) {
  157. n.RLock()
  158. defer n.RUnlock()
  159. for _, node := range n.children {
  160. freeSpace := node.AvailableSpaceFor(option)
  161. // fmt.Println("r =", r, ", node =", node, ", freeSpace =", freeSpace)
  162. if freeSpace <= 0 {
  163. continue
  164. }
  165. if r >= freeSpace {
  166. r -= freeSpace
  167. } else {
  168. if node.IsDataNode() && node.AvailableSpaceFor(option) > 0 {
  169. // fmt.Println("vid =", vid, " assigned to node =", node, ", freeSpace =", node.FreeSpace())
  170. dn := node.(*DataNode)
  171. if dn.IsTerminating {
  172. continue
  173. }
  174. return dn, nil
  175. }
  176. assignedNode, err = node.ReserveOneVolume(r, option)
  177. if err == nil {
  178. return
  179. }
  180. }
  181. }
  182. return nil, errors.New("No free volume slot found!")
  183. }
  184. func (n *NodeImpl) UpAdjustDiskUsageDelta(deltaDiskUsages *DiskUsages) { //can be negative
  185. for diskType, diskUsage := range deltaDiskUsages.usages {
  186. existingDisk := n.getOrCreateDisk(diskType)
  187. existingDisk.addDiskUsageCounts(diskUsage)
  188. }
  189. if n.parent != nil {
  190. n.parent.UpAdjustDiskUsageDelta(deltaDiskUsages)
  191. }
  192. }
  193. func (n *NodeImpl) UpAdjustMaxVolumeId(vid needle.VolumeId) { //can be negative
  194. if n.maxVolumeId < vid {
  195. n.maxVolumeId = vid
  196. if n.parent != nil {
  197. n.parent.UpAdjustMaxVolumeId(vid)
  198. }
  199. }
  200. }
  // GetMaxVolumeId returns the largest volume id recorded on this node.
  func (n *NodeImpl) GetMaxVolumeId() needle.VolumeId {
  	return n.maxVolumeId
  }
  // LinkChildNode adds node as a child of n while holding the write lock.
  // The work itself is done by doLinkChildNode.
  func (n *NodeImpl) LinkChildNode(node Node) {
  	n.Lock()
  	defer n.Unlock()
  	n.doLinkChildNode(node)
  }
  209. func (n *NodeImpl) doLinkChildNode(node Node) {
  210. if n.children[node.Id()] == nil {
  211. n.children[node.Id()] = node
  212. n.UpAdjustDiskUsageDelta(node.GetDiskUsages())
  213. n.UpAdjustMaxVolumeId(node.GetMaxVolumeId())
  214. node.SetParent(n)
  215. glog.V(0).Infoln(n, "adds child", node.Id())
  216. }
  217. }
  218. func (n *NodeImpl) UnlinkChildNode(nodeId NodeId) {
  219. n.Lock()
  220. defer n.Unlock()
  221. node := n.children[nodeId]
  222. if node != nil {
  223. node.SetParent(nil)
  224. delete(n.children, node.Id())
  225. n.UpAdjustDiskUsageDelta(node.GetDiskUsages().negative())
  226. glog.V(0).Infoln(n, "removes", node.Id())
  227. }
  228. }
  229. func (n *NodeImpl) CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64, growThreshold float64) {
  230. if n.IsRack() {
  231. for _, c := range n.Children() {
  232. dn := c.(*DataNode) //can not cast n to DataNode
  233. dn.RLock()
  234. for _, v := range dn.GetVolumes() {
  235. if v.Size >= volumeSizeLimit {
  236. //fmt.Println("volume",v.Id,"size",v.Size,">",volumeSizeLimit)
  237. n.GetTopology().chanFullVolumes <- v
  238. } else if float64(v.Size) > float64(volumeSizeLimit)*growThreshold {
  239. n.GetTopology().chanCrowdedVolumes <- v
  240. }
  241. copyCount := v.ReplicaPlacement.GetCopyCount()
  242. if copyCount > 1 {
  243. if copyCount > len(n.GetTopology().Lookup(v.Collection, v.Id)) {
  244. stats.MasterReplicaPlacementMismatch.WithLabelValues(v.Collection, v.Id.String()).Set(1)
  245. } else {
  246. stats.MasterReplicaPlacementMismatch.WithLabelValues(v.Collection, v.Id.String()).Set(0)
  247. }
  248. }
  249. }
  250. dn.RUnlock()
  251. }
  252. } else {
  253. for _, c := range n.Children() {
  254. c.CollectDeadNodeAndFullVolumes(freshThreshHold, volumeSizeLimit, growThreshold)
  255. }
  256. }
  257. }
  258. func (n *NodeImpl) GetTopology() *Topology {
  259. var p Node
  260. p = n
  261. for p.Parent() != nil {
  262. p = p.Parent()
  263. }
  264. return p.GetValue().(*Topology)
  265. }