You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

461 lines
12 KiB

6 years ago
6 years ago
4 years ago
2 years ago
7 years ago
6 years ago
7 years ago
6 years ago
7 years ago
adding locking to avoid nil VolumeLocationList fix panic: runtime error: invalid memory address or nil pointer dereference Oct 22 00:53:44 bedb-master1 weed[8055]: [signal SIGSEGV: segmentation violation code=0x1 addr=0x8 pc=0x17658da] Oct 22 00:53:44 bedb-master1 weed[8055]: goroutine 310 [running]: Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/topology.(*VolumeLocationList).Length(...) Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/topology/volume_location_list.go:35 Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/topology.(*VolumeLayout).enoughCopies(...) Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/topology/volume_layout.go:376 Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/topology.(*VolumeLayout).ensureCorrectWritables(0xc000111d50, 0xc000b55438) Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/topology/volume_layout.go:202 +0x5a Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/topology.(*Topology).SyncDataNodeRegistration(0xc00042ac60, 0xc001454d30, 0x1, 0x1, 0xc0005fc000, 0xc00135de40, 0x4, 0xc00135de50, 0x10, 0x10d, ...) Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/topology/topology.go:224 +0x616 Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/server.(*MasterServer).SendHeartbeat(0xc000162700, 0x23b97c0, 0xc000ae2c90, 0x0, 0x0) Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/server/master_grpc_server.go:106 +0x325 Oct 22 00:53:44 bedb-master1 weed[8055]: github.com/chrislusf/seaweedfs/weed/pb/master_pb._Seaweed_SendHeartbeat_Handler(0x1f8e7c0, 0xc000162700, 0x23b0a60, 0xc00024b440, 0x3172c38, 0xc000ab7100) Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/seaweedfs/weed/pb/master_pb/master.pb.go:4250 +0xad Oct 22 00:53:44 bedb-master1 weed[8055]: google.golang.org/grpc.(*Server).processStreamingRPC(0xc0001f31e0, 0x23bb800, 0xc000ac5500, 0xc000ab7100, 0xc0001fea80, 0x311fec0, 0x0, 0x0, 0x0) Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/go/pkg/mod/google.golang.org/grpc@v1.29.1/server.go:1329 +0xcd8 Oct 22 00:53:44 bedb-master1 weed[8055]: google.golang.org/grpc.(*Server).handleStream(0xc0001f31e0, 0x23bb800, 0xc000ac5500, 0xc000ab7100, 0x0) Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/go/pkg/mod/google.golang.org/grpc@v1.29.1/server.go:1409 +0xc5c Oct 22 00:53:44 bedb-master1 weed[8055]: google.golang.org/grpc.(*Server).serveStreams.func1.1(0xc0001ce8b0, 0xc0001f31e0, 0x23bb800, 0xc000ac5500, 0xc000ab7100) Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/go/pkg/mod/google.golang.org/grpc@v1.29.1/server.go:746 +0xa5 Oct 22 00:53:44 bedb-master1 weed[8055]: created by google.golang.org/grpc.(*Server).serveStreams.func1 Oct 22 00:53:44 bedb-master1 weed[8055]: #011/root/go/pkg/mod/google.golang.org/grpc@v1.29.1/server.go:744 +0xa5 Oct 22 00:53:44 bedb-master1 systemd[1]: weedmaster.service: Main process exited, code=exited, status=2/INVALIDARGUMENT Oct 22 00:53:44 bedb-master1 systemd[1]: weedmaster.service: Failed with result 'exit-code'.
4 years ago
7 years ago
3 years ago
3 years ago
  1. package topology
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "math/rand/v2"
  7. "sync"
  8. "time"
  9. "github.com/seaweedfs/seaweedfs/weed/pb"
  10. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  11. backoff "github.com/cenkalti/backoff/v4"
  12. hashicorpRaft "github.com/hashicorp/raft"
  13. "github.com/seaweedfs/raft"
  14. "github.com/seaweedfs/seaweedfs/weed/glog"
  15. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  16. "github.com/seaweedfs/seaweedfs/weed/sequence"
  17. "github.com/seaweedfs/seaweedfs/weed/stats"
  18. "github.com/seaweedfs/seaweedfs/weed/storage"
  19. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  20. "github.com/seaweedfs/seaweedfs/weed/storage/super_block"
  21. "github.com/seaweedfs/seaweedfs/weed/util"
  22. )
  23. type Topology struct {
  24. vacuumLockCounter int64
  25. NodeImpl
  26. collectionMap *util.ConcurrentReadMap
  27. ecShardMap map[needle.VolumeId]*EcShardLocations
  28. ecShardMapLock sync.RWMutex
  29. pulse int64
  30. volumeSizeLimit uint64
  31. replicationAsMin bool
  32. isDisableVacuum bool
  33. Sequence sequence.Sequencer
  34. chanFullVolumes chan storage.VolumeInfo
  35. chanCrowdedVolumes chan storage.VolumeInfo
  36. Configuration *Configuration
  37. RaftServer raft.Server
  38. RaftServerAccessLock sync.RWMutex
  39. HashicorpRaft *hashicorpRaft.Raft
  40. barrierLock sync.Mutex
  41. barrierDone bool
  42. UuidAccessLock sync.RWMutex
  43. UuidMap map[string][]string
  44. LastLeaderChangeTime time.Time
  45. }
  46. func NewTopology(id string, seq sequence.Sequencer, volumeSizeLimit uint64, pulse int, replicationAsMin bool) *Topology {
  47. t := &Topology{}
  48. t.id = NodeId(id)
  49. t.nodeType = "Topology"
  50. t.NodeImpl.value = t
  51. t.diskUsages = newDiskUsages()
  52. t.children = make(map[NodeId]Node)
  53. t.collectionMap = util.NewConcurrentReadMap()
  54. t.ecShardMap = make(map[needle.VolumeId]*EcShardLocations)
  55. t.pulse = int64(pulse)
  56. t.volumeSizeLimit = volumeSizeLimit
  57. t.replicationAsMin = replicationAsMin
  58. t.Sequence = seq
  59. t.chanFullVolumes = make(chan storage.VolumeInfo)
  60. t.chanCrowdedVolumes = make(chan storage.VolumeInfo)
  61. t.Configuration = &Configuration{}
  62. return t
  63. }
  64. func (t *Topology) IsChildLocked() (bool, error) {
  65. if t.IsLocked() {
  66. return true, errors.New("topology is locked")
  67. }
  68. for _, dcNode := range t.Children() {
  69. if dcNode.IsLocked() {
  70. return true, fmt.Errorf("topology child %s is locked", dcNode.String())
  71. }
  72. for _, rackNode := range dcNode.Children() {
  73. if rackNode.IsLocked() {
  74. return true, fmt.Errorf("dc %s child %s is locked", dcNode.String(), rackNode.String())
  75. }
  76. for _, dataNode := range rackNode.Children() {
  77. if dataNode.IsLocked() {
  78. return true, fmt.Errorf("rack %s child %s is locked", rackNode.String(), dataNode.Id())
  79. }
  80. }
  81. }
  82. }
  83. return false, nil
  84. }
  85. func (t *Topology) IsLeader() bool {
  86. t.RaftServerAccessLock.RLock()
  87. defer t.RaftServerAccessLock.RUnlock()
  88. if t.RaftServer != nil {
  89. if t.RaftServer.State() == raft.Leader {
  90. return true
  91. }
  92. if leader, err := t.Leader(); err == nil {
  93. if pb.ServerAddress(t.RaftServer.Name()) == leader {
  94. return true
  95. }
  96. }
  97. } else if t.HashicorpRaft != nil {
  98. if t.HashicorpRaft.State() == hashicorpRaft.Leader {
  99. return true
  100. }
  101. }
  102. return false
  103. }
  104. func (t *Topology) IsLeaderAndCanRead() bool {
  105. if t.RaftServer != nil {
  106. return t.IsLeader()
  107. } else if t.HashicorpRaft != nil {
  108. return t.IsLeader() && t.DoBarrier()
  109. } else {
  110. return false
  111. }
  112. }
  113. func (t *Topology) DoBarrier() bool {
  114. t.barrierLock.Lock()
  115. defer t.barrierLock.Unlock()
  116. if t.barrierDone {
  117. return true
  118. }
  119. glog.V(0).Infof("raft do barrier")
  120. barrier := t.HashicorpRaft.Barrier(2 * time.Minute)
  121. if err := barrier.Error(); err != nil {
  122. glog.Errorf("failed to wait for barrier, error %s", err)
  123. return false
  124. }
  125. t.barrierDone = true
  126. glog.V(0).Infof("raft do barrier success")
  127. return true
  128. }
  129. func (t *Topology) BarrierReset() {
  130. t.barrierLock.Lock()
  131. defer t.barrierLock.Unlock()
  132. t.barrierDone = false
  133. }
  134. func (t *Topology) Leader() (l pb.ServerAddress, err error) {
  135. exponentialBackoff := backoff.NewExponentialBackOff()
  136. exponentialBackoff.InitialInterval = 100 * time.Millisecond
  137. exponentialBackoff.MaxElapsedTime = 20 * time.Second
  138. leaderNotSelected := errors.New("leader not selected yet")
  139. l, err = backoff.RetryWithData(
  140. func() (l pb.ServerAddress, err error) {
  141. l, err = t.MaybeLeader()
  142. if err == nil && l == "" {
  143. err = leaderNotSelected
  144. }
  145. return l, err
  146. },
  147. exponentialBackoff)
  148. if err == leaderNotSelected {
  149. l = ""
  150. }
  151. return l, err
  152. }
  153. func (t *Topology) MaybeLeader() (l pb.ServerAddress, err error) {
  154. t.RaftServerAccessLock.RLock()
  155. defer t.RaftServerAccessLock.RUnlock()
  156. if t.RaftServer != nil {
  157. l = pb.ServerAddress(t.RaftServer.Leader())
  158. } else if t.HashicorpRaft != nil {
  159. l = pb.ServerAddress(t.HashicorpRaft.Leader())
  160. } else {
  161. err = errors.New("Raft Server not ready yet!")
  162. }
  163. return
  164. }
  165. func (t *Topology) Lookup(collection string, vid needle.VolumeId) (dataNodes []*DataNode) {
  166. // maybe an issue if lots of collections?
  167. if collection == "" {
  168. for _, c := range t.collectionMap.Items() {
  169. if list := c.(*Collection).Lookup(vid); list != nil {
  170. return list
  171. }
  172. }
  173. } else {
  174. if c, ok := t.collectionMap.Find(collection); ok {
  175. return c.(*Collection).Lookup(vid)
  176. }
  177. }
  178. if locations, found := t.LookupEcShards(vid); found {
  179. for _, loc := range locations.Locations {
  180. dataNodes = append(dataNodes, loc...)
  181. }
  182. return dataNodes
  183. }
  184. return nil
  185. }
  186. func (t *Topology) NextVolumeId() (needle.VolumeId, error) {
  187. if !t.IsLeaderAndCanRead() {
  188. return 0, fmt.Errorf("as leader can not read yet")
  189. }
  190. vid := t.GetMaxVolumeId()
  191. next := vid.Next()
  192. t.RaftServerAccessLock.RLock()
  193. defer t.RaftServerAccessLock.RUnlock()
  194. if t.RaftServer != nil {
  195. if _, err := t.RaftServer.Do(NewMaxVolumeIdCommand(next)); err != nil {
  196. return 0, err
  197. }
  198. } else if t.HashicorpRaft != nil {
  199. b, err := json.Marshal(NewMaxVolumeIdCommand(next))
  200. if err != nil {
  201. return 0, fmt.Errorf("failed marshal NewMaxVolumeIdCommand: %+v", err)
  202. }
  203. if future := t.HashicorpRaft.Apply(b, time.Second); future.Error() != nil {
  204. return 0, future.Error()
  205. }
  206. }
  207. return next, nil
  208. }
  209. func (t *Topology) PickForWrite(requestedCount uint64, option *VolumeGrowOption, volumeLayout *VolumeLayout) (fileId string, count uint64, volumeLocationList *VolumeLocationList, shouldGrow bool, err error) {
  210. var vid needle.VolumeId
  211. vid, count, volumeLocationList, shouldGrow, err = volumeLayout.PickForWrite(requestedCount, option)
  212. if err != nil {
  213. return "", 0, nil, shouldGrow, fmt.Errorf("failed to find writable volumes for collection:%s replication:%s ttl:%s error: %v", option.Collection, option.ReplicaPlacement.String(), option.Ttl.String(), err)
  214. }
  215. if volumeLocationList == nil || volumeLocationList.Length() == 0 {
  216. return "", 0, nil, shouldGrow, fmt.Errorf("%s available for collection:%s replication:%s ttl:%s", noWritableVolumes, option.Collection, option.ReplicaPlacement.String(), option.Ttl.String())
  217. }
  218. nextFileId := t.Sequence.NextFileId(requestedCount)
  219. fileId = needle.NewFileId(vid, nextFileId, rand.Uint32()).String()
  220. return fileId, count, volumeLocationList, shouldGrow, nil
  221. }
  222. func (t *Topology) GetVolumeLayout(collectionName string, rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType types.DiskType) *VolumeLayout {
  223. return t.collectionMap.Get(collectionName, func() interface{} {
  224. return NewCollection(collectionName, t.volumeSizeLimit, t.replicationAsMin)
  225. }).(*Collection).GetOrCreateVolumeLayout(rp, ttl, diskType)
  226. }
  227. func (t *Topology) ListCollections(includeNormalVolumes, includeEcVolumes bool) (ret []string) {
  228. mapOfCollections := make(map[string]bool)
  229. for _, c := range t.collectionMap.Items() {
  230. mapOfCollections[c.(*Collection).Name] = true
  231. }
  232. if includeEcVolumes {
  233. t.ecShardMapLock.RLock()
  234. for _, ecVolumeLocation := range t.ecShardMap {
  235. mapOfCollections[ecVolumeLocation.Collection] = true
  236. }
  237. t.ecShardMapLock.RUnlock()
  238. }
  239. for k := range mapOfCollections {
  240. ret = append(ret, k)
  241. }
  242. return ret
  243. }
  244. func (t *Topology) FindCollection(collectionName string) (*Collection, bool) {
  245. c, hasCollection := t.collectionMap.Find(collectionName)
  246. if !hasCollection {
  247. return nil, false
  248. }
  249. return c.(*Collection), hasCollection
  250. }
  251. func (t *Topology) DeleteCollection(collectionName string) {
  252. t.collectionMap.Delete(collectionName)
  253. }
  254. func (t *Topology) DeleteLayout(collectionName string, rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType types.DiskType) {
  255. collection, found := t.FindCollection(collectionName)
  256. if !found {
  257. return
  258. }
  259. collection.DeleteVolumeLayout(rp, ttl, diskType)
  260. if len(collection.storageType2VolumeLayout.Items()) == 0 {
  261. t.DeleteCollection(collectionName)
  262. }
  263. }
  264. func (t *Topology) RegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) {
  265. diskType := types.ToDiskType(v.DiskType)
  266. vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  267. vl.RegisterVolume(&v, dn)
  268. vl.EnsureCorrectWritables(&v)
  269. }
  270. func (t *Topology) UnRegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) {
  271. glog.Infof("removing volume info: %+v from %v", v, dn.id)
  272. if v.ReplicaPlacement.GetCopyCount() > 1 {
  273. stats.MasterReplicaPlacementMismatch.WithLabelValues(v.Collection, v.Id.String()).Set(0)
  274. }
  275. diskType := types.ToDiskType(v.DiskType)
  276. volumeLayout := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  277. volumeLayout.UnRegisterVolume(&v, dn)
  278. if volumeLayout.isEmpty() {
  279. t.DeleteLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  280. }
  281. }
  282. func (t *Topology) DataCenterExists(dcName string) bool {
  283. return dcName == "" || t.GetDataCenter(dcName) != nil
  284. }
  285. func (t *Topology) GetDataCenter(dcName string) (dc *DataCenter) {
  286. t.RLock()
  287. defer t.RUnlock()
  288. for _, c := range t.children {
  289. dc = c.(*DataCenter)
  290. if string(dc.Id()) == dcName {
  291. return dc
  292. }
  293. }
  294. return dc
  295. }
  296. func (t *Topology) GetOrCreateDataCenter(dcName string) *DataCenter {
  297. t.Lock()
  298. defer t.Unlock()
  299. for _, c := range t.children {
  300. dc := c.(*DataCenter)
  301. if string(dc.Id()) == dcName {
  302. return dc
  303. }
  304. }
  305. dc := NewDataCenter(dcName)
  306. t.doLinkChildNode(dc)
  307. return dc
  308. }
  309. func (t *Topology) ListDataCenters() (dcs []string) {
  310. t.RLock()
  311. defer t.RUnlock()
  312. for _, c := range t.children {
  313. dcs = append(dcs, string(c.(*DataCenter).Id()))
  314. }
  315. return dcs
  316. }
  317. func (t *Topology) ListDCAndRacks() (dcs map[NodeId][]NodeId) {
  318. t.RLock()
  319. defer t.RUnlock()
  320. dcs = make(map[NodeId][]NodeId)
  321. for _, dcNode := range t.children {
  322. dcNodeId := dcNode.(*DataCenter).Id()
  323. for _, rackNode := range dcNode.Children() {
  324. dcs[dcNodeId] = append(dcs[dcNodeId], rackNode.(*Rack).Id())
  325. }
  326. }
  327. return dcs
  328. }
  329. func (t *Topology) SyncDataNodeRegistration(volumes []*master_pb.VolumeInformationMessage, dn *DataNode) (newVolumes, deletedVolumes []storage.VolumeInfo) {
  330. // convert into in memory struct storage.VolumeInfo
  331. var volumeInfos []storage.VolumeInfo
  332. for _, v := range volumes {
  333. if vi, err := storage.NewVolumeInfo(v); err == nil {
  334. volumeInfos = append(volumeInfos, vi)
  335. } else {
  336. glog.V(0).Infof("Fail to convert joined volume information: %v", err)
  337. }
  338. }
  339. // find out the delta volumes
  340. var changedVolumes []storage.VolumeInfo
  341. newVolumes, deletedVolumes, changedVolumes = dn.UpdateVolumes(volumeInfos)
  342. for _, v := range newVolumes {
  343. t.RegisterVolumeLayout(v, dn)
  344. }
  345. for _, v := range deletedVolumes {
  346. t.UnRegisterVolumeLayout(v, dn)
  347. }
  348. for _, v := range changedVolumes {
  349. diskType := types.ToDiskType(v.DiskType)
  350. vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  351. vl.EnsureCorrectWritables(&v)
  352. }
  353. return
  354. }
  355. func (t *Topology) IncrementalSyncDataNodeRegistration(newVolumes, deletedVolumes []*master_pb.VolumeShortInformationMessage, dn *DataNode) {
  356. var newVis, oldVis []storage.VolumeInfo
  357. for _, v := range newVolumes {
  358. vi, err := storage.NewVolumeInfoFromShort(v)
  359. if err != nil {
  360. glog.V(0).Infof("NewVolumeInfoFromShort %v: %v", v, err)
  361. continue
  362. }
  363. newVis = append(newVis, vi)
  364. }
  365. for _, v := range deletedVolumes {
  366. vi, err := storage.NewVolumeInfoFromShort(v)
  367. if err != nil {
  368. glog.V(0).Infof("NewVolumeInfoFromShort %v: %v", v, err)
  369. continue
  370. }
  371. oldVis = append(oldVis, vi)
  372. }
  373. dn.DeltaUpdateVolumes(newVis, oldVis)
  374. for _, vi := range newVis {
  375. t.RegisterVolumeLayout(vi, dn)
  376. }
  377. for _, vi := range oldVis {
  378. t.UnRegisterVolumeLayout(vi, dn)
  379. }
  380. return
  381. }
  382. func (t *Topology) DataNodeRegistration(dcName, rackName string, dn *DataNode) {
  383. if dn.Parent() != nil {
  384. return
  385. }
  386. // registration to topo
  387. dc := t.GetOrCreateDataCenter(dcName)
  388. rack := dc.GetOrCreateRack(rackName)
  389. rack.LinkChildNode(dn)
  390. glog.Infof("[%s] reLink To topo ", dn.Id())
  391. }
  392. func (t *Topology) DisableVacuum() {
  393. glog.V(0).Infof("DisableVacuum")
  394. t.isDisableVacuum = true
  395. }
  396. func (t *Topology) EnableVacuum() {
  397. glog.V(0).Infof("EnableVacuum")
  398. t.isDisableVacuum = false
  399. }