You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

84 lines
3.1 KiB

Merge accumulated changes related to message queue (#5098) * balance partitions on brokers * prepare topic partition first and then publish, move partition * purge unused APIs * clean up * adjust logs * add BalanceTopics() grpc API * configure topic * configure topic command * refactor * repair missing partitions * sequence of operations to ensure ordering * proto to close publishers and consumers * rename file * topic partition versioned by unixTimeNs * create local topic partition * close publishers * randomize the client name * wait until no publishers * logs * close stop publisher channel * send last ack * comments * comment * comments * support list of brokers * add cli options * Update .gitignore * logs * return io.eof directly * refactor * optionally create topic * refactoring * detect consumer disconnection * sub client wait for more messages * subscribe by time stamp * rename * rename to sub_balancer * rename * adjust comments * rename * fix compilation * rename * rename * SubscriberToSubCoordinator * sticky rebalance * go fmt * add tests * balance partitions on brokers * prepare topic partition first and then publish, move partition * purge unused APIs * clean up * adjust logs * add BalanceTopics() grpc API * configure topic * configure topic command * refactor * repair missing partitions * sequence of operations to ensure ordering * proto to close publishers and consumers * rename file * topic partition versioned by unixTimeNs * create local topic partition * close publishers * randomize the client name * wait until no publishers * logs * close stop publisher channel * send last ack * comments * comment * comments * support list of brokers * add cli options * Update .gitignore * logs * return io.eof directly * refactor * optionally create topic * refactoring * detect consumer disconnection * sub client wait for more messages * subscribe by time stamp * rename * rename to sub_balancer * rename * adjust comments * rename * fix compilation * rename * rename 
* SubscriberToSubCoordinator * sticky rebalance * go fmt * add tests * tracking topic=>broker * merge * comment
1 year ago
1 year ago
Merge accumulated changes related to message queue (#5098) * balance partitions on brokers * prepare topic partition first and then publish, move partition * purge unused APIs * clean up * adjust logs * add BalanceTopics() grpc API * configure topic * configure topic command * refactor * repair missing partitions * sequence of operations to ensure ordering * proto to close publishers and consumers * rename file * topic partition versioned by unixTimeNs * create local topic partition * close publishers * randomize the client name * wait until no publishers * logs * close stop publisher channel * send last ack * comments * comment * comments * support list of brokers * add cli options * Update .gitignore * logs * return io.eof directly * refactor * optionally create topic * refactoring * detect consumer disconnection * sub client wait for more messages * subscribe by time stamp * rename * rename to sub_balancer * rename * adjust comments * rename * fix compilation * rename * rename * SubscriberToSubCoordinator * sticky rebalance * go fmt * add tests * balance partitions on brokers * prepare topic partition first and then publish, move partition * purge unused APIs * clean up * adjust logs * add BalanceTopics() grpc API * configure topic * configure topic command * refactor * repair missing partitions * sequence of operations to ensure ordering * proto to close publishers and consumers * rename file * topic partition versioned by unixTimeNs * create local topic partition * close publishers * randomize the client name * wait until no publishers * logs * close stop publisher channel * send last ack * comments * comment * comments * support list of brokers * add cli options * Update .gitignore * logs * return io.eof directly * refactor * optionally create topic * refactoring * detect consumer disconnection * sub client wait for more messages * subscribe by time stamp * rename * rename to sub_balancer * rename * adjust comments * rename * fix compilation * rename * rename 
* SubscriberToSubCoordinator * sticky rebalance * go fmt * add tests * tracking topic=>broker * merge * comment
1 year ago
1 year ago
Merge accumulated changes related to message queue (#5098) * balance partitions on brokers * prepare topic partition first and then publish, move partition * purge unused APIs * clean up * adjust logs * add BalanceTopics() grpc API * configure topic * configure topic command * refactor * repair missing partitions * sequence of operations to ensure ordering * proto to close publishers and consumers * rename file * topic partition versioned by unixTimeNs * create local topic partition * close publishers * randomize the client name * wait until no publishers * logs * close stop publisher channel * send last ack * comments * comment * comments * support list of brokers * add cli options * Update .gitignore * logs * return io.eof directly * refactor * optionally create topic * refactoring * detect consumer disconnection * sub client wait for more messages * subscribe by time stamp * rename * rename to sub_balancer * rename * adjust comments * rename * fix compilation * rename * rename * SubscriberToSubCoordinator * sticky rebalance * go fmt * add tests * balance partitions on brokers * prepare topic partition first and then publish, move partition * purge unused APIs * clean up * adjust logs * add BalanceTopics() grpc API * configure topic * configure topic command * refactor * repair missing partitions * sequence of operations to ensure ordering * proto to close publishers and consumers * rename file * topic partition versioned by unixTimeNs * create local topic partition * close publishers * randomize the client name * wait until no publishers * logs * close stop publisher channel * send last ack * comments * comment * comments * support list of brokers * add cli options * Update .gitignore * logs * return io.eof directly * refactor * optionally create topic * refactoring * detect consumer disconnection * sub client wait for more messages * subscribe by time stamp * rename * rename to sub_balancer * rename * adjust comments * rename * fix compilation * rename * rename 
* SubscriberToSubCoordinator * sticky rebalance * go fmt * add tests * tracking topic=>broker * merge * comment
1 year ago
  1. package pub_balancer
  2. import (
  3. cmap "github.com/orcaman/concurrent-map/v2"
  4. "github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
  5. )
  const (
  	// MaxPartitionCount is the number of partition slots handed to
  	// NewPartitionSlotToBrokerList for every topic. The value is 2520, the
  	// least common multiple of 1 through 10.
  	// NOTE(review): presumably chosen so the slot ring divides evenly for
  	// any partition count from 1 to 10 — confirm with the partitioning code.
  	MaxPartitionCount = 5 * 7 * 8 * 9

  	// LockBrokerBalancer is the string identifier "broker_balancer".
  	// NOTE(review): looks like the name of the lock that elects the
  	// balancer among brokers — confirm against its users.
  	LockBrokerBalancer = "broker_balancer"
  )
  10. // Balancer collects stats from all brokers.
  11. //
  12. // When publishers wants to create topics, it picks brokers to assign the topic partitions.
  13. // When consumers wants to subscribe topics, it tells which brokers are serving the topic partitions.
  14. //
  15. // When a partition needs to be split or merged, or a partition needs to be moved to another broker,
  16. // the balancer will let the broker tell the consumer instance to stop processing the partition.
  17. // The existing consumer instance will flush the internal state, and then stop processing.
  18. // Then the balancer will tell the brokers to start sending new messages in the new/moved partition to the consumer instances.
  19. //
  20. // Failover to standby consumer instances:
  21. //
  22. // A consumer group can have min and max number of consumer instances.
  23. // For consumer instances joined after the max number, they will be in standby mode.
  24. //
  25. // When a consumer instance is down, the broker will notice this and inform the balancer.
  26. // The balancer will then tell the broker to send the partition to another standby consumer instance.
  27. type Balancer struct {
  28. Brokers cmap.ConcurrentMap[string, *BrokerStats] // key: broker address
  29. // Collected from all brokers when they connect to the broker leader
  30. TopicToBrokers cmap.ConcurrentMap[string, *PartitionSlotToBrokerList] // key: topic name
  31. }
  32. func NewBalancer() *Balancer {
  33. return &Balancer{
  34. Brokers: cmap.New[*BrokerStats](),
  35. TopicToBrokers: cmap.New[*PartitionSlotToBrokerList](),
  36. }
  37. }
  38. func (balancer *Balancer) OnBrokerConnected(broker string) (brokerStats *BrokerStats) {
  39. var found bool
  40. brokerStats, found = balancer.Brokers.Get(broker)
  41. if !found {
  42. brokerStats = NewBrokerStats()
  43. if !balancer.Brokers.SetIfAbsent(broker, brokerStats) {
  44. brokerStats, _ = balancer.Brokers.Get(broker)
  45. }
  46. }
  47. return brokerStats
  48. }
  49. func (balancer *Balancer) OnBrokerDisconnected(broker string, stats *BrokerStats) {
  50. balancer.Brokers.Remove(broker)
  51. // update TopicToBrokers
  52. for _, topic := range stats.Topics {
  53. partitionSlotToBrokerList, found := balancer.TopicToBrokers.Get(topic.String())
  54. if !found {
  55. continue
  56. }
  57. partitionSlotToBrokerList.RemoveBroker(broker)
  58. }
  59. }
  60. func (balancer *Balancer) OnBrokerStatsUpdated(broker string, brokerStats *BrokerStats, receivedStats *mq_pb.BrokerStats) {
  61. brokerStats.UpdateStats(receivedStats)
  62. // update TopicToBrokers
  63. for _, topicPartitionStats := range receivedStats.Stats {
  64. topic := topicPartitionStats.Topic
  65. partition := topicPartitionStats.Partition
  66. partitionSlotToBrokerList, found := balancer.TopicToBrokers.Get(topic.String())
  67. if !found {
  68. partitionSlotToBrokerList = NewPartitionSlotToBrokerList(MaxPartitionCount)
  69. if !balancer.TopicToBrokers.SetIfAbsent(topic.String(), partitionSlotToBrokerList) {
  70. partitionSlotToBrokerList, _ = balancer.TopicToBrokers.Get(topic.String())
  71. }
  72. }
  73. partitionSlotToBrokerList.AddBroker(partition, broker)
  74. }
  75. }