From d935f70e3cf8a03e5d1b118ca70fd66470567435 Mon Sep 17 00:00:00 2001
From: chrislu
Date: Mon, 1 Apr 2024 22:32:30 -0700
Subject: [PATCH] shutdown follower when leader stops

PublishFollowMe now records every data message it receives in a log
buffer. Buffers flushed out of that log buffer are queued in memory
(see buildFollowerLogBuffer) instead of being written right away.

When a flush message arrives on the stream, queued buffers whose stop
time is at or before the flush timestamp are dropped, and that
timestamp is remembered as the last flush.

When the stream ends (io.EOF or any receive error) the receive loop is
left instead of returning immediately. The log buffer is shut down,
the remaining queued buffers that are newer than the last flush
timestamp are appended to a file under the partition directory
(retrying until the append succeeds), and only then does the function
return: nil for io.EOF, the stream error otherwise.

BufferedQueue gains PeekHead, which returns the item at the head of
the queue without removing it.

Dropped buffers are reported through glog, like the other messages in
this function.

---
 weed/mq/broker/broker_grpc_pub_follow.go   | 113 +++++++++++++++++++--
 weed/util/buffered_queue/buffered_queue.go |  13 +++
 2 files changed, 120 insertions(+), 6 deletions(-)

diff --git a/weed/mq/broker/broker_grpc_pub_follow.go b/weed/mq/broker/broker_grpc_pub_follow.go
index e5488a13a..358b310bf 100644
--- a/weed/mq/broker/broker_grpc_pub_follow.go
+++ b/weed/mq/broker/broker_grpc_pub_follow.go
@@ -2,13 +2,24 @@ package broker
 
 import (
 	"fmt"
+	"github.com/seaweedfs/seaweedfs/weed/filer"
 	"github.com/seaweedfs/seaweedfs/weed/glog"
+	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
 	"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
+	"github.com/seaweedfs/seaweedfs/weed/util/buffered_queue"
+	"github.com/seaweedfs/seaweedfs/weed/util/log_buffer"
 	"io"
+	"time"
 )
 
-func (b *MessageQueueBroker) PublishFollowMe(stream mq_pb.SeaweedMessaging_PublishFollowMeServer) error {
-	req, err := stream.Recv()
+type memBuffer struct {
+	buf       []byte
+	startTime time.Time
+	stopTime  time.Time
+}
+func (b *MessageQueueBroker) PublishFollowMe(stream mq_pb.SeaweedMessaging_PublishFollowMeServer) (err error) {
+	var req *mq_pb.PublishFollowMeRequest
+	req, err = stream.Recv()
 	if err != nil {
 		return err
 	}
@@ -17,21 +28,32 @@ func (b *MessageQueueBroker) PublishFollowMe(stream mq_pb.SeaweedMessaging_Publi
 		return fmt.Errorf("missing init message")
 	}
 
-	// t, p := topic.FromPbTopic(initMessage.Topic), topic.FromPbPartition(initMessage.Partition)
+	// create an in-memory queue of buffered messages
+	inMemoryBuffers := buffered_queue.NewBufferedQueue[memBuffer](4)
+	logBuffer := b.buildFollowerLogBuffer(inMemoryBuffers)
+
+	lastFlushTsNs := time.Now().UnixNano()
+
 	// follow each published messages
 	for {
 		// receive a message
-		req, err := stream.Recv()
+		req, err = stream.Recv()
 		if err != nil {
 			if err == io.EOF {
+				err = nil
 				break
 			}
 			glog.V(0).Infof("topic %v partition %v publish stream error: %v", initMessage.Topic, initMessage.Partition, err)
-			return err
+			break
 		}
 
 		// Process the received message
 		if dataMessage := req.GetData(); dataMessage != nil {
+
+			// TODO: change this to DataMessage
+			// log the message
+			logBuffer.AddToBuffer(dataMessage.Key, dataMessage.Value, dataMessage.TsNs)
+
 			// send back the ack
 			if err := stream.Send(&mq_pb.PublishFollowMeResponse{
 				AckTsNs: dataMessage.TsNs,
@@ -45,9 +67,88 @@ func (b *MessageQueueBroker) PublishFollowMe(stream mq_pb.SeaweedMessaging_Publi
 			break
 		} else if flushMessage := req.GetFlush(); flushMessage != nil {
 			glog.V(0).Infof("topic %v partition %v publish stream flushed: %v", initMessage.Topic, initMessage.Partition, flushMessage)
+
+			lastFlushTsNs = flushMessage.TsNs
+
+			// drop already flushed messages
+			for mem, found := inMemoryBuffers.PeekHead(); found; mem, found = inMemoryBuffers.PeekHead() {
+				if mem.stopTime.UnixNano() <= flushMessage.TsNs {
+					inMemoryBuffers.Dequeue()
+					glog.V(0).Infof("dropping flushed messages: %d~%d size %d", mem.startTime.UnixNano(), mem.stopTime.UnixNano(), len(mem.buf))
+				} else {
+					break
+				}
+			}
+
 		} else {
 			glog.Errorf("unknown message: %v", req)
 		}
 	}
-	return nil
+
+
+	t, p := topic.FromPbTopic(initMessage.Topic), topic.FromPbPartition(initMessage.Partition)
+
+	logBuffer.ShutdownLogBuffer()
+	// wait until all messages are sent to inMemoryBuffers
+	for !logBuffer.IsAllFlushed() {
+		time.Sleep(113 * time.Millisecond)
+	}
+
+	topicDir := fmt.Sprintf("%s/%s/%s", filer.TopicsDir, t.Namespace, t.Name)
+	partitionGeneration := time.Unix(0, p.UnixTimeNs).UTC().Format(topic.TIME_FORMAT)
+	partitionDir := fmt.Sprintf("%s/%s/%04d-%04d", topicDir, partitionGeneration, p.RangeStart, p.RangeStop)
+
+
+	// flush the remaining messages
+	inMemoryBuffers.CloseInput()
+	for mem, found := inMemoryBuffers.Dequeue(); found; mem, found = inMemoryBuffers.Dequeue() {
+		if len(mem.buf) == 0 {
+			continue
+		}
+
+		startTime, stopTime := mem.startTime.UTC(), mem.stopTime.UTC()
+
+		if stopTime.UnixNano() <= lastFlushTsNs {
+			glog.V(0).Infof("dropping remaining data at %v %v", t, p)
+			continue
+		}
+
+		// TODO trim data earlier than lastFlushTsNs
+
+		targetFile := fmt.Sprintf("%s/%s", partitionDir, startTime.Format(topic.TIME_FORMAT))
+
+		// TODO append block with more metadata
+
+		for {
+			if err := b.appendToFile(targetFile, mem.buf); err != nil {
+				glog.V(0).Infof("metadata log write failed %s: %v", targetFile, err)
+				time.Sleep(737 * time.Millisecond)
+			} else {
+				break
+			}
+		}
+
+		glog.V(0).Infof("flushed remaining data at %v to %s size %d", mem.stopTime.UnixNano(), targetFile, len(mem.buf))
+	}
+
+	glog.V(0).Infof("shut down follower for %v %v", t, p)
+
+	return err
+}
+
+func (b *MessageQueueBroker) buildFollowerLogBuffer(inMemoryBuffers *buffered_queue.BufferedQueue[memBuffer]) *log_buffer.LogBuffer {
+	lb := log_buffer.NewLogBuffer("follower",
+		2*time.Minute, func(logBuffer *log_buffer.LogBuffer, startTime, stopTime time.Time, buf []byte) {
+			if len(buf) == 0 {
+				return
+			}
+			inMemoryBuffers.Enqueue(memBuffer{
+				buf:       buf,
+				startTime: startTime,
+				stopTime:  stopTime,
+			})
+			glog.V(0).Infof("queue up %d~%d size %d", startTime.UnixNano(), stopTime.UnixNano(), len(buf))
+		}, nil, func() {
+		})
+	return lb
 }
diff --git a/weed/util/buffered_queue/buffered_queue.go b/weed/util/buffered_queue/buffered_queue.go
index edaa0a7ce..042561cdd 100644
--- a/weed/util/buffered_queue/buffered_queue.go
+++ b/weed/util/buffered_queue/buffered_queue.go
@@ -117,6 +117,19 @@ func (q *BufferedQueue[T]) Dequeue() (T, bool) {
 	return job, true
 }
 
+func (q *BufferedQueue[T]) PeekHead() (T, bool) {
+	q.mutex.Lock()
+	defer q.mutex.Unlock()
+
+	if q.count <= 0 {
+		var a T
+		return a, false
+	}
+
+	job := q.head.items[q.head.headIndex]
+	return job, true
+}
+
 // Size returns the number of items in the queue
 func (q *BufferedQueue[T]) Size() int {
 	q.mutex.Lock()