You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

350 lines
10 KiB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
1 year ago
5 years ago
5 years ago
5 years ago
1 year ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
  1. package log_buffer
  2. import (
  3. "bytes"
  4. "sync"
  5. "sync/atomic"
  6. "time"
  7. "google.golang.org/protobuf/proto"
  8. "github.com/seaweedfs/seaweedfs/weed/glog"
  9. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  10. "github.com/seaweedfs/seaweedfs/weed/util"
  11. )
  12. const BufferSize = 4 * 1024 * 1024
  13. const PreviousBufferCount = 3
  14. type dataToFlush struct {
  15. startTime time.Time
  16. stopTime time.Time
  17. data *bytes.Buffer
  18. }
  19. type LogFlushFuncType func(startTime, stopTime time.Time, buf []byte)
  20. type LogBuffer struct {
  21. name string
  22. prevBuffers *SealedBuffers
  23. buf []byte
  24. batchIndex int64
  25. idx []int
  26. pos int
  27. startTime time.Time
  28. stopTime time.Time
  29. lastFlushTime time.Time
  30. sizeBuf []byte
  31. flushInterval time.Duration
  32. flushFn LogFlushFuncType
  33. notifyFn func()
  34. isStopping *atomic.Bool
  35. flushChan chan *dataToFlush
  36. lastTsNs int64
  37. sync.RWMutex
  38. }
  39. func NewLogBuffer(name string, flushInterval time.Duration, flushFn LogFlushFuncType, notifyFn func()) *LogBuffer {
  40. lb := &LogBuffer{
  41. name: name,
  42. prevBuffers: newSealedBuffers(PreviousBufferCount),
  43. buf: make([]byte, BufferSize),
  44. sizeBuf: make([]byte, 4),
  45. flushInterval: flushInterval,
  46. flushFn: flushFn,
  47. notifyFn: notifyFn,
  48. flushChan: make(chan *dataToFlush, 256),
  49. isStopping: new(atomic.Bool),
  50. }
  51. go lb.loopFlush()
  52. go lb.loopInterval()
  53. return lb
  54. }
  55. func (logBuffer *LogBuffer) AddToBuffer(partitionKey, data []byte, processingTsNs int64) {
  56. var toFlush *dataToFlush
  57. logBuffer.Lock()
  58. defer func() {
  59. logBuffer.Unlock()
  60. if toFlush != nil {
  61. logBuffer.flushChan <- toFlush
  62. }
  63. if logBuffer.notifyFn != nil {
  64. logBuffer.notifyFn()
  65. }
  66. }()
  67. // need to put the timestamp inside the lock
  68. var ts time.Time
  69. if processingTsNs == 0 {
  70. ts = time.Now()
  71. processingTsNs = ts.UnixNano()
  72. } else {
  73. ts = time.Unix(0, processingTsNs)
  74. }
  75. if logBuffer.lastTsNs >= processingTsNs {
  76. // this is unlikely to happen, but just in case
  77. processingTsNs = logBuffer.lastTsNs + 1
  78. ts = time.Unix(0, processingTsNs)
  79. }
  80. logBuffer.lastTsNs = processingTsNs
  81. logEntry := &filer_pb.LogEntry{
  82. TsNs: processingTsNs,
  83. PartitionKeyHash: util.HashToInt32(partitionKey),
  84. Data: data,
  85. }
  86. logEntryData, _ := proto.Marshal(logEntry)
  87. size := len(logEntryData)
  88. if logBuffer.pos == 0 {
  89. logBuffer.startTime = ts
  90. }
  91. if logBuffer.startTime.Add(logBuffer.flushInterval).Before(ts) || len(logBuffer.buf)-logBuffer.pos < size+4 {
  92. glog.V(0).Infof("%s copyToFlush1 batch:%d start time %v, ts %v, remaining %d bytes", logBuffer.name, logBuffer.batchIndex, logBuffer.startTime, ts, len(logBuffer.buf)-logBuffer.pos)
  93. toFlush = logBuffer.copyToFlush()
  94. logBuffer.startTime = ts
  95. if len(logBuffer.buf) < size+4 {
  96. logBuffer.buf = make([]byte, 2*size+4)
  97. }
  98. }
  99. logBuffer.stopTime = ts
  100. logBuffer.idx = append(logBuffer.idx, logBuffer.pos)
  101. util.Uint32toBytes(logBuffer.sizeBuf, uint32(size))
  102. copy(logBuffer.buf[logBuffer.pos:logBuffer.pos+4], logBuffer.sizeBuf)
  103. copy(logBuffer.buf[logBuffer.pos+4:logBuffer.pos+4+size], logEntryData)
  104. logBuffer.pos += size + 4
  105. // fmt.Printf("partitionKey %v entry size %d total %d count %d\n", string(partitionKey), size, m.pos, len(m.idx))
  106. }
  107. func (logBuffer *LogBuffer) IsStopping() bool {
  108. return logBuffer.isStopping.Load()
  109. }
  110. func (logBuffer *LogBuffer) Shutdown() {
  111. isAlreadyStopped := logBuffer.isStopping.Swap(true)
  112. if isAlreadyStopped {
  113. return
  114. }
  115. toFlush := logBuffer.copyToFlush()
  116. logBuffer.flushChan <- toFlush
  117. close(logBuffer.flushChan)
  118. }
  119. func (logBuffer *LogBuffer) loopFlush() {
  120. for d := range logBuffer.flushChan {
  121. if d != nil {
  122. // glog.V(4).Infof("%s flush [%v, %v] size %d", m.name, d.startTime, d.stopTime, len(d.data.Bytes()))
  123. logBuffer.flushFn(d.startTime, d.stopTime, d.data.Bytes())
  124. d.releaseMemory()
  125. // local logbuffer is different from aggregate logbuffer here
  126. logBuffer.lastFlushTime = d.stopTime
  127. }
  128. }
  129. }
  130. func (logBuffer *LogBuffer) loopInterval() {
  131. for !logBuffer.IsStopping() {
  132. time.Sleep(logBuffer.flushInterval)
  133. if logBuffer.IsStopping() {
  134. return
  135. }
  136. logBuffer.Lock()
  137. toFlush := logBuffer.copyToFlush()
  138. logBuffer.Unlock()
  139. if toFlush != nil {
  140. glog.V(0).Infof("%s flush [%v, %v] size %d", logBuffer.name, toFlush.startTime, toFlush.stopTime, len(toFlush.data.Bytes()))
  141. logBuffer.flushChan <- toFlush
  142. } else {
  143. // glog.V(0).Infof("%s no flush", m.name)
  144. }
  145. }
  146. }
  147. func (logBuffer *LogBuffer) copyToFlush() *dataToFlush {
  148. if logBuffer.pos > 0 {
  149. // fmt.Printf("flush buffer %d pos %d empty space %d\n", len(m.buf), m.pos, len(m.buf)-m.pos)
  150. var d *dataToFlush
  151. if logBuffer.flushFn != nil {
  152. d = &dataToFlush{
  153. startTime: logBuffer.startTime,
  154. stopTime: logBuffer.stopTime,
  155. data: copiedBytes(logBuffer.buf[:logBuffer.pos]),
  156. }
  157. // glog.V(4).Infof("%s flushing [0,%d) with %d entries [%v, %v]", m.name, m.pos, len(m.idx), m.startTime, m.stopTime)
  158. } else {
  159. // glog.V(4).Infof("%s removed from memory [0,%d) with %d entries [%v, %v]", m.name, m.pos, len(m.idx), m.startTime, m.stopTime)
  160. logBuffer.lastFlushTime = logBuffer.stopTime
  161. }
  162. logBuffer.buf = logBuffer.prevBuffers.SealBuffer(logBuffer.startTime, logBuffer.stopTime, logBuffer.buf, logBuffer.pos, logBuffer.batchIndex)
  163. logBuffer.startTime = time.Unix(0, 0)
  164. logBuffer.stopTime = time.Unix(0, 0)
  165. logBuffer.pos = 0
  166. logBuffer.idx = logBuffer.idx[:0]
  167. logBuffer.batchIndex++
  168. return d
  169. }
  170. return nil
  171. }
  172. func (logBuffer *LogBuffer) GetEarliestTime() time.Time{
  173. return logBuffer.startTime
  174. }
  175. func (logBuffer *LogBuffer) GetEarliestPosition() MessagePosition{
  176. return MessagePosition{
  177. Time: logBuffer.startTime,
  178. BatchIndex: logBuffer.batchIndex,
  179. }
  180. }
  181. func (d *dataToFlush) releaseMemory() {
  182. d.data.Reset()
  183. bufferPool.Put(d.data)
  184. }
  185. func (logBuffer *LogBuffer) ReadFromBuffer(lastReadPosition MessagePosition, inMemoryOnly bool) (bufferCopy *bytes.Buffer, batchIndex int64, err error) {
  186. logBuffer.RLock()
  187. defer logBuffer.RUnlock()
  188. // Read from disk and memory
  189. // 1. read from disk, last time is = td
  190. // 2. in memory, the earliest time = tm
  191. // if tm <= td, case 2.1
  192. // read from memory
  193. // if tm is empty, case 2.2
  194. // read from memory
  195. // if td < tm, case 2.3
  196. // read from disk again
  197. var tsMemory time.Time
  198. var tsBatchIndex int64
  199. if !logBuffer.startTime.IsZero() {
  200. tsMemory = logBuffer.startTime
  201. tsBatchIndex = logBuffer.batchIndex
  202. }
  203. for _, prevBuf := range logBuffer.prevBuffers.buffers {
  204. if !prevBuf.startTime.IsZero() && prevBuf.startTime.Before(tsMemory) {
  205. tsMemory = prevBuf.startTime
  206. tsBatchIndex = prevBuf.batchIndex
  207. }
  208. }
  209. if tsMemory.IsZero() { // case 2.2
  210. println("2.2 no data")
  211. return nil, -2,nil
  212. } else if lastReadPosition.Before(tsMemory) && lastReadPosition.BatchIndex +1 < tsBatchIndex { // case 2.3
  213. if inMemoryOnly {
  214. println("2.3 no data", lastReadPosition.BatchIndex, tsBatchIndex)
  215. // FIXME: this is wrong: the data has been flushed to disk already
  216. return nil, tsBatchIndex,nil
  217. }
  218. if !logBuffer.lastFlushTime.IsZero() {
  219. glog.V(0).Infof("resume with last flush time: %v", logBuffer.lastFlushTime)
  220. return nil, -2, ResumeFromDiskError
  221. }
  222. }
  223. // the following is case 2.1
  224. if lastReadPosition.Equal(logBuffer.stopTime) {
  225. return nil, logBuffer.batchIndex, nil
  226. }
  227. if lastReadPosition.After(logBuffer.stopTime) {
  228. // glog.Fatalf("unexpected last read time %v, older than latest %v", lastReadPosition, m.stopTime)
  229. return nil, logBuffer.batchIndex, nil
  230. }
  231. if lastReadPosition.Before(logBuffer.startTime) {
  232. // println("checking ", lastReadPosition.UnixNano())
  233. for _, buf := range logBuffer.prevBuffers.buffers {
  234. if buf.startTime.After(lastReadPosition.Time) {
  235. // glog.V(4).Infof("%s return the %d sealed buffer %v", m.name, i, buf.startTime)
  236. // println("return the", i, "th in memory", buf.startTime.UnixNano())
  237. return copiedBytes(buf.buf[:buf.size]), buf.batchIndex, nil
  238. }
  239. if !buf.startTime.After(lastReadPosition.Time) && buf.stopTime.After(lastReadPosition.Time) {
  240. pos := buf.locateByTs(lastReadPosition.Time)
  241. // fmt.Printf("locate buffer[%d] pos %d\n", i, pos)
  242. return copiedBytes(buf.buf[pos:buf.size]), buf.batchIndex, nil
  243. }
  244. }
  245. // glog.V(4).Infof("%s return the current buf %v", m.name, lastReadPosition)
  246. return copiedBytes(logBuffer.buf[:logBuffer.pos]), logBuffer.batchIndex,nil
  247. }
  248. lastTs := lastReadPosition.UnixNano()
  249. l, h := 0, len(logBuffer.idx)-1
  250. /*
  251. for i, pos := range m.idx {
  252. logEntry, ts := readTs(m.buf, pos)
  253. event := &filer_pb.SubscribeMetadataResponse{}
  254. proto.Unmarshal(logEntry.Data, event)
  255. entry := event.EventNotification.OldEntry
  256. if entry == nil {
  257. entry = event.EventNotification.NewEntry
  258. }
  259. fmt.Printf("entry %d ts: %v offset:%d dir:%s name:%s\n", i, time.Unix(0, ts), pos, event.Directory, entry.Name)
  260. }
  261. fmt.Printf("l=%d, h=%d\n", l, h)
  262. */
  263. for l <= h {
  264. mid := (l + h) / 2
  265. pos := logBuffer.idx[mid]
  266. _, t := readTs(logBuffer.buf, pos)
  267. if t <= lastTs {
  268. l = mid + 1
  269. } else if lastTs < t {
  270. var prevT int64
  271. if mid > 0 {
  272. _, prevT = readTs(logBuffer.buf, logBuffer.idx[mid-1])
  273. }
  274. if prevT <= lastTs {
  275. // fmt.Printf("found l=%d, m-1=%d(ts=%d), m=%d(ts=%d), h=%d [%d, %d) \n", l, mid-1, prevT, mid, t, h, pos, m.pos)
  276. return copiedBytes(logBuffer.buf[pos:logBuffer.pos]), logBuffer.batchIndex, nil
  277. }
  278. h = mid
  279. }
  280. // fmt.Printf("l=%d, h=%d\n", l, h)
  281. }
  282. // FIXME: this could be that the buffer has been flushed already
  283. println("Not sure why no data", lastReadPosition.BatchIndex, tsBatchIndex)
  284. return nil, -2, nil
  285. }
  286. func (logBuffer *LogBuffer) ReleaseMemory(b *bytes.Buffer) {
  287. bufferPool.Put(b)
  288. }
  289. var bufferPool = sync.Pool{
  290. New: func() interface{} {
  291. return new(bytes.Buffer)
  292. },
  293. }
  294. func copiedBytes(buf []byte) (copied *bytes.Buffer) {
  295. copied = bufferPool.Get().(*bytes.Buffer)
  296. copied.Reset()
  297. copied.Write(buf)
  298. return
  299. }
  300. func readTs(buf []byte, pos int) (size int, ts int64) {
  301. size = int(util.BytesToUint32(buf[pos : pos+4]))
  302. entryData := buf[pos+4 : pos+4+size]
  303. logEntry := &filer_pb.LogEntry{}
  304. err := proto.Unmarshal(entryData, logEntry)
  305. if err != nil {
  306. glog.Fatalf("unexpected unmarshal filer_pb.LogEntry: %v", err)
  307. }
  308. return size, logEntry.TsNs
  309. }