package weed_server

import (
	"errors"
	"fmt"
	"strings"
	"sync/atomic"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/stats"

	"google.golang.org/protobuf/proto"

	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/util"
	"github.com/seaweedfs/seaweedfs/weed/util/log_buffer"
)

const (
	// MaxUnsyncedEvents is the number of filtered-out events after which an empty
	// notification carrying the latest timestamp is sent, so subscribers can still
	// advance their read position.
	MaxUnsyncedEvents = 1e3
)

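// SubscribeMetadata streams metadata change events aggregated from this filer and its peers
// to the subscriber: it first catches up from persisted log files on disk, then tails the
// in-memory aggregated log buffer, starting at req.SinceNs and stopping at req.UntilNs if set.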
func (fs *FilerServer) SubscribeMetadata(req *filer_pb.SubscribeMetadataRequest, stream filer_pb.SeaweedFiler_SubscribeMetadataServer) error {

	ctx := stream.Context()
	peerAddress := findClientAddress(ctx, 0)

	isReplacing, alreadyKnown, clientName := fs.addClient("", req.ClientName, peerAddress, req.ClientId, req.ClientEpoch)
	if isReplacing {
		fs.filer.MetaAggregator.ListenersCond.Broadcast() // nudges the subscribers that are waiting
	} else if alreadyKnown {
		fs.filer.MetaAggregator.ListenersCond.Broadcast() // nudges the subscribers that are waiting
		return fmt.Errorf("duplicated subscription detected for client %s id %d", clientName, req.ClientId)
	}
	defer func() {
		glog.V(0).Infof("disconnect %v subscriber %s clientId:%d", clientName, req.PathPrefix, req.ClientId)
		fs.deleteClient("", clientName, req.ClientId, req.ClientEpoch)
		fs.filer.MetaAggregator.ListenersCond.Broadcast() // nudges the subscribers that are waiting
	}()

	lastReadTime := log_buffer.NewMessagePosition(req.SinceNs, -2)
	glog.V(0).Infof(" %v starts to subscribe %s from %+v", clientName, req.PathPrefix, lastReadTime)

	eachEventNotificationFn := fs.eachEventNotificationFn(req, stream, clientName)

	eachLogEntryFn := eachLogEntryFn(eachEventNotificationFn)

	var processedTsNs int64
	var readPersistedLogErr error
	var readInMemoryLogErr error
	var isDone bool

	for {

		glog.V(4).Infof("read on disk %v aggregated subscribe %s from %+v", clientName, req.PathPrefix, lastReadTime)

		processedTsNs, isDone, readPersistedLogErr = fs.filer.ReadPersistedLogBuffer(lastReadTime, req.UntilNs, eachLogEntryFn)
		if readPersistedLogErr != nil {
			return fmt.Errorf("reading from persisted logs: %w", readPersistedLogErr)
		}
		if isDone {
			return nil
		}

		glog.V(4).Infof("processed to %v: %v", clientName, processedTsNs)
		if processedTsNs != 0 {
			lastReadTime = log_buffer.NewMessagePosition(processedTsNs, -2)
		} else {
			// No data found on disk
			// Check if we previously got ResumeFromDiskError from memory, meaning we're in a gap
			if errors.Is(readInMemoryLogErr, log_buffer.ResumeFromDiskError) {
				// We have a gap: requested time < earliest memory time, but no data on disk
				// Skip forward to earliest memory time to avoid infinite loop
				earliestTime := fs.filer.MetaAggregator.MetaLogBuffer.GetEarliestTime()
				if !earliestTime.IsZero() && earliestTime.After(lastReadTime.Time) {
					glog.V(3).Infof("gap detected: skipping from %v to earliest memory time %v for %v",
						lastReadTime.Time, earliestTime, clientName)
					// Position at earliest time; time-based reader will include it
					lastReadTime = log_buffer.NewMessagePosition(earliestTime.UnixNano(), -2)
					readInMemoryLogErr = nil // Clear the error since we're skipping forward
				}
			} else {
				// First pass or no ResumeFromDiskError yet - check the next day for logs
				nextDayTs := util.GetNextDayTsNano(lastReadTime.Time.UnixNano())
				position := log_buffer.NewMessagePosition(nextDayTs, -2)
				found, err := fs.filer.HasPersistedLogFiles(position)
				if err != nil {
					return fmt.Errorf("checking persisted log files: %w", err)
				}
				if found {
					lastReadTime = position
				}
			}
		}

		glog.V(4).Infof("read in memory %v aggregated subscribe %s from %+v", clientName, req.PathPrefix, lastReadTime)

		lastReadTime, isDone, readInMemoryLogErr = fs.filer.MetaAggregator.MetaLogBuffer.LoopProcessLogData("aggMeta:"+clientName, lastReadTime, req.UntilNs, func() bool {
			// Check if the client has disconnected by monitoring the context
			select {
			case <-ctx.Done():
				return false
			default:
			}

			fs.filer.MetaAggregator.ListenersLock.Lock()
			atomic.AddInt64(&fs.filer.MetaAggregator.ListenersWaits, 1)
			fs.filer.MetaAggregator.ListenersCond.Wait()
			atomic.AddInt64(&fs.filer.MetaAggregator.ListenersWaits, -1)
			fs.filer.MetaAggregator.ListenersLock.Unlock()
			return fs.hasClient(req.ClientId, req.ClientEpoch)
		}, eachLogEntryFn)
		if readInMemoryLogErr != nil {
			if errors.Is(readInMemoryLogErr, log_buffer.ResumeFromDiskError) {
				// Memory says data is too old - will read from disk on next iteration
				// But if disk also has no data (gap in history), we'll skip forward
				continue
			}
			glog.Errorf("processed to %v: %v", lastReadTime, readInMemoryLogErr)
			if !errors.Is(readInMemoryLogErr, log_buffer.ResumeError) {
				break
			}
		}
		if isDone {
			return nil
		}
		if !fs.hasClient(req.ClientId, req.ClientEpoch) {
			glog.V(0).Infof("client %v is closed", clientName)
			return nil
		}

		time.Sleep(1127 * time.Millisecond)
	}

	return readInMemoryLogErr
}

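// SubscribeLocalMetadata streams metadata change events generated by this filer only. It re-reads
// persisted log files whenever new data has been flushed to disk or the read position has advanced,
// and otherwise tails the local in-memory log buffer, waiting on the listeners condition rather than polling.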
func (fs *FilerServer) SubscribeLocalMetadata(req *filer_pb.SubscribeMetadataRequest, stream filer_pb.SeaweedFiler_SubscribeLocalMetadataServer) error {

	ctx := stream.Context()
	peerAddress := findClientAddress(ctx, 0)

	// use negative client id to differentiate from addClient()/deleteClient() used in SubscribeMetadata()
	req.ClientId = -req.ClientId

	isReplacing, alreadyKnown, clientName := fs.addClient("local", req.ClientName, peerAddress, req.ClientId, req.ClientEpoch)
	if isReplacing {
		fs.listenersCond.Broadcast() // nudges the subscribers that are waiting
	} else if alreadyKnown {
		return fmt.Errorf("duplicated local subscription detected for client %s clientId:%d", clientName, req.ClientId)
	}
	defer func() {
		glog.V(0).Infof("disconnect %v local subscriber %s clientId:%d", clientName, req.PathPrefix, req.ClientId)
		fs.deleteClient("local", clientName, req.ClientId, req.ClientEpoch)
		fs.listenersCond.Broadcast() // nudges the subscribers that are waiting
	}()

	lastReadTime := log_buffer.NewMessagePosition(req.SinceNs, -2)
	glog.V(0).Infof(" + %v local subscribe %s from %+v clientId:%d", clientName, req.PathPrefix, lastReadTime, req.ClientId)

	eachEventNotificationFn := fs.eachEventNotificationFn(req, stream, clientName)

	eachLogEntryFn := eachLogEntryFn(eachEventNotificationFn)

	var processedTsNs int64
	var readPersistedLogErr error
	var readInMemoryLogErr error
	var isDone bool
	var lastCheckedFlushTsNs int64 = -1 // Track the last flushed time we checked
	var lastDiskReadTsNs int64 = -1     // Track the last read position we used for disk read

	for {
		// Check if new data has been flushed to disk since last check, or if read position advanced
		currentFlushTsNs := fs.filer.LocalMetaLogBuffer.GetLastFlushTsNs()
		currentReadTsNs := lastReadTime.Time.UnixNano()
		// Read from disk if: first time, new flush observed, or read position advanced (draining backlog)
		shouldReadFromDisk := lastCheckedFlushTsNs == -1 ||
			currentFlushTsNs > lastCheckedFlushTsNs ||
			currentReadTsNs > lastDiskReadTsNs

		if shouldReadFromDisk {
			// Record the position we are about to read from
			lastDiskReadTsNs = currentReadTsNs
			glog.V(4).Infof("read on disk %v local subscribe %s from %+v (lastFlushed: %v)", clientName, req.PathPrefix, lastReadTime, time.Unix(0, currentFlushTsNs))
			processedTsNs, isDone, readPersistedLogErr = fs.filer.ReadPersistedLogBuffer(lastReadTime, req.UntilNs, eachLogEntryFn)
			if readPersistedLogErr != nil {
				glog.V(0).Infof("read on disk %v local subscribe %s from %+v: %v", clientName, req.PathPrefix, lastReadTime, readPersistedLogErr)
				return fmt.Errorf("reading from persisted logs: %w", readPersistedLogErr)
			}
			if isDone {
				return nil
			}

			// Update the last checked flushed time
			lastCheckedFlushTsNs = currentFlushTsNs

			if processedTsNs != 0 {
				lastReadTime = log_buffer.NewMessagePosition(processedTsNs, -2)
			} else {
				// No data found on disk
				// Check if we previously got ResumeFromDiskError from memory, meaning we're in a gap
				if readInMemoryLogErr == log_buffer.ResumeFromDiskError {
					// We have a gap: requested time < earliest memory time, but no data on disk
					// Skip forward to earliest memory time to avoid infinite loop
					earliestTime := fs.filer.LocalMetaLogBuffer.GetEarliestTime()
					if !earliestTime.IsZero() && earliestTime.After(lastReadTime.Time) {
						glog.V(3).Infof("gap detected: skipping from %v to earliest memory time %v for %v",
							lastReadTime.Time, earliestTime, clientName)
						// Position at earliest time; time-based reader will include it
						lastReadTime = log_buffer.NewMessagePosition(earliestTime.UnixNano(), -2)
						readInMemoryLogErr = nil // Clear the error since we're skipping forward
					} else {
						// No memory data yet, just wait
						time.Sleep(1127 * time.Millisecond)
						continue
					}
				} else {
					// First pass or no ResumeFromDiskError yet
					// Check the next day for logs
					nextDayTs := util.GetNextDayTsNano(lastReadTime.Time.UnixNano())
					position := log_buffer.NewMessagePosition(nextDayTs, -2)
					found, err := fs.filer.HasPersistedLogFiles(position)
					if err != nil {
						return fmt.Errorf("checking persisted log files: %w", err)
					}
					if found {
						lastReadTime = position
					}
				}
			}
		}

		glog.V(3).Infof("read in memory %v local subscribe %s from %+v", clientName, req.PathPrefix, lastReadTime)

		lastReadTime, isDone, readInMemoryLogErr = fs.filer.LocalMetaLogBuffer.LoopProcessLogData("localMeta:"+clientName, lastReadTime, req.UntilNs, func() bool {

			// Check if the client has disconnected by monitoring the context
			select {
			case <-ctx.Done():
				return false
			default:
			}

			fs.listenersLock.Lock()
			atomic.AddInt64(&fs.listenersWaits, 1)
			fs.listenersCond.Wait()
			atomic.AddInt64(&fs.listenersWaits, -1)
			fs.listenersLock.Unlock()
			if !fs.hasClient(req.ClientId, req.ClientEpoch) {
				return false
			}
			return true
		}, eachLogEntryFn)
		if readInMemoryLogErr != nil {
			if readInMemoryLogErr == log_buffer.ResumeFromDiskError {
				// Memory buffer says the requested time is too old
				// Retry disk read if: (a) flush advanced, or (b) read position advanced (draining backlog)
				currentFlushTsNs := fs.filer.LocalMetaLogBuffer.GetLastFlushTsNs()
				currentReadTsNs := lastReadTime.Time.UnixNano()
				if currentFlushTsNs > lastCheckedFlushTsNs || currentReadTsNs > lastDiskReadTsNs {
					glog.V(0).Infof("retry disk read %v local subscribe %s (lastFlushed: %v -> %v, readTs: %v -> %v)",
						clientName, req.PathPrefix,
						time.Unix(0, lastCheckedFlushTsNs), time.Unix(0, currentFlushTsNs),
						time.Unix(0, lastDiskReadTsNs), time.Unix(0, currentReadTsNs))
					continue
				}
				// No progress possible, wait for new data to arrive (event-driven, not polling)
				fs.listenersLock.Lock()
				atomic.AddInt64(&fs.listenersWaits, 1)
				fs.listenersCond.Wait()
				atomic.AddInt64(&fs.listenersWaits, -1)
				fs.listenersLock.Unlock()
				continue
			}
			glog.Errorf("processed to %v: %v", lastReadTime, readInMemoryLogErr)
			if readInMemoryLogErr != log_buffer.ResumeError {
				break
			}
		}
		if isDone {
			return nil
		}
		if !fs.hasClient(req.ClientId, req.ClientEpoch) {
			return nil
		}
	}

	return readInMemoryLogErr
}

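// eachLogEntryFn adapts an event-notification callback to the log buffer interface: each log entry
// is unmarshalled into a filer_pb.SubscribeMetadataResponse and passed on to eachEventNotificationFn.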
func eachLogEntryFn(eachEventNotificationFn func(dirPath string, eventNotification *filer_pb.EventNotification, tsNs int64) error) log_buffer.EachLogEntryFuncType {
	return func(logEntry *filer_pb.LogEntry) (bool, error) {
		event := &filer_pb.SubscribeMetadataResponse{}
		if err := proto.Unmarshal(logEntry.Data, event); err != nil {
			glog.Errorf("unexpected unmarshal filer_pb.SubscribeMetadataResponse: %v", err)
			return false, fmt.Errorf("unexpected unmarshal filer_pb.SubscribeMetadataResponse: %w", err)
		}

		if err := eachEventNotificationFn(event.Directory, event.EventNotification, event.TsNs); err != nil {
			return false, err
		}

		return false, nil
	}
}

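// eachEventNotificationFn builds the per-event callback for one subscriber. It skips events carrying
// the subscriber's own signature and events under the filer's internal log directory, filters by path
// prefixes and directories, adds this filer's signature to events that do not already carry it, and
// sends matching events on the stream. Once more than MaxUnsyncedEvents events have been filtered out
// since the last send, an empty notification with the current timestamp is sent so the client can
// still advance its read position.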
func (fs *FilerServer) eachEventNotificationFn(req *filer_pb.SubscribeMetadataRequest, stream filer_pb.SeaweedFiler_SubscribeMetadataServer, clientName string) func(dirPath string, eventNotification *filer_pb.EventNotification, tsNs int64) error {
	filtered := 0

	return func(dirPath string, eventNotification *filer_pb.EventNotification, tsNs int64) error {
		defer func() {
			if filtered > MaxUnsyncedEvents {
				if err := stream.Send(&filer_pb.SubscribeMetadataResponse{
					EventNotification: &filer_pb.EventNotification{},
					TsNs:              tsNs,
				}); err == nil {
					filtered = 0
				}
			}
		}()

		filtered++
		foundSelf := false
		for _, sig := range eventNotification.Signatures {
			if sig == req.Signature && req.Signature != 0 {
				return nil
			}
			if sig == fs.filer.Signature {
				foundSelf = true
			}
		}
		if !foundSelf {
			eventNotification.Signatures = append(eventNotification.Signatures, fs.filer.Signature)
		}

		// get complete path to the file or directory
		var entryName string
		if eventNotification.OldEntry != nil {
			entryName = eventNotification.OldEntry.Name
		} else if eventNotification.NewEntry != nil {
			entryName = eventNotification.NewEntry.Name
		}

		fullpath := util.Join(dirPath, entryName)

		// skip on filer internal meta logs
		if strings.HasPrefix(fullpath, filer.SystemLogDir) {
			return nil
		}

		if hasPrefixIn(fullpath, req.PathPrefixes) {
			// good
		} else if matchByDirectory(dirPath, req.Directories) {
			// good
		} else {
			if !strings.HasPrefix(fullpath, req.PathPrefix) {
				if eventNotification.NewParentPath != "" {
					newFullPath := util.Join(eventNotification.NewParentPath, entryName)
					if !strings.HasPrefix(newFullPath, req.PathPrefix) {
						return nil
					}
				} else {
					return nil
				}
			}
		}

		// collect timestamps for path
		stats.FilerServerLastSendTsOfSubscribeGauge.WithLabelValues(fs.option.Host.String(), req.ClientName, req.PathPrefix).Set(float64(tsNs))

		message := &filer_pb.SubscribeMetadataResponse{
			Directory:         dirPath,
			EventNotification: eventNotification,
			TsNs:              tsNs,
		}
		// println("sending", dirPath, entryName)
		if err := stream.Send(message); err != nil {
			glog.V(0).Infof("=> client %v: %+v", clientName, err)
			return err
		}
		filtered = 0
		return nil
	}
}

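// hasPrefixIn reports whether text starts with any of the given prefixes.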
func hasPrefixIn(text string, prefixes []string) bool {
	for _, p := range prefixes {
		if strings.HasPrefix(text, p) {
			return true
		}
	}
	return false
}

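// matchByDirectory reports whether dirPath exactly matches one of the given directories.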
func matchByDirectory(dirPath string, directories []string) bool {
	for _, dir := range directories {
		if dirPath == dir {
			return true
		}
	}
	return false
}

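// addClient registers a subscriber under its clientId. isReplacing is returned when the clientId is
// new or re-registers with a newer epoch; alreadyKnown is returned when the same or a newer epoch is
// already registered for that clientId.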
func (fs *FilerServer) addClient(prefix string, clientType string, clientAddress string, clientId int32, clientEpoch int32) (isReplacing, alreadyKnown bool, clientName string) {
	clientName = clientType + "@" + clientAddress
	glog.V(0).Infof("+ %v listener %v clientId %v clientEpoch %v", prefix, clientName, clientId, clientEpoch)
	if clientId != 0 {
		fs.knownListenersLock.Lock()
		defer fs.knownListenersLock.Unlock()
		epoch, found := fs.knownListeners[clientId]
		if !found || epoch < clientEpoch {
			fs.knownListeners[clientId] = clientEpoch
			isReplacing = true
		} else {
			alreadyKnown = true
		}
	}
	return
}

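// deleteClient unregisters a subscriber, but only if no newer epoch has registered the same clientId in the meantime.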
func (fs *FilerServer) deleteClient(prefix string, clientName string, clientId int32, clientEpoch int32) {
	glog.V(0).Infof("- %v listener %v clientId %v clientEpoch %v", prefix, clientName, clientId, clientEpoch)
	if clientId != 0 {
		fs.knownListenersLock.Lock()
		defer fs.knownListenersLock.Unlock()
		epoch, found := fs.knownListeners[clientId]
		if found && epoch <= clientEpoch {
			delete(fs.knownListeners, clientId)
		}
	}
}

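// hasClient reports whether clientId is still registered and has not been superseded by a registration with a newer epoch.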
func (fs *FilerServer) hasClient(clientId int32, clientEpoch int32) bool {
	if clientId != 0 {
		fs.knownListenersLock.Lock()
		defer fs.knownListenersLock.Unlock()
		epoch, found := fs.knownListeners[clientId]
		if found && epoch <= clientEpoch {
			return true
		}
	}
	return false
}