Browse Source

filer.sync: fix data races in ChunkTransferStatus

Add sync.RWMutex to ChunkTransferStatus and lock around all field
mutations in fetchAndWrite. ActiveTransfers now returns value copies
under RLock so callers get immutable snapshots.
codex/pr-8889
Chris Lu 21 hours ago
parent
commit
597d383ca4
  1. 8
      weed/replication/sink/filersink/fetch_write.go
  2. 21
      weed/replication/sink/filersink/filer_sink.go

8
weed/replication/sink/filersink/fetch_write.go

@ -290,8 +290,10 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk, path string,
fullData = data
}
transferStatus.mu.Lock()
transferStatus.BytesReceived = int64(len(fullData))
transferStatus.Status = "uploading"
transferStatus.mu.Unlock()
currentFileId, uploadResult, uploadErr, _ := uploader.UploadWithRetry(
fs,
@ -335,15 +337,21 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk, path string,
glog.V(1).Infof("skip retrying stale source %s for %s: %v", sourceChunk.GetFileIdString(), path, retryErr)
return false
}
transferStatus.mu.Lock()
transferStatus.LastErr = retryErr.Error()
transferStatus.mu.Unlock()
if isEofError(retryErr) {
eofBackoff = nextEofBackoff(eofBackoff)
transferStatus.mu.Lock()
transferStatus.BytesReceived = int64(len(partialData))
transferStatus.Status = fmt.Sprintf("waiting %v", eofBackoff)
transferStatus.mu.Unlock()
glog.V(0).Infof("source connection interrupted while replicating %s for %s (%d bytes received so far), backing off %v: %v",
sourceChunk.GetFileIdString(), path, len(partialData), eofBackoff, retryErr)
time.Sleep(eofBackoff)
transferStatus.mu.Lock()
transferStatus.Status = "downloading"
transferStatus.mu.Unlock()
} else {
glog.V(0).Infof("replicate %s for %s: %v", sourceChunk.GetFileIdString(), path, retryErr)
}

21
weed/replication/sink/filersink/filer_sink.go

@ -22,7 +22,11 @@ import (
)
// ChunkTransferStatus tracks the progress of a single chunk being replicated.
// Fields are guarded by mu: ChunkFileId and Path are immutable after creation,
// while BytesReceived, Status, and LastErr are updated by fetchAndWrite and
// read by ActiveTransfers.
type ChunkTransferStatus struct {
mu sync.RWMutex
ChunkFileId string
Path string
BytesReceived int64
@ -112,11 +116,20 @@ func (fs *FilerSink) SetChunkConcurrency(concurrency int) {
}
}
// ActiveTransfers returns a snapshot of all in-progress chunk transfers.
func (fs *FilerSink) ActiveTransfers() []*ChunkTransferStatus {
var transfers []*ChunkTransferStatus
// ActiveTransfers returns an immutable snapshot of all in-progress chunk transfers.
func (fs *FilerSink) ActiveTransfers() []ChunkTransferStatus {
var transfers []ChunkTransferStatus
fs.activeTransfers.Range(func(key, value any) bool {
transfers = append(transfers, value.(*ChunkTransferStatus))
t := value.(*ChunkTransferStatus)
t.mu.RLock()
transfers = append(transfers, ChunkTransferStatus{
ChunkFileId: t.ChunkFileId,
Path: t.Path,
BytesReceived: t.BytesReceived,
Status: t.Status,
LastErr: t.LastErr,
})
t.mu.RUnlock()
return true
})
return transfers

Loading…
Cancel
Save