From 896a85d6e4858a48018fcb7fc41026fcb3861dea Mon Sep 17 00:00:00 2001
From: chrislu
Date: Thu, 15 Sep 2022 03:11:32 -0700
Subject: [PATCH] volume: add "hasSlowRead" option to customize read optimization

---
 weed/command/server.go                     |  1 +
 weed/command/volume.go                     |  3 ++
 weed/server/volume_server.go               |  3 ++
 weed/server/volume_server_handlers_read.go |  1 +
 weed/storage/store.go                      |  9 ++++++
 weed/storage/volume_read.go                | 34 ++++++++++++++++++----
 6 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/weed/command/server.go b/weed/command/server.go
index 8dfa63e34..6db57744c 100644
--- a/weed/command/server.go
+++ b/weed/command/server.go
@@ -131,6 +131,7 @@ func init() {
 	serverOptions.v.pprof = cmdServer.Flag.Bool("volume.pprof", false, "enable pprof http handlers. precludes --memprofile and --cpuprofile")
 	serverOptions.v.idxFolder = cmdServer.Flag.String("volume.dir.idx", "", "directory to store .idx files")
 	serverOptions.v.inflightUploadDataTimeout = cmdServer.Flag.Duration("volume.inflightUploadDataTimeout", 60*time.Second, "inflight upload data wait timeout of volume servers")
+	serverOptions.v.hasSlowRead = cmdServer.Flag.Bool("volume.hasSlowRead", false, "if true, this prevents slow reads from blocking other requests, but large file read P99 latency will increase.")
 
 	s3Options.port = cmdServer.Flag.Int("s3.port", 8333, "s3 server http listen port")
 	s3Options.portGrpc = cmdServer.Flag.Int("s3.port.grpc", 0, "s3 server grpc listen port")
diff --git a/weed/command/volume.go b/weed/command/volume.go
index aa300108a..2e62921ca 100644
--- a/weed/command/volume.go
+++ b/weed/command/volume.go
@@ -66,6 +66,7 @@ type VolumeServerOptions struct {
 	metricsHttpPort           *int
 	// pulseSeconds          *int
 	inflightUploadDataTimeout *time.Duration
+	hasSlowRead               *bool
 }
 
 func init() {
@@ -96,6 +97,7 @@ func init() {
 	v.metricsHttpPort = cmdVolume.Flag.Int("metricsPort", 0, "Prometheus metrics listen port")
 	v.idxFolder = cmdVolume.Flag.String("dir.idx", "", "directory to store .idx files")
 	v.inflightUploadDataTimeout = cmdVolume.Flag.Duration("inflightUploadDataTimeout", 60*time.Second, "inflight upload data wait timeout of volume servers")
+	v.hasSlowRead = cmdVolume.Flag.Bool("hasSlowRead", false, "if true, this prevents slow reads from blocking other requests, but large file read P99 latency will increase.")
 }
 
 var cmdVolume = &Command{
@@ -243,6 +245,7 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v
 		int64(*v.concurrentUploadLimitMB)*1024*1024,
 		int64(*v.concurrentDownloadLimitMB)*1024*1024,
 		*v.inflightUploadDataTimeout,
+		*v.hasSlowRead,
 	)
 	// starting grpc server
 	grpcS := v.startGrpcService(volumeServer)
diff --git a/weed/server/volume_server.go b/weed/server/volume_server.go
index 46bf746f7..8bf50ce45 100644
--- a/weed/server/volume_server.go
+++ b/weed/server/volume_server.go
@@ -28,6 +28,7 @@ type VolumeServer struct {
 	inFlightUploadDataLimitCond   *sync.Cond
 	inFlightDownloadDataLimitCond *sync.Cond
 	inflightUploadDataTimeout     time.Duration
+	hasSlowRead                   bool
 
 	SeedMasterNodes []pb.ServerAddress
 	currentMaster   pb.ServerAddress
@@ -64,6 +65,7 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string,
 	concurrentUploadLimit int64,
 	concurrentDownloadLimit int64,
 	inflightUploadDataTimeout time.Duration,
+	hasSlowRead bool,
 ) *VolumeServer {
 
 	v := util.GetViper()
@@ -93,6 +95,7 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string,
 		concurrentUploadLimit:         concurrentUploadLimit,
 		concurrentDownloadLimit:       concurrentDownloadLimit,
 		inflightUploadDataTimeout:     inflightUploadDataTimeout,
+		hasSlowRead:                   hasSlowRead,
 	}
 	vs.SeedMasterNodes = masterNodes
 
diff --git a/weed/server/volume_server_handlers_read.go b/weed/server/volume_server_handlers_read.go
index 90f553116..b8f4120a6 100644
--- a/weed/server/volume_server_handlers_read.go
+++ b/weed/server/volume_server_handlers_read.go
@@ -117,6 +117,7 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request)
 
 	readOption := &storage.ReadOption{
 		ReadDeleted: r.FormValue("readDeleted") == "true",
+		HasSlowRead: vs.hasSlowRead,
 	}
 
 	var count int
diff --git a/weed/storage/store.go b/weed/storage/store.go
index 0727cba95..48736c1a9 100644
--- a/weed/storage/store.go
+++ b/weed/storage/store.go
@@ -35,6 +35,15 @@ type ReadOption struct {
 	IsMetaOnly     bool // read status
 	VolumeRevision uint16
 	IsOutOfRange   bool // whether read over MaxPossibleVolumeSize
+
+	// If HasSlowRead is set to true:
+	//  * read requests and write requests compete for the lock.
+	//  * large file read P99 latency on busy sites will go up, due to the need to get locks multiple times.
+	//  * write requests will see lower latency.
+	// If HasSlowRead is set to false:
+	//  * read requests should complete asap, not blocking other requests.
+	//  * write requests may see high latency when downloading large files.
+	HasSlowRead bool
 }
 
 /*
diff --git a/weed/storage/volume_read.go b/weed/storage/volume_read.go
index e9c7cf4cb..e045137b4 100644
--- a/weed/storage/volume_read.go
+++ b/weed/storage/volume_read.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"github.com/seaweedfs/seaweedfs/weed/util/mem"
 	"io"
+	"sync"
 	"time"
 
 	"github.com/seaweedfs/seaweedfs/weed/glog"
@@ -101,9 +102,18 @@ func (v *Volume) readNeedleMetaAt(n *needle.Needle, offset int64, size int32) (e
 // read fills in Needle content by looking up n.Id from NeedleMapper
 func (v *Volume) readNeedleDataInto(n *needle.Needle, readOption *ReadOption, writer io.Writer, offset int64, size int64) (err error) {
-	v.dataFileAccessLock.RLock()
+	if !readOption.HasSlowRead {
+		v.dataFileAccessLock.RLock()
+		defer v.dataFileAccessLock.RUnlock()
+	}
+
+	if readOption.HasSlowRead {
+		v.dataFileAccessLock.RLock()
+	}
 	nv, ok := v.nm.Get(n.Id)
-	v.dataFileAccessLock.RUnlock()
+	if readOption.HasSlowRead {
+		v.dataFileAccessLock.RUnlock()
+	}
 
 	if !ok || nv.Offset.IsZero() {
 		return ErrorNotFound
 	}
@@ -133,20 +143,26 @@ func (v *Volume) readNeedleDataInto(n *needle.Needle, readOption *ReadOption, wr
 
 	crc := needle.CRC(0)
 	for x := offset; x < offset+size; x += int64(len(buf)) {
-		v.dataFileAccessLock.RLock()
+		if readOption.HasSlowRead {
+			v.dataFileAccessLock.RLock()
+		}
 		// possibly re-read needle offset if volume is compacted
 		if readOption.VolumeRevision != v.SuperBlock.CompactionRevision {
 			// the volume is compacted
 			nv, ok = v.nm.Get(n.Id)
 			if !ok || nv.Offset.IsZero() {
-				v.dataFileAccessLock.RUnlock()
+				if readOption.HasSlowRead {
+					v.dataFileAccessLock.RUnlock()
+				}
 				return ErrorNotFound
 			}
 			actualOffset = nv.Offset.ToActualOffset()
 			readOption.VolumeRevision = v.SuperBlock.CompactionRevision
 		}
 		count, err := n.ReadNeedleData(v.DataBackend, actualOffset, buf, x)
-		v.dataFileAccessLock.RUnlock()
+		if readOption.HasSlowRead {
+			v.dataFileAccessLock.RUnlock()
+		}
 
 		toWrite := min(count, int(offset+size-x))
 		if toWrite > 0 {
@@ -174,6 +190,14 @@ func (v *Volume) readNeedleDataInto(n *needle.Needle, readOption *ReadOption, wr
 }
 
+func synchronizedRead(rwLock *sync.RWMutex, enabled bool, closure func() error) error {
+	if enabled {
+		rwLock.RLock()
+		defer rwLock.RUnlock()
+	}
+	return closure()
+}
+
 func min(x, y int) int {
 	if x < y {
 		return x
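
Note on the new helper: synchronizedRead is added at the bottom of volume_read.go but is not yet invoked; readNeedleDataInto still inlines the same pattern as repeated "if readOption.HasSlowRead { ... }" blocks. The self-contained sketch below (illustrative only, not SeaweedFS code; demoStore and its fields are invented for the example) shows how the helper gates an RLock on a flag, mirroring what the hasSlowRead option toggles: per-step locking when true, a single caller-held lock for the whole read when false.

package main

import (
	"fmt"
	"sync"
)

// synchronizedRead mirrors the helper added in volume_read.go: it takes the
// read lock around closure only when enabled is true; otherwise the caller is
// assumed to manage locking itself (e.g. one RLock held for the whole read).
func synchronizedRead(rwLock *sync.RWMutex, enabled bool, closure func() error) error {
	if enabled {
		rwLock.RLock()
		defer rwLock.RUnlock()
	}
	return closure()
}

// demoStore is an illustrative stand-in for a volume: a map guarded by the
// same kind of RWMutex that dataFileAccessLock is.
type demoStore struct {
	mu   sync.RWMutex
	data map[uint64]string
}

// read looks up one entry. With hasSlowRead=true the lock is held only for
// this step, so a slow reader does not block writers for the whole download;
// with false, a caller-held RLock would cover the entire read instead.
func (s *demoStore) read(id uint64, hasSlowRead bool) (value string, found bool, err error) {
	err = synchronizedRead(&s.mu, hasSlowRead, func() error {
		value, found = s.data[id]
		return nil
	})
	return
}

func main() {
	s := &demoStore{data: map[uint64]string{1: "needle-1"}}
	v, ok, err := s.read(1, true)
	fmt.Println(v, ok, err) // needle-1 true <nil>
}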