package filer

import (
	"context"
	"io"
	"sync"

	"golang.org/x/sync/errgroup"

	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"
	"github.com/seaweedfs/seaweedfs/weed/wdclient"
)

// ChunkGroup indexes the chunks of one file by fixed-size section and serves
// reads and seeks against that index.
type ChunkGroup struct {
	lookupFn wdclient.LookupFileIdFunctionType
	// sections maps a section index to the chunks registered for that
	// section; guarded by sectionsLock.
	sections     map[SectionIndex]*FileChunkSection
	sectionsLock sync.RWMutex
	readerCache  *ReaderCache
	// concurrentReaders bounds parallel section reads and drives the
	// prefetch count.
	concurrentReaders int
}

// NewChunkGroup builds a ChunkGroup with the default concurrency of 16.
// Callers that want to tune read parallelism should call
// NewChunkGroupWithConcurrency directly.
func NewChunkGroup(lookupFn wdclient.LookupFileIdFunctionType, chunkCache chunk_cache.ChunkCache, chunks []*filer_pb.FileChunk) (*ChunkGroup, error) {
	return NewChunkGroupWithConcurrency(lookupFn, chunkCache, chunks, 16)
}

// NewChunkGroupWithConcurrency builds a ChunkGroup whose read parallelism is
// set by concurrentReaders. A non-positive value falls back to 16.
//
// Within this file concurrentReaders:
//   - bounds how many sections ReadDataAt reads at the same time,
//   - determines the value reported by GetPrefetchCount,
//   - sizes the reader cache (twice the reader count, never below 32).
//
// The group is returned even when loading the initial chunks fails, together
// with the error from SetChunks.
func NewChunkGroupWithConcurrency(lookupFn wdclient.LookupFileIdFunctionType, chunkCache chunk_cache.ChunkCache, chunks []*filer_pb.FileChunk, concurrentReaders int) (*ChunkGroup, error) {
	readers := concurrentReaders
	if readers <= 0 {
		readers = 16
	}

	// Keep the cache limit above the reader count so parallel prefetching
	// has room to work.
	cacheLimit := 2 * readers
	if cacheLimit < 32 {
		cacheLimit = 32
	}

	cg := &ChunkGroup{
		lookupFn:          lookupFn,
		sections:          make(map[SectionIndex]*FileChunkSection),
		readerCache:       NewReaderCache(cacheLimit, chunkCache, lookupFn),
		concurrentReaders: readers,
	}
	if err := cg.SetChunks(chunks); err != nil {
		return cg, err
	}
	return cg, nil
}

// GetPrefetchCount returns the number of chunks to prefetch ahead during sequential reads.
// This is derived from concurrentReaders to keep the network pipeline full.
func (group *ChunkGroup) GetPrefetchCount() int { // Prefetch at least 1, and scale with concurrency (roughly 1/4 of concurrent readers) prefetch := group.concurrentReaders / 4 if prefetch < 1 { prefetch = 1 } if prefetch > 8 { prefetch = 8 // Cap at 8 to avoid excessive memory usage } return prefetch } func (group *ChunkGroup) AddChunk(chunk *filer_pb.FileChunk) error { group.sectionsLock.Lock() defer group.sectionsLock.Unlock() sectionIndexStart, sectionIndexStop := SectionIndex(chunk.Offset/SectionSize), SectionIndex((chunk.Offset+int64(chunk.Size))/SectionSize) for si := sectionIndexStart; si < sectionIndexStop+1; si++ { section, found := group.sections[si] if !found { section = NewFileChunkSection(si) group.sections[si] = section } section.addChunk(chunk) } return nil } func (group *ChunkGroup) ReadDataAt(ctx context.Context, fileSize int64, buff []byte, offset int64) (n int, tsNs int64, err error) { if offset >= fileSize { return 0, 0, io.EOF } group.sectionsLock.RLock() defer group.sectionsLock.RUnlock() sectionIndexStart, sectionIndexStop := SectionIndex(offset/SectionSize), SectionIndex((offset+int64(len(buff)))/SectionSize) numSections := int(sectionIndexStop - sectionIndexStart + 1) // For single section or when concurrency is disabled, use sequential reading if numSections <= 1 || group.concurrentReaders <= 1 { return group.readDataAtSequential(ctx, fileSize, buff, offset, sectionIndexStart, sectionIndexStop) } // For multiple sections, use parallel reading return group.readDataAtParallel(ctx, fileSize, buff, offset, sectionIndexStart, sectionIndexStop) } // readDataAtSequential reads sections sequentially (original behavior) func (group *ChunkGroup) readDataAtSequential(ctx context.Context, fileSize int64, buff []byte, offset int64, sectionIndexStart, sectionIndexStop SectionIndex) (n int, tsNs int64, err error) { for si := sectionIndexStart; si < sectionIndexStop+1; si++ { section, found := group.sections[si] rangeStart, rangeStop := max(offset, 
int64(si*SectionSize)), min(offset+int64(len(buff)), int64((si+1)*SectionSize)) if rangeStart >= rangeStop { continue } if !found { rangeStop = min(rangeStop, fileSize) for i := rangeStart; i < rangeStop; i++ { buff[i-offset] = 0 } n = int(int64(n) + rangeStop - rangeStart) continue } xn, xTsNs, xErr := section.readDataAt(ctx, group, fileSize, buff[rangeStart-offset:rangeStop-offset], rangeStart) if xErr != nil { return n + xn, max(tsNs, xTsNs), xErr } n += xn tsNs = max(tsNs, xTsNs) } return } // sectionReadResult holds the result of a section read operation type sectionReadResult struct { sectionIndex SectionIndex n int tsNs int64 err error } // readDataAtParallel reads multiple sections in parallel for better throughput func (group *ChunkGroup) readDataAtParallel(ctx context.Context, fileSize int64, buff []byte, offset int64, sectionIndexStart, sectionIndexStop SectionIndex) (n int, tsNs int64, err error) { numSections := int(sectionIndexStop - sectionIndexStart + 1) // Limit concurrency to the smaller of concurrentReaders and numSections maxConcurrent := group.concurrentReaders if numSections < maxConcurrent { maxConcurrent = numSections } g, gCtx := errgroup.WithContext(ctx) g.SetLimit(maxConcurrent) results := make([]sectionReadResult, numSections) for i := 0; i < numSections; i++ { si := sectionIndexStart + SectionIndex(i) idx := i section, found := group.sections[si] rangeStart, rangeStop := max(offset, int64(si*SectionSize)), min(offset+int64(len(buff)), int64((si+1)*SectionSize)) if rangeStart >= rangeStop { continue } if !found { // Zero-fill missing sections synchronously rangeStop = min(rangeStop, fileSize) for j := rangeStart; j < rangeStop; j++ { buff[j-offset] = 0 } results[idx] = sectionReadResult{ sectionIndex: si, n: int(rangeStop - rangeStart), tsNs: 0, err: nil, } continue } // Capture variables for closure sectionCopy := section buffSlice := buff[rangeStart-offset : rangeStop-offset] rangeStartCopy := rangeStart g.Go(func() error { xn, xTsNs, 
xErr := sectionCopy.readDataAt(gCtx, group, fileSize, buffSlice, rangeStartCopy) results[idx] = sectionReadResult{ sectionIndex: si, n: xn, tsNs: xTsNs, err: xErr, } if xErr != nil && xErr != io.EOF { return xErr } return nil }) } // Wait for all goroutines to complete groupErr := g.Wait() // Aggregate results for _, result := range results { n += result.n tsNs = max(tsNs, result.tsNs) // Collect first non-EOF error from results as fallback if result.err != nil && result.err != io.EOF && err == nil { err = result.err } } // Prioritize errgroup error (first error that cancelled context) if groupErr != nil { err = groupErr } return } func (group *ChunkGroup) SetChunks(chunks []*filer_pb.FileChunk) error { group.sectionsLock.RLock() defer group.sectionsLock.RUnlock() var dataChunks []*filer_pb.FileChunk for _, chunk := range chunks { if !chunk.IsChunkManifest { dataChunks = append(dataChunks, chunk) continue } resolvedChunks, err := ResolveOneChunkManifest(context.Background(), group.lookupFn, chunk) if err != nil { return err } dataChunks = append(dataChunks, resolvedChunks...) 
} sections := make(map[SectionIndex]*FileChunkSection) for _, chunk := range dataChunks { sectionIndexStart, sectionIndexStop := SectionIndex(chunk.Offset/SectionSize), SectionIndex((chunk.Offset+int64(chunk.Size))/SectionSize) for si := sectionIndexStart; si < sectionIndexStop+1; si++ { section, found := sections[si] if !found { section = NewFileChunkSection(si) sections[si] = section } section.chunks = append(section.chunks, chunk) } } group.sections = sections return nil } const ( // see weedfs_file_lseek.go SEEK_DATA uint32 = 3 // seek to next data after the offset // SEEK_HOLE uint32 = 4 // seek to next hole after the offset ) // FIXME: needa tests func (group *ChunkGroup) SearchChunks(ctx context.Context, offset, fileSize int64, whence uint32) (found bool, out int64) { group.sectionsLock.RLock() defer group.sectionsLock.RUnlock() return group.doSearchChunks(ctx, offset, fileSize, whence) } func (group *ChunkGroup) doSearchChunks(ctx context.Context, offset, fileSize int64, whence uint32) (found bool, out int64) { sectionIndex, maxSectionIndex := SectionIndex(offset/SectionSize), SectionIndex(fileSize/SectionSize) if whence == SEEK_DATA { for si := sectionIndex; si < maxSectionIndex+1; si++ { section, foundSection := group.sections[si] if !foundSection { continue } sectionStart := section.DataStartOffset(ctx, group, offset, fileSize) if sectionStart == -1 { continue } return true, sectionStart } return false, 0 } else { // whence == SEEK_HOLE for si := sectionIndex; si < maxSectionIndex; si++ { section, foundSection := group.sections[si] if !foundSection { return true, offset } holeStart := section.NextStopOffset(ctx, group, offset, fileSize) if holeStart%SectionSize == 0 { continue } return true, holeStart } return true, fileSize } }