2 changed files with 312 additions and 132 deletions
@@ -0,0 +1,214 @@
package operation

import (
	"bytes"
	"context"
	"crypto/md5"
	"fmt"
	"hash"
	"io"
	"sync"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/security"
)

// ChunkedUploadResult contains the result of a chunked upload
type ChunkedUploadResult struct {
	FileChunks   []*filer_pb.FileChunk
	Md5Hash      hash.Hash
	TotalSize    int64
	SmallContent []byte // For files smaller than threshold
}

// ChunkedUploadOption contains options for chunked uploads
type ChunkedUploadOption struct {
	ChunkSize       int32
	SmallFileLimit  int64
	Collection      string
	Replication     string
	DataCenter      string
	SaveSmallInline bool
	Jwt             security.EncodedJwt
	MimeType        string
	AssignFunc      func(ctx context.Context, count int) (*VolumeAssignRequest, *AssignResult, error)
}

var chunkBufferPool = sync.Pool{
	New: func() interface{} {
		return new(bytes.Buffer)
	},
}

// UploadReaderInChunks reads from reader and uploads in chunks to volume servers.
// This prevents OOM by processing the stream in fixed-size chunks.
// Returns file chunks, MD5 hash, total size, and any small content stored inline.
func UploadReaderInChunks(ctx context.Context, reader io.Reader, opt *ChunkedUploadOption) (*ChunkedUploadResult, error) {

	md5Hash := md5.New()
	var partReader = io.TeeReader(reader, md5Hash)

	var fileChunks []*filer_pb.FileChunk
	var fileChunksLock sync.Mutex
	var uploadErr error
	var uploadErrLock sync.Mutex
	var chunkOffset int64 = 0

	var wg sync.WaitGroup
	var bytesBufferCounter int64 = 4
	bytesBufferLimitChan := make(chan struct{}, bytesBufferCounter)

	for {
		// Throttle buffer usage
		bytesBufferLimitChan <- struct{}{}

		// Check for errors from parallel uploads
		uploadErrLock.Lock()
		if uploadErr != nil {
			<-bytesBufferLimitChan
			uploadErrLock.Unlock()
			break
		}
		uploadErrLock.Unlock()

		// Get buffer from pool
		bytesBuffer := chunkBufferPool.Get().(*bytes.Buffer)
		limitedReader := io.LimitReader(partReader, int64(opt.ChunkSize))
		bytesBuffer.Reset()

		// Read one chunk
		dataSize, err := bytesBuffer.ReadFrom(limitedReader)
		if err != nil || dataSize == 0 {
			chunkBufferPool.Put(bytesBuffer)
			<-bytesBufferLimitChan
			if err != nil {
				uploadErrLock.Lock()
				if uploadErr == nil {
					uploadErr = err
				}
				uploadErrLock.Unlock()
			}
			break
		}

		// For small files at offset 0, store inline instead of uploading
		if chunkOffset == 0 && opt.SaveSmallInline && dataSize < opt.SmallFileLimit {
			smallContent := make([]byte, dataSize)
			bytesBuffer.Read(smallContent)
			chunkBufferPool.Put(bytesBuffer)
			<-bytesBufferLimitChan

			return &ChunkedUploadResult{
				FileChunks:   nil,
				Md5Hash:      md5Hash,
				TotalSize:    dataSize,
				SmallContent: smallContent,
			}, nil
		}

		// Upload chunk in parallel goroutine
		wg.Add(1)
		go func(offset int64, buf *bytes.Buffer) {
			defer func() {
				chunkBufferPool.Put(buf)
				<-bytesBufferLimitChan
				wg.Done()
			}()

			// Assign volume for this chunk
			_, assignResult, assignErr := opt.AssignFunc(ctx, 1)
			if assignErr != nil {
				uploadErrLock.Lock()
				if uploadErr == nil {
					uploadErr = fmt.Errorf("assign volume: %w", assignErr)
				}
				uploadErrLock.Unlock()
				return
			}

			// Upload chunk data
			uploadUrl := fmt.Sprintf("http://%s/%s", assignResult.Url, assignResult.Fid)
			uploadOption := &UploadOption{
				UploadUrl:         uploadUrl,
				Cipher:            false,
				IsInputCompressed: false,
				MimeType:          opt.MimeType,
				PairMap:           nil,
				Jwt:               opt.Jwt,
			}

			uploader, uploaderErr := NewUploader()
			if uploaderErr != nil {
				uploadErrLock.Lock()
				if uploadErr == nil {
					uploadErr = fmt.Errorf("create uploader: %w", uploaderErr)
				}
				uploadErrLock.Unlock()
				return
			}

			uploadResult, uploadResultErr := uploader.UploadData(ctx, buf.Bytes(), uploadOption)
			if uploadResultErr != nil {
				uploadErrLock.Lock()
				if uploadErr == nil {
					uploadErr = fmt.Errorf("upload chunk: %w", uploadResultErr)
				}
				uploadErrLock.Unlock()
				return
			}

			// Create chunk entry
			fid, _ := filer_pb.ToFileIdObject(assignResult.Fid)
			chunk := &filer_pb.FileChunk{
				FileId:    assignResult.Fid,
				Offset:    offset,
				Size:      uint64(uploadResult.Size),
				ETag:      uploadResult.ContentMd5,
				Fid:       fid,
				CipherKey: uploadResult.CipherKey,
			}

			fileChunksLock.Lock()
			fileChunks = append(fileChunks, chunk)
			glog.V(4).Infof("uploaded chunk %d to %s [%d,%d)", len(fileChunks), chunk.FileId, offset, offset+int64(chunk.Size))
			fileChunksLock.Unlock()

		}(chunkOffset, bytesBuffer)

		// Update offset for next chunk
		chunkOffset += dataSize

		// If this was a partial chunk, we're done
		if dataSize < int64(opt.ChunkSize) {
			break
		}
	}

	// Wait for all uploads to complete
	wg.Wait()

	// Check for errors
	if uploadErr != nil {
		glog.Errorf("chunked upload failed: %v", uploadErr)
		return nil, uploadErr
	}

	// Sort chunks by offset
	// Note: We could use slices.SortFunc here, but keeping it simple for Go 1.20 compatibility
	for i := 0; i < len(fileChunks); i++ {
		for j := i + 1; j < len(fileChunks); j++ {
			if fileChunks[i].Offset > fileChunks[j].Offset {
				fileChunks[i], fileChunks[j] = fileChunks[j], fileChunks[i]
			}
		}
	}

	return &ChunkedUploadResult{
		FileChunks:   fileChunks,
		Md5Hash:      md5Hash,
		TotalSize:    chunkOffset,
		SmallContent: nil,
	}, nil
}
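A minimal caller sketch, not part of this diff: it assumes the code sits in the same `operation` package as the new file, and that the caller already has a volume-assignment helper to pass as `AssignFunc`. The function name `uploadFileInChunks`, the `assign` parameter, and the chunk/inline size values are hypothetical, for illustration only.

```go
// Illustrative sketch: wiring UploadReaderInChunks into a caller.
// The assign callback wraps whatever volume-assignment logic the
// caller already uses; it is not defined by this change.
func uploadFileInChunks(ctx context.Context, r io.Reader, jwt security.EncodedJwt,
	assign func(ctx context.Context, count int) (*VolumeAssignRequest, *AssignResult, error)) error {

	result, err := UploadReaderInChunks(ctx, r, &ChunkedUploadOption{
		ChunkSize:       4 * 1024 * 1024, // example: 4 MiB chunks
		SmallFileLimit:  256,             // example: store very small files inline
		SaveSmallInline: true,
		MimeType:        "application/octet-stream",
		Jwt:             jwt,
		AssignFunc:      assign,
	})
	if err != nil {
		return err
	}

	if result.SmallContent != nil {
		// Small file: content is returned inline instead of as chunks.
		fmt.Printf("stored %d bytes inline, md5=%x\n", result.TotalSize, result.Md5Hash.Sum(nil))
		return nil
	}

	fmt.Printf("uploaded %d bytes in %d chunks, md5=%x\n",
		result.TotalSize, len(result.FileChunks), result.Md5Hash.Sum(nil))
	return nil
}
```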