You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

238 lines
7.3 KiB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 months ago
4 years ago
4 months ago
4 years ago
4 years ago
9 months ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
4 years ago
  1. package weed_server
  2. import (
  3. "bytes"
  4. "crypto/md5"
  5. "fmt"
  6. "hash"
  7. "io"
  8. "net/http"
  9. "strconv"
  10. "sync"
  11. "time"
  12. "slices"
  13. "github.com/seaweedfs/seaweedfs/weed/glog"
  14. "github.com/seaweedfs/seaweedfs/weed/operation"
  15. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  16. "github.com/seaweedfs/seaweedfs/weed/security"
  17. "github.com/seaweedfs/seaweedfs/weed/stats"
  18. "github.com/seaweedfs/seaweedfs/weed/util"
  19. )
  20. var bufPool = sync.Pool{
  21. New: func() interface{} {
  22. return new(bytes.Buffer)
  23. },
  24. }
  25. func (fs *FilerServer) uploadRequestToChunks(w http.ResponseWriter, r *http.Request, reader io.Reader, chunkSize int32, fileName, contentType string, contentLength int64, so *operation.StorageOption) (fileChunks []*filer_pb.FileChunk, md5Hash hash.Hash, chunkOffset int64, uploadErr error, smallContent []byte) {
  26. query := r.URL.Query()
  27. isAppend := isAppend(r)
  28. if query.Has("offset") {
  29. offset := query.Get("offset")
  30. offsetInt, err := strconv.ParseInt(offset, 10, 64)
  31. if err != nil || offsetInt < 0 {
  32. err = fmt.Errorf("invalid 'offset': '%s'", offset)
  33. return nil, nil, 0, err, nil
  34. }
  35. if isAppend && offsetInt > 0 {
  36. err = fmt.Errorf("cannot set offset when op=append")
  37. return nil, nil, 0, err, nil
  38. }
  39. chunkOffset = offsetInt
  40. }
  41. return fs.uploadReaderToChunks(reader, chunkOffset, chunkSize, fileName, contentType, isAppend, so)
  42. }
  43. func (fs *FilerServer) uploadReaderToChunks(reader io.Reader, startOffset int64, chunkSize int32, fileName, contentType string, isAppend bool, so *operation.StorageOption) (fileChunks []*filer_pb.FileChunk, md5Hash hash.Hash, chunkOffset int64, uploadErr error, smallContent []byte) {
  44. md5Hash = md5.New()
  45. chunkOffset = startOffset
  46. var partReader = io.NopCloser(io.TeeReader(reader, md5Hash))
  47. var wg sync.WaitGroup
  48. var bytesBufferCounter int64 = 4
  49. bytesBufferLimitChan := make(chan struct{}, bytesBufferCounter)
  50. var fileChunksLock sync.Mutex
  51. var uploadErrLock sync.Mutex
  52. for {
  53. // need to throttle used byte buffer
  54. bytesBufferLimitChan <- struct{}{}
  55. // As long as there is an error in the upload of one chunk, it can be terminated early
  56. // uploadErr may be modified in other go routines, lock is needed to avoid race condition
  57. uploadErrLock.Lock()
  58. if uploadErr != nil {
  59. <-bytesBufferLimitChan
  60. uploadErrLock.Unlock()
  61. break
  62. }
  63. uploadErrLock.Unlock()
  64. bytesBuffer := bufPool.Get().(*bytes.Buffer)
  65. limitedReader := io.LimitReader(partReader, int64(chunkSize))
  66. bytesBuffer.Reset()
  67. dataSize, err := bytesBuffer.ReadFrom(limitedReader)
  68. // data, err := io.ReadAll(limitedReader)
  69. if err != nil || dataSize == 0 {
  70. bufPool.Put(bytesBuffer)
  71. <-bytesBufferLimitChan
  72. if err != nil {
  73. uploadErrLock.Lock()
  74. if uploadErr == nil {
  75. uploadErr = err
  76. }
  77. uploadErrLock.Unlock()
  78. }
  79. break
  80. }
  81. if chunkOffset == 0 && !isAppend {
  82. if dataSize < fs.option.SaveToFilerLimit {
  83. chunkOffset += dataSize
  84. smallContent = make([]byte, dataSize)
  85. bytesBuffer.Read(smallContent)
  86. bufPool.Put(bytesBuffer)
  87. <-bytesBufferLimitChan
  88. stats.FilerHandlerCounter.WithLabelValues(stats.ContentSaveToFiler).Inc()
  89. break
  90. }
  91. } else {
  92. stats.FilerHandlerCounter.WithLabelValues(stats.AutoChunk).Inc()
  93. }
  94. wg.Add(1)
  95. go func(offset int64, buf *bytes.Buffer) {
  96. defer func() {
  97. bufPool.Put(buf)
  98. <-bytesBufferLimitChan
  99. wg.Done()
  100. }()
  101. chunks, toChunkErr := fs.dataToChunk(fileName, contentType, buf.Bytes(), offset, so)
  102. if toChunkErr != nil {
  103. uploadErrLock.Lock()
  104. if uploadErr == nil {
  105. uploadErr = toChunkErr
  106. }
  107. uploadErrLock.Unlock()
  108. }
  109. if chunks != nil {
  110. fileChunksLock.Lock()
  111. fileChunksSize := len(fileChunks) + len(chunks)
  112. for _, chunk := range chunks {
  113. fileChunks = append(fileChunks, chunk)
  114. glog.V(4).Infof("uploaded %s chunk %d to %s [%d,%d)", fileName, fileChunksSize, chunk.FileId, offset, offset+int64(chunk.Size))
  115. }
  116. fileChunksLock.Unlock()
  117. }
  118. }(chunkOffset, bytesBuffer)
  119. // reset variables for the next chunk
  120. chunkOffset = chunkOffset + dataSize
  121. // if last chunk was not at full chunk size, but already exhausted the reader
  122. if dataSize < int64(chunkSize) {
  123. break
  124. }
  125. }
  126. wg.Wait()
  127. if uploadErr != nil {
  128. glog.V(0).Infof("upload file %s error: %v", fileName, uploadErr)
  129. for _, chunk := range fileChunks {
  130. glog.V(4).Infof("purging failed uploaded %s chunk %s [%d,%d)", fileName, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size))
  131. }
  132. fs.filer.DeleteUncommittedChunks(fileChunks)
  133. return nil, md5Hash, 0, uploadErr, nil
  134. }
  135. slices.SortFunc(fileChunks, func(a, b *filer_pb.FileChunk) int {
  136. return int(a.Offset - b.Offset)
  137. })
  138. return fileChunks, md5Hash, chunkOffset, nil, smallContent
  139. }
  140. func (fs *FilerServer) doUpload(urlLocation string, limitedReader io.Reader, fileName string, contentType string, pairMap map[string]string, auth security.EncodedJwt) (*operation.UploadResult, error, []byte) {
  141. stats.FilerHandlerCounter.WithLabelValues(stats.ChunkUpload).Inc()
  142. start := time.Now()
  143. defer func() {
  144. stats.FilerRequestHistogram.WithLabelValues(stats.ChunkUpload).Observe(time.Since(start).Seconds())
  145. }()
  146. uploadOption := &operation.UploadOption{
  147. UploadUrl: urlLocation,
  148. Filename: fileName,
  149. Cipher: fs.option.Cipher,
  150. IsInputCompressed: false,
  151. MimeType: contentType,
  152. PairMap: pairMap,
  153. Jwt: auth,
  154. }
  155. uploader, err := operation.NewUploader()
  156. if err != nil {
  157. return nil, err, []byte{}
  158. }
  159. uploadResult, err, data := uploader.Upload(limitedReader, uploadOption)
  160. if uploadResult != nil && uploadResult.RetryCount > 0 {
  161. stats.FilerHandlerCounter.WithLabelValues(stats.ChunkUploadRetry).Add(float64(uploadResult.RetryCount))
  162. }
  163. return uploadResult, err, data
  164. }
  165. func (fs *FilerServer) dataToChunk(fileName, contentType string, data []byte, chunkOffset int64, so *operation.StorageOption) ([]*filer_pb.FileChunk, error) {
  166. dataReader := util.NewBytesReader(data)
  167. // retry to assign a different file id
  168. var fileId, urlLocation string
  169. var auth security.EncodedJwt
  170. var uploadErr error
  171. var uploadResult *operation.UploadResult
  172. var failedFileChunks []*filer_pb.FileChunk
  173. err := util.Retry("filerDataToChunk", func() error {
  174. // assign one file id for one chunk
  175. fileId, urlLocation, auth, uploadErr = fs.assignNewFileInfo(so)
  176. if uploadErr != nil {
  177. glog.V(4).Infof("retry later due to assign error: %v", uploadErr)
  178. stats.FilerHandlerCounter.WithLabelValues(stats.ChunkAssignRetry).Inc()
  179. return uploadErr
  180. }
  181. // upload the chunk to the volume server
  182. uploadResult, uploadErr, _ = fs.doUpload(urlLocation, dataReader, fileName, contentType, nil, auth)
  183. if uploadErr != nil {
  184. glog.V(4).Infof("retry later due to upload error: %v", uploadErr)
  185. stats.FilerHandlerCounter.WithLabelValues(stats.ChunkDoUploadRetry).Inc()
  186. fid, _ := filer_pb.ToFileIdObject(fileId)
  187. fileChunk := filer_pb.FileChunk{
  188. FileId: fileId,
  189. Offset: chunkOffset,
  190. Fid: fid,
  191. }
  192. failedFileChunks = append(failedFileChunks, &fileChunk)
  193. return uploadErr
  194. }
  195. return nil
  196. })
  197. if err != nil {
  198. glog.Errorf("upload error: %v", err)
  199. return failedFileChunks, err
  200. }
  201. // if last chunk exhausted the reader exactly at the border
  202. if uploadResult.Size == 0 {
  203. return nil, nil
  204. }
  205. return []*filer_pb.FileChunk{uploadResult.ToPbFileChunk(fileId, chunkOffset, time.Now().UnixNano())}, nil
  206. }