You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

381 lines
11 KiB

more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
2 years ago
2 years ago
3 years ago
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
4 years ago
12 months ago
4 years ago
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
3 years ago
3 years ago
3 years ago
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
3 years ago
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
3 years ago
3 years ago
2 years ago
2 years ago
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
5 years ago
5 years ago
  1. package filer
  2. import (
  3. "bytes"
  4. "fmt"
  5. "io"
  6. "math"
  7. "strings"
  8. "sync"
  9. "time"
  10. "golang.org/x/exp/slices"
  11. "github.com/seaweedfs/seaweedfs/weed/glog"
  12. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  13. "github.com/seaweedfs/seaweedfs/weed/stats"
  14. "github.com/seaweedfs/seaweedfs/weed/util"
  15. "github.com/seaweedfs/seaweedfs/weed/wdclient"
  16. )
  // getLookupFileIdBackoffSchedule holds the backoff delays used when retrying
  // file id lookups in PrepareStreamContentWithThrottler. One lookup attempt is
  // made per entry.
  var getLookupFileIdBackoffSchedule = []time.Duration{
  	time.Millisecond * 150,
  	time.Millisecond * 600,
  	time.Millisecond * 1800,
  }
  22. func HasData(entry *filer_pb.Entry) bool {
  23. if len(entry.Content) > 0 {
  24. return true
  25. }
  26. return len(entry.GetChunks()) > 0
  27. }
  28. func IsSameData(a, b *filer_pb.Entry) bool {
  29. if len(a.Content) > 0 || len(b.Content) > 0 {
  30. return bytes.Equal(a.Content, b.Content)
  31. }
  32. return isSameChunks(a.Chunks, b.Chunks)
  33. }
  34. func isSameChunks(a, b []*filer_pb.FileChunk) bool {
  35. if len(a) != len(b) {
  36. return false
  37. }
  38. slices.SortFunc(a, func(i, j *filer_pb.FileChunk) int {
  39. return strings.Compare(i.ETag, j.ETag)
  40. })
  41. slices.SortFunc(b, func(i, j *filer_pb.FileChunk) int {
  42. return strings.Compare(i.ETag, j.ETag)
  43. })
  44. for i := 0; i < len(a); i++ {
  45. if a[i].ETag != b[i].ETag {
  46. return false
  47. }
  48. }
  49. return true
  50. }
  51. func NewFileReader(filerClient filer_pb.FilerClient, entry *filer_pb.Entry) io.Reader {
  52. if len(entry.Content) > 0 {
  53. return bytes.NewReader(entry.Content)
  54. }
  55. return NewChunkStreamReader(filerClient, entry.GetChunks())
  56. }
  57. type DoStreamContent func(writer io.Writer) error
  58. func PrepareStreamContent(masterClient wdclient.HasLookupFileIdFunction, chunks []*filer_pb.FileChunk, offset int64, size int64) (DoStreamContent, error) {
  59. return PrepareStreamContentWithThrottler(masterClient, chunks, offset, size, 0)
  60. }
  61. func PrepareStreamContentWithThrottler(masterClient wdclient.HasLookupFileIdFunction, chunks []*filer_pb.FileChunk, offset int64, size int64, downloadMaxBytesPs int64) (DoStreamContent, error) {
  62. glog.V(4).Infof("prepare to stream content for chunks: %d", len(chunks))
  63. chunkViews := ViewFromChunks(masterClient.GetLookupFileIdFunction(), chunks, offset, size)
  64. fileId2Url := make(map[string][]string)
  65. for x := chunkViews.Front(); x != nil; x = x.Next {
  66. chunkView := x.Value
  67. var urlStrings []string
  68. var err error
  69. for _, backoff := range getLookupFileIdBackoffSchedule {
  70. urlStrings, err = masterClient.GetLookupFileIdFunction()(chunkView.FileId)
  71. if err == nil && len(urlStrings) > 0 {
  72. break
  73. }
  74. glog.V(4).Infof("waiting for chunk: %s", chunkView.FileId)
  75. time.Sleep(backoff)
  76. }
  77. if err != nil {
  78. glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
  79. return nil, err
  80. } else if len(urlStrings) == 0 {
  81. errUrlNotFound := fmt.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId)
  82. glog.Error(errUrlNotFound)
  83. return nil, errUrlNotFound
  84. }
  85. fileId2Url[chunkView.FileId] = urlStrings
  86. }
  87. return func(writer io.Writer) error {
  88. downloadThrottler := util.NewWriteThrottler(downloadMaxBytesPs)
  89. remaining := size
  90. for x := chunkViews.Front(); x != nil; x = x.Next {
  91. chunkView := x.Value
  92. if offset < chunkView.ViewOffset {
  93. gap := chunkView.ViewOffset - offset
  94. remaining -= gap
  95. glog.V(4).Infof("zero [%d,%d)", offset, chunkView.ViewOffset)
  96. err := writeZero(writer, gap)
  97. if err != nil {
  98. return fmt.Errorf("write zero [%d,%d)", offset, chunkView.ViewOffset)
  99. }
  100. offset = chunkView.ViewOffset
  101. }
  102. urlStrings := fileId2Url[chunkView.FileId]
  103. start := time.Now()
  104. err := retriedStreamFetchChunkData(writer, urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize))
  105. offset += int64(chunkView.ViewSize)
  106. remaining -= int64(chunkView.ViewSize)
  107. stats.FilerRequestHistogram.WithLabelValues("chunkDownload").Observe(time.Since(start).Seconds())
  108. if err != nil {
  109. stats.FilerHandlerCounter.WithLabelValues("chunkDownloadError").Inc()
  110. return fmt.Errorf("read chunk: %v", err)
  111. }
  112. stats.FilerHandlerCounter.WithLabelValues("chunkDownload").Inc()
  113. downloadThrottler.MaybeSlowdown(int64(chunkView.ViewSize))
  114. }
  115. if remaining > 0 {
  116. glog.V(4).Infof("zero [%d,%d)", offset, offset+remaining)
  117. err := writeZero(writer, remaining)
  118. if err != nil {
  119. return fmt.Errorf("write zero [%d,%d)", offset, offset+remaining)
  120. }
  121. }
  122. return nil
  123. }, nil
  124. }
  125. func StreamContent(masterClient wdclient.HasLookupFileIdFunction, writer io.Writer, chunks []*filer_pb.FileChunk, offset int64, size int64) error {
  126. streamFn, err := PrepareStreamContent(masterClient, chunks, offset, size)
  127. if err != nil {
  128. return err
  129. }
  130. return streamFn(writer)
  131. }
  132. // ---------------- ReadAllReader ----------------------------------
  // writeZero writes size zero bytes to w in blocks of at most 1024 bytes.
  // A size of zero or less writes nothing.
  //
  // It returns the first error reported by w. If w accepts no bytes without
  // reporting an error, io.ErrShortWrite is returned instead of retrying
  // forever. Partial writes that do make progress are continued.
  func writeZero(w io.Writer, size int64) (err error) {
  	const zeroBlockSize = 1024

  	if size <= 0 {
  		return nil
  	}

  	zeroPadding := make([]byte, zeroBlockSize)
  	var written int
  	for size > 0 {
  		block := zeroPadding
  		if size < int64(len(block)) {
  			block = block[:size]
  		}
  		written, err = w.Write(block)
  		size -= int64(written)
  		if err != nil {
  			return err
  		}
  		if written <= 0 {
  			// The writer made no progress and gave no reason.
  			return io.ErrShortWrite
  		}
  	}
  	return nil
  }
  149. func ReadAll(buffer []byte, masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) error {
  150. lookupFileIdFn := func(fileId string) (targetUrls []string, err error) {
  151. return masterClient.LookupFileId(fileId)
  152. }
  153. chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, int64(len(buffer)))
  154. idx := 0
  155. for x := chunkViews.Front(); x != nil; x = x.Next {
  156. chunkView := x.Value
  157. urlStrings, err := lookupFileIdFn(chunkView.FileId)
  158. if err != nil {
  159. glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
  160. return err
  161. }
  162. n, err := util.RetriedFetchChunkData(buffer[idx:idx+int(chunkView.ViewSize)], urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk)
  163. if err != nil {
  164. return err
  165. }
  166. idx += n
  167. }
  168. return nil
  169. }
  170. // ---------------- ChunkStreamReader ----------------------------------
  171. type ChunkStreamReader struct {
  172. head *Interval[*ChunkView]
  173. chunkView *Interval[*ChunkView]
  174. totalSize int64
  175. logicOffset int64
  176. buffer []byte
  177. bufferOffset int64
  178. bufferLock sync.Mutex
  179. chunk string
  180. lookupFileId wdclient.LookupFileIdFunctionType
  181. }
  182. var _ = io.ReadSeeker(&ChunkStreamReader{})
  183. var _ = io.ReaderAt(&ChunkStreamReader{})
  184. func doNewChunkStreamReader(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
  185. chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
  186. var totalSize int64
  187. for x := chunkViews.Front(); x != nil; x = x.Next {
  188. chunk := x.Value
  189. totalSize += int64(chunk.ViewSize)
  190. }
  191. return &ChunkStreamReader{
  192. head: chunkViews.Front(),
  193. chunkView: chunkViews.Front(),
  194. lookupFileId: lookupFileIdFn,
  195. totalSize: totalSize,
  196. }
  197. }
  198. func NewChunkStreamReaderFromFiler(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
  199. lookupFileIdFn := func(fileId string) (targetUrl []string, err error) {
  200. return masterClient.LookupFileId(fileId)
  201. }
  202. return doNewChunkStreamReader(lookupFileIdFn, chunks)
  203. }
  204. func NewChunkStreamReader(filerClient filer_pb.FilerClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
  205. lookupFileIdFn := LookupFn(filerClient)
  206. return doNewChunkStreamReader(lookupFileIdFn, chunks)
  207. }
  208. func (c *ChunkStreamReader) ReadAt(p []byte, off int64) (n int, err error) {
  209. c.bufferLock.Lock()
  210. defer c.bufferLock.Unlock()
  211. if err = c.prepareBufferFor(off); err != nil {
  212. return
  213. }
  214. c.logicOffset = off
  215. return c.doRead(p)
  216. }
  217. func (c *ChunkStreamReader) Read(p []byte) (n int, err error) {
  218. c.bufferLock.Lock()
  219. defer c.bufferLock.Unlock()
  220. return c.doRead(p)
  221. }
  222. func (c *ChunkStreamReader) doRead(p []byte) (n int, err error) {
  223. // fmt.Printf("do read [%d,%d) at %s[%d,%d)\n", c.logicOffset, c.logicOffset+int64(len(p)), c.chunk, c.bufferOffset, c.bufferOffset+int64(len(c.buffer)))
  224. for n < len(p) {
  225. // println("read", c.logicOffset)
  226. if err = c.prepareBufferFor(c.logicOffset); err != nil {
  227. return
  228. }
  229. t := copy(p[n:], c.buffer[c.logicOffset-c.bufferOffset:])
  230. n += t
  231. c.logicOffset += int64(t)
  232. }
  233. return
  234. }
  235. func (c *ChunkStreamReader) isBufferEmpty() bool {
  236. return len(c.buffer) <= int(c.logicOffset-c.bufferOffset)
  237. }
  238. func (c *ChunkStreamReader) Seek(offset int64, whence int) (int64, error) {
  239. c.bufferLock.Lock()
  240. defer c.bufferLock.Unlock()
  241. var err error
  242. switch whence {
  243. case io.SeekStart:
  244. case io.SeekCurrent:
  245. offset += c.logicOffset
  246. case io.SeekEnd:
  247. offset = c.totalSize + offset
  248. }
  249. if offset > c.totalSize {
  250. err = io.ErrUnexpectedEOF
  251. } else {
  252. c.logicOffset = offset
  253. }
  254. return offset, err
  255. }
  256. func insideChunk(offset int64, chunk *ChunkView) bool {
  257. return chunk.ViewOffset <= offset && offset < chunk.ViewOffset+int64(chunk.ViewSize)
  258. }
  259. func (c *ChunkStreamReader) prepareBufferFor(offset int64) (err error) {
  260. // stay in the same chunk
  261. if c.bufferOffset <= offset && offset < c.bufferOffset+int64(len(c.buffer)) {
  262. return nil
  263. }
  264. // glog.V(2).Infof("c.chunkView: %v buffer:[%d,%d) offset:%d totalSize:%d", c.chunkView, c.bufferOffset, c.bufferOffset+int64(len(c.buffer)), offset, c.totalSize)
  265. // find a possible chunk view
  266. p := c.chunkView
  267. for p != nil {
  268. chunk := p.Value
  269. // glog.V(2).Infof("prepareBufferFor check chunk:[%d,%d)", chunk.ViewOffset, chunk.ViewOffset+int64(chunk.ViewSize))
  270. if insideChunk(offset, chunk) {
  271. if c.isBufferEmpty() || c.bufferOffset != chunk.ViewOffset {
  272. c.chunkView = p
  273. return c.fetchChunkToBuffer(chunk)
  274. }
  275. }
  276. if offset < c.bufferOffset {
  277. p = p.Prev
  278. } else {
  279. p = p.Next
  280. }
  281. }
  282. return io.EOF
  283. }
  284. func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
  285. urlStrings, err := c.lookupFileId(chunkView.FileId)
  286. if err != nil {
  287. glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
  288. return err
  289. }
  290. var buffer bytes.Buffer
  291. var shouldRetry bool
  292. for _, urlString := range urlStrings {
  293. shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize), func(data []byte) {
  294. buffer.Write(data)
  295. })
  296. if !shouldRetry {
  297. break
  298. }
  299. if err != nil {
  300. glog.V(1).Infof("read %s failed, err: %v", chunkView.FileId, err)
  301. buffer.Reset()
  302. } else {
  303. break
  304. }
  305. }
  306. if err != nil {
  307. return err
  308. }
  309. c.buffer = buffer.Bytes()
  310. c.bufferOffset = chunkView.ViewOffset
  311. c.chunk = chunkView.FileId
  312. // glog.V(0).Infof("fetched %s [%d,%d)", chunkView.FileId, chunkView.ViewOffset, chunkView.ViewOffset+int64(chunkView.ViewSize))
  313. return nil
  314. }
  315. func (c *ChunkStreamReader) Close() {
  316. // TODO try to release and reuse buffer
  317. }
  // VolumeId returns the part of fileId before its last comma. A fileId with no
  // comma, or whose only comma is the first character, is returned unchanged.
  func VolumeId(fileId string) string {
  	commaAt := strings.LastIndex(fileId, ",")
  	if commaAt <= 0 {
  		return fileId
  	}
  	return fileId[:commaAt]
  }