You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

242 lines
6.9 KiB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
  1. package filer
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/chrislusf/seaweedfs/weed/wdclient"
  6. "io"
  7. "math"
  8. "time"
  9. "github.com/golang/protobuf/proto"
  10. "github.com/chrislusf/seaweedfs/weed/glog"
  11. "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
  12. "github.com/chrislusf/seaweedfs/weed/util"
  13. )
const (
	// ManifestBatch is the number of plain data chunks that are folded into a
	// single chunk manifest by MaybeManifestize / doMaybeManifestize.
	ManifestBatch = 10000
)
  17. func HasChunkManifest(chunks []*filer_pb.FileChunk) bool {
  18. for _, chunk := range chunks {
  19. if chunk.IsChunkManifest {
  20. return true
  21. }
  22. }
  23. return false
  24. }
  25. func SeparateManifestChunks(chunks []*filer_pb.FileChunk) (manifestChunks, nonManifestChunks []*filer_pb.FileChunk) {
  26. for _, c := range chunks {
  27. if c.IsChunkManifest {
  28. manifestChunks = append(manifestChunks, c)
  29. } else {
  30. nonManifestChunks = append(nonManifestChunks, c)
  31. }
  32. }
  33. return
  34. }
  35. func ResolveChunkManifest(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, startOffset, stopOffset int64) (dataChunks, manifestChunks []*filer_pb.FileChunk, manifestResolveErr error) {
  36. // TODO maybe parallel this
  37. for _, chunk := range chunks {
  38. if max(chunk.Offset, startOffset) >= min(chunk.Offset+int64(chunk.Size), stopOffset) {
  39. continue
  40. }
  41. if !chunk.IsChunkManifest {
  42. dataChunks = append(dataChunks, chunk)
  43. continue
  44. }
  45. resolvedChunks, err := ResolveOneChunkManifest(lookupFileIdFn, chunk)
  46. if err != nil {
  47. return chunks, nil, err
  48. }
  49. manifestChunks = append(manifestChunks, chunk)
  50. // recursive
  51. dchunks, mchunks, subErr := ResolveChunkManifest(lookupFileIdFn, resolvedChunks, startOffset, stopOffset)
  52. if subErr != nil {
  53. return chunks, nil, subErr
  54. }
  55. dataChunks = append(dataChunks, dchunks...)
  56. manifestChunks = append(manifestChunks, mchunks...)
  57. }
  58. return
  59. }
  60. func ResolveOneChunkManifest(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunk *filer_pb.FileChunk) (dataChunks []*filer_pb.FileChunk, manifestResolveErr error) {
  61. if !chunk.IsChunkManifest {
  62. return
  63. }
  64. // IsChunkManifest
  65. data, err := fetchChunk(lookupFileIdFn, chunk.GetFileIdString(), chunk.CipherKey, chunk.IsCompressed)
  66. if err != nil {
  67. return nil, fmt.Errorf("fail to read manifest %s: %v", chunk.GetFileIdString(), err)
  68. }
  69. m := &filer_pb.FileChunkManifest{}
  70. if err := proto.Unmarshal(data, m); err != nil {
  71. return nil, fmt.Errorf("fail to unmarshal manifest %s: %v", chunk.GetFileIdString(), err)
  72. }
  73. // recursive
  74. filer_pb.AfterEntryDeserialization(m.Chunks)
  75. return m.Chunks, nil
  76. }
  77. // TODO fetch from cache for weed mount?
  78. func fetchChunk(lookupFileIdFn wdclient.LookupFileIdFunctionType, fileId string, cipherKey []byte, isGzipped bool) ([]byte, error) {
  79. urlStrings, err := lookupFileIdFn(fileId)
  80. if err != nil {
  81. glog.Errorf("operation LookupFileId %s failed, err: %v", fileId, err)
  82. return nil, err
  83. }
  84. return retriedFetchChunkData(urlStrings, cipherKey, isGzipped, true, 0, 0)
  85. }
// retriedFetchChunkData reads one chunk into memory, trying each replica URL
// in turn and, while the read reports a retriable failure, sleeping with a
// growing back-off (1s, then +50% per round, capped by util.RetryWaitTime)
// before the next round. It returns the accumulated bytes together with the
// last error observed (nil on success).
func retriedFetchChunkData(urlStrings []string, cipherKey []byte, isGzipped bool, isFullChunk bool, offset int64, size int) ([]byte, error) {

	var err error
	var shouldRetry bool

	receivedData := make([]byte, 0, size)

	for waitTime := time.Second; waitTime < util.RetryWaitTime; waitTime += waitTime / 2 {
		for _, urlString := range urlStrings {
			// discard any partial data from a previous failed attempt
			receivedData = receivedData[:0]
			shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", cipherKey, isGzipped, isFullChunk, offset, size, func(data []byte) {
				receivedData = append(receivedData, data...)
			})
			if !shouldRetry {
				// either success or a non-retriable error: stop trying URLs
				break
			}
			if err != nil {
				glog.V(0).Infof("read %s failed, err: %v", urlString, err)
			} else {
				break
			}
		}
		if err != nil && shouldRetry {
			glog.V(0).Infof("retry reading in %v", waitTime)
			time.Sleep(waitTime)
		} else {
			break
		}
	}

	return receivedData, err
}
  114. func retriedStreamFetchChunkData(writer io.Writer, urlStrings []string, cipherKey []byte, isGzipped bool, isFullChunk bool, offset int64, size int) (err error) {
  115. var shouldRetry bool
  116. var totalWritten int
  117. for waitTime := time.Second; waitTime < util.RetryWaitTime; waitTime += waitTime / 2 {
  118. for _, urlString := range urlStrings {
  119. var localProcesed int
  120. shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", cipherKey, isGzipped, isFullChunk, offset, size, func(data []byte) {
  121. if totalWritten > localProcesed {
  122. toBeSkipped := totalWritten - localProcesed
  123. if len(data) <= toBeSkipped {
  124. localProcesed += len(data)
  125. return // skip if already processed
  126. }
  127. data = data[toBeSkipped:]
  128. localProcesed += toBeSkipped
  129. }
  130. writer.Write(data)
  131. localProcesed += len(data)
  132. totalWritten += len(data)
  133. })
  134. if !shouldRetry {
  135. break
  136. }
  137. if err != nil {
  138. glog.V(0).Infof("read %s failed, err: %v", urlString, err)
  139. } else {
  140. break
  141. }
  142. }
  143. if err != nil && shouldRetry {
  144. glog.V(0).Infof("retry reading in %v", waitTime)
  145. time.Sleep(waitTime)
  146. } else {
  147. break
  148. }
  149. }
  150. return err
  151. }
// MaybeManifestize folds every full batch of ManifestBatch plain data chunks
// in inputChunks into a single manifest chunk (saved via saveFunc). Chunks
// that are already manifests, and any trailing partial batch, pass through
// unchanged. See doMaybeManifestize for details.
func MaybeManifestize(saveFunc SaveDataAsChunkFunctionType, inputChunks []*filer_pb.FileChunk) (chunks []*filer_pb.FileChunk, err error) {
	return doMaybeManifestize(saveFunc, inputChunks, ManifestBatch, mergeIntoManifest)
}
  155. func doMaybeManifestize(saveFunc SaveDataAsChunkFunctionType, inputChunks []*filer_pb.FileChunk, mergeFactor int, mergefn func(saveFunc SaveDataAsChunkFunctionType, dataChunks []*filer_pb.FileChunk) (manifestChunk *filer_pb.FileChunk, err error)) (chunks []*filer_pb.FileChunk, err error) {
  156. var dataChunks []*filer_pb.FileChunk
  157. for _, chunk := range inputChunks {
  158. if !chunk.IsChunkManifest {
  159. dataChunks = append(dataChunks, chunk)
  160. } else {
  161. chunks = append(chunks, chunk)
  162. }
  163. }
  164. remaining := len(dataChunks)
  165. for i := 0; i+mergeFactor <= len(dataChunks); i += mergeFactor {
  166. chunk, err := mergefn(saveFunc, dataChunks[i:i+mergeFactor])
  167. if err != nil {
  168. return dataChunks, err
  169. }
  170. chunks = append(chunks, chunk)
  171. remaining -= mergeFactor
  172. }
  173. // remaining
  174. for i := len(dataChunks) - remaining; i < len(dataChunks); i++ {
  175. chunks = append(chunks, dataChunks[i])
  176. }
  177. return
  178. }
  179. func mergeIntoManifest(saveFunc SaveDataAsChunkFunctionType, dataChunks []*filer_pb.FileChunk) (manifestChunk *filer_pb.FileChunk, err error) {
  180. filer_pb.BeforeEntrySerialization(dataChunks)
  181. // create and serialize the manifest
  182. data, serErr := proto.Marshal(&filer_pb.FileChunkManifest{
  183. Chunks: dataChunks,
  184. })
  185. if serErr != nil {
  186. return nil, fmt.Errorf("serializing manifest: %v", serErr)
  187. }
  188. minOffset, maxOffset := int64(math.MaxInt64), int64(math.MinInt64)
  189. for _, chunk := range dataChunks {
  190. if minOffset > int64(chunk.Offset) {
  191. minOffset = chunk.Offset
  192. }
  193. if maxOffset < int64(chunk.Size)+chunk.Offset {
  194. maxOffset = int64(chunk.Size) + chunk.Offset
  195. }
  196. }
  197. manifestChunk, _, _, err = saveFunc(bytes.NewReader(data), "", 0)
  198. if err != nil {
  199. return nil, err
  200. }
  201. manifestChunk.IsChunkManifest = true
  202. manifestChunk.Offset = minOffset
  203. manifestChunk.Size = uint64(maxOffset - minOffset)
  204. return
  205. }
// SaveDataAsChunkFunctionType persists the bytes read from reader as a new
// chunk and returns the resulting FileChunk along with the collection and
// replication it was stored under. Callers in this file (mergeIntoManifest)
// pass an empty name and offset 0 when saving manifest data.
type SaveDataAsChunkFunctionType func(reader io.Reader, name string, offset int64) (chunk *filer_pb.FileChunk, collection, replication string, err error)