304 lines
8.1 KiB

5 years ago
5 years ago
5 years ago
5 years ago
  1. package erasure_coding
  2. import (
  3. "fmt"
  4. "io"
  5. "os"
  6. "github.com/klauspost/reedsolomon"
  7. "github.com/chrislusf/seaweedfs/weed/glog"
  8. "github.com/chrislusf/seaweedfs/weed/storage/idx"
  9. "github.com/chrislusf/seaweedfs/weed/storage/needle_map"
  10. "github.com/chrislusf/seaweedfs/weed/storage/types"
  11. "github.com/chrislusf/seaweedfs/weed/util"
  12. )
  // Erasure coding layout and block sizes used when encoding a volume.
  const (
  	// DataShardsCount is the number of data shards handed to the encoder.
  	DataShardsCount = 10
  	// ParityShardsCount is the number of parity shards handed to the encoder.
  	ParityShardsCount = 4
  	// TotalShardsCount is the number of shard files (.ec00 ~ .ec13) per volume.
  	TotalShardsCount = ParityShardsCount + DataShardsCount
  	// ErasureCodingLargeBlockSize is the per-shard block size used while more
  	// than one full row of large blocks remains to be encoded.
  	ErasureCodingLargeBlockSize = 1 << 30 // 1GB
  	// ErasureCodingSmallBlockSize is the per-shard block size used for the
  	// remaining data.
  	ErasureCodingSmallBlockSize = 1 << 20 // 1MB
  )
  20. // WriteSortedFileFromIdx generates .ecx file from existing .idx file
  21. // all keys are sorted in ascending order
  22. func WriteSortedFileFromIdx(baseFileName string, ext string) (e error) {
  23. nm, err := readNeedleMap(baseFileName)
  24. if nm != nil {
  25. defer nm.Close()
  26. }
  27. if err != nil {
  28. return fmt.Errorf("readNeedleMap: %v", err)
  29. }
  30. ecxFile, err := os.OpenFile(baseFileName+ext, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
  31. if err != nil {
  32. return fmt.Errorf("failed to open ecx file: %v", err)
  33. }
  34. defer ecxFile.Close()
  35. err = nm.AscendingVisit(func(value needle_map.NeedleValue) error {
  36. bytes := value.ToBytes()
  37. _, writeErr := ecxFile.Write(bytes)
  38. return writeErr
  39. })
  40. if err != nil {
  41. return fmt.Errorf("failed to visit idx file: %v", err)
  42. }
  43. return nil
  44. }
  45. // WriteEcFiles generates .ec00 ~ .ec13 files
  46. func WriteEcFiles(baseFileName string) error {
  47. return generateEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
  48. }
  49. func RebuildEcFiles(baseFileName string) ([]uint32, error) {
  50. return generateMissingEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
  51. }
  // ToExt returns the file extension of the shard with the given index: ".ec"
  // followed by the index zero-padded to at least two digits, e.g. ".ec03".
  func ToExt(ecIndex int) string {
  	return ".ec" + fmt.Sprintf("%02d", ecIndex)
  }
  55. func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) error {
  56. file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
  57. if err != nil {
  58. return fmt.Errorf("failed to open dat file: %v", err)
  59. }
  60. defer file.Close()
  61. fi, err := file.Stat()
  62. if err != nil {
  63. return fmt.Errorf("failed to stat dat file: %v", err)
  64. }
  65. err = encodeDatFile(fi.Size(), err, baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
  66. if err != nil {
  67. return fmt.Errorf("encodeDatFile: %v", err)
  68. }
  69. return nil
  70. }
  71. func generateMissingEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) (generatedShardIds []uint32, err error) {
  72. shardHasData := make([]bool, TotalShardsCount)
  73. inputFiles := make([]*os.File, TotalShardsCount)
  74. outputFiles := make([]*os.File, TotalShardsCount)
  75. for shardId := 0; shardId < TotalShardsCount; shardId++ {
  76. shardFileName := baseFileName + ToExt(shardId)
  77. if util.FileExists(shardFileName) {
  78. shardHasData[shardId] = true
  79. inputFiles[shardId], err = os.OpenFile(shardFileName, os.O_RDONLY, 0)
  80. if err != nil {
  81. return nil, err
  82. }
  83. defer inputFiles[shardId].Close()
  84. } else {
  85. outputFiles[shardId], err = os.OpenFile(shardFileName, os.O_TRUNC|os.O_WRONLY|os.O_CREATE, 0644)
  86. if err != nil {
  87. return nil, err
  88. }
  89. defer outputFiles[shardId].Close()
  90. generatedShardIds = append(generatedShardIds, uint32(shardId))
  91. }
  92. }
  93. err = rebuildEcFiles(shardHasData, inputFiles, outputFiles)
  94. if err != nil {
  95. return nil, fmt.Errorf("rebuildEcFiles: %v", err)
  96. }
  97. return
  98. }
  99. func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
  100. bufferSize := int64(len(buffers[0]))
  101. batchCount := blockSize / bufferSize
  102. if blockSize%bufferSize != 0 {
  103. glog.Fatalf("unexpected block size %d buffer size %d", blockSize, bufferSize)
  104. }
  105. for b := int64(0); b < batchCount; b++ {
  106. err := encodeDataOneBatch(file, enc, startOffset+b*bufferSize, blockSize, buffers, outputs)
  107. if err != nil {
  108. return err
  109. }
  110. }
  111. return nil
  112. }
  113. func openEcFiles(baseFileName string, forRead bool) (files []*os.File, err error) {
  114. for i := 0; i < TotalShardsCount; i++ {
  115. fname := baseFileName + ToExt(i)
  116. openOption := os.O_TRUNC | os.O_CREATE | os.O_WRONLY
  117. if forRead {
  118. openOption = os.O_RDONLY
  119. }
  120. f, err := os.OpenFile(fname, openOption, 0644)
  121. if err != nil {
  122. return files, fmt.Errorf("failed to open file %s: %v", fname, err)
  123. }
  124. files = append(files, f)
  125. }
  126. return
  127. }
  // closeEcFiles closes every non-nil file in files. Errors returned by Close
  // are not reported.
  func closeEcFiles(files []*os.File) {
  	for i := range files {
  		if files[i] == nil {
  			continue
  		}
  		files[i].Close()
  	}
  }
  135. func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
  136. // read data into buffers
  137. for i := 0; i < DataShardsCount; i++ {
  138. n, err := file.ReadAt(buffers[i], startOffset+blockSize*int64(i))
  139. if err != nil {
  140. if err != io.EOF {
  141. return err
  142. }
  143. }
  144. if n < len(buffers[i]) {
  145. for t := len(buffers[i]) - 1; t >= n; t-- {
  146. buffers[i][t] = 0
  147. }
  148. }
  149. }
  150. err := enc.Encode(buffers)
  151. if err != nil {
  152. return err
  153. }
  154. for i := 0; i < TotalShardsCount; i++ {
  155. _, err := outputs[i].Write(buffers[i])
  156. if err != nil {
  157. return err
  158. }
  159. }
  160. return nil
  161. }
  162. func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
  163. var processedSize int64
  164. enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
  165. if err != nil {
  166. return fmt.Errorf("failed to create encoder: %v", err)
  167. }
  168. buffers := make([][]byte, TotalShardsCount)
  169. for i := range buffers {
  170. buffers[i] = make([]byte, bufferSize)
  171. }
  172. outputs, err := openEcFiles(baseFileName, false)
  173. defer closeEcFiles(outputs)
  174. if err != nil {
  175. return fmt.Errorf("failed to open ec files %s: %v", baseFileName, err)
  176. }
  177. for remainingSize > largeBlockSize*DataShardsCount {
  178. err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs)
  179. if err != nil {
  180. return fmt.Errorf("failed to encode large chunk data: %v", err)
  181. }
  182. remainingSize -= largeBlockSize * DataShardsCount
  183. processedSize += largeBlockSize * DataShardsCount
  184. }
  185. for remainingSize > 0 {
  186. encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs)
  187. if err != nil {
  188. return fmt.Errorf("failed to encode small chunk data: %v", err)
  189. }
  190. remainingSize -= smallBlockSize * DataShardsCount
  191. processedSize += smallBlockSize * DataShardsCount
  192. }
  193. return nil
  194. }
  195. func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*os.File) error {
  196. enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
  197. if err != nil {
  198. return fmt.Errorf("failed to create encoder: %v", err)
  199. }
  200. buffers := make([][]byte, TotalShardsCount)
  201. for i := range buffers {
  202. if shardHasData[i] {
  203. buffers[i] = make([]byte, ErasureCodingSmallBlockSize)
  204. }
  205. }
  206. var startOffset int64
  207. var inputBufferDataSize int
  208. for {
  209. // read the input data from files
  210. for i := 0; i < TotalShardsCount; i++ {
  211. if shardHasData[i] {
  212. n, _ := inputFiles[i].ReadAt(buffers[i], startOffset)
  213. if n == 0 {
  214. return nil
  215. }
  216. if inputBufferDataSize == 0 {
  217. inputBufferDataSize = n
  218. }
  219. if inputBufferDataSize != n {
  220. return fmt.Errorf("ec shard size expected %d actual %d", inputBufferDataSize, n)
  221. }
  222. } else {
  223. buffers[i] = nil
  224. }
  225. }
  226. // encode the data
  227. err = enc.Reconstruct(buffers)
  228. if err != nil {
  229. return fmt.Errorf("reconstruct: %v", err)
  230. }
  231. // write the data to output files
  232. for i := 0; i < TotalShardsCount; i++ {
  233. if !shardHasData[i] {
  234. n, _ := outputFiles[i].WriteAt(buffers[i][:inputBufferDataSize], startOffset)
  235. if inputBufferDataSize != n {
  236. return fmt.Errorf("fail to write to %s", outputFiles[i].Name())
  237. }
  238. }
  239. }
  240. startOffset += int64(inputBufferDataSize)
  241. }
  242. }
  243. func readNeedleMap(baseFileName string) (*needle_map.MemDb, error) {
  244. indexFile, err := os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644)
  245. if err != nil {
  246. return nil, fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, err)
  247. }
  248. defer indexFile.Close()
  249. cm := needle_map.NewMemDb()
  250. err = idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
  251. if !offset.IsZero() && size != types.TombstoneFileSize {
  252. cm.Set(key, offset, size)
  253. } else {
  254. cm.Delete(key)
  255. }
  256. return nil
  257. })
  258. return cm, err
  259. }