You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

234 lines
6.4 KiB

6 years ago
5 years ago
5 years ago
5 years ago
6 years ago
6 years ago
6 years ago
  1. package erasure_coding
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/stretchr/testify/assert"
  6. "math/rand"
  7. "os"
  8. "testing"
  9. "github.com/klauspost/reedsolomon"
  10. "github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
  11. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  12. )
// Block sizes used by the tests in this file. They are passed to
// generateEcFiles, LocateData and Interval.ToShardIdAndOffset in place of the
// ErasureCodingLargeBlockSize / ErasureCodingSmallBlockSize values used by
// TestLocateData2 and TestLocateData3.
// NOTE(review): presumably byte counts, kept small so the test data stays tiny — confirm.
const (
	// largeBlockSize is the "large block" size argument used by the tests.
	largeBlockSize = 10000
	// smallBlockSize is the "small block" size argument used by the tests.
	smallBlockSize = 100
)
  17. func TestEncodingDecoding(t *testing.T) {
  18. bufferSize := 50
  19. baseFileName := "1"
  20. err := generateEcFiles(baseFileName, bufferSize, largeBlockSize, smallBlockSize)
  21. if err != nil {
  22. t.Logf("generateEcFiles: %v", err)
  23. }
  24. err = WriteSortedFileFromIdx(baseFileName, ".ecx")
  25. if err != nil {
  26. t.Logf("WriteSortedFileFromIdx: %v", err)
  27. }
  28. err = validateFiles(baseFileName)
  29. if err != nil {
  30. t.Logf("WriteSortedFileFromIdx: %v", err)
  31. }
  32. removeGeneratedFiles(baseFileName)
  33. }
  34. func validateFiles(baseFileName string) error {
  35. nm, err := readNeedleMap(baseFileName)
  36. if err != nil {
  37. return fmt.Errorf("readNeedleMap: %v", err)
  38. }
  39. defer nm.Close()
  40. datFile, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
  41. if err != nil {
  42. return fmt.Errorf("failed to open dat file: %v", err)
  43. }
  44. defer datFile.Close()
  45. fi, err := datFile.Stat()
  46. if err != nil {
  47. return fmt.Errorf("failed to stat dat file: %v", err)
  48. }
  49. ecFiles, err := openEcFiles(baseFileName, true)
  50. if err != nil {
  51. return fmt.Errorf("error opening ec files: %w", err)
  52. }
  53. defer closeEcFiles(ecFiles)
  54. err = nm.AscendingVisit(func(value needle_map.NeedleValue) error {
  55. return assertSame(datFile, fi.Size(), ecFiles, value.Offset, value.Size)
  56. })
  57. if err != nil {
  58. return fmt.Errorf("failed to check ec files: %v", err)
  59. }
  60. return nil
  61. }
  62. func assertSame(datFile *os.File, datSize int64, ecFiles []*os.File, offset types.Offset, size types.Size) error {
  63. data, err := readDatFile(datFile, offset, size)
  64. if err != nil {
  65. return fmt.Errorf("failed to read dat file: %v", err)
  66. }
  67. ecFileStat, _ := ecFiles[0].Stat()
  68. ecData, err := readEcFile(ecFileStat.Size(), ecFiles, offset, size)
  69. if err != nil {
  70. return fmt.Errorf("failed to read ec file: %v", err)
  71. }
  72. if bytes.Compare(data, ecData) != 0 {
  73. return fmt.Errorf("unexpected data read")
  74. }
  75. return nil
  76. }
  77. func readDatFile(datFile *os.File, offset types.Offset, size types.Size) ([]byte, error) {
  78. data := make([]byte, size)
  79. n, err := datFile.ReadAt(data, offset.ToActualOffset())
  80. if err != nil {
  81. return nil, fmt.Errorf("failed to ReadAt dat file: %v", err)
  82. }
  83. if n != int(size) {
  84. return nil, fmt.Errorf("unexpected read size %d, expected %d", n, size)
  85. }
  86. return data, nil
  87. }
  88. func readEcFile(shardDatSize int64, ecFiles []*os.File, offset types.Offset, size types.Size) (data []byte, err error) {
  89. intervals := LocateData(largeBlockSize, smallBlockSize, shardDatSize, offset.ToActualOffset(), size)
  90. for i, interval := range intervals {
  91. if d, e := readOneInterval(interval, ecFiles); e != nil {
  92. return nil, e
  93. } else {
  94. if i == 0 {
  95. data = d
  96. } else {
  97. data = append(data, d...)
  98. }
  99. }
  100. }
  101. return data, nil
  102. }
  103. func readOneInterval(interval Interval, ecFiles []*os.File) (data []byte, err error) {
  104. ecFileIndex, ecFileOffset := interval.ToShardIdAndOffset(largeBlockSize, smallBlockSize)
  105. data = make([]byte, interval.Size)
  106. err = readFromFile(ecFiles[ecFileIndex], data, ecFileOffset)
  107. if false { // do some ec testing
  108. ecData, err := readFromOtherEcFiles(ecFiles, int(ecFileIndex), ecFileOffset, interval.Size)
  109. if err != nil {
  110. return nil, fmt.Errorf("ec reconstruct error: %v", err)
  111. }
  112. if bytes.Compare(data, ecData) != 0 {
  113. return nil, fmt.Errorf("ec compare error")
  114. }
  115. }
  116. return
  117. }
  118. func readFromOtherEcFiles(ecFiles []*os.File, ecFileIndex int, ecFileOffset int64, size types.Size) (data []byte, err error) {
  119. enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
  120. if err != nil {
  121. return nil, fmt.Errorf("failed to create encoder: %v", err)
  122. }
  123. bufs := make([][]byte, TotalShardsCount)
  124. for i := 0; i < DataShardsCount; {
  125. n := int(rand.Int31n(TotalShardsCount))
  126. if n == ecFileIndex || bufs[n] != nil {
  127. continue
  128. }
  129. bufs[n] = make([]byte, size)
  130. i++
  131. }
  132. for i, buf := range bufs {
  133. if buf == nil {
  134. continue
  135. }
  136. err = readFromFile(ecFiles[i], buf, ecFileOffset)
  137. if err != nil {
  138. return
  139. }
  140. }
  141. if err = enc.ReconstructData(bufs); err != nil {
  142. return nil, err
  143. }
  144. return bufs[ecFileIndex], nil
  145. }
  146. func readFromFile(file *os.File, data []byte, ecFileOffset int64) (err error) {
  147. _, err = file.ReadAt(data, ecFileOffset)
  148. return
  149. }
  150. func removeGeneratedFiles(baseFileName string) {
  151. for i := 0; i < DataShardsCount+ParityShardsCount; i++ {
  152. fname := fmt.Sprintf("%s.ec%02d", baseFileName, i)
  153. os.Remove(fname)
  154. }
  155. os.Remove(baseFileName + ".ecx")
  156. }
  157. func TestLocateData(t *testing.T) {
  158. intervals := LocateData(largeBlockSize, smallBlockSize, largeBlockSize+1, DataShardsCount*largeBlockSize, 1)
  159. if len(intervals) != 1 {
  160. t.Errorf("unexpected interval size %d", len(intervals))
  161. }
  162. if !intervals[0].sameAs(Interval{0, 0, 1, false, 1}) {
  163. t.Errorf("unexpected interval %+v", intervals[0])
  164. }
  165. intervals = LocateData(largeBlockSize, smallBlockSize, largeBlockSize+1, DataShardsCount*largeBlockSize/2+100, DataShardsCount*largeBlockSize+1-DataShardsCount*largeBlockSize/2-100)
  166. fmt.Printf("%+v\n", intervals)
  167. }
  168. func (this Interval) sameAs(that Interval) bool {
  169. return this.IsLargeBlock == that.IsLargeBlock &&
  170. this.InnerBlockOffset == that.InnerBlockOffset &&
  171. this.BlockIndex == that.BlockIndex &&
  172. this.Size == that.Size
  173. }
  174. func TestLocateData2(t *testing.T) {
  175. intervals := LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, 3221225472, 21479557912, 4194339)
  176. assert.Equal(t, intervals, []Interval{
  177. {BlockIndex: 4, InnerBlockOffset: 527128, Size: 521448, IsLargeBlock: false, LargeBlockRowsCount: 2},
  178. {BlockIndex: 5, InnerBlockOffset: 0, Size: 1048576, IsLargeBlock: false, LargeBlockRowsCount: 2},
  179. {BlockIndex: 6, InnerBlockOffset: 0, Size: 1048576, IsLargeBlock: false, LargeBlockRowsCount: 2},
  180. {BlockIndex: 7, InnerBlockOffset: 0, Size: 1048576, IsLargeBlock: false, LargeBlockRowsCount: 2},
  181. {BlockIndex: 8, InnerBlockOffset: 0, Size: 527163, IsLargeBlock: false, LargeBlockRowsCount: 2},
  182. })
  183. }
  184. func TestLocateData3(t *testing.T) {
  185. intervals := LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, 3221225472, 30782909808, 112568)
  186. for _, interval := range intervals {
  187. fmt.Printf("%+v\n", interval)
  188. }
  189. assert.Equal(t, intervals, []Interval{
  190. {BlockIndex: 8876, InnerBlockOffset: 912752, Size: 112568, IsLargeBlock: false, LargeBlockRowsCount: 2},
  191. })
  192. }