You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

312 lines
8.5 KiB

6 years ago
6 years ago
6 years ago
  1. package erasure_coding
  2. import (
  3. "bytes"
  4. "fmt"
  5. "math/rand"
  6. "os"
  7. "testing"
  8. "github.com/chrislusf/seaweedfs/weed/storage"
  9. "github.com/chrislusf/seaweedfs/weed/storage/needle_map"
  10. "github.com/chrislusf/seaweedfs/weed/storage/types"
  11. "github.com/klauspost/reedsolomon"
  12. )
  // Block sizes, in bytes, that the tests in this file pass to the encoding
  // and lookup helpers.
  const (
  	// smallBlockSize is the per-shard block size used for the data left over
  	// once no full row of large blocks remains.
  	smallBlockSize = 100
  	// largeBlockSize is the per-shard block size used while more than one full
  	// row of large blocks is still left to encode.
  	largeBlockSize = 10000
  )
  17. func TestEncodingDecoding(t *testing.T) {
  18. bufferSize := 50
  19. baseFileName := "1"
  20. err := generateEcFiles(baseFileName, bufferSize, largeBlockSize, smallBlockSize)
  21. if err != nil {
  22. t.Logf("generateEcFiles: %v", err)
  23. }
  24. err = writeSortedEcxFiles(baseFileName)
  25. if err != nil {
  26. t.Logf("writeSortedEcxFiles: %v", err)
  27. }
  28. err = validateFiles(baseFileName)
  29. if err != nil {
  30. t.Logf("writeSortedEcxFiles: %v", err)
  31. }
  32. removeGeneratedFiles(baseFileName)
  33. }
  34. func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) error {
  35. file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
  36. if err != nil {
  37. return fmt.Errorf("failed to open dat file: %v", err)
  38. }
  39. defer file.Close()
  40. fi, err := file.Stat()
  41. if err != nil {
  42. return fmt.Errorf("failed to stat dat file: %v", err)
  43. }
  44. err = encodeDatFile(fi.Size(), err, baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
  45. if err != nil {
  46. return fmt.Errorf("encodeDatFile: %v", err)
  47. }
  48. return nil
  49. }
  50. func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
  51. var processedSize int64
  52. enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
  53. if err != nil {
  54. return fmt.Errorf("failed to create encoder: %v", err)
  55. }
  56. buffers := make([][]byte, DataShardsCount+ParityShardsCount)
  57. outputs, err := openEcFiles(baseFileName, false)
  58. defer closeEcFiles(outputs)
  59. if err != nil {
  60. return fmt.Errorf("failed to open dat file: %v", err)
  61. }
  62. for i, _ := range buffers {
  63. buffers[i] = make([]byte, bufferSize)
  64. }
  65. for remainingSize > largeBlockSize*DataShardsCount {
  66. err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs)
  67. if err != nil {
  68. return fmt.Errorf("failed to encode large chunk data: %v", err)
  69. }
  70. remainingSize -= largeBlockSize * DataShardsCount
  71. processedSize += largeBlockSize * DataShardsCount
  72. }
  73. for remainingSize > 0 {
  74. encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs)
  75. if err != nil {
  76. return fmt.Errorf("failed to encode small chunk data: %v", err)
  77. }
  78. remainingSize -= smallBlockSize * DataShardsCount
  79. processedSize += smallBlockSize * DataShardsCount
  80. }
  81. return nil
  82. }
  83. func writeSortedEcxFiles(baseFileName string) (e error) {
  84. cm, err := readCompactMap(baseFileName)
  85. if err != nil {
  86. return fmt.Errorf("readCompactMap: %v", err)
  87. }
  88. ecxFile, err := os.OpenFile(baseFileName+".ecx", os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
  89. if err != nil {
  90. return fmt.Errorf("failed to open dat file: %v", err)
  91. }
  92. defer ecxFile.Close()
  93. err = cm.AscendingVisit(func(value needle_map.NeedleValue) error {
  94. bytes := value.ToBytes()
  95. _, writeErr := ecxFile.Write(bytes)
  96. return writeErr
  97. })
  98. if err != nil {
  99. return fmt.Errorf("failed to open dat file: %v", err)
  100. }
  101. return nil
  102. }
  103. func validateFiles(baseFileName string) error {
  104. cm, err := readCompactMap(baseFileName)
  105. if err != nil {
  106. return fmt.Errorf("readCompactMap: %v", err)
  107. }
  108. datFile, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
  109. if err != nil {
  110. return fmt.Errorf("failed to open dat file: %v", err)
  111. }
  112. defer datFile.Close()
  113. fi, err := datFile.Stat()
  114. if err != nil {
  115. return fmt.Errorf("failed to stat dat file: %v", err)
  116. }
  117. ecFiles, err := openEcFiles(baseFileName, true)
  118. defer closeEcFiles(ecFiles)
  119. err = cm.AscendingVisit(func(value needle_map.NeedleValue) error {
  120. return assertSame(datFile, fi.Size(), ecFiles, value.Offset, value.Size)
  121. })
  122. if err != nil {
  123. return fmt.Errorf("failed to check ec files: %v", err)
  124. }
  125. return nil
  126. }
  127. func readCompactMap(baseFileName string) (*needle_map.CompactMap, error) {
  128. indexFile, err := os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644)
  129. if err != nil {
  130. return nil, fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, err)
  131. }
  132. defer indexFile.Close()
  133. cm := needle_map.NewCompactMap()
  134. err = storage.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
  135. if !offset.IsZero() && size != types.TombstoneFileSize {
  136. cm.Set(key, offset, size)
  137. } else {
  138. cm.Delete(key)
  139. }
  140. return nil
  141. })
  142. return cm, err
  143. }
  144. func assertSame(datFile *os.File, datSize int64, ecFiles []*os.File, offset types.Offset, size uint32) error {
  145. data, err := readDatFile(datFile, offset, size)
  146. if err != nil {
  147. return fmt.Errorf("failed to read dat file: %v", err)
  148. }
  149. ecData, err := readEcFile(datSize, ecFiles, offset, size)
  150. if err != nil {
  151. return fmt.Errorf("failed to read ec file: %v", err)
  152. }
  153. if bytes.Compare(data, ecData) != 0 {
  154. return fmt.Errorf("unexpected data read")
  155. }
  156. return nil
  157. }
  158. func readDatFile(datFile *os.File, offset types.Offset, size uint32) ([]byte, error) {
  159. data := make([]byte, size)
  160. n, err := datFile.ReadAt(data, offset.ToAcutalOffset())
  161. if err != nil {
  162. return nil, fmt.Errorf("failed to ReadAt dat file: %v", err)
  163. }
  164. if n != int(size) {
  165. return nil, fmt.Errorf("unexpected read size %d, expected %d", n, size)
  166. }
  167. return data, nil
  168. }
  169. func readEcFile(datSize int64, ecFiles []*os.File, offset types.Offset, size uint32) (data []byte, err error) {
  170. intervals := locateData(largeBlockSize, smallBlockSize, datSize, offset.ToAcutalOffset(), size)
  171. nLargeBlockRows := int(datSize / (largeBlockSize * DataShardsCount))
  172. for i, interval := range intervals {
  173. if d, e := readOneInterval(interval, ecFiles, nLargeBlockRows); e != nil {
  174. return nil, e
  175. } else {
  176. if i == 0 {
  177. data = d
  178. } else {
  179. data = append(data, d...)
  180. }
  181. }
  182. }
  183. return data, nil
  184. }
  185. func readOneInterval(interval Interval, ecFiles []*os.File, nLargeBlockRows int) (data []byte, err error) {
  186. ecFileOffset := interval.innerBlockOffset
  187. rowIndex := interval.blockIndex / DataShardsCount
  188. if interval.isLargeBlock {
  189. ecFileOffset += int64(rowIndex) * largeBlockSize
  190. } else {
  191. ecFileOffset += int64(nLargeBlockRows)*largeBlockSize + int64(rowIndex)*smallBlockSize
  192. }
  193. ecFileIndex := interval.blockIndex % DataShardsCount
  194. data = make([]byte, interval.size)
  195. err = readFromFile(ecFiles[ecFileIndex], data, ecFileOffset)
  196. { // do some ec testing
  197. ecData, err := readFromOtherEcFiles(ecFiles, ecFileIndex, ecFileOffset, interval.size)
  198. if err != nil {
  199. return nil, fmt.Errorf("ec reconstruct error: %v", err)
  200. }
  201. if bytes.Compare(data, ecData) != 0 {
  202. return nil, fmt.Errorf("ec compare error")
  203. }
  204. }
  205. return
  206. }
  207. func readFromOtherEcFiles(ecFiles []*os.File, ecFileIndex int, ecFileOffset int64, size uint32) (data []byte, err error) {
  208. enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
  209. if err != nil {
  210. return nil, fmt.Errorf("failed to create encoder: %v", err)
  211. }
  212. bufs := make([][]byte, DataShardsCount+ParityShardsCount)
  213. for i := 0; i < DataShardsCount; {
  214. n := int(rand.Int31n(DataShardsCount + ParityShardsCount))
  215. if n == ecFileIndex || bufs[n] != nil {
  216. continue
  217. }
  218. bufs[n] = make([]byte, size)
  219. i++
  220. }
  221. for i, buf := range bufs {
  222. if buf == nil {
  223. continue
  224. }
  225. err = readFromFile(ecFiles[i], buf, ecFileOffset)
  226. if err != nil {
  227. return
  228. }
  229. }
  230. if err = enc.ReconstructData(bufs); err != nil {
  231. return nil, err
  232. }
  233. return bufs[ecFileIndex], nil
  234. }
  // readFromFile fills data from file starting at byte offset ecFileOffset and
  // returns the error reported by ReadAt, if any. The byte count is discarded.
  func readFromFile(file *os.File, data []byte, ecFileOffset int64) (err error) {
  	if _, readErr := file.ReadAt(data, ecFileOffset); readErr != nil {
  		return readErr
  	}
  	return nil
  }
  239. func removeGeneratedFiles(baseFileName string) {
  240. for i := 0; i < DataShardsCount+ParityShardsCount; i++ {
  241. fname := fmt.Sprintf("%s.ec%02d", baseFileName, i+1)
  242. os.Remove(fname)
  243. }
  244. os.Remove(baseFileName+".ecx")
  245. }
  246. func TestLocateData(t *testing.T) {
  247. intervals := locateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize, 1)
  248. if len(intervals) != 1 {
  249. t.Errorf("unexpected interval size %d", len(intervals))
  250. }
  251. if !intervals[0].sameAs(Interval{0, 0, 1, false}) {
  252. t.Errorf("unexpected interval %+v", intervals[0])
  253. }
  254. intervals = locateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize/2+100, DataShardsCount*largeBlockSize+1-DataShardsCount*largeBlockSize/2-100)
  255. fmt.Printf("%+v\n", intervals)
  256. }
  257. func (this Interval) sameAs(that Interval) bool {
  258. return this.isLargeBlock == that.isLargeBlock &&
  259. this.innerBlockOffset == that.innerBlockOffset &&
  260. this.blockIndex == that.blockIndex &&
  261. this.size == that.size
  262. }