You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

197 lines
4.1 KiB

7 years ago
7 years ago
7 years ago
6 years ago
7 years ago
7 years ago
7 years ago
6 years ago
7 years ago
7 years ago
6 years ago
7 years ago
7 years ago
7 years ago
6 years ago
6 years ago
7 years ago
6 years ago
7 years ago
6 years ago
7 years ago
6 years ago
7 years ago
6 years ago
7 years ago
6 years ago
7 years ago
6 years ago
7 years ago
6 years ago
7 years ago
6 years ago
7 years ago
6 years ago
7 years ago
  1. package filer2
  2. import (
  3. "fmt"
  4. "hash/fnv"
  5. "sort"
  6. "sync"
  7. "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
  8. )
  9. func TotalSize(chunks []*filer_pb.FileChunk) (size uint64) {
  10. for _, c := range chunks {
  11. t := uint64(c.Offset + int64(c.Size))
  12. if size < t {
  13. size = t
  14. }
  15. }
  16. return
  17. }
  18. func ETag(chunks []*filer_pb.FileChunk) (etag string) {
  19. if len(chunks) == 1 {
  20. return chunks[0].ETag
  21. }
  22. h := fnv.New32a()
  23. for _, c := range chunks {
  24. h.Write([]byte(c.ETag))
  25. }
  26. return fmt.Sprintf("%x", h.Sum32())
  27. }
  28. func CompactFileChunks(chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) {
  29. visibles := nonOverlappingVisibleIntervals(chunks)
  30. fileIds := make(map[string]bool)
  31. for _, interval := range visibles {
  32. fileIds[interval.fileId] = true
  33. }
  34. for _, chunk := range chunks {
  35. if found := fileIds[chunk.FileId]; found {
  36. compacted = append(compacted, chunk)
  37. } else {
  38. garbage = append(garbage, chunk)
  39. }
  40. }
  41. cleanupIntervals(visibles)
  42. return
  43. }
  44. func FindUnusedFileChunks(oldChunks, newChunks []*filer_pb.FileChunk) (unused []*filer_pb.FileChunk) {
  45. fileIds := make(map[string]bool)
  46. for _, interval := range newChunks {
  47. fileIds[interval.FileId] = true
  48. }
  49. for _, chunk := range oldChunks {
  50. if found := fileIds[chunk.FileId]; !found {
  51. unused = append(unused, chunk)
  52. }
  53. }
  54. return
  55. }
  // ChunkView describes one contiguous piece of a read request that is served
  // from a single stored chunk.
  type ChunkView struct {
  	// FileId identifies the stored chunk the data comes from.
  	FileId string
  	// Offset is where the data starts inside that chunk.
  	Offset int64
  	// Size is the number of bytes covered by this view.
  	Size uint64
  	// LogicOffset is where the data starts inside the logical file.
  	LogicOffset int64
  }
  62. func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views []*ChunkView) {
  63. visibles := nonOverlappingVisibleIntervals(chunks)
  64. stop := offset + int64(size)
  65. for _, chunk := range visibles {
  66. if chunk.start <= offset && offset < chunk.stop && offset < stop {
  67. views = append(views, &ChunkView{
  68. FileId: chunk.fileId,
  69. Offset: offset - chunk.start, // offset is the data starting location in this file id
  70. Size: uint64(min(chunk.stop, stop) - offset),
  71. LogicOffset: offset,
  72. })
  73. offset = min(chunk.stop, stop)
  74. }
  75. }
  76. cleanupIntervals(visibles)
  77. return views
  78. }
  79. func logPrintf(name string, visibles []*visibleInterval) {
  80. /*
  81. log.Printf("%s len %d", name, len(visibles))
  82. for _, v := range visibles {
  83. log.Printf("%s: => %+v", name, v)
  84. }
  85. */
  86. }
  87. var bufPool = sync.Pool{
  88. New: func() interface{} {
  89. return new(visibleInterval)
  90. },
  91. }
  92. func mergeIntoVisibles(visibles []*visibleInterval, chunk *filer_pb.FileChunk) (newVisibles []*visibleInterval) {
  93. for _, v := range visibles {
  94. if v.start < chunk.Offset && chunk.Offset < v.stop {
  95. newVisibles = append(newVisibles, newVisibleInterval(
  96. v.start,
  97. chunk.Offset,
  98. v.fileId,
  99. v.modifiedTime,
  100. ))
  101. }
  102. chunkStop := chunk.Offset + int64(chunk.Size)
  103. if v.start < chunkStop && chunkStop < v.stop {
  104. newVisibles = append(newVisibles, newVisibleInterval(
  105. chunkStop,
  106. v.stop,
  107. v.fileId,
  108. v.modifiedTime,
  109. ))
  110. }
  111. if chunkStop < v.start || v.stop <= chunk.Offset {
  112. newVisibles = append(newVisibles, v)
  113. }
  114. }
  115. newVisibles = append(newVisibles, newVisibleInterval(
  116. chunk.Offset,
  117. chunk.Offset+int64(chunk.Size),
  118. chunk.FileId,
  119. chunk.Mtime,
  120. ))
  121. return
  122. }
  123. func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) {
  124. sort.Slice(chunks, func(i, j int) bool {
  125. return chunks[i].Mtime < chunks[j].Mtime
  126. })
  127. for _, chunk := range chunks {
  128. visibles = mergeIntoVisibles(visibles, chunk)
  129. }
  130. sort.Slice(visibles, func(i, j int) bool {
  131. return visibles[i].start < visibles[j].start
  132. })
  133. logPrintf("visibles", visibles)
  134. return
  135. }
  136. func cleanupIntervals(visibles []*visibleInterval) {
  137. for _, v := range visibles {
  138. bufPool.Put(v)
  139. }
  140. }
  // visibleInterval is one entry of the non-overlapping visible-interval list.
  // Each interval maps a byte range of the logical file to exactly one file
  // chunk.
  type visibleInterval struct {
  	// start is the first byte of the range (inclusive).
  	start int64
  	// stop is the end of the range (exclusive).
  	stop int64
  	// modifiedTime is the Mtime of the chunk backing this range.
  	modifiedTime int64
  	// fileId is the FileId of the chunk backing this range.
  	fileId string
  }
  149. func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64) *visibleInterval {
  150. b := bufPool.Get().(*visibleInterval)
  151. b.start = start
  152. b.stop = stop
  153. b.fileId = fileId
  154. b.modifiedTime = modifiedTime
  155. return b
  156. }
  // min returns the smaller of x and y.
  func min(x, y int64) int64 {
  	if y < x {
  		return y
  	}
  	return x
  }