You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

254 lines
6.4 KiB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
  1. package filer2
  2. import (
  3. "sort"
  4. "log"
  5. "math"
  6. "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
  7. )
  8. func TotalSize(chunks []*filer_pb.FileChunk) (size uint64) {
  9. for _, c := range chunks {
  10. t := uint64(c.Offset + int64(c.Size))
  11. if size < t {
  12. size = t
  13. }
  14. }
  15. return
  16. }
  17. func CompactFileChunks(chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) {
  18. visibles := nonOverlappingVisibleIntervals(chunks)
  19. fileIds := make(map[string]bool)
  20. for _, interval := range visibles {
  21. fileIds[interval.fileId] = true
  22. }
  23. for _, chunk := range chunks {
  24. if found := fileIds[chunk.FileId]; found {
  25. compacted = append(compacted, chunk)
  26. } else {
  27. garbage = append(garbage, chunk)
  28. }
  29. }
  30. return
  31. }
  32. func FindUnusedFileChunks(oldChunks, newChunks []*filer_pb.FileChunk) (unused []*filer_pb.FileChunk) {
  33. fileIds := make(map[string]bool)
  34. for _, interval := range newChunks {
  35. fileIds[interval.FileId] = true
  36. }
  37. for _, chunk := range oldChunks {
  38. if found := fileIds[chunk.FileId]; !found {
  39. unused = append(unused, chunk)
  40. }
  41. }
  42. return
  43. }
  44. type ChunkView struct {
  45. FileId string
  46. Offset int64
  47. Size uint64
  48. LogicOffset int64
  49. }
  50. func ReadFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views []*ChunkView) {
  51. visibles := nonOverlappingVisibleIntervals(chunks)
  52. stop := offset + int64(size)
  53. for _, chunk := range visibles {
  54. if chunk.start <= offset && offset < chunk.stop && offset < stop {
  55. views = append(views, &ChunkView{
  56. FileId: chunk.fileId,
  57. Offset: offset - chunk.start, // offset is the data starting location in this file id
  58. Size: uint64(min(chunk.stop, stop) - offset),
  59. LogicOffset: offset,
  60. })
  61. offset = min(chunk.stop, stop)
  62. }
  63. }
  64. return views
  65. }
  66. func logPrintf(name string, visibles []*visibleInterval) {
  67. return
  68. log.Printf("%s len %d", name, len(visibles))
  69. for _, v := range visibles {
  70. log.Printf("%s: => %+v", name, v)
  71. }
  72. }
  73. func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) {
  74. sort.Slice(chunks, func(i, j int) bool {
  75. if chunks[i].Offset < chunks[j].Offset {
  76. return true
  77. }
  78. if chunks[i].Offset == chunks[j].Offset {
  79. return chunks[i].Mtime < chunks[j].Mtime
  80. }
  81. return false
  82. })
  83. if len(chunks) == 0 {
  84. return
  85. }
  86. var parallelIntervals, intervals []*visibleInterval
  87. var minStopInterval, upToDateInterval *visibleInterval
  88. watermarkStart := chunks[0].Offset
  89. for _, chunk := range chunks {
  90. // log.Printf("checking chunk: [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size))
  91. logPrintf("parallelIntervals", parallelIntervals)
  92. for len(parallelIntervals) > 0 && watermarkStart < chunk.Offset {
  93. logPrintf("parallelIntervals loop 1", parallelIntervals)
  94. logPrintf("parallelIntervals loop 1 intervals", intervals)
  95. minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals)
  96. nextStop := min(minStopInterval.stop, chunk.Offset)
  97. intervals = append(intervals, newVisibleInterval(
  98. max(watermarkStart, minStopInterval.start),
  99. nextStop,
  100. upToDateInterval.fileId,
  101. upToDateInterval.modifiedTime,
  102. ))
  103. watermarkStart = nextStop
  104. logPrintf("parallelIntervals loop intervals =>", intervals)
  105. // remove processed intervals, possibly multiple
  106. var remaining []*visibleInterval
  107. for _, interval := range parallelIntervals {
  108. if interval.stop != watermarkStart {
  109. remaining = append(remaining, interval)
  110. }
  111. }
  112. parallelIntervals = remaining
  113. logPrintf("parallelIntervals loop 2", parallelIntervals)
  114. logPrintf("parallelIntervals loop 2 intervals", intervals)
  115. }
  116. parallelIntervals = append(parallelIntervals, newVisibleInterval(
  117. chunk.Offset,
  118. chunk.Offset+int64(chunk.Size),
  119. chunk.FileId,
  120. chunk.Mtime,
  121. ))
  122. }
  123. logPrintf("parallelIntervals loop 3", parallelIntervals)
  124. logPrintf("parallelIntervals loop 3 intervals", intervals)
  125. for len(parallelIntervals) > 0 {
  126. minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals)
  127. intervals = append(intervals, newVisibleInterval(
  128. max(watermarkStart, minStopInterval.start),
  129. minStopInterval.stop,
  130. upToDateInterval.fileId,
  131. upToDateInterval.modifiedTime,
  132. ))
  133. watermarkStart = minStopInterval.stop
  134. // remove processed intervals, possibly multiple
  135. var remaining []*visibleInterval
  136. for _, interval := range parallelIntervals {
  137. if interval.stop != watermarkStart {
  138. remaining = append(remaining, interval)
  139. }
  140. }
  141. parallelIntervals = remaining
  142. }
  143. logPrintf("parallelIntervals loop 4", parallelIntervals)
  144. logPrintf("intervals", intervals)
  145. // merge connected intervals, now the intervals are non-intersecting
  146. var lastIntervalIndex int
  147. var prevIntervalIndex int
  148. for i, interval := range intervals {
  149. if i == 0 {
  150. prevIntervalIndex = i
  151. lastIntervalIndex = i
  152. continue
  153. }
  154. if intervals[i-1].fileId != interval.fileId ||
  155. intervals[i-1].stop < intervals[i].start {
  156. visibles = append(visibles, newVisibleInterval(
  157. intervals[prevIntervalIndex].start,
  158. intervals[i-1].stop,
  159. intervals[prevIntervalIndex].fileId,
  160. intervals[prevIntervalIndex].modifiedTime,
  161. ))
  162. prevIntervalIndex = i
  163. }
  164. lastIntervalIndex = i
  165. logPrintf("intervals loop 1 visibles", visibles)
  166. }
  167. visibles = append(visibles, newVisibleInterval(
  168. intervals[prevIntervalIndex].start,
  169. intervals[lastIntervalIndex].stop,
  170. intervals[prevIntervalIndex].fileId,
  171. intervals[prevIntervalIndex].modifiedTime,
  172. ))
  173. logPrintf("visibles", visibles)
  174. return
  175. }
  176. func findMinStopInterval(intervals []*visibleInterval) (minStopInterval, upToDateInterval *visibleInterval) {
  177. var latestMtime int64
  178. latestIntervalIndex := 0
  179. minStop := int64(math.MaxInt64)
  180. minIntervalIndex := 0
  181. for i, interval := range intervals {
  182. if minStop > interval.stop {
  183. minIntervalIndex = i
  184. minStop = interval.stop
  185. }
  186. if latestMtime < interval.modifiedTime {
  187. latestMtime = interval.modifiedTime
  188. latestIntervalIndex = i
  189. }
  190. }
  191. minStopInterval = intervals[minIntervalIndex]
  192. upToDateInterval = intervals[latestIntervalIndex]
  193. return
  194. }
  195. // find non-overlapping visible intervals
  196. // visible interval map to one file chunk
  197. type visibleInterval struct {
  198. start int64
  199. stop int64
  200. modifiedTime int64
  201. fileId string
  202. }
  203. func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64) *visibleInterval {
  204. return &visibleInterval{start: start, stop: stop, fileId: fileId, modifiedTime: modifiedTime}
  205. }
  206. func min(x, y int64) int64 {
  207. if x <= y {
  208. return x
  209. }
  210. return y
  211. }
  212. func max(x, y int64) int64 {
  213. if x > y {
  214. return x
  215. }
  216. return y
  217. }