You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

361 lines
9.4 KiB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
  1. package filer2
  2. import (
  3. "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
  4. "sort"
  5. "log"
  6. "math"
  7. )
  8. func TotalSize(chunks []*filer_pb.FileChunk) (size uint64) {
  9. for _, c := range chunks {
  10. t := uint64(c.Offset + int64(c.Size))
  11. if size < t {
  12. size = t
  13. }
  14. }
  15. return
  16. }
  17. func CompactFileChunks(chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) {
  18. return
  19. }
  20. func mergeToVisibleIntervals(visibles []*visibleInterval, chunk *filer_pb.FileChunk) (merged []*visibleInterval) {
  21. if len(visibles) == 0 {
  22. return []*visibleInterval{newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId, chunk.Mtime)}
  23. }
  24. log.Printf("merge chunk %+v => %d", chunk, len(visibles))
  25. for _, v := range visibles {
  26. log.Printf("=> %+v", v)
  27. }
  28. var nonOverlappingStop int
  29. // find merge candidates
  30. var mergeCandidates []int
  31. for t := len(visibles) - 1; t >= 0; t-- {
  32. if visibles[t].stop > chunk.Offset {
  33. mergeCandidates = append(mergeCandidates, t)
  34. } else {
  35. nonOverlappingStop = t
  36. break
  37. }
  38. }
  39. log.Printf("merged candidates: %+v, starting from %d", mergeCandidates, nonOverlappingStop)
  40. if len(mergeCandidates) == 0 {
  41. merged = append(visibles, newVisibleInterval(
  42. chunk.Offset,
  43. chunk.Offset+int64(chunk.Size),
  44. chunk.FileId,
  45. chunk.Mtime,
  46. ))
  47. return
  48. }
  49. // reverse merge candidates
  50. i, j := 0, len(mergeCandidates)-1
  51. for i < j {
  52. mergeCandidates[i], mergeCandidates[j] = mergeCandidates[j], mergeCandidates[i]
  53. i++
  54. j--
  55. }
  56. log.Printf("reversed merged candidates: %+v", mergeCandidates)
  57. // add chunk into a possibly connected intervals
  58. var overlappingIntevals []*visibleInterval
  59. for i = 0; i < len(mergeCandidates); i++ {
  60. interval := visibles[mergeCandidates[i]]
  61. if interval.modifiedTime >= chunk.Mtime {
  62. log.Printf("overlappingIntevals add existing interval: [%d,%d)", interval.start, interval.stop)
  63. overlappingIntevals = append(overlappingIntevals, interval)
  64. } else {
  65. start := max(interval.start, chunk.Offset)
  66. stop := min(interval.stop, chunk.Offset+int64(chunk.Size))
  67. if interval.start <= chunk.Offset {
  68. if interval.start < start {
  69. log.Printf("overlappingIntevals add 1: [%d,%d)", interval.start, start)
  70. overlappingIntevals = append(overlappingIntevals, newVisibleInterval(
  71. interval.start,
  72. start,
  73. interval.fileId,
  74. interval.modifiedTime,
  75. ))
  76. }
  77. log.Printf("overlappingIntevals add 2: [%d,%d)", start, stop)
  78. overlappingIntevals = append(overlappingIntevals, newVisibleInterval(
  79. start,
  80. stop,
  81. chunk.FileId,
  82. chunk.Mtime,
  83. ))
  84. if interval.stop < stop {
  85. log.Printf("overlappingIntevals add 3: [%d,%d)", interval.stop, stop)
  86. overlappingIntevals = append(overlappingIntevals, newVisibleInterval(
  87. interval.stop,
  88. stop,
  89. interval.fileId,
  90. interval.modifiedTime,
  91. ))
  92. }
  93. }
  94. }
  95. }
  96. logPrintf("overlappingIntevals", overlappingIntevals)
  97. // merge connected intervals
  98. merged = visibles[:nonOverlappingStop]
  99. var lastInterval *visibleInterval
  100. var prevIntervalIndex int
  101. for i, interval := range overlappingIntevals {
  102. if i == 0 {
  103. prevIntervalIndex = i
  104. continue
  105. }
  106. if overlappingIntevals[prevIntervalIndex].fileId != interval.fileId {
  107. merged = append(merged, newVisibleInterval(
  108. overlappingIntevals[prevIntervalIndex].start,
  109. interval.start,
  110. overlappingIntevals[prevIntervalIndex].fileId,
  111. overlappingIntevals[prevIntervalIndex].modifiedTime,
  112. ))
  113. prevIntervalIndex = i
  114. }
  115. }
  116. if lastInterval != nil {
  117. merged = append(merged, newVisibleInterval(
  118. overlappingIntevals[prevIntervalIndex].start,
  119. lastInterval.start,
  120. overlappingIntevals[prevIntervalIndex].fileId,
  121. overlappingIntevals[prevIntervalIndex].modifiedTime,
  122. ))
  123. }
  124. logPrintf("merged", merged)
  125. return
  126. }
  127. func logPrintf(name string, visibles []*visibleInterval) {
  128. log.Printf("%s len %d", name, len(visibles))
  129. for _, v := range visibles {
  130. log.Printf("%s: => %+v", name, v)
  131. }
  132. }
  133. func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) {
  134. sort.Slice(chunks, func(i, j int) bool {
  135. if chunks[i].Offset < chunks[j].Offset {
  136. return true
  137. }
  138. if chunks[i].Offset == chunks[j].Offset {
  139. return chunks[i].Mtime < chunks[j].Mtime
  140. }
  141. return false
  142. })
  143. if len(chunks) == 0 {
  144. return
  145. }
  146. var parallelIntervals, intervals []*visibleInterval
  147. var minStopInterval, upToDateInterval *visibleInterval
  148. watermarkStart := chunks[0].Offset
  149. for _, chunk := range chunks {
  150. log.Printf("checking chunk: [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size))
  151. logPrintf("parallelIntervals", parallelIntervals)
  152. for len(parallelIntervals) > 0 && watermarkStart < chunk.Offset {
  153. logPrintf("parallelIntervals loop 1", parallelIntervals)
  154. logPrintf("parallelIntervals loop 1 intervals", intervals)
  155. minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals)
  156. nextStop := min(minStopInterval.stop, chunk.Offset)
  157. intervals = append(intervals, newVisibleInterval(
  158. max(watermarkStart, minStopInterval.start),
  159. nextStop,
  160. upToDateInterval.fileId,
  161. upToDateInterval.modifiedTime,
  162. ))
  163. watermarkStart = nextStop
  164. logPrintf("parallelIntervals loop intervals =>", intervals)
  165. // remove processed intervals, possibly multiple
  166. var remaining []*visibleInterval
  167. for _, interval := range parallelIntervals {
  168. if interval.stop != watermarkStart {
  169. remaining = append(remaining, newVisibleInterval(
  170. interval.start,
  171. interval.stop,
  172. interval.fileId,
  173. interval.modifiedTime,
  174. ))
  175. }
  176. }
  177. parallelIntervals = remaining
  178. logPrintf("parallelIntervals loop 2", parallelIntervals)
  179. logPrintf("parallelIntervals loop 2 intervals", intervals)
  180. }
  181. parallelIntervals = append(parallelIntervals, newVisibleInterval(
  182. chunk.Offset,
  183. chunk.Offset+int64(chunk.Size),
  184. chunk.FileId,
  185. chunk.Mtime,
  186. ))
  187. }
  188. logPrintf("parallelIntervals loop 3", parallelIntervals)
  189. logPrintf("parallelIntervals loop 3 intervals", intervals)
  190. for len(parallelIntervals) > 0 {
  191. minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals)
  192. intervals = append(intervals, newVisibleInterval(
  193. max(watermarkStart, minStopInterval.start),
  194. minStopInterval.stop,
  195. upToDateInterval.fileId,
  196. upToDateInterval.modifiedTime,
  197. ))
  198. watermarkStart = minStopInterval.stop
  199. // remove processed intervals, possibly multiple
  200. var remaining []*visibleInterval
  201. for _, interval := range parallelIntervals {
  202. if interval.stop != watermarkStart {
  203. remaining = append(remaining, newVisibleInterval(
  204. interval.start,
  205. interval.stop,
  206. interval.fileId,
  207. interval.modifiedTime,
  208. ))
  209. }
  210. }
  211. parallelIntervals = remaining
  212. }
  213. logPrintf("parallelIntervals loop 4", parallelIntervals)
  214. logPrintf("intervals", intervals)
  215. // merge connected intervals, now the intervals are non-intersecting
  216. var lastInterval *visibleInterval
  217. var prevIntervalIndex int
  218. for i, interval := range intervals {
  219. if i == 0 {
  220. prevIntervalIndex = i
  221. continue
  222. }
  223. if intervals[i-1].fileId != interval.fileId ||
  224. intervals[i-1].stop < intervals[i].start {
  225. visibles = append(visibles, newVisibleInterval(
  226. intervals[prevIntervalIndex].start,
  227. intervals[i-1].stop,
  228. intervals[prevIntervalIndex].fileId,
  229. intervals[prevIntervalIndex].modifiedTime,
  230. ))
  231. prevIntervalIndex = i
  232. }
  233. lastInterval = intervals[i]
  234. logPrintf("intervals loop 1 visibles", visibles)
  235. }
  236. if lastInterval != nil {
  237. visibles = append(visibles, newVisibleInterval(
  238. intervals[prevIntervalIndex].start,
  239. lastInterval.stop,
  240. intervals[prevIntervalIndex].fileId,
  241. intervals[prevIntervalIndex].modifiedTime,
  242. ))
  243. }
  244. logPrintf("visibles", visibles)
  245. return
  246. }
  247. func findMinStopInterval(intervals []*visibleInterval) (minStopInterval, upToDateInterval *visibleInterval) {
  248. var latestMtime int64
  249. latestIntervalIndex := 0
  250. minStop := int64(math.MaxInt64)
  251. minIntervalIndex := 0
  252. for i, interval := range intervals {
  253. if minStop > interval.stop {
  254. minIntervalIndex = i
  255. minStop = interval.stop
  256. }
  257. if latestMtime < interval.modifiedTime {
  258. latestMtime = interval.modifiedTime
  259. latestIntervalIndex = i
  260. }
  261. }
  262. minStopInterval = intervals[minIntervalIndex]
  263. upToDateInterval = intervals[latestIntervalIndex]
  264. return
  265. }
  266. func nonOverlappingVisibleIntervals0(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) {
  267. sort.Slice(chunks, func(i, j int) bool {
  268. if chunks[i].Offset < chunks[j].Offset {
  269. return true
  270. }
  271. if chunks[i].Offset == chunks[j].Offset {
  272. return chunks[i].Mtime < chunks[j].Mtime
  273. }
  274. return false
  275. })
  276. for _, c := range chunks {
  277. visibles = mergeToVisibleIntervals(visibles, c)
  278. }
  279. return
  280. }
  281. // find non-overlapping visible intervals
  282. // visible interval map to one file chunk
  283. type visibleInterval struct {
  284. start int64
  285. stop int64
  286. modifiedTime int64
  287. fileId string
  288. }
  289. func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64) *visibleInterval {
  290. return &visibleInterval{start: start, stop: stop, fileId: fileId, modifiedTime: modifiedTime}
  291. }
  292. type stackOfChunkIds struct {
  293. ids []int
  294. }
  295. func (s *stackOfChunkIds) isEmpty() bool {
  296. return len(s.ids) == 0
  297. }
  298. func (s *stackOfChunkIds) pop() int {
  299. t := s.ids[len(s.ids)-1]
  300. s.ids = s.ids[:len(s.ids)-1]
  301. return t
  302. }
  303. func (s *stackOfChunkIds) push(x int) {
  304. s.ids = append(s.ids, x)
  305. }
  306. func (s *stackOfChunkIds) peek() int {
  307. return s.ids[len(s.ids)-1]
  308. }
  309. func min(x, y int64) int64 {
  310. if x <= y {
  311. return x
  312. }
  313. return y
  314. }
  315. func max(x, y int64) int64 {
  316. if x > y {
  317. return x
  318. }
  319. return y
  320. }