You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

266 lines
6.7 KiB

7 years ago
6 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
  1. package filer2
  2. import (
  3. "fmt"
  4. "hash/fnv"
  5. "math"
  6. "sort"
  7. "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
  8. )
  9. func TotalSize(chunks []*filer_pb.FileChunk) (size uint64) {
  10. for _, c := range chunks {
  11. t := uint64(c.Offset + int64(c.Size))
  12. if size < t {
  13. size = t
  14. }
  15. }
  16. return
  17. }
  18. func ETag(chunks []*filer_pb.FileChunk) (etag string) {
  19. if len(chunks) == 1 {
  20. return chunks[0].ETag
  21. }
  22. h := fnv.New32a()
  23. for _, c := range chunks {
  24. h.Write([]byte(c.ETag))
  25. }
  26. return fmt.Sprintf("%x", h.Sum32())
  27. }
  28. func CompactFileChunks(chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) {
  29. visibles := nonOverlappingVisibleIntervals(chunks)
  30. fileIds := make(map[string]bool)
  31. for _, interval := range visibles {
  32. fileIds[interval.fileId] = true
  33. }
  34. for _, chunk := range chunks {
  35. if found := fileIds[chunk.FileId]; found {
  36. compacted = append(compacted, chunk)
  37. } else {
  38. garbage = append(garbage, chunk)
  39. }
  40. }
  41. return
  42. }
  43. func FindUnusedFileChunks(oldChunks, newChunks []*filer_pb.FileChunk) (unused []*filer_pb.FileChunk) {
  44. fileIds := make(map[string]bool)
  45. for _, interval := range newChunks {
  46. fileIds[interval.FileId] = true
  47. }
  48. for _, chunk := range oldChunks {
  49. if found := fileIds[chunk.FileId]; !found {
  50. unused = append(unused, chunk)
  51. }
  52. }
  53. return
  54. }
  // ChunkView describes one contiguous piece of a read request that is served
  // from a single stored chunk.
  type ChunkView struct {
  	// FileId identifies the stored chunk that holds the data.
  	FileId string
  	// Offset is the data starting location inside that file id.
  	Offset int64
  	// Size is the number of bytes covered by this view.
  	Size uint64
  	// LogicOffset is the position of this view within the logical file.
  	LogicOffset int64
  }
  61. func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views []*ChunkView) {
  62. visibles := nonOverlappingVisibleIntervals(chunks)
  63. stop := offset + int64(size)
  64. for _, chunk := range visibles {
  65. if chunk.start <= offset && offset < chunk.stop && offset < stop {
  66. views = append(views, &ChunkView{
  67. FileId: chunk.fileId,
  68. Offset: offset - chunk.start, // offset is the data starting location in this file id
  69. Size: uint64(min(chunk.stop, stop) - offset),
  70. LogicOffset: offset,
  71. })
  72. offset = min(chunk.stop, stop)
  73. }
  74. }
  75. return views
  76. }
  77. func logPrintf(name string, visibles []*visibleInterval) {
  78. /*
  79. log.Printf("%s len %d", name, len(visibles))
  80. for _, v := range visibles {
  81. log.Printf("%s: => %+v", name, v)
  82. }
  83. */
  84. }
  85. func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) {
  86. sort.Slice(chunks, func(i, j int) bool {
  87. if chunks[i].Offset < chunks[j].Offset {
  88. return true
  89. }
  90. if chunks[i].Offset == chunks[j].Offset {
  91. return chunks[i].Mtime < chunks[j].Mtime
  92. }
  93. return false
  94. })
  95. if len(chunks) == 0 {
  96. return
  97. }
  98. var parallelIntervals, intervals []*visibleInterval
  99. var minStopInterval, upToDateInterval *visibleInterval
  100. watermarkStart := chunks[0].Offset
  101. for _, chunk := range chunks {
  102. // log.Printf("checking chunk: [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size))
  103. logPrintf("parallelIntervals", parallelIntervals)
  104. for len(parallelIntervals) > 0 && watermarkStart < chunk.Offset {
  105. logPrintf("parallelIntervals loop 1", parallelIntervals)
  106. logPrintf("parallelIntervals loop 1 intervals", intervals)
  107. minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals)
  108. nextStop := min(minStopInterval.stop, chunk.Offset)
  109. intervals = append(intervals, newVisibleInterval(
  110. max(watermarkStart, minStopInterval.start),
  111. nextStop,
  112. upToDateInterval.fileId,
  113. upToDateInterval.modifiedTime,
  114. ))
  115. watermarkStart = nextStop
  116. logPrintf("parallelIntervals loop intervals =>", intervals)
  117. // remove processed intervals, possibly multiple
  118. var remaining []*visibleInterval
  119. for _, interval := range parallelIntervals {
  120. if interval.stop != watermarkStart {
  121. remaining = append(remaining, interval)
  122. }
  123. }
  124. parallelIntervals = remaining
  125. logPrintf("parallelIntervals loop 2", parallelIntervals)
  126. logPrintf("parallelIntervals loop 2 intervals", intervals)
  127. }
  128. parallelIntervals = append(parallelIntervals, newVisibleInterval(
  129. chunk.Offset,
  130. chunk.Offset+int64(chunk.Size),
  131. chunk.FileId,
  132. chunk.Mtime,
  133. ))
  134. }
  135. logPrintf("parallelIntervals loop 3", parallelIntervals)
  136. logPrintf("parallelIntervals loop 3 intervals", intervals)
  137. for len(parallelIntervals) > 0 {
  138. minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals)
  139. intervals = append(intervals, newVisibleInterval(
  140. max(watermarkStart, minStopInterval.start),
  141. minStopInterval.stop,
  142. upToDateInterval.fileId,
  143. upToDateInterval.modifiedTime,
  144. ))
  145. watermarkStart = minStopInterval.stop
  146. // remove processed intervals, possibly multiple
  147. var remaining []*visibleInterval
  148. for _, interval := range parallelIntervals {
  149. if interval.stop != watermarkStart {
  150. remaining = append(remaining, interval)
  151. }
  152. }
  153. parallelIntervals = remaining
  154. }
  155. logPrintf("parallelIntervals loop 4", parallelIntervals)
  156. logPrintf("intervals", intervals)
  157. // merge connected intervals, now the intervals are non-intersecting
  158. var lastIntervalIndex int
  159. var prevIntervalIndex int
  160. for i, interval := range intervals {
  161. if i == 0 {
  162. prevIntervalIndex = i
  163. lastIntervalIndex = i
  164. continue
  165. }
  166. if intervals[i-1].fileId != interval.fileId ||
  167. intervals[i-1].stop < intervals[i].start {
  168. visibles = append(visibles, newVisibleInterval(
  169. intervals[prevIntervalIndex].start,
  170. intervals[i-1].stop,
  171. intervals[prevIntervalIndex].fileId,
  172. intervals[prevIntervalIndex].modifiedTime,
  173. ))
  174. prevIntervalIndex = i
  175. }
  176. lastIntervalIndex = i
  177. logPrintf("intervals loop 1 visibles", visibles)
  178. }
  179. visibles = append(visibles, newVisibleInterval(
  180. intervals[prevIntervalIndex].start,
  181. intervals[lastIntervalIndex].stop,
  182. intervals[prevIntervalIndex].fileId,
  183. intervals[prevIntervalIndex].modifiedTime,
  184. ))
  185. logPrintf("visibles", visibles)
  186. return
  187. }
  188. func findMinStopInterval(intervals []*visibleInterval) (minStopInterval, upToDateInterval *visibleInterval) {
  189. var latestMtime int64
  190. latestIntervalIndex := 0
  191. minStop := int64(math.MaxInt64)
  192. minIntervalIndex := 0
  193. for i, interval := range intervals {
  194. if minStop > interval.stop {
  195. minIntervalIndex = i
  196. minStop = interval.stop
  197. }
  198. if latestMtime < interval.modifiedTime {
  199. latestMtime = interval.modifiedTime
  200. latestIntervalIndex = i
  201. }
  202. }
  203. minStopInterval = intervals[minIntervalIndex]
  204. upToDateInterval = intervals[latestIntervalIndex]
  205. return
  206. }
  // visibleInterval is one piece of a file's visible content: the half-open
  // byte range [start, stop) of the logical file, mapped to exactly one file
  // chunk. Lists of these are built so that the ranges do not overlap.
  type visibleInterval struct {
  	// start is the first logical offset covered by the interval.
  	start int64
  	// stop is the logical offset just past the end of the interval.
  	stop int64
  	// modifiedTime is the modification time of the chunk supplying the data.
  	modifiedTime int64
  	// fileId identifies the chunk supplying the data.
  	fileId string
  }
  215. func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64) *visibleInterval {
  216. return &visibleInterval{start: start, stop: stop, fileId: fileId, modifiedTime: modifiedTime}
  217. }
  // min returns the smaller of two int64 values.
  func min(x, y int64) int64 {
  	if y < x {
  		return y
  	}
  	return x
  }
  // max returns the larger of two int64 values.
  func max(x, y int64) int64 {
  	if y >= x {
  		return y
  	}
  	return x
  }