You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

289 lines
8.5 KiB

4 years ago
4 years ago
  1. package filesys
  2. import (
  3. "io"
  4. "log"
  5. "os"
  6. )
  7. type WrittenIntervalNode struct {
  8. DataOffset int64
  9. TempOffset int64
  10. Size int64
  11. Next *WrittenIntervalNode
  12. }
  13. type WrittenIntervalLinkedList struct {
  14. tempFile *os.File
  15. Head *WrittenIntervalNode
  16. Tail *WrittenIntervalNode
  17. }
  18. type WrittenContinuousIntervals struct {
  19. tempFile *os.File
  20. lastOffset int64
  21. lists []*WrittenIntervalLinkedList
  22. }
  23. func (list *WrittenIntervalLinkedList) Offset() int64 {
  24. return list.Head.DataOffset
  25. }
  26. func (list *WrittenIntervalLinkedList) Size() int64 {
  27. return list.Tail.DataOffset + list.Tail.Size - list.Head.DataOffset
  28. }
  29. func (list *WrittenIntervalLinkedList) addNodeToTail(node *WrittenIntervalNode) {
  30. // glog.V(4).Infof("add to tail [%d,%d) + [%d,%d) => [%d,%d)", list.Head.Offset, list.Tail.Offset+list.Tail.Size, node.Offset, node.Offset+node.Size, list.Head.Offset, node.Offset+node.Size)
  31. if list.Tail.TempOffset+list.Tail.Size == node.TempOffset {
  32. // already connected
  33. list.Tail.Size += node.Size
  34. } else {
  35. list.Tail.Next = node
  36. list.Tail = node
  37. }
  38. }
  39. func (list *WrittenIntervalLinkedList) addNodeToHead(node *WrittenIntervalNode) {
  40. // glog.V(4).Infof("add to head [%d,%d) + [%d,%d) => [%d,%d)", node.Offset, node.Offset+node.Size, list.Head.Offset, list.Tail.Offset+list.Tail.Size, node.Offset, list.Tail.Offset+list.Tail.Size)
  41. node.Next = list.Head
  42. list.Head = node
  43. }
  44. func (list *WrittenIntervalLinkedList) ReadData(buf []byte, start, stop int64) {
  45. t := list.Head
  46. for {
  47. nodeStart, nodeStop := max(start, t.DataOffset), min(stop, t.DataOffset+t.Size)
  48. if nodeStart < nodeStop {
  49. // glog.V(4).Infof("copying start=%d stop=%d t=[%d,%d) => bufSize=%d nodeStart=%d, nodeStop=%d", start, stop, t.DataOffset, t.DataOffset+t.Size, len(buf), nodeStart, nodeStop)
  50. list.tempFile.ReadAt(buf[nodeStart-start:nodeStop-start], t.TempOffset+nodeStart-t.DataOffset)
  51. }
  52. if t.Next == nil {
  53. break
  54. }
  55. t = t.Next
  56. }
  57. }
  58. func (c *WrittenContinuousIntervals) TotalSize() (total int64) {
  59. for _, list := range c.lists {
  60. total += list.Size()
  61. }
  62. return
  63. }
  64. func (list *WrittenIntervalLinkedList) subList(start, stop int64) *WrittenIntervalLinkedList {
  65. var nodes []*WrittenIntervalNode
  66. for t := list.Head; t != nil; t = t.Next {
  67. nodeStart, nodeStop := max(start, t.DataOffset), min(stop, t.DataOffset+t.Size)
  68. if nodeStart >= nodeStop {
  69. // skip non overlapping WrittenIntervalNode
  70. continue
  71. }
  72. nodes = append(nodes, &WrittenIntervalNode{
  73. TempOffset: t.TempOffset + nodeStart - t.DataOffset,
  74. DataOffset: nodeStart,
  75. Size: nodeStop - nodeStart,
  76. Next: nil,
  77. })
  78. }
  79. for i := 1; i < len(nodes); i++ {
  80. nodes[i-1].Next = nodes[i]
  81. }
  82. return &WrittenIntervalLinkedList{
  83. tempFile: list.tempFile,
  84. Head: nodes[0],
  85. Tail: nodes[len(nodes)-1],
  86. }
  87. }
  88. func (c *WrittenContinuousIntervals) debug() {
  89. log.Printf("++")
  90. for _, l := range c.lists {
  91. log.Printf("++++")
  92. for t := l.Head; ; t = t.Next {
  93. log.Printf("[%d,%d) => [%d,%d) %d", t.DataOffset, t.DataOffset+t.Size, t.TempOffset, t.TempOffset+t.Size, t.Size)
  94. if t.Next == nil {
  95. break
  96. }
  97. }
  98. log.Printf("----")
  99. }
  100. log.Printf("--")
  101. }
  102. func (c *WrittenContinuousIntervals) AddInterval(tempOffset int64, dataSize int, dataOffset int64) {
  103. interval := &WrittenIntervalNode{DataOffset: dataOffset, TempOffset: tempOffset, Size: int64(dataSize)}
  104. // append to the tail and return
  105. if len(c.lists) == 1 {
  106. lastSpan := c.lists[0]
  107. if lastSpan.Tail.DataOffset+lastSpan.Tail.Size == dataOffset {
  108. lastSpan.addNodeToTail(interval)
  109. return
  110. }
  111. }
  112. var newLists []*WrittenIntervalLinkedList
  113. for _, list := range c.lists {
  114. // if list is to the left of new interval, add to the new list
  115. if list.Tail.DataOffset+list.Tail.Size <= interval.DataOffset {
  116. newLists = append(newLists, list)
  117. }
  118. // if list is to the right of new interval, add to the new list
  119. if interval.DataOffset+interval.Size <= list.Head.DataOffset {
  120. newLists = append(newLists, list)
  121. }
  122. // if new interval overwrite the right part of the list
  123. if list.Head.DataOffset < interval.DataOffset && interval.DataOffset < list.Tail.DataOffset+list.Tail.Size {
  124. // create a new list of the left part of existing list
  125. newLists = append(newLists, list.subList(list.Offset(), interval.DataOffset))
  126. }
  127. // if new interval overwrite the left part of the list
  128. if list.Head.DataOffset < interval.DataOffset+interval.Size && interval.DataOffset+interval.Size < list.Tail.DataOffset+list.Tail.Size {
  129. // create a new list of the right part of existing list
  130. newLists = append(newLists, list.subList(interval.DataOffset+interval.Size, list.Tail.DataOffset+list.Tail.Size))
  131. }
  132. // skip anything that is fully overwritten by the new interval
  133. }
  134. c.lists = newLists
  135. // add the new interval to the lists, connecting neighbor lists
  136. var prevList, nextList *WrittenIntervalLinkedList
  137. for _, list := range c.lists {
  138. if list.Head.DataOffset == interval.DataOffset+interval.Size {
  139. nextList = list
  140. break
  141. }
  142. }
  143. for _, list := range c.lists {
  144. if list.Head.DataOffset+list.Size() == dataOffset {
  145. list.addNodeToTail(interval)
  146. prevList = list
  147. break
  148. }
  149. }
  150. if prevList != nil && nextList != nil {
  151. // glog.V(4).Infof("connecting [%d,%d) + [%d,%d) => [%d,%d)", prevList.Head.Offset, prevList.Tail.Offset+prevList.Tail.Size, nextList.Head.Offset, nextList.Tail.Offset+nextList.Tail.Size, prevList.Head.Offset, nextList.Tail.Offset+nextList.Tail.Size)
  152. prevList.Tail.Next = nextList.Head
  153. prevList.Tail = nextList.Tail
  154. c.removeList(nextList)
  155. } else if nextList != nil {
  156. // add to head was not done when checking
  157. nextList.addNodeToHead(interval)
  158. }
  159. if prevList == nil && nextList == nil {
  160. c.lists = append(c.lists, &WrittenIntervalLinkedList{
  161. tempFile: c.tempFile,
  162. Head: interval,
  163. Tail: interval,
  164. })
  165. }
  166. return
  167. }
  168. func (c *WrittenContinuousIntervals) RemoveLargestIntervalLinkedList() *WrittenIntervalLinkedList {
  169. var maxSize int64
  170. maxIndex := -1
  171. for k, list := range c.lists {
  172. if maxSize <= list.Size() {
  173. maxSize = list.Size()
  174. maxIndex = k
  175. }
  176. }
  177. if maxSize <= 0 {
  178. return nil
  179. }
  180. t := c.lists[maxIndex]
  181. t.tempFile = c.tempFile
  182. c.lists = append(c.lists[0:maxIndex], c.lists[maxIndex+1:]...)
  183. return t
  184. }
  185. func (c *WrittenContinuousIntervals) removeList(target *WrittenIntervalLinkedList) {
  186. index := -1
  187. for k, list := range c.lists {
  188. if list.Offset() == target.Offset() {
  189. index = k
  190. }
  191. }
  192. if index < 0 {
  193. return
  194. }
  195. c.lists = append(c.lists[0:index], c.lists[index+1:]...)
  196. }
  197. func (c *WrittenContinuousIntervals) ReadDataAt(data []byte, startOffset int64) (maxStop int64) {
  198. for _, list := range c.lists {
  199. start := max(startOffset, list.Offset())
  200. stop := min(startOffset+int64(len(data)), list.Offset()+list.Size())
  201. if start < stop {
  202. list.ReadData(data[start-startOffset:], start, stop)
  203. maxStop = max(maxStop, stop)
  204. }
  205. }
  206. return
  207. }
  208. func (l *WrittenIntervalLinkedList) ToReader(start int64, stop int64) io.Reader {
  209. // TODO: optimize this to avoid another loop
  210. var readers []io.Reader
  211. for t := l.Head; ; t = t.Next {
  212. startOffset, stopOffset := max(t.DataOffset, start), min(t.DataOffset+t.Size, stop)
  213. if startOffset < stopOffset {
  214. // glog.V(4).Infof("ToReader read [%d,%d) from [%d,%d) %d", t.DataOffset, t.DataOffset+t.Size, t.TempOffset, t.TempOffset+t.Size, t.Size)
  215. readers = append(readers, newFileSectionReader(l.tempFile, startOffset-t.DataOffset+t.TempOffset, startOffset, stopOffset-startOffset))
  216. }
  217. if t.Next == nil {
  218. break
  219. }
  220. }
  221. if len(readers) == 1 {
  222. return readers[0]
  223. }
  224. return io.MultiReader(readers...)
  225. }
  226. type FileSectionReader struct {
  227. file *os.File
  228. tempStartOffset int64
  229. Offset int64
  230. dataStart int64
  231. dataStop int64
  232. }
  233. var _ = io.Reader(&FileSectionReader{})
  234. func newFileSectionReader(tempfile *os.File, offset int64, dataOffset int64, size int64) *FileSectionReader {
  235. return &FileSectionReader{
  236. file: tempfile,
  237. tempStartOffset: offset,
  238. Offset: offset,
  239. dataStart: dataOffset,
  240. dataStop: dataOffset + size,
  241. }
  242. }
  243. func (f *FileSectionReader) Read(p []byte) (n int, err error) {
  244. remaining := (f.dataStop - f.dataStart) - (f.Offset - f.tempStartOffset)
  245. if remaining <= 0 {
  246. return 0, io.EOF
  247. }
  248. dataLen := min(remaining, int64(len(p)))
  249. // glog.V(4).Infof("reading [%d,%d) from %v [%d,%d)/[%d,%d) %d", f.Offset-f.tempStartOffset+f.dataStart, f.Offset-f.tempStartOffset+f.dataStart+dataLen, f.file.Name(), f.Offset, f.Offset+dataLen, f.tempStartOffset, f.tempStartOffset+f.dataStop-f.dataStart, f.dataStop-f.dataStart)
  250. n, err = f.file.ReadAt(p[:dataLen], f.Offset)
  251. if n > 0 {
  252. f.Offset += int64(n)
  253. } else {
  254. err = io.EOF
  255. }
  256. return
  257. }