You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

438 lines
14 KiB

6 years ago
4 years ago
6 years ago
5 years ago
4 years ago
4 years ago
6 years ago
5 years ago
  1. package storage
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "os"
  8. "time"
  9. "github.com/chrislusf/seaweedfs/weed/glog"
  10. "github.com/chrislusf/seaweedfs/weed/storage/backend"
  11. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  12. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  13. . "github.com/chrislusf/seaweedfs/weed/storage/types"
  14. )
  // Sentinel errors used by the volume read, write, and delete paths.
  var (
  	// ErrorNotFound is returned by readNeedle when the needle id is not in
  	// the needle map, its offset is zero, or its TTL has expired.
  	ErrorNotFound = errors.New("not found")
  	// ErrorDeleted is returned by readNeedle when the needle is marked
  	// deleted and the caller did not ask to read deleted content.
  	ErrorDeleted = errors.New("already deleted")
  	// ErrorSizeMismatch signals a size mismatch.
  	ErrorSizeMismatch = errors.New("size mismatch")
  )
  18. // isFileUnchanged checks whether this needle to write is same as last one.
  19. // It requires serialized access in the same volume.
  20. func (v *Volume) isFileUnchanged(n *needle.Needle) bool {
  21. if v.Ttl.String() != "" {
  22. return false
  23. }
  24. nv, ok := v.nm.Get(n.Id)
  25. if ok && !nv.Offset.IsZero() && nv.Size.IsValid() {
  26. oldNeedle := new(needle.Needle)
  27. err := oldNeedle.ReadData(v.DataBackend, nv.Offset.ToAcutalOffset(), nv.Size, v.Version())
  28. if err != nil {
  29. glog.V(0).Infof("Failed to check updated file at offset %d size %d: %v", nv.Offset.ToAcutalOffset(), nv.Size, err)
  30. return false
  31. }
  32. if oldNeedle.Cookie == n.Cookie && oldNeedle.Checksum == n.Checksum && bytes.Equal(oldNeedle.Data, n.Data) {
  33. n.DataSize = oldNeedle.DataSize
  34. return true
  35. }
  36. }
  37. return false
  38. }
  39. // Destroy removes everything related to this volume
  40. func (v *Volume) Destroy() (err error) {
  41. if v.isCompacting {
  42. err = fmt.Errorf("volume %d is compacting", v.Id)
  43. return
  44. }
  45. close(v.asyncRequestsChan)
  46. storageName, storageKey := v.RemoteStorageNameKey()
  47. if v.HasRemoteFile() && storageName != "" && storageKey != "" {
  48. if backendStorage, found := backend.BackendStorages[storageName]; found {
  49. backendStorage.DeleteFile(storageKey)
  50. }
  51. }
  52. v.Close()
  53. os.Remove(v.FileName() + ".dat")
  54. os.Remove(v.FileName() + ".idx")
  55. os.Remove(v.FileName() + ".vif")
  56. os.Remove(v.FileName() + ".sdx")
  57. os.Remove(v.FileName() + ".cpd")
  58. os.Remove(v.FileName() + ".cpx")
  59. os.RemoveAll(v.FileName() + ".ldb")
  60. return
  61. }
  62. func (v *Volume) asyncRequestAppend(request *needle.AsyncRequest) {
  63. v.asyncRequestsChan <- request
  64. }
  65. func (v *Volume) syncWrite(n *needle.Needle) (offset uint64, size Size, isUnchanged bool, err error) {
  66. // glog.V(4).Infof("writing needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
  67. actualSize := needle.GetActualSize(Size(len(n.Data)), v.Version())
  68. v.dataFileAccessLock.Lock()
  69. defer v.dataFileAccessLock.Unlock()
  70. if MaxPossibleVolumeSize < v.nm.ContentSize()+uint64(actualSize) {
  71. err = fmt.Errorf("volume size limit %d exceeded! current size is %d", MaxPossibleVolumeSize, v.nm.ContentSize())
  72. return
  73. }
  74. if v.isFileUnchanged(n) {
  75. size = Size(n.DataSize)
  76. isUnchanged = true
  77. return
  78. }
  79. // check whether existing needle cookie matches
  80. nv, ok := v.nm.Get(n.Id)
  81. if ok {
  82. existingNeedle, _, _, existingNeedleReadErr := needle.ReadNeedleHeader(v.DataBackend, v.Version(), nv.Offset.ToAcutalOffset())
  83. if existingNeedleReadErr != nil {
  84. err = fmt.Errorf("reading existing needle: %v", existingNeedleReadErr)
  85. return
  86. }
  87. if existingNeedle.Cookie != n.Cookie {
  88. glog.V(0).Infof("write cookie mismatch: existing %x, new %x", existingNeedle.Cookie, n.Cookie)
  89. err = fmt.Errorf("mismatching cookie %x", n.Cookie)
  90. return
  91. }
  92. }
  93. // append to dat file
  94. n.AppendAtNs = uint64(time.Now().UnixNano())
  95. if offset, size, _, err = n.Append(v.DataBackend, v.Version()); err != nil {
  96. return
  97. }
  98. v.lastAppendAtNs = n.AppendAtNs
  99. // add to needle map
  100. if !ok || uint64(nv.Offset.ToAcutalOffset()) < offset {
  101. if err = v.nm.Put(n.Id, ToOffset(int64(offset)), n.Size); err != nil {
  102. glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err)
  103. }
  104. }
  105. if v.lastModifiedTsSeconds < n.LastModified {
  106. v.lastModifiedTsSeconds = n.LastModified
  107. }
  108. return
  109. }
  110. func (v *Volume) writeNeedle2(n *needle.Needle, fsync bool) (offset uint64, size Size, isUnchanged bool, err error) {
  111. // glog.V(4).Infof("writing needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
  112. if n.Ttl == needle.EMPTY_TTL && v.Ttl != needle.EMPTY_TTL {
  113. n.SetHasTtl()
  114. n.Ttl = v.Ttl
  115. }
  116. if !fsync {
  117. return v.syncWrite(n)
  118. } else {
  119. asyncRequest := needle.NewAsyncRequest(n, true)
  120. // using len(n.Data) here instead of n.Size before n.Size is populated in n.Append()
  121. asyncRequest.ActualSize = needle.GetActualSize(Size(len(n.Data)), v.Version())
  122. v.asyncRequestAppend(asyncRequest)
  123. offset, _, isUnchanged, err = asyncRequest.WaitComplete()
  124. return
  125. }
  126. }
  127. func (v *Volume) doWriteRequest(n *needle.Needle) (offset uint64, size Size, isUnchanged bool, err error) {
  128. // glog.V(4).Infof("writing needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
  129. if v.isFileUnchanged(n) {
  130. size = Size(n.DataSize)
  131. isUnchanged = true
  132. return
  133. }
  134. // check whether existing needle cookie matches
  135. nv, ok := v.nm.Get(n.Id)
  136. if ok {
  137. existingNeedle, _, _, existingNeedleReadErr := needle.ReadNeedleHeader(v.DataBackend, v.Version(), nv.Offset.ToAcutalOffset())
  138. if existingNeedleReadErr != nil {
  139. err = fmt.Errorf("reading existing needle: %v", existingNeedleReadErr)
  140. return
  141. }
  142. if existingNeedle.Cookie != n.Cookie {
  143. glog.V(0).Infof("write cookie mismatch: existing %x, new %x", existingNeedle.Cookie, n.Cookie)
  144. err = fmt.Errorf("mismatching cookie %x", n.Cookie)
  145. return
  146. }
  147. }
  148. // append to dat file
  149. n.AppendAtNs = uint64(time.Now().UnixNano())
  150. if offset, size, _, err = n.Append(v.DataBackend, v.Version()); err != nil {
  151. return
  152. }
  153. v.lastAppendAtNs = n.AppendAtNs
  154. // add to needle map
  155. if !ok || uint64(nv.Offset.ToAcutalOffset()) < offset {
  156. if err = v.nm.Put(n.Id, ToOffset(int64(offset)), n.Size); err != nil {
  157. glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err)
  158. }
  159. }
  160. if v.lastModifiedTsSeconds < n.LastModified {
  161. v.lastModifiedTsSeconds = n.LastModified
  162. }
  163. return
  164. }
  165. func (v *Volume) syncDelete(n *needle.Needle) (Size, error) {
  166. // glog.V(4).Infof("delete needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
  167. actualSize := needle.GetActualSize(0, v.Version())
  168. v.dataFileAccessLock.Lock()
  169. defer v.dataFileAccessLock.Unlock()
  170. if MaxPossibleVolumeSize < v.nm.ContentSize()+uint64(actualSize) {
  171. err := fmt.Errorf("volume size limit %d exceeded! current size is %d", MaxPossibleVolumeSize, v.nm.ContentSize())
  172. return 0, err
  173. }
  174. nv, ok := v.nm.Get(n.Id)
  175. // fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size)
  176. if ok && nv.Size.IsValid() {
  177. size := nv.Size
  178. n.Data = nil
  179. n.AppendAtNs = uint64(time.Now().UnixNano())
  180. offset, _, _, err := n.Append(v.DataBackend, v.Version())
  181. if err != nil {
  182. return size, err
  183. }
  184. v.lastAppendAtNs = n.AppendAtNs
  185. if err = v.nm.Delete(n.Id, ToOffset(int64(offset))); err != nil {
  186. return size, err
  187. }
  188. return size, err
  189. }
  190. return 0, nil
  191. }
  192. func (v *Volume) deleteNeedle2(n *needle.Needle) (Size, error) {
  193. // todo: delete info is always appended no fsync, it may need fsync in future
  194. fsync := false
  195. if !fsync {
  196. return v.syncDelete(n)
  197. } else {
  198. asyncRequest := needle.NewAsyncRequest(n, false)
  199. asyncRequest.ActualSize = needle.GetActualSize(0, v.Version())
  200. v.asyncRequestAppend(asyncRequest)
  201. _, size, _, err := asyncRequest.WaitComplete()
  202. return Size(size), err
  203. }
  204. }
  205. func (v *Volume) doDeleteRequest(n *needle.Needle) (Size, error) {
  206. glog.V(4).Infof("delete needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
  207. nv, ok := v.nm.Get(n.Id)
  208. // fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size)
  209. if ok && nv.Size.IsValid() {
  210. size := nv.Size
  211. n.Data = nil
  212. n.AppendAtNs = uint64(time.Now().UnixNano())
  213. offset, _, _, err := n.Append(v.DataBackend, v.Version())
  214. if err != nil {
  215. return size, err
  216. }
  217. v.lastAppendAtNs = n.AppendAtNs
  218. if err = v.nm.Delete(n.Id, ToOffset(int64(offset))); err != nil {
  219. return size, err
  220. }
  221. return size, err
  222. }
  223. return 0, nil
  224. }
  225. // read fills in Needle content by looking up n.Id from NeedleMapper
  226. func (v *Volume) readNeedle(n *needle.Needle, readOption *ReadOption) (int, error) {
  227. v.dataFileAccessLock.RLock()
  228. defer v.dataFileAccessLock.RUnlock()
  229. nv, ok := v.nm.Get(n.Id)
  230. if !ok || nv.Offset.IsZero() {
  231. return -1, ErrorNotFound
  232. }
  233. readSize := nv.Size
  234. if readSize.IsDeleted() {
  235. if readOption != nil && readOption.ReadDeleted && readSize != TombstoneFileSize {
  236. glog.V(3).Infof("reading deleted %s", n.String())
  237. readSize = -readSize
  238. } else {
  239. return -1, ErrorDeleted
  240. }
  241. }
  242. if readSize == 0 {
  243. return 0, nil
  244. }
  245. err := n.ReadData(v.DataBackend, nv.Offset.ToAcutalOffset(), readSize, v.Version())
  246. if err == needle.ErrorSizeMismatch && OffsetSize == 4 {
  247. err = n.ReadData(v.DataBackend, nv.Offset.ToAcutalOffset()+int64(MaxPossibleVolumeSize), readSize, v.Version())
  248. }
  249. if err != nil {
  250. return 0, err
  251. }
  252. bytesRead := len(n.Data)
  253. if !n.HasTtl() {
  254. return bytesRead, nil
  255. }
  256. ttlMinutes := n.Ttl.Minutes()
  257. if ttlMinutes == 0 {
  258. return bytesRead, nil
  259. }
  260. if !n.HasLastModifiedDate() {
  261. return bytesRead, nil
  262. }
  263. if uint64(time.Now().Unix()) < n.LastModified+uint64(ttlMinutes*60) {
  264. return bytesRead, nil
  265. }
  266. return -1, ErrorNotFound
  267. }
  268. func (v *Volume) startWorker() {
  269. go func() {
  270. chanClosed := false
  271. for {
  272. // chan closed. go thread will exit
  273. if chanClosed {
  274. break
  275. }
  276. currentRequests := make([]*needle.AsyncRequest, 0, 128)
  277. currentBytesToWrite := int64(0)
  278. for {
  279. request, ok := <-v.asyncRequestsChan
  280. // volume may be closed
  281. if !ok {
  282. chanClosed = true
  283. break
  284. }
  285. if MaxPossibleVolumeSize < v.ContentSize()+uint64(currentBytesToWrite+request.ActualSize) {
  286. request.Complete(0, 0, false,
  287. fmt.Errorf("volume size limit %d exceeded! current size is %d", MaxPossibleVolumeSize, v.ContentSize()))
  288. break
  289. }
  290. currentRequests = append(currentRequests, request)
  291. currentBytesToWrite += request.ActualSize
  292. // submit at most 4M bytes or 128 requests at one time to decrease request delay.
  293. // it also need to break if there is no data in channel to avoid io hang.
  294. if currentBytesToWrite >= 4*1024*1024 || len(currentRequests) >= 128 || len(v.asyncRequestsChan) == 0 {
  295. break
  296. }
  297. }
  298. if len(currentRequests) == 0 {
  299. continue
  300. }
  301. v.dataFileAccessLock.Lock()
  302. end, _, e := v.DataBackend.GetStat()
  303. if e != nil {
  304. for i := 0; i < len(currentRequests); i++ {
  305. currentRequests[i].Complete(0, 0, false,
  306. fmt.Errorf("cannot read current volume position: %v", e))
  307. }
  308. v.dataFileAccessLock.Unlock()
  309. continue
  310. }
  311. for i := 0; i < len(currentRequests); i++ {
  312. if currentRequests[i].IsWriteRequest {
  313. offset, size, isUnchanged, err := v.doWriteRequest(currentRequests[i].N)
  314. currentRequests[i].UpdateResult(offset, uint64(size), isUnchanged, err)
  315. } else {
  316. size, err := v.doDeleteRequest(currentRequests[i].N)
  317. currentRequests[i].UpdateResult(0, uint64(size), false, err)
  318. }
  319. }
  320. // if sync error, data is not reliable, we should mark the completed request as fail and rollback
  321. if err := v.DataBackend.Sync(); err != nil {
  322. // todo: this may generate dirty data or cause data inconsistent, may be weed need to panic?
  323. if te := v.DataBackend.Truncate(end); te != nil {
  324. glog.V(0).Infof("Failed to truncate %s back to %d with error: %v", v.DataBackend.Name(), end, te)
  325. }
  326. for i := 0; i < len(currentRequests); i++ {
  327. if currentRequests[i].IsSucceed() {
  328. currentRequests[i].UpdateResult(0, 0, false, err)
  329. }
  330. }
  331. }
  332. for i := 0; i < len(currentRequests); i++ {
  333. currentRequests[i].Submit()
  334. }
  335. v.dataFileAccessLock.Unlock()
  336. }
  337. }()
  338. }
  339. type VolumeFileScanner interface {
  340. VisitSuperBlock(super_block.SuperBlock) error
  341. ReadNeedleBody() bool
  342. VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error
  343. }
  344. func ScanVolumeFile(dirname string, collection string, id needle.VolumeId,
  345. needleMapKind NeedleMapType,
  346. volumeFileScanner VolumeFileScanner) (err error) {
  347. var v *Volume
  348. if v, err = loadVolumeWithoutIndex(dirname, collection, id, needleMapKind); err != nil {
  349. return fmt.Errorf("failed to load volume %d: %v", id, err)
  350. }
  351. if err = volumeFileScanner.VisitSuperBlock(v.SuperBlock); err != nil {
  352. return fmt.Errorf("failed to process volume %d super block: %v", id, err)
  353. }
  354. defer v.Close()
  355. version := v.Version()
  356. offset := int64(v.SuperBlock.BlockSize())
  357. return ScanVolumeFileFrom(version, v.DataBackend, offset, volumeFileScanner)
  358. }
  359. func ScanVolumeFileFrom(version needle.Version, datBackend backend.BackendStorageFile, offset int64, volumeFileScanner VolumeFileScanner) (err error) {
  360. n, nh, rest, e := needle.ReadNeedleHeader(datBackend, version, offset)
  361. if e != nil {
  362. if e == io.EOF {
  363. return nil
  364. }
  365. return fmt.Errorf("cannot read %s at offset %d: %v", datBackend.Name(), offset, e)
  366. }
  367. for n != nil {
  368. var needleBody []byte
  369. if volumeFileScanner.ReadNeedleBody() {
  370. // println("needle", n.Id.String(), "offset", offset, "size", n.Size, "rest", rest)
  371. if needleBody, err = n.ReadNeedleBody(datBackend, version, offset+NeedleHeaderSize, rest); err != nil {
  372. glog.V(0).Infof("cannot read needle head [%d, %d) body [%d, %d) body length %d: %v", offset, offset+NeedleHeaderSize, offset+NeedleHeaderSize, offset+NeedleHeaderSize+rest, rest, err)
  373. // err = fmt.Errorf("cannot read needle body: %v", err)
  374. // return
  375. }
  376. }
  377. err := volumeFileScanner.VisitNeedle(n, offset, nh, needleBody)
  378. if err == io.EOF {
  379. return nil
  380. }
  381. if err != nil {
  382. glog.V(0).Infof("visit needle error: %v", err)
  383. return fmt.Errorf("visit needle error: %v", err)
  384. }
  385. offset += NeedleHeaderSize + rest
  386. glog.V(4).Infof("==> new entry offset %d", offset)
  387. if n, nh, rest, err = needle.ReadNeedleHeader(datBackend, version, offset); err != nil {
  388. if err == io.EOF {
  389. return nil
  390. }
  391. return fmt.Errorf("cannot read needle header at offset %d: %v", offset, err)
  392. }
  393. glog.V(4).Infof("new entry needle size:%d rest:%d", n.Size, rest)
  394. }
  395. return nil
  396. }