You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

418 lines
13 KiB

6 years ago
6 years ago
5 years ago
6 years ago
6 years ago
6 years ago
5 years ago
  1. package storage
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "os"
  8. "time"
  9. "github.com/chrislusf/seaweedfs/weed/glog"
  10. "github.com/chrislusf/seaweedfs/weed/storage/backend"
  11. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  12. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  13. . "github.com/chrislusf/seaweedfs/weed/storage/types"
  14. )
  15. var ErrorNotFound = errors.New("not found")
  16. // isFileUnchanged checks whether this needle to write is same as last one.
  17. // It requires serialized access in the same volume.
  18. func (v *Volume) isFileUnchanged(n *needle.Needle) bool {
  19. if v.Ttl.String() != "" {
  20. return false
  21. }
  22. nv, ok := v.nm.Get(n.Id)
  23. if ok && !nv.Offset.IsZero() && nv.Size != TombstoneFileSize {
  24. oldNeedle := new(needle.Needle)
  25. err := oldNeedle.ReadData(v.DataBackend, nv.Offset.ToAcutalOffset(), nv.Size, v.Version())
  26. if err != nil {
  27. glog.V(0).Infof("Failed to check updated file at offset %d size %d: %v", nv.Offset.ToAcutalOffset(), nv.Size, err)
  28. return false
  29. }
  30. if oldNeedle.Cookie == n.Cookie && oldNeedle.Checksum == n.Checksum && bytes.Equal(oldNeedle.Data, n.Data) {
  31. n.DataSize = oldNeedle.DataSize
  32. return true
  33. }
  34. }
  35. return false
  36. }
  37. // Destroy removes everything related to this volume
  38. func (v *Volume) Destroy() (err error) {
  39. if v.isCompacting {
  40. err = fmt.Errorf("volume %d is compacting", v.Id)
  41. return
  42. }
  43. close(v.asyncRequestsChan)
  44. storageName, storageKey := v.RemoteStorageNameKey()
  45. if v.HasRemoteFile() && storageName != "" && storageKey != "" {
  46. if backendStorage, found := backend.BackendStorages[storageName]; found {
  47. backendStorage.DeleteFile(storageKey)
  48. }
  49. }
  50. v.Close()
  51. os.Remove(v.FileName() + ".dat")
  52. os.Remove(v.FileName() + ".idx")
  53. os.Remove(v.FileName() + ".vif")
  54. os.Remove(v.FileName() + ".sdx")
  55. os.Remove(v.FileName() + ".cpd")
  56. os.Remove(v.FileName() + ".cpx")
  57. os.RemoveAll(v.FileName() + ".ldb")
  58. return
  59. }
  60. func (v *Volume) asyncRequestAppend(request *needle.AsyncRequest) {
  61. v.asyncRequestsChan <- request
  62. }
  63. func (v *Volume) writeNeedleDeprecated(n *needle.Needle, fsync bool) (offset uint64, size uint32, isUnchanged bool, err error) {
  64. // glog.V(4).Infof("writing needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
  65. v.dataFileAccessLock.Lock()
  66. defer v.dataFileAccessLock.Unlock()
  67. if v.isFileUnchanged(n) {
  68. size = n.DataSize
  69. isUnchanged = true
  70. return
  71. }
  72. if n.Ttl == needle.EMPTY_TTL && v.Ttl != needle.EMPTY_TTL {
  73. n.SetHasTtl()
  74. n.Ttl = v.Ttl
  75. }
  76. // check whether existing needle cookie matches
  77. nv, ok := v.nm.Get(n.Id)
  78. if ok {
  79. existingNeedle, _, _, existingNeedleReadErr := needle.ReadNeedleHeader(v.DataBackend, v.Version(), nv.Offset.ToAcutalOffset())
  80. if existingNeedleReadErr != nil {
  81. err = fmt.Errorf("reading existing needle: %v", existingNeedleReadErr)
  82. return
  83. }
  84. if existingNeedle.Cookie != n.Cookie {
  85. glog.V(0).Infof("write cookie mismatch: existing %x, new %x", existingNeedle.Cookie, n.Cookie)
  86. err = fmt.Errorf("mismatching cookie %x", n.Cookie)
  87. return
  88. }
  89. }
  90. // append to dat file
  91. n.AppendAtNs = uint64(time.Now().UnixNano())
  92. if offset, size, _, err = n.Append(v.DataBackend, v.Version()); err != nil {
  93. return
  94. }
  95. if fsync {
  96. if err = v.DataBackend.Sync(); err != nil {
  97. return
  98. }
  99. }
  100. v.lastAppendAtNs = n.AppendAtNs
  101. // add to needle map
  102. if !ok || uint64(nv.Offset.ToAcutalOffset()) < offset {
  103. if err = v.nm.Put(n.Id, ToOffset(int64(offset)), n.Size); err != nil {
  104. glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err)
  105. }
  106. }
  107. if v.lastModifiedTsSeconds < n.LastModified {
  108. v.lastModifiedTsSeconds = n.LastModified
  109. }
  110. return
  111. }
  112. func (v *Volume) writeNeedle2(n *needle.Needle, fsync bool) (offset uint64, size uint32, isUnchanged bool, err error) {
  113. // glog.V(4).Infof("writing needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
  114. if n.Ttl == needle.EMPTY_TTL && v.Ttl != needle.EMPTY_TTL {
  115. n.SetHasTtl()
  116. n.Ttl = v.Ttl
  117. }
  118. asyncRequest := needle.NewAsyncRequest(n, true, fsync)
  119. // using len(n.Data) here instead of n.Size before n.Size is populated in n.Append()
  120. asyncRequest.ActualSize = needle.GetActualSize(uint32(len(n.Data)), v.Version())
  121. v.asyncRequestAppend(asyncRequest)
  122. offset, _, isUnchanged, err = asyncRequest.WaitComplete()
  123. return
  124. }
  125. func (v *Volume) doWriteRequest(n *needle.Needle) (offset uint64, size uint32, isUnchanged bool, err error) {
  126. // glog.V(4).Infof("writing needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
  127. if v.isFileUnchanged(n) {
  128. size = n.DataSize
  129. isUnchanged = true
  130. return
  131. }
  132. // check whether existing needle cookie matches
  133. nv, ok := v.nm.Get(n.Id)
  134. if ok {
  135. existingNeedle, _, _, existingNeedleReadErr := needle.ReadNeedleHeader(v.DataBackend, v.Version(), nv.Offset.ToAcutalOffset())
  136. if existingNeedleReadErr != nil {
  137. err = fmt.Errorf("reading existing needle: %v", existingNeedleReadErr)
  138. return
  139. }
  140. if existingNeedle.Cookie != n.Cookie {
  141. glog.V(0).Infof("write cookie mismatch: existing %x, new %x", existingNeedle.Cookie, n.Cookie)
  142. err = fmt.Errorf("mismatching cookie %x", n.Cookie)
  143. return
  144. }
  145. }
  146. // append to dat file
  147. n.AppendAtNs = uint64(time.Now().UnixNano())
  148. if offset, size, _, err = n.Append(v.DataBackend, v.Version()); err != nil {
  149. return
  150. }
  151. v.lastAppendAtNs = n.AppendAtNs
  152. // add to needle map
  153. if !ok || uint64(nv.Offset.ToAcutalOffset()) < offset {
  154. if err = v.nm.Put(n.Id, ToOffset(int64(offset)), n.Size); err != nil {
  155. glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err)
  156. }
  157. }
  158. if v.lastModifiedTsSeconds < n.LastModified {
  159. v.lastModifiedTsSeconds = n.LastModified
  160. }
  161. return
  162. }
  163. func (v *Volume) deleteNeedleDeprecated(n *needle.Needle) (uint32, error) {
  164. glog.V(4).Infof("delete needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
  165. v.dataFileAccessLock.Lock()
  166. defer v.dataFileAccessLock.Unlock()
  167. nv, ok := v.nm.Get(n.Id)
  168. //fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size)
  169. if ok && nv.Size != TombstoneFileSize {
  170. size := nv.Size
  171. n.Data = nil
  172. n.AppendAtNs = uint64(time.Now().UnixNano())
  173. offset, _, _, err := n.Append(v.DataBackend, v.Version())
  174. if err != nil {
  175. return size, err
  176. }
  177. v.lastAppendAtNs = n.AppendAtNs
  178. if err = v.nm.Delete(n.Id, ToOffset(int64(offset))); err != nil {
  179. return size, err
  180. }
  181. return size, err
  182. }
  183. return 0, nil
  184. }
  185. func (v *Volume) deleteNeedle2(n *needle.Needle) (uint32, error) {
  186. asyncRequest := needle.NewAsyncRequest(n, false, false)
  187. asyncRequest.ActualSize = needle.GetActualSize(0, v.Version())
  188. v.asyncRequestAppend(asyncRequest)
  189. _, size, _, err := asyncRequest.WaitComplete()
  190. return uint32(size), err
  191. }
  192. func (v *Volume) doDeleteRequest(n *needle.Needle) (uint32, error) {
  193. glog.V(4).Infof("delete needle %s", needle.NewFileIdFromNeedle(v.Id, n).String())
  194. nv, ok := v.nm.Get(n.Id)
  195. //fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size)
  196. if ok && nv.Size != TombstoneFileSize {
  197. size := nv.Size
  198. n.Data = nil
  199. n.AppendAtNs = uint64(time.Now().UnixNano())
  200. offset, _, _, err := n.Append(v.DataBackend, v.Version())
  201. if err != nil {
  202. return size, err
  203. }
  204. v.lastAppendAtNs = n.AppendAtNs
  205. if err = v.nm.Delete(n.Id, ToOffset(int64(offset))); err != nil {
  206. return size, err
  207. }
  208. return size, err
  209. }
  210. return 0, nil
  211. }
  212. // read fills in Needle content by looking up n.Id from NeedleMapper
  213. func (v *Volume) readNeedle(n *needle.Needle) (int, error) {
  214. v.dataFileAccessLock.RLock()
  215. defer v.dataFileAccessLock.RUnlock()
  216. nv, ok := v.nm.Get(n.Id)
  217. if !ok || nv.Offset.IsZero() {
  218. return -1, ErrorNotFound
  219. }
  220. if nv.Size == TombstoneFileSize {
  221. return -1, errors.New("already deleted")
  222. }
  223. if nv.Size == 0 {
  224. return 0, nil
  225. }
  226. err := n.ReadData(v.DataBackend, nv.Offset.ToAcutalOffset(), nv.Size, v.Version())
  227. if err != nil {
  228. return 0, err
  229. }
  230. bytesRead := len(n.Data)
  231. if !n.HasTtl() {
  232. return bytesRead, nil
  233. }
  234. ttlMinutes := n.Ttl.Minutes()
  235. if ttlMinutes == 0 {
  236. return bytesRead, nil
  237. }
  238. if !n.HasLastModifiedDate() {
  239. return bytesRead, nil
  240. }
  241. if uint64(time.Now().Unix()) < n.LastModified+uint64(ttlMinutes*60) {
  242. return bytesRead, nil
  243. }
  244. return -1, ErrorNotFound
  245. }
  246. func (v *Volume) startWorker() {
  247. go func() {
  248. chanClosed := false
  249. for {
  250. // chan closed. go thread will exit
  251. if chanClosed {
  252. break
  253. }
  254. fsync := false
  255. currentRequests := make([]*needle.AsyncRequest, 0, 128)
  256. currentBytesToWrite := int64(0)
  257. for {
  258. request, ok := <-v.asyncRequestsChan
  259. //volume may be closed
  260. if !ok {
  261. chanClosed = true
  262. break
  263. }
  264. if MaxPossibleVolumeSize < v.ContentSize()+uint64(currentBytesToWrite+request.ActualSize) {
  265. request.Complete(0, 0, false,
  266. fmt.Errorf("volume size limit %d exceeded! current size is %d", MaxPossibleVolumeSize, v.ContentSize()))
  267. break
  268. }
  269. currentRequests = append(currentRequests, request)
  270. currentBytesToWrite += request.ActualSize
  271. if request.Fsync {
  272. fsync = true
  273. }
  274. // submit at most 4M bytes or 128 requests at one time to decrease request delay.
  275. // it also need to break if there is no data in channel to avoid io hang.
  276. if currentBytesToWrite >= 4*1024*1024 || len(currentRequests) >= 128 || len(v.asyncRequestsChan) == 0 {
  277. break
  278. }
  279. }
  280. if len(currentRequests) == 0 {
  281. continue
  282. }
  283. v.dataFileAccessLock.Lock()
  284. end, _, e := v.DataBackend.GetStat()
  285. if e != nil {
  286. for i := 0; i < len(currentRequests); i++ {
  287. currentRequests[i].Complete(0, 0, false,
  288. fmt.Errorf("cannot read current volume position: %v", e))
  289. }
  290. v.dataFileAccessLock.Unlock()
  291. continue
  292. }
  293. for i := 0; i < len(currentRequests); i++ {
  294. if currentRequests[i].IsWriteRequest {
  295. offset, size, isUnchanged, err := v.doWriteRequest(currentRequests[i].N)
  296. currentRequests[i].UpdateResult(offset, uint64(size), isUnchanged, err)
  297. } else {
  298. size, err := v.doDeleteRequest(currentRequests[i].N)
  299. currentRequests[i].UpdateResult(0, uint64(size), false, err)
  300. }
  301. }
  302. if fsync {
  303. // if sync error, data is not reliable, we should mark the completed request as fail and rollback
  304. if err := v.DataBackend.Sync(); err != nil {
  305. // todo: this may generate dirty data or cause data inconsistent, may be weed need to panic?
  306. if te := v.DataBackend.Truncate(end); te != nil {
  307. glog.V(0).Infof("Failed to truncate %s back to %d with error: %v", v.DataBackend.Name(), end, te)
  308. }
  309. for i := 0; i < len(currentRequests); i++ {
  310. if currentRequests[i].IsSucceed() {
  311. currentRequests[i].UpdateResult(0, 0, false, err)
  312. }
  313. }
  314. }
  315. }
  316. for i := 0; i < len(currentRequests); i++ {
  317. currentRequests[i].Submit()
  318. }
  319. v.dataFileAccessLock.Unlock()
  320. }
  321. }()
  322. }
  323. type VolumeFileScanner interface {
  324. VisitSuperBlock(super_block.SuperBlock) error
  325. ReadNeedleBody() bool
  326. VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error
  327. }
  328. func ScanVolumeFile(dirname string, collection string, id needle.VolumeId,
  329. needleMapKind NeedleMapType,
  330. volumeFileScanner VolumeFileScanner) (err error) {
  331. var v *Volume
  332. if v, err = loadVolumeWithoutIndex(dirname, collection, id, needleMapKind); err != nil {
  333. return fmt.Errorf("failed to load volume %d: %v", id, err)
  334. }
  335. if v.volumeInfo.Version == 0 {
  336. if err = volumeFileScanner.VisitSuperBlock(v.SuperBlock); err != nil {
  337. return fmt.Errorf("failed to process volume %d super block: %v", id, err)
  338. }
  339. }
  340. defer v.Close()
  341. version := v.Version()
  342. offset := int64(v.SuperBlock.BlockSize())
  343. return ScanVolumeFileFrom(version, v.DataBackend, offset, volumeFileScanner)
  344. }
  345. func ScanVolumeFileFrom(version needle.Version, datBackend backend.BackendStorageFile, offset int64, volumeFileScanner VolumeFileScanner) (err error) {
  346. n, nh, rest, e := needle.ReadNeedleHeader(datBackend, version, offset)
  347. if e != nil {
  348. if e == io.EOF {
  349. return nil
  350. }
  351. return fmt.Errorf("cannot read %s at offset %d: %v", datBackend.Name(), offset, e)
  352. }
  353. for n != nil {
  354. var needleBody []byte
  355. if volumeFileScanner.ReadNeedleBody() {
  356. if needleBody, err = n.ReadNeedleBody(datBackend, version, offset+NeedleHeaderSize, rest); err != nil {
  357. glog.V(0).Infof("cannot read needle body: %v", err)
  358. //err = fmt.Errorf("cannot read needle body: %v", err)
  359. //return
  360. }
  361. }
  362. err := volumeFileScanner.VisitNeedle(n, offset, nh, needleBody)
  363. if err == io.EOF {
  364. return nil
  365. }
  366. if err != nil {
  367. glog.V(0).Infof("visit needle error: %v", err)
  368. return fmt.Errorf("visit needle error: %v", err)
  369. }
  370. offset += NeedleHeaderSize + rest
  371. glog.V(4).Infof("==> new entry offset %d", offset)
  372. if n, nh, rest, err = needle.ReadNeedleHeader(datBackend, version, offset); err != nil {
  373. if err == io.EOF {
  374. return nil
  375. }
  376. return fmt.Errorf("cannot read needle header at offset %d: %v", offset, err)
  377. }
  378. glog.V(4).Infof("new entry needle size:%d rest:%d", n.Size, rest)
  379. }
  380. return nil
  381. }