You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

447 lines
12 KiB

12 years ago
12 years ago
12 years ago
12 years ago
11 years ago
12 years ago
11 years ago
11 years ago
11 years ago
  1. package storage
  2. import (
  3. "bytes"
  4. "code.google.com/p/weed-fs/go/glog"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "os"
  9. "path"
  10. "sync"
  11. "time"
  12. )
  // SuperBlockSize is the number of bytes occupied by the super block header at
  // the start of a volume data file; SuperBlock.Bytes always returns a slice of
  // this length.
  const SuperBlockSize = 8
  16. type SuperBlock struct {
  17. Version Version
  18. ReplicaType ReplicationType
  19. }
  20. func (s *SuperBlock) Bytes() []byte {
  21. header := make([]byte, SuperBlockSize)
  22. header[0] = byte(s.Version)
  23. header[1] = s.ReplicaType.Byte()
  24. return header
  25. }
  26. type Volume struct {
  27. Id VolumeId
  28. dir string
  29. Collection string
  30. dataFile *os.File
  31. nm NeedleMapper
  32. readOnly bool
  33. SuperBlock
  34. accessLock sync.Mutex
  35. }
  36. func NewVolume(dirname string, collection string, id VolumeId, replicationType ReplicationType) (v *Volume, e error) {
  37. v = &Volume{dir: dirname, Collection: collection, Id: id}
  38. v.SuperBlock = SuperBlock{ReplicaType: replicationType}
  39. e = v.load(true)
  40. return
  41. }
  42. func loadVolumeWithoutIndex(dirname string, collection string, id VolumeId) (v *Volume, e error) {
  43. v = &Volume{dir: dirname, Collection: collection, Id: id}
  44. v.SuperBlock = SuperBlock{ReplicaType: CopyNil}
  45. e = v.load(false)
  46. return
  47. }
  48. func (v *Volume) FileName() (fileName string) {
  49. if v.Collection == "" {
  50. fileName = path.Join(v.dir, v.Id.String())
  51. } else {
  52. fileName = path.Join(v.dir, v.Collection+"_"+v.Id.String())
  53. }
  54. return
  55. }
  56. func (v *Volume) load(alsoLoadIndex bool) error {
  57. var e error
  58. fileName := v.FileName()
  59. if exists, canRead, canWrite, _ := checkFile(fileName + ".dat"); exists && !canRead {
  60. return fmt.Errorf("cannot read Volume Data file %s.dat", fileName)
  61. } else if !exists || canWrite {
  62. v.dataFile, e = os.OpenFile(fileName+".dat", os.O_RDWR|os.O_CREATE, 0644)
  63. } else if exists && canRead {
  64. glog.V(0).Infoln("opening " + fileName + ".dat in READONLY mode")
  65. v.dataFile, e = os.Open(fileName + ".dat")
  66. v.readOnly = true
  67. } else {
  68. return fmt.Errorf("Unknown state about Volume Data file %s.dat", fileName)
  69. }
  70. if e != nil {
  71. if !os.IsPermission(e) {
  72. return fmt.Errorf("cannot load Volume Data %s.dat: %s", fileName, e.Error())
  73. }
  74. }
  75. if v.ReplicaType == CopyNil {
  76. e = v.readSuperBlock()
  77. } else {
  78. e = v.maybeWriteSuperBlock()
  79. }
  80. if e == nil && alsoLoadIndex {
  81. if v.readOnly {
  82. if v.ensureConvertIdxToCdb(fileName) {
  83. v.nm, e = OpenCdbMap(fileName + ".cdb")
  84. return e
  85. }
  86. }
  87. var indexFile *os.File
  88. if v.readOnly {
  89. glog.V(1).Infoln("open to read file", fileName+".idx")
  90. if indexFile, e = os.OpenFile(fileName+".idx", os.O_RDONLY, 0644); e != nil {
  91. return fmt.Errorf("cannot read Volume Data %s.dat: %s", fileName, e.Error())
  92. }
  93. } else {
  94. glog.V(1).Infoln("open to write file", fileName+".idx")
  95. if indexFile, e = os.OpenFile(fileName+".idx", os.O_RDWR|os.O_CREATE, 0644); e != nil {
  96. return fmt.Errorf("cannot write Volume Data %s.dat: %s", fileName, e.Error())
  97. }
  98. }
  99. glog.V(0).Infoln("loading file", fileName+".idx", "readonly", v.readOnly)
  100. if v.nm, e = LoadNeedleMap(indexFile); e != nil {
  101. glog.V(0).Infoln("loading error:", e)
  102. }
  103. }
  104. return e
  105. }
  106. func (v *Volume) Version() Version {
  107. return v.SuperBlock.Version
  108. }
  109. func (v *Volume) Size() int64 {
  110. v.accessLock.Lock()
  111. defer v.accessLock.Unlock()
  112. stat, e := v.dataFile.Stat()
  113. if e == nil {
  114. return stat.Size()
  115. }
  116. glog.V(0).Infof("Failed to read file size %s %s", v.dataFile.Name(), e.Error())
  117. return -1
  118. }
  119. func (v *Volume) Close() {
  120. v.accessLock.Lock()
  121. defer v.accessLock.Unlock()
  122. v.nm.Close()
  123. _ = v.dataFile.Close()
  124. }
  125. func (v *Volume) maybeWriteSuperBlock() error {
  126. stat, e := v.dataFile.Stat()
  127. if e != nil {
  128. glog.V(0).Infof("failed to stat datafile %s: %s", v.dataFile, e.Error())
  129. return e
  130. }
  131. if stat.Size() == 0 {
  132. v.SuperBlock.Version = CurrentVersion
  133. _, e = v.dataFile.Write(v.SuperBlock.Bytes())
  134. if e != nil && os.IsPermission(e) {
  135. //read-only, but zero length - recreate it!
  136. if v.dataFile, e = os.Create(v.dataFile.Name()); e == nil {
  137. if _, e = v.dataFile.Write(v.SuperBlock.Bytes()); e == nil {
  138. v.readOnly = false
  139. }
  140. }
  141. }
  142. }
  143. return e
  144. }
  145. func (v *Volume) readSuperBlock() (err error) {
  146. if _, err = v.dataFile.Seek(0, 0); err != nil {
  147. return fmt.Errorf("cannot seek to the beginning of %s: %s", v.dataFile.Name(), err.Error())
  148. }
  149. header := make([]byte, SuperBlockSize)
  150. if _, e := v.dataFile.Read(header); e != nil {
  151. return fmt.Errorf("cannot read superblock: %s", e.Error())
  152. }
  153. v.SuperBlock, err = ParseSuperBlock(header)
  154. return err
  155. }
  156. func ParseSuperBlock(header []byte) (superBlock SuperBlock, err error) {
  157. superBlock.Version = Version(header[0])
  158. if superBlock.ReplicaType, err = NewReplicationTypeFromByte(header[1]); err != nil {
  159. err = fmt.Errorf("cannot read replica type: %s", err.Error())
  160. }
  161. return
  162. }
  163. func (v *Volume) NeedToReplicate() bool {
  164. return v.ReplicaType.GetCopyCount() > 1
  165. }
  166. func (v *Volume) isFileUnchanged(n *Needle) bool {
  167. nv, ok := v.nm.Get(n.Id)
  168. if ok && nv.Offset > 0 {
  169. if _, err := v.dataFile.Seek(int64(nv.Offset)*NeedlePaddingSize, 0); err != nil {
  170. return false
  171. }
  172. oldNeedle := new(Needle)
  173. oldNeedle.Read(v.dataFile, nv.Size, v.Version())
  174. if oldNeedle.Checksum == n.Checksum && bytes.Equal(oldNeedle.Data, n.Data) {
  175. n.Size = oldNeedle.Size
  176. return true
  177. }
  178. }
  179. return false
  180. }
  181. func (v *Volume) write(n *Needle) (size uint32, err error) {
  182. if v.readOnly {
  183. err = fmt.Errorf("%s is read-only", v.dataFile)
  184. return
  185. }
  186. v.accessLock.Lock()
  187. defer v.accessLock.Unlock()
  188. if v.isFileUnchanged(n) {
  189. size = n.Size
  190. glog.V(4).Infof("needle is unchanged!")
  191. return
  192. }
  193. var offset int64
  194. if offset, err = v.dataFile.Seek(0, 2); err != nil {
  195. return
  196. }
  197. //ensure file writing starting from aligned positions
  198. if offset%NeedlePaddingSize != 0 {
  199. offset = offset + (NeedlePaddingSize - offset%NeedlePaddingSize)
  200. if offset, err = v.dataFile.Seek(offset, 0); err != nil {
  201. glog.V(4).Infof("failed to align in datafile %s: %s", v.dataFile.Name(), err.Error())
  202. return
  203. }
  204. }
  205. if size, err = n.Append(v.dataFile, v.Version()); err != nil {
  206. if e := v.dataFile.Truncate(offset); e != nil {
  207. err = fmt.Errorf("%s\ncannot truncate %s: %s", err, v.dataFile, e.Error())
  208. }
  209. return
  210. }
  211. nv, ok := v.nm.Get(n.Id)
  212. if !ok || int64(nv.Offset)*NeedlePaddingSize < offset {
  213. if _, err = v.nm.Put(n.Id, uint32(offset/NeedlePaddingSize), n.Size); err != nil {
  214. glog.V(4).Infof("failed to save in needle map %d: %s", n.Id, err.Error())
  215. }
  216. }
  217. return
  218. }
  219. func (v *Volume) delete(n *Needle) (uint32, error) {
  220. if v.readOnly {
  221. return 0, fmt.Errorf("%s is read-only", v.dataFile)
  222. }
  223. v.accessLock.Lock()
  224. defer v.accessLock.Unlock()
  225. nv, ok := v.nm.Get(n.Id)
  226. //fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size)
  227. if ok {
  228. size := nv.Size
  229. if err := v.nm.Delete(n.Id); err != nil {
  230. return size, err
  231. }
  232. if _, err := v.dataFile.Seek(0, 2); err != nil {
  233. return size, err
  234. }
  235. n.Data = make([]byte, 0)
  236. _, err := n.Append(v.dataFile, v.Version())
  237. return size, err
  238. }
  239. return 0, nil
  240. }
  241. func (v *Volume) read(n *Needle) (int, error) {
  242. v.accessLock.Lock()
  243. defer v.accessLock.Unlock()
  244. nv, ok := v.nm.Get(n.Id)
  245. if ok && nv.Offset > 0 {
  246. if _, err := v.dataFile.Seek(int64(nv.Offset)*NeedlePaddingSize, 0); err != nil {
  247. return -1, err
  248. }
  249. return n.Read(v.dataFile, nv.Size, v.Version())
  250. }
  251. return -1, errors.New("Not Found")
  252. }
  253. func (v *Volume) garbageLevel() float64 {
  254. return float64(v.nm.DeletedSize()) / float64(v.ContentSize())
  255. }
  256. func (v *Volume) Compact() error {
  257. v.accessLock.Lock()
  258. defer v.accessLock.Unlock()
  259. filePath := path.Join(v.dir, v.Id.String())
  260. return v.copyDataAndGenerateIndexFile(filePath+".cpd", filePath+".cpx")
  261. }
  262. func (v *Volume) commitCompact() error {
  263. v.accessLock.Lock()
  264. defer v.accessLock.Unlock()
  265. _ = v.dataFile.Close()
  266. var e error
  267. if e = os.Rename(path.Join(v.dir, v.Id.String()+".cpd"), path.Join(v.dir, v.Id.String()+".dat")); e != nil {
  268. return e
  269. }
  270. if e = os.Rename(path.Join(v.dir, v.Id.String()+".cpx"), path.Join(v.dir, v.Id.String()+".idx")); e != nil {
  271. return e
  272. }
  273. if e = v.load(true); e != nil {
  274. return e
  275. }
  276. return nil
  277. }
  278. func (v *Volume) freeze() error {
  279. if v.readOnly {
  280. return nil
  281. }
  282. nm, ok := v.nm.(*NeedleMap)
  283. if !ok {
  284. return nil
  285. }
  286. v.accessLock.Lock()
  287. defer v.accessLock.Unlock()
  288. bn, _ := baseFilename(v.dataFile.Name())
  289. cdbFn := bn + ".cdb"
  290. glog.V(0).Infof("converting %s to %s", nm.indexFile.Name(), cdbFn)
  291. err := DumpNeedleMapToCdb(cdbFn, nm)
  292. if err != nil {
  293. return err
  294. }
  295. if v.nm, err = OpenCdbMap(cdbFn); err != nil {
  296. return err
  297. }
  298. nm.indexFile.Close()
  299. os.Remove(nm.indexFile.Name())
  300. v.readOnly = true
  301. return nil
  302. }
  303. func ScanVolumeFile(dirname string, collection string, id VolumeId,
  304. visitSuperBlock func(SuperBlock) error,
  305. visitNeedle func(n *Needle, offset int64) error) (err error) {
  306. var v *Volume
  307. if v, err = loadVolumeWithoutIndex(dirname, collection, id); err != nil {
  308. return
  309. }
  310. if err = visitSuperBlock(v.SuperBlock); err != nil {
  311. return
  312. }
  313. version := v.Version()
  314. offset := int64(SuperBlockSize)
  315. n, rest, e := ReadNeedleHeader(v.dataFile, version)
  316. if e != nil {
  317. err = fmt.Errorf("cannot read needle header: %s", e)
  318. return
  319. }
  320. for n != nil {
  321. if err = n.ReadNeedleBody(v.dataFile, version, rest); err != nil {
  322. err = fmt.Errorf("cannot read needle body: %s", err)
  323. return
  324. }
  325. if err = visitNeedle(n, offset); err != nil {
  326. return
  327. }
  328. offset += int64(NeedleHeaderSize + rest)
  329. if n, rest, err = ReadNeedleHeader(v.dataFile, version); err != nil {
  330. if err == io.EOF {
  331. return nil
  332. }
  333. return fmt.Errorf("cannot read needle header: %s", err)
  334. }
  335. }
  336. return
  337. }
  338. func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string) (err error) {
  339. var (
  340. dst, idx *os.File
  341. )
  342. if dst, err = os.OpenFile(dstName, os.O_WRONLY|os.O_CREATE, 0644); err != nil {
  343. return
  344. }
  345. defer dst.Close()
  346. if idx, err = os.OpenFile(idxName, os.O_WRONLY|os.O_CREATE, 0644); err != nil {
  347. return
  348. }
  349. defer idx.Close()
  350. nm := NewNeedleMap(idx)
  351. new_offset := int64(SuperBlockSize)
  352. err = ScanVolumeFile(v.dir, v.Collection, v.Id, func(superBlock SuperBlock) error {
  353. _, err = dst.Write(superBlock.Bytes())
  354. return err
  355. }, func(n *Needle, offset int64) error {
  356. nv, ok := v.nm.Get(n.Id)
  357. //glog.V(0).Infoln("file size is", n.Size, "rest", rest)
  358. if ok && int64(nv.Offset)*NeedlePaddingSize == offset {
  359. if nv.Size > 0 {
  360. if _, err = nm.Put(n.Id, uint32(new_offset/NeedlePaddingSize), n.Size); err != nil {
  361. return fmt.Errorf("cannot put needle: %s", err)
  362. }
  363. if _, err = n.Append(dst, v.Version()); err != nil {
  364. return fmt.Errorf("cannot append needle: %s", err)
  365. }
  366. new_offset += n.DiskSize()
  367. //glog.V(0).Infoln("saving key", n.Id, "volume offset", old_offset, "=>", new_offset, "data_size", n.Size, "rest", rest)
  368. }
  369. }
  370. return nil
  371. })
  372. return
  373. }
  374. func (v *Volume) ContentSize() uint64 {
  375. return v.nm.ContentSize()
  376. }
  // checkFile inspects filename with os.Stat and reports whether it exists,
  // whether the owner-read (0400) and owner-write (0200) permission bits are
  // set, and its modification time.
  //
  // A missing file yields exists == false with all other results zero. If Stat
  // fails for any other reason the file is reported as existing but neither
  // readable nor writable, with a zero modTime, instead of dereferencing the
  // missing FileInfo.
  //
  // Only the mode bits are examined; whether the current process can actually
  // open the file is not checked.
  func checkFile(filename string) (exists, canRead, canWrite bool, modTime time.Time) {
  	fi, err := os.Stat(filename)
  	if err != nil {
  		// Not-exist is the only error that proves the file is absent.
  		exists = !os.IsNotExist(err)
  		return
  	}
  	exists = true
  	canRead = fi.Mode()&0400 != 0
  	canWrite = fi.Mode()&0200 != 0
  	modTime = fi.ModTime()
  	return
  }
  393. func (v *Volume) ensureConvertIdxToCdb(fileName string) (cdbCanRead bool) {
  394. var indexFile *os.File
  395. var e error
  396. _, cdbCanRead, cdbCanWrite, cdbModTime := checkFile(fileName + ".cdb")
  397. _, idxCanRead, _, idxModeTime := checkFile(fileName + ".idx")
  398. if cdbCanRead && cdbModTime.After(idxModeTime) {
  399. return true
  400. }
  401. if !cdbCanWrite {
  402. return false
  403. }
  404. if !idxCanRead {
  405. glog.V(0).Infoln("Can not read file", fileName+".idx!")
  406. return false
  407. }
  408. glog.V(2).Infoln("opening file", fileName+".idx")
  409. if indexFile, e = os.Open(fileName + ".idx"); e != nil {
  410. glog.V(0).Infoln("Failed to read file", fileName+".idx !")
  411. return false
  412. }
  413. defer indexFile.Close()
  414. glog.V(0).Infof("converting %s.idx to %s.cdb", fileName, fileName)
  415. if e = ConvertIndexToCdb(fileName+".cdb", indexFile); e != nil {
  416. glog.V(0).Infof("error converting %s.idx to %s.cdb: %s", fileName, fileName, e.Error())
  417. return false
  418. }
  419. return true
  420. }