You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

438 lines
11 KiB

12 years ago
12 years ago
12 years ago
12 years ago
11 years ago
12 years ago
11 years ago
11 years ago
11 years ago
  1. package storage
  2. import (
  3. "bytes"
  4. "code.google.com/p/weed-fs/go/glog"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "os"
  9. "path"
  10. "sync"
  11. "time"
  12. )
  // SuperBlockSize is the number of bytes occupied by the super block header
  // at the very beginning of a volume data file.
  const SuperBlockSize = 8
  16. type SuperBlock struct {
  17. Version Version
  18. ReplicaType ReplicationType
  19. }
  20. func (s *SuperBlock) Bytes() []byte {
  21. header := make([]byte, SuperBlockSize)
  22. header[0] = byte(s.Version)
  23. header[1] = s.ReplicaType.Byte()
  24. return header
  25. }
  26. type Volume struct {
  27. Id VolumeId
  28. dir string
  29. dataFile *os.File
  30. nm NeedleMapper
  31. readOnly bool
  32. SuperBlock
  33. accessLock sync.Mutex
  34. }
  35. func NewVolume(dirname string, id VolumeId, replicationType ReplicationType) (v *Volume, e error) {
  36. v = &Volume{dir: dirname, Id: id}
  37. v.SuperBlock = SuperBlock{ReplicaType: replicationType}
  38. e = v.load(true)
  39. return
  40. }
  41. func loadVolumeWithoutIndex(dirname string, id VolumeId) (v *Volume, e error) {
  42. v = &Volume{dir: dirname, Id: id}
  43. v.SuperBlock = SuperBlock{ReplicaType: CopyNil}
  44. e = v.load(false)
  45. return
  46. }
  47. func (v *Volume) load(alsoLoadIndex bool) error {
  48. var e error
  49. fileName := path.Join(v.dir, v.Id.String())
  50. if exists, canRead, canWrite, _ := checkFile(fileName + ".dat"); exists && !canRead {
  51. return fmt.Errorf("cannot read Volume Data file %s.dat", fileName)
  52. } else if !exists || canWrite {
  53. v.dataFile, e = os.OpenFile(fileName+".dat", os.O_RDWR|os.O_CREATE, 0644)
  54. } else if exists && canRead {
  55. glog.V(0).Infoln("opening " + fileName + ".dat in READONLY mode")
  56. v.dataFile, e = os.Open(fileName + ".dat")
  57. v.readOnly = true
  58. } else {
  59. return fmt.Errorf("Unknown state about Volume Data file %s.dat", fileName)
  60. }
  61. if e != nil {
  62. if !os.IsPermission(e) {
  63. return fmt.Errorf("cannot load Volume Data %s.dat: %s", fileName, e.Error())
  64. }
  65. }
  66. if v.ReplicaType == CopyNil {
  67. e = v.readSuperBlock()
  68. } else {
  69. e = v.maybeWriteSuperBlock()
  70. }
  71. if e == nil && alsoLoadIndex {
  72. if v.readOnly {
  73. if v.ensureConvertIdxToCdb(fileName) {
  74. v.nm, e = OpenCdbMap(fileName + ".cdb")
  75. return e
  76. }
  77. }
  78. var indexFile *os.File
  79. if v.readOnly {
  80. glog.V(1).Infoln("open to read file", fileName+".idx")
  81. if indexFile, e = os.OpenFile(fileName+".idx", os.O_RDONLY, 0644); e != nil {
  82. return fmt.Errorf("cannot read Volume Data %s.dat: %s", fileName, e.Error())
  83. }
  84. } else {
  85. glog.V(1).Infoln("open to write file", fileName+".idx")
  86. if indexFile, e = os.OpenFile(fileName+".idx", os.O_RDWR|os.O_CREATE, 0644); e != nil {
  87. return fmt.Errorf("cannot write Volume Data %s.dat: %s", fileName, e.Error())
  88. }
  89. }
  90. glog.V(0).Infoln("loading file", fileName+".idx", "readonly", v.readOnly)
  91. if v.nm, e = LoadNeedleMap(indexFile); e != nil {
  92. glog.V(0).Infoln("loading error:", e)
  93. }
  94. }
  95. return e
  96. }
  97. func (v *Volume) Version() Version {
  98. return v.SuperBlock.Version
  99. }
  100. func (v *Volume) Size() int64 {
  101. v.accessLock.Lock()
  102. defer v.accessLock.Unlock()
  103. stat, e := v.dataFile.Stat()
  104. if e == nil {
  105. return stat.Size()
  106. }
  107. glog.V(0).Infof("Failed to read file size %s %s", v.dataFile.Name(), e.Error())
  108. return -1
  109. }
  110. func (v *Volume) Close() {
  111. v.accessLock.Lock()
  112. defer v.accessLock.Unlock()
  113. v.nm.Close()
  114. _ = v.dataFile.Close()
  115. }
  116. func (v *Volume) maybeWriteSuperBlock() error {
  117. stat, e := v.dataFile.Stat()
  118. if e != nil {
  119. glog.V(0).Infof("failed to stat datafile %s: %s", v.dataFile, e.Error())
  120. return e
  121. }
  122. if stat.Size() == 0 {
  123. v.SuperBlock.Version = CurrentVersion
  124. _, e = v.dataFile.Write(v.SuperBlock.Bytes())
  125. if e != nil && os.IsPermission(e) {
  126. //read-only, but zero length - recreate it!
  127. if v.dataFile, e = os.Create(v.dataFile.Name()); e == nil {
  128. if _, e = v.dataFile.Write(v.SuperBlock.Bytes()); e == nil {
  129. v.readOnly = false
  130. }
  131. }
  132. }
  133. }
  134. return e
  135. }
  136. func (v *Volume) readSuperBlock() (err error) {
  137. if _, err = v.dataFile.Seek(0, 0); err != nil {
  138. return fmt.Errorf("cannot seek to the beginning of %s: %s", v.dataFile.Name(), err.Error())
  139. }
  140. header := make([]byte, SuperBlockSize)
  141. if _, e := v.dataFile.Read(header); e != nil {
  142. return fmt.Errorf("cannot read superblock: %s", e.Error())
  143. }
  144. v.SuperBlock, err = ParseSuperBlock(header)
  145. return err
  146. }
  147. func ParseSuperBlock(header []byte) (superBlock SuperBlock, err error) {
  148. superBlock.Version = Version(header[0])
  149. if superBlock.ReplicaType, err = NewReplicationTypeFromByte(header[1]); err != nil {
  150. err = fmt.Errorf("cannot read replica type: %s", err.Error())
  151. }
  152. return
  153. }
  154. func (v *Volume) NeedToReplicate() bool {
  155. return v.ReplicaType.GetCopyCount() > 1
  156. }
  157. func (v *Volume) isFileUnchanged(n *Needle) bool {
  158. nv, ok := v.nm.Get(n.Id)
  159. if ok && nv.Offset > 0 {
  160. if _, err := v.dataFile.Seek(int64(nv.Offset)*NeedlePaddingSize, 0); err != nil {
  161. return false
  162. }
  163. oldNeedle := new(Needle)
  164. oldNeedle.Read(v.dataFile, nv.Size, v.Version())
  165. if oldNeedle.Checksum == n.Checksum && bytes.Equal(oldNeedle.Data, n.Data) {
  166. n.Size = oldNeedle.Size
  167. return true
  168. }
  169. }
  170. return false
  171. }
  172. func (v *Volume) write(n *Needle) (size uint32, err error) {
  173. if v.readOnly {
  174. err = fmt.Errorf("%s is read-only", v.dataFile)
  175. return
  176. }
  177. v.accessLock.Lock()
  178. defer v.accessLock.Unlock()
  179. if v.isFileUnchanged(n) {
  180. size = n.Size
  181. glog.V(4).Infof("needle is unchanged!")
  182. return
  183. }
  184. var offset int64
  185. if offset, err = v.dataFile.Seek(0, 2); err != nil {
  186. return
  187. }
  188. //ensure file writing starting from aligned positions
  189. if offset%NeedlePaddingSize != 0 {
  190. offset = offset + (NeedlePaddingSize - offset%NeedlePaddingSize)
  191. if offset, err = v.dataFile.Seek(offset, 0); err != nil {
  192. glog.V(4).Infof("failed to align in datafile %s: %s", v.dataFile.Name(), err.Error())
  193. return
  194. }
  195. }
  196. if size, err = n.Append(v.dataFile, v.Version()); err != nil {
  197. if e := v.dataFile.Truncate(offset); e != nil {
  198. err = fmt.Errorf("%s\ncannot truncate %s: %s", err, v.dataFile, e.Error())
  199. }
  200. return
  201. }
  202. nv, ok := v.nm.Get(n.Id)
  203. if !ok || int64(nv.Offset)*NeedlePaddingSize < offset {
  204. if _, err = v.nm.Put(n.Id, uint32(offset/NeedlePaddingSize), n.Size); err != nil {
  205. glog.V(4).Infof("failed to save in needle map %d: %s", n.Id, err.Error())
  206. }
  207. }
  208. return
  209. }
  210. func (v *Volume) delete(n *Needle) (uint32, error) {
  211. if v.readOnly {
  212. return 0, fmt.Errorf("%s is read-only", v.dataFile)
  213. }
  214. v.accessLock.Lock()
  215. defer v.accessLock.Unlock()
  216. nv, ok := v.nm.Get(n.Id)
  217. //fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size)
  218. if ok {
  219. size := nv.Size
  220. if err := v.nm.Delete(n.Id); err != nil {
  221. return size, err
  222. }
  223. if _, err := v.dataFile.Seek(0, 2); err != nil {
  224. return size, err
  225. }
  226. n.Data = make([]byte, 0)
  227. _, err := n.Append(v.dataFile, v.Version())
  228. return size, err
  229. }
  230. return 0, nil
  231. }
  232. func (v *Volume) read(n *Needle) (int, error) {
  233. v.accessLock.Lock()
  234. defer v.accessLock.Unlock()
  235. nv, ok := v.nm.Get(n.Id)
  236. if ok && nv.Offset > 0 {
  237. if _, err := v.dataFile.Seek(int64(nv.Offset)*NeedlePaddingSize, 0); err != nil {
  238. return -1, err
  239. }
  240. return n.Read(v.dataFile, nv.Size, v.Version())
  241. }
  242. return -1, errors.New("Not Found")
  243. }
  244. func (v *Volume) garbageLevel() float64 {
  245. return float64(v.nm.DeletedSize()) / float64(v.ContentSize())
  246. }
  247. func (v *Volume) Compact() error {
  248. v.accessLock.Lock()
  249. defer v.accessLock.Unlock()
  250. filePath := path.Join(v.dir, v.Id.String())
  251. return v.copyDataAndGenerateIndexFile(filePath+".cpd", filePath+".cpx")
  252. }
  253. func (v *Volume) commitCompact() error {
  254. v.accessLock.Lock()
  255. defer v.accessLock.Unlock()
  256. _ = v.dataFile.Close()
  257. var e error
  258. if e = os.Rename(path.Join(v.dir, v.Id.String()+".cpd"), path.Join(v.dir, v.Id.String()+".dat")); e != nil {
  259. return e
  260. }
  261. if e = os.Rename(path.Join(v.dir, v.Id.String()+".cpx"), path.Join(v.dir, v.Id.String()+".idx")); e != nil {
  262. return e
  263. }
  264. if e = v.load(true); e != nil {
  265. return e
  266. }
  267. return nil
  268. }
  269. func (v *Volume) freeze() error {
  270. if v.readOnly {
  271. return nil
  272. }
  273. nm, ok := v.nm.(*NeedleMap)
  274. if !ok {
  275. return nil
  276. }
  277. v.accessLock.Lock()
  278. defer v.accessLock.Unlock()
  279. bn, _ := nakeFilename(v.dataFile.Name())
  280. cdbFn := bn + ".cdb"
  281. glog.V(0).Infof("converting %s to %s", nm.indexFile.Name(), cdbFn)
  282. err := DumpNeedleMapToCdb(cdbFn, nm)
  283. if err != nil {
  284. return err
  285. }
  286. if v.nm, err = OpenCdbMap(cdbFn); err != nil {
  287. return err
  288. }
  289. nm.indexFile.Close()
  290. os.Remove(nm.indexFile.Name())
  291. v.readOnly = true
  292. return nil
  293. }
  294. func ScanVolumeFile(dirname string, id VolumeId,
  295. visitSuperBlock func(SuperBlock) error,
  296. visitNeedle func(n *Needle, offset int64) error) (err error) {
  297. var v *Volume
  298. if v, err = loadVolumeWithoutIndex(dirname, id); err != nil {
  299. return
  300. }
  301. if err = visitSuperBlock(v.SuperBlock); err != nil {
  302. return
  303. }
  304. version := v.Version()
  305. offset := int64(SuperBlockSize)
  306. n, rest, e := ReadNeedleHeader(v.dataFile, version)
  307. if e != nil {
  308. err = fmt.Errorf("cannot read needle header: %s", e)
  309. return
  310. }
  311. for n != nil {
  312. if err = n.ReadNeedleBody(v.dataFile, version, rest); err != nil {
  313. err = fmt.Errorf("cannot read needle body: %s", err)
  314. return
  315. }
  316. if err = visitNeedle(n, offset); err != nil {
  317. return
  318. }
  319. offset += int64(NeedleHeaderSize + rest)
  320. if n, rest, err = ReadNeedleHeader(v.dataFile, version); err != nil {
  321. if err == io.EOF {
  322. return nil
  323. }
  324. return fmt.Errorf("cannot read needle header: %s", err)
  325. }
  326. }
  327. return
  328. }
  329. func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string) (err error) {
  330. var (
  331. dst, idx *os.File
  332. )
  333. if dst, err = os.OpenFile(dstName, os.O_WRONLY|os.O_CREATE, 0644); err != nil {
  334. return
  335. }
  336. defer dst.Close()
  337. if idx, err = os.OpenFile(idxName, os.O_WRONLY|os.O_CREATE, 0644); err != nil {
  338. return
  339. }
  340. defer idx.Close()
  341. nm := NewNeedleMap(idx)
  342. new_offset := int64(SuperBlockSize)
  343. err = ScanVolumeFile(v.dir, v.Id, func(superBlock SuperBlock) error {
  344. _, err = dst.Write(superBlock.Bytes())
  345. return err
  346. }, func(n *Needle, offset int64) error {
  347. nv, ok := v.nm.Get(n.Id)
  348. //glog.V(0).Infoln("file size is", n.Size, "rest", rest)
  349. if ok && int64(nv.Offset)*NeedlePaddingSize == offset {
  350. if nv.Size > 0 {
  351. if _, err = nm.Put(n.Id, uint32(new_offset/NeedlePaddingSize), n.Size); err != nil {
  352. return fmt.Errorf("cannot put needle: %s", err)
  353. }
  354. if _, err = n.Append(dst, v.Version()); err != nil {
  355. return fmt.Errorf("cannot append needle: %s", err)
  356. }
  357. new_offset += n.DiskSize()
  358. //glog.V(0).Infoln("saving key", n.Id, "volume offset", old_offset, "=>", new_offset, "data_size", n.Size, "rest", rest)
  359. }
  360. }
  361. return nil
  362. })
  363. return
  364. }
  365. func (v *Volume) ContentSize() uint64 {
  366. return v.nm.ContentSize()
  367. }
  // checkFile stats filename and reports whether it exists, whether the owner
  // read (0400) and owner write (0200) permission bits are set, and its
  // modification time.
  //
  // A missing file yields exists == false. Any other Stat failure is reported
  // as a file that exists but can be neither read nor written; nothing can be
  // learned about it, and there is no FileInfo to inspect.
  func checkFile(filename string) (exists, canRead, canWrite bool, modTime time.Time) {
  	fi, err := os.Stat(filename)
  	if err != nil {
  		exists = !os.IsNotExist(err)
  		return
  	}
  	exists = true
  	canRead = fi.Mode()&0400 != 0
  	canWrite = fi.Mode()&0200 != 0
  	modTime = fi.ModTime()
  	return
  }
  384. func (v *Volume) ensureConvertIdxToCdb(fileName string) (cdbCanRead bool) {
  385. var indexFile *os.File
  386. var e error
  387. _, cdbCanRead, cdbCanWrite, cdbModTime := checkFile(fileName + ".cdb")
  388. _, idxCanRead, _, idxModeTime := checkFile(fileName + ".idx")
  389. if cdbCanRead && cdbModTime.After(idxModeTime) {
  390. return true
  391. }
  392. if !cdbCanWrite {
  393. return false
  394. }
  395. if !idxCanRead {
  396. glog.V(0).Infoln("Can not read file", fileName+".idx!")
  397. return false
  398. }
  399. glog.V(2).Infoln("opening file", fileName+".idx")
  400. if indexFile, e = os.Open(fileName + ".idx"); e != nil {
  401. glog.V(0).Infoln("Failed to read file", fileName+".idx !")
  402. return false
  403. }
  404. defer indexFile.Close()
  405. glog.V(0).Infof("converting %s.idx to %s.cdb", fileName, fileName)
  406. if e = ConvertIndexToCdb(fileName+".cdb", indexFile); e != nil {
  407. glog.V(0).Infof("error converting %s.idx to %s.cdb: %s", fileName, fileName, e.Error())
  408. return false
  409. }
  410. return true
  411. }