You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

405 lines
11 KiB

12 years ago
12 years ago
12 years ago
12 years ago
  1. package storage
  2. import (
  3. "bytes"
  4. "code.google.com/p/weed-fs/go/glog"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "os"
  9. "path"
  10. "sync"
  11. )
  // SuperBlockSize is the number of bytes occupied by the super block at the
  // start of a volume data file; needle data begins right after it.
  const SuperBlockSize = 8
  15. type SuperBlock struct {
  16. Version Version
  17. ReplicaType ReplicationType
  18. }
  19. func (s *SuperBlock) Bytes() []byte {
  20. header := make([]byte, SuperBlockSize)
  21. header[0] = byte(s.Version)
  22. header[1] = s.ReplicaType.Byte()
  23. return header
  24. }
  25. type Volume struct {
  26. Id VolumeId
  27. dir string
  28. dataFile *os.File
  29. nm NeedleMapper
  30. readOnly bool
  31. SuperBlock
  32. accessLock sync.Mutex
  33. }
  34. func NewVolume(dirname string, id VolumeId, replicationType ReplicationType) (v *Volume, e error) {
  35. v = &Volume{dir: dirname, Id: id}
  36. v.SuperBlock = SuperBlock{ReplicaType: replicationType}
  37. e = v.load(true)
  38. return
  39. }
  40. func loadVolumeWithoutIndex(dirname string, id VolumeId) (v *Volume, e error) {
  41. v = &Volume{dir: dirname, Id: id}
  42. v.SuperBlock = SuperBlock{ReplicaType: CopyNil}
  43. e = v.load(false)
  44. return
  45. }
  46. func (v *Volume) load(alsoLoadIndex bool) error {
  47. var e error
  48. fileName := path.Join(v.dir, v.Id.String())
  49. v.dataFile, e = os.OpenFile(fileName+".dat", os.O_RDWR|os.O_CREATE, 0644)
  50. if e != nil {
  51. if !os.IsPermission(e) {
  52. return fmt.Errorf("cannot create Volume Data %s.dat: %s", fileName, e.Error())
  53. }
  54. if v.dataFile, e = os.Open(fileName + ".dat"); e != nil {
  55. return fmt.Errorf("cannot open Volume Data %s.dat: %s", fileName, e.Error())
  56. }
  57. glog.V(0).Infoln("opening " + fileName + ".dat in READONLY mode")
  58. v.readOnly = true
  59. }
  60. if v.ReplicaType == CopyNil {
  61. e = v.readSuperBlock()
  62. } else {
  63. e = v.maybeWriteSuperBlock()
  64. }
  65. if e == nil && alsoLoadIndex {
  66. var indexFile *os.File
  67. if v.readOnly {
  68. glog.V(2).Infoln("opening file", fileName+".idx")
  69. if indexFile, e = os.Open(fileName + ".idx"); e != nil && !os.IsNotExist(e) {
  70. return fmt.Errorf("cannot open index file %s.idx: %s", fileName, e.Error())
  71. }
  72. if indexFile != nil {
  73. glog.V(2).Infoln("check file", fileName+".cdb")
  74. if _, err := os.Stat(fileName + ".cdb"); os.IsNotExist(err) {
  75. glog.V(0).Infof("converting %s.idx to %s.cdb", fileName, fileName)
  76. if e = ConvertIndexToCdb(fileName+".cdb", indexFile); e != nil {
  77. glog.V(0).Infof("error converting %s.idx to %s.cdb: %s", fileName, e.Error())
  78. } else {
  79. indexFile.Close()
  80. indexFile = nil
  81. }
  82. }
  83. }
  84. glog.V(2).Infoln("open file", fileName+".cdb")
  85. if v.nm, e = OpenCdbMap(fileName + ".cdb"); e != nil {
  86. if os.IsNotExist(e) {
  87. glog.V(0).Infof("Failed to read cdb file %s, fall back to normal readonly mode.", fileName)
  88. } else {
  89. glog.V(0).Infof("%s.cdb open errro:%s", fileName, e.Error())
  90. return e
  91. }
  92. }
  93. }
  94. if v.readOnly {
  95. glog.V(1).Infoln("open to read file", fileName+".idx")
  96. indexFile, e = os.OpenFile(fileName+".idx", os.O_RDONLY, 0644)
  97. if e != nil {
  98. return fmt.Errorf("cannot read Volume Data %s.dat: %s", fileName, e.Error())
  99. }
  100. } else {
  101. glog.V(1).Infoln("open to write file", fileName+".idx")
  102. indexFile, e = os.OpenFile(fileName+".idx", os.O_RDWR|os.O_CREATE, 0644)
  103. if e != nil {
  104. return fmt.Errorf("cannot write Volume Data %s.dat: %s", fileName, e.Error())
  105. }
  106. }
  107. glog.V(0).Infoln("loading file", fileName+".idx", "readonly", v.readOnly)
  108. if v.nm, e = LoadNeedleMap(indexFile); e != nil {
  109. glog.V(0).Infoln("loading error:", e)
  110. }
  111. }
  112. return e
  113. }
  114. func (v *Volume) Version() Version {
  115. return v.SuperBlock.Version
  116. }
  117. func (v *Volume) Size() int64 {
  118. v.accessLock.Lock()
  119. defer v.accessLock.Unlock()
  120. stat, e := v.dataFile.Stat()
  121. if e == nil {
  122. return stat.Size()
  123. }
  124. glog.V(0).Infof("Failed to read file size %s %s", v.dataFile.Name(), e.Error())
  125. return -1
  126. }
  127. func (v *Volume) Close() {
  128. v.accessLock.Lock()
  129. defer v.accessLock.Unlock()
  130. v.nm.Close()
  131. _ = v.dataFile.Close()
  132. }
  133. func (v *Volume) maybeWriteSuperBlock() error {
  134. stat, e := v.dataFile.Stat()
  135. if e != nil {
  136. glog.V(0).Infof("failed to stat datafile %s: %s", v.dataFile, e.Error())
  137. return e
  138. }
  139. if stat.Size() == 0 {
  140. v.SuperBlock.Version = CurrentVersion
  141. _, e = v.dataFile.Write(v.SuperBlock.Bytes())
  142. if e != nil && os.IsPermission(e) {
  143. //read-only, but zero length - recreate it!
  144. if v.dataFile, e = os.Create(v.dataFile.Name()); e == nil {
  145. if _, e = v.dataFile.Write(v.SuperBlock.Bytes()); e == nil {
  146. v.readOnly = false
  147. }
  148. }
  149. }
  150. }
  151. return e
  152. }
  153. func (v *Volume) readSuperBlock() (err error) {
  154. if _, err = v.dataFile.Seek(0, 0); err != nil {
  155. return fmt.Errorf("cannot seek to the beginning of %s: %s", v.dataFile, err)
  156. }
  157. header := make([]byte, SuperBlockSize)
  158. if _, e := v.dataFile.Read(header); e != nil {
  159. return fmt.Errorf("cannot read superblock: %s", e)
  160. }
  161. v.SuperBlock, err = ParseSuperBlock(header)
  162. return err
  163. }
  164. func ParseSuperBlock(header []byte) (superBlock SuperBlock, err error) {
  165. superBlock.Version = Version(header[0])
  166. if superBlock.ReplicaType, err = NewReplicationTypeFromByte(header[1]); err != nil {
  167. err = fmt.Errorf("cannot read replica type: %s", err)
  168. }
  169. return
  170. }
  171. func (v *Volume) NeedToReplicate() bool {
  172. return v.ReplicaType.GetCopyCount() > 1
  173. }
  174. func (v *Volume) isFileUnchanged(n *Needle) bool {
  175. nv, ok := v.nm.Get(n.Id)
  176. if ok && nv.Offset > 0 {
  177. if _, err := v.dataFile.Seek(int64(nv.Offset)*NeedlePaddingSize, 0); err != nil {
  178. return false
  179. }
  180. oldNeedle := new(Needle)
  181. oldNeedle.Read(v.dataFile, nv.Size, v.Version())
  182. if oldNeedle.Checksum == n.Checksum && bytes.Equal(oldNeedle.Data, n.Data) {
  183. n.Size = oldNeedle.Size
  184. return true
  185. }
  186. }
  187. return false
  188. }
  189. func (v *Volume) write(n *Needle) (size uint32, err error) {
  190. if v.readOnly {
  191. err = fmt.Errorf("%s is read-only", v.dataFile)
  192. return
  193. }
  194. v.accessLock.Lock()
  195. defer v.accessLock.Unlock()
  196. if v.isFileUnchanged(n) {
  197. size = n.Size
  198. return
  199. }
  200. var offset int64
  201. if offset, err = v.dataFile.Seek(0, 2); err != nil {
  202. return
  203. }
  204. //ensure file writing starting from aligned positions
  205. if offset%NeedlePaddingSize != 0 {
  206. offset = offset + (NeedlePaddingSize - offset%NeedlePaddingSize)
  207. if offset, err = v.dataFile.Seek(offset, 0); err != nil {
  208. return
  209. }
  210. }
  211. if size, err = n.Append(v.dataFile, v.Version()); err != nil {
  212. if e := v.dataFile.Truncate(offset); e != nil {
  213. err = fmt.Errorf("%s\ncannot truncate %s: %s", err, v.dataFile, e)
  214. }
  215. return
  216. }
  217. nv, ok := v.nm.Get(n.Id)
  218. if !ok || int64(nv.Offset)*NeedlePaddingSize < offset {
  219. _, err = v.nm.Put(n.Id, uint32(offset/NeedlePaddingSize), n.Size)
  220. }
  221. return
  222. }
  223. func (v *Volume) delete(n *Needle) (uint32, error) {
  224. if v.readOnly {
  225. return 0, fmt.Errorf("%s is read-only", v.dataFile)
  226. }
  227. v.accessLock.Lock()
  228. defer v.accessLock.Unlock()
  229. nv, ok := v.nm.Get(n.Id)
  230. //fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size)
  231. if ok {
  232. size := nv.Size
  233. if err := v.nm.Delete(n.Id); err != nil {
  234. return size, err
  235. }
  236. if _, err := v.dataFile.Seek(0, 2); err != nil {
  237. return size, err
  238. }
  239. n.Data = make([]byte, 0)
  240. _, err := n.Append(v.dataFile, v.Version())
  241. return size, err
  242. }
  243. return 0, nil
  244. }
  245. func (v *Volume) read(n *Needle) (int, error) {
  246. v.accessLock.Lock()
  247. defer v.accessLock.Unlock()
  248. nv, ok := v.nm.Get(n.Id)
  249. if ok && nv.Offset > 0 {
  250. if _, err := v.dataFile.Seek(int64(nv.Offset)*NeedlePaddingSize, 0); err != nil {
  251. return -1, err
  252. }
  253. return n.Read(v.dataFile, nv.Size, v.Version())
  254. }
  255. return -1, errors.New("Not Found")
  256. }
  257. func (v *Volume) garbageLevel() float64 {
  258. return float64(v.nm.DeletedSize()) / float64(v.ContentSize())
  259. }
  260. func (v *Volume) compact() error {
  261. v.accessLock.Lock()
  262. defer v.accessLock.Unlock()
  263. filePath := path.Join(v.dir, v.Id.String())
  264. return v.copyDataAndGenerateIndexFile(filePath+".cpd", filePath+".cpx")
  265. }
  266. func (v *Volume) commitCompact() error {
  267. v.accessLock.Lock()
  268. defer v.accessLock.Unlock()
  269. _ = v.dataFile.Close()
  270. var e error
  271. if e = os.Rename(path.Join(v.dir, v.Id.String()+".cpd"), path.Join(v.dir, v.Id.String()+".dat")); e != nil {
  272. return e
  273. }
  274. if e = os.Rename(path.Join(v.dir, v.Id.String()+".cpx"), path.Join(v.dir, v.Id.String()+".idx")); e != nil {
  275. return e
  276. }
  277. if e = v.load(true); e != nil {
  278. return e
  279. }
  280. return nil
  281. }
  282. func (v *Volume) freeze() error {
  283. if v.readOnly {
  284. return nil
  285. }
  286. nm, ok := v.nm.(*NeedleMap)
  287. if !ok {
  288. return nil
  289. }
  290. v.accessLock.Lock()
  291. defer v.accessLock.Unlock()
  292. bn, _ := nakeFilename(v.dataFile.Name())
  293. cdbFn := bn + ".cdb"
  294. glog.V(0).Infof("converting %s to %s", nm.indexFile.Name(), cdbFn)
  295. err := DumpNeedleMapToCdb(cdbFn, nm)
  296. if err != nil {
  297. return err
  298. }
  299. if v.nm, err = OpenCdbMap(cdbFn); err != nil {
  300. return err
  301. }
  302. nm.indexFile.Close()
  303. os.Remove(nm.indexFile.Name())
  304. v.readOnly = true
  305. return nil
  306. }
  307. func ScanVolumeFile(dirname string, id VolumeId,
  308. visitSuperBlock func(SuperBlock) error,
  309. visitNeedle func(n *Needle, offset uint32) error) (err error) {
  310. var v *Volume
  311. if v, err = loadVolumeWithoutIndex(dirname, id); err != nil {
  312. return
  313. }
  314. if err = visitSuperBlock(v.SuperBlock); err != nil {
  315. return
  316. }
  317. version := v.Version()
  318. offset := uint32(SuperBlockSize)
  319. n, rest, e := ReadNeedleHeader(v.dataFile, version)
  320. if e != nil {
  321. err = fmt.Errorf("cannot read needle header: %s", e)
  322. return
  323. }
  324. for n != nil {
  325. if err = n.ReadNeedleBody(v.dataFile, version, rest); err != nil {
  326. err = fmt.Errorf("cannot read needle body: %s", err)
  327. return
  328. }
  329. if err = visitNeedle(n, offset); err != nil {
  330. return
  331. }
  332. offset += NeedleHeaderSize + rest
  333. if n, rest, err = ReadNeedleHeader(v.dataFile, version); err != nil {
  334. if err == io.EOF {
  335. return nil
  336. }
  337. return fmt.Errorf("cannot read needle header: %s", err)
  338. }
  339. }
  340. return
  341. }
  342. func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string) (err error) {
  343. var (
  344. dst, idx *os.File
  345. )
  346. if dst, err = os.OpenFile(dstName, os.O_WRONLY|os.O_CREATE, 0644); err != nil {
  347. return
  348. }
  349. defer dst.Close()
  350. if idx, err = os.OpenFile(idxName, os.O_WRONLY|os.O_CREATE, 0644); err != nil {
  351. return
  352. }
  353. defer idx.Close()
  354. nm := NewNeedleMap(idx)
  355. new_offset := uint32(SuperBlockSize)
  356. err = ScanVolumeFile(v.dir, v.Id, func(superBlock SuperBlock) error {
  357. _, err = dst.Write(superBlock.Bytes())
  358. return err
  359. }, func(n *Needle, offset uint32) error {
  360. nv, ok := v.nm.Get(n.Id)
  361. //glog.V(0).Infoln("file size is", n.Size, "rest", rest)
  362. if ok && nv.Offset*NeedlePaddingSize == offset {
  363. if nv.Size > 0 {
  364. if _, err = nm.Put(n.Id, new_offset/NeedlePaddingSize, n.Size); err != nil {
  365. return fmt.Errorf("cannot put needle: %s", err)
  366. }
  367. if _, err = n.Append(dst, v.Version()); err != nil {
  368. return fmt.Errorf("cannot append needle: %s", err)
  369. }
  370. new_offset += n.DiskSize()
  371. //glog.V(0).Infoln("saving key", n.Id, "volume offset", old_offset, "=>", new_offset, "data_size", n.Size, "rest", rest)
  372. }
  373. }
  374. return nil
  375. })
  376. return
  377. }
  378. func (v *Volume) ContentSize() uint64 {
  379. return v.nm.ContentSize()
  380. }