459 lines
12 KiB

package storage
import (
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/seaweedfs/seaweedfs/weed/storage/idx"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/syndtr/goleveldb/leveldb"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
. "github.com/seaweedfs/seaweedfs/weed/storage/types"
)
// mark it every watermarkBatchSize operations
const watermarkBatchSize = 10000
var watermarkKey = []byte("idx_entry_watermark")
type LevelDbNeedleMap struct {
baseNeedleMapper
dbFileName string
db *leveldb.DB
ldbOpts *opt.Options
ldbAccessLock sync.RWMutex
exitChan chan bool
// no need to use atomic
accessFlag int64
ldbTimeout int64
recordCount uint64
}
func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Options, ldbTimeout int64) (m *LevelDbNeedleMap, err error) {
m = &LevelDbNeedleMap{dbFileName: dbFileName}
m.indexFile = indexFile
if !isLevelDbFresh(dbFileName, indexFile) {
glog.V(1).Infof("Start to Generate %s from %s", dbFileName, indexFile.Name())
generateLevelDbFile(dbFileName, indexFile)
glog.V(1).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name())
}
if stat, err := indexFile.Stat(); err != nil {
glog.Fatalf("stat file %s: %v", indexFile.Name(), err)
} else {
m.indexFileOffset = stat.Size()
}
glog.V(1).Infof("Opening %s...", dbFileName)
if m.ldbTimeout == 0 {
if m.db, err = leveldb.OpenFile(dbFileName, opts); err != nil {
if errors.IsCorrupted(err) {
m.db, err = leveldb.RecoverFile(dbFileName, opts)
}
if err != nil {
return
}
}
glog.V(0).Infof("Loading %s... , watermark: %d", dbFileName, getWatermark(m.db))
m.recordCount = uint64(m.indexFileOffset / NeedleMapEntrySize)
watermark := (m.recordCount / watermarkBatchSize) * watermarkBatchSize
err = setWatermark(m.db, watermark)
if err != nil {
glog.Fatalf("set watermark for %s error: %s\n", dbFileName, err)
return
}
}
mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile)
if indexLoadError != nil {
return nil, indexLoadError
}
m.mapMetric = *mm
m.ldbTimeout = ldbTimeout
if m.ldbTimeout > 0 {
m.ldbOpts = opts
m.exitChan = make(chan bool, 1)
m.accessFlag = 0
go lazyLoadingRoutine(m)
}
return
}
func isLevelDbFresh(dbFileName string, indexFile *os.File) bool {
// normally we always write to index file first
dbLogFile, err := os.Open(filepath.Join(dbFileName, "LOG"))
if err != nil {
return false
}
defer dbLogFile.Close()
dbStat, dbStatErr := dbLogFile.Stat()
indexStat, indexStatErr := indexFile.Stat()
if dbStatErr != nil || indexStatErr != nil {
glog.V(0).Infof("Can not stat file: %v and %v", dbStatErr, indexStatErr)
return false
}
return dbStat.ModTime().After(indexStat.ModTime())
}
func generateLevelDbFile(dbFileName string, indexFile *os.File) error {
db, err := leveldb.OpenFile(dbFileName, nil)
if err != nil {
return err
}
defer db.Close()
watermark := getWatermark(db)
if stat, err := indexFile.Stat(); err != nil {
glog.Fatalf("stat file %s: %v", indexFile.Name(), err)
return err
} else {
if watermark*NeedleMapEntrySize > uint64(stat.Size()) {
glog.Warningf("wrong watermark %d for filesize %d", watermark, stat.Size())
}
glog.V(0).Infof("generateLevelDbFile %s, watermark %d, num of entries:%d", dbFileName, watermark, (uint64(stat.Size())-watermark*NeedleMapEntrySize)/NeedleMapEntrySize)
}
return idx.WalkIndexFile(indexFile, watermark, func(key NeedleId, offset Offset, size Size) error {
if !offset.IsZero() && size.IsValid() {
levelDbWrite(db, key, offset, size, false, 0)
} else {
levelDbDelete(db, key)
}
return nil
})
}
func (m *LevelDbNeedleMap) Get(key NeedleId) (element *needle_map.NeedleValue, ok bool) {
bytes := make([]byte, NeedleIdSize)
if m.ldbTimeout > 0 {
m.ldbAccessLock.RLock()
defer m.ldbAccessLock.RUnlock()
loadErr := reloadLdb(m)
if loadErr != nil {
return nil, false
}
}
NeedleIdToBytes(bytes[0:NeedleIdSize], key)
data, err := m.db.Get(bytes, nil)
if err != nil || len(data) != OffsetSize+SizeSize {
return nil, false
}
offset := BytesToOffset(data[0:OffsetSize])
size := BytesToSize(data[OffsetSize : OffsetSize+SizeSize])
return &needle_map.NeedleValue{Key: key, Offset: offset, Size: size}, true
}
func (m *LevelDbNeedleMap) Put(key NeedleId, offset Offset, size Size) error {
var oldSize Size
var watermark uint64
if m.ldbTimeout > 0 {
m.ldbAccessLock.RLock()
defer m.ldbAccessLock.RUnlock()
loadErr := reloadLdb(m)
if loadErr != nil {
return loadErr
}
}
if oldNeedle, ok := m.Get(key); ok {
oldSize = oldNeedle.Size
}
m.logPut(key, oldSize, size)
// write to index file first
if err := m.appendToIndexFile(key, offset, size); err != nil {
return fmt.Errorf("cannot write to indexfile %s: %v", m.indexFile.Name(), err)
}
m.recordCount++
if m.recordCount%watermarkBatchSize != 0 {
watermark = 0
} else {
watermark = (m.recordCount / watermarkBatchSize) * watermarkBatchSize
glog.V(1).Infof("put cnt:%d for %s,watermark: %d", m.recordCount, m.dbFileName, watermark)
}
return levelDbWrite(m.db, key, offset, size, watermark == 0, watermark)
}
func getWatermark(db *leveldb.DB) uint64 {
data, err := db.Get(watermarkKey, nil)
if err != nil || len(data) != 8 {
glog.V(1).Infof("read previous watermark from db: %v, %d", err, len(data))
return 0
}
return util.BytesToUint64(data)
}
func setWatermark(db *leveldb.DB, watermark uint64) error {
glog.V(3).Infof("set watermark %d", watermark)
var wmBytes = make([]byte, 8)
util.Uint64toBytes(wmBytes, watermark)
if err := db.Put(watermarkKey, wmBytes, nil); err != nil {
return fmt.Errorf("failed to setWatermark: %v", err)
}
return nil
}
func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size Size, updateWatermark bool, watermark uint64) error {
bytes := needle_map.ToBytes(key, offset, size)
if err := db.Put(bytes[0:NeedleIdSize], bytes[NeedleIdSize:NeedleIdSize+OffsetSize+SizeSize], nil); err != nil {
return fmt.Errorf("failed to write leveldb: %v", err)
}
// set watermark
if updateWatermark {
return setWatermark(db, watermark)
}
return nil
}
func levelDbDelete(db *leveldb.DB, key NeedleId) error {
bytes := make([]byte, NeedleIdSize)
NeedleIdToBytes(bytes, key)
return db.Delete(bytes, nil)
}
func (m *LevelDbNeedleMap) Delete(key NeedleId, offset Offset) error {
var watermark uint64
if m.ldbTimeout > 0 {
m.ldbAccessLock.RLock()
defer m.ldbAccessLock.RUnlock()
loadErr := reloadLdb(m)
if loadErr != nil {
return loadErr
}
}
oldNeedle, found := m.Get(key)
if !found || oldNeedle.Size.IsDeleted() {
return nil
}
m.logDelete(oldNeedle.Size)
// write to index file first
if err := m.appendToIndexFile(key, offset, TombstoneFileSize); err != nil {
return err
}
m.recordCount++
if m.recordCount%watermarkBatchSize != 0 {
watermark = 0
} else {
watermark = (m.recordCount / watermarkBatchSize) * watermarkBatchSize
}
return levelDbWrite(m.db, key, oldNeedle.Offset, -oldNeedle.Size, watermark == 0, watermark)
}
func (m *LevelDbNeedleMap) Close() {
if m.indexFile != nil {
indexFileName := m.indexFile.Name()
if err := m.indexFile.Sync(); err != nil {
glog.Warningf("sync file %s failed: %v", indexFileName, err)
}
if err := m.indexFile.Close(); err != nil {
glog.Warningf("close index file %s failed: %v", indexFileName, err)
}
}
if m.db != nil {
if err := m.db.Close(); err != nil {
glog.Warningf("close levelDB failed: %v", err)
}
}
if m.ldbTimeout > 0 {
m.exitChan <- true
}
}
func (m *LevelDbNeedleMap) Destroy() error {
m.Close()
os.Remove(m.indexFile.Name())
return os.RemoveAll(m.dbFileName)
}
func (m *LevelDbNeedleMap) UpdateNeedleMap(v *Volume, indexFile *os.File, opts *opt.Options, ldbTimeout int64) error {
if v.nm != nil {
v.nm.Close()
v.nm = nil
}
defer func() {
if v.tmpNm != nil {
v.tmpNm.Close()
v.tmpNm = nil
}
}()
levelDbFile := v.FileName(".ldb")
m.indexFile = indexFile
err := os.RemoveAll(levelDbFile)
if err != nil {
return err
}
if err = os.Rename(v.FileName(".cpldb"), levelDbFile); err != nil {
return fmt.Errorf("rename %s: %v", levelDbFile, err)
}
db, err := leveldb.OpenFile(levelDbFile, opts)
if err != nil {
if errors.IsCorrupted(err) {
db, err = leveldb.RecoverFile(levelDbFile, opts)
}
if err != nil {
return err
}
}
m.db = db
stat, e := indexFile.Stat()
if e != nil {
glog.Fatalf("stat file %s: %v", indexFile.Name(), e)
return e
}
m.indexFileOffset = stat.Size()
m.recordCount = uint64(stat.Size() / NeedleMapEntrySize)
//set watermark
watermark := (m.recordCount / watermarkBatchSize) * watermarkBatchSize
err = setWatermark(db, uint64(watermark))
if err != nil {
glog.Fatalf("setting watermark failed %s: %v", indexFile.Name(), err)
return err
}
v.nm = m
v.tmpNm = nil
m.ldbTimeout = ldbTimeout
if m.ldbTimeout > 0 {
m.ldbOpts = opts
m.exitChan = make(chan bool, 1)
m.accessFlag = 0
go lazyLoadingRoutine(m)
}
return e
}
func (m *LevelDbNeedleMap) DoOffsetLoading(v *Volume, indexFile *os.File, startFrom uint64) (err error) {
glog.V(0).Infof("loading idx to leveldb from offset %d for file: %s", startFrom, indexFile.Name())
dbFileName := v.FileName(".cpldb")
db, dbErr := leveldb.OpenFile(dbFileName, nil)
defer func() {
if dbErr == nil {
db.Close()
}
if err != nil {
os.RemoveAll(dbFileName)
}
}()
if dbErr != nil {
if errors.IsCorrupted(err) {
db, dbErr = leveldb.RecoverFile(dbFileName, nil)
}
if dbErr != nil {
return dbErr
}
}
err = idx.WalkIndexFile(indexFile, startFrom, func(key NeedleId, offset Offset, size Size) (e error) {
m.mapMetric.FileCounter++
bytes := make([]byte, NeedleIdSize)
NeedleIdToBytes(bytes[0:NeedleIdSize], key)
// fresh loading
if startFrom == 0 {
m.mapMetric.FileByteCounter += uint64(size)
e = levelDbWrite(db, key, offset, size, false, 0)
return e
}
// increment loading
data, err := db.Get(bytes, nil)
if err != nil {
if !strings.Contains(strings.ToLower(err.Error()), "not found") {
// unexpected error
return err
}
// new needle, unlikely happen
m.mapMetric.FileByteCounter += uint64(size)
e = levelDbWrite(db, key, offset, size, false, 0)
} else {
// needle is found
oldSize := BytesToSize(data[OffsetSize : OffsetSize+SizeSize])
oldOffset := BytesToOffset(data[0:OffsetSize])
if !offset.IsZero() && size.IsValid() {
// updated needle
m.mapMetric.FileByteCounter += uint64(size)
if !oldOffset.IsZero() && oldSize.IsValid() {
m.mapMetric.DeletionCounter++
m.mapMetric.DeletionByteCounter += uint64(oldSize)
}
e = levelDbWrite(db, key, offset, size, false, 0)
} else {
// deleted needle
m.mapMetric.DeletionCounter++
m.mapMetric.DeletionByteCounter += uint64(oldSize)
e = levelDbDelete(db, key)
}
}
return e
})
return err
}
func reloadLdb(m *LevelDbNeedleMap) (err error) {
if m.db != nil {
return nil
}
glog.V(1).Infof("reloading leveldb %s", m.dbFileName)
m.accessFlag = 1
if m.db, err = leveldb.OpenFile(m.dbFileName, m.ldbOpts); err != nil {
if errors.IsCorrupted(err) {
m.db, err = leveldb.RecoverFile(m.dbFileName, m.ldbOpts)
}
if err != nil {
glog.Fatalf("RecoverFile %s failed:%v", m.dbFileName, err)
return err
}
}
return nil
}
func unloadLdb(m *LevelDbNeedleMap) (err error) {
m.ldbAccessLock.Lock()
defer m.ldbAccessLock.Unlock()
if m.db != nil {
glog.V(1).Infof("reached max idle count, unload leveldb, %s", m.dbFileName)
m.db.Close()
m.db = nil
}
return nil
}
func lazyLoadingRoutine(m *LevelDbNeedleMap) (err error) {
glog.V(1).Infof("lazyLoadingRoutine %s", m.dbFileName)
var accessRecord int64
accessRecord = 1
for {
select {
case exit := <-m.exitChan:
if exit {
glog.V(1).Infof("exit from lazyLoadingRoutine")
return nil
}
case <-time.After(time.Hour * 1):
glog.V(1).Infof("timeout %s", m.dbFileName)
if m.accessFlag == 0 {
accessRecord++
glog.V(1).Infof("accessRecord++")
if accessRecord >= m.ldbTimeout {
unloadLdb(m)
}
} else {
glog.V(1).Infof("reset accessRecord %s", m.dbFileName)
// reset accessRecord
accessRecord = 0
}
continue
}
}
}