package storage

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/errors"
	"github.com/syndtr/goleveldb/leveldb/opt"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/storage/idx"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
	. "github.com/seaweedfs/seaweedfs/weed/storage/types"
	"github.com/seaweedfs/seaweedfs/weed/util"
)

// persist the watermark once every watermarkBatchSize index operations
const watermarkBatchSize = 10000

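// watermarkKey is the LevelDB key under which the watermark is persisted. The
// watermark records, in multiples of watermarkBatchSize, how many .idx entries
// have already been applied to this LevelDB file, so that generateLevelDbFile
// can resume WalkIndexFile from that offset instead of re-reading the whole
// index.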
var watermarkKey = []byte("idx_entry_watermark")

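// LevelDbNeedleMap is a needle map backed by a LevelDB database that mirrors
// the .idx file. When ldbTimeout is greater than zero, the database handle is
// closed after the map has been idle for roughly ldbTimeout hours and is
// transparently reopened on the next access (see lazyLoadingRoutine,
// reloadLdb, and unloadLdb below).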
type LevelDbNeedleMap struct {
	baseNeedleMapper
	dbFileName    string
	db            *leveldb.DB
	ldbOpts       *opt.Options
	ldbAccessLock sync.RWMutex
	exitChan      chan bool
	// no need to use atomic
	accessFlag  int64
	ldbTimeout  int64
	recordCount uint64
}

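// NewLevelDbNeedleMap opens the LevelDB-backed needle map for one volume,
// regenerating the database from the .idx file when the index is newer than
// the LevelDB LOG file. A minimal usage sketch, assuming a volume index file
// is already open (the file paths here are illustrative only):
//
//	idxFile, _ := os.OpenFile("/data/1.idx", os.O_RDWR|os.O_CREATE, 0644)
//	nm, err := NewLevelDbNeedleMap("/data/1.ldb", idxFile, &opt.Options{}, 0)
//	if err != nil {
//		glog.Fatalf("open needle map: %v", err)
//	}
//	defer nm.Close()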
func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Options, ldbTimeout int64) (m *LevelDbNeedleMap, err error) {
	m = &LevelDbNeedleMap{dbFileName: dbFileName}
	m.indexFile = indexFile
	if !isLevelDbFresh(dbFileName, indexFile) {
		glog.V(1).Infof("Start to Generate %s from %s", dbFileName, indexFile.Name())
		if err = generateLevelDbFile(dbFileName, indexFile); err != nil {
			return nil, fmt.Errorf("generate leveldb file %s: %v", dbFileName, err)
		}
		glog.V(1).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name())
	}
	if stat, err := indexFile.Stat(); err != nil {
		glog.Fatalf("stat file %s: %v", indexFile.Name(), err)
	} else {
		m.indexFileOffset = stat.Size()
	}
	glog.V(1).Infof("Opening %s...", dbFileName)

	if m.ldbTimeout == 0 {
		if m.db, err = leveldb.OpenFile(dbFileName, opts); err != nil {
			if errors.IsCorrupted(err) {
				m.db, err = leveldb.RecoverFile(dbFileName, opts)
			}
			if err != nil {
				return
			}
		}
		glog.V(0).Infof("Loading %s..., watermark: %d", dbFileName, getWatermark(m.db))
		m.recordCount = uint64(m.indexFileOffset / NeedleMapEntrySize)
		watermark := (m.recordCount / watermarkBatchSize) * watermarkBatchSize
		err = setWatermark(m.db, watermark)
		if err != nil {
			glog.Fatalf("set watermark for %s error: %s\n", dbFileName, err)
			return
		}
	}
	mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile)
	if indexLoadError != nil {
		return nil, indexLoadError
	}
	m.mapMetric = *mm
	m.ldbTimeout = ldbTimeout
	if m.ldbTimeout > 0 {
		m.ldbOpts = opts
		m.exitChan = make(chan bool, 1)
		m.accessFlag = 0
		go lazyLoadingRoutine(m)
	}
	return
}

func isLevelDbFresh(dbFileName string, indexFile *os.File) bool {
	// normally we always write to index file first
	dbLogFile, err := os.Open(filepath.Join(dbFileName, "LOG"))
	if err != nil {
		return false
	}
	defer dbLogFile.Close()
	dbStat, dbStatErr := dbLogFile.Stat()
	indexStat, indexStatErr := indexFile.Stat()
	if dbStatErr != nil || indexStatErr != nil {
		glog.V(0).Infof("Can not stat file: %v and %v", dbStatErr, indexStatErr)
		return false
	}

	return dbStat.ModTime().After(indexStat.ModTime())
}

func generateLevelDbFile(dbFileName string, indexFile *os.File) error {
	db, err := leveldb.OpenFile(dbFileName, nil)
	if err != nil {
		return err
	}
	defer db.Close()

	watermark := getWatermark(db)
	if stat, err := indexFile.Stat(); err != nil {
		glog.Fatalf("stat file %s: %v", indexFile.Name(), err)
		return err
	} else {
		if watermark*NeedleMapEntrySize > uint64(stat.Size()) {
			glog.Warningf("wrong watermark %d for filesize %d", watermark, stat.Size())
		}
		glog.V(0).Infof("generateLevelDbFile %s, watermark %d, num of entries:%d", dbFileName, watermark, (uint64(stat.Size())-watermark*NeedleMapEntrySize)/NeedleMapEntrySize)
	}
	return idx.WalkIndexFile(indexFile, watermark, func(key NeedleId, offset Offset, size Size) error {
		if !offset.IsZero() && size.IsValid() {
			return levelDbWrite(db, key, offset, size, false, 0)
		}
		return levelDbDelete(db, key)
	})
}

func (m *LevelDbNeedleMap) Get(key NeedleId) (element *needle_map.NeedleValue, ok bool) {
	bytes := make([]byte, NeedleIdSize)
	if m.ldbTimeout > 0 {
		m.ldbAccessLock.RLock()
		defer m.ldbAccessLock.RUnlock()
		loadErr := reloadLdb(m)
		if loadErr != nil {
			return nil, false
		}
	}
	NeedleIdToBytes(bytes[0:NeedleIdSize], key)
	data, err := m.db.Get(bytes, nil)
	if err != nil || len(data) != OffsetSize+SizeSize {
		return nil, false
	}
	offset := BytesToOffset(data[0:OffsetSize])
	size := BytesToSize(data[OffsetSize : OffsetSize+SizeSize])
	return &needle_map.NeedleValue{Key: key, Offset: offset, Size: size}, true
}

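// Put appends the entry to the .idx file first and only then updates LevelDB,
// so that after a crash the index file remains the source of truth and the
// LevelDB copy can be regenerated from it.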
func (m *LevelDbNeedleMap) Put(key NeedleId, offset Offset, size Size) error {
	var oldSize Size
	var watermark uint64
	if m.ldbTimeout > 0 {
		m.ldbAccessLock.RLock()
		defer m.ldbAccessLock.RUnlock()
		loadErr := reloadLdb(m)
		if loadErr != nil {
			return loadErr
		}
	}
	if oldNeedle, ok := m.Get(key); ok {
		oldSize = oldNeedle.Size
	}
	m.logPut(key, oldSize, size)
	// write to index file first
	if err := m.appendToIndexFile(key, offset, size); err != nil {
		return fmt.Errorf("cannot write to indexfile %s: %v", m.indexFile.Name(), err)
	}
	m.recordCount++
	if m.recordCount%watermarkBatchSize != 0 {
		watermark = 0
	} else {
		watermark = (m.recordCount / watermarkBatchSize) * watermarkBatchSize
		glog.V(1).Infof("put cnt:%d for %s,watermark: %d", m.recordCount, m.dbFileName, watermark)
	}
	return levelDbWrite(m.db, key, offset, size, watermark == 0, watermark)
}

func getWatermark(db *leveldb.DB) uint64 {
	data, err := db.Get(watermarkKey, nil)
	if err != nil || len(data) != 8 {
		glog.V(1).Infof("read previous watermark from db: %v, %d", err, len(data))
		return 0
	}
	return util.BytesToUint64(data)
}

func setWatermark(db *leveldb.DB, watermark uint64) error {
	glog.V(3).Infof("set watermark %d", watermark)
	var wmBytes = make([]byte, 8)
	util.Uint64toBytes(wmBytes, watermark)
	if err := db.Put(watermarkKey, wmBytes, nil); err != nil {
		return fmt.Errorf("failed to setWatermark: %v", err)
	}
	return nil
}

func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size Size, updateWatermark bool, watermark uint64) error {
	bytes := needle_map.ToBytes(key, offset, size)

	if err := db.Put(bytes[0:NeedleIdSize], bytes[NeedleIdSize:NeedleIdSize+OffsetSize+SizeSize], nil); err != nil {
		return fmt.Errorf("failed to write leveldb: %v", err)
	}
	// set watermark
	if updateWatermark {
		return setWatermark(db, watermark)
	}
	return nil
}

func levelDbDelete(db *leveldb.DB, key NeedleId) error {
	bytes := make([]byte, NeedleIdSize)
	NeedleIdToBytes(bytes, key)
	return db.Delete(bytes, nil)
}

func (m *LevelDbNeedleMap) Delete(key NeedleId, offset Offset) error {
	var watermark uint64
	if m.ldbTimeout > 0 {
		m.ldbAccessLock.RLock()
		defer m.ldbAccessLock.RUnlock()
		loadErr := reloadLdb(m)
		if loadErr != nil {
			return loadErr
		}
	}
	oldNeedle, found := m.Get(key)
	if !found || oldNeedle.Size.IsDeleted() {
		return nil
	}
	m.logDelete(oldNeedle.Size)

	// write to index file first
	if err := m.appendToIndexFile(key, offset, TombstoneFileSize); err != nil {
		return err
	}
	m.recordCount++
	if m.recordCount%watermarkBatchSize != 0 {
		watermark = 0
	} else {
		watermark = (m.recordCount / watermarkBatchSize) * watermarkBatchSize
	}
	return levelDbWrite(m.db, key, oldNeedle.Offset, -oldNeedle.Size, watermark == 0, watermark)
}

func (m *LevelDbNeedleMap) Close() {
	if m.indexFile != nil {
		indexFileName := m.indexFile.Name()
		if err := m.indexFile.Sync(); err != nil {
			glog.Warningf("sync file %s failed: %v", indexFileName, err)
		}
		if err := m.indexFile.Close(); err != nil {
			glog.Warningf("close index file %s failed: %v", indexFileName, err)
		}
	}

	if m.db != nil {
		if err := m.db.Close(); err != nil {
			glog.Warningf("close levelDB failed: %v", err)
		}
	}
	if m.ldbTimeout > 0 {
		m.exitChan <- true
	}
}

func (m *LevelDbNeedleMap) Destroy() error {
	m.Close()
	os.Remove(m.indexFile.Name())
	return os.RemoveAll(m.dbFileName)
}

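// UpdateNeedleMap swaps in the needle map that was prepared in the ".cpldb"
// directory (see DoOffsetLoading below): it removes the old ".ldb" database,
// renames ".cpldb" over it, reopens the database, and re-registers the map on
// the volume.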
func (m *LevelDbNeedleMap) UpdateNeedleMap(v *Volume, indexFile *os.File, opts *opt.Options, ldbTimeout int64) error {
	if v.nm != nil {
		v.nm.Close()
		v.nm = nil
	}
	defer func() {
		if v.tmpNm != nil {
			v.tmpNm.Close()
			v.tmpNm = nil
		}
	}()
	levelDbFile := v.FileName(".ldb")
	m.indexFile = indexFile
	err := os.RemoveAll(levelDbFile)
	if err != nil {
		return err
	}
	if err = os.Rename(v.FileName(".cpldb"), levelDbFile); err != nil {
		return fmt.Errorf("rename %s: %v", levelDbFile, err)
	}

	db, err := leveldb.OpenFile(levelDbFile, opts)
	if err != nil {
		if errors.IsCorrupted(err) {
			db, err = leveldb.RecoverFile(levelDbFile, opts)
		}
		if err != nil {
			return err
		}
	}
	m.db = db

	stat, e := indexFile.Stat()
	if e != nil {
		glog.Fatalf("stat file %s: %v", indexFile.Name(), e)
		return e
	}
	m.indexFileOffset = stat.Size()
	m.recordCount = uint64(stat.Size() / NeedleMapEntrySize)

	// set watermark
	watermark := (m.recordCount / watermarkBatchSize) * watermarkBatchSize
	err = setWatermark(db, watermark)
	if err != nil {
		glog.Fatalf("setting watermark failed %s: %v", indexFile.Name(), err)
		return err
	}
	v.nm = m
	v.tmpNm = nil
	m.ldbTimeout = ldbTimeout
	if m.ldbTimeout > 0 {
		m.ldbOpts = opts
		m.exitChan = make(chan bool, 1)
		m.accessFlag = 0
		go lazyLoadingRoutine(m)
	}
	return e
}

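// DoOffsetLoading replays .idx entries starting at startFrom into the ".cpldb"
// database and updates the map metrics. With startFrom == 0 it performs a
// fresh rebuild; otherwise existing entries are consulted so deletions and
// overwrites are counted correctly.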
func (m *LevelDbNeedleMap) DoOffsetLoading(v *Volume, indexFile *os.File, startFrom uint64) (err error) {
	glog.V(0).Infof("loading idx to leveldb from offset %d for file: %s", startFrom, indexFile.Name())
	dbFileName := v.FileName(".cpldb")
	db, dbErr := leveldb.OpenFile(dbFileName, nil)
	defer func() {
		if dbErr == nil {
			db.Close()
		}
		if err != nil {
			os.RemoveAll(dbFileName)
		}
	}()
	if dbErr != nil {
		if errors.IsCorrupted(dbErr) {
			db, dbErr = leveldb.RecoverFile(dbFileName, nil)
		}
		if dbErr != nil {
			return dbErr
		}
	}

	err = idx.WalkIndexFile(indexFile, startFrom, func(key NeedleId, offset Offset, size Size) (e error) {
		m.mapMetric.FileCounter++
		bytes := make([]byte, NeedleIdSize)
		NeedleIdToBytes(bytes[0:NeedleIdSize], key)
		// fresh loading
		if startFrom == 0 {
			m.mapMetric.FileByteCounter += uint64(size)
			e = levelDbWrite(db, key, offset, size, false, 0)
			return e
		}
		// incremental loading
		data, err := db.Get(bytes, nil)
		if err != nil {
			if !strings.Contains(strings.ToLower(err.Error()), "not found") {
				// unexpected error
				return err
			}
			// new needle, unlikely to happen
			m.mapMetric.FileByteCounter += uint64(size)
			e = levelDbWrite(db, key, offset, size, false, 0)
		} else {
			// needle is found
			oldSize := BytesToSize(data[OffsetSize : OffsetSize+SizeSize])
			oldOffset := BytesToOffset(data[0:OffsetSize])
			if !offset.IsZero() && size.IsValid() {
				// updated needle
				m.mapMetric.FileByteCounter += uint64(size)
				if !oldOffset.IsZero() && oldSize.IsValid() {
					m.mapMetric.DeletionCounter++
					m.mapMetric.DeletionByteCounter += uint64(oldSize)
				}
				e = levelDbWrite(db, key, offset, size, false, 0)
			} else {
				// deleted needle
				m.mapMetric.DeletionCounter++
				m.mapMetric.DeletionByteCounter += uint64(oldSize)
				e = levelDbDelete(db, key)
			}
		}
		return e
	})
	return err
}

func reloadLdb(m *LevelDbNeedleMap) (err error) {
	if m.db != nil {
		return nil
	}
	glog.V(1).Infof("reloading leveldb %s", m.dbFileName)
	m.accessFlag = 1
	if m.db, err = leveldb.OpenFile(m.dbFileName, m.ldbOpts); err != nil {
		if errors.IsCorrupted(err) {
			m.db, err = leveldb.RecoverFile(m.dbFileName, m.ldbOpts)
		}
		if err != nil {
			glog.Fatalf("RecoverFile %s failed:%v", m.dbFileName, err)
			return err
		}
	}
	return nil
}

func unloadLdb(m *LevelDbNeedleMap) (err error) {
	m.ldbAccessLock.Lock()
	defer m.ldbAccessLock.Unlock()
	if m.db != nil {
		glog.V(1).Infof("reached max idle count, unload leveldb, %s", m.dbFileName)
		m.db.Close()
		m.db = nil
	}
	return nil
}

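// lazyLoadingRoutine runs once per LevelDbNeedleMap created with ldbTimeout > 0.
// Every hour it checks accessFlag; once the map has stayed idle long enough it
// closes the LevelDB handle via unloadLdb, and reloadLdb reopens it on the
// next Get, Put, or Delete.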
func lazyLoadingRoutine(m *LevelDbNeedleMap) (err error) {
	glog.V(1).Infof("lazyLoadingRoutine %s", m.dbFileName)
	accessRecord := int64(1)
	for {
		select {
		case exit := <-m.exitChan:
			if exit {
				glog.V(1).Infof("exit from lazyLoadingRoutine")
				return nil
			}
		case <-time.After(time.Hour * 1):
			glog.V(1).Infof("timeout %s", m.dbFileName)
			if m.accessFlag == 0 {
				accessRecord++
				glog.V(1).Infof("accessRecord++")
				if accessRecord >= m.ldbTimeout {
					unloadLdb(m)
				}
			} else {
				glog.V(1).Infof("reset accessRecord %s", m.dbFileName)
				// reset accessRecord
				accessRecord = 0
			}
			continue
		}
	}
}