You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

511 lines
16 KiB

6 years ago
6 years ago
6 years ago
12 years ago
12 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
  1. package storage
  2. import (
  3. "fmt"
  4. "path/filepath"
  5. "strings"
  6. "sync/atomic"
  7. "google.golang.org/grpc"
  8. "github.com/chrislusf/seaweedfs/weed/glog"
  9. "github.com/chrislusf/seaweedfs/weed/pb"
  10. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  11. "github.com/chrislusf/seaweedfs/weed/stats"
  12. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  13. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  14. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  15. . "github.com/chrislusf/seaweedfs/weed/storage/types"
  16. )
  // MAX_TTL_VOLUME_REMOVAL_DELAY is the delay, in minutes, handed to
  // Volume.expiredLongEnough by CollectHeartbeat when it decides whether an
  // expired volume should be deleted. The value is 10 minutes.
  const MAX_TTL_VOLUME_REMOVAL_DELAY = 10
  // ReadOption carries per-read flags; ReadVolumeNeedle forwards it to
  // Volume.readNeedle unchanged.
  type ReadOption struct {
  	// ReadDeleted presumably allows reading needles that were already
  	// deleted; the flag is interpreted outside this file - TODO confirm.
  	ReadDeleted bool
  }
  23. /*
  24. * A VolumeServer contains one Store
  25. */
  26. type Store struct {
  27. MasterAddress string
  28. grpcDialOption grpc.DialOption
  29. volumeSizeLimit uint64 // read from the master
  30. Ip string
  31. Port int
  32. PublicUrl string
  33. Locations []*DiskLocation
  34. dataCenter string // optional informaton, overwriting master setting if exists
  35. rack string // optional information, overwriting master setting if exists
  36. connected bool
  37. NeedleMapKind NeedleMapKind
  38. NewVolumesChan chan master_pb.VolumeShortInformationMessage
  39. DeletedVolumesChan chan master_pb.VolumeShortInformationMessage
  40. NewEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  41. DeletedEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  42. }
  43. func (s *Store) String() (str string) {
  44. str = fmt.Sprintf("Ip:%s, Port:%d, PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
  45. return
  46. }
  47. func NewStore(grpcDialOption grpc.DialOption, port int, ip, publicUrl string, dirnames []string, maxVolumeCounts []int, minFreeSpacePercents []float32, idxFolder string, needleMapKind NeedleMapKind, diskTypes []DiskType) (s *Store) {
  48. s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, PublicUrl: publicUrl, NeedleMapKind: needleMapKind}
  49. s.Locations = make([]*DiskLocation, 0)
  50. for i := 0; i < len(dirnames); i++ {
  51. location := NewDiskLocation(dirnames[i], maxVolumeCounts[i], minFreeSpacePercents[i], idxFolder, diskTypes[i])
  52. location.loadExistingVolumes(needleMapKind)
  53. s.Locations = append(s.Locations, location)
  54. stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i]))
  55. }
  56. s.NewVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  57. s.DeletedVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  58. s.NewEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  59. s.DeletedEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  60. return
  61. }
  62. func (s *Store) AddVolume(volumeId needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement string, ttlString string, preallocate int64, MemoryMapMaxSizeMb uint32, diskType DiskType) error {
  63. rt, e := super_block.NewReplicaPlacementFromString(replicaPlacement)
  64. if e != nil {
  65. return e
  66. }
  67. ttl, e := needle.ReadTTL(ttlString)
  68. if e != nil {
  69. return e
  70. }
  71. e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate, MemoryMapMaxSizeMb, diskType)
  72. return e
  73. }
  74. func (s *Store) DeleteCollection(collection string) (e error) {
  75. for _, location := range s.Locations {
  76. e = location.DeleteCollectionFromDiskLocation(collection)
  77. if e != nil {
  78. return
  79. }
  80. // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumesChan
  81. }
  82. return
  83. }
  84. func (s *Store) findVolume(vid needle.VolumeId) *Volume {
  85. for _, location := range s.Locations {
  86. if v, found := location.FindVolume(vid); found {
  87. return v
  88. }
  89. }
  90. return nil
  91. }
  92. func (s *Store) FindFreeLocation(diskType DiskType) (ret *DiskLocation) {
  93. max := 0
  94. for _, location := range s.Locations {
  95. if diskType != location.DiskType {
  96. continue
  97. }
  98. if location.isDiskSpaceLow {
  99. continue
  100. }
  101. currentFreeCount := location.MaxVolumeCount - location.VolumesLen()
  102. currentFreeCount *= erasure_coding.DataShardsCount
  103. currentFreeCount -= location.EcVolumesLen()
  104. currentFreeCount /= erasure_coding.DataShardsCount
  105. if currentFreeCount > max {
  106. max = currentFreeCount
  107. ret = location
  108. }
  109. }
  110. return ret
  111. }
  112. func (s *Store) addVolume(vid needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32, diskType DiskType) error {
  113. if s.findVolume(vid) != nil {
  114. return fmt.Errorf("Volume Id %d already exists!", vid)
  115. }
  116. if location := s.FindFreeLocation(diskType); location != nil {
  117. glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
  118. location.Directory, vid, collection, replicaPlacement, ttl)
  119. if volume, err := NewVolume(location.Directory, location.IdxDirectory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate, memoryMapMaxSizeMb); err == nil {
  120. location.SetVolume(vid, volume)
  121. glog.V(0).Infof("add volume %d", vid)
  122. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  123. Id: uint32(vid),
  124. Collection: collection,
  125. ReplicaPlacement: uint32(replicaPlacement.Byte()),
  126. Version: uint32(volume.Version()),
  127. Ttl: ttl.ToUint32(),
  128. DiskType: string(diskType),
  129. }
  130. return nil
  131. } else {
  132. return err
  133. }
  134. }
  135. return fmt.Errorf("No more free space left")
  136. }
  137. func (s *Store) VolumeInfos() (allStats []*VolumeInfo) {
  138. for _, location := range s.Locations {
  139. stats := collectStatsForOneLocation(location)
  140. allStats = append(allStats, stats...)
  141. }
  142. sortVolumeInfos(allStats)
  143. return allStats
  144. }
  145. func collectStatsForOneLocation(location *DiskLocation) (stats []*VolumeInfo) {
  146. location.volumesLock.RLock()
  147. defer location.volumesLock.RUnlock()
  148. for k, v := range location.volumes {
  149. s := collectStatForOneVolume(k, v)
  150. stats = append(stats, s)
  151. }
  152. return stats
  153. }
  154. func collectStatForOneVolume(vid needle.VolumeId, v *Volume) (s *VolumeInfo) {
  155. s = &VolumeInfo{
  156. Id: vid,
  157. Collection: v.Collection,
  158. ReplicaPlacement: v.ReplicaPlacement,
  159. Version: v.Version(),
  160. ReadOnly: v.IsReadOnly(),
  161. Ttl: v.Ttl,
  162. CompactRevision: uint32(v.CompactionRevision),
  163. DiskType: v.DiskType().String(),
  164. }
  165. s.RemoteStorageName, s.RemoteStorageKey = v.RemoteStorageNameKey()
  166. v.dataFileAccessLock.RLock()
  167. defer v.dataFileAccessLock.RUnlock()
  168. if v.nm == nil {
  169. return
  170. }
  171. s.FileCount = v.nm.FileCount()
  172. s.DeleteCount = v.nm.DeletedCount()
  173. s.DeletedByteCount = v.nm.DeletedSize()
  174. s.Size = v.nm.ContentSize()
  175. return
  176. }
  177. func (s *Store) SetDataCenter(dataCenter string) {
  178. s.dataCenter = dataCenter
  179. }
  180. func (s *Store) SetRack(rack string) {
  181. s.rack = rack
  182. }
  183. func (s *Store) GetDataCenter() string {
  184. return s.dataCenter
  185. }
  186. func (s *Store) GetRack() string {
  187. return s.rack
  188. }
  189. func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
  190. var volumeMessages []*master_pb.VolumeInformationMessage
  191. maxVolumeCounts := make(map[string]uint32)
  192. var maxFileKey NeedleId
  193. collectionVolumeSize := make(map[string]uint64)
  194. collectionVolumeReadOnlyCount := make(map[string]map[string]uint8)
  195. for _, location := range s.Locations {
  196. var deleteVids []needle.VolumeId
  197. maxVolumeCounts[string(location.DiskType)] += uint32(location.MaxVolumeCount)
  198. location.volumesLock.RLock()
  199. for _, v := range location.volumes {
  200. curMaxFileKey, volumeMessage := v.ToVolumeInformationMessage()
  201. if volumeMessage == nil {
  202. continue
  203. }
  204. if maxFileKey < curMaxFileKey {
  205. maxFileKey = curMaxFileKey
  206. }
  207. deleteVolume := false
  208. if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) {
  209. volumeMessages = append(volumeMessages, volumeMessage)
  210. } else {
  211. if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
  212. deleteVids = append(deleteVids, v.Id)
  213. deleteVolume = true
  214. } else {
  215. glog.V(0).Infof("volume %d is expired", v.Id)
  216. }
  217. if v.lastIoError != nil {
  218. deleteVids = append(deleteVids, v.Id)
  219. deleteVolume = true
  220. glog.Warningf("volume %d has IO error: %v", v.Id, v.lastIoError)
  221. }
  222. }
  223. if _, exist := collectionVolumeSize[v.Collection]; !exist {
  224. collectionVolumeSize[v.Collection] = 0
  225. }
  226. if !deleteVolume {
  227. collectionVolumeSize[v.Collection] += volumeMessage.Size
  228. }
  229. if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist {
  230. collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{
  231. "IsReadOnly": 0,
  232. "noWriteOrDelete": 0,
  233. "noWriteCanDelete": 0,
  234. "isDiskSpaceLow": 0,
  235. }
  236. }
  237. if !deleteVolume && v.IsReadOnly() {
  238. collectionVolumeReadOnlyCount[v.Collection]["IsReadOnly"] += 1
  239. if v.noWriteOrDelete {
  240. collectionVolumeReadOnlyCount[v.Collection]["noWriteOrDelete"] += 1
  241. }
  242. if v.noWriteCanDelete {
  243. collectionVolumeReadOnlyCount[v.Collection]["noWriteCanDelete"] += 1
  244. }
  245. if v.location.isDiskSpaceLow {
  246. collectionVolumeReadOnlyCount[v.Collection]["isDiskSpaceLow"] += 1
  247. }
  248. }
  249. }
  250. location.volumesLock.RUnlock()
  251. if len(deleteVids) > 0 {
  252. // delete expired volumes.
  253. location.volumesLock.Lock()
  254. for _, vid := range deleteVids {
  255. found, err := location.deleteVolumeById(vid)
  256. if err == nil {
  257. if found {
  258. glog.V(0).Infof("volume %d is deleted", vid)
  259. }
  260. } else {
  261. glog.Warningf("delete volume %d: %v", vid, err)
  262. }
  263. }
  264. location.volumesLock.Unlock()
  265. }
  266. }
  267. for col, size := range collectionVolumeSize {
  268. stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "normal").Set(float64(size))
  269. }
  270. for col, types := range collectionVolumeReadOnlyCount {
  271. for t, count := range types {
  272. stats.VolumeServerReadOnlyVolumeGauge.WithLabelValues(col, t).Set(float64(count))
  273. }
  274. }
  275. return &master_pb.Heartbeat{
  276. Ip: s.Ip,
  277. Port: uint32(s.Port),
  278. PublicUrl: s.PublicUrl,
  279. MaxVolumeCounts: maxVolumeCounts,
  280. MaxFileKey: NeedleIdToUint64(maxFileKey),
  281. DataCenter: s.dataCenter,
  282. Rack: s.rack,
  283. Volumes: volumeMessages,
  284. HasNoVolumes: len(volumeMessages) == 0,
  285. }
  286. }
  287. func (s *Store) Close() {
  288. for _, location := range s.Locations {
  289. location.Close()
  290. }
  291. }
  292. func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle, fsync bool) (isUnchanged bool, err error) {
  293. if v := s.findVolume(i); v != nil {
  294. if v.IsReadOnly() {
  295. err = fmt.Errorf("volume %d is read only", i)
  296. return
  297. }
  298. _, _, isUnchanged, err = v.writeNeedle2(n, fsync)
  299. return
  300. }
  301. glog.V(0).Infoln("volume", i, "not found!")
  302. err = fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  303. return
  304. }
  305. func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (Size, error) {
  306. if v := s.findVolume(i); v != nil {
  307. if v.noWriteOrDelete {
  308. return 0, fmt.Errorf("volume %d is read only", i)
  309. }
  310. return v.deleteNeedle2(n)
  311. }
  312. return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  313. }
  314. func (s *Store) ReadVolumeNeedle(i needle.VolumeId, n *needle.Needle, readOption *ReadOption) (int, error) {
  315. if v := s.findVolume(i); v != nil {
  316. return v.readNeedle(n, readOption)
  317. }
  318. return 0, fmt.Errorf("volume %d not found", i)
  319. }
  320. func (s *Store) GetVolume(i needle.VolumeId) *Volume {
  321. return s.findVolume(i)
  322. }
  323. func (s *Store) HasVolume(i needle.VolumeId) bool {
  324. v := s.findVolume(i)
  325. return v != nil
  326. }
  327. func (s *Store) MarkVolumeReadonly(i needle.VolumeId) error {
  328. v := s.findVolume(i)
  329. if v == nil {
  330. return fmt.Errorf("volume %d not found", i)
  331. }
  332. v.noWriteLock.Lock()
  333. v.noWriteOrDelete = true
  334. v.noWriteLock.Unlock()
  335. return nil
  336. }
  337. func (s *Store) MarkVolumeWritable(i needle.VolumeId) error {
  338. v := s.findVolume(i)
  339. if v == nil {
  340. return fmt.Errorf("volume %d not found", i)
  341. }
  342. v.noWriteLock.Lock()
  343. v.noWriteOrDelete = false
  344. v.noWriteLock.Unlock()
  345. return nil
  346. }
  347. func (s *Store) MountVolume(i needle.VolumeId) error {
  348. for _, location := range s.Locations {
  349. if found := location.LoadVolume(i, s.NeedleMapKind); found == true {
  350. glog.V(0).Infof("mount volume %d", i)
  351. v := s.findVolume(i)
  352. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  353. Id: uint32(v.Id),
  354. Collection: v.Collection,
  355. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  356. Version: uint32(v.Version()),
  357. Ttl: v.Ttl.ToUint32(),
  358. DiskType: string(v.location.DiskType),
  359. }
  360. return nil
  361. }
  362. }
  363. return fmt.Errorf("volume %d not found on disk", i)
  364. }
  365. func (s *Store) UnmountVolume(i needle.VolumeId) error {
  366. v := s.findVolume(i)
  367. if v == nil {
  368. return nil
  369. }
  370. message := master_pb.VolumeShortInformationMessage{
  371. Id: uint32(v.Id),
  372. Collection: v.Collection,
  373. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  374. Version: uint32(v.Version()),
  375. Ttl: v.Ttl.ToUint32(),
  376. DiskType: string(v.location.DiskType),
  377. }
  378. for _, location := range s.Locations {
  379. if err := location.UnloadVolume(i); err == nil {
  380. glog.V(0).Infof("UnmountVolume %d", i)
  381. s.DeletedVolumesChan <- message
  382. return nil
  383. }
  384. }
  385. return fmt.Errorf("volume %d not found on disk", i)
  386. }
  387. func (s *Store) DeleteVolume(i needle.VolumeId) error {
  388. v := s.findVolume(i)
  389. if v == nil {
  390. return fmt.Errorf("delete volume %d not found on disk", i)
  391. }
  392. message := master_pb.VolumeShortInformationMessage{
  393. Id: uint32(v.Id),
  394. Collection: v.Collection,
  395. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  396. Version: uint32(v.Version()),
  397. Ttl: v.Ttl.ToUint32(),
  398. DiskType: string(v.location.DiskType),
  399. }
  400. for _, location := range s.Locations {
  401. if err := location.DeleteVolume(i); err == nil {
  402. glog.V(0).Infof("DeleteVolume %d", i)
  403. s.DeletedVolumesChan <- message
  404. return nil
  405. } else {
  406. glog.Errorf("DeleteVolume %d: %v", i, err)
  407. }
  408. }
  409. return fmt.Errorf("volume %d not found on disk", i)
  410. }
  411. func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error {
  412. for _, location := range s.Locations {
  413. fileInfo, found := location.LocateVolume(i)
  414. if !found {
  415. continue
  416. }
  417. // load, modify, save
  418. baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name()))
  419. vifFile := filepath.Join(location.Directory, baseFileName+".vif")
  420. volumeInfo, _, _, err := pb.MaybeLoadVolumeInfo(vifFile)
  421. if err != nil {
  422. return fmt.Errorf("volume %d fail to load vif", i)
  423. }
  424. volumeInfo.Replication = replication
  425. err = pb.SaveVolumeInfo(vifFile, volumeInfo)
  426. if err != nil {
  427. return fmt.Errorf("volume %d fail to save vif", i)
  428. }
  429. return nil
  430. }
  431. return fmt.Errorf("volume %d not found on disk", i)
  432. }
  433. func (s *Store) SetVolumeSizeLimit(x uint64) {
  434. atomic.StoreUint64(&s.volumeSizeLimit, x)
  435. }
  436. func (s *Store) GetVolumeSizeLimit() uint64 {
  437. return atomic.LoadUint64(&s.volumeSizeLimit)
  438. }
  439. func (s *Store) MaybeAdjustVolumeMax() (hasChanges bool) {
  440. volumeSizeLimit := s.GetVolumeSizeLimit()
  441. if volumeSizeLimit == 0 {
  442. return
  443. }
  444. for _, diskLocation := range s.Locations {
  445. if diskLocation.OriginalMaxVolumeCount == 0 {
  446. currentMaxVolumeCount := diskLocation.MaxVolumeCount
  447. diskStatus := stats.NewDiskStatus(diskLocation.Directory)
  448. unusedSpace := diskLocation.UnUsedSpace(volumeSizeLimit)
  449. unclaimedSpaces := int64(diskStatus.Free) - int64(unusedSpace)
  450. volCount := diskLocation.VolumesLen()
  451. maxVolumeCount := volCount
  452. if unclaimedSpaces > int64(volumeSizeLimit) {
  453. maxVolumeCount += int(uint64(unclaimedSpaces)/volumeSizeLimit) - 1
  454. }
  455. diskLocation.MaxVolumeCount = maxVolumeCount
  456. glog.V(2).Infof("disk %s max %d unclaimedSpace:%dMB, unused:%dMB volumeSizeLimit:%dMB",
  457. diskLocation.Directory, maxVolumeCount, unclaimedSpaces/1024/1024, unusedSpace/1024/1024, volumeSizeLimit/1024/1024)
  458. hasChanges = hasChanges || currentMaxVolumeCount != diskLocation.MaxVolumeCount
  459. }
  460. }
  461. return
  462. }