566 lines
17 KiB

6 years ago
6 years ago
6 years ago
12 years ago
12 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
  1. package storage
  2. import (
  3. "fmt"
  4. "io"
  5. "path/filepath"
  6. "strings"
  7. "sync/atomic"
  8. "github.com/seaweedfs/seaweedfs/weed/pb"
  9. "github.com/seaweedfs/seaweedfs/weed/storage/volume_info"
  10. "github.com/seaweedfs/seaweedfs/weed/util"
  11. "google.golang.org/grpc"
  12. "github.com/seaweedfs/seaweedfs/weed/glog"
  13. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  14. "github.com/seaweedfs/seaweedfs/weed/stats"
  15. "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
  16. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  17. "github.com/seaweedfs/seaweedfs/weed/storage/super_block"
  18. . "github.com/seaweedfs/seaweedfs/weed/storage/types"
  19. )
  20. const (
  21. MAX_TTL_VOLUME_REMOVAL_DELAY = 10 // 10 minutes
  22. )
  23. type ReadOption struct {
  24. // request
  25. ReadDeleted bool
  26. AttemptMetaOnly bool
  27. MustMetaOnly bool
  28. // response
  29. IsMetaOnly bool // read status
  30. VolumeRevision uint16
  31. IsOutOfRange bool // whether read over MaxPossibleVolumeSize
  32. }
  33. /*
  34. * A VolumeServer contains one Store
  35. */
  36. type Store struct {
  37. MasterAddress pb.ServerAddress
  38. grpcDialOption grpc.DialOption
  39. volumeSizeLimit uint64 // read from the master
  40. Ip string
  41. Port int
  42. GrpcPort int
  43. PublicUrl string
  44. Locations []*DiskLocation
  45. dataCenter string // optional informaton, overwriting master setting if exists
  46. rack string // optional information, overwriting master setting if exists
  47. connected bool
  48. NeedleMapKind NeedleMapKind
  49. NewVolumesChan chan master_pb.VolumeShortInformationMessage
  50. DeletedVolumesChan chan master_pb.VolumeShortInformationMessage
  51. NewEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  52. DeletedEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  53. isStopping bool
  54. }
  55. func (s *Store) String() (str string) {
  56. str = fmt.Sprintf("Ip:%s, Port:%d, GrpcPort:%d PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.GrpcPort, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
  57. return
  58. }
  59. func NewStore(grpcDialOption grpc.DialOption, ip string, port int, grpcPort int, publicUrl string, dirnames []string, maxVolumeCounts []int32,
  60. minFreeSpaces []util.MinFreeSpace, idxFolder string, needleMapKind NeedleMapKind, diskTypes []DiskType) (s *Store) {
  61. s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, GrpcPort: grpcPort, PublicUrl: publicUrl, NeedleMapKind: needleMapKind}
  62. s.Locations = make([]*DiskLocation, 0)
  63. for i := 0; i < len(dirnames); i++ {
  64. location := NewDiskLocation(dirnames[i], int32(maxVolumeCounts[i]), minFreeSpaces[i], idxFolder, diskTypes[i])
  65. location.loadExistingVolumes(needleMapKind)
  66. s.Locations = append(s.Locations, location)
  67. stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i]))
  68. }
  69. s.NewVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  70. s.DeletedVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  71. s.NewEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  72. s.DeletedEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  73. return
  74. }
  75. func (s *Store) AddVolume(volumeId needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement string, ttlString string, preallocate int64, MemoryMapMaxSizeMb uint32, diskType DiskType) error {
  76. rt, e := super_block.NewReplicaPlacementFromString(replicaPlacement)
  77. if e != nil {
  78. return e
  79. }
  80. ttl, e := needle.ReadTTL(ttlString)
  81. if e != nil {
  82. return e
  83. }
  84. e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate, MemoryMapMaxSizeMb, diskType)
  85. return e
  86. }
  87. func (s *Store) DeleteCollection(collection string) (e error) {
  88. for _, location := range s.Locations {
  89. e = location.DeleteCollectionFromDiskLocation(collection)
  90. if e != nil {
  91. return
  92. }
  93. stats.DeleteCollectionMetrics(collection)
  94. // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumesChan
  95. }
  96. return
  97. }
  98. func (s *Store) findVolume(vid needle.VolumeId) *Volume {
  99. for _, location := range s.Locations {
  100. if v, found := location.FindVolume(vid); found {
  101. return v
  102. }
  103. }
  104. return nil
  105. }
  106. func (s *Store) FindFreeLocation(diskType DiskType) (ret *DiskLocation) {
  107. max := int32(0)
  108. for _, location := range s.Locations {
  109. if diskType != location.DiskType {
  110. continue
  111. }
  112. if location.isDiskSpaceLow {
  113. continue
  114. }
  115. currentFreeCount := location.MaxVolumeCount - int32(location.VolumesLen())
  116. currentFreeCount *= erasure_coding.DataShardsCount
  117. currentFreeCount -= int32(location.EcVolumesLen())
  118. currentFreeCount /= erasure_coding.DataShardsCount
  119. if currentFreeCount > max {
  120. max = currentFreeCount
  121. ret = location
  122. }
  123. }
  124. return ret
  125. }
  126. func (s *Store) addVolume(vid needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32, diskType DiskType) error {
  127. if s.findVolume(vid) != nil {
  128. return fmt.Errorf("Volume Id %d already exists!", vid)
  129. }
  130. if location := s.FindFreeLocation(diskType); location != nil {
  131. glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
  132. location.Directory, vid, collection, replicaPlacement, ttl)
  133. if volume, err := NewVolume(location.Directory, location.IdxDirectory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate, memoryMapMaxSizeMb); err == nil {
  134. location.SetVolume(vid, volume)
  135. glog.V(0).Infof("add volume %d", vid)
  136. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  137. Id: uint32(vid),
  138. Collection: collection,
  139. ReplicaPlacement: uint32(replicaPlacement.Byte()),
  140. Version: uint32(volume.Version()),
  141. Ttl: ttl.ToUint32(),
  142. DiskType: string(diskType),
  143. }
  144. return nil
  145. } else {
  146. return err
  147. }
  148. }
  149. return fmt.Errorf("No more free space left")
  150. }
  151. func (s *Store) VolumeInfos() (allStats []*VolumeInfo) {
  152. for _, location := range s.Locations {
  153. stats := collectStatsForOneLocation(location)
  154. allStats = append(allStats, stats...)
  155. }
  156. sortVolumeInfos(allStats)
  157. return allStats
  158. }
  159. func collectStatsForOneLocation(location *DiskLocation) (stats []*VolumeInfo) {
  160. location.volumesLock.RLock()
  161. defer location.volumesLock.RUnlock()
  162. for k, v := range location.volumes {
  163. s := collectStatForOneVolume(k, v)
  164. stats = append(stats, s)
  165. }
  166. return stats
  167. }
  168. func collectStatForOneVolume(vid needle.VolumeId, v *Volume) (s *VolumeInfo) {
  169. s = &VolumeInfo{
  170. Id: vid,
  171. Collection: v.Collection,
  172. ReplicaPlacement: v.ReplicaPlacement,
  173. Version: v.Version(),
  174. ReadOnly: v.IsReadOnly(),
  175. Ttl: v.Ttl,
  176. CompactRevision: uint32(v.CompactionRevision),
  177. DiskType: v.DiskType().String(),
  178. }
  179. s.RemoteStorageName, s.RemoteStorageKey = v.RemoteStorageNameKey()
  180. v.dataFileAccessLock.RLock()
  181. defer v.dataFileAccessLock.RUnlock()
  182. if v.nm == nil {
  183. return
  184. }
  185. s.FileCount = v.nm.FileCount()
  186. s.DeleteCount = v.nm.DeletedCount()
  187. s.DeletedByteCount = v.nm.DeletedSize()
  188. s.Size = v.nm.ContentSize()
  189. return
  190. }
  191. func (s *Store) SetDataCenter(dataCenter string) {
  192. s.dataCenter = dataCenter
  193. }
  194. func (s *Store) SetRack(rack string) {
  195. s.rack = rack
  196. }
  197. func (s *Store) GetDataCenter() string {
  198. return s.dataCenter
  199. }
  200. func (s *Store) GetRack() string {
  201. return s.rack
  202. }
  203. func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
  204. var volumeMessages []*master_pb.VolumeInformationMessage
  205. maxVolumeCounts := make(map[string]uint32)
  206. var maxFileKey NeedleId
  207. collectionVolumeSize := make(map[string]int64)
  208. collectionVolumeReadOnlyCount := make(map[string]map[string]uint8)
  209. for _, location := range s.Locations {
  210. var deleteVids []needle.VolumeId
  211. maxVolumeCounts[string(location.DiskType)] += uint32(location.MaxVolumeCount)
  212. location.volumesLock.RLock()
  213. for _, v := range location.volumes {
  214. curMaxFileKey, volumeMessage := v.ToVolumeInformationMessage()
  215. if volumeMessage == nil {
  216. continue
  217. }
  218. if maxFileKey < curMaxFileKey {
  219. maxFileKey = curMaxFileKey
  220. }
  221. shouldDeleteVolume := false
  222. if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) {
  223. volumeMessages = append(volumeMessages, volumeMessage)
  224. } else {
  225. if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
  226. deleteVids = append(deleteVids, v.Id)
  227. shouldDeleteVolume = true
  228. } else {
  229. glog.V(0).Infof("volume %d is expired", v.Id)
  230. }
  231. if v.lastIoError != nil {
  232. deleteVids = append(deleteVids, v.Id)
  233. shouldDeleteVolume = true
  234. glog.Warningf("volume %d has IO error: %v", v.Id, v.lastIoError)
  235. }
  236. }
  237. if _, exist := collectionVolumeSize[v.Collection]; !exist {
  238. collectionVolumeSize[v.Collection] = 0
  239. }
  240. if !shouldDeleteVolume {
  241. collectionVolumeSize[v.Collection] += int64(volumeMessage.Size)
  242. } else {
  243. collectionVolumeSize[v.Collection] -= int64(volumeMessage.Size)
  244. if collectionVolumeSize[v.Collection] <= 0 {
  245. delete(collectionVolumeSize, v.Collection)
  246. }
  247. }
  248. if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist {
  249. collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{
  250. stats.IsReadOnly: 0,
  251. stats.NoWriteOrDelete: 0,
  252. stats.NoWriteCanDelete: 0,
  253. stats.IsDiskSpaceLow: 0,
  254. }
  255. }
  256. if !shouldDeleteVolume && v.IsReadOnly() {
  257. collectionVolumeReadOnlyCount[v.Collection][stats.IsReadOnly] += 1
  258. if v.noWriteOrDelete {
  259. collectionVolumeReadOnlyCount[v.Collection][stats.NoWriteOrDelete] += 1
  260. }
  261. if v.noWriteCanDelete {
  262. collectionVolumeReadOnlyCount[v.Collection][stats.NoWriteCanDelete] += 1
  263. }
  264. if v.location.isDiskSpaceLow {
  265. collectionVolumeReadOnlyCount[v.Collection][stats.IsDiskSpaceLow] += 1
  266. }
  267. }
  268. }
  269. location.volumesLock.RUnlock()
  270. if len(deleteVids) > 0 {
  271. // delete expired volumes.
  272. location.volumesLock.Lock()
  273. for _, vid := range deleteVids {
  274. found, err := location.deleteVolumeById(vid)
  275. if err == nil {
  276. if found {
  277. glog.V(0).Infof("volume %d is deleted", vid)
  278. }
  279. } else {
  280. glog.Warningf("delete volume %d: %v", vid, err)
  281. }
  282. }
  283. location.volumesLock.Unlock()
  284. }
  285. }
  286. var uuidList []string
  287. for _, loc := range s.Locations {
  288. uuidList = append(uuidList, loc.DirectoryUuid)
  289. }
  290. for col, size := range collectionVolumeSize {
  291. stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "normal").Set(float64(size))
  292. }
  293. for col, types := range collectionVolumeReadOnlyCount {
  294. for t, count := range types {
  295. stats.VolumeServerReadOnlyVolumeGauge.WithLabelValues(col, t).Set(float64(count))
  296. }
  297. }
  298. return &master_pb.Heartbeat{
  299. Ip: s.Ip,
  300. Port: uint32(s.Port),
  301. GrpcPort: uint32(s.GrpcPort),
  302. PublicUrl: s.PublicUrl,
  303. MaxVolumeCounts: maxVolumeCounts,
  304. MaxFileKey: NeedleIdToUint64(maxFileKey),
  305. DataCenter: s.dataCenter,
  306. Rack: s.rack,
  307. Volumes: volumeMessages,
  308. HasNoVolumes: len(volumeMessages) == 0,
  309. LocationUuids: uuidList,
  310. }
  311. }
  312. func (s *Store) SetStopping() {
  313. s.isStopping = true
  314. for _, location := range s.Locations {
  315. location.SetStopping()
  316. }
  317. }
  318. func (s *Store) Close() {
  319. for _, location := range s.Locations {
  320. location.Close()
  321. }
  322. }
  323. func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle, checkCookie bool, fsync bool) (isUnchanged bool, err error) {
  324. if v := s.findVolume(i); v != nil {
  325. if v.IsReadOnly() {
  326. err = fmt.Errorf("volume %d is read only", i)
  327. return
  328. }
  329. _, _, isUnchanged, err = v.writeNeedle2(n, checkCookie, fsync && s.isStopping)
  330. return
  331. }
  332. glog.V(0).Infoln("volume", i, "not found!")
  333. err = fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  334. return
  335. }
  336. func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (Size, error) {
  337. if v := s.findVolume(i); v != nil {
  338. if v.noWriteOrDelete {
  339. return 0, fmt.Errorf("volume %d is read only", i)
  340. }
  341. return v.deleteNeedle2(n)
  342. }
  343. return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  344. }
  345. func (s *Store) ReadVolumeNeedle(i needle.VolumeId, n *needle.Needle, readOption *ReadOption, onReadSizeFn func(size Size)) (int, error) {
  346. if v := s.findVolume(i); v != nil {
  347. return v.readNeedle(n, readOption, onReadSizeFn)
  348. }
  349. return 0, fmt.Errorf("volume %d not found", i)
  350. }
  351. func (s *Store) ReadVolumeNeedleMetaAt(i needle.VolumeId, n *needle.Needle, offset int64, size int32) error {
  352. if v := s.findVolume(i); v != nil {
  353. return v.readNeedleMetaAt(n, offset, size)
  354. }
  355. return fmt.Errorf("volume %d not found", i)
  356. }
  357. func (s *Store) ReadVolumeNeedleDataInto(i needle.VolumeId, n *needle.Needle, readOption *ReadOption, writer io.Writer, offset int64, size int64) error {
  358. if v := s.findVolume(i); v != nil {
  359. return v.readNeedleDataInto(n, readOption, writer, offset, size)
  360. }
  361. return fmt.Errorf("volume %d not found", i)
  362. }
  363. func (s *Store) GetVolume(i needle.VolumeId) *Volume {
  364. return s.findVolume(i)
  365. }
  366. func (s *Store) HasVolume(i needle.VolumeId) bool {
  367. v := s.findVolume(i)
  368. return v != nil
  369. }
  370. func (s *Store) MarkVolumeReadonly(i needle.VolumeId) error {
  371. v := s.findVolume(i)
  372. if v == nil {
  373. return fmt.Errorf("volume %d not found", i)
  374. }
  375. v.noWriteLock.Lock()
  376. v.noWriteOrDelete = true
  377. v.noWriteLock.Unlock()
  378. return nil
  379. }
  380. func (s *Store) MarkVolumeWritable(i needle.VolumeId) error {
  381. v := s.findVolume(i)
  382. if v == nil {
  383. return fmt.Errorf("volume %d not found", i)
  384. }
  385. v.noWriteLock.Lock()
  386. v.noWriteOrDelete = false
  387. v.noWriteLock.Unlock()
  388. return nil
  389. }
  390. func (s *Store) MountVolume(i needle.VolumeId) error {
  391. for _, location := range s.Locations {
  392. if found := location.LoadVolume(i, s.NeedleMapKind); found == true {
  393. glog.V(0).Infof("mount volume %d", i)
  394. v := s.findVolume(i)
  395. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  396. Id: uint32(v.Id),
  397. Collection: v.Collection,
  398. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  399. Version: uint32(v.Version()),
  400. Ttl: v.Ttl.ToUint32(),
  401. DiskType: string(v.location.DiskType),
  402. }
  403. return nil
  404. }
  405. }
  406. return fmt.Errorf("volume %d not found on disk", i)
  407. }
  408. func (s *Store) UnmountVolume(i needle.VolumeId) error {
  409. v := s.findVolume(i)
  410. if v == nil {
  411. return nil
  412. }
  413. message := master_pb.VolumeShortInformationMessage{
  414. Id: uint32(v.Id),
  415. Collection: v.Collection,
  416. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  417. Version: uint32(v.Version()),
  418. Ttl: v.Ttl.ToUint32(),
  419. DiskType: string(v.location.DiskType),
  420. }
  421. for _, location := range s.Locations {
  422. err := location.UnloadVolume(i)
  423. if err == nil {
  424. glog.V(0).Infof("UnmountVolume %d", i)
  425. stats.DeleteCollectionMetrics(v.Collection)
  426. s.DeletedVolumesChan <- message
  427. return nil
  428. } else if err == ErrVolumeNotFound {
  429. continue
  430. }
  431. }
  432. return fmt.Errorf("volume %d not found on disk", i)
  433. }
  434. func (s *Store) DeleteVolume(i needle.VolumeId) error {
  435. v := s.findVolume(i)
  436. if v == nil {
  437. return fmt.Errorf("delete volume %d not found on disk", i)
  438. }
  439. message := master_pb.VolumeShortInformationMessage{
  440. Id: uint32(v.Id),
  441. Collection: v.Collection,
  442. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  443. Version: uint32(v.Version()),
  444. Ttl: v.Ttl.ToUint32(),
  445. DiskType: string(v.location.DiskType),
  446. }
  447. for _, location := range s.Locations {
  448. err := location.DeleteVolume(i)
  449. if err == nil {
  450. glog.V(0).Infof("DeleteVolume %d", i)
  451. s.DeletedVolumesChan <- message
  452. return nil
  453. } else if err == ErrVolumeNotFound {
  454. continue
  455. } else {
  456. glog.Errorf("DeleteVolume %d: %v", i, err)
  457. }
  458. }
  459. return fmt.Errorf("volume %d not found on disk", i)
  460. }
  461. func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error {
  462. for _, location := range s.Locations {
  463. fileInfo, found := location.LocateVolume(i)
  464. if !found {
  465. continue
  466. }
  467. // load, modify, save
  468. baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name()))
  469. vifFile := filepath.Join(location.Directory, baseFileName+".vif")
  470. volumeInfo, _, _, err := volume_info.MaybeLoadVolumeInfo(vifFile)
  471. if err != nil {
  472. return fmt.Errorf("volume %d fail to load vif: %v", i, err)
  473. }
  474. volumeInfo.Replication = replication
  475. err = volume_info.SaveVolumeInfo(vifFile, volumeInfo)
  476. if err != nil {
  477. return fmt.Errorf("volume %d fail to save vif: %v", i, err)
  478. }
  479. return nil
  480. }
  481. return fmt.Errorf("volume %d not found on disk", i)
  482. }
  483. func (s *Store) SetVolumeSizeLimit(x uint64) {
  484. atomic.StoreUint64(&s.volumeSizeLimit, x)
  485. }
  486. func (s *Store) GetVolumeSizeLimit() uint64 {
  487. return atomic.LoadUint64(&s.volumeSizeLimit)
  488. }
  489. func (s *Store) MaybeAdjustVolumeMax() (hasChanges bool) {
  490. volumeSizeLimit := s.GetVolumeSizeLimit()
  491. if volumeSizeLimit == 0 {
  492. return
  493. }
  494. for _, diskLocation := range s.Locations {
  495. if diskLocation.OriginalMaxVolumeCount == 0 {
  496. currentMaxVolumeCount := atomic.LoadInt32(&diskLocation.MaxVolumeCount)
  497. diskStatus := stats.NewDiskStatus(diskLocation.Directory)
  498. unusedSpace := diskLocation.UnUsedSpace(volumeSizeLimit)
  499. unclaimedSpaces := int64(diskStatus.Free) - int64(unusedSpace)
  500. volCount := diskLocation.VolumesLen()
  501. maxVolumeCount := int32(volCount)
  502. if unclaimedSpaces > int64(volumeSizeLimit) {
  503. maxVolumeCount += int32(uint64(unclaimedSpaces)/volumeSizeLimit) - 1
  504. }
  505. atomic.StoreInt32(&diskLocation.MaxVolumeCount, maxVolumeCount)
  506. glog.V(4).Infof("disk %s max %d unclaimedSpace:%dMB, unused:%dMB volumeSizeLimit:%dMB",
  507. diskLocation.Directory, maxVolumeCount, unclaimedSpaces/1024/1024, unusedSpace/1024/1024, volumeSizeLimit/1024/1024)
  508. hasChanges = hasChanges || currentMaxVolumeCount != atomic.LoadInt32(&diskLocation.MaxVolumeCount)
  509. }
  510. }
  511. return
  512. }