556 lines
17 KiB

6 years ago
6 years ago
6 years ago
12 years ago
12 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
  1. package storage
  2. import (
  3. "fmt"
  4. "io"
  5. "path/filepath"
  6. "strings"
  7. "sync/atomic"
  8. "github.com/chrislusf/seaweedfs/weed/pb"
  9. "github.com/chrislusf/seaweedfs/weed/storage/volume_info"
  10. "github.com/chrislusf/seaweedfs/weed/util"
  11. "google.golang.org/grpc"
  12. "github.com/chrislusf/seaweedfs/weed/glog"
  13. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  14. "github.com/chrislusf/seaweedfs/weed/stats"
  15. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  16. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  17. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  18. . "github.com/chrislusf/seaweedfs/weed/storage/types"
  19. )
  // MAX_TTL_VOLUME_REMOVAL_DELAY is the delay handed to Volume.expiredLongEnough
  // when the heartbeat decides whether an expired TTL volume may be removed.
  const MAX_TTL_VOLUME_REMOVAL_DELAY = 10 // 10 minutes
  // ReadOption bundles the flags a caller sets for a needle read together with
  // the status fields that are filled in as the response.
  type ReadOption struct {
  	// Request flags, set by the caller.
  	ReadDeleted     bool
  	AttemptMetaOnly bool
  	MustMetaOnly    bool

  	// Response fields.
  	IsMetaOnly     bool // read status
  	VolumeRevision uint16
  	IsOutOfRange   bool // whether read over MaxPossibleVolumeSize
  }
  33. /*
  34. * A VolumeServer contains one Store
  35. */
  36. type Store struct {
  37. MasterAddress pb.ServerAddress
  38. grpcDialOption grpc.DialOption
  39. volumeSizeLimit uint64 // read from the master
  40. Ip string
  41. Port int
  42. GrpcPort int
  43. PublicUrl string
  44. Locations []*DiskLocation
  45. dataCenter string // optional informaton, overwriting master setting if exists
  46. rack string // optional information, overwriting master setting if exists
  47. connected bool
  48. NeedleMapKind NeedleMapKind
  49. NewVolumesChan chan master_pb.VolumeShortInformationMessage
  50. DeletedVolumesChan chan master_pb.VolumeShortInformationMessage
  51. NewEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  52. DeletedEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  53. isStopping bool
  54. }
  55. func (s *Store) String() (str string) {
  56. str = fmt.Sprintf("Ip:%s, Port:%d, GrpcPort:%d PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.GrpcPort, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
  57. return
  58. }
  59. func NewStore(grpcDialOption grpc.DialOption, ip string, port int, grpcPort int, publicUrl string, dirnames []string, maxVolumeCounts []int,
  60. minFreeSpaces []util.MinFreeSpace, idxFolder string, needleMapKind NeedleMapKind, diskTypes []DiskType) (s *Store) {
  61. s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, GrpcPort: grpcPort, PublicUrl: publicUrl, NeedleMapKind: needleMapKind}
  62. s.Locations = make([]*DiskLocation, 0)
  63. for i := 0; i < len(dirnames); i++ {
  64. location := NewDiskLocation(dirnames[i], maxVolumeCounts[i], minFreeSpaces[i], idxFolder, diskTypes[i])
  65. location.loadExistingVolumes(needleMapKind)
  66. s.Locations = append(s.Locations, location)
  67. stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i]))
  68. }
  69. s.NewVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  70. s.DeletedVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  71. s.NewEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  72. s.DeletedEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  73. return
  74. }
  75. func (s *Store) AddVolume(volumeId needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement string, ttlString string, preallocate int64, MemoryMapMaxSizeMb uint32, diskType DiskType) error {
  76. rt, e := super_block.NewReplicaPlacementFromString(replicaPlacement)
  77. if e != nil {
  78. return e
  79. }
  80. ttl, e := needle.ReadTTL(ttlString)
  81. if e != nil {
  82. return e
  83. }
  84. e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate, MemoryMapMaxSizeMb, diskType)
  85. return e
  86. }
  87. func (s *Store) DeleteCollection(collection string) (e error) {
  88. for _, location := range s.Locations {
  89. e = location.DeleteCollectionFromDiskLocation(collection)
  90. if e != nil {
  91. return
  92. }
  93. // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumesChan
  94. }
  95. return
  96. }
  97. func (s *Store) findVolume(vid needle.VolumeId) *Volume {
  98. for _, location := range s.Locations {
  99. if v, found := location.FindVolume(vid); found {
  100. return v
  101. }
  102. }
  103. return nil
  104. }
  105. func (s *Store) FindFreeLocation(diskType DiskType) (ret *DiskLocation) {
  106. max := 0
  107. for _, location := range s.Locations {
  108. if diskType != location.DiskType {
  109. continue
  110. }
  111. if location.isDiskSpaceLow {
  112. continue
  113. }
  114. currentFreeCount := location.MaxVolumeCount - location.VolumesLen()
  115. currentFreeCount *= erasure_coding.DataShardsCount
  116. currentFreeCount -= location.EcVolumesLen()
  117. currentFreeCount /= erasure_coding.DataShardsCount
  118. if currentFreeCount > max {
  119. max = currentFreeCount
  120. ret = location
  121. }
  122. }
  123. return ret
  124. }
  125. func (s *Store) addVolume(vid needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32, diskType DiskType) error {
  126. if s.findVolume(vid) != nil {
  127. return fmt.Errorf("Volume Id %d already exists!", vid)
  128. }
  129. if location := s.FindFreeLocation(diskType); location != nil {
  130. glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
  131. location.Directory, vid, collection, replicaPlacement, ttl)
  132. if volume, err := NewVolume(location.Directory, location.IdxDirectory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate, memoryMapMaxSizeMb); err == nil {
  133. location.SetVolume(vid, volume)
  134. glog.V(0).Infof("add volume %d", vid)
  135. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  136. Id: uint32(vid),
  137. Collection: collection,
  138. ReplicaPlacement: uint32(replicaPlacement.Byte()),
  139. Version: uint32(volume.Version()),
  140. Ttl: ttl.ToUint32(),
  141. DiskType: string(diskType),
  142. }
  143. return nil
  144. } else {
  145. return err
  146. }
  147. }
  148. return fmt.Errorf("No more free space left")
  149. }
  150. func (s *Store) VolumeInfos() (allStats []*VolumeInfo) {
  151. for _, location := range s.Locations {
  152. stats := collectStatsForOneLocation(location)
  153. allStats = append(allStats, stats...)
  154. }
  155. sortVolumeInfos(allStats)
  156. return allStats
  157. }
  158. func collectStatsForOneLocation(location *DiskLocation) (stats []*VolumeInfo) {
  159. location.volumesLock.RLock()
  160. defer location.volumesLock.RUnlock()
  161. for k, v := range location.volumes {
  162. s := collectStatForOneVolume(k, v)
  163. stats = append(stats, s)
  164. }
  165. return stats
  166. }
  167. func collectStatForOneVolume(vid needle.VolumeId, v *Volume) (s *VolumeInfo) {
  168. s = &VolumeInfo{
  169. Id: vid,
  170. Collection: v.Collection,
  171. ReplicaPlacement: v.ReplicaPlacement,
  172. Version: v.Version(),
  173. ReadOnly: v.IsReadOnly(),
  174. Ttl: v.Ttl,
  175. CompactRevision: uint32(v.CompactionRevision),
  176. DiskType: v.DiskType().String(),
  177. }
  178. s.RemoteStorageName, s.RemoteStorageKey = v.RemoteStorageNameKey()
  179. v.dataFileAccessLock.RLock()
  180. defer v.dataFileAccessLock.RUnlock()
  181. if v.nm == nil {
  182. return
  183. }
  184. s.FileCount = v.nm.FileCount()
  185. s.DeleteCount = v.nm.DeletedCount()
  186. s.DeletedByteCount = v.nm.DeletedSize()
  187. s.Size = v.nm.ContentSize()
  188. return
  189. }
  190. func (s *Store) SetDataCenter(dataCenter string) {
  191. s.dataCenter = dataCenter
  192. }
  193. func (s *Store) SetRack(rack string) {
  194. s.rack = rack
  195. }
  196. func (s *Store) GetDataCenter() string {
  197. return s.dataCenter
  198. }
  199. func (s *Store) GetRack() string {
  200. return s.rack
  201. }
  202. func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
  203. var volumeMessages []*master_pb.VolumeInformationMessage
  204. maxVolumeCounts := make(map[string]uint32)
  205. var maxFileKey NeedleId
  206. collectionVolumeSize := make(map[string]uint64)
  207. collectionVolumeReadOnlyCount := make(map[string]map[string]uint8)
  208. for _, location := range s.Locations {
  209. var deleteVids []needle.VolumeId
  210. maxVolumeCounts[string(location.DiskType)] += uint32(location.MaxVolumeCount)
  211. location.volumesLock.RLock()
  212. for _, v := range location.volumes {
  213. curMaxFileKey, volumeMessage := v.ToVolumeInformationMessage()
  214. if volumeMessage == nil {
  215. continue
  216. }
  217. if maxFileKey < curMaxFileKey {
  218. maxFileKey = curMaxFileKey
  219. }
  220. deleteVolume := false
  221. if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) {
  222. volumeMessages = append(volumeMessages, volumeMessage)
  223. } else {
  224. if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
  225. deleteVids = append(deleteVids, v.Id)
  226. deleteVolume = true
  227. } else {
  228. glog.V(0).Infof("volume %d is expired", v.Id)
  229. }
  230. if v.lastIoError != nil {
  231. deleteVids = append(deleteVids, v.Id)
  232. deleteVolume = true
  233. glog.Warningf("volume %d has IO error: %v", v.Id, v.lastIoError)
  234. }
  235. }
  236. if _, exist := collectionVolumeSize[v.Collection]; !exist {
  237. collectionVolumeSize[v.Collection] = 0
  238. }
  239. if !deleteVolume {
  240. collectionVolumeSize[v.Collection] += volumeMessage.Size
  241. } else {
  242. collectionVolumeSize[v.Collection] -= volumeMessage.Size
  243. if collectionVolumeSize[v.Collection] <= 0 {
  244. delete(collectionVolumeSize, v.Collection)
  245. }
  246. }
  247. if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist {
  248. collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{
  249. "IsReadOnly": 0,
  250. "noWriteOrDelete": 0,
  251. "noWriteCanDelete": 0,
  252. "isDiskSpaceLow": 0,
  253. }
  254. }
  255. if !deleteVolume && v.IsReadOnly() {
  256. collectionVolumeReadOnlyCount[v.Collection]["IsReadOnly"] += 1
  257. if v.noWriteOrDelete {
  258. collectionVolumeReadOnlyCount[v.Collection]["noWriteOrDelete"] += 1
  259. }
  260. if v.noWriteCanDelete {
  261. collectionVolumeReadOnlyCount[v.Collection]["noWriteCanDelete"] += 1
  262. }
  263. if v.location.isDiskSpaceLow {
  264. collectionVolumeReadOnlyCount[v.Collection]["isDiskSpaceLow"] += 1
  265. }
  266. }
  267. }
  268. location.volumesLock.RUnlock()
  269. if len(deleteVids) > 0 {
  270. // delete expired volumes.
  271. location.volumesLock.Lock()
  272. for _, vid := range deleteVids {
  273. found, err := location.deleteVolumeById(vid)
  274. if err == nil {
  275. if found {
  276. glog.V(0).Infof("volume %d is deleted", vid)
  277. }
  278. } else {
  279. glog.Warningf("delete volume %d: %v", vid, err)
  280. }
  281. }
  282. location.volumesLock.Unlock()
  283. }
  284. }
  285. var uuidList []string
  286. for _, loc := range s.Locations {
  287. uuidList = append(uuidList, loc.DirectoryUuid)
  288. }
  289. for col, size := range collectionVolumeSize {
  290. stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "normal").Set(float64(size))
  291. }
  292. for col, types := range collectionVolumeReadOnlyCount {
  293. for t, count := range types {
  294. stats.VolumeServerReadOnlyVolumeGauge.WithLabelValues(col, t).Set(float64(count))
  295. }
  296. }
  297. return &master_pb.Heartbeat{
  298. Ip: s.Ip,
  299. Port: uint32(s.Port),
  300. GrpcPort: uint32(s.GrpcPort),
  301. PublicUrl: s.PublicUrl,
  302. MaxVolumeCounts: maxVolumeCounts,
  303. MaxFileKey: NeedleIdToUint64(maxFileKey),
  304. DataCenter: s.dataCenter,
  305. Rack: s.rack,
  306. Volumes: volumeMessages,
  307. HasNoVolumes: len(volumeMessages) == 0,
  308. LocationUuids: uuidList,
  309. }
  310. }
  311. func (s *Store) SetStopping() {
  312. s.isStopping = true
  313. for _, location := range s.Locations {
  314. location.SetStopping()
  315. }
  316. }
  317. func (s *Store) Close() {
  318. for _, location := range s.Locations {
  319. location.Close()
  320. }
  321. }
  322. func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle, checkCookie bool, fsync bool) (isUnchanged bool, err error) {
  323. if v := s.findVolume(i); v != nil {
  324. if v.IsReadOnly() {
  325. err = fmt.Errorf("volume %d is read only", i)
  326. return
  327. }
  328. _, _, isUnchanged, err = v.writeNeedle2(n, checkCookie, fsync && s.isStopping)
  329. return
  330. }
  331. glog.V(0).Infoln("volume", i, "not found!")
  332. err = fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  333. return
  334. }
  335. func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (Size, error) {
  336. if v := s.findVolume(i); v != nil {
  337. if v.noWriteOrDelete {
  338. return 0, fmt.Errorf("volume %d is read only", i)
  339. }
  340. return v.deleteNeedle2(n)
  341. }
  342. return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  343. }
  344. func (s *Store) ReadVolumeNeedle(i needle.VolumeId, n *needle.Needle, readOption *ReadOption, onReadSizeFn func(size Size)) (int, error) {
  345. if v := s.findVolume(i); v != nil {
  346. return v.readNeedle(n, readOption, onReadSizeFn)
  347. }
  348. return 0, fmt.Errorf("volume %d not found", i)
  349. }
  350. func (s *Store) ReadVolumeNeedleDataInto(i needle.VolumeId, n *needle.Needle, readOption *ReadOption, writer io.Writer, offset int64, size int64) error {
  351. if v := s.findVolume(i); v != nil {
  352. return v.readNeedleDataInto(n, readOption, writer, offset, size)
  353. }
  354. return fmt.Errorf("volume %d not found", i)
  355. }
  356. func (s *Store) GetVolume(i needle.VolumeId) *Volume {
  357. return s.findVolume(i)
  358. }
  359. func (s *Store) HasVolume(i needle.VolumeId) bool {
  360. v := s.findVolume(i)
  361. return v != nil
  362. }
  363. func (s *Store) MarkVolumeReadonly(i needle.VolumeId) error {
  364. v := s.findVolume(i)
  365. if v == nil {
  366. return fmt.Errorf("volume %d not found", i)
  367. }
  368. v.noWriteLock.Lock()
  369. v.noWriteOrDelete = true
  370. v.noWriteLock.Unlock()
  371. return nil
  372. }
  373. func (s *Store) MarkVolumeWritable(i needle.VolumeId) error {
  374. v := s.findVolume(i)
  375. if v == nil {
  376. return fmt.Errorf("volume %d not found", i)
  377. }
  378. v.noWriteLock.Lock()
  379. v.noWriteOrDelete = false
  380. v.noWriteLock.Unlock()
  381. return nil
  382. }
  383. func (s *Store) MountVolume(i needle.VolumeId) error {
  384. for _, location := range s.Locations {
  385. if found := location.LoadVolume(i, s.NeedleMapKind); found == true {
  386. glog.V(0).Infof("mount volume %d", i)
  387. v := s.findVolume(i)
  388. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  389. Id: uint32(v.Id),
  390. Collection: v.Collection,
  391. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  392. Version: uint32(v.Version()),
  393. Ttl: v.Ttl.ToUint32(),
  394. DiskType: string(v.location.DiskType),
  395. }
  396. return nil
  397. }
  398. }
  399. return fmt.Errorf("volume %d not found on disk", i)
  400. }
  401. func (s *Store) UnmountVolume(i needle.VolumeId) error {
  402. v := s.findVolume(i)
  403. if v == nil {
  404. return nil
  405. }
  406. message := master_pb.VolumeShortInformationMessage{
  407. Id: uint32(v.Id),
  408. Collection: v.Collection,
  409. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  410. Version: uint32(v.Version()),
  411. Ttl: v.Ttl.ToUint32(),
  412. DiskType: string(v.location.DiskType),
  413. }
  414. for _, location := range s.Locations {
  415. err := location.UnloadVolume(i)
  416. if err == nil {
  417. glog.V(0).Infof("UnmountVolume %d", i)
  418. s.DeletedVolumesChan <- message
  419. return nil
  420. } else if err == ErrVolumeNotFound {
  421. continue
  422. }
  423. }
  424. return fmt.Errorf("volume %d not found on disk", i)
  425. }
  426. func (s *Store) DeleteVolume(i needle.VolumeId) error {
  427. v := s.findVolume(i)
  428. if v == nil {
  429. return fmt.Errorf("delete volume %d not found on disk", i)
  430. }
  431. message := master_pb.VolumeShortInformationMessage{
  432. Id: uint32(v.Id),
  433. Collection: v.Collection,
  434. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  435. Version: uint32(v.Version()),
  436. Ttl: v.Ttl.ToUint32(),
  437. DiskType: string(v.location.DiskType),
  438. }
  439. for _, location := range s.Locations {
  440. err := location.DeleteVolume(i)
  441. if err == nil {
  442. glog.V(0).Infof("DeleteVolume %d", i)
  443. s.DeletedVolumesChan <- message
  444. return nil
  445. } else if err == ErrVolumeNotFound {
  446. continue
  447. } else {
  448. glog.Errorf("DeleteVolume %d: %v", i, err)
  449. }
  450. }
  451. return fmt.Errorf("volume %d not found on disk", i)
  452. }
  453. func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error {
  454. for _, location := range s.Locations {
  455. fileInfo, found := location.LocateVolume(i)
  456. if !found {
  457. continue
  458. }
  459. // load, modify, save
  460. baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name()))
  461. vifFile := filepath.Join(location.Directory, baseFileName+".vif")
  462. volumeInfo, _, _, err := volume_info.MaybeLoadVolumeInfo(vifFile)
  463. if err != nil {
  464. return fmt.Errorf("volume %d fail to load vif", i)
  465. }
  466. volumeInfo.Replication = replication
  467. err = volume_info.SaveVolumeInfo(vifFile, volumeInfo)
  468. if err != nil {
  469. return fmt.Errorf("volume %d fail to save vif", i)
  470. }
  471. return nil
  472. }
  473. return fmt.Errorf("volume %d not found on disk", i)
  474. }
  475. func (s *Store) SetVolumeSizeLimit(x uint64) {
  476. atomic.StoreUint64(&s.volumeSizeLimit, x)
  477. }
  478. func (s *Store) GetVolumeSizeLimit() uint64 {
  479. return atomic.LoadUint64(&s.volumeSizeLimit)
  480. }
  481. func (s *Store) MaybeAdjustVolumeMax() (hasChanges bool) {
  482. volumeSizeLimit := s.GetVolumeSizeLimit()
  483. if volumeSizeLimit == 0 {
  484. return
  485. }
  486. for _, diskLocation := range s.Locations {
  487. if diskLocation.OriginalMaxVolumeCount == 0 {
  488. currentMaxVolumeCount := diskLocation.MaxVolumeCount
  489. diskStatus := stats.NewDiskStatus(diskLocation.Directory)
  490. unusedSpace := diskLocation.UnUsedSpace(volumeSizeLimit)
  491. unclaimedSpaces := int64(diskStatus.Free) - int64(unusedSpace)
  492. volCount := diskLocation.VolumesLen()
  493. maxVolumeCount := volCount
  494. if unclaimedSpaces > int64(volumeSizeLimit) {
  495. maxVolumeCount += int(uint64(unclaimedSpaces)/volumeSizeLimit) - 1
  496. }
  497. diskLocation.MaxVolumeCount = maxVolumeCount
  498. glog.V(2).Infof("disk %s max %d unclaimedSpace:%dMB, unused:%dMB volumeSizeLimit:%dMB",
  499. diskLocation.Directory, maxVolumeCount, unclaimedSpaces/1024/1024, unusedSpace/1024/1024, volumeSizeLimit/1024/1024)
  500. hasChanges = hasChanges || currentMaxVolumeCount != diskLocation.MaxVolumeCount
  501. }
  502. }
  503. return
  504. }