You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

575 lines
18 KiB

6 years ago
6 years ago
6 years ago
12 years ago
12 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
  1. package storage
  2. import (
  3. "fmt"
  4. "io"
  5. "path/filepath"
  6. "strings"
  7. "sync/atomic"
  8. "github.com/seaweedfs/seaweedfs/weed/pb"
  9. "github.com/seaweedfs/seaweedfs/weed/storage/volume_info"
  10. "github.com/seaweedfs/seaweedfs/weed/util"
  11. "google.golang.org/grpc"
  12. "github.com/seaweedfs/seaweedfs/weed/glog"
  13. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  14. "github.com/seaweedfs/seaweedfs/weed/stats"
  15. "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
  16. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  17. "github.com/seaweedfs/seaweedfs/weed/storage/super_block"
  18. . "github.com/seaweedfs/seaweedfs/weed/storage/types"
  19. )
  20. const (
  21. MAX_TTL_VOLUME_REMOVAL_DELAY = 10 // 10 minutes
  22. )
  23. type ReadOption struct {
  24. // request
  25. ReadDeleted bool
  26. AttemptMetaOnly bool
  27. MustMetaOnly bool
  28. // response
  29. IsMetaOnly bool // read status
  30. VolumeRevision uint16
  31. IsOutOfRange bool // whether read over MaxPossibleVolumeSize
  32. // If HasSlowRead is set to true:
  33. // * read requests and write requests compete for the lock.
  34. // * large file read P99 latency on busy sites will go up, due to the need to get locks multiple times.
  35. // * write requests will see lower latency.
  36. // If HasSlowRead is set to false:
  37. // * read requests should complete asap, not blocking other requests.
  38. // * write requests may see high latency when downloading large files.
  39. HasSlowRead bool
  40. }
  41. /*
  42. * A VolumeServer contains one Store
  43. */
  44. type Store struct {
  45. MasterAddress pb.ServerAddress
  46. grpcDialOption grpc.DialOption
  47. volumeSizeLimit uint64 // read from the master
  48. Ip string
  49. Port int
  50. GrpcPort int
  51. PublicUrl string
  52. Locations []*DiskLocation
  53. dataCenter string // optional informaton, overwriting master setting if exists
  54. rack string // optional information, overwriting master setting if exists
  55. connected bool
  56. NeedleMapKind NeedleMapKind
  57. NewVolumesChan chan master_pb.VolumeShortInformationMessage
  58. DeletedVolumesChan chan master_pb.VolumeShortInformationMessage
  59. NewEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  60. DeletedEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  61. isStopping bool
  62. }
  63. func (s *Store) String() (str string) {
  64. str = fmt.Sprintf("Ip:%s, Port:%d, GrpcPort:%d PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.GrpcPort, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
  65. return
  66. }
  67. func NewStore(grpcDialOption grpc.DialOption, ip string, port int, grpcPort int, publicUrl string, dirnames []string, maxVolumeCounts []int32,
  68. minFreeSpaces []util.MinFreeSpace, idxFolder string, needleMapKind NeedleMapKind, diskTypes []DiskType) (s *Store) {
  69. s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, GrpcPort: grpcPort, PublicUrl: publicUrl, NeedleMapKind: needleMapKind}
  70. s.Locations = make([]*DiskLocation, 0)
  71. for i := 0; i < len(dirnames); i++ {
  72. location := NewDiskLocation(dirnames[i], int32(maxVolumeCounts[i]), minFreeSpaces[i], idxFolder, diskTypes[i])
  73. location.loadExistingVolumes(needleMapKind)
  74. s.Locations = append(s.Locations, location)
  75. stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i]))
  76. }
  77. s.NewVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  78. s.DeletedVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  79. s.NewEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  80. s.DeletedEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  81. return
  82. }
  83. func (s *Store) AddVolume(volumeId needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement string, ttlString string, preallocate int64, MemoryMapMaxSizeMb uint32, diskType DiskType) error {
  84. rt, e := super_block.NewReplicaPlacementFromString(replicaPlacement)
  85. if e != nil {
  86. return e
  87. }
  88. ttl, e := needle.ReadTTL(ttlString)
  89. if e != nil {
  90. return e
  91. }
  92. e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate, MemoryMapMaxSizeMb, diskType)
  93. return e
  94. }
  95. func (s *Store) DeleteCollection(collection string) (e error) {
  96. for _, location := range s.Locations {
  97. e = location.DeleteCollectionFromDiskLocation(collection)
  98. if e != nil {
  99. return
  100. }
  101. stats.DeleteCollectionMetrics(collection)
  102. // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumesChan
  103. }
  104. return
  105. }
  106. func (s *Store) findVolume(vid needle.VolumeId) *Volume {
  107. for _, location := range s.Locations {
  108. if v, found := location.FindVolume(vid); found {
  109. return v
  110. }
  111. }
  112. return nil
  113. }
  114. func (s *Store) FindFreeLocation(diskType DiskType) (ret *DiskLocation) {
  115. max := int32(0)
  116. for _, location := range s.Locations {
  117. if diskType != location.DiskType {
  118. continue
  119. }
  120. if location.isDiskSpaceLow {
  121. continue
  122. }
  123. currentFreeCount := location.MaxVolumeCount - int32(location.VolumesLen())
  124. currentFreeCount *= erasure_coding.DataShardsCount
  125. currentFreeCount -= int32(location.EcVolumesLen())
  126. currentFreeCount /= erasure_coding.DataShardsCount
  127. if currentFreeCount > max {
  128. max = currentFreeCount
  129. ret = location
  130. }
  131. }
  132. return ret
  133. }
  134. func (s *Store) addVolume(vid needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32, diskType DiskType) error {
  135. if s.findVolume(vid) != nil {
  136. return fmt.Errorf("Volume Id %d already exists!", vid)
  137. }
  138. if location := s.FindFreeLocation(diskType); location != nil {
  139. glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
  140. location.Directory, vid, collection, replicaPlacement, ttl)
  141. if volume, err := NewVolume(location.Directory, location.IdxDirectory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate, memoryMapMaxSizeMb); err == nil {
  142. location.SetVolume(vid, volume)
  143. glog.V(0).Infof("add volume %d", vid)
  144. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  145. Id: uint32(vid),
  146. Collection: collection,
  147. ReplicaPlacement: uint32(replicaPlacement.Byte()),
  148. Version: uint32(volume.Version()),
  149. Ttl: ttl.ToUint32(),
  150. DiskType: string(diskType),
  151. }
  152. return nil
  153. } else {
  154. return err
  155. }
  156. }
  157. return fmt.Errorf("No more free space left")
  158. }
  159. func (s *Store) VolumeInfos() (allStats []*VolumeInfo) {
  160. for _, location := range s.Locations {
  161. stats := collectStatsForOneLocation(location)
  162. allStats = append(allStats, stats...)
  163. }
  164. sortVolumeInfos(allStats)
  165. return allStats
  166. }
  167. func collectStatsForOneLocation(location *DiskLocation) (stats []*VolumeInfo) {
  168. location.volumesLock.RLock()
  169. defer location.volumesLock.RUnlock()
  170. for k, v := range location.volumes {
  171. s := collectStatForOneVolume(k, v)
  172. stats = append(stats, s)
  173. }
  174. return stats
  175. }
  176. func collectStatForOneVolume(vid needle.VolumeId, v *Volume) (s *VolumeInfo) {
  177. s = &VolumeInfo{
  178. Id: vid,
  179. Collection: v.Collection,
  180. ReplicaPlacement: v.ReplicaPlacement,
  181. Version: v.Version(),
  182. ReadOnly: v.IsReadOnly(),
  183. Ttl: v.Ttl,
  184. CompactRevision: uint32(v.CompactionRevision),
  185. DiskType: v.DiskType().String(),
  186. }
  187. s.RemoteStorageName, s.RemoteStorageKey = v.RemoteStorageNameKey()
  188. v.dataFileAccessLock.RLock()
  189. defer v.dataFileAccessLock.RUnlock()
  190. if v.nm == nil {
  191. return
  192. }
  193. s.FileCount = v.nm.FileCount()
  194. s.DeleteCount = v.nm.DeletedCount()
  195. s.DeletedByteCount = v.nm.DeletedSize()
  196. s.Size = v.nm.ContentSize()
  197. return
  198. }
  199. func (s *Store) SetDataCenter(dataCenter string) {
  200. s.dataCenter = dataCenter
  201. }
  202. func (s *Store) SetRack(rack string) {
  203. s.rack = rack
  204. }
  205. func (s *Store) GetDataCenter() string {
  206. return s.dataCenter
  207. }
  208. func (s *Store) GetRack() string {
  209. return s.rack
  210. }
  211. func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
  212. var volumeMessages []*master_pb.VolumeInformationMessage
  213. maxVolumeCounts := make(map[string]uint32)
  214. var maxFileKey NeedleId
  215. collectionVolumeSize := make(map[string]int64)
  216. collectionVolumeReadOnlyCount := make(map[string]map[string]uint8)
  217. for _, location := range s.Locations {
  218. var deleteVids []needle.VolumeId
  219. maxVolumeCounts[string(location.DiskType)] += uint32(location.MaxVolumeCount)
  220. location.volumesLock.RLock()
  221. for _, v := range location.volumes {
  222. curMaxFileKey, volumeMessage := v.ToVolumeInformationMessage()
  223. if volumeMessage == nil {
  224. continue
  225. }
  226. if maxFileKey < curMaxFileKey {
  227. maxFileKey = curMaxFileKey
  228. }
  229. shouldDeleteVolume := false
  230. if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) {
  231. volumeMessages = append(volumeMessages, volumeMessage)
  232. } else {
  233. if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
  234. deleteVids = append(deleteVids, v.Id)
  235. shouldDeleteVolume = true
  236. } else {
  237. glog.V(0).Infof("volume %d is expired", v.Id)
  238. }
  239. if v.lastIoError != nil {
  240. deleteVids = append(deleteVids, v.Id)
  241. shouldDeleteVolume = true
  242. glog.Warningf("volume %d has IO error: %v", v.Id, v.lastIoError)
  243. }
  244. }
  245. if _, exist := collectionVolumeSize[v.Collection]; !exist {
  246. collectionVolumeSize[v.Collection] = 0
  247. }
  248. if !shouldDeleteVolume {
  249. collectionVolumeSize[v.Collection] += int64(volumeMessage.Size)
  250. } else {
  251. collectionVolumeSize[v.Collection] -= int64(volumeMessage.Size)
  252. if collectionVolumeSize[v.Collection] <= 0 {
  253. delete(collectionVolumeSize, v.Collection)
  254. }
  255. }
  256. if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist {
  257. collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{
  258. stats.IsReadOnly: 0,
  259. stats.NoWriteOrDelete: 0,
  260. stats.NoWriteCanDelete: 0,
  261. stats.IsDiskSpaceLow: 0,
  262. }
  263. }
  264. if !shouldDeleteVolume && v.IsReadOnly() {
  265. collectionVolumeReadOnlyCount[v.Collection][stats.IsReadOnly] += 1
  266. if v.noWriteOrDelete {
  267. collectionVolumeReadOnlyCount[v.Collection][stats.NoWriteOrDelete] += 1
  268. }
  269. if v.noWriteCanDelete {
  270. collectionVolumeReadOnlyCount[v.Collection][stats.NoWriteCanDelete] += 1
  271. }
  272. if v.location.isDiskSpaceLow {
  273. collectionVolumeReadOnlyCount[v.Collection][stats.IsDiskSpaceLow] += 1
  274. }
  275. }
  276. }
  277. location.volumesLock.RUnlock()
  278. if len(deleteVids) > 0 {
  279. // delete expired volumes.
  280. location.volumesLock.Lock()
  281. for _, vid := range deleteVids {
  282. found, err := location.deleteVolumeById(vid)
  283. if err == nil {
  284. if found {
  285. glog.V(0).Infof("volume %d is deleted", vid)
  286. }
  287. } else {
  288. glog.Warningf("delete volume %d: %v", vid, err)
  289. }
  290. }
  291. location.volumesLock.Unlock()
  292. }
  293. }
  294. var uuidList []string
  295. for _, loc := range s.Locations {
  296. uuidList = append(uuidList, loc.DirectoryUuid)
  297. }
  298. for col, size := range collectionVolumeSize {
  299. stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "normal").Set(float64(size))
  300. }
  301. for col, types := range collectionVolumeReadOnlyCount {
  302. for t, count := range types {
  303. stats.VolumeServerReadOnlyVolumeGauge.WithLabelValues(col, t).Set(float64(count))
  304. }
  305. }
  306. return &master_pb.Heartbeat{
  307. Ip: s.Ip,
  308. Port: uint32(s.Port),
  309. GrpcPort: uint32(s.GrpcPort),
  310. PublicUrl: s.PublicUrl,
  311. MaxVolumeCounts: maxVolumeCounts,
  312. MaxFileKey: NeedleIdToUint64(maxFileKey),
  313. DataCenter: s.dataCenter,
  314. Rack: s.rack,
  315. Volumes: volumeMessages,
  316. HasNoVolumes: len(volumeMessages) == 0,
  317. LocationUuids: uuidList,
  318. }
  319. }
  320. func (s *Store) SetStopping() {
  321. s.isStopping = true
  322. for _, location := range s.Locations {
  323. location.SetStopping()
  324. }
  325. }
  326. func (s *Store) Close() {
  327. for _, location := range s.Locations {
  328. location.Close()
  329. }
  330. }
  331. func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle, checkCookie bool, fsync bool) (isUnchanged bool, err error) {
  332. if v := s.findVolume(i); v != nil {
  333. if v.IsReadOnly() {
  334. err = fmt.Errorf("volume %d is read only", i)
  335. return
  336. }
  337. _, _, isUnchanged, err = v.writeNeedle2(n, checkCookie, fsync && s.isStopping)
  338. return
  339. }
  340. glog.V(0).Infoln("volume", i, "not found!")
  341. err = fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  342. return
  343. }
  344. func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (Size, error) {
  345. if v := s.findVolume(i); v != nil {
  346. if v.noWriteOrDelete {
  347. return 0, fmt.Errorf("volume %d is read only", i)
  348. }
  349. return v.deleteNeedle2(n)
  350. }
  351. return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  352. }
  353. func (s *Store) ReadVolumeNeedle(i needle.VolumeId, n *needle.Needle, readOption *ReadOption, onReadSizeFn func(size Size)) (int, error) {
  354. if v := s.findVolume(i); v != nil {
  355. return v.readNeedle(n, readOption, onReadSizeFn)
  356. }
  357. return 0, fmt.Errorf("volume %d not found", i)
  358. }
  359. func (s *Store) ReadVolumeNeedleMetaAt(i needle.VolumeId, n *needle.Needle, offset int64, size int32) error {
  360. if v := s.findVolume(i); v != nil {
  361. return v.readNeedleMetaAt(n, offset, size)
  362. }
  363. return fmt.Errorf("volume %d not found", i)
  364. }
  365. func (s *Store) ReadVolumeNeedleDataInto(i needle.VolumeId, n *needle.Needle, readOption *ReadOption, writer io.Writer, offset int64, size int64) error {
  366. if v := s.findVolume(i); v != nil {
  367. return v.readNeedleDataInto(n, readOption, writer, offset, size)
  368. }
  369. return fmt.Errorf("volume %d not found", i)
  370. }
  371. func (s *Store) GetVolume(i needle.VolumeId) *Volume {
  372. return s.findVolume(i)
  373. }
  374. func (s *Store) HasVolume(i needle.VolumeId) bool {
  375. v := s.findVolume(i)
  376. return v != nil
  377. }
  378. func (s *Store) MarkVolumeReadonly(i needle.VolumeId) error {
  379. v := s.findVolume(i)
  380. if v == nil {
  381. return fmt.Errorf("volume %d not found", i)
  382. }
  383. v.noWriteLock.Lock()
  384. v.noWriteOrDelete = true
  385. v.noWriteLock.Unlock()
  386. return nil
  387. }
  388. func (s *Store) MarkVolumeWritable(i needle.VolumeId) error {
  389. v := s.findVolume(i)
  390. if v == nil {
  391. return fmt.Errorf("volume %d not found", i)
  392. }
  393. v.noWriteLock.Lock()
  394. v.noWriteOrDelete = false
  395. v.noWriteLock.Unlock()
  396. return nil
  397. }
  398. func (s *Store) MountVolume(i needle.VolumeId) error {
  399. for _, location := range s.Locations {
  400. if found := location.LoadVolume(i, s.NeedleMapKind); found == true {
  401. glog.V(0).Infof("mount volume %d", i)
  402. v := s.findVolume(i)
  403. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  404. Id: uint32(v.Id),
  405. Collection: v.Collection,
  406. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  407. Version: uint32(v.Version()),
  408. Ttl: v.Ttl.ToUint32(),
  409. DiskType: string(v.location.DiskType),
  410. }
  411. return nil
  412. }
  413. }
  414. return fmt.Errorf("volume %d not found on disk", i)
  415. }
  416. func (s *Store) UnmountVolume(i needle.VolumeId) error {
  417. v := s.findVolume(i)
  418. if v == nil {
  419. return nil
  420. }
  421. message := master_pb.VolumeShortInformationMessage{
  422. Id: uint32(v.Id),
  423. Collection: v.Collection,
  424. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  425. Version: uint32(v.Version()),
  426. Ttl: v.Ttl.ToUint32(),
  427. DiskType: string(v.location.DiskType),
  428. }
  429. for _, location := range s.Locations {
  430. err := location.UnloadVolume(i)
  431. if err == nil {
  432. glog.V(0).Infof("UnmountVolume %d", i)
  433. stats.DeleteCollectionMetrics(v.Collection)
  434. s.DeletedVolumesChan <- message
  435. return nil
  436. } else if err == ErrVolumeNotFound {
  437. continue
  438. }
  439. }
  440. return fmt.Errorf("volume %d not found on disk", i)
  441. }
  442. func (s *Store) DeleteVolume(i needle.VolumeId) error {
  443. v := s.findVolume(i)
  444. if v == nil {
  445. return fmt.Errorf("delete volume %d not found on disk", i)
  446. }
  447. message := master_pb.VolumeShortInformationMessage{
  448. Id: uint32(v.Id),
  449. Collection: v.Collection,
  450. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  451. Version: uint32(v.Version()),
  452. Ttl: v.Ttl.ToUint32(),
  453. DiskType: string(v.location.DiskType),
  454. }
  455. for _, location := range s.Locations {
  456. err := location.DeleteVolume(i)
  457. if err == nil {
  458. glog.V(0).Infof("DeleteVolume %d", i)
  459. s.DeletedVolumesChan <- message
  460. return nil
  461. } else if err == ErrVolumeNotFound {
  462. continue
  463. } else {
  464. glog.Errorf("DeleteVolume %d: %v", i, err)
  465. }
  466. }
  467. return fmt.Errorf("volume %d not found on disk", i)
  468. }
  469. func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error {
  470. for _, location := range s.Locations {
  471. fileInfo, found := location.LocateVolume(i)
  472. if !found {
  473. continue
  474. }
  475. // load, modify, save
  476. baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name()))
  477. vifFile := filepath.Join(location.Directory, baseFileName+".vif")
  478. volumeInfo, _, _, err := volume_info.MaybeLoadVolumeInfo(vifFile)
  479. if err != nil {
  480. return fmt.Errorf("volume %d fail to load vif: %v", i, err)
  481. }
  482. volumeInfo.Replication = replication
  483. err = volume_info.SaveVolumeInfo(vifFile, volumeInfo)
  484. if err != nil {
  485. return fmt.Errorf("volume %d fail to save vif: %v", i, err)
  486. }
  487. return nil
  488. }
  489. return fmt.Errorf("volume %d not found on disk", i)
  490. }
  491. func (s *Store) SetVolumeSizeLimit(x uint64) {
  492. atomic.StoreUint64(&s.volumeSizeLimit, x)
  493. }
  494. func (s *Store) GetVolumeSizeLimit() uint64 {
  495. return atomic.LoadUint64(&s.volumeSizeLimit)
  496. }
  497. func (s *Store) MaybeAdjustVolumeMax() (hasChanges bool) {
  498. volumeSizeLimit := s.GetVolumeSizeLimit()
  499. if volumeSizeLimit == 0 {
  500. return
  501. }
  502. for _, diskLocation := range s.Locations {
  503. if diskLocation.OriginalMaxVolumeCount == 0 {
  504. currentMaxVolumeCount := atomic.LoadInt32(&diskLocation.MaxVolumeCount)
  505. diskStatus := stats.NewDiskStatus(diskLocation.Directory)
  506. unusedSpace := diskLocation.UnUsedSpace(volumeSizeLimit)
  507. unclaimedSpaces := int64(diskStatus.Free) - int64(unusedSpace)
  508. volCount := diskLocation.VolumesLen()
  509. maxVolumeCount := int32(volCount)
  510. if unclaimedSpaces > int64(volumeSizeLimit) {
  511. maxVolumeCount += int32(uint64(unclaimedSpaces)/volumeSizeLimit) - 1
  512. }
  513. atomic.StoreInt32(&diskLocation.MaxVolumeCount, maxVolumeCount)
  514. glog.V(4).Infof("disk %s max %d unclaimedSpace:%dMB, unused:%dMB volumeSizeLimit:%dMB",
  515. diskLocation.Directory, maxVolumeCount, unclaimedSpaces/1024/1024, unusedSpace/1024/1024, volumeSizeLimit/1024/1024)
  516. hasChanges = hasChanges || currentMaxVolumeCount != atomic.LoadInt32(&diskLocation.MaxVolumeCount)
  517. }
  518. }
  519. return
  520. }