You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

545 lines
17 KiB

6 years ago
6 years ago
6 years ago
12 years ago
12 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
  1. package storage
  2. import (
  3. "fmt"
  4. "path/filepath"
  5. "strings"
  6. "sync/atomic"
  7. "github.com/chrislusf/seaweedfs/weed/pb"
  8. "github.com/chrislusf/seaweedfs/weed/storage/volume_info"
  9. "github.com/chrislusf/seaweedfs/weed/util"
  10. "google.golang.org/grpc"
  11. "github.com/chrislusf/seaweedfs/weed/glog"
  12. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  13. "github.com/chrislusf/seaweedfs/weed/stats"
  14. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  15. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  16. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  17. . "github.com/chrislusf/seaweedfs/weed/storage/types"
  18. )
  19. const (
  20. MAX_TTL_VOLUME_REMOVAL_DELAY = 10 // 10 minutes
  21. )
  22. type ReadOption struct {
  23. ReadDeleted bool
  24. }
  25. /*
  26. * A VolumeServer contains one Store
  27. */
  28. type Store struct {
  29. MasterAddress pb.ServerAddress
  30. grpcDialOption grpc.DialOption
  31. volumeSizeLimit uint64 // read from the master
  32. Ip string
  33. Port int
  34. GrpcPort int
  35. PublicUrl string
  36. Locations []*DiskLocation
  37. LocationUUIDs []string
  38. dataCenter string // optional informaton, overwriting master setting if exists
  39. rack string // optional information, overwriting master setting if exists
  40. connected bool
  41. NeedleMapKind NeedleMapKind
  42. NewVolumesChan chan master_pb.VolumeShortInformationMessage
  43. DeletedVolumesChan chan master_pb.VolumeShortInformationMessage
  44. NewEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  45. DeletedEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  46. isStopping bool
  47. }
  48. func (s *Store) String() (str string) {
  49. str = fmt.Sprintf("Ip:%s, Port:%d, GrpcPort:%d PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.GrpcPort, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
  50. return
  51. }
  52. func NewStore(grpcDialOption grpc.DialOption, ip string, port int, grpcPort int, publicUrl string, dirnames []string, maxVolumeCounts []int,
  53. minFreeSpaces []util.MinFreeSpace, idxFolder string, needleMapKind NeedleMapKind, diskTypes []DiskType) (s *Store) {
  54. s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, GrpcPort: grpcPort, PublicUrl: publicUrl, NeedleMapKind: needleMapKind}
  55. s.Locations = make([]*DiskLocation, 0)
  56. for i := 0; i < len(dirnames); i++ {
  57. location := NewDiskLocation(dirnames[i], maxVolumeCounts[i], minFreeSpaces[i], idxFolder, diskTypes[i])
  58. location.loadExistingVolumes(needleMapKind)
  59. s.Locations = append(s.Locations, location)
  60. dirUUID, _ := GenerateDirUUID(dirnames[i])
  61. s.LocationUUIDs = append(s.LocationUUIDs, dirUUID)
  62. stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i]))
  63. }
  64. s.NewVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  65. s.DeletedVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  66. s.NewEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  67. s.DeletedEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  68. return
  69. }
  70. func (s *Store) AddVolume(volumeId needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement string, ttlString string, preallocate int64, MemoryMapMaxSizeMb uint32, diskType DiskType) error {
  71. rt, e := super_block.NewReplicaPlacementFromString(replicaPlacement)
  72. if e != nil {
  73. return e
  74. }
  75. ttl, e := needle.ReadTTL(ttlString)
  76. if e != nil {
  77. return e
  78. }
  79. e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate, MemoryMapMaxSizeMb, diskType)
  80. return e
  81. }
  82. func (s *Store) DeleteCollection(collection string) (e error) {
  83. for _, location := range s.Locations {
  84. e = location.DeleteCollectionFromDiskLocation(collection)
  85. if e != nil {
  86. return
  87. }
  88. // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumesChan
  89. }
  90. return
  91. }
  92. func (s *Store) findVolume(vid needle.VolumeId) *Volume {
  93. for _, location := range s.Locations {
  94. if v, found := location.FindVolume(vid); found {
  95. return v
  96. }
  97. }
  98. return nil
  99. }
  100. func (s *Store) FindFreeLocation(diskType DiskType) (ret *DiskLocation) {
  101. max := 0
  102. for _, location := range s.Locations {
  103. if diskType != location.DiskType {
  104. continue
  105. }
  106. if location.isDiskSpaceLow {
  107. continue
  108. }
  109. currentFreeCount := location.MaxVolumeCount - location.VolumesLen()
  110. currentFreeCount *= erasure_coding.DataShardsCount
  111. currentFreeCount -= location.EcVolumesLen()
  112. currentFreeCount /= erasure_coding.DataShardsCount
  113. if currentFreeCount > max {
  114. max = currentFreeCount
  115. ret = location
  116. }
  117. }
  118. return ret
  119. }
  120. func (s *Store) addVolume(vid needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32, diskType DiskType) error {
  121. if s.findVolume(vid) != nil {
  122. return fmt.Errorf("Volume Id %d already exists!", vid)
  123. }
  124. if location := s.FindFreeLocation(diskType); location != nil {
  125. glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
  126. location.Directory, vid, collection, replicaPlacement, ttl)
  127. if volume, err := NewVolume(location.Directory, location.IdxDirectory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate, memoryMapMaxSizeMb); err == nil {
  128. location.SetVolume(vid, volume)
  129. glog.V(0).Infof("add volume %d", vid)
  130. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  131. Id: uint32(vid),
  132. Collection: collection,
  133. ReplicaPlacement: uint32(replicaPlacement.Byte()),
  134. Version: uint32(volume.Version()),
  135. Ttl: ttl.ToUint32(),
  136. DiskType: string(diskType),
  137. }
  138. return nil
  139. } else {
  140. return err
  141. }
  142. }
  143. return fmt.Errorf("No more free space left")
  144. }
  145. func (s *Store) VolumeInfos() (allStats []*VolumeInfo) {
  146. for _, location := range s.Locations {
  147. stats := collectStatsForOneLocation(location)
  148. allStats = append(allStats, stats...)
  149. }
  150. sortVolumeInfos(allStats)
  151. return allStats
  152. }
  153. func collectStatsForOneLocation(location *DiskLocation) (stats []*VolumeInfo) {
  154. location.volumesLock.RLock()
  155. defer location.volumesLock.RUnlock()
  156. for k, v := range location.volumes {
  157. s := collectStatForOneVolume(k, v)
  158. stats = append(stats, s)
  159. }
  160. return stats
  161. }
  162. func collectStatForOneVolume(vid needle.VolumeId, v *Volume) (s *VolumeInfo) {
  163. s = &VolumeInfo{
  164. Id: vid,
  165. Collection: v.Collection,
  166. ReplicaPlacement: v.ReplicaPlacement,
  167. Version: v.Version(),
  168. ReadOnly: v.IsReadOnly(),
  169. Ttl: v.Ttl,
  170. CompactRevision: uint32(v.CompactionRevision),
  171. DiskType: v.DiskType().String(),
  172. }
  173. s.RemoteStorageName, s.RemoteStorageKey = v.RemoteStorageNameKey()
  174. v.dataFileAccessLock.RLock()
  175. defer v.dataFileAccessLock.RUnlock()
  176. if v.nm == nil {
  177. return
  178. }
  179. s.FileCount = v.nm.FileCount()
  180. s.DeleteCount = v.nm.DeletedCount()
  181. s.DeletedByteCount = v.nm.DeletedSize()
  182. s.Size = v.nm.ContentSize()
  183. return
  184. }
  185. func (s *Store) SetDataCenter(dataCenter string) {
  186. s.dataCenter = dataCenter
  187. }
  188. func (s *Store) SetRack(rack string) {
  189. s.rack = rack
  190. }
  191. func (s *Store) GetDataCenter() string {
  192. return s.dataCenter
  193. }
  194. func (s *Store) GetRack() string {
  195. return s.rack
  196. }
  197. func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
  198. var volumeMessages []*master_pb.VolumeInformationMessage
  199. maxVolumeCounts := make(map[string]uint32)
  200. var maxFileKey NeedleId
  201. collectionVolumeSize := make(map[string]uint64)
  202. collectionVolumeReadOnlyCount := make(map[string]map[string]uint8)
  203. for _, location := range s.Locations {
  204. var deleteVids []needle.VolumeId
  205. maxVolumeCounts[string(location.DiskType)] += uint32(location.MaxVolumeCount)
  206. location.volumesLock.RLock()
  207. for _, v := range location.volumes {
  208. curMaxFileKey, volumeMessage := v.ToVolumeInformationMessage()
  209. if volumeMessage == nil {
  210. continue
  211. }
  212. if maxFileKey < curMaxFileKey {
  213. maxFileKey = curMaxFileKey
  214. }
  215. deleteVolume := false
  216. if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) {
  217. volumeMessages = append(volumeMessages, volumeMessage)
  218. } else {
  219. if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
  220. deleteVids = append(deleteVids, v.Id)
  221. deleteVolume = true
  222. } else {
  223. glog.V(0).Infof("volume %d is expired", v.Id)
  224. }
  225. if v.lastIoError != nil {
  226. deleteVids = append(deleteVids, v.Id)
  227. deleteVolume = true
  228. glog.Warningf("volume %d has IO error: %v", v.Id, v.lastIoError)
  229. }
  230. }
  231. if _, exist := collectionVolumeSize[v.Collection]; !exist {
  232. collectionVolumeSize[v.Collection] = 0
  233. }
  234. if !deleteVolume {
  235. collectionVolumeSize[v.Collection] += volumeMessage.Size
  236. } else {
  237. collectionVolumeSize[v.Collection] -= volumeMessage.Size
  238. if collectionVolumeSize[v.Collection] <= 0 {
  239. delete(collectionVolumeSize, v.Collection)
  240. }
  241. }
  242. if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist {
  243. collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{
  244. "IsReadOnly": 0,
  245. "noWriteOrDelete": 0,
  246. "noWriteCanDelete": 0,
  247. "isDiskSpaceLow": 0,
  248. }
  249. }
  250. if !deleteVolume && v.IsReadOnly() {
  251. collectionVolumeReadOnlyCount[v.Collection]["IsReadOnly"] += 1
  252. if v.noWriteOrDelete {
  253. collectionVolumeReadOnlyCount[v.Collection]["noWriteOrDelete"] += 1
  254. }
  255. if v.noWriteCanDelete {
  256. collectionVolumeReadOnlyCount[v.Collection]["noWriteCanDelete"] += 1
  257. }
  258. if v.location.isDiskSpaceLow {
  259. collectionVolumeReadOnlyCount[v.Collection]["isDiskSpaceLow"] += 1
  260. }
  261. }
  262. }
  263. location.volumesLock.RUnlock()
  264. if len(deleteVids) > 0 {
  265. // delete expired volumes.
  266. location.volumesLock.Lock()
  267. for _, vid := range deleteVids {
  268. found, err := location.deleteVolumeById(vid)
  269. if err == nil {
  270. if found {
  271. glog.V(0).Infof("volume %d is deleted", vid)
  272. }
  273. } else {
  274. glog.Warningf("delete volume %d: %v", vid, err)
  275. }
  276. }
  277. location.volumesLock.Unlock()
  278. }
  279. }
  280. var UUIDList []string
  281. for _, locationUUID := range s.LocationUUIDs {
  282. UUIDList = append(UUIDList, locationUUID)
  283. }
  284. for col, size := range collectionVolumeSize {
  285. stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "normal").Set(float64(size))
  286. }
  287. for col, types := range collectionVolumeReadOnlyCount {
  288. for t, count := range types {
  289. stats.VolumeServerReadOnlyVolumeGauge.WithLabelValues(col, t).Set(float64(count))
  290. }
  291. }
  292. return &master_pb.Heartbeat{
  293. Ip: s.Ip,
  294. Port: uint32(s.Port),
  295. GrpcPort: uint32(s.GrpcPort),
  296. PublicUrl: s.PublicUrl,
  297. MaxVolumeCounts: maxVolumeCounts,
  298. MaxFileKey: NeedleIdToUint64(maxFileKey),
  299. DataCenter: s.dataCenter,
  300. Rack: s.rack,
  301. Volumes: volumeMessages,
  302. HasNoVolumes: len(volumeMessages) == 0,
  303. LocationUUIDs: UUIDList,
  304. }
  305. }
  306. func (s *Store) SetStopping() {
  307. s.isStopping = true
  308. for _, location := range s.Locations {
  309. location.SetStopping()
  310. }
  311. }
  312. func (s *Store) Close() {
  313. for _, location := range s.Locations {
  314. location.Close()
  315. }
  316. }
  317. func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle, checkCookie bool, fsync bool) (isUnchanged bool, err error) {
  318. if v := s.findVolume(i); v != nil {
  319. if v.IsReadOnly() {
  320. err = fmt.Errorf("volume %d is read only", i)
  321. return
  322. }
  323. _, _, isUnchanged, err = v.writeNeedle2(n, checkCookie, fsync && s.isStopping)
  324. return
  325. }
  326. glog.V(0).Infoln("volume", i, "not found!")
  327. err = fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  328. return
  329. }
  330. func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (Size, error) {
  331. if v := s.findVolume(i); v != nil {
  332. if v.noWriteOrDelete {
  333. return 0, fmt.Errorf("volume %d is read only", i)
  334. }
  335. return v.deleteNeedle2(n)
  336. }
  337. return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  338. }
  339. func (s *Store) ReadVolumeNeedle(i needle.VolumeId, n *needle.Needle, readOption *ReadOption, onReadSizeFn func(size Size)) (int, error) {
  340. if v := s.findVolume(i); v != nil {
  341. return v.readNeedle(n, readOption, onReadSizeFn)
  342. }
  343. return 0, fmt.Errorf("volume %d not found", i)
  344. }
  345. func (s *Store) GetVolume(i needle.VolumeId) *Volume {
  346. return s.findVolume(i)
  347. }
  348. func (s *Store) HasVolume(i needle.VolumeId) bool {
  349. v := s.findVolume(i)
  350. return v != nil
  351. }
  352. func (s *Store) MarkVolumeReadonly(i needle.VolumeId) error {
  353. v := s.findVolume(i)
  354. if v == nil {
  355. return fmt.Errorf("volume %d not found", i)
  356. }
  357. v.noWriteLock.Lock()
  358. v.noWriteOrDelete = true
  359. v.noWriteLock.Unlock()
  360. return nil
  361. }
  362. func (s *Store) MarkVolumeWritable(i needle.VolumeId) error {
  363. v := s.findVolume(i)
  364. if v == nil {
  365. return fmt.Errorf("volume %d not found", i)
  366. }
  367. v.noWriteLock.Lock()
  368. v.noWriteOrDelete = false
  369. v.noWriteLock.Unlock()
  370. return nil
  371. }
  372. func (s *Store) MountVolume(i needle.VolumeId) error {
  373. for _, location := range s.Locations {
  374. if found := location.LoadVolume(i, s.NeedleMapKind); found == true {
  375. glog.V(0).Infof("mount volume %d", i)
  376. v := s.findVolume(i)
  377. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  378. Id: uint32(v.Id),
  379. Collection: v.Collection,
  380. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  381. Version: uint32(v.Version()),
  382. Ttl: v.Ttl.ToUint32(),
  383. DiskType: string(v.location.DiskType),
  384. }
  385. return nil
  386. }
  387. }
  388. return fmt.Errorf("volume %d not found on disk", i)
  389. }
  390. func (s *Store) UnmountVolume(i needle.VolumeId) error {
  391. v := s.findVolume(i)
  392. if v == nil {
  393. return nil
  394. }
  395. message := master_pb.VolumeShortInformationMessage{
  396. Id: uint32(v.Id),
  397. Collection: v.Collection,
  398. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  399. Version: uint32(v.Version()),
  400. Ttl: v.Ttl.ToUint32(),
  401. DiskType: string(v.location.DiskType),
  402. }
  403. for _, location := range s.Locations {
  404. err := location.UnloadVolume(i)
  405. if err == nil {
  406. glog.V(0).Infof("UnmountVolume %d", i)
  407. s.DeletedVolumesChan <- message
  408. return nil
  409. } else if err == ErrVolumeNotFound {
  410. continue
  411. }
  412. }
  413. return fmt.Errorf("volume %d not found on disk", i)
  414. }
  415. func (s *Store) DeleteVolume(i needle.VolumeId) error {
  416. v := s.findVolume(i)
  417. if v == nil {
  418. return fmt.Errorf("delete volume %d not found on disk", i)
  419. }
  420. message := master_pb.VolumeShortInformationMessage{
  421. Id: uint32(v.Id),
  422. Collection: v.Collection,
  423. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  424. Version: uint32(v.Version()),
  425. Ttl: v.Ttl.ToUint32(),
  426. DiskType: string(v.location.DiskType),
  427. }
  428. for _, location := range s.Locations {
  429. err := location.DeleteVolume(i)
  430. if err == nil {
  431. glog.V(0).Infof("DeleteVolume %d", i)
  432. s.DeletedVolumesChan <- message
  433. return nil
  434. } else if err == ErrVolumeNotFound {
  435. continue
  436. } else {
  437. glog.Errorf("DeleteVolume %d: %v", i, err)
  438. }
  439. }
  440. return fmt.Errorf("volume %d not found on disk", i)
  441. }
  442. func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error {
  443. for _, location := range s.Locations {
  444. fileInfo, found := location.LocateVolume(i)
  445. if !found {
  446. continue
  447. }
  448. // load, modify, save
  449. baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name()))
  450. vifFile := filepath.Join(location.Directory, baseFileName+".vif")
  451. volumeInfo, _, _, err := volume_info.MaybeLoadVolumeInfo(vifFile)
  452. if err != nil {
  453. return fmt.Errorf("volume %d fail to load vif", i)
  454. }
  455. volumeInfo.Replication = replication
  456. err = volume_info.SaveVolumeInfo(vifFile, volumeInfo)
  457. if err != nil {
  458. return fmt.Errorf("volume %d fail to save vif", i)
  459. }
  460. return nil
  461. }
  462. return fmt.Errorf("volume %d not found on disk", i)
  463. }
  464. func (s *Store) SetVolumeSizeLimit(x uint64) {
  465. atomic.StoreUint64(&s.volumeSizeLimit, x)
  466. }
  467. func (s *Store) GetVolumeSizeLimit() uint64 {
  468. return atomic.LoadUint64(&s.volumeSizeLimit)
  469. }
  470. func (s *Store) MaybeAdjustVolumeMax() (hasChanges bool) {
  471. volumeSizeLimit := s.GetVolumeSizeLimit()
  472. if volumeSizeLimit == 0 {
  473. return
  474. }
  475. for _, diskLocation := range s.Locations {
  476. if diskLocation.OriginalMaxVolumeCount == 0 {
  477. currentMaxVolumeCount := diskLocation.MaxVolumeCount
  478. diskStatus := stats.NewDiskStatus(diskLocation.Directory)
  479. unusedSpace := diskLocation.UnUsedSpace(volumeSizeLimit)
  480. unclaimedSpaces := int64(diskStatus.Free) - int64(unusedSpace)
  481. volCount := diskLocation.VolumesLen()
  482. maxVolumeCount := volCount
  483. if unclaimedSpaces > int64(volumeSizeLimit) {
  484. maxVolumeCount += int(uint64(unclaimedSpaces)/volumeSizeLimit) - 1
  485. }
  486. diskLocation.MaxVolumeCount = maxVolumeCount
  487. glog.V(2).Infof("disk %s max %d unclaimedSpace:%dMB, unused:%dMB volumeSizeLimit:%dMB",
  488. diskLocation.Directory, maxVolumeCount, unclaimedSpaces/1024/1024, unusedSpace/1024/1024, volumeSizeLimit/1024/1024)
  489. hasChanges = hasChanges || currentMaxVolumeCount != diskLocation.MaxVolumeCount
  490. }
  491. }
  492. return
  493. }