You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

518 lines
16 KiB

6 years ago
6 years ago
6 years ago
12 years ago
12 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
5 years ago
  1. package storage
  2. import (
  3. "fmt"
  4. "github.com/chrislusf/seaweedfs/weed/util"
  5. "path/filepath"
  6. "strings"
  7. "sync/atomic"
  8. "google.golang.org/grpc"
  9. "github.com/chrislusf/seaweedfs/weed/glog"
  10. "github.com/chrislusf/seaweedfs/weed/pb"
  11. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  12. "github.com/chrislusf/seaweedfs/weed/stats"
  13. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  14. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  15. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  16. . "github.com/chrislusf/seaweedfs/weed/storage/types"
  17. )
const (
	// MAX_TTL_VOLUME_REMOVAL_DELAY is the delay handed to Volume.expiredLongEnough
	// by CollectHeartbeat when it decides whether an already expired volume
	// should be queued for deletion.
	MAX_TTL_VOLUME_REMOVAL_DELAY = 10 // 10 minutes
)
// ReadOption carries per-request read flags. ReadVolumeNeedle forwards it
// unchanged to Volume.readNeedle.
type ReadOption struct {
	// ReadDeleted presumably lets a read return needles that were already
	// deleted — TODO confirm against Volume.readNeedle.
	ReadDeleted bool
}
/*
 * A VolumeServer contains one Store.
 *
 * A Store groups the disk locations of one volume server and exposes the
 * volume level operations (add, mount, read, write, delete, heartbeat).
 */
type Store struct {
	MasterAddress   string
	grpcDialOption  grpc.DialOption // supplied by the caller of NewStore
	volumeSizeLimit uint64          // read from the master; accessed atomically through SetVolumeSizeLimit / GetVolumeSizeLimit
	// Ip, Port and PublicUrl are reported in every heartbeat and in error messages.
	Ip        string
	Port      int
	PublicUrl string
	// Locations holds one DiskLocation per directory passed to NewStore.
	Locations  []*DiskLocation
	dataCenter string // optional information, overwriting master setting if exists
	rack       string // optional information, overwriting master setting if exists
	connected  bool
	// NeedleMapKind is the needle map kind used when volumes are loaded or mounted.
	NeedleMapKind NeedleMapKind
	// NewVolumesChan receives a message whenever a volume is added or mounted.
	NewVolumesChan chan master_pb.VolumeShortInformationMessage
	// DeletedVolumesChan receives a message whenever a volume is unmounted or deleted.
	DeletedVolumesChan chan master_pb.VolumeShortInformationMessage
	// NewEcShardsChan and DeletedEcShardsChan are created by NewStore; their
	// producers are outside this part of the file.
	NewEcShardsChan     chan master_pb.VolumeEcShardInformationMessage
	DeletedEcShardsChan chan master_pb.VolumeEcShardInformationMessage
}
  44. func (s *Store) String() (str string) {
  45. str = fmt.Sprintf("Ip:%s, Port:%d, PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
  46. return
  47. }
  48. func NewStore(grpcDialOption grpc.DialOption, port int, ip, publicUrl string, dirnames []string, maxVolumeCounts []int,
  49. minFreeSpaces []util.MinFreeSpace, idxFolder string, needleMapKind NeedleMapKind, diskTypes []DiskType) (s *Store) {
  50. s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, PublicUrl: publicUrl, NeedleMapKind: needleMapKind}
  51. s.Locations = make([]*DiskLocation, 0)
  52. for i := 0; i < len(dirnames); i++ {
  53. location := NewDiskLocation(dirnames[i], maxVolumeCounts[i], minFreeSpaces[i], idxFolder, diskTypes[i])
  54. location.loadExistingVolumes(needleMapKind)
  55. s.Locations = append(s.Locations, location)
  56. stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i]))
  57. }
  58. s.NewVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  59. s.DeletedVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  60. s.NewEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  61. s.DeletedEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  62. return
  63. }
  64. func (s *Store) AddVolume(volumeId needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement string, ttlString string, preallocate int64, MemoryMapMaxSizeMb uint32, diskType DiskType) error {
  65. rt, e := super_block.NewReplicaPlacementFromString(replicaPlacement)
  66. if e != nil {
  67. return e
  68. }
  69. ttl, e := needle.ReadTTL(ttlString)
  70. if e != nil {
  71. return e
  72. }
  73. e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate, MemoryMapMaxSizeMb, diskType)
  74. return e
  75. }
  76. func (s *Store) DeleteCollection(collection string) (e error) {
  77. for _, location := range s.Locations {
  78. e = location.DeleteCollectionFromDiskLocation(collection)
  79. if e != nil {
  80. return
  81. }
  82. // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumesChan
  83. }
  84. return
  85. }
  86. func (s *Store) findVolume(vid needle.VolumeId) *Volume {
  87. for _, location := range s.Locations {
  88. if v, found := location.FindVolume(vid); found {
  89. return v
  90. }
  91. }
  92. return nil
  93. }
  94. func (s *Store) FindFreeLocation(diskType DiskType) (ret *DiskLocation) {
  95. max := 0
  96. for _, location := range s.Locations {
  97. if diskType != location.DiskType {
  98. continue
  99. }
  100. if location.isDiskSpaceLow {
  101. continue
  102. }
  103. currentFreeCount := location.MaxVolumeCount - location.VolumesLen()
  104. currentFreeCount *= erasure_coding.DataShardsCount
  105. currentFreeCount -= location.EcVolumesLen()
  106. currentFreeCount /= erasure_coding.DataShardsCount
  107. if currentFreeCount > max {
  108. max = currentFreeCount
  109. ret = location
  110. }
  111. }
  112. return ret
  113. }
  114. func (s *Store) addVolume(vid needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32, diskType DiskType) error {
  115. if s.findVolume(vid) != nil {
  116. return fmt.Errorf("Volume Id %d already exists!", vid)
  117. }
  118. if location := s.FindFreeLocation(diskType); location != nil {
  119. glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
  120. location.Directory, vid, collection, replicaPlacement, ttl)
  121. if volume, err := NewVolume(location.Directory, location.IdxDirectory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate, memoryMapMaxSizeMb); err == nil {
  122. location.SetVolume(vid, volume)
  123. glog.V(0).Infof("add volume %d", vid)
  124. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  125. Id: uint32(vid),
  126. Collection: collection,
  127. ReplicaPlacement: uint32(replicaPlacement.Byte()),
  128. Version: uint32(volume.Version()),
  129. Ttl: ttl.ToUint32(),
  130. DiskType: string(diskType),
  131. }
  132. return nil
  133. } else {
  134. return err
  135. }
  136. }
  137. return fmt.Errorf("No more free space left")
  138. }
  139. func (s *Store) VolumeInfos() (allStats []*VolumeInfo) {
  140. for _, location := range s.Locations {
  141. stats := collectStatsForOneLocation(location)
  142. allStats = append(allStats, stats...)
  143. }
  144. sortVolumeInfos(allStats)
  145. return allStats
  146. }
  147. func collectStatsForOneLocation(location *DiskLocation) (stats []*VolumeInfo) {
  148. location.volumesLock.RLock()
  149. defer location.volumesLock.RUnlock()
  150. for k, v := range location.volumes {
  151. s := collectStatForOneVolume(k, v)
  152. stats = append(stats, s)
  153. }
  154. return stats
  155. }
  156. func collectStatForOneVolume(vid needle.VolumeId, v *Volume) (s *VolumeInfo) {
  157. s = &VolumeInfo{
  158. Id: vid,
  159. Collection: v.Collection,
  160. ReplicaPlacement: v.ReplicaPlacement,
  161. Version: v.Version(),
  162. ReadOnly: v.IsReadOnly(),
  163. Ttl: v.Ttl,
  164. CompactRevision: uint32(v.CompactionRevision),
  165. DiskType: v.DiskType().String(),
  166. }
  167. s.RemoteStorageName, s.RemoteStorageKey = v.RemoteStorageNameKey()
  168. v.dataFileAccessLock.RLock()
  169. defer v.dataFileAccessLock.RUnlock()
  170. if v.nm == nil {
  171. return
  172. }
  173. s.FileCount = v.nm.FileCount()
  174. s.DeleteCount = v.nm.DeletedCount()
  175. s.DeletedByteCount = v.nm.DeletedSize()
  176. s.Size = v.nm.ContentSize()
  177. return
  178. }
// SetDataCenter records the data center name that CollectHeartbeat reports
// for this store.
func (s *Store) SetDataCenter(dataCenter string) {
	s.dataCenter = dataCenter
}
// SetRack records the rack name that CollectHeartbeat reports for this store.
func (s *Store) SetRack(rack string) {
	s.rack = rack
}
// GetDataCenter returns the data center name configured for this store.
func (s *Store) GetDataCenter() string {
	return s.dataCenter
}
// GetRack returns the rack name configured for this store.
func (s *Store) GetRack() string {
	return s.rack
}
  191. func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
  192. var volumeMessages []*master_pb.VolumeInformationMessage
  193. maxVolumeCounts := make(map[string]uint32)
  194. var maxFileKey NeedleId
  195. collectionVolumeSize := make(map[string]uint64)
  196. collectionVolumeReadOnlyCount := make(map[string]map[string]uint8)
  197. for _, location := range s.Locations {
  198. var deleteVids []needle.VolumeId
  199. maxVolumeCounts[string(location.DiskType)] += uint32(location.MaxVolumeCount)
  200. location.volumesLock.RLock()
  201. for _, v := range location.volumes {
  202. curMaxFileKey, volumeMessage := v.ToVolumeInformationMessage()
  203. if volumeMessage == nil {
  204. continue
  205. }
  206. if maxFileKey < curMaxFileKey {
  207. maxFileKey = curMaxFileKey
  208. }
  209. deleteVolume := false
  210. if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) {
  211. volumeMessages = append(volumeMessages, volumeMessage)
  212. } else {
  213. if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
  214. deleteVids = append(deleteVids, v.Id)
  215. deleteVolume = true
  216. } else {
  217. glog.V(0).Infof("volume %d is expired", v.Id)
  218. }
  219. if v.lastIoError != nil {
  220. deleteVids = append(deleteVids, v.Id)
  221. deleteVolume = true
  222. glog.Warningf("volume %d has IO error: %v", v.Id, v.lastIoError)
  223. }
  224. }
  225. if _, exist := collectionVolumeSize[v.Collection]; !exist {
  226. collectionVolumeSize[v.Collection] = 0
  227. }
  228. if !deleteVolume {
  229. collectionVolumeSize[v.Collection] += volumeMessage.Size
  230. } else {
  231. collectionVolumeSize[v.Collection] -= volumeMessage.Size
  232. if collectionVolumeSize[v.Collection] <= 0 {
  233. delete(collectionVolumeSize, v.Collection)
  234. }
  235. }
  236. if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist {
  237. collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{
  238. "IsReadOnly": 0,
  239. "noWriteOrDelete": 0,
  240. "noWriteCanDelete": 0,
  241. "isDiskSpaceLow": 0,
  242. }
  243. }
  244. if !deleteVolume && v.IsReadOnly() {
  245. collectionVolumeReadOnlyCount[v.Collection]["IsReadOnly"] += 1
  246. if v.noWriteOrDelete {
  247. collectionVolumeReadOnlyCount[v.Collection]["noWriteOrDelete"] += 1
  248. }
  249. if v.noWriteCanDelete {
  250. collectionVolumeReadOnlyCount[v.Collection]["noWriteCanDelete"] += 1
  251. }
  252. if v.location.isDiskSpaceLow {
  253. collectionVolumeReadOnlyCount[v.Collection]["isDiskSpaceLow"] += 1
  254. }
  255. }
  256. }
  257. location.volumesLock.RUnlock()
  258. if len(deleteVids) > 0 {
  259. // delete expired volumes.
  260. location.volumesLock.Lock()
  261. for _, vid := range deleteVids {
  262. found, err := location.deleteVolumeById(vid)
  263. if err == nil {
  264. if found {
  265. glog.V(0).Infof("volume %d is deleted", vid)
  266. }
  267. } else {
  268. glog.Warningf("delete volume %d: %v", vid, err)
  269. }
  270. }
  271. location.volumesLock.Unlock()
  272. }
  273. }
  274. for col, size := range collectionVolumeSize {
  275. stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "normal").Set(float64(size))
  276. }
  277. for col, types := range collectionVolumeReadOnlyCount {
  278. for t, count := range types {
  279. stats.VolumeServerReadOnlyVolumeGauge.WithLabelValues(col, t).Set(float64(count))
  280. }
  281. }
  282. return &master_pb.Heartbeat{
  283. Ip: s.Ip,
  284. Port: uint32(s.Port),
  285. PublicUrl: s.PublicUrl,
  286. MaxVolumeCounts: maxVolumeCounts,
  287. MaxFileKey: NeedleIdToUint64(maxFileKey),
  288. DataCenter: s.dataCenter,
  289. Rack: s.rack,
  290. Volumes: volumeMessages,
  291. HasNoVolumes: len(volumeMessages) == 0,
  292. }
  293. }
  294. func (s *Store) Close() {
  295. for _, location := range s.Locations {
  296. location.Close()
  297. }
  298. }
  299. func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle, fsync bool) (isUnchanged bool, err error) {
  300. if v := s.findVolume(i); v != nil {
  301. if v.IsReadOnly() {
  302. err = fmt.Errorf("volume %d is read only", i)
  303. return
  304. }
  305. _, _, isUnchanged, err = v.writeNeedle2(n, fsync)
  306. return
  307. }
  308. glog.V(0).Infoln("volume", i, "not found!")
  309. err = fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  310. return
  311. }
  312. func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (Size, error) {
  313. if v := s.findVolume(i); v != nil {
  314. if v.noWriteOrDelete {
  315. return 0, fmt.Errorf("volume %d is read only", i)
  316. }
  317. return v.deleteNeedle2(n)
  318. }
  319. return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  320. }
  321. func (s *Store) ReadVolumeNeedle(i needle.VolumeId, n *needle.Needle, readOption *ReadOption) (int, error) {
  322. if v := s.findVolume(i); v != nil {
  323. return v.readNeedle(n, readOption)
  324. }
  325. return 0, fmt.Errorf("volume %d not found", i)
  326. }
// GetVolume returns the volume with id i, or nil when no disk location of
// this store holds it. It is the exported form of findVolume.
func (s *Store) GetVolume(i needle.VolumeId) *Volume {
	return s.findVolume(i)
}
  330. func (s *Store) HasVolume(i needle.VolumeId) bool {
  331. v := s.findVolume(i)
  332. return v != nil
  333. }
  334. func (s *Store) MarkVolumeReadonly(i needle.VolumeId) error {
  335. v := s.findVolume(i)
  336. if v == nil {
  337. return fmt.Errorf("volume %d not found", i)
  338. }
  339. v.noWriteLock.Lock()
  340. v.noWriteOrDelete = true
  341. v.noWriteLock.Unlock()
  342. return nil
  343. }
  344. func (s *Store) MarkVolumeWritable(i needle.VolumeId) error {
  345. v := s.findVolume(i)
  346. if v == nil {
  347. return fmt.Errorf("volume %d not found", i)
  348. }
  349. v.noWriteLock.Lock()
  350. v.noWriteOrDelete = false
  351. v.noWriteLock.Unlock()
  352. return nil
  353. }
  354. func (s *Store) MountVolume(i needle.VolumeId) error {
  355. for _, location := range s.Locations {
  356. if found := location.LoadVolume(i, s.NeedleMapKind); found == true {
  357. glog.V(0).Infof("mount volume %d", i)
  358. v := s.findVolume(i)
  359. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  360. Id: uint32(v.Id),
  361. Collection: v.Collection,
  362. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  363. Version: uint32(v.Version()),
  364. Ttl: v.Ttl.ToUint32(),
  365. DiskType: string(v.location.DiskType),
  366. }
  367. return nil
  368. }
  369. }
  370. return fmt.Errorf("volume %d not found on disk", i)
  371. }
  372. func (s *Store) UnmountVolume(i needle.VolumeId) error {
  373. v := s.findVolume(i)
  374. if v == nil {
  375. return nil
  376. }
  377. message := master_pb.VolumeShortInformationMessage{
  378. Id: uint32(v.Id),
  379. Collection: v.Collection,
  380. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  381. Version: uint32(v.Version()),
  382. Ttl: v.Ttl.ToUint32(),
  383. DiskType: string(v.location.DiskType),
  384. }
  385. for _, location := range s.Locations {
  386. if err := location.UnloadVolume(i); err == nil {
  387. glog.V(0).Infof("UnmountVolume %d", i)
  388. s.DeletedVolumesChan <- message
  389. return nil
  390. }
  391. }
  392. return fmt.Errorf("volume %d not found on disk", i)
  393. }
  394. func (s *Store) DeleteVolume(i needle.VolumeId) error {
  395. v := s.findVolume(i)
  396. if v == nil {
  397. return fmt.Errorf("delete volume %d not found on disk", i)
  398. }
  399. message := master_pb.VolumeShortInformationMessage{
  400. Id: uint32(v.Id),
  401. Collection: v.Collection,
  402. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  403. Version: uint32(v.Version()),
  404. Ttl: v.Ttl.ToUint32(),
  405. DiskType: string(v.location.DiskType),
  406. }
  407. for _, location := range s.Locations {
  408. if err := location.DeleteVolume(i); err == nil {
  409. glog.V(0).Infof("DeleteVolume %d", i)
  410. s.DeletedVolumesChan <- message
  411. return nil
  412. } else {
  413. glog.Errorf("DeleteVolume %d: %v", i, err)
  414. }
  415. }
  416. return fmt.Errorf("volume %d not found on disk", i)
  417. }
  418. func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error {
  419. for _, location := range s.Locations {
  420. fileInfo, found := location.LocateVolume(i)
  421. if !found {
  422. continue
  423. }
  424. // load, modify, save
  425. baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name()))
  426. vifFile := filepath.Join(location.Directory, baseFileName+".vif")
  427. volumeInfo, _, _, err := pb.MaybeLoadVolumeInfo(vifFile)
  428. if err != nil {
  429. return fmt.Errorf("volume %d fail to load vif", i)
  430. }
  431. volumeInfo.Replication = replication
  432. err = pb.SaveVolumeInfo(vifFile, volumeInfo)
  433. if err != nil {
  434. return fmt.Errorf("volume %d fail to save vif", i)
  435. }
  436. return nil
  437. }
  438. return fmt.Errorf("volume %d not found on disk", i)
  439. }
// SetVolumeSizeLimit atomically stores x as the store's volume size limit.
func (s *Store) SetVolumeSizeLimit(x uint64) {
	atomic.StoreUint64(&s.volumeSizeLimit, x)
}
// GetVolumeSizeLimit atomically loads the store's volume size limit. It is 0
// until SetVolumeSizeLimit has been called.
func (s *Store) GetVolumeSizeLimit() uint64 {
	return atomic.LoadUint64(&s.volumeSizeLimit)
}
  446. func (s *Store) MaybeAdjustVolumeMax() (hasChanges bool) {
  447. volumeSizeLimit := s.GetVolumeSizeLimit()
  448. if volumeSizeLimit == 0 {
  449. return
  450. }
  451. for _, diskLocation := range s.Locations {
  452. if diskLocation.OriginalMaxVolumeCount == 0 {
  453. currentMaxVolumeCount := diskLocation.MaxVolumeCount
  454. diskStatus := stats.NewDiskStatus(diskLocation.Directory)
  455. unusedSpace := diskLocation.UnUsedSpace(volumeSizeLimit)
  456. unclaimedSpaces := int64(diskStatus.Free) - int64(unusedSpace)
  457. volCount := diskLocation.VolumesLen()
  458. maxVolumeCount := volCount
  459. if unclaimedSpaces > int64(volumeSizeLimit) {
  460. maxVolumeCount += int(uint64(unclaimedSpaces)/volumeSizeLimit) - 1
  461. }
  462. diskLocation.MaxVolumeCount = maxVolumeCount
  463. glog.V(2).Infof("disk %s max %d unclaimedSpace:%dMB, unused:%dMB volumeSizeLimit:%dMB",
  464. diskLocation.Directory, maxVolumeCount, unclaimedSpaces/1024/1024, unusedSpace/1024/1024, volumeSizeLimit/1024/1024)
  465. hasChanges = hasChanges || currentMaxVolumeCount != diskLocation.MaxVolumeCount
  466. }
  467. }
  468. return
  469. }