401 lines
13 KiB

6 years ago
6 years ago
3 years ago
3 years ago
3 years ago
6 years ago
6 years ago
4 years ago
6 years ago
6 years ago
6 years ago
5 years ago
6 years ago
5 years ago
  1. package weed_server
  2. import (
  3. "context"
  4. "fmt"
  5. "io"
  6. "math"
  7. "os"
  8. "time"
  9. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  10. "github.com/seaweedfs/seaweedfs/weed/storage/backend"
  11. "github.com/seaweedfs/seaweedfs/weed/glog"
  12. "github.com/seaweedfs/seaweedfs/weed/operation"
  13. "github.com/seaweedfs/seaweedfs/weed/pb"
  14. "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
  15. "github.com/seaweedfs/seaweedfs/weed/storage"
  16. "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
  17. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  18. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  19. "github.com/seaweedfs/seaweedfs/weed/util"
  20. )
  21. const BufferSizeLimit = 1024 * 1024 * 2
  22. // VolumeCopy copy the .idx .dat .vif files, and mount the volume
  23. func (vs *VolumeServer) VolumeCopy(req *volume_server_pb.VolumeCopyRequest, stream volume_server_pb.VolumeServer_VolumeCopyServer) error {
  24. v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
  25. if v != nil {
  26. glog.V(0).Infof("volume %d already exists. deleted before copying...", req.VolumeId)
  27. err := vs.store.DeleteVolume(needle.VolumeId(req.VolumeId))
  28. if err != nil {
  29. return fmt.Errorf("failed to delete existing volume %d: %v", req.VolumeId, err)
  30. }
  31. glog.V(0).Infof("deleted existing volume %d before copying.", req.VolumeId)
  32. }
  33. // the master will not start compaction for read-only volumes, so it is safe to just copy files directly
  34. // copy .dat and .idx files
  35. // read .idx .dat file size and timestamp
  36. // send .idx file
  37. // send .dat file
  38. // confirm size and timestamp
  39. var volFileInfoResp *volume_server_pb.ReadVolumeFileStatusResponse
  40. var dataBaseFileName, indexBaseFileName, idxFileName, datFileName string
  41. var hasRemoteDatFile bool
  42. err := operation.WithVolumeServerClient(true, pb.ServerAddress(req.SourceDataNode), vs.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
  43. var err error
  44. volFileInfoResp, err = client.ReadVolumeFileStatus(context.Background(),
  45. &volume_server_pb.ReadVolumeFileStatusRequest{
  46. VolumeId: req.VolumeId,
  47. })
  48. if nil != err {
  49. return fmt.Errorf("read volume file status failed, %v", err)
  50. }
  51. diskType := volFileInfoResp.DiskType
  52. if req.DiskType != "" {
  53. diskType = req.DiskType
  54. }
  55. location := vs.store.FindFreeLocation(types.ToDiskType(diskType))
  56. if location == nil {
  57. return fmt.Errorf("no space left for disk type %s", types.ToDiskType(diskType).ReadableString())
  58. }
  59. dataBaseFileName = storage.VolumeFileName(location.Directory, volFileInfoResp.Collection, int(req.VolumeId))
  60. indexBaseFileName = storage.VolumeFileName(location.IdxDirectory, volFileInfoResp.Collection, int(req.VolumeId))
  61. hasRemoteDatFile = volFileInfoResp.VolumeInfo != nil && len(volFileInfoResp.VolumeInfo.Files) > 0
  62. util.WriteFile(dataBaseFileName+".note", []byte(fmt.Sprintf("copying from %s", req.SourceDataNode)), 0755)
  63. defer func() {
  64. if err != nil {
  65. os.Remove(dataBaseFileName + ".dat")
  66. os.Remove(indexBaseFileName + ".idx")
  67. os.Remove(dataBaseFileName + ".vif")
  68. os.Remove(dataBaseFileName + ".note")
  69. }
  70. }()
  71. var preallocateSize int64
  72. if grpcErr := pb.WithMasterClient(false, vs.GetMaster(), vs.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
  73. resp, err := client.GetMasterConfiguration(context.Background(), &master_pb.GetMasterConfigurationRequest{})
  74. if err != nil {
  75. return fmt.Errorf("get master %s configuration: %v", vs.GetMaster(), err)
  76. }
  77. if resp.VolumePreallocate {
  78. preallocateSize = int64(resp.VolumeSizeLimitMB) * (1 << 20)
  79. }
  80. return nil
  81. }); grpcErr != nil {
  82. glog.V(0).Infof("connect to %s: %v", vs.GetMaster(), grpcErr)
  83. }
  84. if preallocateSize > 0 && !hasRemoteDatFile {
  85. volumeFile := dataBaseFileName + ".dat"
  86. _, err := backend.CreateVolumeFile(volumeFile, preallocateSize, 0)
  87. if err != nil {
  88. return fmt.Errorf("create volume file %s: %v", volumeFile, err)
  89. }
  90. }
  91. // println("source:", volFileInfoResp.String())
  92. copyResponse := &volume_server_pb.VolumeCopyResponse{}
  93. reportInterval := int64(1024 * 1024 * 128)
  94. nextReportTarget := reportInterval
  95. var modifiedTsNs int64
  96. var sendErr error
  97. var ioBytePerSecond int64
  98. if req.IoBytePerSecond <= 0 {
  99. ioBytePerSecond = vs.compactionBytePerSecond
  100. } else {
  101. ioBytePerSecond = req.IoBytePerSecond
  102. }
  103. throttler := util.NewWriteThrottler(ioBytePerSecond)
  104. if !hasRemoteDatFile {
  105. if modifiedTsNs, err = vs.doCopyFileWithThrottler(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, volFileInfoResp.DatFileSize, dataBaseFileName, ".dat", false, true, func(processed int64) bool {
  106. if processed > nextReportTarget {
  107. copyResponse.ProcessedBytes = processed
  108. if sendErr = stream.Send(copyResponse); sendErr != nil {
  109. return false
  110. }
  111. nextReportTarget = processed + reportInterval
  112. }
  113. return true
  114. }, throttler); err != nil {
  115. return err
  116. }
  117. if sendErr != nil {
  118. return sendErr
  119. }
  120. if modifiedTsNs > 0 {
  121. os.Chtimes(dataBaseFileName+".dat", time.Unix(0, modifiedTsNs), time.Unix(0, modifiedTsNs))
  122. }
  123. }
  124. if modifiedTsNs, err = vs.doCopyFileWithThrottler(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, volFileInfoResp.IdxFileSize, indexBaseFileName, ".idx", false, false, nil, throttler); err != nil {
  125. return err
  126. }
  127. if modifiedTsNs > 0 {
  128. os.Chtimes(indexBaseFileName+".idx", time.Unix(0, modifiedTsNs), time.Unix(0, modifiedTsNs))
  129. }
  130. if modifiedTsNs, err = vs.doCopyFileWithThrottler(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, volFileInfoResp.DatFileSize, dataBaseFileName, ".vif", false, true, nil, throttler); err != nil {
  131. return err
  132. }
  133. if modifiedTsNs > 0 {
  134. os.Chtimes(dataBaseFileName+".vif", time.Unix(0, modifiedTsNs), time.Unix(0, modifiedTsNs))
  135. }
  136. os.Remove(dataBaseFileName + ".note")
  137. return nil
  138. })
  139. if err != nil {
  140. return err
  141. }
  142. if dataBaseFileName == "" {
  143. return fmt.Errorf("not found volume %d file", req.VolumeId)
  144. }
  145. idxFileName = indexBaseFileName + ".idx"
  146. datFileName = dataBaseFileName + ".dat"
  147. defer func() {
  148. if err != nil && dataBaseFileName != "" {
  149. os.Remove(idxFileName)
  150. os.Remove(datFileName)
  151. os.Remove(dataBaseFileName + ".vif")
  152. }
  153. }()
  154. if err = checkCopyFiles(volFileInfoResp, hasRemoteDatFile, idxFileName, datFileName); err != nil { // added by panyc16
  155. return err
  156. }
  157. // mount the volume
  158. err = vs.store.MountVolume(needle.VolumeId(req.VolumeId))
  159. if err != nil {
  160. return fmt.Errorf("failed to mount volume %d: %v", req.VolumeId, err)
  161. }
  162. if err = stream.Send(&volume_server_pb.VolumeCopyResponse{
  163. LastAppendAtNs: volFileInfoResp.DatFileTimestampSeconds * uint64(time.Second),
  164. }); err != nil {
  165. glog.Errorf("send response: %v", err)
  166. }
  167. return err
  168. }
  169. func (vs *VolumeServer) doCopyFile(client volume_server_pb.VolumeServerClient, isEcVolume bool, collection string, vid, compactRevision uint32, stopOffset uint64, baseFileName, ext string, isAppend, ignoreSourceFileNotFound bool, progressFn storage.ProgressFunc) (modifiedTsNs int64, err error) {
  170. return vs.doCopyFileWithThrottler(client, isEcVolume, collection, vid, compactRevision, stopOffset, baseFileName, ext, isAppend, ignoreSourceFileNotFound, progressFn, util.NewWriteThrottler(vs.compactionBytePerSecond))
  171. }
  172. func (vs *VolumeServer) doCopyFileWithThrottler(client volume_server_pb.VolumeServerClient, isEcVolume bool, collection string, vid, compactRevision uint32, stopOffset uint64, baseFileName, ext string, isAppend, ignoreSourceFileNotFound bool, progressFn storage.ProgressFunc, throttler *util.WriteThrottler) (modifiedTsNs int64, err error) {
  173. copyFileClient, err := client.CopyFile(context.Background(), &volume_server_pb.CopyFileRequest{
  174. VolumeId: vid,
  175. Ext: ext,
  176. CompactionRevision: compactRevision,
  177. StopOffset: stopOffset,
  178. Collection: collection,
  179. IsEcVolume: isEcVolume,
  180. IgnoreSourceFileNotFound: ignoreSourceFileNotFound,
  181. })
  182. if err != nil {
  183. return modifiedTsNs, fmt.Errorf("failed to start copying volume %d %s file: %v", vid, ext, err)
  184. }
  185. modifiedTsNs, err = writeToFile(copyFileClient, baseFileName+ext, throttler, isAppend, progressFn)
  186. if err != nil {
  187. return modifiedTsNs, fmt.Errorf("failed to copy %s file: %v", baseFileName+ext, err)
  188. }
  189. return modifiedTsNs, nil
  190. }
  191. /*
  192. *
  193. only check the the differ of the file size
  194. todo: maybe should check the received count and deleted count of the volume
  195. */
  196. func checkCopyFiles(originFileInf *volume_server_pb.ReadVolumeFileStatusResponse, hasRemoteDatFile bool, idxFileName, datFileName string) error {
  197. stat, err := os.Stat(idxFileName)
  198. if err != nil {
  199. return fmt.Errorf("stat idx file %s failed: %v", idxFileName, err)
  200. }
  201. if originFileInf.IdxFileSize != uint64(stat.Size()) {
  202. return fmt.Errorf("idx file %s size [%v] is not same as origin file size [%v]",
  203. idxFileName, stat.Size(), originFileInf.IdxFileSize)
  204. }
  205. if hasRemoteDatFile {
  206. return nil
  207. }
  208. stat, err = os.Stat(datFileName)
  209. if err != nil {
  210. return fmt.Errorf("get dat file info failed, %v", err)
  211. }
  212. if originFileInf.DatFileSize != uint64(stat.Size()) {
  213. return fmt.Errorf("the dat file size [%v] is not same as origin file size [%v]",
  214. stat.Size(), originFileInf.DatFileSize)
  215. }
  216. return nil
  217. }
  218. func writeToFile(client volume_server_pb.VolumeServer_CopyFileClient, fileName string, wt *util.WriteThrottler, isAppend bool, progressFn storage.ProgressFunc) (modifiedTsNs int64, err error) {
  219. glog.V(4).Infof("writing to %s", fileName)
  220. flags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC
  221. if isAppend {
  222. flags = os.O_WRONLY | os.O_CREATE
  223. }
  224. dst, err := os.OpenFile(fileName, flags, 0644)
  225. if err != nil {
  226. return modifiedTsNs, nil
  227. }
  228. defer dst.Close()
  229. var progressedBytes int64
  230. for {
  231. resp, receiveErr := client.Recv()
  232. if receiveErr == io.EOF {
  233. break
  234. }
  235. if resp != nil && resp.ModifiedTsNs != 0 {
  236. modifiedTsNs = resp.ModifiedTsNs
  237. }
  238. if receiveErr != nil {
  239. return modifiedTsNs, fmt.Errorf("receiving %s: %v", fileName, receiveErr)
  240. }
  241. dst.Write(resp.FileContent)
  242. progressedBytes += int64(len(resp.FileContent))
  243. if progressFn != nil {
  244. if !progressFn(progressedBytes) {
  245. return modifiedTsNs, fmt.Errorf("interrupted copy operation")
  246. }
  247. }
  248. wt.MaybeSlowdown(int64(len(resp.FileContent)))
  249. }
  250. return modifiedTsNs, nil
  251. }
  252. func (vs *VolumeServer) ReadVolumeFileStatus(ctx context.Context, req *volume_server_pb.ReadVolumeFileStatusRequest) (*volume_server_pb.ReadVolumeFileStatusResponse, error) {
  253. resp := &volume_server_pb.ReadVolumeFileStatusResponse{}
  254. v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
  255. if v == nil {
  256. return nil, fmt.Errorf("not found volume id %d", req.VolumeId)
  257. }
  258. resp.VolumeId = req.VolumeId
  259. datSize, idxSize, modTime := v.FileStat()
  260. resp.DatFileSize = datSize
  261. resp.IdxFileSize = idxSize
  262. resp.DatFileTimestampSeconds = uint64(modTime.Unix())
  263. resp.IdxFileTimestampSeconds = uint64(modTime.Unix())
  264. resp.FileCount = v.FileCount()
  265. resp.CompactionRevision = uint32(v.CompactionRevision)
  266. resp.Collection = v.Collection
  267. resp.DiskType = string(v.DiskType())
  268. resp.VolumeInfo = v.GetVolumeInfo()
  269. return resp, nil
  270. }
  271. // CopyFile client pulls the volume related file from the source server.
  272. // if req.CompactionRevision != math.MaxUint32, it ensures the compact revision is as expected
  273. // The copying still stop at req.StopOffset, but you can set it to math.MaxUint64 in order to read all data.
  274. func (vs *VolumeServer) CopyFile(req *volume_server_pb.CopyFileRequest, stream volume_server_pb.VolumeServer_CopyFileServer) error {
  275. var fileName string
  276. if !req.IsEcVolume {
  277. v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
  278. if v == nil {
  279. return fmt.Errorf("not found volume id %d", req.VolumeId)
  280. }
  281. if uint32(v.CompactionRevision) != req.CompactionRevision && req.CompactionRevision != math.MaxUint32 {
  282. return fmt.Errorf("volume %d is compacted", req.VolumeId)
  283. }
  284. v.SyncToDisk()
  285. fileName = v.FileName(req.Ext)
  286. } else {
  287. baseFileName := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId)) + req.Ext
  288. for _, location := range vs.store.Locations {
  289. tName := util.Join(location.Directory, baseFileName)
  290. if util.FileExists(tName) {
  291. fileName = tName
  292. }
  293. tName = util.Join(location.IdxDirectory, baseFileName)
  294. if util.FileExists(tName) {
  295. fileName = tName
  296. }
  297. }
  298. if fileName == "" {
  299. if req.IgnoreSourceFileNotFound {
  300. return nil
  301. }
  302. return fmt.Errorf("CopyFile not found ec volume id %d", req.VolumeId)
  303. }
  304. }
  305. bytesToRead := int64(req.StopOffset)
  306. file, err := os.Open(fileName)
  307. if err != nil {
  308. if req.IgnoreSourceFileNotFound && err == os.ErrNotExist {
  309. return nil
  310. }
  311. return err
  312. }
  313. defer file.Close()
  314. fileInfo, err := file.Stat()
  315. if err != nil {
  316. return err
  317. }
  318. fileModTsNs := fileInfo.ModTime().UnixNano()
  319. buffer := make([]byte, BufferSizeLimit)
  320. for bytesToRead > 0 {
  321. bytesread, err := file.Read(buffer)
  322. // println(fileName, "read", bytesread, "bytes, with target", bytesToRead)
  323. if err != nil {
  324. if err != io.EOF {
  325. return err
  326. }
  327. // println(fileName, "read", bytesread, "bytes, with target", bytesToRead, "err", err.Error())
  328. break
  329. }
  330. if int64(bytesread) > bytesToRead {
  331. bytesread = int(bytesToRead)
  332. }
  333. err = stream.Send(&volume_server_pb.CopyFileResponse{
  334. FileContent: buffer[:bytesread],
  335. ModifiedTsNs: fileModTsNs,
  336. })
  337. if err != nil {
  338. // println("sending", bytesread, "bytes err", err.Error())
  339. return err
  340. }
  341. fileModTsNs = 0 // only send once
  342. bytesToRead -= int64(bytesread)
  343. }
  344. return nil
  345. }