You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

378 lines
12 KiB

6 years ago
6 years ago
3 years ago
3 years ago
3 years ago
3 years ago
6 years ago
6 years ago
6 years ago
3 years ago
4 years ago
6 years ago
6 years ago
6 years ago
5 years ago
6 years ago
5 years ago
  1. package weed_server
  2. import (
  3. "context"
  4. "fmt"
  5. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  6. "github.com/seaweedfs/seaweedfs/weed/storage/backend"
  7. "io"
  8. "math"
  9. "os"
  10. "time"
  11. "github.com/seaweedfs/seaweedfs/weed/glog"
  12. "github.com/seaweedfs/seaweedfs/weed/operation"
  13. "github.com/seaweedfs/seaweedfs/weed/pb"
  14. "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
  15. "github.com/seaweedfs/seaweedfs/weed/storage"
  16. "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
  17. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  18. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  19. "github.com/seaweedfs/seaweedfs/weed/util"
  20. )
// BufferSizeLimit is the chunk size (2 MiB) used when streaming file content
// in CopyFile responses.
const BufferSizeLimit = 1024 * 1024 * 2
// VolumeCopy copies the .dat, .idx and .vif files of a volume from the source
// volume server named in req.SourceDataNode, then mounts the volume locally.
// Progress is streamed back to the caller as VolumeCopyResponse messages with
// ProcessedBytes set (roughly every 128 MiB of the .dat file), and a final
// response carrying LastAppendAtNs is sent after a successful mount.
// Any pre-existing local volume with the same id is deleted first.
func (vs *VolumeServer) VolumeCopy(req *volume_server_pb.VolumeCopyRequest, stream volume_server_pb.VolumeServer_VolumeCopyServer) error {

	v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
	if v != nil {
		glog.V(0).Infof("volume %d already exists. deleted before copying...", req.VolumeId)

		err := vs.store.DeleteVolume(needle.VolumeId(req.VolumeId))
		if err != nil {
			return fmt.Errorf("failed to delete existing volume %d: %v", req.VolumeId, err)
		}

		glog.V(0).Infof("deleted existing volume %d before copying.", req.VolumeId)
	}

	// the master will not start compaction for read-only volumes, so it is safe to just copy files directly
	// copy .dat and .idx files
	// read .idx .dat file size and timestamp
	// send .idx file
	// send .dat file
	// confirm size and timestamp
	var volFileInfoResp *volume_server_pb.ReadVolumeFileStatusResponse
	var dataBaseFileName, indexBaseFileName, idxFileName, datFileName string
	err := operation.WithVolumeServerClient(true, pb.ServerAddress(req.SourceDataNode), vs.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
		var err error
		volFileInfoResp, err = client.ReadVolumeFileStatus(context.Background(),
			&volume_server_pb.ReadVolumeFileStatusRequest{
				VolumeId: req.VolumeId,
			})
		if nil != err {
			return fmt.Errorf("read volume file status failed, %v", err)
		}

		// a disk type given in the request overrides the source volume's disk type
		diskType := volFileInfoResp.DiskType
		if req.DiskType != "" {
			diskType = req.DiskType
		}
		location := vs.store.FindFreeLocation(types.ToDiskType(diskType))
		if location == nil {
			return fmt.Errorf("no space left for disk type %s", types.ToDiskType(diskType).ReadableString())
		}

		dataBaseFileName = storage.VolumeFileName(location.Directory, volFileInfoResp.Collection, int(req.VolumeId))
		indexBaseFileName = storage.VolumeFileName(location.IdxDirectory, volFileInfoResp.Collection, int(req.VolumeId))

		// the .note file marks an in-progress copy; it is removed on success below
		// or by the deferred cleanup on failure
		util.WriteFile(dataBaseFileName+".note", []byte(fmt.Sprintf("copying from %s", req.SourceDataNode)), 0755)

		// on any error returned from this closure, remove the partially copied files
		defer func() {
			if err != nil {
				os.Remove(dataBaseFileName + ".dat")
				os.Remove(indexBaseFileName + ".idx")
				os.Remove(dataBaseFileName + ".vif")
				os.Remove(dataBaseFileName + ".note")
			}
		}()

		// ask the master whether volume files should be preallocated; a failure
		// here is only logged, and copying proceeds without preallocation
		var preallocateSize int64
		if grpcErr := pb.WithMasterClient(false, vs.GetMaster(), vs.grpcDialOption, func(client master_pb.SeaweedClient) error {
			resp, err := client.GetMasterConfiguration(context.Background(), &master_pb.GetMasterConfigurationRequest{})
			if err != nil {
				return fmt.Errorf("get master %s configuration: %v", vs.GetMaster(), err)
			}
			if resp.VolumePreallocate {
				preallocateSize = int64(resp.VolumeSizeLimitMB) * (1 << 20)
			}
			return nil
		}); grpcErr != nil {
			glog.V(0).Infof("connect to %s: %v", vs.GetMaster(), grpcErr)
		}

		if preallocateSize > 0 {
			volumeFile := dataBaseFileName + ".dat"
			_, err := backend.CreateVolumeFile(volumeFile, preallocateSize, 0)
			if err != nil {
				return fmt.Errorf("create volume file %s: %v", volumeFile, err)
			}
		}

		// println("source:", volFileInfoResp.String())

		// copy the .dat file, streaming a progress update to the caller
		// roughly every reportInterval bytes
		copyResponse := &volume_server_pb.VolumeCopyResponse{}
		reportInterval := int64(1024 * 1024 * 128)
		nextReportTarget := reportInterval
		var modifiedTsNs int64
		var sendErr error
		if modifiedTsNs, err = vs.doCopyFile(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, volFileInfoResp.DatFileSize, dataBaseFileName, ".dat", false, true, func(processed int64) bool {
			if processed > nextReportTarget {
				copyResponse.ProcessedBytes = processed
				if sendErr = stream.Send(copyResponse); sendErr != nil {
					// returning false aborts the copy in writeToFile
					return false
				}
				nextReportTarget = processed + reportInterval
			}
			return true
		}); err != nil {
			return err
		}
		if sendErr != nil {
			return sendErr
		}
		// preserve the source file's modification time, when reported
		if modifiedTsNs > 0 {
			os.Chtimes(dataBaseFileName+".dat", time.Unix(0, modifiedTsNs), time.Unix(0, modifiedTsNs))
		}

		// copy the .idx file (no progress reporting)
		if modifiedTsNs, err = vs.doCopyFile(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, volFileInfoResp.IdxFileSize, indexBaseFileName, ".idx", false, false, nil); err != nil {
			return err
		}
		if modifiedTsNs > 0 {
			os.Chtimes(indexBaseFileName+".idx", time.Unix(0, modifiedTsNs), time.Unix(0, modifiedTsNs))
		}

		// copy the .vif file; a missing source file is tolerated
		if modifiedTsNs, err = vs.doCopyFile(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, volFileInfoResp.DatFileSize, dataBaseFileName, ".vif", false, true, nil); err != nil {
			return err
		}
		if modifiedTsNs > 0 {
			os.Chtimes(dataBaseFileName+".vif", time.Unix(0, modifiedTsNs), time.Unix(0, modifiedTsNs))
		}

		// copy completed; remove the in-progress marker
		os.Remove(dataBaseFileName + ".note")

		return nil
	})

	if err != nil {
		return err
	}
	if dataBaseFileName == "" {
		return fmt.Errorf("not found volume %d file", req.VolumeId)
	}

	idxFileName = indexBaseFileName + ".idx"
	datFileName = dataBaseFileName + ".dat"

	// if validation or mounting below fails, remove the copied files
	defer func() {
		if err != nil && dataBaseFileName != "" {
			os.Remove(idxFileName)
			os.Remove(datFileName)
			os.Remove(dataBaseFileName + ".vif")
		}
	}()

	if err = checkCopyFiles(volFileInfoResp, idxFileName, datFileName); err != nil { // added by panyc16
		return err
	}

	// mount the volume
	err = vs.store.MountVolume(needle.VolumeId(req.VolumeId))
	if err != nil {
		return fmt.Errorf("failed to mount volume %d: %v", req.VolumeId, err)
	}

	if err = stream.Send(&volume_server_pb.VolumeCopyResponse{
		LastAppendAtNs: volFileInfoResp.DatFileTimestampSeconds * uint64(time.Second),
	}); err != nil {
		glog.Errorf("send response: %v", err)
	}

	return err
}
  158. func (vs *VolumeServer) doCopyFile(client volume_server_pb.VolumeServerClient, isEcVolume bool, collection string, vid, compactRevision uint32, stopOffset uint64, baseFileName, ext string, isAppend, ignoreSourceFileNotFound bool, progressFn storage.ProgressFunc) (modifiedTsNs int64, err error) {
  159. copyFileClient, err := client.CopyFile(context.Background(), &volume_server_pb.CopyFileRequest{
  160. VolumeId: vid,
  161. Ext: ext,
  162. CompactionRevision: compactRevision,
  163. StopOffset: stopOffset,
  164. Collection: collection,
  165. IsEcVolume: isEcVolume,
  166. IgnoreSourceFileNotFound: ignoreSourceFileNotFound,
  167. })
  168. if err != nil {
  169. return modifiedTsNs, fmt.Errorf("failed to start copying volume %d %s file: %v", vid, ext, err)
  170. }
  171. modifiedTsNs, err = writeToFile(copyFileClient, baseFileName+ext, util.NewWriteThrottler(vs.compactionBytePerSecond), isAppend, progressFn)
  172. if err != nil {
  173. return modifiedTsNs, fmt.Errorf("failed to copy %s file: %v", baseFileName+ext, err)
  174. }
  175. return modifiedTsNs, nil
  176. }
  177. /**
  178. only check the the differ of the file size
  179. todo: maybe should check the received count and deleted count of the volume
  180. */
  181. func checkCopyFiles(originFileInf *volume_server_pb.ReadVolumeFileStatusResponse, idxFileName, datFileName string) error {
  182. stat, err := os.Stat(idxFileName)
  183. if err != nil {
  184. return fmt.Errorf("stat idx file %s failed: %v", idxFileName, err)
  185. }
  186. if originFileInf.IdxFileSize != uint64(stat.Size()) {
  187. return fmt.Errorf("idx file %s size [%v] is not same as origin file size [%v]",
  188. idxFileName, stat.Size(), originFileInf.IdxFileSize)
  189. }
  190. stat, err = os.Stat(datFileName)
  191. if err != nil {
  192. return fmt.Errorf("get dat file info failed, %v", err)
  193. }
  194. if originFileInf.DatFileSize != uint64(stat.Size()) {
  195. return fmt.Errorf("the dat file size [%v] is not same as origin file size [%v]",
  196. stat.Size(), originFileInf.DatFileSize)
  197. }
  198. return nil
  199. }
  200. func writeToFile(client volume_server_pb.VolumeServer_CopyFileClient, fileName string, wt *util.WriteThrottler, isAppend bool, progressFn storage.ProgressFunc) (modifiedTsNs int64, err error) {
  201. glog.V(4).Infof("writing to %s", fileName)
  202. flags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC
  203. if isAppend {
  204. flags = os.O_WRONLY | os.O_CREATE
  205. }
  206. dst, err := os.OpenFile(fileName, flags, 0644)
  207. if err != nil {
  208. return modifiedTsNs, nil
  209. }
  210. defer dst.Close()
  211. var progressedBytes int64
  212. for {
  213. resp, receiveErr := client.Recv()
  214. if receiveErr == io.EOF {
  215. break
  216. }
  217. if resp != nil && resp.ModifiedTsNs != 0 {
  218. modifiedTsNs = resp.ModifiedTsNs
  219. }
  220. if receiveErr != nil {
  221. return modifiedTsNs, fmt.Errorf("receiving %s: %v", fileName, receiveErr)
  222. }
  223. dst.Write(resp.FileContent)
  224. progressedBytes += int64(len(resp.FileContent))
  225. if progressFn != nil {
  226. if !progressFn(progressedBytes) {
  227. return modifiedTsNs, fmt.Errorf("interrupted copy operation")
  228. }
  229. }
  230. wt.MaybeSlowdown(int64(len(resp.FileContent)))
  231. }
  232. return modifiedTsNs, nil
  233. }
  234. func (vs *VolumeServer) ReadVolumeFileStatus(ctx context.Context, req *volume_server_pb.ReadVolumeFileStatusRequest) (*volume_server_pb.ReadVolumeFileStatusResponse, error) {
  235. resp := &volume_server_pb.ReadVolumeFileStatusResponse{}
  236. v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
  237. if v == nil {
  238. return nil, fmt.Errorf("not found volume id %d", req.VolumeId)
  239. }
  240. resp.VolumeId = req.VolumeId
  241. datSize, idxSize, modTime := v.FileStat()
  242. resp.DatFileSize = datSize
  243. resp.IdxFileSize = idxSize
  244. resp.DatFileTimestampSeconds = uint64(modTime.Unix())
  245. resp.IdxFileTimestampSeconds = uint64(modTime.Unix())
  246. resp.FileCount = v.FileCount()
  247. resp.CompactionRevision = uint32(v.CompactionRevision)
  248. resp.Collection = v.Collection
  249. resp.DiskType = string(v.DiskType())
  250. return resp, nil
  251. }
  252. // CopyFile client pulls the volume related file from the source server.
  253. // if req.CompactionRevision != math.MaxUint32, it ensures the compact revision is as expected
  254. // The copying still stop at req.StopOffset, but you can set it to math.MaxUint64 in order to read all data.
  255. func (vs *VolumeServer) CopyFile(req *volume_server_pb.CopyFileRequest, stream volume_server_pb.VolumeServer_CopyFileServer) error {
  256. var fileName string
  257. if !req.IsEcVolume {
  258. v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
  259. if v == nil {
  260. return fmt.Errorf("not found volume id %d", req.VolumeId)
  261. }
  262. if uint32(v.CompactionRevision) != req.CompactionRevision && req.CompactionRevision != math.MaxUint32 {
  263. return fmt.Errorf("volume %d is compacted", req.VolumeId)
  264. }
  265. v.SyncToDisk()
  266. fileName = v.FileName(req.Ext)
  267. } else {
  268. baseFileName := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId)) + req.Ext
  269. for _, location := range vs.store.Locations {
  270. tName := util.Join(location.Directory, baseFileName)
  271. if util.FileExists(tName) {
  272. fileName = tName
  273. }
  274. tName = util.Join(location.IdxDirectory, baseFileName)
  275. if util.FileExists(tName) {
  276. fileName = tName
  277. }
  278. }
  279. if fileName == "" {
  280. if req.IgnoreSourceFileNotFound {
  281. return nil
  282. }
  283. return fmt.Errorf("CopyFile not found ec volume id %d", req.VolumeId)
  284. }
  285. }
  286. bytesToRead := int64(req.StopOffset)
  287. file, err := os.Open(fileName)
  288. if err != nil {
  289. if req.IgnoreSourceFileNotFound && err == os.ErrNotExist {
  290. return nil
  291. }
  292. return err
  293. }
  294. defer file.Close()
  295. fileInfo, err := file.Stat()
  296. if err != nil {
  297. return err
  298. }
  299. fileModTsNs := fileInfo.ModTime().UnixNano()
  300. buffer := make([]byte, BufferSizeLimit)
  301. for bytesToRead > 0 {
  302. bytesread, err := file.Read(buffer)
  303. // println(fileName, "read", bytesread, "bytes, with target", bytesToRead)
  304. if err != nil {
  305. if err != io.EOF {
  306. return err
  307. }
  308. // println(fileName, "read", bytesread, "bytes, with target", bytesToRead, "err", err.Error())
  309. break
  310. }
  311. if int64(bytesread) > bytesToRead {
  312. bytesread = int(bytesToRead)
  313. }
  314. err = stream.Send(&volume_server_pb.CopyFileResponse{
  315. FileContent: buffer[:bytesread],
  316. ModifiedTsNs: fileModTsNs,
  317. })
  318. if err != nil {
  319. // println("sending", bytesread, "bytes err", err.Error())
  320. return err
  321. }
  322. fileModTsNs = 0 // only send once
  323. bytesToRead -= int64(bytesread)
  324. }
  325. return nil
  326. }