You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

356 lines
11 KiB

5 years ago
6 years ago
6 years ago
  1. package weed_server
  2. import (
  3. "context"
  4. "fmt"
  5. "io"
  6. "io/ioutil"
  7. "math"
  8. "os"
  9. "path"
  10. "path/filepath"
  11. "strings"
  12. "github.com/chrislusf/seaweedfs/weed/glog"
  13. "github.com/chrislusf/seaweedfs/weed/operation"
  14. "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
  15. "github.com/chrislusf/seaweedfs/weed/storage"
  16. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  17. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  18. "github.com/chrislusf/seaweedfs/weed/storage/types"
  19. "github.com/chrislusf/seaweedfs/weed/util"
  20. )
  21. /*
  22. Steps to apply erasure coding to .dat .idx files
  23. 0. ensure the volume is readonly
  24. 1. client call VolumeEcShardsGenerate to generate the .ecx and .ec00 ~ .ec13 files
  25. 2. client ask master for possible servers to hold the ec files, at least 4 servers
  26. 3. client call VolumeEcShardsCopy on above target servers to copy ec files from the source server
  27. 4. target servers report the new ec files to the master
  28. 5. master stores vid -> [14]*DataNode
  29. 6. client checks master. If all 14 slices are ready, delete the original .dat, .idx files
  30. */
  31. // VolumeEcShardsGenerate generates the .ecx and .ec00 ~ .ec13 files
  32. func (vs *VolumeServer) VolumeEcShardsGenerate(ctx context.Context, req *volume_server_pb.VolumeEcShardsGenerateRequest) (*volume_server_pb.VolumeEcShardsGenerateResponse, error) {
  33. v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
  34. if v == nil {
  35. return nil, fmt.Errorf("volume %d not found", req.VolumeId)
  36. }
  37. baseFileName := v.FileName()
  38. if v.Collection != req.Collection {
  39. return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection)
  40. }
  41. // write .ecx file
  42. if err := erasure_coding.WriteSortedFileFromIdx(baseFileName, ".ecx"); err != nil {
  43. return nil, fmt.Errorf("WriteSortedFileFromIdx %s: %v", baseFileName, err)
  44. }
  45. // write .ec00 ~ .ec13 files
  46. if err := erasure_coding.WriteEcFiles(baseFileName); err != nil {
  47. return nil, fmt.Errorf("WriteEcFiles %s: %v", baseFileName, err)
  48. }
  49. return &volume_server_pb.VolumeEcShardsGenerateResponse{}, nil
  50. }
  51. // VolumeEcShardsRebuild generates the any of the missing .ec00 ~ .ec13 files
  52. func (vs *VolumeServer) VolumeEcShardsRebuild(ctx context.Context, req *volume_server_pb.VolumeEcShardsRebuildRequest) (*volume_server_pb.VolumeEcShardsRebuildResponse, error) {
  53. baseFileName := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId))
  54. var rebuiltShardIds []uint32
  55. for _, location := range vs.store.Locations {
  56. if util.FileExists(path.Join(location.Directory, baseFileName+".ecx")) {
  57. // write .ec00 ~ .ec13 files
  58. baseFileName = path.Join(location.Directory, baseFileName)
  59. if generatedShardIds, err := erasure_coding.RebuildEcFiles(baseFileName); err != nil {
  60. return nil, fmt.Errorf("RebuildEcFiles %s: %v", baseFileName, err)
  61. } else {
  62. rebuiltShardIds = generatedShardIds
  63. }
  64. if err := erasure_coding.RebuildEcxFile(baseFileName); err != nil {
  65. return nil, fmt.Errorf("RebuildEcxFile %s: %v", baseFileName, err)
  66. }
  67. break
  68. }
  69. }
  70. return &volume_server_pb.VolumeEcShardsRebuildResponse{
  71. RebuiltShardIds: rebuiltShardIds,
  72. }, nil
  73. }
  74. // VolumeEcShardsCopy copy the .ecx and some ec data slices
  75. func (vs *VolumeServer) VolumeEcShardsCopy(ctx context.Context, req *volume_server_pb.VolumeEcShardsCopyRequest) (*volume_server_pb.VolumeEcShardsCopyResponse, error) {
  76. location := vs.store.FindFreeLocation()
  77. if location == nil {
  78. return nil, fmt.Errorf("no space left")
  79. }
  80. baseFileName := storage.VolumeFileName(location.Directory, req.Collection, int(req.VolumeId))
  81. err := operation.WithVolumeServerClient(req.SourceDataNode, vs.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
  82. // copy ec data slices
  83. for _, shardId := range req.ShardIds {
  84. if err := vs.doCopyFile(ctx, client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, baseFileName, erasure_coding.ToExt(int(shardId)), false, false); err != nil {
  85. return err
  86. }
  87. }
  88. if req.CopyEcxFile {
  89. // copy ecx file
  90. if err := vs.doCopyFile(ctx, client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, baseFileName, ".ecx", false, false); err != nil {
  91. return err
  92. }
  93. return nil
  94. }
  95. if req.CopyEcjFile {
  96. // copy ecj file
  97. if err := vs.doCopyFile(ctx, client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, baseFileName, ".ecj", true, true); err != nil {
  98. return err
  99. }
  100. }
  101. return nil
  102. })
  103. if err != nil {
  104. return nil, fmt.Errorf("VolumeEcShardsCopy volume %d: %v", req.VolumeId, err)
  105. }
  106. return &volume_server_pb.VolumeEcShardsCopyResponse{}, nil
  107. }
  108. // VolumeEcShardsDelete local delete the .ecx and some ec data slices if not needed
  109. // the shard should not be mounted before calling this.
  110. func (vs *VolumeServer) VolumeEcShardsDelete(ctx context.Context, req *volume_server_pb.VolumeEcShardsDeleteRequest) (*volume_server_pb.VolumeEcShardsDeleteResponse, error) {
  111. baseFilename := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId))
  112. glog.V(0).Infof("ec volume %d shard delete %v", req.VolumeId, req.ShardIds)
  113. found := false
  114. for _, location := range vs.store.Locations {
  115. if util.FileExists(path.Join(location.Directory, baseFilename+".ecx")) {
  116. found = true
  117. baseFilename = path.Join(location.Directory, baseFilename)
  118. for _, shardId := range req.ShardIds {
  119. os.Remove(baseFilename + erasure_coding.ToExt(int(shardId)))
  120. }
  121. break
  122. }
  123. }
  124. if !found {
  125. return nil, nil
  126. }
  127. // check whether to delete the .ecx and .ecj file also
  128. hasEcxFile := false
  129. existingShardCount := 0
  130. bName := filepath.Base(baseFilename)
  131. for _, location := range vs.store.Locations {
  132. fileInfos, err := ioutil.ReadDir(location.Directory)
  133. if err != nil {
  134. continue
  135. }
  136. for _, fileInfo := range fileInfos {
  137. if fileInfo.Name() == bName+".ecx" || fileInfo.Name() == bName+".ecj" {
  138. hasEcxFile = true
  139. continue
  140. }
  141. if strings.HasPrefix(fileInfo.Name(), bName+".ec") {
  142. existingShardCount++
  143. }
  144. }
  145. }
  146. if hasEcxFile && existingShardCount == 0 {
  147. if err := os.Remove(baseFilename + ".ecx"); err != nil {
  148. return nil, err
  149. }
  150. if err := os.Remove(baseFilename + ".ecj"); err != nil {
  151. return nil, err
  152. }
  153. }
  154. return &volume_server_pb.VolumeEcShardsDeleteResponse{}, nil
  155. }
  156. func (vs *VolumeServer) VolumeEcShardsMount(ctx context.Context, req *volume_server_pb.VolumeEcShardsMountRequest) (*volume_server_pb.VolumeEcShardsMountResponse, error) {
  157. for _, shardId := range req.ShardIds {
  158. err := vs.store.MountEcShards(req.Collection, needle.VolumeId(req.VolumeId), erasure_coding.ShardId(shardId))
  159. if err != nil {
  160. glog.Errorf("ec shard mount %v: %v", req, err)
  161. } else {
  162. glog.V(2).Infof("ec shard mount %v", req)
  163. }
  164. if err != nil {
  165. return nil, fmt.Errorf("mount %d.%d: %v", req.VolumeId, shardId, err)
  166. }
  167. }
  168. return &volume_server_pb.VolumeEcShardsMountResponse{}, nil
  169. }
  170. func (vs *VolumeServer) VolumeEcShardsUnmount(ctx context.Context, req *volume_server_pb.VolumeEcShardsUnmountRequest) (*volume_server_pb.VolumeEcShardsUnmountResponse, error) {
  171. for _, shardId := range req.ShardIds {
  172. err := vs.store.UnmountEcShards(needle.VolumeId(req.VolumeId), erasure_coding.ShardId(shardId))
  173. if err != nil {
  174. glog.Errorf("ec shard unmount %v: %v", req, err)
  175. } else {
  176. glog.V(2).Infof("ec shard unmount %v", req)
  177. }
  178. if err != nil {
  179. return nil, fmt.Errorf("unmount %d.%d: %v", req.VolumeId, shardId, err)
  180. }
  181. }
  182. return &volume_server_pb.VolumeEcShardsUnmountResponse{}, nil
  183. }
  184. func (vs *VolumeServer) VolumeEcShardRead(req *volume_server_pb.VolumeEcShardReadRequest, stream volume_server_pb.VolumeServer_VolumeEcShardReadServer) error {
  185. ecVolume, found := vs.store.FindEcVolume(needle.VolumeId(req.VolumeId))
  186. if !found {
  187. return fmt.Errorf("VolumeEcShardRead not found ec volume id %d", req.VolumeId)
  188. }
  189. ecShard, found := ecVolume.FindEcVolumeShard(erasure_coding.ShardId(req.ShardId))
  190. if !found {
  191. return fmt.Errorf("not found ec shard %d.%d", req.VolumeId, req.ShardId)
  192. }
  193. if req.FileKey != 0 {
  194. _, size, _ := ecVolume.FindNeedleFromEcx(types.Uint64ToNeedleId(req.FileKey))
  195. if size == types.TombstoneFileSize {
  196. return stream.Send(&volume_server_pb.VolumeEcShardReadResponse{
  197. IsDeleted: true,
  198. })
  199. }
  200. }
  201. bufSize := req.Size
  202. if bufSize > BufferSizeLimit {
  203. bufSize = BufferSizeLimit
  204. }
  205. buffer := make([]byte, bufSize)
  206. startOffset, bytesToRead := req.Offset, req.Size
  207. for bytesToRead > 0 {
  208. // min of bytesToRead and bufSize
  209. bufferSize := bufSize
  210. if bufferSize > bytesToRead {
  211. bufferSize = bytesToRead
  212. }
  213. bytesread, err := ecShard.ReadAt(buffer[0:bufferSize], startOffset)
  214. // println("read", ecShard.FileName(), "startOffset", startOffset, bytesread, "bytes, with target", bufferSize)
  215. if bytesread > 0 {
  216. if int64(bytesread) > bytesToRead {
  217. bytesread = int(bytesToRead)
  218. }
  219. err = stream.Send(&volume_server_pb.VolumeEcShardReadResponse{
  220. Data: buffer[:bytesread],
  221. })
  222. if err != nil {
  223. // println("sending", bytesread, "bytes err", err.Error())
  224. return err
  225. }
  226. startOffset += int64(bytesread)
  227. bytesToRead -= int64(bytesread)
  228. }
  229. if err != nil {
  230. if err != io.EOF {
  231. return err
  232. }
  233. return nil
  234. }
  235. }
  236. return nil
  237. }
  238. func (vs *VolumeServer) VolumeEcBlobDelete(ctx context.Context, req *volume_server_pb.VolumeEcBlobDeleteRequest) (*volume_server_pb.VolumeEcBlobDeleteResponse, error) {
  239. resp := &volume_server_pb.VolumeEcBlobDeleteResponse{}
  240. for _, location := range vs.store.Locations {
  241. if localEcVolume, found := location.FindEcVolume(needle.VolumeId(req.VolumeId)); found {
  242. _, size, _, err := localEcVolume.LocateEcShardNeedle(types.NeedleId(req.FileKey), needle.Version(req.Version))
  243. if err != nil {
  244. return nil, fmt.Errorf("locate in local ec volume: %v", err)
  245. }
  246. if size == types.TombstoneFileSize {
  247. return resp, nil
  248. }
  249. err = localEcVolume.DeleteNeedleFromEcx(types.NeedleId(req.FileKey))
  250. if err != nil {
  251. return nil, err
  252. }
  253. break
  254. }
  255. }
  256. return resp, nil
  257. }
  258. // VolumeEcShardsToVolume generates the .idx, .dat files from .ecx, .ecj and .ec01 ~ .ec14 files
  259. func (vs *VolumeServer) VolumeEcShardsToVolume(ctx context.Context, req *volume_server_pb.VolumeEcShardsToVolumeRequest) (*volume_server_pb.VolumeEcShardsToVolumeResponse, error) {
  260. v, found := vs.store.FindEcVolume(needle.VolumeId(req.VolumeId))
  261. if !found {
  262. return nil, fmt.Errorf("ec volume %d not found", req.VolumeId)
  263. }
  264. baseFileName := v.FileName()
  265. if v.Collection != req.Collection {
  266. return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection)
  267. }
  268. // calculate .dat file size
  269. datFileSize, err := erasure_coding.FindDatFileSize(baseFileName)
  270. if err != nil {
  271. return nil, fmt.Errorf("FindDatFileSize %s: %v", baseFileName, err)
  272. }
  273. // write .dat file from .ec00 ~ .ec09 files
  274. if err := erasure_coding.WriteDatFile(baseFileName, datFileSize); err != nil {
  275. return nil, fmt.Errorf("WriteEcFiles %s: %v", baseFileName, err)
  276. }
  277. // write .idx file from .ecx and .ecj files
  278. if err := erasure_coding.WriteIdxFileFromEcIndex(baseFileName); err != nil {
  279. return nil, fmt.Errorf("WriteIdxFileFromEcIndex %s: %v", baseFileName, err)
  280. }
  281. return &volume_server_pb.VolumeEcShardsToVolumeResponse{}, nil
  282. }