You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

370 lines
11 KiB

3 years ago
3 years ago
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
4 years ago
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2 years ago
4 years ago
3 years ago
3 years ago
3 years ago
5 years ago
3 years ago
5 years ago
3 years ago
2 years ago
3 years ago
4 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
5 years ago
3 years ago
3 years ago
3 years ago
5 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
12 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
  1. package operation
  2. import (
  3. "bytes"
  4. "context"
  5. "encoding/json"
  6. "fmt"
  7. "io"
  8. "mime"
  9. "mime/multipart"
  10. "net"
  11. "net/http"
  12. "net/textproto"
  13. "path/filepath"
  14. "strings"
  15. "time"
  16. "github.com/seaweedfs/seaweedfs/weed/glog"
  17. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  18. "github.com/seaweedfs/seaweedfs/weed/security"
  19. "github.com/seaweedfs/seaweedfs/weed/stats"
  20. "github.com/seaweedfs/seaweedfs/weed/util"
  21. )
  22. type UploadOption struct {
  23. UploadUrl string
  24. Filename string
  25. Cipher bool
  26. IsInputCompressed bool
  27. MimeType string
  28. PairMap map[string]string
  29. Jwt security.EncodedJwt
  30. RetryForever bool
  31. Md5 string
  32. }
  33. type UploadResult struct {
  34. Name string `json:"name,omitempty"`
  35. Size uint32 `json:"size,omitempty"`
  36. Error string `json:"error,omitempty"`
  37. ETag string `json:"eTag,omitempty"`
  38. CipherKey []byte `json:"cipherKey,omitempty"`
  39. Mime string `json:"mime,omitempty"`
  40. Gzip uint32 `json:"gzip,omitempty"`
  41. ContentMd5 string `json:"contentMd5,omitempty"`
  42. RetryCount int `json:"-"`
  43. }
  44. func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64, tsNs int64) *filer_pb.FileChunk {
  45. fid, _ := filer_pb.ToFileIdObject(fileId)
  46. return &filer_pb.FileChunk{
  47. FileId: fileId,
  48. Offset: offset,
  49. Size: uint64(uploadResult.Size),
  50. ModifiedTsNs: tsNs,
  51. ETag: uploadResult.ContentMd5,
  52. CipherKey: uploadResult.CipherKey,
  53. IsCompressed: uploadResult.Gzip > 0,
  54. Fid: fid,
  55. }
  56. }
  57. // HTTPClient interface for testing
  58. type HTTPClient interface {
  59. Do(req *http.Request) (*http.Response, error)
  60. }
  61. var (
  62. HttpClient HTTPClient
  63. )
  64. func init() {
  65. HttpClient = &http.Client{Transport: &http.Transport{
  66. DialContext: (&net.Dialer{
  67. Timeout: 10 * time.Second,
  68. KeepAlive: 10 * time.Second,
  69. }).DialContext,
  70. MaxIdleConns: 1024,
  71. MaxIdleConnsPerHost: 1024,
  72. }}
  73. }
  74. // UploadWithRetry will retry both assigning volume request and uploading content
  75. // The option parameter does not need to specify UploadUrl and Jwt, which will come from assigning volume.
  76. func UploadWithRetry(filerClient filer_pb.FilerClient, assignRequest *filer_pb.AssignVolumeRequest, uploadOption *UploadOption, genFileUrlFn func(host, fileId string) string, reader io.Reader) (fileId string, uploadResult *UploadResult, err error, data []byte) {
  77. doUploadFunc := func() error {
  78. var host string
  79. var auth security.EncodedJwt
  80. // grpc assign volume
  81. if grpcAssignErr := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
  82. resp, assignErr := client.AssignVolume(context.Background(), assignRequest)
  83. if assignErr != nil {
  84. glog.V(0).Infof("assign volume failure %v: %v", assignRequest, assignErr)
  85. return assignErr
  86. }
  87. if resp.Error != "" {
  88. return fmt.Errorf("assign volume failure %v: %v", assignRequest, resp.Error)
  89. }
  90. fileId, auth = resp.FileId, security.EncodedJwt(resp.Auth)
  91. loc := resp.Location
  92. host = filerClient.AdjustedUrl(loc)
  93. return nil
  94. }); grpcAssignErr != nil {
  95. return fmt.Errorf("filerGrpcAddress assign volume: %v", grpcAssignErr)
  96. }
  97. uploadOption.UploadUrl = genFileUrlFn(host, fileId)
  98. uploadOption.Jwt = auth
  99. var uploadErr error
  100. uploadResult, uploadErr, data = doUpload(reader, uploadOption)
  101. return uploadErr
  102. }
  103. if uploadOption.RetryForever {
  104. util.RetryUntil("uploadWithRetryForever", doUploadFunc, func(err error) (shouldContinue bool) {
  105. glog.V(0).Infof("upload content: %v", err)
  106. return true
  107. })
  108. } else {
  109. uploadErrList := []string{"transport", "is read only"}
  110. err = util.MultiRetry("uploadWithRetry", uploadErrList, doUploadFunc)
  111. }
  112. return
  113. }
  114. var fileNameEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`, "\n", "")
  115. // Upload sends a POST request to a volume server to upload the content with adjustable compression level
  116. func UploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
  117. uploadResult, err = retriedUploadData(data, option)
  118. return
  119. }
  120. // Upload sends a POST request to a volume server to upload the content with fast compression
  121. func Upload(reader io.Reader, option *UploadOption) (uploadResult *UploadResult, err error, data []byte) {
  122. uploadResult, err, data = doUpload(reader, option)
  123. return
  124. }
  125. func doUpload(reader io.Reader, option *UploadOption) (uploadResult *UploadResult, err error, data []byte) {
  126. bytesReader, ok := reader.(*util.BytesReader)
  127. if ok {
  128. data = bytesReader.Bytes
  129. } else {
  130. data, err = io.ReadAll(reader)
  131. if err != nil {
  132. err = fmt.Errorf("read input: %v", err)
  133. return
  134. }
  135. }
  136. uploadResult, uploadErr := retriedUploadData(data, option)
  137. return uploadResult, uploadErr, data
  138. }
  139. func retriedUploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
  140. for i := 0; i < 3; i++ {
  141. if i > 0 {
  142. time.Sleep(time.Millisecond * time.Duration(237*(i+1)))
  143. }
  144. uploadResult, err = doUploadData(data, option)
  145. if err == nil {
  146. uploadResult.RetryCount = i
  147. return
  148. }
  149. glog.Warningf("uploading %d to %s: %v", i, option.UploadUrl, err)
  150. }
  151. return
  152. }
  153. func doUploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
  154. contentIsGzipped := option.IsInputCompressed
  155. shouldGzipNow := false
  156. if !option.IsInputCompressed {
  157. if option.MimeType == "" {
  158. option.MimeType = http.DetectContentType(data)
  159. // println("detect1 mimetype to", MimeType)
  160. if option.MimeType == "application/octet-stream" {
  161. option.MimeType = ""
  162. }
  163. }
  164. if shouldBeCompressed, iAmSure := util.IsCompressableFileType(filepath.Base(option.Filename), option.MimeType); iAmSure && shouldBeCompressed {
  165. shouldGzipNow = true
  166. } else if !iAmSure && option.MimeType == "" && len(data) > 16*1024 {
  167. var compressed []byte
  168. compressed, err = util.GzipData(data[0:128])
  169. shouldGzipNow = len(compressed)*10 < 128*9 // can not compress to less than 90%
  170. }
  171. }
  172. var clearDataLen int
  173. // gzip if possible
  174. // this could be double copying
  175. clearDataLen = len(data)
  176. clearData := data
  177. if shouldGzipNow && !option.Cipher {
  178. compressed, compressErr := util.GzipData(data)
  179. // fmt.Printf("data is compressed from %d ==> %d\n", len(data), len(compressed))
  180. if compressErr == nil {
  181. data = compressed
  182. contentIsGzipped = true
  183. }
  184. } else if option.IsInputCompressed {
  185. // just to get the clear data length
  186. clearData, err = util.DecompressData(data)
  187. if err == nil {
  188. clearDataLen = len(clearData)
  189. }
  190. }
  191. if option.Cipher {
  192. // encrypt(gzip(data))
  193. // encrypt
  194. cipherKey := util.GenCipherKey()
  195. encryptedData, encryptionErr := util.Encrypt(clearData, cipherKey)
  196. if encryptionErr != nil {
  197. err = fmt.Errorf("encrypt input: %v", encryptionErr)
  198. return
  199. }
  200. // upload data
  201. uploadResult, err = upload_content(func(w io.Writer) (err error) {
  202. _, err = w.Write(encryptedData)
  203. return
  204. }, len(encryptedData), &UploadOption{
  205. UploadUrl: option.UploadUrl,
  206. Filename: "",
  207. Cipher: false,
  208. IsInputCompressed: false,
  209. MimeType: "",
  210. PairMap: nil,
  211. Jwt: option.Jwt,
  212. })
  213. if uploadResult == nil {
  214. return
  215. }
  216. uploadResult.Name = option.Filename
  217. uploadResult.Mime = option.MimeType
  218. uploadResult.CipherKey = cipherKey
  219. uploadResult.Size = uint32(clearDataLen)
  220. } else {
  221. // upload data
  222. uploadResult, err = upload_content(func(w io.Writer) (err error) {
  223. _, err = w.Write(data)
  224. return
  225. }, len(data), &UploadOption{
  226. UploadUrl: option.UploadUrl,
  227. Filename: option.Filename,
  228. Cipher: false,
  229. IsInputCompressed: contentIsGzipped,
  230. MimeType: option.MimeType,
  231. PairMap: option.PairMap,
  232. Jwt: option.Jwt,
  233. Md5: option.Md5,
  234. })
  235. if uploadResult == nil {
  236. return
  237. }
  238. uploadResult.Size = uint32(clearDataLen)
  239. if contentIsGzipped {
  240. uploadResult.Gzip = 1
  241. }
  242. }
  243. return uploadResult, err
  244. }
  245. func upload_content(fillBufferFunction func(w io.Writer) error, originalDataSize int, option *UploadOption) (*UploadResult, error) {
  246. buf := GetBuffer()
  247. defer PutBuffer(buf)
  248. body_writer := multipart.NewWriter(buf)
  249. h := make(textproto.MIMEHeader)
  250. filename := fileNameEscaper.Replace(option.Filename)
  251. h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="file"; filename="%s"`, filename))
  252. h.Set("Idempotency-Key", option.UploadUrl)
  253. if option.MimeType == "" {
  254. option.MimeType = mime.TypeByExtension(strings.ToLower(filepath.Ext(option.Filename)))
  255. }
  256. if option.MimeType != "" {
  257. h.Set("Content-Type", option.MimeType)
  258. }
  259. if option.IsInputCompressed {
  260. h.Set("Content-Encoding", "gzip")
  261. }
  262. if option.Md5 != "" {
  263. h.Set("Content-MD5", option.Md5)
  264. }
  265. file_writer, cp_err := body_writer.CreatePart(h)
  266. if cp_err != nil {
  267. glog.V(0).Infoln("error creating form file", cp_err.Error())
  268. return nil, cp_err
  269. }
  270. if err := fillBufferFunction(file_writer); err != nil {
  271. glog.V(0).Infoln("error copying data", err)
  272. return nil, err
  273. }
  274. content_type := body_writer.FormDataContentType()
  275. if err := body_writer.Close(); err != nil {
  276. glog.V(0).Infoln("error closing body", err)
  277. return nil, err
  278. }
  279. req, postErr := http.NewRequest("POST", option.UploadUrl, bytes.NewReader(buf.Bytes()))
  280. if postErr != nil {
  281. glog.V(1).Infof("create upload request %s: %v", option.UploadUrl, postErr)
  282. return nil, fmt.Errorf("create upload request %s: %v", option.UploadUrl, postErr)
  283. }
  284. req.Header.Set("Content-Type", content_type)
  285. for k, v := range option.PairMap {
  286. req.Header.Set(k, v)
  287. }
  288. if option.Jwt != "" {
  289. req.Header.Set("Authorization", "BEARER "+string(option.Jwt))
  290. }
  291. // print("+")
  292. resp, post_err := HttpClient.Do(req)
  293. defer util.CloseResponse(resp)
  294. if post_err != nil {
  295. if strings.Contains(post_err.Error(), "connection reset by peer") ||
  296. strings.Contains(post_err.Error(), "use of closed network connection") {
  297. glog.V(1).Infof("repeat error upload request %s: %v", option.UploadUrl, postErr)
  298. stats.FilerRequestCounter.WithLabelValues(stats.RepeatErrorUploadContent).Inc()
  299. resp, post_err = HttpClient.Do(req)
  300. defer util.CloseResponse(resp)
  301. }
  302. }
  303. if post_err != nil {
  304. return nil, fmt.Errorf("upload %s %d bytes to %v: %v", option.Filename, originalDataSize, option.UploadUrl, post_err)
  305. }
  306. // print("-")
  307. var ret UploadResult
  308. etag := getEtag(resp)
  309. if resp.StatusCode == http.StatusNoContent {
  310. ret.ETag = etag
  311. return &ret, nil
  312. }
  313. resp_body, ra_err := io.ReadAll(resp.Body)
  314. if ra_err != nil {
  315. return nil, fmt.Errorf("read response body %v: %v", option.UploadUrl, ra_err)
  316. }
  317. unmarshal_err := json.Unmarshal(resp_body, &ret)
  318. if unmarshal_err != nil {
  319. glog.Errorf("unmarshal %s: %v", option.UploadUrl, string(resp_body))
  320. return nil, fmt.Errorf("unmarshal %v: %v", option.UploadUrl, unmarshal_err)
  321. }
  322. if ret.Error != "" {
  323. return nil, fmt.Errorf("unmarshalled error %v: %v", option.UploadUrl, ret.Error)
  324. }
  325. ret.ETag = etag
  326. ret.ContentMd5 = resp.Header.Get("Content-MD5")
  327. return &ret, nil
  328. }
  329. func getEtag(r *http.Response) (etag string) {
  330. etag = r.Header.Get("ETag")
  331. if strings.HasPrefix(etag, "\"") && strings.HasSuffix(etag, "\"") {
  332. etag = etag[1 : len(etag)-1]
  333. }
  334. return
  335. }