package operation

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"mime"
	"mime/multipart"
	"net/http"
	"net/textproto"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/valyala/bytebufferpool"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/security"
	"github.com/seaweedfs/seaweedfs/weed/stats"
	"github.com/seaweedfs/seaweedfs/weed/util"
	util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
	util_http_client "github.com/seaweedfs/seaweedfs/weed/util/http/client"
)

type UploadOption struct {
	UploadUrl         string
	Filename          string
	Cipher            bool
	IsInputCompressed bool
	MimeType          string
	PairMap           map[string]string
	Jwt               security.EncodedJwt
	RetryForever      bool
	Md5               string
	BytesBuffer       *bytes.Buffer
}

type UploadResult struct {
	Name       string `json:"name,omitempty"`
	Size       uint32 `json:"size,omitempty"`
	Error      string `json:"error,omitempty"`
	ETag       string `json:"eTag,omitempty"`
	CipherKey  []byte `json:"cipherKey,omitempty"`
	Mime       string `json:"mime,omitempty"`
	Gzip       uint32 `json:"gzip,omitempty"`
	ContentMd5 string `json:"contentMd5,omitempty"`
	RetryCount int    `json:"-"`
}
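
// A successful upload unmarshals the volume server's JSON response into
// UploadResult. An illustrative (not captured) response body:
//
//	{"name":"hello.txt","size":1024,"eTag":"5d41402a"}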

// ToPbFileChunk converts the upload result into a filer_pb.FileChunk at the
// given offset and modification time.
func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64, tsNs int64) *filer_pb.FileChunk {
	fid, _ := filer_pb.ToFileIdObject(fileId)
	return &filer_pb.FileChunk{
		FileId:       fileId,
		Offset:       offset,
		Size:         uint64(uploadResult.Size),
		ModifiedTsNs: tsNs,
		ETag:         uploadResult.ContentMd5,
		CipherKey:    uploadResult.CipherKey,
		IsCompressed: uploadResult.Gzip > 0,
		Fid:          fid,
	}
}
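
// A minimal usage sketch tying an upload to a filer chunk; offset 0 and the
// current time are illustrative values:
//
//	chunk := uploadResult.ToPbFileChunk(fileId, 0, time.Now().UnixNano())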

var (
	fileNameEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`, "\n", "")
	uploader        *Uploader
	uploaderErr     error
	once            sync.Once
)

// HTTPClient is the minimal HTTP client interface, abstracted for testing.
type HTTPClient interface {
	Do(req *http.Request) (*http.Response, error)
}

// Uploader uploads content to volume servers through a shared HTTP client.
type Uploader struct {
	httpClient HTTPClient
}

// NewUploader returns the shared Uploader singleton, initializing its HTTP
// client on first use.
func NewUploader() (*Uploader, error) {
	once.Do(func() {
		// with dial context
		var httpClient *util_http_client.HTTPClient
		httpClient, uploaderErr = util_http.NewGlobalHttpClient(util_http_client.AddDialContext)
		if uploaderErr != nil {
			uploaderErr = fmt.Errorf("error initializing the uploader: %s", uploaderErr)
		}
		if httpClient != nil {
			uploader = newUploader(httpClient)
		}
	})
	return uploader, uploaderErr
}

func newUploader(httpClient HTTPClient) *Uploader {
	return &Uploader{
		httpClient: httpClient,
	}
}
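
// A minimal usage sketch; the volume server URL and file id below are
// illustrative, not real:
//
//	uploader, err := NewUploader()
//	if err != nil {
//		return err
//	}
//	result, err := uploader.UploadData(data, &UploadOption{
//		UploadUrl: "http://127.0.0.1:8080/3,0144b7d9",
//		Filename:  "hello.txt",
//	})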

// UploadWithRetry retries both the volume assignment request and the content upload.
// The uploadOption parameter does not need to specify UploadUrl and Jwt; both come from the volume assignment.
func (uploader *Uploader) UploadWithRetry(filerClient filer_pb.FilerClient, assignRequest *filer_pb.AssignVolumeRequest, uploadOption *UploadOption, genFileUrlFn func(host, fileId string) string, reader io.Reader) (fileId string, uploadResult *UploadResult, err error, data []byte) {
	doUploadFunc := func() error {

		var host string
		var auth security.EncodedJwt

		// grpc assign volume
		if grpcAssignErr := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
			resp, assignErr := client.AssignVolume(context.Background(), assignRequest)
			if assignErr != nil {
				glog.V(0).Infof("assign volume failure %v: %v", assignRequest, assignErr)
				return assignErr
			}
			if resp.Error != "" {
				return fmt.Errorf("assign volume failure %v: %v", assignRequest, resp.Error)
			}

			fileId, auth = resp.FileId, security.EncodedJwt(resp.Auth)
			loc := resp.Location
			host = filerClient.AdjustedUrl(loc)
			return nil
		}); grpcAssignErr != nil {
			return fmt.Errorf("filerGrpcAddress assign volume: %v", grpcAssignErr)
		}

		uploadOption.UploadUrl = genFileUrlFn(host, fileId)
		uploadOption.Jwt = auth

		var uploadErr error
		uploadResult, uploadErr, data = uploader.doUpload(reader, uploadOption)
		return uploadErr
	}
	if uploadOption.RetryForever {
		util.RetryUntil("uploadWithRetryForever", doUploadFunc, func(err error) (shouldContinue bool) {
			glog.V(0).Infof("upload content: %v", err)
			return true
		})
	} else {
		uploadErrList := []string{"transport", "is read only"}
		err = util.MultiRetry("uploadWithRetry", uploadErrList, doUploadFunc)
	}

	return
}
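
// A hedged usage sketch; myFilerClient is a hypothetical filer_pb.FilerClient
// implementation, and the assignment parameters are illustrative:
//
//	fileId, result, err, _ := uploader.UploadWithRetry(myFilerClient,
//		&filer_pb.AssignVolumeRequest{Count: 1, Replication: "000"},
//		&UploadOption{Filename: "hello.txt"},
//		func(host, fileId string) string { return "http://" + host + "/" + fileId },
//		bytes.NewReader(data))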

// UploadData sends a POST request to a volume server to upload data, with an adjustable compression level.
func (uploader *Uploader) UploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
	uploadResult, err = uploader.retriedUploadData(data, option)
	return
}

// Upload sends a POST request to a volume server to upload the content from a reader, with fast compression.
func (uploader *Uploader) Upload(reader io.Reader, option *UploadOption) (uploadResult *UploadResult, err error, data []byte) {
	uploadResult, err, data = uploader.doUpload(reader, option)
	return
}

func (uploader *Uploader) doUpload(reader io.Reader, option *UploadOption) (uploadResult *UploadResult, err error, data []byte) {
	bytesReader, ok := reader.(*util.BytesReader)
	if ok {
		data = bytesReader.Bytes
	} else {
		data, err = io.ReadAll(reader)
		if err != nil {
			err = fmt.Errorf("read input: %v", err)
			return
		}
	}
	uploadResult, uploadErr := uploader.retriedUploadData(data, option)
	return uploadResult, uploadErr, data
}

func (uploader *Uploader) retriedUploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
	for i := 0; i < 3; i++ {
		if i > 0 {
			time.Sleep(time.Millisecond * time.Duration(237*(i+1)))
		}
		uploadResult, err = uploader.doUploadData(data, option)
		if err == nil {
			uploadResult.RetryCount = i
			return
		}
		glog.Warningf("uploading %d to %s: %v", i, option.UploadUrl, err)
	}
	return
}
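
// Note: retriedUploadData above makes up to three attempts with a linearly
// growing delay of 237ms*(i+1), i.e. it sleeps 474ms before the second
// attempt and 711ms before the third.
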
func (uploader *Uploader) doUploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
	contentIsGzipped := option.IsInputCompressed
	shouldGzipNow := false
	if !option.IsInputCompressed {
		if option.MimeType == "" {
			option.MimeType = http.DetectContentType(data)
			// println("detect1 mimetype to", MimeType)
			if option.MimeType == "application/octet-stream" {
				option.MimeType = ""
			}
		}
		if shouldBeCompressed, iAmSure := util.IsCompressableFileType(filepath.Base(option.Filename), option.MimeType); iAmSure && shouldBeCompressed {
			shouldGzipNow = true
		} else if !iAmSure && option.MimeType == "" && len(data) > 16*1024 {
			var compressed []byte
			compressed, err = util.GzipData(data[0:128])
			if err != nil {
				return
			}
			shouldGzipNow = len(compressed)*10 < 128*9 // compress only if the 128-byte sample shrinks below 90% of its size
		}
	}
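
	// Worked example for the sampling heuristic above: if the 128-byte sample
	// gzips to 100 bytes, then 100*10 = 1000 < 128*9 = 1152, so the payload is
	// considered compressible and will be gzipped in full below.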

	var clearDataLen int

	// gzip if possible
	// this could be double copying
	clearDataLen = len(data)
	clearData := data
	if shouldGzipNow {
		compressed, compressErr := util.GzipData(data)
		// fmt.Printf("data is compressed from %d ==> %d\n", len(data), len(compressed))
		if compressErr == nil {
			data = compressed
			contentIsGzipped = true
		}
	} else if option.IsInputCompressed {
		// just to get the clear data length
		clearData, err = util.DecompressData(data)
		if err == nil {
			clearDataLen = len(clearData)
		}
	}

	if option.Cipher {
		// encrypt(gzip(data))

		// encrypt
		cipherKey := util.GenCipherKey()
		encryptedData, encryptionErr := util.Encrypt(data, cipherKey)
		if encryptionErr != nil {
			err = fmt.Errorf("encrypt input: %v", encryptionErr)
			return
		}

		// upload data
		uploadResult, err = uploader.upload_content(func(w io.Writer) (err error) {
			_, err = w.Write(encryptedData)
			return
		}, len(encryptedData), &UploadOption{
			UploadUrl:         option.UploadUrl,
			Filename:          "",
			Cipher:            false,
			IsInputCompressed: false,
			MimeType:          "",
			PairMap:           nil,
			Jwt:               option.Jwt,
		})
		if uploadResult == nil {
			return
		}
		uploadResult.Name = option.Filename
		uploadResult.Mime = option.MimeType
		uploadResult.CipherKey = cipherKey
		uploadResult.Size = uint32(clearDataLen)
		if contentIsGzipped {
			uploadResult.Gzip = 1
		}
	} else {
		// upload data
		uploadResult, err = uploader.upload_content(func(w io.Writer) (err error) {
			_, err = w.Write(data)
			return
		}, len(data), &UploadOption{
			UploadUrl:         option.UploadUrl,
			Filename:          option.Filename,
			Cipher:            false,
			IsInputCompressed: contentIsGzipped,
			MimeType:          option.MimeType,
			PairMap:           option.PairMap,
			Jwt:               option.Jwt,
			Md5:               option.Md5,
			BytesBuffer:       option.BytesBuffer,
		})
		if uploadResult == nil {
			return
		}
		uploadResult.Size = uint32(clearDataLen)
		if contentIsGzipped {
			uploadResult.Gzip = 1
		}
	}

	return uploadResult, err
}
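
// In short, doUploadData above uploads encrypt(gzip(data)) when Cipher is
// set: the payload is optionally gzipped, then optionally encrypted, while
// UploadResult.Size always reports the clear, uncompressed length.
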
// upload_content builds a multipart/form-data request containing the payload
// and POSTs it to the volume server, retrying once on connection-reset errors.
func (uploader *Uploader) upload_content(fillBufferFunction func(w io.Writer) error, originalDataSize int, option *UploadOption) (*UploadResult, error) {
	var body_writer *multipart.Writer
	var reqReader *bytes.Reader
	var buf *bytebufferpool.ByteBuffer
	if option.BytesBuffer == nil {
		buf = GetBuffer()
		defer PutBuffer(buf)
		body_writer = multipart.NewWriter(buf)
	} else {
		option.BytesBuffer.Reset()
		body_writer = multipart.NewWriter(option.BytesBuffer)
	}
	h := make(textproto.MIMEHeader)
	filename := fileNameEscaper.Replace(option.Filename)
	h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="file"; filename="%s"`, filename))
	h.Set("Idempotency-Key", option.UploadUrl)
	if option.MimeType == "" {
		option.MimeType = mime.TypeByExtension(strings.ToLower(filepath.Ext(option.Filename)))
	}
	if option.MimeType != "" {
		h.Set("Content-Type", option.MimeType)
	}
	if option.IsInputCompressed {
		h.Set("Content-Encoding", "gzip")
	}
	if option.Md5 != "" {
		h.Set("Content-MD5", option.Md5)
	}

	file_writer, cp_err := body_writer.CreatePart(h)
	if cp_err != nil {
		glog.V(0).Infoln("error creating form file", cp_err.Error())
		return nil, cp_err
	}
	if err := fillBufferFunction(file_writer); err != nil {
		glog.V(0).Infoln("error copying data", err)
		return nil, err
	}
	content_type := body_writer.FormDataContentType()
	if err := body_writer.Close(); err != nil {
		glog.V(0).Infoln("error closing body", err)
		return nil, err
	}

	if option.BytesBuffer == nil {
		reqReader = bytes.NewReader(buf.Bytes())
	} else {
		reqReader = bytes.NewReader(option.BytesBuffer.Bytes())
	}
	req, postErr := http.NewRequest(http.MethodPost, option.UploadUrl, reqReader)
	if postErr != nil {
		glog.V(1).Infof("create upload request %s: %v", option.UploadUrl, postErr)
		return nil, fmt.Errorf("create upload request %s: %v", option.UploadUrl, postErr)
	}
	req.Header.Set("Content-Type", content_type)
	for k, v := range option.PairMap {
		req.Header.Set(k, v)
	}
	if option.Jwt != "" {
		req.Header.Set("Authorization", "BEARER "+string(option.Jwt))
	}

	// print("+")
	resp, post_err := uploader.httpClient.Do(req)
	defer util_http.CloseResponse(resp)
	if post_err != nil {
		if strings.Contains(post_err.Error(), "connection reset by peer") ||
			strings.Contains(post_err.Error(), "use of closed network connection") {
			glog.V(1).Infof("repeat error upload request %s: %v", option.UploadUrl, post_err)
			stats.FilerHandlerCounter.WithLabelValues(stats.RepeatErrorUploadContent).Inc()
			resp, post_err = uploader.httpClient.Do(req)
			defer util_http.CloseResponse(resp)
		}
	}
	if post_err != nil {
		return nil, fmt.Errorf("upload %s %d bytes to %v: %v", option.Filename, originalDataSize, option.UploadUrl, post_err)
	}
	// print("-")

	var ret UploadResult
	etag := getEtag(resp)
	if resp.StatusCode == http.StatusNoContent {
		ret.ETag = etag
		return &ret, nil
	}

	resp_body, ra_err := io.ReadAll(resp.Body)
	if ra_err != nil {
		return nil, fmt.Errorf("read response body %v: %v", option.UploadUrl, ra_err)
	}

	unmarshal_err := json.Unmarshal(resp_body, &ret)
	if unmarshal_err != nil {
		glog.Errorf("unmarshal %s: %v", option.UploadUrl, string(resp_body))
		return nil, fmt.Errorf("unmarshal %v: %v", option.UploadUrl, unmarshal_err)
	}
	if ret.Error != "" {
		return nil, fmt.Errorf("unmarshalled error %v: %v", option.UploadUrl, ret.Error)
	}
	ret.ETag = etag
	ret.ContentMd5 = resp.Header.Get("Content-MD5")
	return &ret, nil
}

// getEtag returns the response's ETag header, stripping surrounding double
// quotes if present.
func getEtag(r *http.Response) (etag string) {
	etag = r.Header.Get("ETag")
	if strings.HasPrefix(etag, "\"") && strings.HasSuffix(etag, "\"") {
		etag = etag[1 : len(etag)-1]
	}
	return
}
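
// For example, a response header of ETag: "0f5a9c1b" yields the bare string
// 0f5a9c1b (the value here is illustrative).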