You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

83 lines
2.6 KiB

  1. package main
  2. import (
  3. "bytes"
  4. "compress/gzip"
  5. "crypto/md5"
  6. "flag"
  7. "io"
  8. "io/ioutil"
  9. "net/http"
  10. "time"
  11. "github.com/chrislusf/seaweedfs/weed/glog"
  12. "github.com/chrislusf/seaweedfs/weed/storage"
  13. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  14. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  15. "github.com/chrislusf/seaweedfs/weed/util"
  16. )
  17. type VolumeFileScanner4SeeDat struct {
  18. version needle.Version
  19. }
  20. func (scanner *VolumeFileScanner4SeeDat) VisitSuperBlock(superBlock super_block.SuperBlock) error {
  21. scanner.version = superBlock.Version
  22. return nil
  23. }
  24. func (scanner *VolumeFileScanner4SeeDat) ReadNeedleBody() bool {
  25. return true
  26. }
  // Package-level running totals; all three start at zero.
  var (
  	// NOTE(review): nothing in the code shown here writes to files or
  	// filebytes; confirm whether they are still needed.
  	files, filebytes int64

  	// diffbytes is the running sum of (stored size - decompressed size)
  	// added by VisitNeedle for each needle whose data decodes as gzip.
  	diffbytes int64
  )
  32. func Compresssion(data []byte) float64 {
  33. if len(data) <= 128 {
  34. return 100.0
  35. }
  36. compressed, _ := util.GzipData(data[0:128])
  37. return float64(len(compressed)*10) / 1280.0
  38. }
  39. func (scanner *VolumeFileScanner4SeeDat) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
  40. t := time.Unix(int64(n.AppendAtNs)/int64(time.Second), int64(n.AppendAtNs)%int64(time.Second))
  41. glog.V(0).Info("----------------------------------------------------------------------------------")
  42. glog.V(0).Infof("%d,%s%x offset %d size %d(%s) cookie %x appendedAt %v hasmime[%t] mime[%s] (len: %d)",
  43. *volumeId, n.Id, n.Cookie, offset, n.Size, util.BytesToHumanReadable(uint64(n.Size)), n.Cookie, t, n.HasMime(), string(n.Mime), len(n.Mime))
  44. r, err := gzip.NewReader(bytes.NewReader(n.Data))
  45. if err == nil {
  46. buf := bytes.Buffer{}
  47. h := md5.New()
  48. c, _ := io.Copy(&buf, r)
  49. d := buf.Bytes()
  50. io.Copy(h, bytes.NewReader(d))
  51. diff := (int64(n.DataSize) - int64(c))
  52. diffbytes += diff
  53. glog.V(0).Infof("was gzip! stored_size: %d orig_size: %d diff: %d(%d) mime:%s compression-of-128: %.2f md5: %x", n.DataSize, c, diff, diffbytes, http.DetectContentType(d), Compresssion(d), h.Sum(nil))
  54. } else {
  55. glog.V(0).Infof("no gzip!")
  56. }
  57. return nil
  58. }
  // Command-line options. Each is registered on the default flag set when the
  // package initialises; the pointers hold the defaults until flag.Parse runs.
  var (
  	// volumePath is the -dir option, defaulting to /tmp.
  	volumePath = flag.String("dir", "/tmp", "data directory to store files")
  	// volumeCollection is the -collection option, empty by default.
  	volumeCollection = flag.String("collection", "", "the volume collection name")
  	// volumeId is the -volumeId option; its default is -1.
  	volumeId = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.")

  	// Blank reference so the io/ioutil import counts as used.
  	_ = ioutil.ReadAll
  )
  65. func main() {
  66. flag.Parse()
  67. vid := needle.VolumeId(*volumeId)
  68. glog.V(0).Info("Starting")
  69. scanner := &VolumeFileScanner4SeeDat{}
  70. err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, storage.NeedleMapInMemory, scanner)
  71. if err != nil {
  72. glog.Fatalf("Reading Volume File [ERROR] %s\n", err)
  73. }
  74. }