You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

213 lines
5.9 KiB

12 years ago
12 years ago
12 years ago
11 years ago
12 years ago
12 years ago
  1. package command
  2. import (
  3. "archive/tar"
  4. "bytes"
  5. "fmt"
  6. "os"
  7. "path"
  8. "path/filepath"
  9. "strconv"
  10. "strings"
  11. "text/template"
  12. "time"
  13. "github.com/chrislusf/seaweedfs/weed/glog"
  14. "github.com/chrislusf/seaweedfs/weed/storage"
  15. )
  // defaultFnFormat is the text/template pattern used to name tar entries
  // when the -fileNameFormat flag is left at its default.
  const defaultFnFormat = `{{.Mime}}/{{.Id}}:{{.Name}}`

  // timeFormat is the reference layout accepted by the -newer flag: a local
  // date and time with second precision and no timezone suffix.
  const timeFormat = "2006-01-02T15:04:05"
  20. var (
  21. export ExportOptions
  22. )
  // ExportOptions groups the flag-backed settings that identify the volume
  // to export. Every field is a pointer because it is the value returned by
  // the flag registration call.
  type ExportOptions struct {
  	// dir is the directory containing the volume's data and index files.
  	dir *string
  	// collection is the optional collection name; when non-empty it is
  	// prefixed to the volume file name.
  	collection *string
  	// volumeId is the numeric id of the volume; -1 means "not provided".
  	volumeId *int
  }
  28. var cmdExport = &Command{
  29. UsageLine: "export -dir=/tmp -volumeId=234 -o=/dir/name.tar -fileNameFormat={{.Name}} -newer='" + timeFormat + "'",
  30. Short: "list or export files from one volume data file",
  31. Long: `List all files in a volume, or Export all files in a volume to a tar file if the output is specified.
  32. The format of file name in the tar file can be customized. Default is {{.Mime}}/{{.Id}}:{{.Name}}. Also available is {{.Key}}.
  33. `,
  34. }
  35. func init() {
  36. cmdExport.Run = runExport // break init cycle
  37. export.dir = cmdExport.Flag.String("dir", ".", "input data directory to store volume data files")
  38. export.collection = cmdExport.Flag.String("collection", "", "the volume collection name")
  39. export.volumeId = cmdExport.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.")
  40. }
  41. var (
  42. output = cmdExport.Flag.String("o", "", "output tar file name, must ends with .tar, or just a \"-\" for stdout")
  43. format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Mime}} {{.Id}} {{.Name}} {{.Ext}}")
  44. newer = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone, e.g. 2006-01-02T15:04:05")
  45. tarOutputFile *tar.Writer
  46. tarHeader tar.Header
  47. fileNameTemplate *template.Template
  48. fileNameTemplateBuffer = bytes.NewBuffer(nil)
  49. newerThan time.Time
  50. newerThanUnix int64 = -1
  51. localLocation, _ = time.LoadLocation("Local")
  52. )
  53. func runExport(cmd *Command, args []string) bool {
  54. var err error
  55. if *newer != "" {
  56. if newerThan, err = time.ParseInLocation(timeFormat, *newer, localLocation); err != nil {
  57. fmt.Println("cannot parse 'newer' argument: " + err.Error())
  58. return false
  59. }
  60. newerThanUnix = newerThan.Unix()
  61. }
  62. if *export.volumeId == -1 {
  63. return false
  64. }
  65. if *output != "" {
  66. if *output != "-" && !strings.HasSuffix(*output, ".tar") {
  67. fmt.Println("the output file", *output, "should be '-' or end with .tar")
  68. return false
  69. }
  70. if fileNameTemplate, err = template.New("name").Parse(*format); err != nil {
  71. fmt.Println("cannot parse format " + *format + ": " + err.Error())
  72. return false
  73. }
  74. var outputFile *os.File
  75. if *output == "-" {
  76. outputFile = os.Stdout
  77. } else {
  78. if outputFile, err = os.Create(*output); err != nil {
  79. glog.Fatalf("cannot open output tar %s: %s", *output, err)
  80. }
  81. }
  82. defer outputFile.Close()
  83. tarOutputFile = tar.NewWriter(outputFile)
  84. defer tarOutputFile.Close()
  85. t := time.Now()
  86. tarHeader = tar.Header{Mode: 0644,
  87. ModTime: t, Uid: os.Getuid(), Gid: os.Getgid(),
  88. Typeflag: tar.TypeReg,
  89. AccessTime: t, ChangeTime: t}
  90. }
  91. fileName := strconv.Itoa(*export.volumeId)
  92. if *export.collection != "" {
  93. fileName = *export.collection + "_" + fileName
  94. }
  95. vid := storage.VolumeId(*export.volumeId)
  96. indexFile, err := os.OpenFile(path.Join(*export.dir, fileName+".idx"), os.O_RDONLY, 0644)
  97. if err != nil {
  98. glog.Fatalf("Create Volume Index [ERROR] %s\n", err)
  99. }
  100. defer indexFile.Close()
  101. needleMap, err := storage.LoadNeedleMap(indexFile)
  102. if err != nil {
  103. glog.Fatalf("cannot load needle map from %s: %s", indexFile.Name(), err)
  104. }
  105. var version storage.Version
  106. err = storage.ScanVolumeFile(*export.dir, *export.collection, vid,
  107. storage.NeedleMapInMemory,
  108. func(superBlock storage.SuperBlock) error {
  109. version = superBlock.Version()
  110. return nil
  111. }, true, func(n *storage.Needle, offset int64) error {
  112. nv, ok := needleMap.Get(n.Id)
  113. glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v",
  114. n.Id, offset, n.Size, n.DiskSize(), n.IsGzipped(), ok, nv)
  115. if ok && nv.Size > 0 && int64(nv.Offset)*8 == offset {
  116. if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
  117. glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
  118. n.LastModified, newerThanUnix)
  119. return nil
  120. }
  121. return walker(vid, n, version)
  122. }
  123. if !ok {
  124. glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
  125. } else {
  126. glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size)
  127. }
  128. return nil
  129. })
  130. if err != nil {
  131. glog.Fatalf("Export Volume File [ERROR] %s\n", err)
  132. }
  133. return true
  134. }
  // nameParams is the data made available to the file name template when a
  // tar entry name is rendered; templates refer to the fields as {{.Name}},
  // {{.Id}}, {{.Mime}}, {{.Key}} and {{.Ext}}.
  type nameParams struct {
  	// Name is the stored file name of the needle.
  	Name string
  	// Id is the numeric needle id.
  	Id uint64
  	// Mime is the stored MIME type of the needle.
  	Mime string
  	// Key is the string form of the file id built from the volume id and needle.
  	Key string
  	// Ext is the extension of Name, including the leading dot.
  	Ext string
  }
  142. func walker(vid storage.VolumeId, n *storage.Needle, version storage.Version) (err error) {
  143. key := storage.NewFileIdFromNeedle(vid, n).String()
  144. if tarOutputFile != nil {
  145. fileNameTemplateBuffer.Reset()
  146. if err = fileNameTemplate.Execute(fileNameTemplateBuffer,
  147. nameParams{
  148. Name: string(n.Name),
  149. Id: n.Id,
  150. Mime: string(n.Mime),
  151. Key: key,
  152. Ext: filepath.Ext(string(n.Name)),
  153. },
  154. ); err != nil {
  155. return err
  156. }
  157. fileName := fileNameTemplateBuffer.String()
  158. if n.IsGzipped() && path.Ext(fileName) != ".gz" {
  159. fileName = fileName + ".gz"
  160. }
  161. tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data))
  162. if n.HasLastModifiedDate() {
  163. tarHeader.ModTime = time.Unix(int64(n.LastModified), 0)
  164. } else {
  165. tarHeader.ModTime = time.Unix(0, 0)
  166. }
  167. tarHeader.ChangeTime = tarHeader.ModTime
  168. if err = tarOutputFile.WriteHeader(&tarHeader); err != nil {
  169. return err
  170. }
  171. _, err = tarOutputFile.Write(n.Data)
  172. } else {
  173. size := n.DataSize
  174. if version == storage.Version1 {
  175. size = n.Size
  176. }
  177. fmt.Printf("key=%s Name=%s Size=%d gzip=%t mime=%s\n",
  178. key,
  179. n.Name,
  180. size,
  181. n.IsGzipped(),
  182. n.Mime,
  183. )
  184. }
  185. return
  186. }