You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

244 lines
6.8 KiB

12 years ago
12 years ago
12 years ago
11 years ago
  1. package command
  2. import (
  3. "archive/tar"
  4. "bytes"
  5. "fmt"
  6. "os"
  7. "path"
  8. "path/filepath"
  9. "strconv"
  10. "strings"
  11. "text/template"
  12. "time"
  13. "github.com/chrislusf/seaweedfs/weed/glog"
  14. "github.com/chrislusf/seaweedfs/weed/storage"
  15. "github.com/chrislusf/seaweedfs/weed/storage/types"
  16. "io"
  17. )
  // defaultFnFormat is the default value of the -fileNameFormat flag: the
  // text/template used to name each entry written to the output tar.
  const defaultFnFormat = `{{.Mime}}/{{.Id}}:{{.Name}}`

  // timeFormat is the time layout the -newer flag is parsed with; it is also
  // embedded in the command's usage line as an example.
  const timeFormat = "2006-01-02T15:04:05"
  // ExportOptions groups the pointers returned by the flag package for the
  // export command's volume-selection flags.
  type ExportOptions struct {
  	// dir points at the value of -dir, the directory holding the volume files.
  	dir *string
  	// collection points at the value of -collection; an empty string means the
  	// volume file name carries no collection prefix.
  	collection *string
  	// volumeId points at the value of -volumeId.
  	volumeId *int
  }

  // export holds the parsed volume-selection flags; its fields are populated
  // when the flags are registered.
  var export ExportOptions
  // cmdExport describes the "export" subcommand: its usage line plus the short
  // and long help texts. The Run field is deliberately not set in this literal;
  // init() assigns it to avoid an initialization cycle with runExport.
  30. var cmdExport = &Command{
  // The usage example embeds timeFormat so the -newer sample shows the exact
  // layout that runExport parses.
  31. UsageLine: "export -dir=/tmp -volumeId=234 -o=/dir/name.tar -fileNameFormat={{.Name}} -newer='" + timeFormat + "'",
  32. Short: "list or export files from one volume data file",
  // NOTE(review): the default format quoted in the long help is written out
  // literally; keep it in sync with defaultFnFormat. The help mentions {{.Key}}
  // but not {{.Ext}}, which nameParams also provides.
  33. Long: `List all files in a volume, or Export all files in a volume to a tar file if the output is specified.
  34. The format of file name in the tar file can be customized. Default is {{.Mime}}/{{.Id}}:{{.Name}}. Also available is {{.Key}}.
  35. `,
  36. }
  37. func init() {
  38. cmdExport.Run = runExport // break init cycle
  39. export.dir = cmdExport.Flag.String("dir", ".", "input data directory to store volume data files")
  40. export.collection = cmdExport.Flag.String("collection", "", "the volume collection name")
  41. export.volumeId = cmdExport.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.")
  42. }
  43. var (
  44. output = cmdExport.Flag.String("o", "", "output tar file name, must ends with .tar, or just a \"-\" for stdout")
  45. format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Mime}} {{.Id}} {{.Name}} {{.Ext}}")
  46. newer = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone, e.g. 2006-01-02T15:04:05")
  47. showDeleted = cmdExport.Flag.Bool("deleted", false, "export deleted files. only applies if -o is not specified")
  48. limit = cmdExport.Flag.Int("limit", 0, "only show first n entries if specified")
  49. tarOutputFile *tar.Writer
  50. tarHeader tar.Header
  51. fileNameTemplate *template.Template
  52. fileNameTemplateBuffer = bytes.NewBuffer(nil)
  53. newerThan time.Time
  54. newerThanUnix int64 = -1
  55. localLocation, _ = time.LoadLocation("Local")
  56. )
  57. func printNeedle(vid storage.VolumeId, n *storage.Needle, version storage.Version, deleted bool) {
  58. key := storage.NewFileIdFromNeedle(vid, n).String()
  59. size := n.DataSize
  60. if version == storage.Version1 {
  61. size = n.Size
  62. }
  63. fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\n",
  64. key,
  65. n.Name,
  66. size,
  67. n.IsGzipped(),
  68. n.Mime,
  69. n.LastModifiedString(),
  70. n.Ttl.String(),
  71. deleted,
  72. )
  73. }
  74. func runExport(cmd *Command, args []string) bool {
  75. var err error
  76. if *newer != "" {
  77. if newerThan, err = time.ParseInLocation(timeFormat, *newer, localLocation); err != nil {
  78. fmt.Println("cannot parse 'newer' argument: " + err.Error())
  79. return false
  80. }
  81. newerThanUnix = newerThan.Unix()
  82. }
  83. if *export.volumeId == -1 {
  84. return false
  85. }
  86. if *output != "" {
  87. if *output != "-" && !strings.HasSuffix(*output, ".tar") {
  88. fmt.Println("the output file", *output, "should be '-' or end with .tar")
  89. return false
  90. }
  91. if fileNameTemplate, err = template.New("name").Parse(*format); err != nil {
  92. fmt.Println("cannot parse format " + *format + ": " + err.Error())
  93. return false
  94. }
  95. var outputFile *os.File
  96. if *output == "-" {
  97. outputFile = os.Stdout
  98. } else {
  99. if outputFile, err = os.Create(*output); err != nil {
  100. glog.Fatalf("cannot open output tar %s: %s", *output, err)
  101. }
  102. }
  103. defer outputFile.Close()
  104. tarOutputFile = tar.NewWriter(outputFile)
  105. defer tarOutputFile.Close()
  106. t := time.Now()
  107. tarHeader = tar.Header{Mode: 0644,
  108. ModTime: t, Uid: os.Getuid(), Gid: os.Getgid(),
  109. Typeflag: tar.TypeReg,
  110. AccessTime: t, ChangeTime: t}
  111. }
  112. fileName := strconv.Itoa(*export.volumeId)
  113. if *export.collection != "" {
  114. fileName = *export.collection + "_" + fileName
  115. }
  116. vid := storage.VolumeId(*export.volumeId)
  117. indexFile, err := os.OpenFile(path.Join(*export.dir, fileName+".idx"), os.O_RDONLY, 0644)
  118. if err != nil {
  119. glog.Fatalf("Create Volume Index [ERROR] %s\n", err)
  120. }
  121. defer indexFile.Close()
  122. needleMap, err := storage.LoadBtreeNeedleMap(indexFile)
  123. if err != nil {
  124. glog.Fatalf("cannot load needle map from %s: %s", indexFile.Name(), err)
  125. }
  126. var version storage.Version
  127. if tarOutputFile == nil {
  128. fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\n")
  129. }
  130. var counter = 0
  131. err = storage.ScanVolumeFile(*export.dir, *export.collection, vid,
  132. storage.NeedleMapInMemory,
  133. func(superBlock storage.SuperBlock) error {
  134. version = superBlock.Version()
  135. return nil
  136. }, true, func(n *storage.Needle, offset int64) error {
  137. nv, ok := needleMap.Get(n.Id)
  138. glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v",
  139. n.Id, offset, n.Size, n.DiskSize(), n.IsGzipped(), ok, nv)
  140. if ok && nv.Size > 0 && int64(nv.Offset)*types.NeedlePaddingSize == offset {
  141. if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
  142. glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
  143. n.LastModified, newerThanUnix)
  144. return nil
  145. }
  146. counter++
  147. if *limit > 0 && counter > *limit {
  148. return io.EOF
  149. }
  150. if tarOutputFile != nil {
  151. return writeFile(vid, n)
  152. } else {
  153. printNeedle(vid, n, version, false)
  154. return nil
  155. }
  156. }
  157. if !ok {
  158. if *showDeleted && tarOutputFile == nil {
  159. if n.DataSize > 0 {
  160. printNeedle(vid, n, version, true)
  161. } else {
  162. n.Name = []byte("*tombstone")
  163. printNeedle(vid, n, version, true)
  164. }
  165. }
  166. glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
  167. } else {
  168. glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size)
  169. }
  170. return nil
  171. })
  172. if err != nil && err != io.EOF {
  173. glog.Fatalf("Export Volume File [ERROR] %s\n", err)
  174. }
  175. return true
  176. }
  177. type nameParams struct {
  178. Name string
  179. Id types.NeedleId
  180. Mime string
  181. Key string
  182. Ext string
  183. }
  184. func writeFile(vid storage.VolumeId, n *storage.Needle) (err error) {
  185. key := storage.NewFileIdFromNeedle(vid, n).String()
  186. fileNameTemplateBuffer.Reset()
  187. if err = fileNameTemplate.Execute(fileNameTemplateBuffer,
  188. nameParams{
  189. Name: string(n.Name),
  190. Id: n.Id,
  191. Mime: string(n.Mime),
  192. Key: key,
  193. Ext: filepath.Ext(string(n.Name)),
  194. },
  195. ); err != nil {
  196. return err
  197. }
  198. fileName := fileNameTemplateBuffer.String()
  199. if n.IsGzipped() && path.Ext(fileName) != ".gz" {
  200. fileName = fileName + ".gz"
  201. }
  202. tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data))
  203. if n.HasLastModifiedDate() {
  204. tarHeader.ModTime = time.Unix(int64(n.LastModified), 0)
  205. } else {
  206. tarHeader.ModTime = time.Unix(0, 0)
  207. }
  208. tarHeader.ChangeTime = tarHeader.ModTime
  209. if err = tarOutputFile.WriteHeader(&tarHeader); err != nil {
  210. return err
  211. }
  212. _, err = tarOutputFile.Write(n.Data)
  213. return
  214. }