From c3f9d9fa2edde613c8a00da7b1252fa524fd2a0d Mon Sep 17 00:00:00 2001 From: chrislu Date: Thu, 10 Feb 2022 20:32:13 -0800 Subject: [PATCH 01/39] initial setup --- go.mod | 1 + go.sum | 5 +++ weed/command/command.go | 1 + weed/command/mount2.go | 83 ++++++++++++++++++++++++++++++++++++++ weed/command/mount2_std.go | 53 ++++++++++++++++++++++++ weed/mount/weedfs.go | 32 +++++++++++++++ 6 files changed, 175 insertions(+) create mode 100644 weed/command/mount2.go create mode 100644 weed/command/mount2_std.go create mode 100644 weed/mount/weedfs.go diff --git a/go.mod b/go.mod index 6176714c0..62c09d8f2 100644 --- a/go.mod +++ b/go.mod @@ -169,6 +169,7 @@ require ( github.com/DataDog/zstd v1.3.6-0.20190409195224-796139022798 // indirect github.com/d4l3k/messagediff v1.2.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect + github.com/hanwen/go-fuse/v2 v2.1.0 // indirect github.com/jcmturner/aescts/v2 v2.0.0 // indirect github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect github.com/jcmturner/goidentity/v6 v6.0.1 // indirect diff --git a/go.sum b/go.sum index 7aa37b888..b1db2fdbf 100644 --- a/go.sum +++ b/go.sum @@ -415,6 +415,10 @@ github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= +github.com/hanwen/go-fuse v1.0.0 h1:GxS9Zrn6c35/BnfiVsZVWmsG803xwE7eVRDvcf/BEVc= +github.com/hanwen/go-fuse v1.0.0/go.mod h1:unqXarDXqzAk0rt98O2tVndEPIpUgLD9+rwFisZH3Ok= +github.com/hanwen/go-fuse/v2 v2.1.0 h1:+32ffteETaLYClUj0a3aHjZ1hOPxxaNEHiZiujuDaek= +github.com/hanwen/go-fuse/v2 v2.1.0/go.mod h1:oRyA5eK+pvJyv5otpO/DgccS8y/RvYMaO00GgRLGryc= github.com/hashicorp/consul/api v1.3.0/go.mod 
h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= @@ -512,6 +516,7 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kurin/blazer v0.5.3 h1:SAgYv0TKU0kN/ETfO5ExjNAPyMt2FocO2s/UlCHfjAk= github.com/kurin/blazer v0.5.3/go.mod h1:4FCXMUWo9DllR2Do4TtBd377ezyAJ51vB5uTBjt0pGU= +github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k= github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.10.0 h1:Zx5DJFEYQXio93kgXnQ09fXNiUKsqv4OUEu2UtGcB1E= github.com/lib/pq v1.10.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= diff --git a/weed/command/command.go b/weed/command/command.go index dbc18a053..c6665a7be 100644 --- a/weed/command/command.go +++ b/weed/command/command.go @@ -30,6 +30,7 @@ var Commands = []*Command{ cmdMaster, cmdMasterFollower, cmdMount, + cmdMount2, cmdS3, cmdIam, cmdMsgBroker, diff --git a/weed/command/mount2.go b/weed/command/mount2.go new file mode 100644 index 000000000..d7b125c6f --- /dev/null +++ b/weed/command/mount2.go @@ -0,0 +1,83 @@ +package command + +import ( + "os" + "time" +) + +type Mount2Options struct { + filer *string + filerMountRootPath *string + dir *string + dirAutoCreate *bool + collection *string + replication *string + diskType *string + ttlSec *int + chunkSizeLimitMB *int + concurrentWriters *int + cacheDir *string + cacheSizeMB *int64 + dataCenter *string + allowOthers *bool + umaskString *string + nonempty *bool + volumeServerAccess *string + uidMap *string + gidMap *string + readOnly *bool + debug *bool + debugPort *int +} + +var ( + mount2Options Mount2Options +) + +func init() { + cmdMount2.Run = runMount2 // break init 
cycle + mount2Options.filer = cmdMount2.Flag.String("filer", "localhost:8888", "comma-separated weed filer location") + mount2Options.filerMountRootPath = cmdMount2.Flag.String("filer.path", "/", "mount this remote path from filer server") + mount2Options.dir = cmdMount2.Flag.String("dir", ".", "mount weed filer to this directory") + mount2Options.dirAutoCreate = cmdMount2.Flag.Bool("dirAutoCreate", false, "auto create the directory to mount to") + mount2Options.collection = cmdMount2.Flag.String("collection", "", "collection to create the files") + mount2Options.replication = cmdMount2.Flag.String("replication", "", "replication(e.g. 000, 001) to create to files. If empty, let filer decide.") + mount2Options.diskType = cmdMount2.Flag.String("disk", "", "[hdd|ssd|] hard drive or solid state drive or any tag") + mount2Options.ttlSec = cmdMount2.Flag.Int("ttl", 0, "file ttl in seconds") + mount2Options.chunkSizeLimitMB = cmdMount2.Flag.Int("chunkSizeLimitMB", 2, "local write buffer size, also chunk large files") + mount2Options.concurrentWriters = cmdMount2.Flag.Int("concurrentWriters", 32, "limit concurrent goroutine writers if not 0") + mount2Options.cacheDir = cmdMount2.Flag.String("cacheDir", os.TempDir(), "local cache directory for file chunks and meta data") + mount2Options.cacheSizeMB = cmdMount2.Flag.Int64("cacheCapacityMB", 1000, "local file chunk cache capacity in MB (0 will disable cache)") + mount2Options.dataCenter = cmdMount2.Flag.String("dataCenter", "", "prefer to write to the data center") + mount2Options.allowOthers = cmdMount2.Flag.Bool("allowOthers", true, "allows other users to access the file system") + mount2Options.umaskString = cmdMount2.Flag.String("umask", "022", "octal umask, e.g., 022, 0111") + mount2Options.nonempty = cmdMount2.Flag.Bool("nonempty", false, "allows the mounting over a non-empty directory") + mount2Options.volumeServerAccess = cmdMount2.Flag.String("volumeServerAccess", "direct", "access volume servers by 
[direct|publicUrl|filerProxy]") + mount2Options.uidMap = cmdMount2.Flag.String("map.uid", "", "map local uid to uid on filer, comma-separated :") + mount2Options.gidMap = cmdMount2.Flag.String("map.gid", "", "map local gid to gid on filer, comma-separated :") + mount2Options.readOnly = cmdMount2.Flag.Bool("readOnly", false, "read only") + mount2Options.debug = cmdMount2.Flag.Bool("debug", false, "serves runtime profiling data, e.g., http://localhost:/debug/pprof/goroutine?debug=2") + mount2Options.debugPort = cmdMount2.Flag.Int("debug.port", 6061, "http port for debugging") + + mountCpuProfile = cmdMount2.Flag.String("cpuprofile", "", "cpu profile output file") + mountMemProfile = cmdMount2.Flag.String("memprofile", "", "memory profile output file") + mountReadRetryTime = cmdMount2.Flag.Duration("readRetryTime", 6*time.Second, "maximum read retry wait time") +} + +var cmdMount2 = &Command{ + UsageLine: "mount2 -filer=localhost:8888 -dir=/some/dir", + Short: " mount weed filer to a directory as file system in userspace(FUSE)", + Long: `mount weed filer to userspace. + + Pre-requisites: + 1) have SeaweedFS master and volume servers running + 2) have a "weed filer" running + These 2 requirements can be achieved with one command "weed server -filer=true" + + This uses github.com/seaweedfs/fuse, which enables writing FUSE file systems on + Linux, and OS X. + + On OS X, it requires OSXFUSE (http://osxfuse.github.com/). 
+ + `, +} diff --git a/weed/command/mount2_std.go b/weed/command/mount2_std.go new file mode 100644 index 000000000..795a7864c --- /dev/null +++ b/weed/command/mount2_std.go @@ -0,0 +1,53 @@ +package command + +import ( + "fmt" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/mount" + "github.com/hanwen/go-fuse/v2/fs" + "net/http" + "os" + "strconv" + "time" + + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/util/grace" +) + +func runMount2(cmd *Command, args []string) bool { + + if *mountOptions.debug { + go http.ListenAndServe(fmt.Sprintf(":%d", *mountOptions.debugPort), nil) + } + + grace.SetupProfiling(*mountCpuProfile, *mountMemProfile) + if *mountReadRetryTime < time.Second { + *mountReadRetryTime = time.Second + } + util.RetryWaitTime = *mountReadRetryTime + + umask, umaskErr := strconv.ParseUint(*mountOptions.umaskString, 8, 64) + if umaskErr != nil { + fmt.Printf("can not parse umask %s", *mountOptions.umaskString) + return false + } + + if len(args) > 0 { + return false + } + + return RunMount2(&mount2Options, os.FileMode(umask)) +} + +func RunMount2(option *Mount2Options, umask os.FileMode) bool { + + opts := &fs.Options{} + opts.Debug = true + server, err := fs.Mount(*option.dir, &mount.WeedFS{}, opts) + if err != nil { + glog.Fatalf("Mount fail: %v", err) + } + server.Wait() + + return true +} diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go new file mode 100644 index 000000000..50c87a57f --- /dev/null +++ b/weed/mount/weedfs.go @@ -0,0 +1,32 @@ +package mount + +import ( + "context" + "syscall" + + "github.com/hanwen/go-fuse/v2/fs" + "github.com/hanwen/go-fuse/v2/fuse" +) + +type WeedFS struct { + fs.Inode +} + +func (r *WeedFS) OnAdd(ctx context.Context) { + ch := r.NewPersistentInode( + ctx, &fs.MemRegularFile{ + Data: []byte("file.txt"), + Attr: fuse.Attr{ + Mode: 0644, + }, + }, fs.StableAttr{Ino: 2}) + r.AddChild("file.txt", ch, false) +} + +func (r *WeedFS) Getattr(ctx 
context.Context, fh fs.FileHandle, out *fuse.AttrOut) syscall.Errno { + out.Mode = 0755 + return 0 +} + +var _ = (fs.NodeGetattrer)((*WeedFS)(nil)) +var _ = (fs.NodeOnAdder)((*WeedFS)(nil)) From 7a0c35674ca59fd96a5389ddebb3c7c14476d6a4 Mon Sep 17 00:00:00 2001 From: chrislu Date: Thu, 10 Feb 2022 20:46:53 -0800 Subject: [PATCH 02/39] clean up previously mounted folder --- go.mod | 7 +++++-- go.sum | 1 + weed/command/mount2_std.go | 7 +++++++ weed/mount/unmount/unmount.go | 6 ++++++ weed/mount/unmount/unmount_linux.go | 21 +++++++++++++++++++++ weed/mount/unmount/unmount_std.go | 18 ++++++++++++++++++ 6 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 weed/mount/unmount/unmount.go create mode 100644 weed/mount/unmount/unmount_linux.go create mode 100644 weed/mount/unmount/unmount_std.go diff --git a/go.mod b/go.mod index 62c09d8f2..31c7666a2 100644 --- a/go.mod +++ b/go.mod @@ -162,14 +162,17 @@ require ( modernc.org/token v1.0.0 // indirect ) -require github.com/fluent/fluent-logger-golang v1.8.0 +require ( + github.com/fluent/fluent-logger-golang v1.8.0 + github.com/hanwen/go-fuse v1.0.0 + github.com/hanwen/go-fuse/v2 v2.1.0 +) require ( cloud.google.com/go/kms v1.0.0 // indirect github.com/DataDog/zstd v1.3.6-0.20190409195224-796139022798 // indirect github.com/d4l3k/messagediff v1.2.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/hanwen/go-fuse/v2 v2.1.0 // indirect github.com/jcmturner/aescts/v2 v2.0.0 // indirect github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect github.com/jcmturner/goidentity/v6 v6.0.1 // indirect diff --git a/go.sum b/go.sum index b1db2fdbf..1054996bf 100644 --- a/go.sum +++ b/go.sum @@ -516,6 +516,7 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kurin/blazer v0.5.3 h1:SAgYv0TKU0kN/ETfO5ExjNAPyMt2FocO2s/UlCHfjAk= github.com/kurin/blazer v0.5.3/go.mod 
h1:4FCXMUWo9DllR2Do4TtBd377ezyAJ51vB5uTBjt0pGU= +github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 h1:MtvEpTB6LX3vkb4ax0b5D2DHbNAUsen0Gx5wZoq3lV4= github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k= github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.10.0 h1:Zx5DJFEYQXio93kgXnQ09fXNiUKsqv4OUEu2UtGcB1E= diff --git a/weed/command/mount2_std.go b/weed/command/mount2_std.go index 795a7864c..60edb71b0 100644 --- a/weed/command/mount2_std.go +++ b/weed/command/mount2_std.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/mount" + "github.com/chrislusf/seaweedfs/weed/mount/unmount" "github.com/hanwen/go-fuse/v2/fs" "net/http" "os" @@ -43,6 +44,12 @@ func RunMount2(option *Mount2Options, umask os.FileMode) bool { opts := &fs.Options{} opts.Debug = true + + unmount.Unmount(*option.dir) + grace.OnInterrupt(func() { + unmount.Unmount(*option.dir) + }) + server, err := fs.Mount(*option.dir, &mount.WeedFS{}, opts) if err != nil { glog.Fatalf("Mount fail: %v", err) diff --git a/weed/mount/unmount/unmount.go b/weed/mount/unmount/unmount.go new file mode 100644 index 000000000..c481d8030 --- /dev/null +++ b/weed/mount/unmount/unmount.go @@ -0,0 +1,6 @@ +package unmount + +// Unmount tries to unmount the filesystem mounted at dir. 
+func Unmount(dir string) error { + return unmount(dir) +} diff --git a/weed/mount/unmount/unmount_linux.go b/weed/mount/unmount/unmount_linux.go new file mode 100644 index 000000000..e55d48f86 --- /dev/null +++ b/weed/mount/unmount/unmount_linux.go @@ -0,0 +1,21 @@ +package unmount + +import ( + "bytes" + "errors" + "os/exec" +) + +func unmount(dir string) error { + cmd := exec.Command("fusermount", "-u", dir) + output, err := cmd.CombinedOutput() + if err != nil { + if len(output) > 0 { + output = bytes.TrimRight(output, "\n") + msg := err.Error() + ": " + string(output) + err = errors.New(msg) + } + return err + } + return nil +} diff --git a/weed/mount/unmount/unmount_std.go b/weed/mount/unmount/unmount_std.go new file mode 100644 index 000000000..76267fb6a --- /dev/null +++ b/weed/mount/unmount/unmount_std.go @@ -0,0 +1,18 @@ +//go:build !linux +// +build !linux + +package unmount + +import ( + "os" + "syscall" +) + +func unmount(dir string) error { + err := syscall.Unmount(dir, 0) + if err != nil { + err = &os.PathError{Op: "unmount", Path: dir, Err: err} + return err + } + return nil +} From b6143de52a95b1ce8b08a972b10cd0c8d3a2166e Mon Sep 17 00:00:00 2001 From: chrislu Date: Thu, 10 Feb 2022 22:43:55 -0800 Subject: [PATCH 03/39] mount with name --- weed/command/mount2_std.go | 173 +++++++++++++++++++++++++++++++++++-- weed/mount/weedfs.go | 85 ++++++++++++++++-- 2 files changed, 248 insertions(+), 10 deletions(-) diff --git a/weed/command/mount2_std.go b/weed/command/mount2_std.go index 60edb71b0..2d3c0dfb4 100644 --- a/weed/command/mount2_std.go +++ b/weed/command/mount2_std.go @@ -1,14 +1,24 @@ package command import ( + "context" "fmt" + "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/mount" "github.com/chrislusf/seaweedfs/weed/mount/unmount" + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + 
"github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/hanwen/go-fuse/v2/fs" + "github.com/hanwen/go-fuse/v2/fuse" "net/http" "os" + "os/user" + "runtime" "strconv" + "strings" "time" "github.com/chrislusf/seaweedfs/weed/util" @@ -42,18 +52,171 @@ func runMount2(cmd *Command, args []string) bool { func RunMount2(option *Mount2Options, umask os.FileMode) bool { - opts := &fs.Options{} + // basic checks + chunkSizeLimitMB := *mountOptions.chunkSizeLimitMB + if chunkSizeLimitMB <= 0 { + fmt.Printf("Please specify a reasonable buffer size.") + return false + } + + // try to connect to filer + filerAddresses := pb.ServerAddresses(*option.filer).ToAddresses() + util.LoadConfiguration("security", false) + grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") + var cipher bool + var err error + for i := 0; i < 10; i++ { + err = pb.WithOneOfGrpcFilerClients(false, filerAddresses, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return fmt.Errorf("get filer grpc address %v configuration: %v", filerAddresses, err) + } + cipher = resp.Cipher + return nil + }) + if err != nil { + glog.V(0).Infof("failed to talk to filer %v: %v", filerAddresses, err) + glog.V(0).Infof("wait for %d seconds ...", i+1) + time.Sleep(time.Duration(i+1) * time.Second) + } + } + if err != nil { + glog.Errorf("failed to talk to filer %v: %v", filerAddresses, err) + return true + } + + filerMountRootPath := *option.filerMountRootPath + + // clean up mount point + dir := util.ResolvePath(*option.dir) + if dir == "" { + fmt.Printf("Please specify the mount directory via \"-dir\"") + return false + } + + unmount.Unmount(dir) + + // detect mount folder mode + if *option.dirAutoCreate { + os.MkdirAll(dir, os.FileMode(0777)&^umask) + } + fileInfo, err := os.Stat(dir) + + // collect uid, gid + 
uid, gid := uint32(0), uint32(0) + mountMode := os.ModeDir | 0777 + if err == nil { + mountMode = os.ModeDir | os.FileMode(0777)&^umask + uid, gid = util.GetFileUidGid(fileInfo) + fmt.Printf("mount point owner uid=%d gid=%d mode=%s\n", uid, gid, mountMode) + } else { + fmt.Printf("can not stat %s\n", dir) + return false + } + + // detect uid, gid + if uid == 0 { + if u, err := user.Current(); err == nil { + if parsedId, pe := strconv.ParseUint(u.Uid, 10, 32); pe == nil { + uid = uint32(parsedId) + } + if parsedId, pe := strconv.ParseUint(u.Gid, 10, 32); pe == nil { + gid = uint32(parsedId) + } + fmt.Printf("current uid=%d gid=%d\n", uid, gid) + } + } + + // mapping uid, gid + uidGidMapper, err := meta_cache.NewUidGidMapper(*option.uidMap, *option.gidMap) + if err != nil { + fmt.Printf("failed to parse %s %s: %v\n", *option.uidMap, *option.gidMap, err) + return false + } + + // Ensure target mount point availability + if isValid := checkMountPointAvailable(dir); !isValid { + glog.Fatalf("Expected mount to still be active, target mount point: %s, please check!", dir) + return true + } + + // mount fuse + sec := time.Second + opts := &fs.Options{ + MountOptions: fuse.MountOptions{ + AllowOther: *option.allowOthers, + Options: nil, + MaxBackground: 128, + MaxWrite: 1024 * 1024 * 2, + MaxReadAhead: 1024 * 1024 * 2, + IgnoreSecurityLabels: false, + RememberInodes: false, + FsName: *option.filer + ":" + filerMountRootPath, + Name: "seaweedfs", + SingleThreaded: false, + DisableXAttrs: false, + Debug: false, + EnableLocks: false, + ExplicitDataCacheControl: false, + // SyncRead: false, // set to false to enable the FUSE_CAP_ASYNC_READ capability + DirectMount: true, + DirectMountFlags: 0, + // EnableAcl: false, + }, + EntryTimeout: &sec, + AttrTimeout: &sec, + NegativeTimeout: nil, + FirstAutomaticIno: 0, + OnAdd: nil, + NullPermissions: false, + UID: 0, + GID: 0, + ServerCallbacks: nil, + Logger: nil, + } opts.Debug = true - unmount.Unmount(*option.dir) - 
grace.OnInterrupt(func() { - unmount.Unmount(*option.dir) + // find mount point + mountRoot := filerMountRootPath + if mountRoot != "/" && strings.HasSuffix(mountRoot, "/") { + mountRoot = mountRoot[0 : len(mountRoot)-1] + } + + seaweedFileSystem := mount.NewSeaweedFileSystem(&mount.Option{ + MountDirectory: dir, + FilerAddresses: filerAddresses, + GrpcDialOption: grpcDialOption, + FilerMountRootPath: mountRoot, + Collection: *option.collection, + Replication: *option.replication, + TtlSec: int32(*option.ttlSec), + DiskType: types.ToDiskType(*option.diskType), + ChunkSizeLimit: int64(chunkSizeLimitMB) * 1024 * 1024, + ConcurrentWriters: *option.concurrentWriters, + CacheDir: *option.cacheDir, + CacheSizeMB: *option.cacheSizeMB, + DataCenter: *option.dataCenter, + MountUid: uid, + MountGid: gid, + MountMode: mountMode, + MountCtime: fileInfo.ModTime(), + MountMtime: time.Now(), + Umask: umask, + VolumeServerAccess: *mountOptions.volumeServerAccess, + Cipher: cipher, + UidGidMapper: uidGidMapper, }) - server, err := fs.Mount(*option.dir, &mount.WeedFS{}, opts) + server, err := fs.Mount(dir, seaweedFileSystem, opts) if err != nil { glog.Fatalf("Mount fail: %v", err) } + grace.OnInterrupt(func() { + unmount.Unmount(dir) + }) + + fmt.Printf("This is SeaweedFS version %s %s %s\n", util.Version(), runtime.GOOS, runtime.GOARCH) + server.Wait() return true diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index 50c87a57f..68054667f 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -2,17 +2,92 @@ package mount import ( "context" + "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/storage/types" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/util/grace" + "google.golang.org/grpc" + "os" + "path" + "path/filepath" "syscall" + "time" "github.com/hanwen/go-fuse/v2/fs" 
"github.com/hanwen/go-fuse/v2/fuse" ) -type WeedFS struct { +type Option struct { + MountDirectory string + FilerAddresses []pb.ServerAddress + filerIndex int + GrpcDialOption grpc.DialOption + FilerMountRootPath string + Collection string + Replication string + TtlSec int32 + DiskType types.DiskType + ChunkSizeLimit int64 + ConcurrentWriters int + CacheDir string + CacheSizeMB int64 + DataCenter string + Umask os.FileMode + + MountUid uint32 + MountGid uint32 + MountMode os.FileMode + MountCtime time.Time + MountMtime time.Time + MountParentInode uint64 + + VolumeServerAccess string // how to access volume servers + Cipher bool // whether encrypt data on volume server + UidGidMapper *meta_cache.UidGidMapper + + uniqueCacheDir string + uniqueCacheTempPageDir string +} + +type WFS struct { fs.Inode + option *Option + metaCache *meta_cache.MetaCache + signature int32 +} + +func NewSeaweedFileSystem(option *Option) *WFS { + wfs := &WFS{ + option: option, + signature: util.RandomInt32(), + } + + wfs.metaCache = meta_cache.NewMetaCache(path.Join(option.getUniqueCacheDir(), "meta"), util.FullPath(option.FilerMountRootPath), option.UidGidMapper, func(filePath util.FullPath, entry *filer_pb.Entry) { + }) + grace.OnInterrupt(func() { + wfs.metaCache.Shutdown() + }) + + return wfs +} + +func (option *Option) setupUniqueCacheDirectory() { + cacheUniqueId := util.Md5String([]byte(option.MountDirectory + string(option.FilerAddresses[0]) + option.FilerMountRootPath + util.Version()))[0:8] + option.uniqueCacheDir = path.Join(option.CacheDir, cacheUniqueId) + option.uniqueCacheTempPageDir = filepath.Join(option.uniqueCacheDir, "sw") + os.MkdirAll(option.uniqueCacheTempPageDir, os.FileMode(0777)&^option.Umask) +} + +func (option *Option) getTempFilePageDir() string { + return option.uniqueCacheTempPageDir +} +func (option *Option) getUniqueCacheDir() string { + return option.uniqueCacheDir } -func (r *WeedFS) OnAdd(ctx context.Context) { +func (r *WFS) OnAdd(ctx context.Context) { 
ch := r.NewPersistentInode( ctx, &fs.MemRegularFile{ Data: []byte("file.txt"), @@ -23,10 +98,10 @@ func (r *WeedFS) OnAdd(ctx context.Context) { r.AddChild("file.txt", ch, false) } -func (r *WeedFS) Getattr(ctx context.Context, fh fs.FileHandle, out *fuse.AttrOut) syscall.Errno { +func (r *WFS) Getattr(ctx context.Context, fh fs.FileHandle, out *fuse.AttrOut) syscall.Errno { out.Mode = 0755 return 0 } -var _ = (fs.NodeGetattrer)((*WeedFS)(nil)) -var _ = (fs.NodeOnAdder)((*WeedFS)(nil)) +var _ = (fs.NodeGetattrer)((*WFS)(nil)) +var _ = (fs.NodeOnAdder)((*WFS)(nil)) From 9a913457ddbb2cf4ee47309d865c154d0aef05e5 Mon Sep 17 00:00:00 2001 From: chrislu Date: Thu, 10 Feb 2022 23:23:47 -0800 Subject: [PATCH 04/39] supports stats --- weed/mount/weedfs.go | 1 + weed/mount/weedfs_stats.go | 85 ++++++++++++++++++++++++++++++++++ weed/mount/wfs_filer_client.go | 51 ++++++++++++++++++++ 3 files changed, 137 insertions(+) create mode 100644 weed/mount/weedfs_stats.go create mode 100644 weed/mount/wfs_filer_client.go diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index 68054667f..75a509b5c 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -55,6 +55,7 @@ type WFS struct { fs.Inode option *Option metaCache *meta_cache.MetaCache + stats statsCache signature int32 } diff --git a/weed/mount/weedfs_stats.go b/weed/mount/weedfs_stats.go new file mode 100644 index 000000000..fc24db0e2 --- /dev/null +++ b/weed/mount/weedfs_stats.go @@ -0,0 +1,85 @@ +package mount + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/hanwen/go-fuse/v2/fs" + "github.com/hanwen/go-fuse/v2/fuse" + "math" + "os" + "syscall" + "time" +) + +const blockSize = 512 + +var _ = fs.NodeStatfser(&WFS{}) + +type statsCache struct { + filer_pb.StatisticsResponse + lastChecked int64 // unix time in seconds +} + +func (wfs *WFS) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { + + 
glog.V(4).Infof("reading fs stats") + + if wfs.stats.lastChecked < time.Now().Unix()-20 { + + err := wfs.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.StatisticsRequest{ + Collection: wfs.option.Collection, + Replication: wfs.option.Replication, + Ttl: fmt.Sprintf("%ds", wfs.option.TtlSec), + DiskType: string(wfs.option.DiskType), + } + + glog.V(4).Infof("reading filer stats: %+v", request) + resp, err := client.Statistics(context.Background(), request) + if err != nil { + glog.V(0).Infof("reading filer stats %v: %v", request, err) + return err + } + glog.V(4).Infof("read filer stats: %+v", resp) + + wfs.stats.TotalSize = resp.TotalSize + wfs.stats.UsedSize = resp.UsedSize + wfs.stats.FileCount = resp.FileCount + wfs.stats.lastChecked = time.Now().Unix() + + return nil + }) + if err != nil { + glog.V(0).Infof("filer Statistics: %v", err) + return fs.ToErrno(os.ErrInvalid) + } + } + + totalDiskSize := wfs.stats.TotalSize + usedDiskSize := wfs.stats.UsedSize + actualFileCount := wfs.stats.FileCount + + // Compute the total number of available blocks + out.Blocks = totalDiskSize / blockSize + + // Compute the number of used blocks + numBlocks := uint64(usedDiskSize / blockSize) + + // Report the number of free and available blocks for the block size + out.Bfree = out.Blocks - numBlocks + out.Bavail = out.Blocks - numBlocks + out.Bsize = uint32(blockSize) + + // Report the total number of possible files in the file system (and those free) + out.Files = math.MaxInt64 + out.Ffree = math.MaxInt64 - actualFileCount + + // Report the maximum length of a name and the minimum fragment size + out.NameLen = 1024 + out.Frsize = uint32(blockSize) + + return fs.OK +} diff --git a/weed/mount/wfs_filer_client.go b/weed/mount/wfs_filer_client.go new file mode 100644 index 000000000..e8feb8342 --- /dev/null +++ b/weed/mount/wfs_filer_client.go @@ -0,0 +1,51 @@ +package mount + +import ( + "github.com/chrislusf/seaweedfs/weed/glog" + 
"github.com/chrislusf/seaweedfs/weed/util" + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +var _ = filer_pb.FilerClient(&WFS{}) + +func (wfs *WFS) WithFilerClient(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) (err error) { + + return util.Retry("filer grpc", func() error { + + i := wfs.option.filerIndex + n := len(wfs.option.FilerAddresses) + for x := 0; x < n; x++ { + + filerGrpcAddress := wfs.option.FilerAddresses[i].ToGrpcAddress() + err = pb.WithGrpcClient(streamingMode, func(grpcConnection *grpc.ClientConn) error { + client := filer_pb.NewSeaweedFilerClient(grpcConnection) + return fn(client) + }, filerGrpcAddress, wfs.option.GrpcDialOption) + + if err != nil { + glog.V(0).Infof("WithFilerClient %d %v: %v", x, filerGrpcAddress, err) + } else { + wfs.option.filerIndex = i + return nil + } + + i++ + if i >= n { + i = 0 + } + + } + return err + }) + +} + +func (wfs *WFS) AdjustedUrl(location *filer_pb.Location) string { + if wfs.option.VolumeServerAccess == "publicUrl" { + return location.PublicUrl + } + return location.Url +} From f87da798a4e9f9d5e4e60966630c5ea1ff291d37 Mon Sep 17 00:00:00 2001 From: chrislu Date: Fri, 11 Feb 2022 03:09:30 -0800 Subject: [PATCH 05/39] to be re-written following fuse virtual file system --- go.mod | 1 - weed/Makefile | 4 ++ weed/command/mount2_std.go | 2 +- weed/mount/directory.go | 42 ++++++++++++++++++ weed/mount/directory_read.go | 84 ++++++++++++++++++++++++++++++++++++ weed/mount/weedfs.go | 34 ++++++--------- weed/mount/weedfs_stats.go | 6 ++- 7 files changed, 147 insertions(+), 26 deletions(-) create mode 100644 weed/mount/directory.go create mode 100644 weed/mount/directory_read.go diff --git a/go.mod b/go.mod index 31c7666a2..ffdc48dcc 100644 --- a/go.mod +++ b/go.mod @@ -164,7 +164,6 @@ require ( require ( github.com/fluent/fluent-logger-golang v1.8.0 - github.com/hanwen/go-fuse v1.0.0 github.com/hanwen/go-fuse/v2 v2.1.0 
) diff --git a/weed/Makefile b/weed/Makefile index 4e871a71e..1d1a8476c 100644 --- a/weed/Makefile +++ b/weed/Makefile @@ -21,6 +21,10 @@ debug_mount: go build -gcflags="all=-N -l" dlv --listen=:2345 --headless=true --api-version=2 --accept-multiclient exec weed -- -v=4 mount -dir=~/tmp/mm -cacheCapacityMB=0 -filer.path=/ -umask=000 +debug_mount2: + go build -gcflags="all=-N -l" + dlv --listen=:2345 --headless=true --api-version=2 --accept-multiclient exec weed -- -v=4 mount2 -dir=~/tmp/mm -cacheCapacityMB=0 -filer.path=/ -umask=000 + debug_server: go build -gcflags="all=-N -l" dlv --listen=:2345 --headless=true --api-version=2 --accept-multiclient exec weed -- server -dir=~/tmp/99 -filer -volume.port=8343 -s3 -volume.max=0 -master.volumeSizeLimitMB=1024 -volume.preStopSeconds=1 diff --git a/weed/command/mount2_std.go b/weed/command/mount2_std.go index 2d3c0dfb4..0cb288c3b 100644 --- a/weed/command/mount2_std.go +++ b/weed/command/mount2_std.go @@ -207,7 +207,7 @@ func RunMount2(option *Mount2Options, umask os.FileMode) bool { UidGidMapper: uidGidMapper, }) - server, err := fs.Mount(dir, seaweedFileSystem, opts) + server, err := fs.Mount(dir, seaweedFileSystem.Root(), opts) if err != nil { glog.Fatalf("Mount fail: %v", err) } diff --git a/weed/mount/directory.go b/weed/mount/directory.go new file mode 100644 index 000000000..60fbafc37 --- /dev/null +++ b/weed/mount/directory.go @@ -0,0 +1,42 @@ +package mount + +import ( + "bytes" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/hanwen/go-fuse/v2/fs" + "strings" +) + +type Directory struct { + fs.Inode + + name string + wfs *WFS + entry *filer_pb.Entry + parent *Directory + id uint64 +} + +func (dir *Directory) FullPath() string { + var parts []string + for p := dir; p != nil; p = p.parent { + if strings.HasPrefix(p.name, "/") { + if len(p.name) > 1 { + parts = append(parts, p.name[1:]) + } + } else { + parts = append(parts, p.name) + } + } + + if len(parts) == 0 { + return "/" + } + + var buf 
bytes.Buffer + for i := len(parts) - 1; i >= 0; i-- { + buf.WriteString("/") + buf.WriteString(parts[i]) + } + return buf.String() +} diff --git a/weed/mount/directory_read.go b/weed/mount/directory_read.go new file mode 100644 index 000000000..51c51ae16 --- /dev/null +++ b/weed/mount/directory_read.go @@ -0,0 +1,84 @@ +package mount + +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/hanwen/go-fuse/v2/fs" + "github.com/hanwen/go-fuse/v2/fuse" + "math" + "os" + "syscall" +) + +var _ = fs.NodeReaddirer(&Directory{}) +var _ = fs.NodeGetattrer(&Directory{}) + +func (dir *Directory) Getattr(ctx context.Context, fh fs.FileHandle, out *fuse.AttrOut) syscall.Errno { + out.Mode = 0755 + return 0 +} + +func (dir *Directory) Readdir(ctx context.Context) (fs.DirStream, syscall.Errno) { + + dirPath := util.FullPath(dir.FullPath()) + glog.V(4).Infof("Readdir %s", dirPath) + + sourceChan := make(chan fuse.DirEntry, 64) + + stream := newDirectoryListStream(sourceChan) + + processEachEntryFn := func(entry *filer.Entry, isLast bool) { + sourceChan <- fuse.DirEntry{ + Mode: uint32(entry.Mode), + Name: entry.Name(), + Ino: dirPath.Child(entry.Name()).AsInode(os.ModeDir), + } + } + + if err := meta_cache.EnsureVisited(dir.wfs.metaCache, dir.wfs, dirPath); err != nil { + glog.Errorf("dir ReadDirAll %s: %v", dirPath, err) + return nil, fs.ToErrno(os.ErrInvalid) + } + go func() { + dir.wfs.metaCache.ListDirectoryEntries(context.Background(), dirPath, "", false, int64(math.MaxInt32), func(entry *filer.Entry) bool { + processEachEntryFn(entry, false) + return true + }) + close(sourceChan) + }() + + return stream, fs.OK +} + +var _ = fs.DirStream(&DirectoryListStream{}) + +type DirectoryListStream struct { + next fuse.DirEntry + sourceChan chan fuse.DirEntry + isStarted bool + hasNext bool +} + +func 
newDirectoryListStream(ch chan fuse.DirEntry) *DirectoryListStream { + return &DirectoryListStream{ + sourceChan: ch, + } +} + +func (i *DirectoryListStream) HasNext() bool { + if !i.isStarted { + i.next, i.hasNext = <-i.sourceChan + i.isStarted = true + } + return i.hasNext +} +func (i *DirectoryListStream) Next() (fuse.DirEntry, syscall.Errno) { + t := i.next + i.next, i.hasNext = <-i.sourceChan + return t, fs.OK +} +func (i *DirectoryListStream) Close() { +} diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index 75a509b5c..b2a64acb7 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -1,7 +1,6 @@ package mount import ( - "context" "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" @@ -12,11 +11,9 @@ import ( "os" "path" "path/filepath" - "syscall" "time" "github.com/hanwen/go-fuse/v2/fs" - "github.com/hanwen/go-fuse/v2/fuse" ) type Option struct { @@ -56,6 +53,7 @@ type WFS struct { option *Option metaCache *meta_cache.MetaCache stats statsCache + root Directory signature int32 } @@ -65,6 +63,13 @@ func NewSeaweedFileSystem(option *Option) *WFS { signature: util.RandomInt32(), } + wfs.root = Directory{ + name: "/", + wfs: wfs, + entry: nil, + parent: nil, + } + wfs.metaCache = meta_cache.NewMetaCache(path.Join(option.getUniqueCacheDir(), "meta"), util.FullPath(option.FilerMountRootPath), option.UidGidMapper, func(filePath util.FullPath, entry *filer_pb.Entry) { }) grace.OnInterrupt(func() { @@ -74,6 +79,10 @@ func NewSeaweedFileSystem(option *Option) *WFS { return wfs } +func (wfs *WFS) Root() *Directory { + return &wfs.root +} + func (option *Option) setupUniqueCacheDirectory() { cacheUniqueId := util.Md5String([]byte(option.MountDirectory + string(option.FilerAddresses[0]) + option.FilerMountRootPath + util.Version()))[0:8] option.uniqueCacheDir = path.Join(option.CacheDir, cacheUniqueId) @@ -87,22 +96,3 @@ func (option *Option) 
getTempFilePageDir() string { func (option *Option) getUniqueCacheDir() string { return option.uniqueCacheDir } - -func (r *WFS) OnAdd(ctx context.Context) { - ch := r.NewPersistentInode( - ctx, &fs.MemRegularFile{ - Data: []byte("file.txt"), - Attr: fuse.Attr{ - Mode: 0644, - }, - }, fs.StableAttr{Ino: 2}) - r.AddChild("file.txt", ch, false) -} - -func (r *WFS) Getattr(ctx context.Context, fh fs.FileHandle, out *fuse.AttrOut) syscall.Errno { - out.Mode = 0755 - return 0 -} - -var _ = (fs.NodeGetattrer)((*WFS)(nil)) -var _ = (fs.NodeOnAdder)((*WFS)(nil)) diff --git a/weed/mount/weedfs_stats.go b/weed/mount/weedfs_stats.go index fc24db0e2..f3ef268df 100644 --- a/weed/mount/weedfs_stats.go +++ b/weed/mount/weedfs_stats.go @@ -15,14 +15,16 @@ import ( const blockSize = 512 -var _ = fs.NodeStatfser(&WFS{}) +var _ = fs.NodeStatfser(&Directory{}) type statsCache struct { filer_pb.StatisticsResponse lastChecked int64 // unix time in seconds } -func (wfs *WFS) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { +func (dir *Directory) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { + + wfs := dir.wfs glog.V(4).Infof("reading fs stats") From 45a0fda9bda7457db31f6134424d251ffac530f8 Mon Sep 17 00:00:00 2001 From: chrislu Date: Fri, 11 Feb 2022 03:12:52 -0800 Subject: [PATCH 06/39] need to follow https://github.com/hanwen/go-fuse/blob/master/fuse/api.go --- weed/mount/weedfs.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index b2a64acb7..5f35d8112 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -7,6 +7,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/chrislusf/seaweedfs/weed/util" "github.com/chrislusf/seaweedfs/weed/util/grace" + "github.com/hanwen/go-fuse/v2/fuse" "google.golang.org/grpc" "os" "path" @@ -49,6 +50,8 @@ type Option struct { } type WFS struct { + // follow https://github.com/hanwen/go-fuse/blob/master/fuse/api.go + 
fuse.RawFileSystem fs.Inode option *Option metaCache *meta_cache.MetaCache @@ -59,8 +62,9 @@ type WFS struct { func NewSeaweedFileSystem(option *Option) *WFS { wfs := &WFS{ - option: option, - signature: util.RandomInt32(), + RawFileSystem: fuse.NewDefaultRawFileSystem(), + option: option, + signature: util.RandomInt32(), } wfs.root = Directory{ From 180445f5a889c25c15074e105405b2e96c1eadb2 Mon Sep 17 00:00:00 2001 From: chrislu Date: Fri, 11 Feb 2022 21:35:09 -0800 Subject: [PATCH 07/39] change to use fuse file system --- weed/command/mount2_std.go | 59 ++++++++++++++------------------------ weed/mount/weedfs.go | 4 +++ weed/mount/weedfs_stats.go | 13 ++------- 3 files changed, 29 insertions(+), 47 deletions(-) diff --git a/weed/command/mount2_std.go b/weed/command/mount2_std.go index 0cb288c3b..cb2b46556 100644 --- a/weed/command/mount2_std.go +++ b/weed/command/mount2_std.go @@ -11,7 +11,6 @@ import ( "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/storage/types" - "github.com/hanwen/go-fuse/v2/fs" "github.com/hanwen/go-fuse/v2/fuse" "net/http" "os" @@ -141,40 +140,26 @@ func RunMount2(option *Mount2Options, umask os.FileMode) bool { } // mount fuse - sec := time.Second - opts := &fs.Options{ - MountOptions: fuse.MountOptions{ - AllowOther: *option.allowOthers, - Options: nil, - MaxBackground: 128, - MaxWrite: 1024 * 1024 * 2, - MaxReadAhead: 1024 * 1024 * 2, - IgnoreSecurityLabels: false, - RememberInodes: false, - FsName: *option.filer + ":" + filerMountRootPath, - Name: "seaweedfs", - SingleThreaded: false, - DisableXAttrs: false, - Debug: false, - EnableLocks: false, - ExplicitDataCacheControl: false, - // SyncRead: false, // set to false to enable the FUSE_CAP_ASYNC_READ capability - DirectMount: true, - DirectMountFlags: 0, - // EnableAcl: false, - }, - EntryTimeout: &sec, - AttrTimeout: &sec, - NegativeTimeout: nil, - FirstAutomaticIno: 0, - OnAdd: nil, - 
NullPermissions: false, - UID: 0, - GID: 0, - ServerCallbacks: nil, - Logger: nil, - } - opts.Debug = true + fuseMountOptions := &fuse.MountOptions{ + AllowOther: *option.allowOthers, + Options: nil, + MaxBackground: 128, + MaxWrite: 1024 * 1024 * 2, + MaxReadAhead: 1024 * 1024 * 2, + IgnoreSecurityLabels: false, + RememberInodes: false, + FsName: *option.filer + ":" + filerMountRootPath, + Name: "seaweedfs", + SingleThreaded: false, + DisableXAttrs: false, + Debug: true, + EnableLocks: false, + ExplicitDataCacheControl: false, + // SyncRead: false, // set to false to enable the FUSE_CAP_ASYNC_READ capability + DirectMount: true, + DirectMountFlags: 0, + // EnableAcl: false, + } // find mount point mountRoot := filerMountRootPath @@ -207,7 +192,7 @@ func RunMount2(option *Mount2Options, umask os.FileMode) bool { UidGidMapper: uidGidMapper, }) - server, err := fs.Mount(dir, seaweedFileSystem.Root(), opts) + server, err := fuse.NewServer(seaweedFileSystem, dir, fuseMountOptions) if err != nil { glog.Fatalf("Mount fail: %v", err) } @@ -217,7 +202,7 @@ func RunMount2(option *Mount2Options, umask os.FileMode) bool { fmt.Printf("This is SeaweedFS version %s %s %s\n", util.Version(), runtime.GOOS, runtime.GOARCH) - server.Wait() + server.Serve() return true } diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index 5f35d8112..8aa9c95a7 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -87,6 +87,10 @@ func (wfs *WFS) Root() *Directory { return &wfs.root } +func (wfs *WFS) String() string { + return "seaweedfs" +} + func (option *Option) setupUniqueCacheDirectory() { cacheUniqueId := util.Md5String([]byte(option.MountDirectory + string(option.FilerAddresses[0]) + option.FilerMountRootPath + util.Version()))[0:8] option.uniqueCacheDir = path.Join(option.CacheDir, cacheUniqueId) diff --git a/weed/mount/weedfs_stats.go b/weed/mount/weedfs_stats.go index f3ef268df..3de561082 100644 --- a/weed/mount/weedfs_stats.go +++ b/weed/mount/weedfs_stats.go @@ -5,26 
+5,19 @@ import ( "fmt" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "github.com/hanwen/go-fuse/v2/fs" "github.com/hanwen/go-fuse/v2/fuse" "math" - "os" - "syscall" "time" ) const blockSize = 512 -var _ = fs.NodeStatfser(&Directory{}) - type statsCache struct { filer_pb.StatisticsResponse lastChecked int64 // unix time in seconds } -func (dir *Directory) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno { - - wfs := dir.wfs +func (wfs *WFS) StatFs(cancel <-chan struct{}, in *fuse.InHeader, out *fuse.StatfsOut) (code fuse.Status) { glog.V(4).Infof("reading fs stats") @@ -56,7 +49,7 @@ func (dir *Directory) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.E }) if err != nil { glog.V(0).Infof("filer Statistics: %v", err) - return fs.ToErrno(os.ErrInvalid) + return fuse.OK } } @@ -83,5 +76,5 @@ func (dir *Directory) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.E out.NameLen = 1024 out.Frsize = uint32(blockSize) - return fs.OK + return fuse.OK } From f4d88862c47bdd372c1fc2ac6335b1bed3b24a11 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 12 Feb 2022 01:54:16 -0800 Subject: [PATCH 08/39] can attr root directory --- weed/mount/inode_to_path.go | 61 ++++++++++++++++++++ weed/mount/weedfs.go | 47 +++++++++++++-- weed/mount/weedfs_attr.go | 98 ++++++++++++++++++++++++++++++++ weed/mount/weedfs_attr_darwin.go | 8 +++ weed/mount/weedfs_attr_linux.go | 9 +++ 5 files changed, 218 insertions(+), 5 deletions(-) create mode 100644 weed/mount/inode_to_path.go create mode 100644 weed/mount/weedfs_attr.go create mode 100644 weed/mount/weedfs_attr_darwin.go create mode 100644 weed/mount/weedfs_attr_linux.go diff --git a/weed/mount/inode_to_path.go b/weed/mount/inode_to_path.go new file mode 100644 index 000000000..04366ab0d --- /dev/null +++ b/weed/mount/inode_to_path.go @@ -0,0 +1,61 @@ +package mount + +import ( + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" 
+ "sync" +) + +type InodeToPath struct { + sync.RWMutex + nextInodeId uint64 + inode2path map[uint64]util.FullPath + path2inode map[util.FullPath]uint64 +} + +func NewInodeToPath() *InodeToPath { + return &InodeToPath{ + inode2path: make(map[uint64]util.FullPath), + path2inode: make(map[util.FullPath]uint64), + nextInodeId: 2, // the root inode id is 1 + } +} + +func (i *InodeToPath) GetInode(path util.FullPath) uint64 { + if path == "/" { + return 1 + } + i.Lock() + defer i.Unlock() + inode, found := i.path2inode[path] + if !found { + inode = i.nextInodeId + i.nextInodeId++ + i.path2inode[path] = inode + i.inode2path[inode] = path + } + return inode +} + +func (i *InodeToPath) GetPath(inode uint64) util.FullPath { + if inode == 1 { + return "/" + } + i.RLock() + defer i.RUnlock() + path, found := i.inode2path[inode] + if !found { + glog.Fatal("not found inode %d", inode) + } + return path +} + +func (i *InodeToPath) HasPath(path util.FullPath) bool { + if path == "/" { + return true + } + i.RLock() + defer i.RUnlock() + _, found := i.path2inode[path] + return found +} diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index 8aa9c95a7..68f8dd985 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -1,6 +1,7 @@ package mount import ( + "context" "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" @@ -53,11 +54,12 @@ type WFS struct { // follow https://github.com/hanwen/go-fuse/blob/master/fuse/api.go fuse.RawFileSystem fs.Inode - option *Option - metaCache *meta_cache.MetaCache - stats statsCache - root Directory - signature int32 + option *Option + metaCache *meta_cache.MetaCache + stats statsCache + root Directory + signature int32 + inodeToPath *InodeToPath } func NewSeaweedFileSystem(option *Option) *WFS { @@ -65,6 +67,7 @@ func NewSeaweedFileSystem(option *Option) *WFS { RawFileSystem: fuse.NewDefaultRawFileSystem(), option: option, signature: 
util.RandomInt32(), + inodeToPath: NewInodeToPath(), } wfs.root = Directory{ @@ -91,6 +94,40 @@ func (wfs *WFS) String() string { return "seaweedfs" } +func (wfs *WFS) maybeReadEntry(inode uint64) (*filer_pb.Entry, fuse.Status) { + path := wfs.inodeToPath.GetPath(inode) + return wfs.maybeLoadEntry(path) +} + +func (wfs *WFS) maybeLoadEntry(fullpath util.FullPath) (*filer_pb.Entry, fuse.Status) { + + // glog.V(3).Infof("read entry cache miss %s", fullpath) + dir, name := fullpath.DirAndName() + + // return a valid entry for the mount root + if string(fullpath) == wfs.option.FilerMountRootPath { + return &filer_pb.Entry{ + Name: name, + IsDirectory: true, + Attributes: &filer_pb.FuseAttributes{ + Mtime: wfs.option.MountMtime.Unix(), + FileMode: uint32(wfs.option.MountMode), + Uid: wfs.option.MountUid, + Gid: wfs.option.MountGid, + Crtime: wfs.option.MountCtime.Unix(), + }, + }, fuse.OK + } + + // read from async meta cache + meta_cache.EnsureVisited(wfs.metaCache, wfs, util.FullPath(dir)) + cachedEntry, cacheErr := wfs.metaCache.FindEntry(context.Background(), fullpath) + if cacheErr == filer_pb.ErrNotFound { + return nil, fuse.ENOENT + } + return cachedEntry.ToProtoEntry(), fuse.ENOSYS +} + func (option *Option) setupUniqueCacheDirectory() { cacheUniqueId := util.Md5String([]byte(option.MountDirectory + string(option.FilerAddresses[0]) + option.FilerMountRootPath + util.Version()))[0:8] option.uniqueCacheDir = path.Join(option.CacheDir, cacheUniqueId) diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go new file mode 100644 index 000000000..41cd29b75 --- /dev/null +++ b/weed/mount/weedfs_attr.go @@ -0,0 +1,98 @@ +package mount + +import ( + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/hanwen/go-fuse/v2/fuse" + "os" + "syscall" + "time" +) + +func (wfs *WFS) GetAttr(cancel <-chan struct{}, input *fuse.GetAttrIn, out *fuse.AttrOut) (code fuse.Status) { + println("input node id", 
input.NodeId) + if input.NodeId == 1 { + wfs.setRootAttr(out) + return fuse.OK + } + + entry, status := wfs.maybeReadEntry(input.NodeId) + if status != fuse.OK { + return status + } + if entry.IsDirectory { + + } + + return fuse.ENOSYS +} + +func (wfs *WFS) SetAttr(cancel <-chan struct{}, input *fuse.SetAttrIn, out *fuse.AttrOut) (code fuse.Status) { + return fuse.ENOSYS +} +func (wfs *WFS) GetXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string, dest []byte) (size uint32, code fuse.Status) { + return 0, fuse.ENOSYS +} + +func (wfs *WFS) SetXAttr(cancel <-chan struct{}, input *fuse.SetXAttrIn, attr string, data []byte) fuse.Status { + return fuse.ENOSYS +} + +func (wfs *WFS) ListXAttr(cancel <-chan struct{}, header *fuse.InHeader, dest []byte) (n uint32, code fuse.Status) { + return 0, fuse.ENOSYS +} + +func (wfs *WFS) RemoveXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string) fuse.Status { + return fuse.ENOSYS +} + +func (wfs *WFS) setRootAttr(out *fuse.AttrOut) { + now := uint64(time.Now().Second()) + out.AttrValid = 119 + out.Ino = 1 + setBlksize(&out.Attr, blockSize) + out.Uid = wfs.option.MountUid + out.Gid = wfs.option.MountGid + out.Mtime = now + out.Ctime = now + out.Atime = now + out.Mode = uint32(syscall.S_IFDIR | wfs.option.MountMode) + out.Nlink = 1 +} + +func (wfs *WFS) setOutAttr(out *fuse.AttrOut, inode uint64, entry *filer_pb.Entry) { + out.AttrValid = 1 + out.Ino = inode + out.Uid = entry.Attributes.Uid + out.Gid = entry.Attributes.Gid + out.Mode = entry.Attributes.FileMode + out.Mtime = uint64(entry.Attributes.Mtime) + out.Ctime = uint64(entry.Attributes.Mtime) + out.Atime = uint64(entry.Attributes.Mtime) + if entry.HardLinkCounter > 0 { + out.Nlink = uint32(entry.HardLinkCounter) + } + out.Size = filer.FileSize(entry) + out.Blocks = out.Size/blockSize + 1 + setBlksize(&out.Attr, blockSize) + out.Nlink = 1 +} + +func osToSystemMode(mode os.FileMode) uint32 { + switch mode & 0x7F { + case os.ModeDir: + return 
syscall.S_IFDIR + case os.ModeSymlink: + return syscall.S_IFLNK + case os.ModeNamedPipe: + return syscall.S_IFIFO + case os.ModeSocket: + return syscall.S_IFSOCK + case os.ModeDevice: + return syscall.S_IFBLK + case os.ModeCharDevice: + return syscall.S_IFCHR + default: + return syscall.S_IFREG + } +} diff --git a/weed/mount/weedfs_attr_darwin.go b/weed/mount/weedfs_attr_darwin.go new file mode 100644 index 000000000..e7767d4a6 --- /dev/null +++ b/weed/mount/weedfs_attr_darwin.go @@ -0,0 +1,8 @@ +package mount + +import ( + "github.com/hanwen/go-fuse/v2/fuse" +) + +func setBlksize(out *fuse.Attr, size uint32) { +} diff --git a/weed/mount/weedfs_attr_linux.go b/weed/mount/weedfs_attr_linux.go new file mode 100644 index 000000000..56be62e62 --- /dev/null +++ b/weed/mount/weedfs_attr_linux.go @@ -0,0 +1,9 @@ +package mount + +import ( + "github.com/hanwen/go-fuse/v2/fuse" +) + +func setBlksize(out *fuse.Attr, size uint32) { + out.Blksize = size +} From a10c28ba8290b2d11944dc2a7ec624b9db3649b8 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 12 Feb 2022 01:59:36 -0800 Subject: [PATCH 09/39] simplify --- weed/mount/weedfs_attr.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index 41cd29b75..7e53ad506 100644 --- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -20,11 +20,9 @@ func (wfs *WFS) GetAttr(cancel <-chan struct{}, input *fuse.GetAttrIn, out *fuse if status != fuse.OK { return status } - if entry.IsDirectory { + wfs.setOutAttr(out, input.NodeId, entry) - } - - return fuse.ENOSYS + return fuse.OK } func (wfs *WFS) SetAttr(cancel <-chan struct{}, input *fuse.SetAttrIn, out *fuse.AttrOut) (code fuse.Status) { @@ -56,7 +54,7 @@ func (wfs *WFS) setRootAttr(out *fuse.AttrOut) { out.Mtime = now out.Ctime = now out.Atime = now - out.Mode = uint32(syscall.S_IFDIR | wfs.option.MountMode) + out.Mode = osToSystemMode(os.ModeDir) | uint32(wfs.option.MountMode) out.Nlink 
= 1 } @@ -65,7 +63,7 @@ func (wfs *WFS) setOutAttr(out *fuse.AttrOut, inode uint64, entry *filer_pb.Entr out.Ino = inode out.Uid = entry.Attributes.Uid out.Gid = entry.Attributes.Gid - out.Mode = entry.Attributes.FileMode + out.Mode = modeToSystemMode(entry.Attributes.FileMode) out.Mtime = uint64(entry.Attributes.Mtime) out.Ctime = uint64(entry.Attributes.Mtime) out.Atime = uint64(entry.Attributes.Mtime) @@ -78,8 +76,12 @@ func (wfs *WFS) setOutAttr(out *fuse.AttrOut, inode uint64, entry *filer_pb.Entr out.Nlink = 1 } +func modeToSystemMode(mode uint32) uint32 { + return osToSystemMode(os.FileMode(mode)) | mode +} + func osToSystemMode(mode os.FileMode) uint32 { - switch mode & 0x7F { + switch mode & os.ModeType { case os.ModeDir: return syscall.S_IFDIR case os.ModeSymlink: From 72faae91e175fcfdf18af295a2248162c839e3cc Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 12 Feb 2022 02:48:44 -0800 Subject: [PATCH 10/39] implement read directory and read directory plus --- weed/mount/weedfs.go | 7 ++-- weed/mount/weedfs_attr.go | 37 ++++++++++++++------ weed/mount/weedfs_dir.go | 71 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 13 deletions(-) create mode 100644 weed/mount/weedfs_dir.go diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index 68f8dd985..fa5c7d4f9 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -94,9 +94,10 @@ func (wfs *WFS) String() string { return "seaweedfs" } -func (wfs *WFS) maybeReadEntry(inode uint64) (*filer_pb.Entry, fuse.Status) { - path := wfs.inodeToPath.GetPath(inode) - return wfs.maybeLoadEntry(path) +func (wfs *WFS) maybeReadEntry(inode uint64) (path util.FullPath, entry *filer_pb.Entry, status fuse.Status) { + path = wfs.inodeToPath.GetPath(inode) + entry, status = wfs.maybeLoadEntry(path) + return } func (wfs *WFS) maybeLoadEntry(fullpath util.FullPath) (*filer_pb.Entry, fuse.Status) { diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index 7e53ad506..311d5331a 100644 
--- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -16,11 +16,12 @@ func (wfs *WFS) GetAttr(cancel <-chan struct{}, input *fuse.GetAttrIn, out *fuse return fuse.OK } - entry, status := wfs.maybeReadEntry(input.NodeId) + _, entry, status := wfs.maybeReadEntry(input.NodeId) if status != fuse.OK { return status } - wfs.setOutAttr(out, input.NodeId, entry) + out.AttrValid = 1 + wfs.setAttrByPbEntry(&out.Attr, input.NodeId, entry) return fuse.OK } @@ -54,16 +55,15 @@ func (wfs *WFS) setRootAttr(out *fuse.AttrOut) { out.Mtime = now out.Ctime = now out.Atime = now - out.Mode = osToSystemMode(os.ModeDir) | uint32(wfs.option.MountMode) + out.Mode = toSystemType(os.ModeDir) | uint32(wfs.option.MountMode) out.Nlink = 1 } -func (wfs *WFS) setOutAttr(out *fuse.AttrOut, inode uint64, entry *filer_pb.Entry) { - out.AttrValid = 1 +func (wfs *WFS) setAttrByPbEntry(out *fuse.Attr, inode uint64, entry *filer_pb.Entry) { out.Ino = inode out.Uid = entry.Attributes.Uid out.Gid = entry.Attributes.Gid - out.Mode = modeToSystemMode(entry.Attributes.FileMode) + out.Mode = toSystemMode(os.FileMode(entry.Attributes.FileMode)) out.Mtime = uint64(entry.Attributes.Mtime) out.Ctime = uint64(entry.Attributes.Mtime) out.Atime = uint64(entry.Attributes.Mtime) @@ -72,15 +72,32 @@ func (wfs *WFS) setOutAttr(out *fuse.AttrOut, inode uint64, entry *filer_pb.Entr } out.Size = filer.FileSize(entry) out.Blocks = out.Size/blockSize + 1 - setBlksize(&out.Attr, blockSize) + setBlksize(out, blockSize) + out.Nlink = 1 +} + +func (wfs *WFS) setAttrByFilerEntry(out *fuse.Attr, inode uint64, entry *filer.Entry) { + out.Ino = inode + out.Uid = entry.Attr.Uid + out.Gid = entry.Attr.Gid + out.Mode = toSystemMode(entry.Attr.Mode) + out.Mtime = uint64(entry.Attr.Mtime.Unix()) + out.Ctime = uint64(entry.Attr.Mtime.Unix()) + out.Atime = uint64(entry.Attr.Mtime.Unix()) + if entry.HardLinkCounter > 0 { + out.Nlink = uint32(entry.HardLinkCounter) + } + out.Size = entry.FileSize + out.Blocks = 
out.Size/blockSize + 1 + setBlksize(out, blockSize) out.Nlink = 1 } -func modeToSystemMode(mode uint32) uint32 { - return osToSystemMode(os.FileMode(mode)) | mode +func toSystemMode(mode os.FileMode) uint32 { + return toSystemType(mode) | uint32(mode) } -func osToSystemMode(mode os.FileMode) uint32 { +func toSystemType(mode os.FileMode) uint32 { switch mode & os.ModeType { case os.ModeDir: return syscall.S_IFDIR diff --git a/weed/mount/weedfs_dir.go b/weed/mount/weedfs_dir.go new file mode 100644 index 000000000..43eb17bf2 --- /dev/null +++ b/weed/mount/weedfs_dir.go @@ -0,0 +1,71 @@ +package mount + +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/hanwen/go-fuse/v2/fuse" + "math" +) + +// Directory handling + +func (wfs *WFS) OpenDir(cancel <-chan struct{}, input *fuse.OpenIn, out *fuse.OpenOut) (code fuse.Status) { + return fuse.OK +} +func (wfs *WFS) ReleaseDir(input *fuse.ReleaseIn) { +} +func (wfs *WFS) FsyncDir(cancel <-chan struct{}, input *fuse.FsyncIn) (code fuse.Status) { + return fuse.OK +} + +func (wfs *WFS) ReadDir(cancel <-chan struct{}, input *fuse.ReadIn, out *fuse.DirEntryList) (code fuse.Status) { + return wfs.doReadDirectory(input, out, false) +} + +func (wfs *WFS) ReadDirPlus(cancel <-chan struct{}, input *fuse.ReadIn, out *fuse.DirEntryList) (code fuse.Status) { + return wfs.doReadDirectory(input, out, true) +} + +func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPlusMode bool) fuse.Status { + dirPath := wfs.inodeToPath.GetPath(input.NodeId) + + var dirEntry fuse.DirEntry + processEachEntryFn := func(entry *filer.Entry, isLast bool) bool { + dirEntry.Name = entry.Name() + inode := wfs.inodeToPath.GetInode(dirPath.Child(dirEntry.Name)) + dirEntry.Ino = inode + dirEntry.Mode = toSystemMode(entry.Mode) + if !isPlusMode { + if !out.AddDirEntry(dirEntry) { + return false + } + + } else { 
+ entryOut := out.AddDirLookupEntry(dirEntry) + if entryOut == nil { + return false + } + entryOut.Generation = 1 + entryOut.EntryValid = 1 + entryOut.AttrValid = 1 + wfs.setAttrByFilerEntry(&entryOut.Attr, inode, entry) + } + return true + } + + // TODO remove this with checking whether directory is not forgotten + if err := meta_cache.EnsureVisited(wfs.metaCache, wfs, dirPath); err != nil { + glog.Errorf("dir ReadDirAll %s: %v", dirPath, err) + return fuse.EIO + } + listErr := wfs.metaCache.ListDirectoryEntries(context.Background(), dirPath, "", false, int64(math.MaxInt32), func(entry *filer.Entry) bool { + return processEachEntryFn(entry, false) + }) + if listErr != nil { + glog.Errorf("list meta cache: %v", listErr) + return fuse.EIO + } + return fuse.OK +} From 866981d8ac4e8c99c371c4ca2fafcbe36e00a506 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 12 Feb 2022 02:49:15 -0800 Subject: [PATCH 11/39] rename --- weed/mount/{weedfs_dir.go => weedfs_dir_read.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename weed/mount/{weedfs_dir.go => weedfs_dir_read.go} (100%) diff --git a/weed/mount/weedfs_dir.go b/weed/mount/weedfs_dir_read.go similarity index 100% rename from weed/mount/weedfs_dir.go rename to weed/mount/weedfs_dir_read.go From 5a0a709016f6530a3eca225499f5f814c1a38948 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 12 Feb 2022 05:27:16 -0800 Subject: [PATCH 12/39] it runs, but directory listing output is not showing up --- weed/mount/weedfs_attr.go | 22 ++++++++---- weed/mount/weedfs_dir_lookup.go | 59 +++++++++++++++++++++++++++++++++ weed/mount/weedfs_dir_read.go | 32 +++++++++++++++--- 3 files changed, 101 insertions(+), 12 deletions(-) create mode 100644 weed/mount/weedfs_dir_lookup.go diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index 311d5331a..788badb77 100644 --- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -78,19 +78,27 @@ func (wfs *WFS) setAttrByPbEntry(out *fuse.Attr, inode uint64, entry 
*filer_pb.E func (wfs *WFS) setAttrByFilerEntry(out *fuse.Attr, inode uint64, entry *filer.Entry) { out.Ino = inode - out.Uid = entry.Attr.Uid - out.Gid = entry.Attr.Gid - out.Mode = toSystemMode(entry.Attr.Mode) + out.Size = entry.FileSize + out.Blocks = out.Size/blockSize + 1 + setBlksize(out, blockSize) + out.Atime = uint64(entry.Attr.Mtime.Unix()) out.Mtime = uint64(entry.Attr.Mtime.Unix()) out.Ctime = uint64(entry.Attr.Mtime.Unix()) - out.Atime = uint64(entry.Attr.Mtime.Unix()) + out.Crtime_ = uint64(entry.Attr.Crtime.Unix()) + out.Mode = toSystemMode(entry.Attr.Mode) if entry.HardLinkCounter > 0 { out.Nlink = uint32(entry.HardLinkCounter) } - out.Size = entry.FileSize - out.Blocks = out.Size/blockSize + 1 - setBlksize(out, blockSize) out.Nlink = 1 + out.Uid = entry.Attr.Uid + out.Gid = entry.Attr.Gid +} + +func (wfs *WFS) outputEntry(out *fuse.EntryOut, inode uint64, entry *filer.Entry) { + // out.Generation = 1 + out.EntryValid = 1 + out.AttrValid = 1 + wfs.setAttrByFilerEntry(&out.Attr, inode, entry) } func toSystemMode(mode os.FileMode) uint32 { diff --git a/weed/mount/weedfs_dir_lookup.go b/weed/mount/weedfs_dir_lookup.go new file mode 100644 index 000000000..672ba9711 --- /dev/null +++ b/weed/mount/weedfs_dir_lookup.go @@ -0,0 +1,59 @@ +package mount + +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/hanwen/go-fuse/v2/fuse" +) + +// Lookup is called by the kernel when the VFS wants to know +// about a file inside a directory. Many lookup calls can +// occur in parallel, but only one call happens for each (dir, +// name) pair. 
+ +func (wfs *WFS) Lookup(cancel <-chan struct{}, header *fuse.InHeader, name string, out *fuse.EntryOut) (code fuse.Status) { + + dirPath := wfs.inodeToPath.GetPath(header.NodeId) + + println("lookup", name, "dir inode", header.NodeId) + + fullFilePath := dirPath.Child(name) + + visitErr := meta_cache.EnsureVisited(wfs.metaCache, wfs, dirPath) + if visitErr != nil { + glog.Errorf("dir Lookup %s: %v", dirPath, visitErr) + return fuse.EIO + } + localEntry, cacheErr := wfs.metaCache.FindEntry(context.Background(), fullFilePath) + if cacheErr == filer_pb.ErrNotFound { + return fuse.ENOENT + } + + if localEntry == nil { + // glog.V(3).Infof("dir Lookup cache miss %s", fullFilePath) + entry, err := filer_pb.GetEntry(wfs, fullFilePath) + if err != nil { + glog.V(1).Infof("dir GetEntry %s: %v", fullFilePath, err) + return fuse.ENOENT + } + localEntry = filer.FromPbEntry(string(dirPath), entry) + } else { + glog.V(4).Infof("dir Lookup cache hit %s", fullFilePath) + } + + if localEntry == nil { + return fuse.ENOENT + } + + inode := wfs.inodeToPath.GetInode(fullFilePath) + + println("found", name, "inode", inode) + + wfs.outputEntry(out, inode, localEntry) + + return fuse.OK + +} diff --git a/weed/mount/weedfs_dir_read.go b/weed/mount/weedfs_dir_read.go index 43eb17bf2..a696953a1 100644 --- a/weed/mount/weedfs_dir_read.go +++ b/weed/mount/weedfs_dir_read.go @@ -5,8 +5,10 @@ import ( "github.com/chrislusf/seaweedfs/weed/filer" "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" "github.com/hanwen/go-fuse/v2/fuse" "math" + "os" ) // Directory handling @@ -31,26 +33,45 @@ func (wfs *WFS) ReadDirPlus(cancel <-chan struct{}, input *fuse.ReadIn, out *fus func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPlusMode bool) fuse.Status { dirPath := wfs.inodeToPath.GetPath(input.NodeId) + println("input size", input.Size, "offset", input.Offset, "pid", 
input.Caller.Pid) + var dirEntry fuse.DirEntry + if input.Offset == 0 { + dirEntry.Ino = input.NodeId + dirEntry.Name = "." + dirEntry.Mode = toSystemMode(os.ModeDir) + out.AddDirEntry(dirEntry) + + parentDir, _ := dirPath.DirAndName() + parentInode := wfs.inodeToPath.GetInode(util.FullPath(parentDir)) + dirEntry.Ino = parentInode + dirEntry.Name = ".." + dirEntry.Mode = toSystemMode(os.ModeDir) + out.AddDirEntry(dirEntry) + + } + + var counter uint64 processEachEntryFn := func(entry *filer.Entry, isLast bool) bool { + counter++ + if counter <= input.Offset { + return true + } dirEntry.Name = entry.Name() inode := wfs.inodeToPath.GetInode(dirPath.Child(dirEntry.Name)) + println("entry", dirEntry.Name, "inode", inode) dirEntry.Ino = inode dirEntry.Mode = toSystemMode(entry.Mode) if !isPlusMode { if !out.AddDirEntry(dirEntry) { return false } - } else { entryOut := out.AddDirLookupEntry(dirEntry) if entryOut == nil { return false } - entryOut.Generation = 1 - entryOut.EntryValid = 1 - entryOut.AttrValid = 1 - wfs.setAttrByFilerEntry(&entryOut.Attr, inode, entry) + wfs.outputEntry(entryOut, inode, entry) } return true } @@ -67,5 +88,6 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl glog.Errorf("list meta cache: %v", listErr) return fuse.EIO } + return fuse.OK } From b0a5193e326962c8a411f872f45ddb6a16962fd9 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 12 Feb 2022 22:21:30 -0800 Subject: [PATCH 13/39] working --- weed/mount/inode_to_path.go | 10 ++++++++++ weed/mount/weedfs_attr.go | 15 +++++++++------ weed/mount/weedfs_dir_read.go | 7 ++++++- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/weed/mount/inode_to_path.go b/weed/mount/inode_to_path.go index 04366ab0d..e3fabb422 100644 --- a/weed/mount/inode_to_path.go +++ b/weed/mount/inode_to_path.go @@ -59,3 +59,13 @@ func (i *InodeToPath) HasPath(path util.FullPath) bool { _, found := i.path2inode[path] return found } + +func (i *InodeToPath) HasInode(inode uint64) 
bool { + if inode == 1 { + return true + } + i.RLock() + defer i.RUnlock() + _, found := i.inode2path[inode] + return found +} diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index 788badb77..fddba289d 100644 --- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -46,7 +46,7 @@ func (wfs *WFS) RemoveXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr } func (wfs *WFS) setRootAttr(out *fuse.AttrOut) { - now := uint64(time.Now().Second()) + now := uint64(time.Now().Unix()) out.AttrValid = 119 out.Ino = 1 setBlksize(&out.Attr, blockSize) @@ -69,17 +69,18 @@ func (wfs *WFS) setAttrByPbEntry(out *fuse.Attr, inode uint64, entry *filer_pb.E out.Atime = uint64(entry.Attributes.Mtime) if entry.HardLinkCounter > 0 { out.Nlink = uint32(entry.HardLinkCounter) + } else { + out.Nlink = 1 } out.Size = filer.FileSize(entry) - out.Blocks = out.Size/blockSize + 1 + out.Blocks = (out.Size + blockSize - 1) / blockSize setBlksize(out, blockSize) - out.Nlink = 1 } func (wfs *WFS) setAttrByFilerEntry(out *fuse.Attr, inode uint64, entry *filer.Entry) { out.Ino = inode out.Size = entry.FileSize - out.Blocks = out.Size/blockSize + 1 + out.Blocks = (out.Size + blockSize - 1) / blockSize setBlksize(out, blockSize) out.Atime = uint64(entry.Attr.Mtime.Unix()) out.Mtime = uint64(entry.Attr.Mtime.Unix()) @@ -88,14 +89,16 @@ func (wfs *WFS) setAttrByFilerEntry(out *fuse.Attr, inode uint64, entry *filer.E out.Mode = toSystemMode(entry.Attr.Mode) if entry.HardLinkCounter > 0 { out.Nlink = uint32(entry.HardLinkCounter) + } else { + out.Nlink = 1 } - out.Nlink = 1 out.Uid = entry.Attr.Uid out.Gid = entry.Attr.Gid } func (wfs *WFS) outputEntry(out *fuse.EntryOut, inode uint64, entry *filer.Entry) { - // out.Generation = 1 + out.NodeId = inode + out.Generation = 1 out.EntryValid = 1 out.AttrValid = 1 wfs.setAttrByFilerEntry(&out.Attr, inode, entry) diff --git a/weed/mount/weedfs_dir_read.go b/weed/mount/weedfs_dir_read.go index a696953a1..bae6a18a2 100644 --- 
a/weed/mount/weedfs_dir_read.go +++ b/weed/mount/weedfs_dir_read.go @@ -14,6 +14,9 @@ import ( // Directory handling func (wfs *WFS) OpenDir(cancel <-chan struct{}, input *fuse.OpenIn, out *fuse.OpenOut) (code fuse.Status) { + if !wfs.inodeToPath.HasInode(input.NodeId) { + return fuse.ENOENT + } return fuse.OK } func (wfs *WFS) ReleaseDir(input *fuse.ReleaseIn) { @@ -35,13 +38,16 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl println("input size", input.Size, "offset", input.Offset, "pid", input.Caller.Pid) + var counter uint64 var dirEntry fuse.DirEntry if input.Offset == 0 { + counter++ dirEntry.Ino = input.NodeId dirEntry.Name = "." dirEntry.Mode = toSystemMode(os.ModeDir) out.AddDirEntry(dirEntry) + counter++ parentDir, _ := dirPath.DirAndName() parentInode := wfs.inodeToPath.GetInode(util.FullPath(parentDir)) dirEntry.Ino = parentInode @@ -51,7 +57,6 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl } - var counter uint64 processEachEntryFn := func(entry *filer.Entry, isLast bool) bool { counter++ if counter <= input.Offset { From 4c75fd5f9c01af818894411e8a1e81dd2d5b1d20 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 12 Feb 2022 22:41:29 -0800 Subject: [PATCH 14/39] sync format --- weed/mount/weedfs_attr.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index fddba289d..ba0b9a9ff 100644 --- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -61,20 +61,20 @@ func (wfs *WFS) setRootAttr(out *fuse.AttrOut) { func (wfs *WFS) setAttrByPbEntry(out *fuse.Attr, inode uint64, entry *filer_pb.Entry) { out.Ino = inode - out.Uid = entry.Attributes.Uid - out.Gid = entry.Attributes.Gid - out.Mode = toSystemMode(os.FileMode(entry.Attributes.FileMode)) + out.Size = filer.FileSize(entry) + out.Blocks = (out.Size + blockSize - 1) / blockSize + setBlksize(out, blockSize) out.Mtime = 
uint64(entry.Attributes.Mtime) out.Ctime = uint64(entry.Attributes.Mtime) out.Atime = uint64(entry.Attributes.Mtime) + out.Mode = toSystemMode(os.FileMode(entry.Attributes.FileMode)) if entry.HardLinkCounter > 0 { out.Nlink = uint32(entry.HardLinkCounter) } else { out.Nlink = 1 } - out.Size = filer.FileSize(entry) - out.Blocks = (out.Size + blockSize - 1) / blockSize - setBlksize(out, blockSize) + out.Uid = entry.Attributes.Uid + out.Gid = entry.Attributes.Gid } func (wfs *WFS) setAttrByFilerEntry(out *fuse.Attr, inode uint64, entry *filer.Entry) { From 7cfbf1e85fb407751038f750ecd156742ab13359 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 12 Feb 2022 22:41:45 -0800 Subject: [PATCH 15/39] fix ok status --- weed/mount/weedfs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index fa5c7d4f9..6788b0b36 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -126,7 +126,7 @@ func (wfs *WFS) maybeLoadEntry(fullpath util.FullPath) (*filer_pb.Entry, fuse.St if cacheErr == filer_pb.ErrNotFound { return nil, fuse.ENOENT } - return cachedEntry.ToProtoEntry(), fuse.ENOSYS + return cachedEntry.ToProtoEntry(), fuse.OK } func (option *Option) setupUniqueCacheDirectory() { From 5c48c23235e33e1b800c68fce1e07e34c89ac8c6 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 12 Feb 2022 22:45:07 -0800 Subject: [PATCH 16/39] remove println --- weed/mount/weedfs_attr.go | 1 - weed/mount/weedfs_dir_lookup.go | 4 ---- weed/mount/weedfs_dir_read.go | 3 --- 3 files changed, 8 deletions(-) diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index ba0b9a9ff..cbc07a914 100644 --- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -10,7 +10,6 @@ import ( ) func (wfs *WFS) GetAttr(cancel <-chan struct{}, input *fuse.GetAttrIn, out *fuse.AttrOut) (code fuse.Status) { - println("input node id", input.NodeId) if input.NodeId == 1 { wfs.setRootAttr(out) return fuse.OK diff --git 
a/weed/mount/weedfs_dir_lookup.go b/weed/mount/weedfs_dir_lookup.go index 672ba9711..b74948f65 100644 --- a/weed/mount/weedfs_dir_lookup.go +++ b/weed/mount/weedfs_dir_lookup.go @@ -18,8 +18,6 @@ func (wfs *WFS) Lookup(cancel <-chan struct{}, header *fuse.InHeader, name strin dirPath := wfs.inodeToPath.GetPath(header.NodeId) - println("lookup", name, "dir inode", header.NodeId) - fullFilePath := dirPath.Child(name) visitErr := meta_cache.EnsureVisited(wfs.metaCache, wfs, dirPath) @@ -50,8 +48,6 @@ func (wfs *WFS) Lookup(cancel <-chan struct{}, header *fuse.InHeader, name strin inode := wfs.inodeToPath.GetInode(fullFilePath) - println("found", name, "inode", inode) - wfs.outputEntry(out, inode, localEntry) return fuse.OK diff --git a/weed/mount/weedfs_dir_read.go b/weed/mount/weedfs_dir_read.go index bae6a18a2..40e164fc9 100644 --- a/weed/mount/weedfs_dir_read.go +++ b/weed/mount/weedfs_dir_read.go @@ -36,8 +36,6 @@ func (wfs *WFS) ReadDirPlus(cancel <-chan struct{}, input *fuse.ReadIn, out *fus func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPlusMode bool) fuse.Status { dirPath := wfs.inodeToPath.GetPath(input.NodeId) - println("input size", input.Size, "offset", input.Offset, "pid", input.Caller.Pid) - var counter uint64 var dirEntry fuse.DirEntry if input.Offset == 0 { @@ -64,7 +62,6 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl } dirEntry.Name = entry.Name() inode := wfs.inodeToPath.GetInode(dirPath.Child(dirEntry.Name)) - println("entry", dirEntry.Name, "inode", inode) dirEntry.Ino = inode dirEntry.Mode = toSystemMode(entry.Mode) if !isPlusMode { From c81833a1924ca2d9f78573d2d97f0d32a230782a Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 12 Feb 2022 23:08:56 -0800 Subject: [PATCH 17/39] add directory setAttr --- weed/mount/weedfs_attr.go | 31 +++++++++++++++++++- weed/mount/wfs_save.go | 59 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) create mode 
100644 weed/mount/wfs_save.go diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index cbc07a914..09acd303b 100644 --- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -26,7 +26,36 @@ func (wfs *WFS) GetAttr(cancel <-chan struct{}, input *fuse.GetAttrIn, out *fuse } func (wfs *WFS) SetAttr(cancel <-chan struct{}, input *fuse.SetAttrIn, out *fuse.AttrOut) (code fuse.Status) { - return fuse.ENOSYS + + // TODO this is only for directory. Filet setAttr involves open files and truncate to a size + + path, entry, status := wfs.maybeReadEntry(input.NodeId) + if status != fuse.OK { + return status + } + + if mode, ok := input.GetMode(); ok { + entry.Attributes.FileMode = uint32(mode) + } + + if uid, ok := input.GetUID(); ok { + entry.Attributes.Uid = uid + } + + if gid, ok := input.GetGID(); ok { + entry.Attributes.Gid = gid + } + + if mtime, ok := input.GetMTime(); ok { + entry.Attributes.Mtime = mtime.Unix() + } + + entry.Attributes.Mtime = time.Now().Unix() + out.AttrValid = 1 + wfs.setAttrByPbEntry(&out.Attr, input.NodeId, entry) + + return wfs.saveEntry(path, entry) + } func (wfs *WFS) GetXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string, dest []byte) (size uint32, code fuse.Status) { return 0, fuse.ENOSYS diff --git a/weed/mount/wfs_save.go b/weed/mount/wfs_save.go new file mode 100644 index 000000000..240c010d8 --- /dev/null +++ b/weed/mount/wfs_save.go @@ -0,0 +1,59 @@ +package mount + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/hanwen/go-fuse/v2/fuse" +) + +func (wfs *WFS) saveEntry(path util.FullPath, entry *filer_pb.Entry) (code fuse.Status) { + + parentDir, _ := path.DirAndName() + + err := wfs.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { + + wfs.mapPbIdFromLocalToFiler(entry) + defer 
wfs.mapPbIdFromFilerToLocal(entry) + + request := &filer_pb.UpdateEntryRequest{ + Directory: parentDir, + Entry: entry, + Signatures: []int32{wfs.signature}, + } + + glog.V(1).Infof("save entry: %v", request) + _, err := client.UpdateEntry(context.Background(), request) + if err != nil { + return fmt.Errorf("UpdateEntry dir %s: %v", path, err) + } + + if err := wfs.metaCache.UpdateEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { + return fmt.Errorf("UpdateEntry dir %s: %v", path, err) + } + + return nil + }) + if err != nil { + glog.Errorf("saveEntry %s: %v", path, err) + return fuse.EIO + } + + return fuse.OK +} + +func (wfs *WFS) mapPbIdFromFilerToLocal(entry *filer_pb.Entry) { + if entry.Attributes == nil { + return + } + entry.Attributes.Uid, entry.Attributes.Gid = wfs.option.UidGidMapper.FilerToLocal(entry.Attributes.Uid, entry.Attributes.Gid) +} +func (wfs *WFS) mapPbIdFromLocalToFiler(entry *filer_pb.Entry) { + if entry.Attributes == nil { + return + } + entry.Attributes.Uid, entry.Attributes.Gid = wfs.option.UidGidMapper.LocalToFiler(entry.Attributes.Uid, entry.Attributes.Gid) +} From a4c9223b9da14add2cb9522fb5c75fc252243d4a Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 00:58:46 -0800 Subject: [PATCH 18/39] support xattr --- weed/mount/weedfs_attr.go | 155 +++++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 4 deletions(-) diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index 09acd303b..71aaa8c44 100644 --- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -4,11 +4,21 @@ import ( "github.com/chrislusf/seaweedfs/weed/filer" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/hanwen/go-fuse/v2/fuse" + sys "golang.org/x/sys/unix" "os" + "runtime" + "strings" "syscall" "time" ) +const ( + // https://man7.org/linux/man-pages/man7/xattr.7.html#:~:text=The%20VFS%20imposes%20limitations%20that,in%20listxattr(2)). 
+ MAX_XATTR_NAME_SIZE = 255 + MAX_XATTR_VALUE_SIZE = 65536 + XATTR_PREFIX = "xattr-" // same as filer +) + func (wfs *WFS) GetAttr(cancel <-chan struct{}, input *fuse.GetAttrIn, out *fuse.AttrOut) (code fuse.Status) { if input.NodeId == 1 { wfs.setRootAttr(out) @@ -57,20 +67,157 @@ func (wfs *WFS) SetAttr(cancel <-chan struct{}, input *fuse.SetAttrIn, out *fuse return wfs.saveEntry(path, entry) } + +// GetXAttr reads an extended attribute, and should return the +// number of bytes. If the buffer is too small, return ERANGE, +// with the required buffer size. func (wfs *WFS) GetXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string, dest []byte) (size uint32, code fuse.Status) { - return 0, fuse.ENOSYS + + //validate attr name + if len(attr) > MAX_XATTR_NAME_SIZE { + if runtime.GOOS == "darwin" { + return 0, fuse.EPERM + } else { + return 0, fuse.ERANGE + } + } + if len(attr) == 0 { + return 0, fuse.EINVAL + } + + _, entry, status := wfs.maybeReadEntry(header.NodeId) + if status != fuse.OK { + return 0, status + } + if entry == nil { + return 0, fuse.ENOENT + } + if entry.Extended == nil { + return 0, fuse.ENOATTR + } + data, found := entry.Extended[XATTR_PREFIX+attr] + if !found { + return 0, fuse.ENOATTR + } + if len(dest) < len(data) { + return uint32(len(data)), fuse.ERANGE + } + copy(dest, data) + + return uint32(len(data)), fuse.OK } +// SetXAttr writes an extended attribute. +// https://man7.org/linux/man-pages/man2/setxattr.2.html +// By default (i.e., flags is zero), the extended attribute will be +// created if it does not exist, or the value will be replaced if +// the attribute already exists. To modify these semantics, one of +// the following values can be specified in flags: +// +// XATTR_CREATE +// Perform a pure create, which fails if the named attribute +// exists already. +// +// XATTR_REPLACE +// Perform a pure replace operation, which fails if the named +// attribute does not already exist. 
func (wfs *WFS) SetXAttr(cancel <-chan struct{}, input *fuse.SetXAttrIn, attr string, data []byte) fuse.Status { - return fuse.ENOSYS + //validate attr name + if len(attr) > MAX_XATTR_NAME_SIZE { + if runtime.GOOS == "darwin" { + return fuse.EPERM + } else { + return fuse.ERANGE + } + } + if len(attr) == 0 { + return fuse.EINVAL + } + //validate attr value + if len(data) > MAX_XATTR_VALUE_SIZE { + if runtime.GOOS == "darwin" { + return fuse.Status(syscall.E2BIG) + } else { + return fuse.ERANGE + } + } + + path, entry, status := wfs.maybeReadEntry(input.NodeId) + if status != fuse.OK { + return status + } + if entry.Extended == nil { + entry.Extended = make(map[string][]byte) + } + oldData, _ := entry.Extended[XATTR_PREFIX+attr] + switch input.Flags { + case sys.XATTR_CREATE: + if len(oldData) > 0 { + break + } + fallthrough + case sys.XATTR_REPLACE: + fallthrough + default: + entry.Extended[XATTR_PREFIX+attr] = data + } + + return wfs.saveEntry(path, entry) + } +// ListXAttr lists extended attributes as '\0' delimited byte +// slice, and return the number of bytes. If the buffer is too +// small, return ERANGE, with the required buffer size. func (wfs *WFS) ListXAttr(cancel <-chan struct{}, header *fuse.InHeader, dest []byte) (n uint32, code fuse.Status) { - return 0, fuse.ENOSYS + _, entry, status := wfs.maybeReadEntry(header.NodeId) + if status != fuse.OK { + return 0, status + } + if entry == nil { + return 0, fuse.ENOENT + } + if entry.Extended == nil { + return 0, fuse.ENOATTR + } + + var data []byte + for k := range entry.Extended { + if strings.HasPrefix(k, XATTR_PREFIX) { + data = append(data, k[len(XATTR_PREFIX):]...) + data = append(data, 0) + } + } + if len(dest) < len(data) { + return uint32(len(data)), fuse.ERANGE + } + + copy(dest, data) + + return uint32(len(data)), fuse.OK } +// RemoveXAttr removes an extended attribute. 
func (wfs *WFS) RemoveXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string) fuse.Status { - return fuse.ENOSYS + if len(attr) == 0 { + return fuse.EINVAL + } + path, entry, status := wfs.maybeReadEntry(header.NodeId) + if status != fuse.OK { + return status + } + if entry.Extended == nil { + return fuse.ENOATTR + } + _, found := entry.Extended[XATTR_PREFIX+attr] + + if !found { + return fuse.ENOATTR + } + + delete(entry.Extended, XATTR_PREFIX+attr) + + return wfs.saveEntry(path, entry) } func (wfs *WFS) setRootAttr(out *fuse.AttrOut) { From 21046c6a287e68b07ef21702ded90abe29a3412a Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 01:05:30 -0800 Subject: [PATCH 19/39] split files --- weed/mount/weedfs_attr.go | 162 ----------------------------------- weed/mount/weedfs_xattr.go | 168 +++++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+), 162 deletions(-) create mode 100644 weed/mount/weedfs_xattr.go diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index 71aaa8c44..4354baa86 100644 --- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -4,21 +4,11 @@ import ( "github.com/chrislusf/seaweedfs/weed/filer" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/hanwen/go-fuse/v2/fuse" - sys "golang.org/x/sys/unix" "os" - "runtime" - "strings" "syscall" "time" ) -const ( - // https://man7.org/linux/man-pages/man7/xattr.7.html#:~:text=The%20VFS%20imposes%20limitations%20that,in%20listxattr(2)). - MAX_XATTR_NAME_SIZE = 255 - MAX_XATTR_VALUE_SIZE = 65536 - XATTR_PREFIX = "xattr-" // same as filer -) - func (wfs *WFS) GetAttr(cancel <-chan struct{}, input *fuse.GetAttrIn, out *fuse.AttrOut) (code fuse.Status) { if input.NodeId == 1 { wfs.setRootAttr(out) @@ -68,158 +58,6 @@ func (wfs *WFS) SetAttr(cancel <-chan struct{}, input *fuse.SetAttrIn, out *fuse } -// GetXAttr reads an extended attribute, and should return the -// number of bytes. 
If the buffer is too small, return ERANGE, -// with the required buffer size. -func (wfs *WFS) GetXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string, dest []byte) (size uint32, code fuse.Status) { - - //validate attr name - if len(attr) > MAX_XATTR_NAME_SIZE { - if runtime.GOOS == "darwin" { - return 0, fuse.EPERM - } else { - return 0, fuse.ERANGE - } - } - if len(attr) == 0 { - return 0, fuse.EINVAL - } - - _, entry, status := wfs.maybeReadEntry(header.NodeId) - if status != fuse.OK { - return 0, status - } - if entry == nil { - return 0, fuse.ENOENT - } - if entry.Extended == nil { - return 0, fuse.ENOATTR - } - data, found := entry.Extended[XATTR_PREFIX+attr] - if !found { - return 0, fuse.ENOATTR - } - if len(dest) < len(data) { - return uint32(len(data)), fuse.ERANGE - } - copy(dest, data) - - return uint32(len(data)), fuse.OK -} - -// SetXAttr writes an extended attribute. -// https://man7.org/linux/man-pages/man2/setxattr.2.html -// By default (i.e., flags is zero), the extended attribute will be -// created if it does not exist, or the value will be replaced if -// the attribute already exists. To modify these semantics, one of -// the following values can be specified in flags: -// -// XATTR_CREATE -// Perform a pure create, which fails if the named attribute -// exists already. -// -// XATTR_REPLACE -// Perform a pure replace operation, which fails if the named -// attribute does not already exist. 
-func (wfs *WFS) SetXAttr(cancel <-chan struct{}, input *fuse.SetXAttrIn, attr string, data []byte) fuse.Status { - //validate attr name - if len(attr) > MAX_XATTR_NAME_SIZE { - if runtime.GOOS == "darwin" { - return fuse.EPERM - } else { - return fuse.ERANGE - } - } - if len(attr) == 0 { - return fuse.EINVAL - } - //validate attr value - if len(data) > MAX_XATTR_VALUE_SIZE { - if runtime.GOOS == "darwin" { - return fuse.Status(syscall.E2BIG) - } else { - return fuse.ERANGE - } - } - - path, entry, status := wfs.maybeReadEntry(input.NodeId) - if status != fuse.OK { - return status - } - if entry.Extended == nil { - entry.Extended = make(map[string][]byte) - } - oldData, _ := entry.Extended[XATTR_PREFIX+attr] - switch input.Flags { - case sys.XATTR_CREATE: - if len(oldData) > 0 { - break - } - fallthrough - case sys.XATTR_REPLACE: - fallthrough - default: - entry.Extended[XATTR_PREFIX+attr] = data - } - - return wfs.saveEntry(path, entry) - -} - -// ListXAttr lists extended attributes as '\0' delimited byte -// slice, and return the number of bytes. If the buffer is too -// small, return ERANGE, with the required buffer size. -func (wfs *WFS) ListXAttr(cancel <-chan struct{}, header *fuse.InHeader, dest []byte) (n uint32, code fuse.Status) { - _, entry, status := wfs.maybeReadEntry(header.NodeId) - if status != fuse.OK { - return 0, status - } - if entry == nil { - return 0, fuse.ENOENT - } - if entry.Extended == nil { - return 0, fuse.ENOATTR - } - - var data []byte - for k := range entry.Extended { - if strings.HasPrefix(k, XATTR_PREFIX) { - data = append(data, k[len(XATTR_PREFIX):]...) - data = append(data, 0) - } - } - if len(dest) < len(data) { - return uint32(len(data)), fuse.ERANGE - } - - copy(dest, data) - - return uint32(len(data)), fuse.OK -} - -// RemoveXAttr removes an extended attribute. 
-func (wfs *WFS) RemoveXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string) fuse.Status { - if len(attr) == 0 { - return fuse.EINVAL - } - path, entry, status := wfs.maybeReadEntry(header.NodeId) - if status != fuse.OK { - return status - } - if entry.Extended == nil { - return fuse.ENOATTR - } - _, found := entry.Extended[XATTR_PREFIX+attr] - - if !found { - return fuse.ENOATTR - } - - delete(entry.Extended, XATTR_PREFIX+attr) - - return wfs.saveEntry(path, entry) -} - func (wfs *WFS) setRootAttr(out *fuse.AttrOut) { now := uint64(time.Now().Unix()) out.AttrValid = 119 diff --git a/weed/mount/weedfs_xattr.go b/weed/mount/weedfs_xattr.go new file mode 100644 index 000000000..284e47ec0 --- /dev/null +++ b/weed/mount/weedfs_xattr.go @@ -0,0 +1,168 @@ +package mount + +import ( + "github.com/hanwen/go-fuse/v2/fuse" + sys "golang.org/x/sys/unix" + "runtime" + "strings" + "syscall" +) + +const ( + // https://man7.org/linux/man-pages/man7/xattr.7.html#:~:text=The%20VFS%20imposes%20limitations%20that,in%20listxattr(2)). + MAX_XATTR_NAME_SIZE = 255 + MAX_XATTR_VALUE_SIZE = 65536 + XATTR_PREFIX = "xattr-" // same as filer +) + +// GetXAttr reads an extended attribute, and should return the +// number of bytes. If the buffer is too small, return ERANGE, +// with the required buffer size. 
+func (wfs *WFS) GetXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string, dest []byte) (size uint32, code fuse.Status) { + + //validate attr name + if len(attr) > MAX_XATTR_NAME_SIZE { + if runtime.GOOS == "darwin" { + return 0, fuse.EPERM + } else { + return 0, fuse.ERANGE + } + } + if len(attr) == 0 { + return 0, fuse.EINVAL + } + + _, entry, status := wfs.maybeReadEntry(header.NodeId) + if status != fuse.OK { + return 0, status + } + if entry == nil { + return 0, fuse.ENOENT + } + if entry.Extended == nil { + return 0, fuse.ENOATTR + } + data, found := entry.Extended[XATTR_PREFIX+attr] + if !found { + return 0, fuse.ENOATTR + } + if len(dest) < len(data) { + return uint32(len(data)), fuse.ERANGE + } + copy(dest, data) + + return uint32(len(data)), fuse.OK +} + +// SetXAttr writes an extended attribute. +// https://man7.org/linux/man-pages/man2/setxattr.2.html +// By default (i.e., flags is zero), the extended attribute will be +// created if it does not exist, or the value will be replaced if +// the attribute already exists. To modify these semantics, one of +// the following values can be specified in flags: +// +// XATTR_CREATE +// Perform a pure create, which fails if the named attribute +// exists already. +// +// XATTR_REPLACE +// Perform a pure replace operation, which fails if the named +// attribute does not already exist. 
+func (wfs *WFS) SetXAttr(cancel <-chan struct{}, input *fuse.SetXAttrIn, attr string, data []byte) fuse.Status { + //validate attr name + if len(attr) > MAX_XATTR_NAME_SIZE { + if runtime.GOOS == "darwin" { + return fuse.EPERM + } else { + return fuse.ERANGE + } + } + if len(attr) == 0 { + return fuse.EINVAL + } + //validate attr value + if len(data) > MAX_XATTR_VALUE_SIZE { + if runtime.GOOS == "darwin" { + return fuse.Status(syscall.E2BIG) + } else { + return fuse.ERANGE + } + } + + path, entry, status := wfs.maybeReadEntry(input.NodeId) + if status != fuse.OK { + return status + } + if entry.Extended == nil { + entry.Extended = make(map[string][]byte) + } + oldData, _ := entry.Extended[XATTR_PREFIX+attr] + switch input.Flags { + case sys.XATTR_CREATE: + if len(oldData) > 0 { + break + } + fallthrough + case sys.XATTR_REPLACE: + fallthrough + default: + entry.Extended[XATTR_PREFIX+attr] = data + } + + return wfs.saveEntry(path, entry) + +} + +// ListXAttr lists extended attributes as '\0' delimited byte +// slice, and return the number of bytes. If the buffer is too +// small, return ERANGE, with the required buffer size. +func (wfs *WFS) ListXAttr(cancel <-chan struct{}, header *fuse.InHeader, dest []byte) (n uint32, code fuse.Status) { + _, entry, status := wfs.maybeReadEntry(header.NodeId) + if status != fuse.OK { + return 0, status + } + if entry == nil { + return 0, fuse.ENOENT + } + if entry.Extended == nil { + return 0, fuse.ENOATTR + } + + var data []byte + for k := range entry.Extended { + if strings.HasPrefix(k, XATTR_PREFIX) { + data = append(data, k[len(XATTR_PREFIX):]...) + data = append(data, 0) + } + } + if len(dest) < len(data) { + return uint32(len(data)), fuse.ERANGE + } + + copy(dest, data) + + return uint32(len(data)), fuse.OK +} + +// RemoveXAttr removes an extended attribute. 
+func (wfs *WFS) RemoveXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string) fuse.Status { + if len(attr) == 0 { + return fuse.EINVAL + } + path, entry, status := wfs.maybeReadEntry(header.NodeId) + if status != fuse.OK { + return status + } + if entry.Extended == nil { + return fuse.ENOATTR + } + _, found := entry.Extended[XATTR_PREFIX+attr] + + if !found { + return fuse.ENOATTR + } + + delete(entry.Extended, XATTR_PREFIX+attr) + + return wfs.saveEntry(path, entry) +} From e85ca10a1a426e9808024f65dd0921b241296796 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 01:34:19 -0800 Subject: [PATCH 20/39] add mkdir --- weed/mount/weedfs_attr.go | 10 ++++- weed/mount/weedfs_dir_lookup.go | 6 ++- weed/mount/weedfs_dir_mkrm.go | 72 +++++++++++++++++++++++++++++++++ weed/mount/weedfs_dir_read.go | 2 +- weed/mount/wfs_save.go | 8 ++++ 5 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 weed/mount/weedfs_dir_mkrm.go diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index 4354baa86..8dbf65fe8 100644 --- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -109,7 +109,15 @@ func (wfs *WFS) setAttrByFilerEntry(out *fuse.Attr, inode uint64, entry *filer.E out.Gid = entry.Attr.Gid } -func (wfs *WFS) outputEntry(out *fuse.EntryOut, inode uint64, entry *filer.Entry) { +func (wfs *WFS) outputPbEntry(out *fuse.EntryOut, inode uint64, entry *filer_pb.Entry) { + out.NodeId = inode + out.Generation = 1 + out.EntryValid = 1 + out.AttrValid = 1 + wfs.setAttrByPbEntry(&out.Attr, inode, entry) +} + +func (wfs *WFS) outputFilerEntry(out *fuse.EntryOut, inode uint64, entry *filer.Entry) { out.NodeId = inode out.Generation = 1 out.EntryValid = 1 diff --git a/weed/mount/weedfs_dir_lookup.go b/weed/mount/weedfs_dir_lookup.go index b74948f65..477cfad0a 100644 --- a/weed/mount/weedfs_dir_lookup.go +++ b/weed/mount/weedfs_dir_lookup.go @@ -16,6 +16,10 @@ import ( func (wfs *WFS) Lookup(cancel <-chan struct{}, header *fuse.InHeader, 
name string, out *fuse.EntryOut) (code fuse.Status) { + if s := checkName(name); s != fuse.OK { + return s + } + dirPath := wfs.inodeToPath.GetPath(header.NodeId) fullFilePath := dirPath.Child(name) @@ -48,7 +52,7 @@ func (wfs *WFS) Lookup(cancel <-chan struct{}, header *fuse.InHeader, name strin inode := wfs.inodeToPath.GetInode(fullFilePath) - wfs.outputEntry(out, inode, localEntry) + wfs.outputFilerEntry(out, inode, localEntry) return fuse.OK diff --git a/weed/mount/weedfs_dir_mkrm.go b/weed/mount/weedfs_dir_mkrm.go new file mode 100644 index 000000000..670275915 --- /dev/null +++ b/weed/mount/weedfs_dir_mkrm.go @@ -0,0 +1,72 @@ +package mount + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/hanwen/go-fuse/v2/fuse" + "os" + "time" +) + +func (wfs *WFS) Mkdir(cancel <-chan struct{}, in *fuse.MkdirIn, name string, out *fuse.EntryOut) (code fuse.Status) { + + if s := checkName(name); s != fuse.OK { + return s + } + + newEntry := &filer_pb.Entry{ + Name: name, + IsDirectory: true, + Attributes: &filer_pb.FuseAttributes{ + Mtime: time.Now().Unix(), + Crtime: time.Now().Unix(), + FileMode: uint32(os.ModeDir) | in.Mode&^uint32(wfs.option.Umask), + Uid: in.Uid, + Gid: in.Gid, + }, + } + + dirFullPath := wfs.inodeToPath.GetPath(in.NodeId) + + entryFullPath := dirFullPath.Child(name) + + err := wfs.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { + + wfs.mapPbIdFromLocalToFiler(newEntry) + defer wfs.mapPbIdFromFilerToLocal(newEntry) + + request := &filer_pb.CreateEntryRequest{ + Directory: string(dirFullPath), + Entry: newEntry, + Signatures: []int32{wfs.signature}, + } + + glog.V(1).Infof("mkdir: %v", request) + if err := filer_pb.CreateEntry(client, request); err != nil { + glog.V(0).Infof("mkdir %s: %v", entryFullPath, err) + return err + } + + if err := wfs.metaCache.InsertEntry(context.Background(), 
filer.FromPbEntry(request.Directory, request.Entry)); err != nil { + return fmt.Errorf("local mkdir dir %s: %v", entryFullPath, err) + } + + return nil + }) + + glog.V(0).Infof("mkdir %s: %v", entryFullPath, err) + + if err != nil { + return fuse.EIO + } + + inode := wfs.inodeToPath.GetInode(entryFullPath) + + wfs.outputPbEntry(out, inode, newEntry) + + return fuse.OK + +} diff --git a/weed/mount/weedfs_dir_read.go b/weed/mount/weedfs_dir_read.go index 40e164fc9..96fd36ffe 100644 --- a/weed/mount/weedfs_dir_read.go +++ b/weed/mount/weedfs_dir_read.go @@ -73,7 +73,7 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl if entryOut == nil { return false } - wfs.outputEntry(entryOut, inode, entry) + wfs.outputFilerEntry(entryOut, inode, entry) } return true } diff --git a/weed/mount/wfs_save.go b/weed/mount/wfs_save.go index 240c010d8..0cac30453 100644 --- a/weed/mount/wfs_save.go +++ b/weed/mount/wfs_save.go @@ -8,6 +8,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/util" "github.com/hanwen/go-fuse/v2/fuse" + "syscall" ) func (wfs *WFS) saveEntry(path util.FullPath, entry *filer_pb.Entry) (code fuse.Status) { @@ -57,3 +58,10 @@ func (wfs *WFS) mapPbIdFromLocalToFiler(entry *filer_pb.Entry) { } entry.Attributes.Uid, entry.Attributes.Gid = wfs.option.UidGidMapper.LocalToFiler(entry.Attributes.Uid, entry.Attributes.Gid) } + +func checkName(name string) fuse.Status { + if len(name) >= 256 { + return fuse.Status(syscall.ENAMETOOLONG) + } + return fuse.OK +} From 813b868b9ad8d22e05acc3f2003a9c196cdd83b5 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 01:43:11 -0800 Subject: [PATCH 21/39] add rmdir --- weed/mount/weedfs_dir_mkrm.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/weed/mount/weedfs_dir_mkrm.go b/weed/mount/weedfs_dir_mkrm.go index 670275915..69bafaa7e 100644 --- a/weed/mount/weedfs_dir_mkrm.go +++ b/weed/mount/weedfs_dir_mkrm.go 
@@ -8,6 +8,8 @@ import ( "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/hanwen/go-fuse/v2/fuse" "os" + "strings" + "syscall" "time" ) @@ -70,3 +72,32 @@ func (wfs *WFS) Mkdir(cancel <-chan struct{}, in *fuse.MkdirIn, name string, out return fuse.OK } + +func (wfs *WFS) Rmdir(cancel <-chan struct{}, header *fuse.InHeader, name string) (code fuse.Status) { + + if name == "." { + return fuse.Status(syscall.EINVAL) + } + if name == ".." { + return fuse.Status(syscall.ENOTEMPTY) + } + + dirFullPath := wfs.inodeToPath.GetPath(header.NodeId) + entryFullPath := dirFullPath.Child(name) + + glog.V(3).Infof("remove directory: %v", entryFullPath) + ignoreRecursiveErr := true // ignore recursion error since the OS should manage it + err := filer_pb.Remove(wfs, string(dirFullPath), name, true, true, ignoreRecursiveErr, false, []int32{wfs.signature}) + if err != nil { + glog.V(0).Infof("remove %s: %v", entryFullPath, err) + if strings.Contains(err.Error(), filer.MsgFailDelNonEmptyFolder) { + return fuse.Status(syscall.ENOTEMPTY) + } + return fuse.ENOENT + } + + wfs.metaCache.DeleteEntry(context.Background(), entryFullPath) + + return fuse.OK + +} From 6a42cb6b0bf1b96fce06af06e418845ca288ac0f Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 03:09:24 -0800 Subject: [PATCH 22/39] supports mknod, unlink --- weed/mount/inode_to_path.go | 13 ++++ weed/mount/weedfs_attr.go | 23 +++++++ weed/mount/weedfs_dir_mkrm.go | 1 + weed/mount/weedfs_file_mkrm.go | 108 +++++++++++++++++++++++++++++++++ 4 files changed, 145 insertions(+) create mode 100644 weed/mount/weedfs_file_mkrm.go diff --git a/weed/mount/inode_to_path.go b/weed/mount/inode_to_path.go index e3fabb422..21e6c867c 100644 --- a/weed/mount/inode_to_path.go +++ b/weed/mount/inode_to_path.go @@ -69,3 +69,16 @@ func (i *InodeToPath) HasInode(inode uint64) bool { _, found := i.inode2path[inode] return found } + +func (i *InodeToPath) RemovePath(path util.FullPath) { + if path == "/" { + return + } + i.Lock() 
+ defer i.Unlock() + inode, found := i.path2inode[path] + if found { + delete(i.path2inode, path) + delete(i.inode2path, inode) + } +} diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index 8dbf65fe8..a907493ad 100644 --- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -147,3 +147,26 @@ func toSystemType(mode os.FileMode) uint32 { return syscall.S_IFREG } } + +func toFileType(mode uint32) os.FileMode { + switch mode & (syscall.S_IFMT & 0xffff) { + case syscall.S_IFDIR: + return os.ModeDir + case syscall.S_IFLNK: + return os.ModeSymlink + case syscall.S_IFIFO: + return os.ModeNamedPipe + case syscall.S_IFSOCK: + return os.ModeSocket + case syscall.S_IFBLK: + return os.ModeDevice + case syscall.S_IFCHR: + return os.ModeCharDevice + default: + return 0 + } +} + +func toFileMode(mode uint32) os.FileMode { + return toFileType(mode) | os.FileMode(mode&07777) +} diff --git a/weed/mount/weedfs_dir_mkrm.go b/weed/mount/weedfs_dir_mkrm.go index 69bafaa7e..63fe34f91 100644 --- a/weed/mount/weedfs_dir_mkrm.go +++ b/weed/mount/weedfs_dir_mkrm.go @@ -97,6 +97,7 @@ func (wfs *WFS) Rmdir(cancel <-chan struct{}, header *fuse.InHeader, name string } wfs.metaCache.DeleteEntry(context.Background(), entryFullPath) + wfs.inodeToPath.RemovePath(entryFullPath) return fuse.OK diff --git a/weed/mount/weedfs_file_mkrm.go b/weed/mount/weedfs_file_mkrm.go new file mode 100644 index 000000000..b5ade894e --- /dev/null +++ b/weed/mount/weedfs_file_mkrm.go @@ -0,0 +1,108 @@ +package mount + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/hanwen/go-fuse/v2/fuse" + "time" +) + +func (wfs *WFS) Mknod(cancel <-chan struct{}, in *fuse.MknodIn, name string, out *fuse.EntryOut) (code fuse.Status) { + + if s := checkName(name); s != fuse.OK { + return s + } + + newEntry := &filer_pb.Entry{ + Name: name, + IsDirectory: false, + 
Attributes: &filer_pb.FuseAttributes{ + Mtime: time.Now().Unix(), + Crtime: time.Now().Unix(), + FileMode: uint32(toFileMode(in.Mode) &^ wfs.option.Umask), + Uid: in.Uid, + Gid: in.Gid, + Collection: wfs.option.Collection, + Replication: wfs.option.Replication, + TtlSec: wfs.option.TtlSec, + }, + } + + dirFullPath := wfs.inodeToPath.GetPath(in.NodeId) + + entryFullPath := dirFullPath.Child(name) + + err := wfs.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { + + wfs.mapPbIdFromLocalToFiler(newEntry) + defer wfs.mapPbIdFromFilerToLocal(newEntry) + + request := &filer_pb.CreateEntryRequest{ + Directory: string(dirFullPath), + Entry: newEntry, + Signatures: []int32{wfs.signature}, + } + + glog.V(1).Infof("mknod: %v", request) + if err := filer_pb.CreateEntry(client, request); err != nil { + glog.V(0).Infof("mknod %s: %v", entryFullPath, err) + return err + } + + if err := wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)); err != nil { + return fmt.Errorf("local mknod %s: %v", entryFullPath, err) + } + + return nil + }) + + glog.V(0).Infof("mknod %s: %v", entryFullPath, err) + + if err != nil { + return fuse.EIO + } + + inode := wfs.inodeToPath.GetInode(entryFullPath) + + wfs.outputPbEntry(out, inode, newEntry) + + return fuse.OK + +} + +func (wfs *WFS) Unlink(cancel <-chan struct{}, header *fuse.InHeader, name string) (code fuse.Status) { + + dirFullPath := wfs.inodeToPath.GetPath(header.NodeId) + entryFullPath := dirFullPath.Child(name) + + entry, status := wfs.maybeLoadEntry(entryFullPath) + if status != fuse.OK { + return status + } + + // first, ensure the filer store can correctly delete + glog.V(3).Infof("remove file: %v", entryFullPath) + isDeleteData := entry != nil && entry.HardLinkCounter <= 1 + err := filer_pb.Remove(wfs, string(dirFullPath), name, isDeleteData, false, false, false, []int32{wfs.signature}) + if err != nil { + glog.V(0).Infof("remove %s: %v", entryFullPath, err) + 
return fuse.ENOENT + } + + // then, delete meta cache + if err = wfs.metaCache.DeleteEntry(context.Background(), entryFullPath); err != nil { + glog.V(3).Infof("local DeleteEntry %s: %v", entryFullPath, err) + return fuse.EIO + } + + wfs.metaCache.DeleteEntry(context.Background(), entryFullPath) + wfs.inodeToPath.RemovePath(entryFullPath) + + // TODO handle open files, hardlink + + return fuse.OK + +} From 3d93570979967b685145c7126098a419eda9fc29 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 03:31:47 -0800 Subject: [PATCH 23/39] supports forget --- weed/mount/inode_to_path.go | 12 +++++++++ weed/mount/weedfs.go | 1 + weed/mount/weedfs_forget.go | 52 +++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 weed/mount/weedfs_forget.go diff --git a/weed/mount/inode_to_path.go b/weed/mount/inode_to_path.go index 21e6c867c..6650f2380 100644 --- a/weed/mount/inode_to_path.go +++ b/weed/mount/inode_to_path.go @@ -82,3 +82,15 @@ func (i *InodeToPath) RemovePath(path util.FullPath) { delete(i.inode2path, inode) } } +func (i *InodeToPath) RemoveInode(inode uint64) { + if inode == 1 { + return + } + i.RLock() + defer i.RUnlock() + path, found := i.inode2path[inode] + if found { + delete(i.path2inode, path) + delete(i.inode2path, inode) + } +} diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index 6788b0b36..a36e4dc97 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -120,6 +120,7 @@ func (wfs *WFS) maybeLoadEntry(fullpath util.FullPath) (*filer_pb.Entry, fuse.St }, fuse.OK } + // TODO Use inode to selectively filetering metadata updates // read from async meta cache meta_cache.EnsureVisited(wfs.metaCache, wfs, util.FullPath(dir)) cachedEntry, cacheErr := wfs.metaCache.FindEntry(context.Background(), fullpath) diff --git a/weed/mount/weedfs_forget.go b/weed/mount/weedfs_forget.go new file mode 100644 index 000000000..2d09a473b --- /dev/null +++ b/weed/mount/weedfs_forget.go @@ -0,0 +1,52 @@ +package mount 
+ +// Forget is called when the kernel discards entries from its +// dentry cache. This happens on unmount, and when the kernel +// is short on memory. Since it is not guaranteed to occur at +// any moment, and since there is no return value, Forget +// should not do I/O, as there is no channel to report back +// I/O errors. +// from https://github.com/libfuse/libfuse/blob/master/include/fuse_lowlevel.h +/** + * Forget about an inode + * + * This function is called when the kernel removes an inode + * from its internal caches. + * + * The inode's lookup count increases by one for every call to + * fuse_reply_entry and fuse_reply_create. The nlookup parameter + * indicates by how much the lookup count should be decreased. + * + * Inodes with a non-zero lookup count may receive request from + * the kernel even after calls to unlink, rmdir or (when + * overwriting an existing file) rename. Filesystems must handle + * such requests properly and it is recommended to defer removal + * of the inode until the lookup count reaches zero. Calls to + * unlink, rmdir or rename will be followed closely by forget + * unless the file or directory is open, in which case the + * kernel issues forget only after the release or releasedir + * calls. + * + * Note that if a file system will be exported over NFS the + * inodes lifetime must extend even beyond forget. See the + * generation field in struct fuse_entry_param above. + * + * On unmount the lookup count for all inodes implicitly drops + * to zero. It is not guaranteed that the file system will + * receive corresponding forget messages for the affected + * inodes. 
+ * + * Valid replies: + * fuse_reply_none + * + * @param req request handle + * @param ino the inode number + * @param nlookup the number of lookups to forget + */ +func (wfs *WFS) Forget(nodeid, nlookup uint64) { + if nlookup == 0 { + // need to maintain the inode for selective filtering + // and caching for metadata updates + wfs.inodeToPath.RemoveInode(nodeid) + } +} From be40ff6048f8b6005db9cff54f074b3ab5134abb Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 03:50:16 -0800 Subject: [PATCH 24/39] add symlink --- weed/mount/weedfs_symlink.go | 77 ++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 weed/mount/weedfs_symlink.go diff --git a/weed/mount/weedfs_symlink.go b/weed/mount/weedfs_symlink.go new file mode 100644 index 000000000..3c36717d1 --- /dev/null +++ b/weed/mount/weedfs_symlink.go @@ -0,0 +1,77 @@ +package mount + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/hanwen/go-fuse/v2/fuse" + "os" + "time" +) + +func (wfs *WFS) Symlink(cancel <-chan struct{}, header *fuse.InHeader, target string, name string, out *fuse.EntryOut) (code fuse.Status) { + + if s := checkName(name); s != fuse.OK { + return s + } + + dirPath := wfs.inodeToPath.GetPath(header.NodeId) + entryFullPath := dirPath.Child(name) + + request := &filer_pb.CreateEntryRequest{ + Directory: string(dirPath), + Entry: &filer_pb.Entry{ + Name: name, + IsDirectory: false, + Attributes: &filer_pb.FuseAttributes{ + Mtime: time.Now().Unix(), + Crtime: time.Now().Unix(), + FileMode: uint32((os.FileMode(0777) | os.ModeSymlink) &^ wfs.option.Umask), + Uid: header.Uid, + Gid: header.Gid, + SymlinkTarget: target, + }, + }, + Signatures: []int32{wfs.signature}, + } + + err := wfs.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { + + wfs.mapPbIdFromLocalToFiler(request.Entry) + defer 
wfs.mapPbIdFromFilerToLocal(request.Entry) + + if err := filer_pb.CreateEntry(client, request); err != nil { + return fmt.Errorf("symlink %s: %v", entryFullPath, err) + } + + wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)) + + return nil + }) + if err != nil { + glog.V(0).Infof("Symlink %s => %s: %v", entryFullPath, target, err) + return fuse.EIO + } + + inode := wfs.inodeToPath.GetInode(entryFullPath) + + wfs.outputPbEntry(out, inode, request.Entry) + + return fuse.OK +} + +func (wfs *WFS) Readlink(cancel <-chan struct{}, header *fuse.InHeader) (out []byte, code fuse.Status) { + entryFullPath := wfs.inodeToPath.GetPath(header.NodeId) + + entry, status := wfs.maybeLoadEntry(entryFullPath) + if status != fuse.OK { + return nil, status + } + if os.FileMode(entry.Attributes.FileMode)&os.ModeSymlink == 0 { + return nil, fuse.EINVAL + } + + return []byte(entry.Attributes.SymlinkTarget), fuse.OK +} From a1ef0e48a982ade0ca6a6e288e955422c61f8c37 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 04:22:02 -0800 Subject: [PATCH 25/39] doc --- weed/mount/weedfs_dir_mkrm.go | 7 ++++ weed/mount/weedfs_dir_read.go | 38 +++++++++++++++++++ weed/mount/weedfs_file_mkrm.go | 21 +++++++++++ weed/mount/weedfs_symlink.go | 1 + weed/mount/weedfs_unsupported.go | 65 ++++++++++++++++++++++++++++++++ 5 files changed, 132 insertions(+) create mode 100644 weed/mount/weedfs_unsupported.go diff --git a/weed/mount/weedfs_dir_mkrm.go b/weed/mount/weedfs_dir_mkrm.go index 63fe34f91..fb854b77e 100644 --- a/weed/mount/weedfs_dir_mkrm.go +++ b/weed/mount/weedfs_dir_mkrm.go @@ -13,6 +13,12 @@ import ( "time" ) +/** Create a directory + * + * Note that the mode argument may not have the type specification + * bits set, i.e. S_ISDIR(mode) can be false. 
To obtain the + * correct directory type bits use mode|S_IFDIR + * */ func (wfs *WFS) Mkdir(cancel <-chan struct{}, in *fuse.MkdirIn, name string, out *fuse.EntryOut) (code fuse.Status) { if s := checkName(name); s != fuse.OK { @@ -73,6 +79,7 @@ func (wfs *WFS) Mkdir(cancel <-chan struct{}, in *fuse.MkdirIn, name string, out } +/** Remove a directory */ func (wfs *WFS) Rmdir(cancel <-chan struct{}, header *fuse.InHeader, name string) (code fuse.Status) { if name == "." { diff --git a/weed/mount/weedfs_dir_read.go b/weed/mount/weedfs_dir_read.go index 96fd36ffe..3a187aa1c 100644 --- a/weed/mount/weedfs_dir_read.go +++ b/weed/mount/weedfs_dir_read.go @@ -13,18 +13,56 @@ import ( // Directory handling +/** Open directory + * + * Unless the 'default_permissions' mount option is given, + * this method should check if opendir is permitted for this + * directory. Optionally opendir may also return an arbitrary + * filehandle in the fuse_file_info structure, which will be + * passed to readdir, releasedir and fsyncdir. + */ func (wfs *WFS) OpenDir(cancel <-chan struct{}, input *fuse.OpenIn, out *fuse.OpenOut) (code fuse.Status) { if !wfs.inodeToPath.HasInode(input.NodeId) { return fuse.ENOENT } return fuse.OK } + +/** Release directory + * + * If the directory has been removed after the call to opendir, the + * path parameter will be NULL. + */ func (wfs *WFS) ReleaseDir(input *fuse.ReleaseIn) { } + +/** Synchronize directory contents + * + * If the directory has been removed after the call to opendir, the + * path parameter will be NULL. + * + * If the datasync parameter is non-zero, then only the user data + * should be flushed, not the meta data + */ func (wfs *WFS) FsyncDir(cancel <-chan struct{}, input *fuse.FsyncIn) (code fuse.Status) { return fuse.OK } +/** Read directory + * + * The filesystem may choose between two modes of operation: + * + * 1) The readdir implementation ignores the offset parameter, and + * passes zero to the filler function's offset. 
The filler + * function will not return '1' (unless an error happens), so the + * whole directory is read in a single readdir operation. + * + * 2) The readdir implementation keeps track of the offsets of the + * directory entries. It uses the offset parameter and always + * passes non-zero offset to the filler function. When the buffer + * is full (or an error happens) the filler function will return + * '1'. + */ func (wfs *WFS) ReadDir(cancel <-chan struct{}, input *fuse.ReadIn, out *fuse.DirEntryList) (code fuse.Status) { return wfs.doReadDirectory(input, out, false) } diff --git a/weed/mount/weedfs_file_mkrm.go b/weed/mount/weedfs_file_mkrm.go index b5ade894e..82da45179 100644 --- a/weed/mount/weedfs_file_mkrm.go +++ b/weed/mount/weedfs_file_mkrm.go @@ -10,6 +10,26 @@ import ( "time" ) +/** + * Create and open a file + * + * If the file does not exist, first create it with the specified + * mode, and then open it. + * + * If this method is not implemented or under Linux kernel + * versions earlier than 2.6.15, the mknod() and open() methods + * will be called instead. + */ +func (wfs *WFS) Create(cancel <-chan struct{}, in *fuse.CreateIn, name string, out *fuse.CreateOut) (code fuse.Status) { + return fuse.ENOSYS +} + +/** Create a file node + * + * This is called for creation of all non-directory, non-symlink + * nodes. If the filesystem defines a create() method, then for + * regular files that will be called instead. 
+ */ func (wfs *WFS) Mknod(cancel <-chan struct{}, in *fuse.MknodIn, name string, out *fuse.EntryOut) (code fuse.Status) { if s := checkName(name); s != fuse.OK { @@ -73,6 +93,7 @@ func (wfs *WFS) Mknod(cancel <-chan struct{}, in *fuse.MknodIn, name string, out } +/** Remove a file */ func (wfs *WFS) Unlink(cancel <-chan struct{}, header *fuse.InHeader, name string) (code fuse.Status) { dirFullPath := wfs.inodeToPath.GetPath(header.NodeId) diff --git a/weed/mount/weedfs_symlink.go b/weed/mount/weedfs_symlink.go index 3c36717d1..2c66db7f4 100644 --- a/weed/mount/weedfs_symlink.go +++ b/weed/mount/weedfs_symlink.go @@ -11,6 +11,7 @@ import ( "time" ) +/** Create a symbolic link */ func (wfs *WFS) Symlink(cancel <-chan struct{}, header *fuse.InHeader, target string, name string, out *fuse.EntryOut) (code fuse.Status) { if s := checkName(name); s != fuse.OK { diff --git a/weed/mount/weedfs_unsupported.go b/weed/mount/weedfs_unsupported.go new file mode 100644 index 000000000..218486d5c --- /dev/null +++ b/weed/mount/weedfs_unsupported.go @@ -0,0 +1,65 @@ +package mount + +import "github.com/hanwen/go-fuse/fuse" + +// https://github.com/libfuse/libfuse/blob/48ae2e72b39b6a31cb2194f6f11786b7ca06aac6/include/fuse.h#L778 + +/** + * Copy a range of data from one file to anotherNiels de Vos, 4 years ago: • libfuse: add copy_file_range() support + * + * Performs an optimized copy between two file descriptors without the + * additional cost of transferring data through the FUSE kernel module + * to user space (glibc) and then back into the FUSE filesystem again. + * + * In case this method is not implemented, applications are expected to + * fall back to a regular file copy. (Some glibc versions did this + * emulation automatically, but the emulation has been removed from all + * glibc release branches.) 
+ */ +func (wfs *WFS) CopyFileRange(cancel <-chan struct{}, in *fuse.CopyFileRangeIn) (written uint32, code fuse.Status) { + return 0, fuse.ENOSYS +} + +/** + * Allocates space for an open file + * + * This function ensures that required space is allocated for specified + * file. If this function returns success then any subsequent write + * request to specified range is guaranteed not to fail because of lack + * of space on the file system media. + */ +func (wfs *WFS) Fallocate(cancel <-chan struct{}, in *fuse.FallocateIn) (code fuse.Status) { + return fuse.ENOSYS +} + +/** + * Find next data or hole after the specified offset + */ +func (wfs *WFS) Lseek(cancel <-chan struct{}, in *fuse.LseekIn, out *fuse.LseekOut) fuse.Status { + return fuse.ENOSYS +} + +func (wfs *WFS) GetLk(cancel <-chan struct{}, in *fuse.LkIn, out *fuse.LkOut) (code fuse.Status) { + return fuse.ENOSYS +} + +func (wfs *WFS) SetLk(cancel <-chan struct{}, in *fuse.LkIn) (code fuse.Status) { + return fuse.ENOSYS +} + +func (wfs *WFS) SetLkw(cancel <-chan struct{}, in *fuse.LkIn) (code fuse.Status) { + return fuse.ENOSYS +} + +/** + * Check file access permissions + * + * This will be called for the access() system call. If the + * 'default_permissions' mount option is given, this method is not + * called. 
+ * + * This method is not called under Linux kernel versions 2.4.x + */ +func (wfs *WFS) Access(cancel <-chan struct{}, input *fuse.AccessIn) (code fuse.Status) { + return fuse.ENOSYS +} From 0381338d401f1d03f911b6b203cb2188a2cffe26 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 04:23:06 -0800 Subject: [PATCH 26/39] fix import --- weed/mount/weedfs_unsupported.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weed/mount/weedfs_unsupported.go b/weed/mount/weedfs_unsupported.go index 218486d5c..2536811b8 100644 --- a/weed/mount/weedfs_unsupported.go +++ b/weed/mount/weedfs_unsupported.go @@ -1,6 +1,6 @@ package mount -import "github.com/hanwen/go-fuse/fuse" +import "github.com/hanwen/go-fuse/v2/fuse" // https://github.com/libfuse/libfuse/blob/48ae2e72b39b6a31cb2194f6f11786b7ca06aac6/include/fuse.h#L778 From f8af0f93d9474e44741d07d9d83aaa6818fd6736 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 05:29:43 -0800 Subject: [PATCH 27/39] support link --- weed/mount/weedfs_link.go | 93 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 weed/mount/weedfs_link.go diff --git a/weed/mount/weedfs_link.go b/weed/mount/weedfs_link.go new file mode 100644 index 000000000..c1d634a94 --- /dev/null +++ b/weed/mount/weedfs_link.go @@ -0,0 +1,93 @@ +package mount + +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/hanwen/go-fuse/v2/fuse" + "time" +) + +const ( + HARD_LINK_MARKER = '\x01' +) + +/** Create a hard link to a file */ +func (wfs *WFS) Link(cancel <-chan struct{}, in *fuse.LinkIn, name string, out *fuse.EntryOut) (code fuse.Status) { + + if s := checkName(name); s != fuse.OK { + return s + } + + newParentPath := wfs.inodeToPath.GetPath(in.NodeId) + oldEntryPath := wfs.inodeToPath.GetPath(in.Oldnodeid) + oldParentPath, 
_ := oldEntryPath.DirAndName() + + oldEntry, status := wfs.maybeLoadEntry(oldEntryPath) + if status != fuse.OK { + return status + } + + // update old file to hardlink mode + if len(oldEntry.HardLinkId) == 0 { + oldEntry.HardLinkId = append(util.RandomBytes(16), HARD_LINK_MARKER) + oldEntry.HardLinkCounter = 1 + } + oldEntry.HardLinkCounter++ + updateOldEntryRequest := &filer_pb.UpdateEntryRequest{ + Directory: oldParentPath, + Entry: oldEntry, + Signatures: []int32{wfs.signature}, + } + + // CreateLink 1.2 : update new file to hardlink mode + oldEntry.Attributes.Mtime = time.Now().Unix() + request := &filer_pb.CreateEntryRequest{ + Directory: string(newParentPath), + Entry: &filer_pb.Entry{ + Name: name, + IsDirectory: false, + Attributes: oldEntry.Attributes, + Chunks: oldEntry.Chunks, + Extended: oldEntry.Extended, + HardLinkId: oldEntry.HardLinkId, + HardLinkCounter: oldEntry.HardLinkCounter, + }, + Signatures: []int32{wfs.signature}, + } + + // apply changes to the filer, and also apply to local metaCache + err := wfs.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { + + wfs.mapPbIdFromLocalToFiler(request.Entry) + defer wfs.mapPbIdFromFilerToLocal(request.Entry) + + if err := filer_pb.UpdateEntry(client, updateOldEntryRequest); err != nil { + return err + } + wfs.metaCache.UpdateEntry(context.Background(), filer.FromPbEntry(updateOldEntryRequest.Directory, updateOldEntryRequest.Entry)) + + if err := filer_pb.CreateEntry(client, request); err != nil { + return err + } + + wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)) + + return nil + }) + + newEntryPath := newParentPath.Child(name) + + if err != nil { + glog.V(0).Infof("Link %v -> %s: %v", oldEntryPath, newEntryPath, err) + return fuse.EIO + } + + inode := wfs.inodeToPath.GetInode(newEntryPath) + + wfs.outputPbEntry(out, inode, request.Entry) + + return fuse.OK +} From 6a921e15f350c3cedfe6331fab2297213ece89bd Mon Sep 17 00:00:00 2001 
From: chrislu Date: Sun, 13 Feb 2022 05:49:29 -0800 Subject: [PATCH 28/39] forget() factor in nlookup --- weed/mount/inode_to_path.go | 45 +++++++++++++++++++++++++-------- weed/mount/weedfs_dir_lookup.go | 2 +- weed/mount/weedfs_dir_mkrm.go | 2 +- weed/mount/weedfs_file_mkrm.go | 2 +- weed/mount/weedfs_forget.go | 18 +++++++++---- weed/mount/weedfs_link.go | 2 +- weed/mount/weedfs_symlink.go | 2 +- 7 files changed, 52 insertions(+), 21 deletions(-) diff --git a/weed/mount/inode_to_path.go b/weed/mount/inode_to_path.go index 6650f2380..25e615ed6 100644 --- a/weed/mount/inode_to_path.go +++ b/weed/mount/inode_to_path.go @@ -9,19 +9,23 @@ import ( type InodeToPath struct { sync.RWMutex nextInodeId uint64 - inode2path map[uint64]util.FullPath + inode2path map[uint64]*InodeEntry path2inode map[util.FullPath]uint64 } +type InodeEntry struct { + util.FullPath + nlookup uint64 +} func NewInodeToPath() *InodeToPath { return &InodeToPath{ - inode2path: make(map[uint64]util.FullPath), + inode2path: make(map[uint64]*InodeEntry), path2inode: make(map[util.FullPath]uint64), nextInodeId: 2, // the root inode id is 1 } } -func (i *InodeToPath) GetInode(path util.FullPath) uint64 { +func (i *InodeToPath) Lookup(path util.FullPath) uint64 { if path == "/" { return 1 } @@ -32,7 +36,22 @@ func (i *InodeToPath) GetInode(path util.FullPath) uint64 { inode = i.nextInodeId i.nextInodeId++ i.path2inode[path] = inode - i.inode2path[inode] = path + i.inode2path[inode] = &InodeEntry{path, 1} + } else { + i.inode2path[inode].nlookup++ + } + return inode +} + +func (i *InodeToPath) GetInode(path util.FullPath) uint64 { + if path == "/" { + return 1 + } + i.Lock() + defer i.Unlock() + inode, found := i.path2inode[path] + if !found { + glog.Fatalf("GetInode unknown inode %d", inode) } return inode } @@ -45,9 +64,9 @@ func (i *InodeToPath) GetPath(inode uint64) util.FullPath { defer i.RUnlock() path, found := i.inode2path[inode] if !found { - glog.Fatal("not found inode %d", inode) + 
glog.Fatalf("not found inode %d", inode) } - return path + return path.FullPath } func (i *InodeToPath) HasPath(path util.FullPath) bool { @@ -82,15 +101,19 @@ func (i *InodeToPath) RemovePath(path util.FullPath) { delete(i.inode2path, inode) } } -func (i *InodeToPath) RemoveInode(inode uint64) { + +func (i *InodeToPath) Forget(inode, nlookup uint64) { if inode == 1 { return } - i.RLock() - defer i.RUnlock() + i.Lock() + defer i.Unlock() path, found := i.inode2path[inode] if found { - delete(i.path2inode, path) - delete(i.inode2path, inode) + path.nlookup -= nlookup + if path.nlookup <= 0 { + delete(i.path2inode, path.FullPath) + delete(i.inode2path, inode) + } } } diff --git a/weed/mount/weedfs_dir_lookup.go b/weed/mount/weedfs_dir_lookup.go index 477cfad0a..733e31908 100644 --- a/weed/mount/weedfs_dir_lookup.go +++ b/weed/mount/weedfs_dir_lookup.go @@ -50,7 +50,7 @@ func (wfs *WFS) Lookup(cancel <-chan struct{}, header *fuse.InHeader, name strin return fuse.ENOENT } - inode := wfs.inodeToPath.GetInode(fullFilePath) + inode := wfs.inodeToPath.Lookup(fullFilePath) wfs.outputFilerEntry(out, inode, localEntry) diff --git a/weed/mount/weedfs_dir_mkrm.go b/weed/mount/weedfs_dir_mkrm.go index fb854b77e..4efab078f 100644 --- a/weed/mount/weedfs_dir_mkrm.go +++ b/weed/mount/weedfs_dir_mkrm.go @@ -71,7 +71,7 @@ func (wfs *WFS) Mkdir(cancel <-chan struct{}, in *fuse.MkdirIn, name string, out return fuse.EIO } - inode := wfs.inodeToPath.GetInode(entryFullPath) + inode := wfs.inodeToPath.Lookup(entryFullPath) wfs.outputPbEntry(out, inode, newEntry) diff --git a/weed/mount/weedfs_file_mkrm.go b/weed/mount/weedfs_file_mkrm.go index 82da45179..089cb540d 100644 --- a/weed/mount/weedfs_file_mkrm.go +++ b/weed/mount/weedfs_file_mkrm.go @@ -85,7 +85,7 @@ func (wfs *WFS) Mknod(cancel <-chan struct{}, in *fuse.MknodIn, name string, out return fuse.EIO } - inode := wfs.inodeToPath.GetInode(entryFullPath) + inode := wfs.inodeToPath.Lookup(entryFullPath) wfs.outputPbEntry(out, inode, 
newEntry) diff --git a/weed/mount/weedfs_forget.go b/weed/mount/weedfs_forget.go index 2d09a473b..34e0eddc9 100644 --- a/weed/mount/weedfs_forget.go +++ b/weed/mount/weedfs_forget.go @@ -43,10 +43,18 @@ package mount * @param ino the inode number * @param nlookup the number of lookups to forget */ +/* + +int fuse_reply_entry ( fuse_req_t req, +const struct fuse_entry_param * e +) +Reply with a directory entry + +Possible requests: lookup, mknod, mkdir, symlink, link + +Side effects: increments the lookup count on success + +*/ func (wfs *WFS) Forget(nodeid, nlookup uint64) { - if nlookup == 0 { - // need to maintain the inode for selective filtering - // and caching for metadata updates - wfs.inodeToPath.RemoveInode(nodeid) - } + wfs.inodeToPath.Forget(nodeid, nlookup) } diff --git a/weed/mount/weedfs_link.go b/weed/mount/weedfs_link.go index c1d634a94..05710e5a0 100644 --- a/weed/mount/weedfs_link.go +++ b/weed/mount/weedfs_link.go @@ -85,7 +85,7 @@ func (wfs *WFS) Link(cancel <-chan struct{}, in *fuse.LinkIn, name string, out * return fuse.EIO } - inode := wfs.inodeToPath.GetInode(newEntryPath) + inode := wfs.inodeToPath.Lookup(newEntryPath) wfs.outputPbEntry(out, inode, request.Entry) diff --git a/weed/mount/weedfs_symlink.go b/weed/mount/weedfs_symlink.go index 2c66db7f4..86a7b50e4 100644 --- a/weed/mount/weedfs_symlink.go +++ b/weed/mount/weedfs_symlink.go @@ -56,7 +56,7 @@ func (wfs *WFS) Symlink(cancel <-chan struct{}, header *fuse.InHeader, target st return fuse.EIO } - inode := wfs.inodeToPath.GetInode(entryFullPath) + inode := wfs.inodeToPath.Lookup(entryFullPath) wfs.outputPbEntry(out, inode, request.Entry) From 6200b6abb10eb2559166be8890718102ad8b4c1c Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 05:59:10 -0800 Subject: [PATCH 29/39] avoid fatal message in some edge cases --- weed/mount/inode_to_path.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/weed/mount/inode_to_path.go b/weed/mount/inode_to_path.go index 
25e615ed6..84b952227 100644 --- a/weed/mount/inode_to_path.go +++ b/weed/mount/inode_to_path.go @@ -51,7 +51,8 @@ func (i *InodeToPath) GetInode(path util.FullPath) uint64 { defer i.Unlock() inode, found := i.path2inode[path] if !found { - glog.Fatalf("GetInode unknown inode %d", inode) + // glog.Fatalf("GetInode unknown inode for %s", path) + // this could be the parent for mount point } return inode } From 24290fed9d7f827b1098408fa291818b778740c5 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 06:02:21 -0800 Subject: [PATCH 30/39] add comments --- weed/mount/weedfs_file_mkrm.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/weed/mount/weedfs_file_mkrm.go b/weed/mount/weedfs_file_mkrm.go index 089cb540d..218ce24f1 100644 --- a/weed/mount/weedfs_file_mkrm.go +++ b/weed/mount/weedfs_file_mkrm.go @@ -21,6 +21,9 @@ import ( * will be called instead. */ func (wfs *WFS) Create(cancel <-chan struct{}, in *fuse.CreateIn, name string, out *fuse.CreateOut) (code fuse.Status) { + // if implemented, need to use + // inode := wfs.inodeToPath.Lookup(entryFullPath) + // to ensure nlookup counter return fuse.ENOSYS } From 417b59b89324881fb0155f528814c7cd29f88458 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 14:37:47 -0800 Subject: [PATCH 31/39] doc --- weed/mount/weedfs_forget.go | 1 + 1 file changed, 1 insertion(+) diff --git a/weed/mount/weedfs_forget.go b/weed/mount/weedfs_forget.go index 34e0eddc9..14b39882e 100644 --- a/weed/mount/weedfs_forget.go +++ b/weed/mount/weedfs_forget.go @@ -44,6 +44,7 @@ package mount * @param nlookup the number of lookups to forget */ /* +https://libfuse.github.io/doxygen/include_2fuse__lowlevel_8h.html int fuse_reply_entry ( fuse_req_t req, const struct fuse_entry_param * e From 3d0e9e51974ef594fedcff0b96dbee5f37ac1780 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 16:34:57 -0800 Subject: [PATCH 32/39] supports renaming --- weed/mount/inode_to_path.go | 25 ++++ weed/mount/weedfs_rename.go | 237 
++++++++++++++++++++++++++++++++++++ 2 files changed, 262 insertions(+) create mode 100644 weed/mount/weedfs_rename.go diff --git a/weed/mount/inode_to_path.go b/weed/mount/inode_to_path.go index 84b952227..529ecadda 100644 --- a/weed/mount/inode_to_path.go +++ b/weed/mount/inode_to_path.go @@ -37,6 +37,7 @@ func (i *InodeToPath) Lookup(path util.FullPath) uint64 { i.nextInodeId++ i.path2inode[path] = inode i.inode2path[inode] = &InodeEntry{path, 1} + println("add", path, inode) } else { i.inode2path[inode].nlookup++ } @@ -103,6 +104,30 @@ func (i *InodeToPath) RemovePath(path util.FullPath) { } } +func (i *InodeToPath) MovePath(sourcePath, targetPath util.FullPath) { + if sourcePath == "/" || targetPath == "/" { + return + } + i.Lock() + defer i.Unlock() + sourceInode, sourceFound := i.path2inode[sourcePath] + targetInode, targetFound := i.path2inode[targetPath] + if sourceFound { + delete(i.path2inode, sourcePath) + i.path2inode[targetPath] = sourceInode + } else { + // it is possible some source folder items has not been visited before + // so no need to worry about their source inodes + return + } + i.inode2path[sourceInode].FullPath = targetPath + if targetFound { + delete(i.inode2path, targetInode) + } else { + i.inode2path[sourceInode].nlookup++ + } +} + func (i *InodeToPath) Forget(inode, nlookup uint64) { if inode == 1 { return diff --git a/weed/mount/weedfs_rename.go b/weed/mount/weedfs_rename.go new file mode 100644 index 000000000..a4054b64a --- /dev/null +++ b/weed/mount/weedfs_rename.go @@ -0,0 +1,237 @@ +package mount + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/hanwen/go-fuse/v2/fs" + "github.com/hanwen/go-fuse/v2/fuse" + "io" + "strings" + "syscall" +) + +/** Rename a file + * + * If the target exists it should be atomically replaced. 
If + * the target's inode's lookup count is non-zero, the file + * system is expected to postpone any removal of the inode + * until the lookup count reaches zero (see description of the + * forget function). + * + * If this request is answered with an error code of ENOSYS, this is + * treated as a permanent failure with error code EINVAL, i.e. all + * future rename requests will fail with EINVAL without being + * sent to the filesystem process. + * + * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If + * RENAME_NOREPLACE is specified, the filesystem must not + * overwrite *newname* if it exists and return an error + * instead. If `RENAME_EXCHANGE` is specified, the filesystem + * must atomically exchange the two files, i.e. both must + * exist and neither may be deleted. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param parent inode number of the old parent directory + * @param name old name + * @param newparent inode number of the new parent directory + * @param newname new name + */ +/* +renameat2() + renameat2() has an additional flags argument. A renameat2() call + with a zero flags argument is equivalent to renameat(). + + The flags argument is a bit mask consisting of zero or more of + the following flags: + + RENAME_EXCHANGE + Atomically exchange oldpath and newpath. Both pathnames + must exist but may be of different types (e.g., one could + be a non-empty directory and the other a symbolic link). + + RENAME_NOREPLACE + Don't overwrite newpath of the rename. Return an error if + newpath already exists. + + RENAME_NOREPLACE can't be employed together with + RENAME_EXCHANGE. + + RENAME_NOREPLACE requires support from the underlying + filesystem. Support for various filesystems was added as + follows: + + * ext4 (Linux 3.15); + + * btrfs, tmpfs, and cifs (Linux 3.17); + + * xfs (Linux 4.0); + + * Support for many other filesystems was added in Linux + 4.9, including ext2, minix, reiserfs, jfs, vfat, and + bpf.
+ + RENAME_WHITEOUT (since Linux 3.18) + This operation makes sense only for overlay/union + filesystem implementations. + + Specifying RENAME_WHITEOUT creates a "whiteout" object at + the source of the rename at the same time as performing + the rename. The whole operation is atomic, so that if the + rename succeeds then the whiteout will also have been + created. + + A "whiteout" is an object that has special meaning in + union/overlay filesystem constructs. In these constructs, + multiple layers exist and only the top one is ever + modified. A whiteout on an upper layer will effectively + hide a matching file in the lower layer, making it appear + as if the file didn't exist. + + When a file that exists on the lower layer is renamed, the + file is first copied up (if not already on the upper + layer) and then renamed on the upper, read-write layer. + At the same time, the source file needs to be "whiteouted" + (so that the version of the source file in the lower layer + is rendered invisible). The whole operation needs to be + done atomically. + + When not part of a union/overlay, the whiteout appears as + a character device with a {0,0} device number. (Note that + other union/overlay implementations may employ different + methods for storing whiteout entries; specifically, BSD + union mount employs a separate inode type, DT_WHT, which, + while supported by some filesystems available in Linux, + such as CODA and XFS, is ignored by the kernel's whiteout + support code, as of Linux 4.19, at least.) + + RENAME_WHITEOUT requires the same privileges as creating a + device node (i.e., the CAP_MKNOD capability). + + RENAME_WHITEOUT can't be employed together with + RENAME_EXCHANGE. + + RENAME_WHITEOUT requires support from the underlying + filesystem. Among the filesystems that support it are + tmpfs (since Linux 3.18), ext4 (since Linux 3.18), XFS + (since Linux 4.1), f2fs (since Linux 4.2), btrfs (since + Linux 4.7), and ubifs (since Linux 4.9). 
+*/ +const ( + RenameEmptyFlag = 0 + RenameNoReplace = 1 + RenameExchange = fs.RENAME_EXCHANGE + RenameWhiteout = 3 +) + +func (wfs *WFS) Rename(cancel <-chan struct{}, in *fuse.RenameIn, oldName string, newName string) (code fuse.Status) { + if s := checkName(newName); s != fuse.OK { + return s + } + + switch in.Flags { + case RenameEmptyFlag: + case RenameNoReplace: + case RenameExchange: + case RenameWhiteout: + return fuse.ENOTSUP + default: + return fuse.EINVAL + } + + oldDir := wfs.inodeToPath.GetPath(in.NodeId) + oldPath := oldDir.Child(oldName) + newDir := wfs.inodeToPath.GetPath(in.Newdir) + newPath := newDir.Child(newName) + + glog.V(4).Infof("dir Rename %s => %s", oldPath, newPath) + + // update remote filer + err := wfs.WithFilerClient(true, func(client filer_pb.SeaweedFilerClient) error { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + request := &filer_pb.StreamRenameEntryRequest{ + OldDirectory: string(oldDir), + OldName: oldName, + NewDirectory: string(newDir), + NewName: newName, + Signatures: []int32{wfs.signature}, + } + + stream, err := client.StreamRenameEntry(ctx, request) + if err != nil { + code = fuse.EIO + return fmt.Errorf("dir AtomicRenameEntry %s => %s : %v", oldPath, newPath, err) + } + + for { + resp, recvErr := stream.Recv() + if recvErr != nil { + if recvErr == io.EOF { + break + } else { + if strings.Contains(recvErr.Error(), "not empty") { + code = fuse.Status(syscall.ENOTEMPTY) + } else if strings.Contains(recvErr.Error(), "not directory") { + code = fuse.ENOTDIR + } + return fmt.Errorf("dir Rename %s => %s receive: %v", oldPath, newPath, recvErr) + } + } + + if err = wfs.handleRenameResponse(ctx, resp); err != nil { + glog.V(0).Infof("dir Rename %s => %s : %v", oldPath, newPath, err) + return err + } + + } + + return nil + + }) + if err != nil { + glog.V(0).Infof("Link: %v", err) + return + } + + return fuse.OK + +} + +func (wfs *WFS) handleRenameResponse(ctx context.Context, resp 
*filer_pb.StreamRenameEntryResponse) error { + // comes from filer StreamRenameEntry, can only be create or delete entry + + if resp.EventNotification.NewEntry != nil { + // with new entry, the old entry name also exists. This is the first step to create new entry + newEntry := filer.FromPbEntry(resp.EventNotification.NewParentPath, resp.EventNotification.NewEntry) + if err := wfs.metaCache.AtomicUpdateEntryFromFiler(ctx, "", newEntry); err != nil { + return err + } + + oldParent, newParent := util.FullPath(resp.Directory), util.FullPath(resp.EventNotification.NewParentPath) + oldName, newName := resp.EventNotification.OldEntry.Name, resp.EventNotification.NewEntry.Name + + oldPath := oldParent.Child(oldName) + newPath := newParent.Child(newName) + + wfs.inodeToPath.MovePath(oldPath, newPath) + + // TODO change file handle + + } else if resp.EventNotification.OldEntry != nil { + // without new entry, only old entry name exists. This is the second step to delete old entry + if err := wfs.metaCache.AtomicUpdateEntryFromFiler(ctx, util.NewFullPath(resp.Directory, resp.EventNotification.OldEntry.Name), nil); err != nil { + return err + } + } + + return nil + +} From 072f923a9b22693750452deeebe5f45cad98160a Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 16:56:35 -0800 Subject: [PATCH 33/39] add doc --- weed/mount/weedfs_file_io.go | 217 +++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 weed/mount/weedfs_file_io.go diff --git a/weed/mount/weedfs_file_io.go b/weed/mount/weedfs_file_io.go new file mode 100644 index 000000000..0a31c3d70 --- /dev/null +++ b/weed/mount/weedfs_file_io.go @@ -0,0 +1,217 @@ +package mount + +import ( + "github.com/hanwen/go-fuse/v2/fuse" +) + +/** + * Read data + * + * Read should send exactly the number of bytes requested except + * on EOF or error, otherwise the rest of the data will be + * substituted with zeroes. 
An exception to this is when the file + * has been opened in 'direct_io' mode, in which case the return + * value of the read system call will reflect the return value of + * this operation. + * + * fi->fh will contain the value set by the open method, or will + * be undefined if the open method didn't set any value. + * + * Valid replies: + * fuse_reply_buf + * fuse_reply_iov + * fuse_reply_data + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param size number of bytes to read + * @param off offset to read from + * @param fi file information + */ +func (wfs *WFS) Read(cancel <-chan struct{}, in *fuse.ReadIn, buf []byte) (fuse.ReadResult, fuse.Status) { + return nil, fuse.ENOSYS +} + +/** + * Write data + * + * Write should return exactly the number of bytes requested + * except on error. An exception to this is when the file has + * been opened in 'direct_io' mode, in which case the return value + * of the write system call will reflect the return value of this + * operation. + * + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is + * expected to reset the setuid and setgid bits. + * + * fi->fh will contain the value set by the open method, or will + * be undefined if the open method didn't set any value. + * + * Valid replies: + * fuse_reply_write + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param buf data to write + * @param size number of bytes to write + * @param off offset to write to + * @param fi file information + */ +func (wfs *WFS) Write(cancel <-chan struct{}, in *fuse.WriteIn, data []byte) (written uint32, code fuse.Status) { + return 0, fuse.ENOSYS +} + +/** + * Open a file + * + * Open flags are available in fi->flags. The following rules + * apply. + * + * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be + * filtered out / handled by the kernel. 
+ * + * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used + * by the filesystem to check if the operation is + * permitted. If the ``-o default_permissions`` mount + * option is given, this check is already done by the + * kernel before calling open() and may thus be omitted by + * the filesystem. + * + * - When writeback caching is enabled, the kernel may send + * read requests even for files opened with O_WRONLY. The + * filesystem should be prepared to handle this. + * + * - When writeback caching is disabled, the filesystem is + * expected to properly handle the O_APPEND flag and ensure + * that each write is appending to the end of the file. + * + * - When writeback caching is enabled, the kernel will + * handle O_APPEND. However, unless all changes to the file + * come through the kernel this will not work reliably. The + * filesystem should thus either ignore the O_APPEND flag + * (and let the kernel handle it), or return an error + * (indicating that reliable O_APPEND is not available). + * + * Filesystem may store an arbitrary file handle (pointer, + * index, etc) in fi->fh, and use this in all other file + * operations (read, write, flush, release, fsync). + * + * Filesystem may also implement stateless file I/O and not store + * anything in fi->fh. + * + * There are also some flags (direct_io, keep_cache) which the + * filesystem may set in fi, to change the way the file is opened. + * See the fuse_file_info structure in fuse_common.h for more details. + * + * If this request is answered with an error code of ENOSYS + * and FUSE_CAP_NO_OPEN_SUPPORT is set in + * `fuse_conn_info.capable`, this is treated as success and + * future calls to open and release will also succeed without being + * sent to the filesystem process.
+ * + * Valid replies: + * fuse_reply_open + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information +*/ +func (wfs *WFS) Open(cancel <-chan struct{}, in *fuse.OpenIn, out *fuse.OpenOut) (status fuse.Status) { + return fuse.ENOSYS +} + +/** + * Flush method + * + * This is called on each close() of the opened file. + * + * Since file descriptors can be duplicated (dup, dup2, fork), for + * one open call there may be many flush calls. + * + * Filesystems shouldn't assume that flush will always be called + * after some writes, or that if will be called at all. + * + * fi->fh will contain the value set by the open method, or will + * be undefined if the open method didn't set any value. + * + * NOTE: the name of the method is misleading, since (unlike + * fsync) the filesystem is not forced to flush pending writes. + * One reason to flush data is if the filesystem wants to return + * write errors during close. However, such use is non-portable + * because POSIX does not require [close] to wait for delayed I/O to + * complete. + * + * If the filesystem supports file locking operations (setlk, + * getlk) it should remove all locks belonging to 'fi->owner'. + * + * If this request is answered with an error code of ENOSYS, + * this is treated as success and future calls to flush() will + * succeed automatically without being send to the filesystem + * process. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + * + * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html + */ +func (wfs *WFS) Flush(cancel <-chan struct{}, in *fuse.FlushIn) fuse.Status { + return fuse.ENOSYS +} + +/** + * Release an open file + * + * Release is called when there are no more references to an open + * file: all file descriptors are closed and all memory mappings + * are unmapped. 
+ * + * For every open call there will be exactly one release call (unless + * the filesystem is force-unmounted). + * + * The filesystem may reply with an error, but error values are + * not returned to close() or munmap() which triggered the + * release. + * + * fi->fh will contain the value set by the open method, or will + * be undefined if the open method didn't set any value. + * fi->flags will contain the same flags as for open. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + */ +func (wfs *WFS) Release(cancel <-chan struct{}, in *fuse.ReleaseIn) { +} + +/** + * Synchronize file contents + * + * If the datasync parameter is non-zero, then only the user data + * should be flushed, not the meta data. + * + * If this request is answered with an error code of ENOSYS, + * this is treated as success and future calls to fsync() will + * succeed automatically without being send to the filesystem + * process. 
+ * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param datasync flag indicating if only data should be flushed + * @param fi file information + */ +func (wfs *WFS) Fsync(cancel <-chan struct{}, in *fuse.FsyncIn) (code fuse.Status) { + return fuse.ENOSYS +} From bb9919b07a52c8af3295074d797792b0ed5d42de Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 19:14:34 -0800 Subject: [PATCH 34/39] add open release, refactor --- weed/mount/filehandle.go | 16 ++++ weed/mount/filehandle_map.go | 79 +++++++++++++++++++ weed/mount/inode_to_path.go | 1 - weed/mount/weedfs.go | 2 + weed/mount/weedfs_file_io.go | 131 ++------------------------------ weed/mount/weedfs_file_read.go | 34 +++++++++ weed/mount/weedfs_file_sync.go | 70 +++++++++++++++++ weed/mount/weedfs_file_write.go | 35 +++++++++ 8 files changed, 242 insertions(+), 126 deletions(-) create mode 100644 weed/mount/filehandle.go create mode 100644 weed/mount/filehandle_map.go create mode 100644 weed/mount/weedfs_file_read.go create mode 100644 weed/mount/weedfs_file_sync.go create mode 100644 weed/mount/weedfs_file_write.go diff --git a/weed/mount/filehandle.go b/weed/mount/filehandle.go new file mode 100644 index 000000000..551394262 --- /dev/null +++ b/weed/mount/filehandle.go @@ -0,0 +1,16 @@ +package mount + +import "github.com/hanwen/go-fuse/v2/fuse" + +func (wfs *WFS) AcquireHandle(inode uint64, uid, gid uint32) (fileHandle *FileHandle, code fuse.Status) { + _, entry, status := wfs.maybeReadEntry(inode) + if status == fuse.OK { + fileHandle = wfs.fhmap.GetFileHandle(inode) + fileHandle.entry = entry + } + return +} + +func (wfs *WFS) ReleaseHandle(handleId FileHandleId) { + wfs.fhmap.ReleaseByHandle(handleId) +} diff --git a/weed/mount/filehandle_map.go b/weed/mount/filehandle_map.go new file mode 100644 index 000000000..ca010dabb --- /dev/null +++ b/weed/mount/filehandle_map.go @@ -0,0 +1,79 @@ +package mount + +import ( + 
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "sync" +) + +type FileHandleId uint64 + +type FileHandleToInode struct { + sync.RWMutex + nextFh FileHandleId + inode2fh map[uint64]*FileHandle + fh2inode map[FileHandleId]uint64 +} +type FileHandle struct { + fh FileHandleId + counter int64 + entry *filer_pb.Entry + inode uint64 +} + +func NewFileHandleToInode() *FileHandleToInode { + return &FileHandleToInode{ + inode2fh: make(map[uint64]*FileHandle), + fh2inode: make(map[FileHandleId]uint64), + nextFh: 0, + } +} + +func (i *FileHandleToInode) GetFileHandle(inode uint64) *FileHandle { + i.Lock() + defer i.Unlock() + fh, found := i.inode2fh[inode] + if !found { + fh = &FileHandle{ + fh: i.nextFh, + counter: 1, + inode: inode, + } + i.nextFh++ + i.inode2fh[inode] = fh + i.fh2inode[fh.fh] = inode + } else { + fh.counter++ + } + return fh +} + +func (i *FileHandleToInode) ReleaseByInode(inode uint64) { + i.Lock() + defer i.Unlock() + fh, found := i.inode2fh[inode] + if found { + fh.counter-- + if fh.counter <= 0 { + delete(i.inode2fh, inode) + delete(i.fh2inode, fh.fh) + } + } +} +func (i *FileHandleToInode) ReleaseByHandle(fh FileHandleId) { + i.Lock() + defer i.Unlock() + inode, found := i.fh2inode[fh] + if found { + fhHandle, fhFound := i.inode2fh[inode] + if !fhFound { + delete(i.fh2inode, fh) + } else { + fhHandle.counter-- + if fhHandle.counter <= 0 { + delete(i.inode2fh, inode) + delete(i.fh2inode, fhHandle.fh) + } + } + + } +} diff --git a/weed/mount/inode_to_path.go b/weed/mount/inode_to_path.go index 529ecadda..590531397 100644 --- a/weed/mount/inode_to_path.go +++ b/weed/mount/inode_to_path.go @@ -37,7 +37,6 @@ func (i *InodeToPath) Lookup(path util.FullPath) uint64 { i.nextInodeId++ i.path2inode[path] = inode i.inode2path[inode] = &InodeEntry{path, 1} - println("add", path, inode) } else { i.inode2path[inode].nlookup++ } diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index a36e4dc97..1e9f07df9 100644 --- a/weed/mount/weedfs.go +++ 
b/weed/mount/weedfs.go @@ -60,6 +60,7 @@ type WFS struct { root Directory signature int32 inodeToPath *InodeToPath + fhmap *FileHandleToInode } func NewSeaweedFileSystem(option *Option) *WFS { @@ -68,6 +69,7 @@ func NewSeaweedFileSystem(option *Option) *WFS { option: option, signature: util.RandomInt32(), inodeToPath: NewInodeToPath(), + fhmap: NewFileHandleToInode(), } wfs.root = Directory{ diff --git a/weed/mount/weedfs_file_io.go b/weed/mount/weedfs_file_io.go index 0a31c3d70..7c8d1babc 100644 --- a/weed/mount/weedfs_file_io.go +++ b/weed/mount/weedfs_file_io.go @@ -4,65 +4,6 @@ import ( "github.com/hanwen/go-fuse/v2/fuse" ) -/** - * Read data - * - * Read should send exactly the number of bytes requested except - * on EOF or error, otherwise the rest of the data will be - * substituted with zeroes. An exception to this is when the file - * has been opened in 'direct_io' mode, in which case the return - * value of the read system call will reflect the return value of - * this operation. - * - * fi->fh will contain the value set by the open method, or will - * be undefined if the open method didn't set any value. - * - * Valid replies: - * fuse_reply_buf - * fuse_reply_iov - * fuse_reply_data - * fuse_reply_err - * - * @param req request handle - * @param ino the inode number - * @param size number of bytes to read - * @param off offset to read from - * @param fi file information - */ -func (wfs *WFS) Read(cancel <-chan struct{}, in *fuse.ReadIn, buf []byte) (fuse.ReadResult, fuse.Status) { - return nil, fuse.ENOSYS -} - -/** - * Write data - * - * Write should return exactly the number of bytes requested - * except on error. An exception to this is when the file has - * been opened in 'direct_io' mode, in which case the return value - * of the write system call will reflect the return value of this - * operation. - * - * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is - * expected to reset the setuid and setgid bits. 
- * - * fi->fh will contain the value set by the open method, or will - * be undefined if the open method didn't set any value. - * - * Valid replies: - * fuse_reply_write - * fuse_reply_err - * - * @param req request handle - * @param ino the inode number - * @param buf data to write - * @param size number of bytes to write - * @param off offset to write to - * @param fi file information - */ -func (wfs *WFS) Write(cancel <-chan struct{}, in *fuse.WriteIn, data []byte) (written uint32, code fuse.Status) { - return 0, fuse.ENOSYS -} - /** * Open a file * @@ -120,49 +61,11 @@ func (wfs *WFS) Write(cancel <-chan struct{}, in *fuse.WriteIn, data []byte) (wr * @param fi file information */ func (wfs *WFS) Open(cancel <-chan struct{}, in *fuse.OpenIn, out *fuse.OpenOut) (status fuse.Status) { - return fuse.ENOSYS -} - -/** - * Flush method - * - * This is called on each close() of the opened file. - * - * Since file descriptors can be duplicated (dup, dup2, fork), for - * one open call there may be many flush calls. - * - * Filesystems shouldn't assume that flush will always be called - * after some writes, or that if will be called at all. - * - * fi->fh will contain the value set by the open method, or will - * be undefined if the open method didn't set any value. - * - * NOTE: the name of the method is misleading, since (unlike - * fsync) the filesystem is not forced to flush pending writes. - * One reason to flush data is if the filesystem wants to return - * write errors during close. However, such use is non-portable - * because POSIX does not require [close] to wait for delayed I/O to - * complete. - * - * If the filesystem supports file locking operations (setlk, - * getlk) it should remove all locks belonging to 'fi->owner'. - * - * If this request is answered with an error code of ENOSYS, - * this is treated as success and future calls to flush() will - * succeed automatically without being send to the filesystem - * process. 
- * - * Valid replies: - * fuse_reply_err - * - * @param req request handle - * @param ino the inode number - * @param fi file information - * - * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html - */ -func (wfs *WFS) Flush(cancel <-chan struct{}, in *fuse.FlushIn) fuse.Status { - return fuse.ENOSYS + fileHandle, code := wfs.AcquireHandle(in.NodeId, in.Uid, in.Gid) + if code == fuse.OK { + out.Fh = uint64(fileHandle.fh) + } + return code } /** @@ -191,27 +94,5 @@ func (wfs *WFS) Flush(cancel <-chan struct{}, in *fuse.FlushIn) fuse.Status { * @param fi file information */ func (wfs *WFS) Release(cancel <-chan struct{}, in *fuse.ReleaseIn) { -} - -/** - * Synchronize file contents - * - * If the datasync parameter is non-zero, then only the user data - * should be flushed, not the meta data. - * - * If this request is answered with an error code of ENOSYS, - * this is treated as success and future calls to fsync() will - * succeed automatically without being send to the filesystem - * process. - * - * Valid replies: - * fuse_reply_err - * - * @param req request handle - * @param ino the inode number - * @param datasync flag indicating if only data should be flushed - * @param fi file information - */ -func (wfs *WFS) Fsync(cancel <-chan struct{}, in *fuse.FsyncIn) (code fuse.Status) { - return fuse.ENOSYS + wfs.ReleaseHandle(FileHandleId(in.Fh)) } diff --git a/weed/mount/weedfs_file_read.go b/weed/mount/weedfs_file_read.go new file mode 100644 index 000000000..d9ad1f4ea --- /dev/null +++ b/weed/mount/weedfs_file_read.go @@ -0,0 +1,34 @@ +package mount + +import ( + "github.com/hanwen/go-fuse/v2/fuse" +) + +/** + * Read data + * + * Read should send exactly the number of bytes requested except + * on EOF or error, otherwise the rest of the data will be + * substituted with zeroes. 
An exception to this is when the file + * has been opened in 'direct_io' mode, in which case the return + * value of the read system call will reflect the return value of + * this operation. + * + * fi->fh will contain the value set by the open method, or will + * be undefined if the open method didn't set any value. + * + * Valid replies: + * fuse_reply_buf + * fuse_reply_iov + * fuse_reply_data + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param size number of bytes to read + * @param off offset to read from + * @param fi file information + */ +func (wfs *WFS) Read(cancel <-chan struct{}, in *fuse.ReadIn, buf []byte) (fuse.ReadResult, fuse.Status) { + return nil, fuse.ENOSYS +} diff --git a/weed/mount/weedfs_file_sync.go b/weed/mount/weedfs_file_sync.go new file mode 100644 index 000000000..1b89c1ecb --- /dev/null +++ b/weed/mount/weedfs_file_sync.go @@ -0,0 +1,70 @@ +package mount + +import ( + "github.com/hanwen/go-fuse/v2/fuse" +) + +/** + * Flush method + * + * This is called on each close() of the opened file. + * + * Since file descriptors can be duplicated (dup, dup2, fork), for + * one open call there may be many flush calls. + * + * Filesystems shouldn't assume that flush will always be called + * after some writes, or that if will be called at all. + * + * fi->fh will contain the value set by the open method, or will + * be undefined if the open method didn't set any value. + * + * NOTE: the name of the method is misleading, since (unlike + * fsync) the filesystem is not forced to flush pending writes. + * One reason to flush data is if the filesystem wants to return + * write errors during close. However, such use is non-portable + * because POSIX does not require [close] to wait for delayed I/O to + * complete. + * + * If the filesystem supports file locking operations (setlk, + * getlk) it should remove all locks belonging to 'fi->owner'. 
+ * + * If this request is answered with an error code of ENOSYS, + * this is treated as success and future calls to flush() will + * succeed automatically without being send to the filesystem + * process. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + * + * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html + */ +func (wfs *WFS) Flush(cancel <-chan struct{}, in *fuse.FlushIn) fuse.Status { + return fuse.ENOSYS +} + +/** + * Synchronize file contents + * + * If the datasync parameter is non-zero, then only the user data + * should be flushed, not the meta data. + * + * If this request is answered with an error code of ENOSYS, + * this is treated as success and future calls to fsync() will + * succeed automatically without being send to the filesystem + * process. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param datasync flag indicating if only data should be flushed + * @param fi file information + */ +func (wfs *WFS) Fsync(cancel <-chan struct{}, in *fuse.FsyncIn) (code fuse.Status) { + return fuse.ENOSYS +} diff --git a/weed/mount/weedfs_file_write.go b/weed/mount/weedfs_file_write.go new file mode 100644 index 000000000..72152d72e --- /dev/null +++ b/weed/mount/weedfs_file_write.go @@ -0,0 +1,35 @@ +package mount + +import ( + "github.com/hanwen/go-fuse/v2/fuse" +) + +/** + * Write data + * + * Write should return exactly the number of bytes requested + * except on error. An exception to this is when the file has + * been opened in 'direct_io' mode, in which case the return value + * of the write system call will reflect the return value of this + * operation. + * + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is + * expected to reset the setuid and setgid bits. 
+ * + * fi->fh will contain the value set by the open method, or will + * be undefined if the open method didn't set any value. + * + * Valid replies: + * fuse_reply_write + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param buf data to write + * @param size number of bytes to write + * @param off offset to write to + * @param fi file information + */ +func (wfs *WFS) Write(cancel <-chan struct{}, in *fuse.WriteIn, data []byte) (written uint32, code fuse.Status) { + return 0, fuse.ENOSYS +} From f3c1e0052127a165955013cf7ba6483dcbda3391 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 19:16:56 -0800 Subject: [PATCH 35/39] rename --- weed/mount/{filehandle.go => weedfs_filehandle.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename weed/mount/{filehandle.go => weedfs_filehandle.go} (100%) diff --git a/weed/mount/filehandle.go b/weed/mount/weedfs_filehandle.go similarity index 100% rename from weed/mount/filehandle.go rename to weed/mount/weedfs_filehandle.go From 2b955c171345334a4034888c69547662150ceb91 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 22:50:44 -0800 Subject: [PATCH 36/39] support read --- weed/mount/dirty_pages_chunked.go | 99 ++++++++++ weed/mount/filehandle.go | 94 +++++++++ weed/mount/filehandle_map.go | 26 ++- weed/mount/filehandle_read.go | 114 +++++++++++ weed/mount/page_writer.go | 95 +++++++++ weed/mount/page_writer/chunk_interval_list.go | 115 +++++++++++ .../page_writer/chunk_interval_list_test.go | 49 +++++ weed/mount/page_writer/dirty_pages.go | 30 +++ weed/mount/page_writer/page_chunk.go | 16 ++ weed/mount/page_writer/page_chunk_mem.go | 69 +++++++ weed/mount/page_writer/page_chunk_swapfile.go | 121 ++++++++++++ weed/mount/page_writer/upload_pipeline.go | 182 ++++++++++++++++++ .../mount/page_writer/upload_pipeline_lock.go | 63 ++++++ .../mount/page_writer/upload_pipeline_test.go | 47 +++++ weed/mount/weedfs.go | 42 +++- weed/mount/weedfs_file_read.go | 29 ++- 
weed/mount/weedfs_filehandle.go | 6 +- weed/mount/weedfs_write.go | 84 ++++++++ 18 files changed, 1257 insertions(+), 24 deletions(-) create mode 100644 weed/mount/dirty_pages_chunked.go create mode 100644 weed/mount/filehandle.go create mode 100644 weed/mount/filehandle_read.go create mode 100644 weed/mount/page_writer.go create mode 100644 weed/mount/page_writer/chunk_interval_list.go create mode 100644 weed/mount/page_writer/chunk_interval_list_test.go create mode 100644 weed/mount/page_writer/dirty_pages.go create mode 100644 weed/mount/page_writer/page_chunk.go create mode 100644 weed/mount/page_writer/page_chunk_mem.go create mode 100644 weed/mount/page_writer/page_chunk_swapfile.go create mode 100644 weed/mount/page_writer/upload_pipeline.go create mode 100644 weed/mount/page_writer/upload_pipeline_lock.go create mode 100644 weed/mount/page_writer/upload_pipeline_test.go create mode 100644 weed/mount/weedfs_write.go diff --git a/weed/mount/dirty_pages_chunked.go b/weed/mount/dirty_pages_chunked.go new file mode 100644 index 000000000..5ffcff83a --- /dev/null +++ b/weed/mount/dirty_pages_chunked.go @@ -0,0 +1,99 @@ +package mount + +import ( + "fmt" + "github.com/chrislusf/seaweedfs/weed/filesys/page_writer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "io" + "sync" + "time" +) + +type ChunkedDirtyPages struct { + fh *FileHandle + writeWaitGroup sync.WaitGroup + lastErr error + collection string + replication string + uploadPipeline *page_writer.UploadPipeline + hasWrites bool +} + +var ( + _ = page_writer.DirtyPages(&ChunkedDirtyPages{}) +) + +func newMemoryChunkPages(fh *FileHandle, chunkSize int64) *ChunkedDirtyPages { + + dirtyPages := &ChunkedDirtyPages{ + fh: fh, + } + + dirtyPages.uploadPipeline = page_writer.NewUploadPipeline(fh.wfs.concurrentWriters, chunkSize, dirtyPages.saveChunkedFileIntevalToStorage, fh.wfs.option.ConcurrentWriters) + + return dirtyPages +} + +func (pages *ChunkedDirtyPages) 
AddPage(offset int64, data []byte) { + pages.hasWrites = true + + glog.V(4).Infof("%v memory AddPage [%d, %d)", pages.fh, offset, offset+int64(len(data))) + pages.uploadPipeline.SaveDataAt(data, offset) + + return +} + +func (pages *ChunkedDirtyPages) FlushData() error { + if !pages.hasWrites { + return nil + } + pages.uploadPipeline.FlushAll() + if pages.lastErr != nil { + return fmt.Errorf("flush data: %v", pages.lastErr) + } + return nil +} + +func (pages *ChunkedDirtyPages) ReadDirtyDataAt(data []byte, startOffset int64) (maxStop int64) { + if !pages.hasWrites { + return + } + return pages.uploadPipeline.MaybeReadDataAt(data, startOffset) +} + +func (pages *ChunkedDirtyPages) GetStorageOptions() (collection, replication string) { + return pages.collection, pages.replication +} + +func (pages *ChunkedDirtyPages) saveChunkedFileIntevalToStorage(reader io.Reader, offset int64, size int64, cleanupFn func()) { + + mtime := time.Now().UnixNano() + defer cleanupFn() + + fileFullPath := pages.fh.FullPath() + fileName := fileFullPath.Name() + chunk, collection, replication, err := pages.fh.wfs.saveDataAsChunk(fileFullPath)(reader, fileName, offset) + if err != nil { + glog.V(0).Infof("%v saveToStorage [%d,%d): %v", fileFullPath, offset, offset+size, err) + pages.lastErr = err + return + } + chunk.Mtime = mtime + pages.collection, pages.replication = collection, replication + pages.fh.addChunks([]*filer_pb.FileChunk{chunk}) + pages.fh.entryViewCache = nil + glog.V(3).Infof("%v saveToStorage %s [%d,%d)", fileFullPath, chunk.FileId, offset, offset+size) + +} + +func (pages ChunkedDirtyPages) Destroy() { + pages.uploadPipeline.Shutdown() +} + +func (pages *ChunkedDirtyPages) LockForRead(startOffset, stopOffset int64) { + pages.uploadPipeline.LockForRead(startOffset, stopOffset) +} +func (pages *ChunkedDirtyPages) UnlockForRead(startOffset, stopOffset int64) { + pages.uploadPipeline.UnlockForRead(startOffset, stopOffset) +} diff --git a/weed/mount/filehandle.go 
b/weed/mount/filehandle.go new file mode 100644 index 000000000..0d5481b30 --- /dev/null +++ b/weed/mount/filehandle.go @@ -0,0 +1,94 @@ +package mount + +import ( + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "io" + "sort" + "sync" +) + +type FileHandleId uint64 + +type FileHandle struct { + fh FileHandleId + counter int64 + entry *filer_pb.Entry + chunkAddLock sync.Mutex + inode uint64 + wfs *WFS + + // cache file has been written to + dirtyPages *PageWriter + entryViewCache []filer.VisibleInterval + reader io.ReaderAt + contentType string + handle uint64 + sync.Mutex + + isDeleted bool +} + +func newFileHandle(wfs *WFS, handleId FileHandleId, inode uint64, entry *filer_pb.Entry) *FileHandle { + fh := &FileHandle{ + fh: handleId, + counter: 1, + inode: inode, + wfs: wfs, + } + // dirtyPages: newContinuousDirtyPages(file, writeOnly), + fh.dirtyPages = newPageWriter(fh, wfs.option.ChunkSizeLimit) + if entry != nil { + entry.Attributes.FileSize = filer.FileSize(entry) + } + + return fh +} + +func (fh *FileHandle) FullPath() util.FullPath { + return fh.wfs.inodeToPath.GetPath(fh.inode) +} + +func (fh *FileHandle) addChunks(chunks []*filer_pb.FileChunk) { + + // find the earliest incoming chunk + newChunks := chunks + earliestChunk := newChunks[0] + for i := 1; i < len(newChunks); i++ { + if lessThan(earliestChunk, newChunks[i]) { + earliestChunk = newChunks[i] + } + } + + if fh.entry == nil { + return + } + + // pick out-of-order chunks from existing chunks + for _, chunk := range fh.entry.Chunks { + if lessThan(earliestChunk, chunk) { + chunks = append(chunks, chunk) + } + } + + // sort incoming chunks + sort.Slice(chunks, func(i, j int) bool { + return lessThan(chunks[i], chunks[j]) + }) + + glog.V(4).Infof("%s existing %d chunks adds %d more", fh.FullPath(), len(fh.entry.Chunks), len(chunks)) + + fh.chunkAddLock.Lock() + 
fh.entry.Chunks = append(fh.entry.Chunks, newChunks...) + fh.entryViewCache = nil + fh.chunkAddLock.Unlock() +} + +func lessThan(a, b *filer_pb.FileChunk) bool { + if a.Mtime == b.Mtime { + return a.Fid.FileKey < b.Fid.FileKey + } + return a.Mtime < b.Mtime +} diff --git a/weed/mount/filehandle_map.go b/weed/mount/filehandle_map.go index ca010dabb..50ca6bcea 100644 --- a/weed/mount/filehandle_map.go +++ b/weed/mount/filehandle_map.go @@ -5,20 +5,12 @@ import ( "sync" ) -type FileHandleId uint64 - type FileHandleToInode struct { sync.RWMutex nextFh FileHandleId inode2fh map[uint64]*FileHandle fh2inode map[FileHandleId]uint64 } -type FileHandle struct { - fh FileHandleId - counter int64 - entry *filer_pb.Entry - inode uint64 -} func NewFileHandleToInode() *FileHandleToInode { return &FileHandleToInode{ @@ -28,16 +20,22 @@ func NewFileHandleToInode() *FileHandleToInode { } } -func (i *FileHandleToInode) GetFileHandle(inode uint64) *FileHandle { +func (i *FileHandleToInode) GetFileHandle(fh FileHandleId) *FileHandle { + i.RLock() + defer i.RUnlock() + inode, found := i.fh2inode[fh] + if found { + return i.inode2fh[inode] + } + return nil +} + +func (i *FileHandleToInode) AcquireFileHandle(wfs *WFS, inode uint64, entry *filer_pb.Entry) *FileHandle { i.Lock() defer i.Unlock() fh, found := i.inode2fh[inode] if !found { - fh = &FileHandle{ - fh: i.nextFh, - counter: 1, - inode: inode, - } + fh = newFileHandle(wfs, i.nextFh, inode, entry) i.nextFh++ i.inode2fh[inode] = fh i.fh2inode[fh.fh] = inode diff --git a/weed/mount/filehandle_read.go b/weed/mount/filehandle_read.go new file mode 100644 index 000000000..71166169e --- /dev/null +++ b/weed/mount/filehandle_read.go @@ -0,0 +1,114 @@ +package mount + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "io" + "math" +) + +func (fh *FileHandle) lockForRead(startOffset int64, size int) { + 
fh.dirtyPages.LockForRead(startOffset, startOffset+int64(size)) +} +func (fh *FileHandle) unlockForRead(startOffset int64, size int) { + fh.dirtyPages.UnlockForRead(startOffset, startOffset+int64(size)) +} + +func (fh *FileHandle) readFromDirtyPages(buff []byte, startOffset int64) (maxStop int64) { + maxStop = fh.dirtyPages.ReadDirtyDataAt(buff, startOffset) + return +} + +func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) { + + fileFullPath := fh.FullPath() + + entry := fh.entry + if entry == nil { + return 0, io.EOF + } + + if entry.IsInRemoteOnly() { + glog.V(4).Infof("download remote entry %s", fileFullPath) + newEntry, err := fh.downloadRemoteEntry(entry) + if err != nil { + glog.V(1).Infof("download remote entry %s: %v", fileFullPath, err) + return 0, err + } + entry = newEntry + } + + fileSize := int64(filer.FileSize(entry)) + + if fileSize == 0 { + glog.V(1).Infof("empty fh %v", fileFullPath) + return 0, io.EOF + } + + if offset+int64(len(buff)) <= int64(len(entry.Content)) { + totalRead := copy(buff, entry.Content[offset:]) + glog.V(4).Infof("file handle read cached %s [%d,%d] %d", fileFullPath, offset, offset+int64(totalRead), totalRead) + return int64(totalRead), nil + } + + var chunkResolveErr error + if fh.entryViewCache == nil { + fh.entryViewCache, chunkResolveErr = filer.NonOverlappingVisibleIntervals(fh.wfs.LookupFn(), entry.Chunks, 0, math.MaxInt64) + if chunkResolveErr != nil { + return 0, fmt.Errorf("fail to resolve chunk manifest: %v", chunkResolveErr) + } + fh.reader = nil + } + + reader := fh.reader + if reader == nil { + chunkViews := filer.ViewFromVisibleIntervals(fh.entryViewCache, 0, math.MaxInt64) + glog.V(4).Infof("file handle read %s [%d,%d) from %d views", fileFullPath, offset, offset+int64(len(buff)), len(chunkViews)) + for _, chunkView := range chunkViews { + glog.V(4).Infof(" read %s [%d,%d) from chunk %+v", fileFullPath, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size), 
chunkView.FileId) + } + reader = filer.NewChunkReaderAtFromClient(fh.wfs.LookupFn(), chunkViews, fh.wfs.chunkCache, fileSize) + } + fh.reader = reader + + totalRead, err := reader.ReadAt(buff, offset) + + if err != nil && err != io.EOF { + glog.Errorf("file handle read %s: %v", fileFullPath, err) + } + + glog.V(4).Infof("file handle read %s [%d,%d] %d : %v", fileFullPath, offset, offset+int64(totalRead), totalRead, err) + + return int64(totalRead), err +} + +func (fh *FileHandle) downloadRemoteEntry(entry *filer_pb.Entry) (*filer_pb.Entry, error) { + + fileFullPath := fh.FullPath() + dir, _ := fileFullPath.DirAndName() + + err := fh.wfs.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.CacheRemoteObjectToLocalClusterRequest{ + Directory: string(dir), + Name: entry.Name, + } + + glog.V(4).Infof("download entry: %v", request) + resp, err := client.CacheRemoteObjectToLocalCluster(context.Background(), request) + if err != nil { + return fmt.Errorf("CacheRemoteObjectToLocalCluster file %s: %v", fileFullPath, err) + } + + entry = resp.Entry + + fh.wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, resp.Entry)) + + return nil + }) + + return entry, err +} diff --git a/weed/mount/page_writer.go b/weed/mount/page_writer.go new file mode 100644 index 000000000..eaf1fc176 --- /dev/null +++ b/weed/mount/page_writer.go @@ -0,0 +1,95 @@ +package mount + +import ( + "github.com/chrislusf/seaweedfs/weed/filesys/page_writer" + "github.com/chrislusf/seaweedfs/weed/glog" +) + +type PageWriter struct { + fh *FileHandle + collection string + replication string + chunkSize int64 + + randomWriter page_writer.DirtyPages +} + +var ( + _ = page_writer.DirtyPages(&PageWriter{}) +) + +func newPageWriter(fh *FileHandle, chunkSize int64) *PageWriter { + pw := &PageWriter{ + fh: fh, + chunkSize: chunkSize, + randomWriter: newMemoryChunkPages(fh, chunkSize), + // randomWriter: newTempFileDirtyPages(fh.f, chunkSize), 
+ } + return pw +} + +func (pw *PageWriter) AddPage(offset int64, data []byte) { + + glog.V(4).Infof("%v AddPage [%d, %d)", pw.fh, offset, offset+int64(len(data))) + + chunkIndex := offset / pw.chunkSize + for i := chunkIndex; len(data) > 0; i++ { + writeSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset) + pw.addToOneChunk(i, offset, data[:writeSize]) + offset += writeSize + data = data[writeSize:] + } +} + +func (pw *PageWriter) addToOneChunk(chunkIndex, offset int64, data []byte) { + pw.randomWriter.AddPage(offset, data) +} + +func (pw *PageWriter) FlushData() error { + return pw.randomWriter.FlushData() +} + +func (pw *PageWriter) ReadDirtyDataAt(data []byte, offset int64) (maxStop int64) { + glog.V(4).Infof("ReadDirtyDataAt %v [%d, %d)", pw.fh, offset, offset+int64(len(data))) + + chunkIndex := offset / pw.chunkSize + for i := chunkIndex; len(data) > 0; i++ { + readSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset) + + maxStop = pw.randomWriter.ReadDirtyDataAt(data[:readSize], offset) + + offset += readSize + data = data[readSize:] + } + + return +} + +func (pw *PageWriter) GetStorageOptions() (collection, replication string) { + return pw.randomWriter.GetStorageOptions() +} + +func (pw *PageWriter) LockForRead(startOffset, stopOffset int64) { + pw.randomWriter.LockForRead(startOffset, stopOffset) +} + +func (pw *PageWriter) UnlockForRead(startOffset, stopOffset int64) { + pw.randomWriter.UnlockForRead(startOffset, stopOffset) +} + +func (pw *PageWriter) Destroy() { + pw.randomWriter.Destroy() +} + +func max(x, y int64) int64 { + if x > y { + return x + } + return y +} +func min(x, y int64) int64 { + if x < y { + return x + } + return y +} diff --git a/weed/mount/page_writer/chunk_interval_list.go b/weed/mount/page_writer/chunk_interval_list.go new file mode 100644 index 000000000..e6dc5d1f5 --- /dev/null +++ b/weed/mount/page_writer/chunk_interval_list.go @@ -0,0 +1,115 @@ +package page_writer + +import "math" + +// ChunkWrittenInterval mark one 
written interval within one page chunk +type ChunkWrittenInterval struct { + StartOffset int64 + stopOffset int64 + prev *ChunkWrittenInterval + next *ChunkWrittenInterval +} + +func (interval *ChunkWrittenInterval) Size() int64 { + return interval.stopOffset - interval.StartOffset +} + +func (interval *ChunkWrittenInterval) isComplete(chunkSize int64) bool { + return interval.stopOffset-interval.StartOffset == chunkSize +} + +// ChunkWrittenIntervalList mark written intervals within one page chunk +type ChunkWrittenIntervalList struct { + head *ChunkWrittenInterval + tail *ChunkWrittenInterval +} + +func newChunkWrittenIntervalList() *ChunkWrittenIntervalList { + list := &ChunkWrittenIntervalList{ + head: &ChunkWrittenInterval{ + StartOffset: -1, + stopOffset: -1, + }, + tail: &ChunkWrittenInterval{ + StartOffset: math.MaxInt64, + stopOffset: math.MaxInt64, + }, + } + list.head.next = list.tail + list.tail.prev = list.head + return list +} + +func (list *ChunkWrittenIntervalList) MarkWritten(startOffset, stopOffset int64) { + interval := &ChunkWrittenInterval{ + StartOffset: startOffset, + stopOffset: stopOffset, + } + list.addInterval(interval) +} + +func (list *ChunkWrittenIntervalList) IsComplete(chunkSize int64) bool { + return list.size() == 1 && list.head.next.isComplete(chunkSize) +} +func (list *ChunkWrittenIntervalList) WrittenSize() (writtenByteCount int64) { + for t := list.head; t != nil; t = t.next { + writtenByteCount += t.Size() + } + return +} + +func (list *ChunkWrittenIntervalList) addInterval(interval *ChunkWrittenInterval) { + + p := list.head + for ; p.next != nil && p.next.StartOffset <= interval.StartOffset; p = p.next { + } + q := list.tail + for ; q.prev != nil && q.prev.stopOffset >= interval.stopOffset; q = q.prev { + } + + if interval.StartOffset <= p.stopOffset && q.StartOffset <= interval.stopOffset { + // merge p and q together + p.stopOffset = q.stopOffset + unlinkNodesBetween(p, q.next) + return + } + if interval.StartOffset <= 
p.stopOffset { + // merge new interval into p + p.stopOffset = interval.stopOffset + unlinkNodesBetween(p, q) + return + } + if q.StartOffset <= interval.stopOffset { + // merge new interval into q + q.StartOffset = interval.StartOffset + unlinkNodesBetween(p, q) + return + } + + // add the new interval between p and q + unlinkNodesBetween(p, q) + p.next = interval + interval.prev = p + q.prev = interval + interval.next = q + +} + +// unlinkNodesBetween remove all nodes after start and before stop, exclusive +func unlinkNodesBetween(start *ChunkWrittenInterval, stop *ChunkWrittenInterval) { + if start.next == stop { + return + } + start.next.prev = nil + start.next = stop + stop.prev.next = nil + stop.prev = start +} + +func (list *ChunkWrittenIntervalList) size() int { + var count int + for t := list.head; t != nil; t = t.next { + count++ + } + return count - 2 +} diff --git a/weed/mount/page_writer/chunk_interval_list_test.go b/weed/mount/page_writer/chunk_interval_list_test.go new file mode 100644 index 000000000..b22f5eb5d --- /dev/null +++ b/weed/mount/page_writer/chunk_interval_list_test.go @@ -0,0 +1,49 @@ +package page_writer + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func Test_PageChunkWrittenIntervalList(t *testing.T) { + list := newChunkWrittenIntervalList() + + assert.Equal(t, 0, list.size(), "empty list") + + list.MarkWritten(0, 5) + assert.Equal(t, 1, list.size(), "one interval") + + list.MarkWritten(0, 5) + assert.Equal(t, 1, list.size(), "duplicated interval2") + + list.MarkWritten(95, 100) + assert.Equal(t, 2, list.size(), "two intervals") + + list.MarkWritten(50, 60) + assert.Equal(t, 3, list.size(), "three intervals") + + list.MarkWritten(50, 55) + assert.Equal(t, 3, list.size(), "three intervals merge") + + list.MarkWritten(40, 50) + assert.Equal(t, 3, list.size(), "three intervals grow forward") + + list.MarkWritten(50, 65) + assert.Equal(t, 3, list.size(), "three intervals grow backward") + + list.MarkWritten(70, 80) 
+ assert.Equal(t, 4, list.size(), "four intervals") + + list.MarkWritten(60, 70) + assert.Equal(t, 3, list.size(), "three intervals merged") + + list.MarkWritten(59, 71) + assert.Equal(t, 3, list.size(), "covered three intervals") + + list.MarkWritten(5, 59) + assert.Equal(t, 2, list.size(), "covered two intervals") + + list.MarkWritten(70, 99) + assert.Equal(t, 1, list.size(), "covered one intervals") + +} diff --git a/weed/mount/page_writer/dirty_pages.go b/weed/mount/page_writer/dirty_pages.go new file mode 100644 index 000000000..25b747fad --- /dev/null +++ b/weed/mount/page_writer/dirty_pages.go @@ -0,0 +1,30 @@ +package page_writer + +type DirtyPages interface { + AddPage(offset int64, data []byte) + FlushData() error + ReadDirtyDataAt(data []byte, startOffset int64) (maxStop int64) + GetStorageOptions() (collection, replication string) + Destroy() + LockForRead(startOffset, stopOffset int64) + UnlockForRead(startOffset, stopOffset int64) +} + +func max(x, y int64) int64 { + if x > y { + return x + } + return y +} +func min(x, y int64) int64 { + if x < y { + return x + } + return y +} +func minInt(x, y int) int { + if x < y { + return x + } + return y +} diff --git a/weed/mount/page_writer/page_chunk.go b/weed/mount/page_writer/page_chunk.go new file mode 100644 index 000000000..4e8f31425 --- /dev/null +++ b/weed/mount/page_writer/page_chunk.go @@ -0,0 +1,16 @@ +package page_writer + +import ( + "io" +) + +type SaveToStorageFunc func(reader io.Reader, offset int64, size int64, cleanupFn func()) + +type PageChunk interface { + FreeResource() + WriteDataAt(src []byte, offset int64) (n int) + ReadDataAt(p []byte, off int64) (maxStop int64) + IsComplete() bool + WrittenSize() int64 + SaveContent(saveFn SaveToStorageFunc) +} diff --git a/weed/mount/page_writer/page_chunk_mem.go b/weed/mount/page_writer/page_chunk_mem.go new file mode 100644 index 000000000..dfd54c19e --- /dev/null +++ b/weed/mount/page_writer/page_chunk_mem.go @@ -0,0 +1,69 @@ +package page_writer 
+ +import ( + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/util/mem" +) + +var ( + _ = PageChunk(&MemChunk{}) +) + +type MemChunk struct { + buf []byte + usage *ChunkWrittenIntervalList + chunkSize int64 + logicChunkIndex LogicChunkIndex +} + +func NewMemChunk(logicChunkIndex LogicChunkIndex, chunkSize int64) *MemChunk { + return &MemChunk{ + logicChunkIndex: logicChunkIndex, + chunkSize: chunkSize, + buf: mem.Allocate(int(chunkSize)), + usage: newChunkWrittenIntervalList(), + } +} + +func (mc *MemChunk) FreeResource() { + mem.Free(mc.buf) +} + +func (mc *MemChunk) WriteDataAt(src []byte, offset int64) (n int) { + innerOffset := offset % mc.chunkSize + n = copy(mc.buf[innerOffset:], src) + mc.usage.MarkWritten(innerOffset, innerOffset+int64(n)) + return +} + +func (mc *MemChunk) ReadDataAt(p []byte, off int64) (maxStop int64) { + memChunkBaseOffset := int64(mc.logicChunkIndex) * mc.chunkSize + for t := mc.usage.head.next; t != mc.usage.tail; t = t.next { + logicStart := max(off, int64(mc.logicChunkIndex)*mc.chunkSize+t.StartOffset) + logicStop := min(off+int64(len(p)), memChunkBaseOffset+t.stopOffset) + if logicStart < logicStop { + copy(p[logicStart-off:logicStop-off], mc.buf[logicStart-memChunkBaseOffset:logicStop-memChunkBaseOffset]) + maxStop = max(maxStop, logicStop) + } + } + return +} + +func (mc *MemChunk) IsComplete() bool { + return mc.usage.IsComplete(mc.chunkSize) +} + +func (mc *MemChunk) WrittenSize() int64 { + return mc.usage.WrittenSize() +} + +func (mc *MemChunk) SaveContent(saveFn SaveToStorageFunc) { + if saveFn == nil { + return + } + for t := mc.usage.head.next; t != mc.usage.tail; t = t.next { + reader := util.NewBytesReader(mc.buf[t.StartOffset:t.stopOffset]) + saveFn(reader, int64(mc.logicChunkIndex)*mc.chunkSize+t.StartOffset, t.Size(), func() { + }) + } +} diff --git a/weed/mount/page_writer/page_chunk_swapfile.go b/weed/mount/page_writer/page_chunk_swapfile.go new file mode 100644 index 
000000000..486557629 --- /dev/null +++ b/weed/mount/page_writer/page_chunk_swapfile.go @@ -0,0 +1,121 @@ +package page_writer + +import ( + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/util/mem" + "os" +) + +var ( + _ = PageChunk(&SwapFileChunk{}) +) + +type ActualChunkIndex int + +type SwapFile struct { + dir string + file *os.File + logicToActualChunkIndex map[LogicChunkIndex]ActualChunkIndex + chunkSize int64 +} + +type SwapFileChunk struct { + swapfile *SwapFile + usage *ChunkWrittenIntervalList + logicChunkIndex LogicChunkIndex + actualChunkIndex ActualChunkIndex +} + +func NewSwapFile(dir string, chunkSize int64) *SwapFile { + return &SwapFile{ + dir: dir, + file: nil, + logicToActualChunkIndex: make(map[LogicChunkIndex]ActualChunkIndex), + chunkSize: chunkSize, + } +} +func (sf *SwapFile) FreeResource() { + if sf.file != nil { + sf.file.Close() + os.Remove(sf.file.Name()) + } +} + +func (sf *SwapFile) NewTempFileChunk(logicChunkIndex LogicChunkIndex) (tc *SwapFileChunk) { + if sf.file == nil { + var err error + sf.file, err = os.CreateTemp(sf.dir, "") + if err != nil { + glog.Errorf("create swap file: %v", err) + return nil + } + } + actualChunkIndex, found := sf.logicToActualChunkIndex[logicChunkIndex] + if !found { + actualChunkIndex = ActualChunkIndex(len(sf.logicToActualChunkIndex)) + sf.logicToActualChunkIndex[logicChunkIndex] = actualChunkIndex + } + + return &SwapFileChunk{ + swapfile: sf, + usage: newChunkWrittenIntervalList(), + logicChunkIndex: logicChunkIndex, + actualChunkIndex: actualChunkIndex, + } +} + +func (sc *SwapFileChunk) FreeResource() { +} + +func (sc *SwapFileChunk) WriteDataAt(src []byte, offset int64) (n int) { + innerOffset := offset % sc.swapfile.chunkSize + var err error + n, err = sc.swapfile.file.WriteAt(src, int64(sc.actualChunkIndex)*sc.swapfile.chunkSize+innerOffset) + if err == nil { + sc.usage.MarkWritten(innerOffset, innerOffset+int64(n)) + 
} else { + glog.Errorf("failed to write swap file %s: %v", sc.swapfile.file.Name(), err) + } + return +} + +func (sc *SwapFileChunk) ReadDataAt(p []byte, off int64) (maxStop int64) { + chunkStartOffset := int64(sc.logicChunkIndex) * sc.swapfile.chunkSize + for t := sc.usage.head.next; t != sc.usage.tail; t = t.next { + logicStart := max(off, chunkStartOffset+t.StartOffset) + logicStop := min(off+int64(len(p)), chunkStartOffset+t.stopOffset) + if logicStart < logicStop { + actualStart := logicStart - chunkStartOffset + int64(sc.actualChunkIndex)*sc.swapfile.chunkSize + if _, err := sc.swapfile.file.ReadAt(p[logicStart-off:logicStop-off], actualStart); err != nil { + glog.Errorf("failed to reading swap file %s: %v", sc.swapfile.file.Name(), err) + break + } + maxStop = max(maxStop, logicStop) + } + } + return +} + +func (sc *SwapFileChunk) IsComplete() bool { + return sc.usage.IsComplete(sc.swapfile.chunkSize) +} + +func (sc *SwapFileChunk) WrittenSize() int64 { + return sc.usage.WrittenSize() +} + +func (sc *SwapFileChunk) SaveContent(saveFn SaveToStorageFunc) { + if saveFn == nil { + return + } + for t := sc.usage.head.next; t != sc.usage.tail; t = t.next { + data := mem.Allocate(int(t.Size())) + sc.swapfile.file.ReadAt(data, t.StartOffset+int64(sc.actualChunkIndex)*sc.swapfile.chunkSize) + reader := util.NewBytesReader(data) + saveFn(reader, int64(sc.logicChunkIndex)*sc.swapfile.chunkSize+t.StartOffset, t.Size(), func() { + }) + mem.Free(data) + } + sc.usage = newChunkWrittenIntervalList() +} diff --git a/weed/mount/page_writer/upload_pipeline.go b/weed/mount/page_writer/upload_pipeline.go new file mode 100644 index 000000000..53641e66d --- /dev/null +++ b/weed/mount/page_writer/upload_pipeline.go @@ -0,0 +1,182 @@ +package page_writer + +import ( + "fmt" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" + "sync" + "sync/atomic" + "time" +) + +type LogicChunkIndex int + +type UploadPipeline struct { + filepath util.FullPath 
+ ChunkSize int64 + writableChunks map[LogicChunkIndex]PageChunk + writableChunksLock sync.Mutex + sealedChunks map[LogicChunkIndex]*SealedChunk + sealedChunksLock sync.Mutex + uploaders *util.LimitedConcurrentExecutor + uploaderCount int32 + uploaderCountCond *sync.Cond + saveToStorageFn SaveToStorageFunc + activeReadChunks map[LogicChunkIndex]int + activeReadChunksLock sync.Mutex + bufferChunkLimit int +} + +type SealedChunk struct { + chunk PageChunk + referenceCounter int // track uploading or reading processes +} + +func (sc *SealedChunk) FreeReference(messageOnFree string) { + sc.referenceCounter-- + if sc.referenceCounter == 0 { + glog.V(4).Infof("Free sealed chunk: %s", messageOnFree) + sc.chunk.FreeResource() + } +} + +func NewUploadPipeline(writers *util.LimitedConcurrentExecutor, chunkSize int64, saveToStorageFn SaveToStorageFunc, bufferChunkLimit int) *UploadPipeline { + return &UploadPipeline{ + ChunkSize: chunkSize, + writableChunks: make(map[LogicChunkIndex]PageChunk), + sealedChunks: make(map[LogicChunkIndex]*SealedChunk), + uploaders: writers, + uploaderCountCond: sync.NewCond(&sync.Mutex{}), + saveToStorageFn: saveToStorageFn, + activeReadChunks: make(map[LogicChunkIndex]int), + bufferChunkLimit: bufferChunkLimit, + } +} + +func (up *UploadPipeline) SaveDataAt(p []byte, off int64) (n int) { + up.writableChunksLock.Lock() + defer up.writableChunksLock.Unlock() + + logicChunkIndex := LogicChunkIndex(off / up.ChunkSize) + + memChunk, found := up.writableChunks[logicChunkIndex] + if !found { + if len(up.writableChunks) < up.bufferChunkLimit { + memChunk = NewMemChunk(logicChunkIndex, up.ChunkSize) + } else { + fullestChunkIndex, fullness := LogicChunkIndex(-1), int64(0) + for lci, mc := range up.writableChunks { + chunkFullness := mc.WrittenSize() + if fullness < chunkFullness { + fullestChunkIndex = lci + fullness = chunkFullness + } + } + up.moveToSealed(up.writableChunks[fullestChunkIndex], fullestChunkIndex) + delete(up.writableChunks, 
fullestChunkIndex) + fmt.Printf("flush chunk %d with %d bytes written", logicChunkIndex, fullness) + memChunk = NewMemChunk(logicChunkIndex, up.ChunkSize) + } + up.writableChunks[logicChunkIndex] = memChunk + } + n = memChunk.WriteDataAt(p, off) + up.maybeMoveToSealed(memChunk, logicChunkIndex) + + return +} + +func (up *UploadPipeline) MaybeReadDataAt(p []byte, off int64) (maxStop int64) { + logicChunkIndex := LogicChunkIndex(off / up.ChunkSize) + + // read from sealed chunks first + up.sealedChunksLock.Lock() + sealedChunk, found := up.sealedChunks[logicChunkIndex] + if found { + sealedChunk.referenceCounter++ + } + up.sealedChunksLock.Unlock() + if found { + maxStop = sealedChunk.chunk.ReadDataAt(p, off) + glog.V(4).Infof("%s read sealed memchunk [%d,%d)", up.filepath, off, maxStop) + sealedChunk.FreeReference(fmt.Sprintf("%s finish reading chunk %d", up.filepath, logicChunkIndex)) + } + + // read from writable chunks last + up.writableChunksLock.Lock() + defer up.writableChunksLock.Unlock() + writableChunk, found := up.writableChunks[logicChunkIndex] + if !found { + return + } + writableMaxStop := writableChunk.ReadDataAt(p, off) + glog.V(4).Infof("%s read writable memchunk [%d,%d)", up.filepath, off, writableMaxStop) + maxStop = max(maxStop, writableMaxStop) + + return +} + +func (up *UploadPipeline) FlushAll() { + up.writableChunksLock.Lock() + defer up.writableChunksLock.Unlock() + + for logicChunkIndex, memChunk := range up.writableChunks { + up.moveToSealed(memChunk, logicChunkIndex) + } + + up.waitForCurrentWritersToComplete() +} + +func (up *UploadPipeline) maybeMoveToSealed(memChunk PageChunk, logicChunkIndex LogicChunkIndex) { + if memChunk.IsComplete() { + up.moveToSealed(memChunk, logicChunkIndex) + } +} + +func (up *UploadPipeline) moveToSealed(memChunk PageChunk, logicChunkIndex LogicChunkIndex) { + atomic.AddInt32(&up.uploaderCount, 1) + glog.V(4).Infof("%s uploaderCount %d ++> %d", up.filepath, up.uploaderCount-1, up.uploaderCount) + + 
up.sealedChunksLock.Lock() + + if oldMemChunk, found := up.sealedChunks[logicChunkIndex]; found { + oldMemChunk.FreeReference(fmt.Sprintf("%s replace chunk %d", up.filepath, logicChunkIndex)) + } + sealedChunk := &SealedChunk{ + chunk: memChunk, + referenceCounter: 1, // default 1 is for uploading process + } + up.sealedChunks[logicChunkIndex] = sealedChunk + delete(up.writableChunks, logicChunkIndex) + + up.sealedChunksLock.Unlock() + + up.uploaders.Execute(func() { + // first add to the file chunks + sealedChunk.chunk.SaveContent(up.saveToStorageFn) + + // notify waiting process + atomic.AddInt32(&up.uploaderCount, -1) + glog.V(4).Infof("%s uploaderCount %d --> %d", up.filepath, up.uploaderCount+1, up.uploaderCount) + // Lock and Unlock are not required, + // but it may signal multiple times during one wakeup, + // and the waiting goroutine may miss some of them! + up.uploaderCountCond.L.Lock() + up.uploaderCountCond.Broadcast() + up.uploaderCountCond.L.Unlock() + + // wait for readers + for up.IsLocked(logicChunkIndex) { + time.Sleep(59 * time.Millisecond) + } + + // then remove from sealed chunks + up.sealedChunksLock.Lock() + defer up.sealedChunksLock.Unlock() + delete(up.sealedChunks, logicChunkIndex) + sealedChunk.FreeReference(fmt.Sprintf("%s finished uploading chunk %d", up.filepath, logicChunkIndex)) + + }) +} + +func (up *UploadPipeline) Shutdown() { +} diff --git a/weed/mount/page_writer/upload_pipeline_lock.go b/weed/mount/page_writer/upload_pipeline_lock.go new file mode 100644 index 000000000..47a40ba37 --- /dev/null +++ b/weed/mount/page_writer/upload_pipeline_lock.go @@ -0,0 +1,63 @@ +package page_writer + +import ( + "sync/atomic" +) + +func (up *UploadPipeline) LockForRead(startOffset, stopOffset int64) { + startLogicChunkIndex := LogicChunkIndex(startOffset / up.ChunkSize) + stopLogicChunkIndex := LogicChunkIndex(stopOffset / up.ChunkSize) + if stopOffset%up.ChunkSize > 0 { + stopLogicChunkIndex += 1 + } + up.activeReadChunksLock.Lock() + defer 
up.activeReadChunksLock.Unlock() + for i := startLogicChunkIndex; i < stopLogicChunkIndex; i++ { + if count, found := up.activeReadChunks[i]; found { + up.activeReadChunks[i] = count + 1 + } else { + up.activeReadChunks[i] = 1 + } + } +} + +func (up *UploadPipeline) UnlockForRead(startOffset, stopOffset int64) { + startLogicChunkIndex := LogicChunkIndex(startOffset / up.ChunkSize) + stopLogicChunkIndex := LogicChunkIndex(stopOffset / up.ChunkSize) + if stopOffset%up.ChunkSize > 0 { + stopLogicChunkIndex += 1 + } + up.activeReadChunksLock.Lock() + defer up.activeReadChunksLock.Unlock() + for i := startLogicChunkIndex; i < stopLogicChunkIndex; i++ { + if count, found := up.activeReadChunks[i]; found { + if count == 1 { + delete(up.activeReadChunks, i) + } else { + up.activeReadChunks[i] = count - 1 + } + } + } +} + +func (up *UploadPipeline) IsLocked(logicChunkIndex LogicChunkIndex) bool { + up.activeReadChunksLock.Lock() + defer up.activeReadChunksLock.Unlock() + if count, found := up.activeReadChunks[logicChunkIndex]; found { + return count > 0 + } + return false +} + +func (up *UploadPipeline) waitForCurrentWritersToComplete() { + up.uploaderCountCond.L.Lock() + t := int32(100) + for { + t = atomic.LoadInt32(&up.uploaderCount) + if t <= 0 { + break + } + up.uploaderCountCond.Wait() + } + up.uploaderCountCond.L.Unlock() +} diff --git a/weed/mount/page_writer/upload_pipeline_test.go b/weed/mount/page_writer/upload_pipeline_test.go new file mode 100644 index 000000000..816fb228b --- /dev/null +++ b/weed/mount/page_writer/upload_pipeline_test.go @@ -0,0 +1,47 @@ +package page_writer + +import ( + "github.com/chrislusf/seaweedfs/weed/util" + "testing" +) + +func TestUploadPipeline(t *testing.T) { + + uploadPipeline := NewUploadPipeline(nil, 2*1024*1024, nil, 16) + + writeRange(uploadPipeline, 0, 131072) + writeRange(uploadPipeline, 131072, 262144) + writeRange(uploadPipeline, 262144, 1025536) + + confirmRange(t, uploadPipeline, 0, 1025536) + + 
writeRange(uploadPipeline, 1025536, 1296896) + + confirmRange(t, uploadPipeline, 1025536, 1296896) + + writeRange(uploadPipeline, 1296896, 2162688) + + confirmRange(t, uploadPipeline, 1296896, 2162688) + + confirmRange(t, uploadPipeline, 1296896, 2162688) +} + +// startOff and stopOff must be divided by 4 +func writeRange(uploadPipeline *UploadPipeline, startOff, stopOff int64) { + p := make([]byte, 4) + for i := startOff / 4; i < stopOff/4; i += 4 { + util.Uint32toBytes(p, uint32(i)) + uploadPipeline.SaveDataAt(p, i) + } +} + +func confirmRange(t *testing.T, uploadPipeline *UploadPipeline, startOff, stopOff int64) { + p := make([]byte, 4) + for i := startOff; i < stopOff/4; i += 4 { + uploadPipeline.MaybeReadDataAt(p, i) + x := util.BytesToUint32(p) + if x != uint32(i) { + t.Errorf("expecting %d found %d at offset [%d,%d)", i, x, i, i+4) + } + } +} diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index 1e9f07df9..b7f50cd13 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -2,14 +2,18 @@ package mount import ( "context" + "github.com/chrislusf/seaweedfs/weed/filer" "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/util/chunk_cache" "github.com/chrislusf/seaweedfs/weed/util/grace" + "github.com/chrislusf/seaweedfs/weed/wdclient" "github.com/hanwen/go-fuse/v2/fuse" "google.golang.org/grpc" + "math/rand" "os" "path" "path/filepath" @@ -54,13 +58,15 @@ type WFS struct { // follow https://github.com/hanwen/go-fuse/blob/master/fuse/api.go fuse.RawFileSystem fs.Inode - option *Option - metaCache *meta_cache.MetaCache - stats statsCache - root Directory - signature int32 - inodeToPath *InodeToPath - fhmap *FileHandleToInode + option *Option + metaCache *meta_cache.MetaCache + stats statsCache + root Directory + chunkCache 
*chunk_cache.TieredChunkCache + signature int32 + concurrentWriters *util.LimitedConcurrentExecutor + inodeToPath *InodeToPath + fhmap *FileHandleToInode } func NewSeaweedFileSystem(option *Option) *WFS { @@ -79,12 +85,21 @@ func NewSeaweedFileSystem(option *Option) *WFS { parent: nil, } + wfs.option.filerIndex = rand.Intn(len(option.FilerAddresses)) + wfs.option.setupUniqueCacheDirectory() + if option.CacheSizeMB > 0 { + wfs.chunkCache = chunk_cache.NewTieredChunkCache(256, option.getUniqueCacheDir(), option.CacheSizeMB, 1024*1024) + } + wfs.metaCache = meta_cache.NewMetaCache(path.Join(option.getUniqueCacheDir(), "meta"), util.FullPath(option.FilerMountRootPath), option.UidGidMapper, func(filePath util.FullPath, entry *filer_pb.Entry) { }) grace.OnInterrupt(func() { wfs.metaCache.Shutdown() }) + if wfs.option.ConcurrentWriters > 0 { + wfs.concurrentWriters = util.NewLimitedConcurrentExecutor(wfs.option.ConcurrentWriters) + } return wfs } @@ -132,6 +147,19 @@ func (wfs *WFS) maybeLoadEntry(fullpath util.FullPath) (*filer_pb.Entry, fuse.St return cachedEntry.ToProtoEntry(), fuse.OK } +func (wfs *WFS) LookupFn() wdclient.LookupFileIdFunctionType { + if wfs.option.VolumeServerAccess == "filerProxy" { + return func(fileId string) (targetUrls []string, err error) { + return []string{"http://" + wfs.getCurrentFiler().ToHttpAddress() + "/?proxyChunkId=" + fileId}, nil + } + } + return filer.LookupFn(wfs) +} + +func (wfs *WFS) getCurrentFiler() pb.ServerAddress { + return wfs.option.FilerAddresses[wfs.option.filerIndex] +} + func (option *Option) setupUniqueCacheDirectory() { cacheUniqueId := util.Md5String([]byte(option.MountDirectory + string(option.FilerAddresses[0]) + option.FilerMountRootPath + util.Version()))[0:8] option.uniqueCacheDir = path.Join(option.CacheDir, cacheUniqueId) diff --git a/weed/mount/weedfs_file_read.go b/weed/mount/weedfs_file_read.go index d9ad1f4ea..00143a5b4 100644 --- a/weed/mount/weedfs_file_read.go +++ b/weed/mount/weedfs_file_read.go @@ 
-1,7 +1,9 @@ package mount import ( + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/hanwen/go-fuse/v2/fuse" + "io" ) /** @@ -29,6 +31,29 @@ import ( * @param off offset to read from * @param fi file information */ -func (wfs *WFS) Read(cancel <-chan struct{}, in *fuse.ReadIn, buf []byte) (fuse.ReadResult, fuse.Status) { - return nil, fuse.ENOSYS +func (wfs *WFS) Read(cancel <-chan struct{}, in *fuse.ReadIn, buff []byte) (fuse.ReadResult, fuse.Status) { + fh := wfs.GetHandle(FileHandleId(in.Fh)) + if fh == nil { + return nil, fuse.ENOENT + } + + offset := int64(in.Offset) + fh.lockForRead(offset, len(buff)) + defer fh.unlockForRead(offset, len(buff)) + + totalRead, err := fh.readFromChunks(buff, offset) + if err == nil || err == io.EOF { + maxStop := fh.readFromDirtyPages(buff, offset) + totalRead = max(maxStop-offset, totalRead) + } + if err == io.EOF { + err = nil + } + + if err != nil { + glog.Warningf("file handle read %s %d: %v", fh.FullPath(), totalRead, err) + return nil, fuse.EIO + } + + return fuse.ReadResultData(buff[:totalRead]), fuse.OK } diff --git a/weed/mount/weedfs_filehandle.go b/weed/mount/weedfs_filehandle.go index 551394262..03f72282e 100644 --- a/weed/mount/weedfs_filehandle.go +++ b/weed/mount/weedfs_filehandle.go @@ -5,7 +5,7 @@ import "github.com/hanwen/go-fuse/v2/fuse" func (wfs *WFS) AcquireHandle(inode uint64, uid, gid uint32) (fileHandle *FileHandle, code fuse.Status) { _, entry, status := wfs.maybeReadEntry(inode) if status == fuse.OK { - fileHandle = wfs.fhmap.GetFileHandle(inode) + fileHandle = wfs.fhmap.AcquireFileHandle(wfs, inode, entry) fileHandle.entry = entry } return @@ -14,3 +14,7 @@ func (wfs *WFS) AcquireHandle(inode uint64, uid, gid uint32) (fileHandle *FileHa func (wfs *WFS) ReleaseHandle(handleId FileHandleId) { wfs.fhmap.ReleaseByHandle(handleId) } + +func (wfs *WFS) GetHandle(handleId FileHandleId) *FileHandle { + return wfs.fhmap.GetFileHandle(handleId) +} diff --git a/weed/mount/weedfs_write.go 
b/weed/mount/weedfs_write.go new file mode 100644 index 000000000..723ce9c34 --- /dev/null +++ b/weed/mount/weedfs_write.go @@ -0,0 +1,84 @@ +package mount + +import ( + "context" + "fmt" + "io" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func (wfs *WFS) saveDataAsChunk(fullPath util.FullPath) filer.SaveDataAsChunkFunctionType { + + return func(reader io.Reader, filename string, offset int64) (chunk *filer_pb.FileChunk, collection, replication string, err error) { + var fileId, host string + var auth security.EncodedJwt + + if err := wfs.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { + return util.Retry("assignVolume", func() error { + request := &filer_pb.AssignVolumeRequest{ + Count: 1, + Replication: wfs.option.Replication, + Collection: wfs.option.Collection, + TtlSec: wfs.option.TtlSec, + DiskType: string(wfs.option.DiskType), + DataCenter: wfs.option.DataCenter, + Path: string(fullPath), + } + + resp, err := client.AssignVolume(context.Background(), request) + if err != nil { + glog.V(0).Infof("assign volume failure %v: %v", request, err) + return err + } + if resp.Error != "" { + return fmt.Errorf("assign volume failure %v: %v", request, resp.Error) + } + + fileId, auth = resp.FileId, security.EncodedJwt(resp.Auth) + loc := resp.Location + host = wfs.AdjustedUrl(loc) + collection, replication = resp.Collection, resp.Replication + + return nil + }) + }); err != nil { + return nil, "", "", fmt.Errorf("filerGrpcAddress assign volume: %v", err) + } + + fileUrl := fmt.Sprintf("http://%s/%s", host, fileId) + if wfs.option.VolumeServerAccess == "filerProxy" { + fileUrl = fmt.Sprintf("http://%s/?proxyChunkId=%s", wfs.getCurrentFiler(), fileId) + } + uploadOption := &operation.UploadOption{ + 
UploadUrl: fileUrl, + Filename: filename, + Cipher: wfs.option.Cipher, + IsInputCompressed: false, + MimeType: "", + PairMap: nil, + Jwt: auth, + } + uploadResult, err, data := operation.Upload(reader, uploadOption) + if err != nil { + glog.V(0).Infof("upload data %v to %s: %v", filename, fileUrl, err) + return nil, "", "", fmt.Errorf("upload data: %v", err) + } + if uploadResult.Error != "" { + glog.V(0).Infof("upload failure %v to %s: %v", filename, fileUrl, err) + return nil, "", "", fmt.Errorf("upload result: %v", uploadResult.Error) + } + + if offset == 0 { + wfs.chunkCache.SetChunk(fileId, data) + } + + chunk = uploadResult.ToPbFileChunk(fileId, offset) + return chunk, collection, replication, nil + } +} From 7286e525ad85dec877d506908a0ff35590b0f357 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 13 Feb 2022 23:27:11 -0800 Subject: [PATCH 37/39] support write --- weed/mount/filehandle.go | 1 + weed/mount/weedfs_file_sync.go | 114 +++++++++++++++++++++++++++++++- weed/mount/weedfs_file_write.go | 33 ++++++++- 3 files changed, 145 insertions(+), 3 deletions(-) diff --git a/weed/mount/filehandle.go b/weed/mount/filehandle.go index 0d5481b30..f2a2ec69c 100644 --- a/weed/mount/filehandle.go +++ b/weed/mount/filehandle.go @@ -21,6 +21,7 @@ type FileHandle struct { wfs *WFS // cache file has been written to + dirtyMetadata bool dirtyPages *PageWriter entryViewCache []filer.VisibleInterval reader io.ReaderAt diff --git a/weed/mount/weedfs_file_sync.go b/weed/mount/weedfs_file_sync.go index 1b89c1ecb..29a13690b 100644 --- a/weed/mount/weedfs_file_sync.go +++ b/weed/mount/weedfs_file_sync.go @@ -1,7 +1,14 @@ package mount import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/hanwen/go-fuse/v2/fuse" + "os" + "time" ) /** @@ -43,7 +50,15 @@ import ( * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html */ func (wfs 
*WFS) Flush(cancel <-chan struct{}, in *fuse.FlushIn) fuse.Status { - return fuse.ENOSYS + fh := wfs.GetHandle(FileHandleId(in.Fh)) + if fh == nil { + return fuse.ENOENT + } + + fh.Lock() + defer fh.Unlock() + + return wfs.doFlush(fh, in.Uid, in.Gid) } /** @@ -66,5 +81,100 @@ func (wfs *WFS) Flush(cancel <-chan struct{}, in *fuse.FlushIn) fuse.Status { * @param fi file information */ func (wfs *WFS) Fsync(cancel <-chan struct{}, in *fuse.FsyncIn) (code fuse.Status) { - return fuse.ENOSYS + + fh := wfs.GetHandle(FileHandleId(in.Fh)) + if fh == nil { + return fuse.ENOENT + } + + fh.Lock() + defer fh.Unlock() + + return wfs.doFlush(fh, in.Uid, in.Gid) + +} + +func (wfs *WFS) doFlush(fh *FileHandle, uid, gid uint32) fuse.Status { + // flush works at fh level + fileFullPath := fh.FullPath() + dir, _ := fileFullPath.DirAndName() + // send the data to the OS + glog.V(4).Infof("doFlush %s fh %d", fileFullPath, fh.handle) + + if err := fh.dirtyPages.FlushData(); err != nil { + glog.Errorf("%v doFlush: %v", fileFullPath, err) + return fuse.EIO + } + + if !fh.dirtyMetadata { + return fuse.OK + } + + err := wfs.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error { + + entry := fh.entry + if entry == nil { + return nil + } + + if entry.Attributes != nil { + entry.Attributes.Mime = fh.contentType + if entry.Attributes.Uid == 0 { + entry.Attributes.Uid = uid + } + if entry.Attributes.Gid == 0 { + entry.Attributes.Gid = gid + } + if entry.Attributes.Crtime == 0 { + entry.Attributes.Crtime = time.Now().Unix() + } + entry.Attributes.Mtime = time.Now().Unix() + entry.Attributes.FileMode = uint32(os.FileMode(entry.Attributes.FileMode) &^ wfs.option.Umask) + entry.Attributes.Collection, entry.Attributes.Replication = fh.dirtyPages.GetStorageOptions() + } + + request := &filer_pb.CreateEntryRequest{ + Directory: string(dir), + Entry: entry, + Signatures: []int32{wfs.signature}, + } + + glog.V(4).Infof("%s set chunks: %v", fileFullPath, len(entry.Chunks)) + for i, chunk 
:= range entry.Chunks { + glog.V(4).Infof("%s chunks %d: %v [%d,%d)", fileFullPath, i, chunk.GetFileIdString(), chunk.Offset, chunk.Offset+int64(chunk.Size)) + } + + manifestChunks, nonManifestChunks := filer.SeparateManifestChunks(entry.Chunks) + + chunks, _ := filer.CompactFileChunks(wfs.LookupFn(), nonManifestChunks) + chunks, manifestErr := filer.MaybeManifestize(wfs.saveDataAsChunk(fileFullPath), chunks) + if manifestErr != nil { + // not good, but should be ok + glog.V(0).Infof("MaybeManifestize: %v", manifestErr) + } + entry.Chunks = append(chunks, manifestChunks...) + + wfs.mapPbIdFromLocalToFiler(request.Entry) + defer wfs.mapPbIdFromFilerToLocal(request.Entry) + + if err := filer_pb.CreateEntry(client, request); err != nil { + glog.Errorf("fh flush create %s: %v", fileFullPath, err) + return fmt.Errorf("fh flush create %s: %v", fileFullPath, err) + } + + wfs.metaCache.InsertEntry(context.Background(), filer.FromPbEntry(request.Directory, request.Entry)) + + return nil + }) + + if err == nil { + fh.dirtyMetadata = false + } + + if err != nil { + glog.Errorf("%v fh %d flush: %v", fileFullPath, fh.handle, err) + return fuse.EIO + } + + return fuse.OK } diff --git a/weed/mount/weedfs_file_write.go b/weed/mount/weedfs_file_write.go index 72152d72e..efdf39386 100644 --- a/weed/mount/weedfs_file_write.go +++ b/weed/mount/weedfs_file_write.go @@ -2,6 +2,7 @@ package mount import ( "github.com/hanwen/go-fuse/v2/fuse" + "net/http" ) /** @@ -31,5 +32,35 @@ import ( * @param fi file information */ func (wfs *WFS) Write(cancel <-chan struct{}, in *fuse.WriteIn, data []byte) (written uint32, code fuse.Status) { - return 0, fuse.ENOSYS + + fh := wfs.GetHandle(FileHandleId(in.Fh)) + if fh == nil { + return 0, fuse.ENOENT + } + + fh.Lock() + defer fh.Unlock() + + entry := fh.entry + if entry == nil { + return 0, fuse.OK + } + + entry.Content = nil + offset := int64(in.Offset) + entry.Attributes.FileSize = uint64(max(offset+int64(len(data)), 
int64(entry.Attributes.FileSize))) + // glog.V(4).Infof("%v write [%d,%d) %d", fh.f.fullpath(), req.Offset, req.Offset+int64(len(req.Data)), len(req.Data)) + + fh.dirtyPages.AddPage(offset, data) + + written = uint32(len(data)) + + if offset == 0 { + // detect mime type + fh.contentType = http.DetectContentType(data) + } + + fh.dirtyMetadata = true + + return written, fuse.OK } From dbeeda812376eda39997cd814c3e7eefaf4ea686 Mon Sep 17 00:00:00 2001 From: chrislu Date: Mon, 14 Feb 2022 01:09:31 -0800 Subject: [PATCH 38/39] listen for metadata updates --- weed/command/mount2_std.go | 4 +- weed/mount/directory_read.go | 2 +- weed/mount/inode_to_path.go | 65 ++++--- weed/mount/meta_cache/cache_config.go | 32 ++++ weed/mount/meta_cache/id_mapper.go | 101 +++++++++++ weed/mount/meta_cache/meta_cache.go | 160 ++++++++++++++++++ weed/mount/meta_cache/meta_cache_init.go | 67 ++++++++ weed/mount/meta_cache/meta_cache_subscribe.go | 68 ++++++++ weed/mount/weedfs.go | 13 +- weed/mount/weedfs_dir_lookup.go | 4 +- weed/mount/weedfs_dir_mkrm.go | 2 +- weed/mount/weedfs_dir_read.go | 2 +- weed/mount/weedfs_file_mkrm.go | 4 +- weed/mount/weedfs_forget.go | 9 +- weed/mount/weedfs_link.go | 2 +- weed/mount/weedfs_rename.go | 2 - weed/mount/weedfs_symlink.go | 2 +- 17 files changed, 499 insertions(+), 40 deletions(-) create mode 100644 weed/mount/meta_cache/cache_config.go create mode 100644 weed/mount/meta_cache/id_mapper.go create mode 100644 weed/mount/meta_cache/meta_cache.go create mode 100644 weed/mount/meta_cache/meta_cache_init.go create mode 100644 weed/mount/meta_cache/meta_cache_subscribe.go diff --git a/weed/command/mount2_std.go b/weed/command/mount2_std.go index cb2b46556..584a72fc1 100644 --- a/weed/command/mount2_std.go +++ b/weed/command/mount2_std.go @@ -3,9 +3,9 @@ package command import ( "context" "fmt" - "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/mount" + 
"github.com/chrislusf/seaweedfs/weed/mount/meta_cache" "github.com/chrislusf/seaweedfs/weed/mount/unmount" "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" @@ -200,6 +200,8 @@ func RunMount2(option *Mount2Options, umask os.FileMode) bool { unmount.Unmount(dir) }) + seaweedFileSystem.StartBackgroundTasks() + fmt.Printf("This is SeaweedFS version %s %s %s\n", util.Version(), runtime.GOOS, runtime.GOARCH) server.Serve() diff --git a/weed/mount/directory_read.go b/weed/mount/directory_read.go index 51c51ae16..6034856f0 100644 --- a/weed/mount/directory_read.go +++ b/weed/mount/directory_read.go @@ -3,8 +3,8 @@ package mount import ( "context" "github.com/chrislusf/seaweedfs/weed/filer" - "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/mount/meta_cache" "github.com/chrislusf/seaweedfs/weed/util" "github.com/hanwen/go-fuse/v2/fs" "github.com/hanwen/go-fuse/v2/fuse" diff --git a/weed/mount/inode_to_path.go b/weed/mount/inode_to_path.go index 590531397..ffb0cc02f 100644 --- a/weed/mount/inode_to_path.go +++ b/weed/mount/inode_to_path.go @@ -14,21 +14,23 @@ type InodeToPath struct { } type InodeEntry struct { util.FullPath - nlookup uint64 + nlookup uint64 + isDirectory bool + isChildrenCached bool } func NewInodeToPath() *InodeToPath { - return &InodeToPath{ + t := &InodeToPath{ inode2path: make(map[uint64]*InodeEntry), path2inode: make(map[util.FullPath]uint64), nextInodeId: 2, // the root inode id is 1 } + t.inode2path[1] = &InodeEntry{"/", 1, true, false} + t.path2inode["/"] = 1 + return t } -func (i *InodeToPath) Lookup(path util.FullPath) uint64 { - if path == "/" { - return 1 - } +func (i *InodeToPath) Lookup(path util.FullPath, isDirectory bool) uint64 { i.Lock() defer i.Unlock() inode, found := i.path2inode[path] @@ -36,7 +38,7 @@ func (i *InodeToPath) Lookup(path util.FullPath) uint64 { inode = i.nextInodeId i.nextInodeId++ 
i.path2inode[path] = inode - i.inode2path[inode] = &InodeEntry{path, 1} + i.inode2path[inode] = &InodeEntry{path, 1, isDirectory, false} } else { i.inode2path[inode].nlookup++ } @@ -58,9 +60,6 @@ func (i *InodeToPath) GetInode(path util.FullPath) uint64 { } func (i *InodeToPath) GetPath(inode uint64) util.FullPath { - if inode == 1 { - return "/" - } i.RLock() defer i.RUnlock() path, found := i.inode2path[inode] @@ -71,15 +70,37 @@ func (i *InodeToPath) GetPath(inode uint64) util.FullPath { } func (i *InodeToPath) HasPath(path util.FullPath) bool { - if path == "/" { - return true - } i.RLock() defer i.RUnlock() _, found := i.path2inode[path] return found } +func (i *InodeToPath) MarkChildrenCached(fullpath util.FullPath) { + i.RLock() + defer i.RUnlock() + inode, found := i.path2inode[fullpath] + if !found { + glog.Fatalf("MarkChildrenCached not found inode %v", fullpath) + } + path, found := i.inode2path[inode] + path.isChildrenCached = true +} + +func (i *InodeToPath) IsChildrenCached(fullpath util.FullPath) bool { + i.RLock() + defer i.RUnlock() + inode, found := i.path2inode[fullpath] + if !found { + return false + } + path, found := i.inode2path[inode] + if found { + return path.isChildrenCached + } + return false +} + func (i *InodeToPath) HasInode(inode uint64) bool { if inode == 1 { return true @@ -91,9 +112,6 @@ func (i *InodeToPath) HasInode(inode uint64) bool { } func (i *InodeToPath) RemovePath(path util.FullPath) { - if path == "/" { - return - } i.Lock() defer i.Unlock() inode, found := i.path2inode[path] @@ -104,9 +122,6 @@ func (i *InodeToPath) RemovePath(path util.FullPath) { } func (i *InodeToPath) MovePath(sourcePath, targetPath util.FullPath) { - if sourcePath == "/" || targetPath == "/" { - return - } i.Lock() defer i.Unlock() sourceInode, sourceFound := i.path2inode[sourcePath] @@ -127,12 +142,8 @@ func (i *InodeToPath) MovePath(sourcePath, targetPath util.FullPath) { } } -func (i *InodeToPath) Forget(inode, nlookup uint64) { - if inode == 1 
{ - return - } +func (i *InodeToPath) Forget(inode, nlookup uint64, onForgetDir func(dir util.FullPath)) { i.Lock() - defer i.Unlock() path, found := i.inode2path[inode] if found { path.nlookup -= nlookup @@ -141,4 +152,10 @@ func (i *InodeToPath) Forget(inode, nlookup uint64) { delete(i.inode2path, inode) } } + i.Unlock() + if found { + if path.isDirectory && onForgetDir != nil { + onForgetDir(path.FullPath) + } + } } diff --git a/weed/mount/meta_cache/cache_config.go b/weed/mount/meta_cache/cache_config.go new file mode 100644 index 000000000..e6593ebde --- /dev/null +++ b/weed/mount/meta_cache/cache_config.go @@ -0,0 +1,32 @@ +package meta_cache + +import "github.com/chrislusf/seaweedfs/weed/util" + +var ( + _ = util.Configuration(&cacheConfig{}) +) + +// implementing util.Configuraion +type cacheConfig struct { + dir string +} + +func (c cacheConfig) GetString(key string) string { + return c.dir +} + +func (c cacheConfig) GetBool(key string) bool { + panic("implement me") +} + +func (c cacheConfig) GetInt(key string) int { + panic("implement me") +} + +func (c cacheConfig) GetStringSlice(key string) []string { + panic("implement me") +} + +func (c cacheConfig) SetDefault(key string, value interface{}) { + panic("implement me") +} diff --git a/weed/mount/meta_cache/id_mapper.go b/weed/mount/meta_cache/id_mapper.go new file mode 100644 index 000000000..4a2179f31 --- /dev/null +++ b/weed/mount/meta_cache/id_mapper.go @@ -0,0 +1,101 @@ +package meta_cache + +import ( + "fmt" + "strconv" + "strings" +) + +type UidGidMapper struct { + uidMapper *IdMapper + gidMapper *IdMapper +} + +type IdMapper struct { + localToFiler map[uint32]uint32 + filerToLocal map[uint32]uint32 +} + +// UidGidMapper translates local uid/gid to filer uid/gid +// The local storage always persists the same as the filer. +// The local->filer translation happens when updating the filer first and later saving to meta_cache. +// And filer->local happens when reading from the meta_cache. 
+func NewUidGidMapper(uidPairsStr, gidPairStr string) (*UidGidMapper, error) { + uidMapper, err := newIdMapper(uidPairsStr) + if err != nil { + return nil, err + } + gidMapper, err := newIdMapper(gidPairStr) + if err != nil { + return nil, err + } + + return &UidGidMapper{ + uidMapper: uidMapper, + gidMapper: gidMapper, + }, nil +} + +func (m *UidGidMapper) LocalToFiler(uid, gid uint32) (uint32, uint32) { + return m.uidMapper.LocalToFiler(uid), m.gidMapper.LocalToFiler(gid) +} +func (m *UidGidMapper) FilerToLocal(uid, gid uint32) (uint32, uint32) { + return m.uidMapper.FilerToLocal(uid), m.gidMapper.FilerToLocal(gid) +} + +func (m *IdMapper) LocalToFiler(id uint32) uint32 { + value, found := m.localToFiler[id] + if found { + return value + } + return id +} +func (m *IdMapper) FilerToLocal(id uint32) uint32 { + value, found := m.filerToLocal[id] + if found { + return value + } + return id +} + +func newIdMapper(pairsStr string) (*IdMapper, error) { + + localToFiler, filerToLocal, err := parseUint32Pairs(pairsStr) + if err != nil { + return nil, err + } + + return &IdMapper{ + localToFiler: localToFiler, + filerToLocal: filerToLocal, + }, nil + +} + +func parseUint32Pairs(pairsStr string) (localToFiler, filerToLocal map[uint32]uint32, err error) { + + if pairsStr == "" { + return + } + + localToFiler = make(map[uint32]uint32) + filerToLocal = make(map[uint32]uint32) + for _, pairStr := range strings.Split(pairsStr, ",") { + pair := strings.Split(pairStr, ":") + localUidStr, filerUidStr := pair[0], pair[1] + localUid, localUidErr := strconv.Atoi(localUidStr) + if localUidErr != nil { + err = fmt.Errorf("failed to parse local %s: %v", localUidStr, localUidErr) + return + } + filerUid, filerUidErr := strconv.Atoi(filerUidStr) + if filerUidErr != nil { + err = fmt.Errorf("failed to parse remote %s: %v", filerUidStr, filerUidErr) + return + } + localToFiler[uint32(localUid)] = uint32(filerUid) + filerToLocal[uint32(filerUid)] = uint32(localUid) + } + + return +} diff 
--git a/weed/mount/meta_cache/meta_cache.go b/weed/mount/meta_cache/meta_cache.go new file mode 100644 index 000000000..7f997c5b0 --- /dev/null +++ b/weed/mount/meta_cache/meta_cache.go @@ -0,0 +1,160 @@ +package meta_cache + +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/filer/leveldb" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "os" +) + +// need to have logic similar to FilerStoreWrapper +// e.g. fill fileId field for chunks + +type MetaCache struct { + localStore filer.VirtualFilerStore + // sync.RWMutex + uidGidMapper *UidGidMapper + markCachedFn func(fullpath util.FullPath) + isCachedFn func(fullpath util.FullPath) bool + invalidateFunc func(fullpath util.FullPath, entry *filer_pb.Entry) +} + +func NewMetaCache(dbFolder string, uidGidMapper *UidGidMapper, markCachedFn func(path util.FullPath), isCachedFn func(path util.FullPath) bool, invalidateFunc func(util.FullPath, *filer_pb.Entry)) *MetaCache { + return &MetaCache{ + localStore: openMetaStore(dbFolder), + markCachedFn: markCachedFn, + isCachedFn: isCachedFn, + uidGidMapper: uidGidMapper, + invalidateFunc: func(fullpath util.FullPath, entry *filer_pb.Entry) { + invalidateFunc(fullpath, entry) + }, + } +} + +func openMetaStore(dbFolder string) filer.VirtualFilerStore { + + os.RemoveAll(dbFolder) + os.MkdirAll(dbFolder, 0755) + + store := &leveldb.LevelDBStore{} + config := &cacheConfig{ + dir: dbFolder, + } + + if err := store.Initialize(config, ""); err != nil { + glog.Fatalf("Failed to initialize metadata cache store for %s: %+v", store.GetName(), err) + } + + return filer.NewFilerStoreWrapper(store) + +} + +func (mc *MetaCache) InsertEntry(ctx context.Context, entry *filer.Entry) error { + //mc.Lock() + //defer mc.Unlock() + return mc.doInsertEntry(ctx, entry) +} + +func (mc *MetaCache) doInsertEntry(ctx context.Context, entry *filer.Entry) error 
{ + return mc.localStore.InsertEntry(ctx, entry) +} + +func (mc *MetaCache) AtomicUpdateEntryFromFiler(ctx context.Context, oldPath util.FullPath, newEntry *filer.Entry) error { + //mc.Lock() + //defer mc.Unlock() + + oldDir, _ := oldPath.DirAndName() + if mc.isCachedFn(util.FullPath(oldDir)) { + if oldPath != "" { + if newEntry != nil && oldPath == newEntry.FullPath { + // skip the unnecessary deletion + // leave the update to the following InsertEntry operation + } else { + glog.V(3).Infof("DeleteEntry %s", oldPath) + if err := mc.localStore.DeleteEntry(ctx, oldPath); err != nil { + return err + } + } + } + } else { + // println("unknown old directory:", oldDir) + } + + if newEntry != nil { + newDir, _ := newEntry.DirAndName() + if mc.isCachedFn(util.FullPath(newDir)) { + glog.V(3).Infof("InsertEntry %s/%s", newDir, newEntry.Name()) + if err := mc.localStore.InsertEntry(ctx, newEntry); err != nil { + return err + } + } + } + return nil +} + +func (mc *MetaCache) UpdateEntry(ctx context.Context, entry *filer.Entry) error { + //mc.Lock() + //defer mc.Unlock() + return mc.localStore.UpdateEntry(ctx, entry) +} + +func (mc *MetaCache) FindEntry(ctx context.Context, fp util.FullPath) (entry *filer.Entry, err error) { + //mc.RLock() + //defer mc.RUnlock() + entry, err = mc.localStore.FindEntry(ctx, fp) + if err != nil { + return nil, err + } + mc.mapIdFromFilerToLocal(entry) + return +} + +func (mc *MetaCache) DeleteEntry(ctx context.Context, fp util.FullPath) (err error) { + //mc.Lock() + //defer mc.Unlock() + return mc.localStore.DeleteEntry(ctx, fp) +} +func (mc *MetaCache) DeleteFolderChildren(ctx context.Context, fp util.FullPath) (err error) { + //mc.Lock() + //defer mc.Unlock() + return mc.localStore.DeleteFolderChildren(ctx, fp) +} + +func (mc *MetaCache) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) error { + //mc.RLock() + //defer mc.RUnlock() + 
+ if !mc.isCachedFn(dirPath) { + // if this request comes after renaming, it should be fine + glog.Warningf("unsynchronized dir: %v", dirPath) + } + + _, err := mc.localStore.ListDirectoryEntries(ctx, dirPath, startFileName, includeStartFile, limit, func(entry *filer.Entry) bool { + mc.mapIdFromFilerToLocal(entry) + return eachEntryFunc(entry) + }) + if err != nil { + return err + } + return err +} + +func (mc *MetaCache) Shutdown() { + //mc.Lock() + //defer mc.Unlock() + mc.localStore.Shutdown() +} + +func (mc *MetaCache) mapIdFromFilerToLocal(entry *filer.Entry) { + entry.Attr.Uid, entry.Attr.Gid = mc.uidGidMapper.FilerToLocal(entry.Attr.Uid, entry.Attr.Gid) +} + +func (mc *MetaCache) Debug() { + if debuggable, ok := mc.localStore.(filer.Debuggable); ok { + println("start debugging") + debuggable.Debug(os.Stderr) + } +} diff --git a/weed/mount/meta_cache/meta_cache_init.go b/weed/mount/meta_cache/meta_cache_init.go new file mode 100644 index 000000000..cd9c71668 --- /dev/null +++ b/weed/mount/meta_cache/meta_cache_init.go @@ -0,0 +1,67 @@ +package meta_cache + +import ( + "context" + "fmt" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func EnsureVisited(mc *MetaCache, client filer_pb.FilerClient, dirPath util.FullPath) error { + + for { + + // the directory children are already cached + // so no need for this and upper directories + if mc.isCachedFn(dirPath) { + return nil + } + + if err := doEnsureVisited(mc, client, dirPath); err != nil { + return err + } + + // continue to parent directory + if dirPath != "/" { + parent, _ := dirPath.DirAndName() + dirPath = util.FullPath(parent) + } else { + break + } + } + + return nil + +} + +func doEnsureVisited(mc *MetaCache, client filer_pb.FilerClient, path util.FullPath) error { + + glog.V(4).Infof("ReadDirAllEntries %s ...", path) + + err := 
util.Retry("ReadDirAllEntries", func() error { + return filer_pb.ReadDirAllEntries(client, path, "", func(pbEntry *filer_pb.Entry, isLast bool) error { + entry := filer.FromPbEntry(string(path), pbEntry) + if IsHiddenSystemEntry(string(path), entry.Name()) { + return nil + } + if err := mc.doInsertEntry(context.Background(), entry); err != nil { + glog.V(0).Infof("read %s: %v", entry.FullPath, err) + return err + } + return nil + }) + }) + + if err != nil { + err = fmt.Errorf("list %s: %v", path, err) + } + mc.markCachedFn(path) + return err +} + +func IsHiddenSystemEntry(dir, name string) bool { + return dir == "/" && (name == "topics" || name == "etc") +} diff --git a/weed/mount/meta_cache/meta_cache_subscribe.go b/weed/mount/meta_cache/meta_cache_subscribe.go new file mode 100644 index 000000000..881fee08f --- /dev/null +++ b/weed/mount/meta_cache/meta_cache_subscribe.go @@ -0,0 +1,68 @@ +package meta_cache + +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func SubscribeMetaEvents(mc *MetaCache, selfSignature int32, client filer_pb.FilerClient, dir string, lastTsNs int64) error { + + processEventFn := func(resp *filer_pb.SubscribeMetadataResponse) error { + message := resp.EventNotification + + for _, sig := range message.Signatures { + if sig == selfSignature && selfSignature != 0 { + return nil + } + } + + dir := resp.Directory + var oldPath util.FullPath + var newEntry *filer.Entry + if message.OldEntry != nil { + oldPath = util.NewFullPath(dir, message.OldEntry.Name) + glog.V(4).Infof("deleting %v", oldPath) + } + + if message.NewEntry != nil { + if message.NewParentPath != "" { + dir = message.NewParentPath + } + key := util.NewFullPath(dir, message.NewEntry.Name) + glog.V(4).Infof("creating %v", key) + newEntry = filer.FromPbEntry(dir, 
message.NewEntry) + } + err := mc.AtomicUpdateEntryFromFiler(context.Background(), oldPath, newEntry) + if err == nil { + if message.OldEntry != nil && message.NewEntry != nil { + oldKey := util.NewFullPath(resp.Directory, message.OldEntry.Name) + mc.invalidateFunc(oldKey, message.OldEntry) + if message.OldEntry.Name != message.NewEntry.Name { + newKey := util.NewFullPath(dir, message.NewEntry.Name) + mc.invalidateFunc(newKey, message.NewEntry) + } + } else if message.OldEntry == nil && message.NewEntry != nil { + // no need to invaalidate + } else if message.OldEntry != nil && message.NewEntry == nil { + oldKey := util.NewFullPath(resp.Directory, message.OldEntry.Name) + mc.invalidateFunc(oldKey, message.OldEntry) + } + } + + return err + + } + + util.RetryForever("followMetaUpdates", func() error { + return pb.WithFilerClientFollowMetadata(client, "mount", selfSignature, dir, &lastTsNs, selfSignature, processEventFn, true) + }, func(err error) bool { + glog.Errorf("follow metadata updates: %v", err) + return true + }) + + return nil +} diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index b7f50cd13..0fdd9bd28 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -3,7 +3,7 @@ package mount import ( "context" "github.com/chrislusf/seaweedfs/weed/filer" - "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" + "github.com/chrislusf/seaweedfs/weed/mount/meta_cache" "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/storage/types" @@ -91,7 +91,11 @@ func NewSeaweedFileSystem(option *Option) *WFS { wfs.chunkCache = chunk_cache.NewTieredChunkCache(256, option.getUniqueCacheDir(), option.CacheSizeMB, 1024*1024) } - wfs.metaCache = meta_cache.NewMetaCache(path.Join(option.getUniqueCacheDir(), "meta"), util.FullPath(option.FilerMountRootPath), option.UidGidMapper, func(filePath util.FullPath, entry *filer_pb.Entry) { + wfs.metaCache = 
meta_cache.NewMetaCache(path.Join(option.getUniqueCacheDir(), "meta"), option.UidGidMapper, func(path util.FullPath) { + wfs.inodeToPath.MarkChildrenCached(path) + }, func(path util.FullPath) bool { + return wfs.inodeToPath.IsChildrenCached(path) + }, func(filePath util.FullPath, entry *filer_pb.Entry) { }) grace.OnInterrupt(func() { wfs.metaCache.Shutdown() @@ -103,6 +107,11 @@ func NewSeaweedFileSystem(option *Option) *WFS { return wfs } +func (wfs *WFS) StartBackgroundTasks() { + startTime := time.Now() + go meta_cache.SubscribeMetaEvents(wfs.metaCache, wfs.signature, wfs, wfs.option.FilerMountRootPath, startTime.UnixNano()) +} + func (wfs *WFS) Root() *Directory { return &wfs.root } diff --git a/weed/mount/weedfs_dir_lookup.go b/weed/mount/weedfs_dir_lookup.go index 733e31908..30b61d75f 100644 --- a/weed/mount/weedfs_dir_lookup.go +++ b/weed/mount/weedfs_dir_lookup.go @@ -3,8 +3,8 @@ package mount import ( "context" "github.com/chrislusf/seaweedfs/weed/filer" - "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/mount/meta_cache" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/hanwen/go-fuse/v2/fuse" ) @@ -50,7 +50,7 @@ func (wfs *WFS) Lookup(cancel <-chan struct{}, header *fuse.InHeader, name strin return fuse.ENOENT } - inode := wfs.inodeToPath.Lookup(fullFilePath) + inode := wfs.inodeToPath.Lookup(fullFilePath, localEntry.IsDirectory()) wfs.outputFilerEntry(out, inode, localEntry) diff --git a/weed/mount/weedfs_dir_mkrm.go b/weed/mount/weedfs_dir_mkrm.go index 4efab078f..839fa493b 100644 --- a/weed/mount/weedfs_dir_mkrm.go +++ b/weed/mount/weedfs_dir_mkrm.go @@ -71,7 +71,7 @@ func (wfs *WFS) Mkdir(cancel <-chan struct{}, in *fuse.MkdirIn, name string, out return fuse.EIO } - inode := wfs.inodeToPath.Lookup(entryFullPath) + inode := wfs.inodeToPath.Lookup(entryFullPath, true) wfs.outputPbEntry(out, inode, newEntry) diff --git a/weed/mount/weedfs_dir_read.go 
b/weed/mount/weedfs_dir_read.go index 3a187aa1c..0177c1863 100644 --- a/weed/mount/weedfs_dir_read.go +++ b/weed/mount/weedfs_dir_read.go @@ -3,8 +3,8 @@ package mount import ( "context" "github.com/chrislusf/seaweedfs/weed/filer" - "github.com/chrislusf/seaweedfs/weed/filesys/meta_cache" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/mount/meta_cache" "github.com/chrislusf/seaweedfs/weed/util" "github.com/hanwen/go-fuse/v2/fuse" "math" diff --git a/weed/mount/weedfs_file_mkrm.go b/weed/mount/weedfs_file_mkrm.go index 218ce24f1..c3fd04661 100644 --- a/weed/mount/weedfs_file_mkrm.go +++ b/weed/mount/weedfs_file_mkrm.go @@ -88,7 +88,7 @@ func (wfs *WFS) Mknod(cancel <-chan struct{}, in *fuse.MknodIn, name string, out return fuse.EIO } - inode := wfs.inodeToPath.Lookup(entryFullPath) + inode := wfs.inodeToPath.Lookup(entryFullPath, false) wfs.outputPbEntry(out, inode, newEntry) @@ -125,8 +125,6 @@ func (wfs *WFS) Unlink(cancel <-chan struct{}, header *fuse.InHeader, name strin wfs.metaCache.DeleteEntry(context.Background(), entryFullPath) wfs.inodeToPath.RemovePath(entryFullPath) - // TODO handle open files, hardlink - return fuse.OK } diff --git a/weed/mount/weedfs_forget.go b/weed/mount/weedfs_forget.go index 14b39882e..62946b216 100644 --- a/weed/mount/weedfs_forget.go +++ b/weed/mount/weedfs_forget.go @@ -1,5 +1,10 @@ package mount +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/util" +) + // Forget is called when the kernel discards entries from its // dentry cache. This happens on unmount, and when the kernel // is short on memory. 
Since it is not guaranteed to occur at @@ -57,5 +62,7 @@ Side effects: increments the lookup count on success */ func (wfs *WFS) Forget(nodeid, nlookup uint64) { - wfs.inodeToPath.Forget(nodeid, nlookup) + wfs.inodeToPath.Forget(nodeid, nlookup, func(dir util.FullPath) { + wfs.metaCache.DeleteFolderChildren(context.Background(), dir) + }) } diff --git a/weed/mount/weedfs_link.go b/weed/mount/weedfs_link.go index 05710e5a0..ca252d639 100644 --- a/weed/mount/weedfs_link.go +++ b/weed/mount/weedfs_link.go @@ -85,7 +85,7 @@ func (wfs *WFS) Link(cancel <-chan struct{}, in *fuse.LinkIn, name string, out * return fuse.EIO } - inode := wfs.inodeToPath.Lookup(newEntryPath) + inode := wfs.inodeToPath.Lookup(newEntryPath, false) wfs.outputPbEntry(out, inode, request.Entry) diff --git a/weed/mount/weedfs_rename.go b/weed/mount/weedfs_rename.go index a4054b64a..9e461abce 100644 --- a/weed/mount/weedfs_rename.go +++ b/weed/mount/weedfs_rename.go @@ -223,8 +223,6 @@ func (wfs *WFS) handleRenameResponse(ctx context.Context, resp *filer_pb.StreamR wfs.inodeToPath.MovePath(oldPath, newPath) - // TODO change file handle - } else if resp.EventNotification.OldEntry != nil { // without new entry, only old entry name exists. 
This is the second step to delete old entry if err := wfs.metaCache.AtomicUpdateEntryFromFiler(ctx, util.NewFullPath(resp.Directory, resp.EventNotification.OldEntry.Name), nil); err != nil { diff --git a/weed/mount/weedfs_symlink.go b/weed/mount/weedfs_symlink.go index 86a7b50e4..c47ad0a2e 100644 --- a/weed/mount/weedfs_symlink.go +++ b/weed/mount/weedfs_symlink.go @@ -56,7 +56,7 @@ func (wfs *WFS) Symlink(cancel <-chan struct{}, header *fuse.InHeader, target st return fuse.EIO } - inode := wfs.inodeToPath.Lookup(entryFullPath) + inode := wfs.inodeToPath.Lookup(entryFullPath, false) wfs.outputPbEntry(out, inode, request.Entry) From fe57a2e770bf666ffb6f901631a7407a593225de Mon Sep 17 00:00:00 2001 From: chrislu Date: Mon, 14 Feb 2022 01:36:10 -0800 Subject: [PATCH 39/39] file set attribute --- weed/mount/filehandle_map.go | 7 ++++++ weed/mount/page_writer.go | 4 +-- weed/mount/weedfs.go | 7 ++++-- weed/mount/weedfs_attr.go | 44 ++++++++++++++++++++++++++++++--- weed/mount/weedfs_dir_read.go | 1 - weed/mount/weedfs_filehandle.go | 2 +- weed/mount/weedfs_xattr.go | 8 +++--- 7 files changed, 59 insertions(+), 14 deletions(-) diff --git a/weed/mount/filehandle_map.go b/weed/mount/filehandle_map.go index 50ca6bcea..80cfd02c7 100644 --- a/weed/mount/filehandle_map.go +++ b/weed/mount/filehandle_map.go @@ -30,6 +30,13 @@ func (i *FileHandleToInode) GetFileHandle(fh FileHandleId) *FileHandle { return nil } +func (i *FileHandleToInode) FindFileHandle(inode uint64) (fh *FileHandle, found bool) { + i.RLock() + defer i.RUnlock() + fh, found = i.inode2fh[inode] + return +} + func (i *FileHandleToInode) AcquireFileHandle(wfs *WFS, inode uint64, entry *filer_pb.Entry) *FileHandle { i.Lock() defer i.Unlock() diff --git a/weed/mount/page_writer.go b/weed/mount/page_writer.go index eaf1fc176..8685b3d15 100644 --- a/weed/mount/page_writer.go +++ b/weed/mount/page_writer.go @@ -30,7 +30,7 @@ func newPageWriter(fh *FileHandle, chunkSize int64) *PageWriter { func (pw *PageWriter) 
AddPage(offset int64, data []byte) { - glog.V(4).Infof("%v AddPage [%d, %d)", pw.fh, offset, offset+int64(len(data))) + glog.V(4).Infof("%v AddPage [%d, %d)", pw.fh.fh, offset, offset+int64(len(data))) chunkIndex := offset / pw.chunkSize for i := chunkIndex; len(data) > 0; i++ { @@ -50,7 +50,7 @@ func (pw *PageWriter) FlushData() error { } func (pw *PageWriter) ReadDirtyDataAt(data []byte, offset int64) (maxStop int64) { - glog.V(4).Infof("ReadDirtyDataAt %v [%d, %d)", pw.fh, offset, offset+int64(len(data))) + glog.V(4).Infof("ReadDirtyDataAt %v [%d, %d)", pw.fh.fh, offset, offset+int64(len(data))) chunkIndex := offset / pw.chunkSize for i := chunkIndex; len(data) > 0; i++ { diff --git a/weed/mount/weedfs.go b/weed/mount/weedfs.go index 0fdd9bd28..072f562fc 100644 --- a/weed/mount/weedfs.go +++ b/weed/mount/weedfs.go @@ -120,8 +120,12 @@ func (wfs *WFS) String() string { return "seaweedfs" } -func (wfs *WFS) maybeReadEntry(inode uint64) (path util.FullPath, entry *filer_pb.Entry, status fuse.Status) { +func (wfs *WFS) maybeReadEntry(inode uint64) (path util.FullPath, fh *FileHandle, entry *filer_pb.Entry, status fuse.Status) { path = wfs.inodeToPath.GetPath(inode) + var found bool + if fh, found = wfs.fhmap.FindFileHandle(inode); found { + return path, fh, fh.entry, fuse.OK + } entry, status = wfs.maybeLoadEntry(path) return } @@ -146,7 +150,6 @@ func (wfs *WFS) maybeLoadEntry(fullpath util.FullPath) (*filer_pb.Entry, fuse.St }, fuse.OK } - // TODO Use inode to selectively filetering metadata updates // read from async meta cache meta_cache.EnsureVisited(wfs.metaCache, wfs, util.FullPath(dir)) cachedEntry, cacheErr := wfs.metaCache.FindEntry(context.Background(), fullpath) diff --git a/weed/mount/weedfs_attr.go b/weed/mount/weedfs_attr.go index a907493ad..f34885564 100644 --- a/weed/mount/weedfs_attr.go +++ b/weed/mount/weedfs_attr.go @@ -2,6 +2,7 @@ package mount import ( "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" 
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/hanwen/go-fuse/v2/fuse" "os" @@ -15,7 +16,7 @@ func (wfs *WFS) GetAttr(cancel <-chan struct{}, input *fuse.GetAttrIn, out *fuse return fuse.OK } - _, entry, status := wfs.maybeReadEntry(input.NodeId) + _, _, entry, status := wfs.maybeReadEntry(input.NodeId) if status != fuse.OK { return status } @@ -27,13 +28,43 @@ func (wfs *WFS) GetAttr(cancel <-chan struct{}, input *fuse.GetAttrIn, out *fuse func (wfs *WFS) SetAttr(cancel <-chan struct{}, input *fuse.SetAttrIn, out *fuse.AttrOut) (code fuse.Status) { - // TODO this is only for directory. Filet setAttr involves open files and truncate to a size - - path, entry, status := wfs.maybeReadEntry(input.NodeId) + path, fh, entry, status := wfs.maybeReadEntry(input.NodeId) if status != fuse.OK { return status } + if size, ok := input.GetSize(); ok { + glog.V(4).Infof("%v setattr set size=%v chunks=%d", path, size, len(entry.Chunks)) + if size < filer.FileSize(entry) { + // fmt.Printf("truncate %v \n", fullPath) + var chunks []*filer_pb.FileChunk + var truncatedChunks []*filer_pb.FileChunk + for _, chunk := range entry.Chunks { + int64Size := int64(chunk.Size) + if chunk.Offset+int64Size > int64(size) { + // this chunk is truncated + int64Size = int64(size) - chunk.Offset + if int64Size > 0 { + chunks = append(chunks, chunk) + glog.V(4).Infof("truncated chunk %+v from %d to %d\n", chunk.GetFileIdString(), chunk.Size, int64Size) + chunk.Size = uint64(int64Size) + } else { + glog.V(4).Infof("truncated whole chunk %+v\n", chunk.GetFileIdString()) + truncatedChunks = append(truncatedChunks, chunk) + } + } + } + // set the new chunks and reset entry cache + entry.Chunks = chunks + if fh != nil { + fh.entryViewCache = nil + } + } + entry.Attributes.Mtime = time.Now().Unix() + entry.Attributes.FileSize = size + + } + if mode, ok := input.GetMode(); ok { entry.Attributes.FileMode = uint32(mode) } @@ -54,6 +85,11 @@ func (wfs *WFS) SetAttr(cancel <-chan struct{}, input 
*fuse.SetAttrIn, out *fuse out.AttrValid = 1 wfs.setAttrByPbEntry(&out.Attr, input.NodeId, entry) + if fh != nil { + fh.dirtyMetadata = true + return fuse.OK + } + return wfs.saveEntry(path, entry) } diff --git a/weed/mount/weedfs_dir_read.go b/weed/mount/weedfs_dir_read.go index 0177c1863..ad8a161d7 100644 --- a/weed/mount/weedfs_dir_read.go +++ b/weed/mount/weedfs_dir_read.go @@ -116,7 +116,6 @@ func (wfs *WFS) doReadDirectory(input *fuse.ReadIn, out *fuse.DirEntryList, isPl return true } - // TODO remove this with checking whether directory is not forgotten if err := meta_cache.EnsureVisited(wfs.metaCache, wfs, dirPath); err != nil { glog.Errorf("dir ReadDirAll %s: %v", dirPath, err) return fuse.EIO diff --git a/weed/mount/weedfs_filehandle.go b/weed/mount/weedfs_filehandle.go index 03f72282e..3e085df37 100644 --- a/weed/mount/weedfs_filehandle.go +++ b/weed/mount/weedfs_filehandle.go @@ -3,7 +3,7 @@ package mount import "github.com/hanwen/go-fuse/v2/fuse" func (wfs *WFS) AcquireHandle(inode uint64, uid, gid uint32) (fileHandle *FileHandle, code fuse.Status) { - _, entry, status := wfs.maybeReadEntry(inode) + _, _, entry, status := wfs.maybeReadEntry(inode) if status == fuse.OK { fileHandle = wfs.fhmap.AcquireFileHandle(wfs, inode, entry) fileHandle.entry = entry diff --git a/weed/mount/weedfs_xattr.go b/weed/mount/weedfs_xattr.go index 284e47ec0..389e86148 100644 --- a/weed/mount/weedfs_xattr.go +++ b/weed/mount/weedfs_xattr.go @@ -32,7 +32,7 @@ func (wfs *WFS) GetXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr str return 0, fuse.EINVAL } - _, entry, status := wfs.maybeReadEntry(header.NodeId) + _, _, entry, status := wfs.maybeReadEntry(header.NodeId) if status != fuse.OK { return 0, status } @@ -89,7 +89,7 @@ func (wfs *WFS) SetXAttr(cancel <-chan struct{}, input *fuse.SetXAttrIn, attr st } } - path, entry, status := wfs.maybeReadEntry(input.NodeId) + path, _, entry, status := wfs.maybeReadEntry(input.NodeId) if status != fuse.OK { return status } @@ 
-117,7 +117,7 @@ func (wfs *WFS) SetXAttr(cancel <-chan struct{}, input *fuse.SetXAttrIn, attr st // slice, and return the number of bytes. If the buffer is too // small, return ERANGE, with the required buffer size. func (wfs *WFS) ListXAttr(cancel <-chan struct{}, header *fuse.InHeader, dest []byte) (n uint32, code fuse.Status) { - _, entry, status := wfs.maybeReadEntry(header.NodeId) + _, _, entry, status := wfs.maybeReadEntry(header.NodeId) if status != fuse.OK { return 0, status } @@ -149,7 +149,7 @@ func (wfs *WFS) RemoveXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr if len(attr) == 0 { return fuse.EINVAL } - path, entry, status := wfs.maybeReadEntry(header.NodeId) + path, _, entry, status := wfs.maybeReadEntry(header.NodeId) if status != fuse.OK { return status }