From 7edbee6f57f9e3ba90df46255d0b836fbf0f6976 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 15 Mar 2020 02:50:42 -0700 Subject: [PATCH] volume: proxy writes to remote volume server, with replication or not the panic is triggered by uploading a file to a volume server not holding the designated replica. 2020-03-15 10:20:14.365488 I | http: panic serving 127.0.0.1:57124: runtime error: invalid memory address or nil pointer dereference goroutine 119 [running]: net/http.(*conn).serve.func1(0xc0001a8000) /home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:1772 +0x139 panic(0x2316fe0, 0x3662900) /home/travis/.gimme/versions/go1.14.linux.amd64/src/runtime/panic.go:973 +0x396 github.com/chrislusf/seaweedfs/weed/topology.getWritableRemoteReplications(0xc00009c000, 0x2, 0x7ffeefbffbd2, 0xe, 0x0, 0xa, 0x0, 0x0, 0xbb4bf1f7) /home/travis/gopath/src/github.com/chrislusf/seaweedfs/weed/topology/store_replicate.go:157 +0x53 github.com/chrislusf/seaweedfs/weed/topology.ReplicatedWrite(0x7ffeefbffbd2, 0xe, 0xc00009c000, 0xc000000002, 0xc000472750, 0xc0001b2200, 0x0, 0x1, 0x0) /home/travis/gopath/src/github.com/chrislusf/seaweedfs/weed/topology/store_replicate.go:29 +0xc7 github.com/chrislusf/seaweedfs/weed/server.(*VolumeServer).PostHandler(0xc0001513f0, 0x292bde0, 0xc0001fe2a0, 0xc0001b2200) /home/travis/gopath/src/github.com/chrislusf/seaweedfs/weed/server/volume_server_handlers_write.go:52 +0x56f github.com/chrislusf/seaweedfs/weed/server.(*VolumeServer).privateStoreHandler(0xc0001513f0, 0x292bde0, 0xc0001fe2a0, 0xc0001b2200) /home/travis/gopath/src/github.com/chrislusf/seaweedfs/weed/server/volume_server_handlers.go:37 +0x21f net/http.HandlerFunc.ServeHTTP(0xc0004420e0, 0x292bde0, 0xc0001fe2a0, 0xc0001b2200) /home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:2012 +0x44 net/http.(*ServeMux).ServeHTTP(0xc0001fc800, 0x292bde0, 0xc0001fe2a0, 0xc0001b2200) /home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:2387 +0x1a5 
net/http.serverHandler.ServeHTTP(0xc0001781c0, 0x292bde0, 0xc0001fe2a0, 0xc0001b2200) /home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:2807 +0xa3 net/http.(*conn).serve(0xc0001a8000, 0x2934420, 0xc000212400) /home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:1895 +0x86c created by net/http.(*Server).Serve /home/travis/.gimme/versions/go1.14.linux.amd64/src/net/http/server.go:2933 +0x35c E.g.: server A (datacenter 1) and server B (datacenter 2) hold replica (100) for volume 1. Uploading a file with the key 1,xxxxx to server C (datacenter 3) triggers the panic on server C. Server C should either proxy the upload to the correct volume server or return an HTTP error code instead of panicking. --- weed/topology/store_replicate.go | 41 ++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/weed/topology/store_replicate.go b/weed/topology/store_replicate.go index c7738311c..495c38cfa 100644 --- a/weed/topology/store_replicate.go +++ b/weed/topology/store_replicate.go @@ -154,27 +154,32 @@ func distributedOperation(locations []operation.Location, store *storage.Store, func getWritableRemoteReplications(s *storage.Store, volumeId needle.VolumeId, masterNode string) ( remoteLocations []operation.Location, err error) { + v := s.GetVolume(volumeId) - if v == nil { - return nil, fmt.Errorf("fail to find volume %d", volumeId) + if v != nil && v.ReplicaPlacement.GetCopyCount() == 1 { + return } - copyCount := v.ReplicaPlacement.GetCopyCount() - if copyCount > 1 { - if lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String()); lookupErr == nil { - if len(lookupResult.Locations) < copyCount { - err = fmt.Errorf("replicating opetations [%d] is less than volume %d replication copy count [%d]", - len(lookupResult.Locations), volumeId, copyCount) - return - } - selfUrl := s.Ip + ":" + strconv.Itoa(s.Port) - for _, location := range lookupResult.Locations { - if location.Url != 
selfUrl { - remoteLocations = append(remoteLocations, location) - } + + // not on local store, or has replications + lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String()) + if lookupErr == nil { + selfUrl := s.Ip + ":" + strconv.Itoa(s.Port) + for _, location := range lookupResult.Locations { + if location.Url != selfUrl { + remoteLocations = append(remoteLocations, location) } - } else { - err = fmt.Errorf("failed to lookup for %d: %v", volumeId, lookupErr) - return + } + } else { + err = fmt.Errorf("failed to lookup for %d: %v", volumeId, lookupErr) + return + } + + if v != nil { + // has one local and has remote replications + copyCount := v.ReplicaPlacement.GetCopyCount() + if len(lookupResult.Locations) < copyCount { + err = fmt.Errorf("replicating opetations [%d] is less than volume %d replication copy count [%d]", + len(lookupResult.Locations), volumeId, copyCount) } }