From 4236c3659977dc38e42e64fc1fe74999b2e8d693 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 11 Jul 2022 16:58:15 +0500 Subject: [PATCH 01/12] volume server evacuate to target server --- weed/shell/command_volume_server_evacuate.go | 29 ++++++++++++++------ 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index ffbee0302..f2c24a8b4 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -47,7 +47,7 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, vsEvacuateCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) volumeServer := vsEvacuateCommand.String("node", "", ": of the volume server") - c.targetServer = *vsEvacuateCommand.String("target", "", ": of target volume") + targetServer := vsEvacuateCommand.String("target", "", ": of target volume") skipNonMoveable := vsEvacuateCommand.Bool("skipNonMoveable", false, "skip volumes that can not be moved") applyChange := vsEvacuateCommand.Bool("force", false, "actually apply the changes") retryCount := vsEvacuateCommand.Int("retry", 0, "how many times to retry") @@ -63,6 +63,9 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, if *volumeServer == "" { return fmt.Errorf("need to specify volume server by -node=:") } + if *targetServer != "" { + c.targetServer = *targetServer + } for i := 0; i < *retryCount+1; i++ { if err = c.volumeServerEvacuate(commandEnv, *volumeServer, *skipNonMoveable, *applyChange, writer); err == nil { return nil @@ -103,14 +106,27 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE if thisNode == nil { return fmt.Errorf("%s is not found in this cluster", volumeServer) } + if c.targetServer != "" { + targetServerFound := false + for _, otherNode := range otherNodes { + if otherNode.info.Id == c.targetServer { + otherNodes = []*Node{otherNode} + targetServerFound = true + break + } + } + if !targetServerFound { + return fmt.Errorf("target %s is not found in this cluster", c.targetServer) + } + } // move away normal volumes volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) for _, diskInfo := range thisNode.info.DiskInfos { for _, vol := range diskInfo.VolumeInfos { - hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) + hasMoved, err := c.moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) if err != nil { - return fmt.Errorf("move away volume %d from %s: %v", vol.Id, volumeServer, err) + fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) } if !hasMoved { if skipNonMoveable { @@ -138,7 +154,7 @@ func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, for _, ecShardInfo := range diskInfo.EcShardInfos { hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange) if err != nil { - return fmt.Errorf("move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) + fmt.Fprintf(writer, "move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) } if !hasMoved { if skipNonMoveable { @@ -160,9 +176,6 @@ func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv }) for i := 0; i < len(otherNodes); i++ { emptyNode := otherNodes[i] - if c.targetServer != "" && c.targetServer != emptyNode.info.Id { - continue - } collectionPrefix := "" if ecShardInfo.Collection != "" { collectionPrefix = ecShardInfo.Collection + "_" @@ -184,7 +197,7 @@ func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv return } -func moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, vol *master_pb.VolumeInformationMessage, thisNode *Node, otherNodes []*Node, applyChange bool) (hasMoved bool, err error) { +func (c *commandVolumeServerEvacuate) moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, vol *master_pb.VolumeInformationMessage, thisNode *Node, otherNodes []*Node, applyChange bool) (hasMoved bool, err error) { fn := capacityByFreeVolumeCount(types.ToDiskType(vol.DiskType)) for _, n := range otherNodes { n.selectVolumes(func(v *master_pb.VolumeInformationMessage) bool { From 087fa1347f6348b829b95a03877367667cea5de8 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Tue, 12 Jul 2022 11:33:08 +0500 Subject: [PATCH 02/12] volume server evacuate from rack --- weed/shell/command_volume_server_evacuate.go | 103 ++++++++++--------- 1 file changed, 56 insertions(+), 47 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index f2c24a8b4..37fb29b14 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -19,6 +19,7 @@ func init() { type commandVolumeServerEvacuate struct { targetServer string + volumeRack string } func (c *commandVolumeServerEvacuate) Name() string { @@ -47,6 +48,7 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, vsEvacuateCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) volumeServer := vsEvacuateCommand.String("node", "", ": of the volume server") + volumeRack := vsEvacuateCommand.String("rack", "", "rack for then volume servers") targetServer := vsEvacuateCommand.String("target", "", ": of target volume") skipNonMoveable := vsEvacuateCommand.Bool("skipNonMoveable", false, "skip volumes that can not be moved") applyChange := vsEvacuateCommand.Bool("force", false, "actually apply the changes") @@ -66,6 +68,9 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, if *targetServer != "" { c.targetServer = *targetServer } + if *volumeRack != "" { + c.volumeRack = *volumeRack + } for i := 0; i < *retryCount+1; i++ { if err = c.volumeServerEvacuate(commandEnv, *volumeServer, *skipNonMoveable, *applyChange, writer); err == nil { return nil @@ -102,41 +107,31 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this volume server volumeServers := collectVolumeServersByDc(topologyInfo, "") - thisNode, otherNodes := nodesOtherThan(volumeServers, volumeServer) - if thisNode == nil { + thisNodes, otherNodes := c.nodesOtherThan(volumeServers, volumeServer) + if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster", volumeServer) } - if c.targetServer != "" { - targetServerFound := false - for _, otherNode := range otherNodes { - if otherNode.info.Id == c.targetServer { - otherNodes = []*Node{otherNode} - targetServerFound = true - break - } - } - if !targetServerFound { - return fmt.Errorf("target %s is not found in this cluster", c.targetServer) - } - } // move away normal volumes - volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) - for _, diskInfo := range thisNode.info.DiskInfos { - for _, vol := range diskInfo.VolumeInfos { - hasMoved, err := c.moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) - if err != nil { - fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) - } - if !hasMoved { - if skipNonMoveable { - replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(vol.ReplicaPlacement)) - fmt.Fprintf(writer, "skipping non moveable volume %d replication:%s\n", vol.Id, replicaPlacement.String()) - } else { - return fmt.Errorf("failed to move volume %d from %s", vol.Id, volumeServer) + for _, thisNode := range thisNodes { + for _, diskInfo := range thisNode.info.DiskInfos { + volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) + for _, vol := range diskInfo.VolumeInfos { + hasMoved, err := c.moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) + if err != nil { + fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) + } + if !hasMoved { + if skipNonMoveable { + replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(vol.ReplicaPlacement)) + fmt.Fprintf(writer, "skipping non moveable volume %d replication:%s\n", vol.Id, replicaPlacement.String()) + } else { + return fmt.Errorf("failed to move volume %d from %s", vol.Id, volumeServer) + } } } } + } return nil } @@ -144,23 +139,25 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this ec volume server ecNodes, _ := collectEcVolumeServersByDc(topologyInfo, "") - thisNode, otherNodes := ecNodesOtherThan(ecNodes, volumeServer) - if thisNode == nil { + thisNodes, otherNodes := c.ecNodesOtherThan(ecNodes, volumeServer) + if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster\n", volumeServer) } // move away ec volumes - for _, diskInfo := range thisNode.info.DiskInfos { - for _, ecShardInfo := range diskInfo.EcShardInfos { - hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange) - if err != nil { - fmt.Fprintf(writer, "move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) - } - if !hasMoved { - if skipNonMoveable { - fmt.Fprintf(writer, "failed to move away ec volume %d from %s\n", ecShardInfo.Id, volumeServer) - } else { - return fmt.Errorf("failed to move away ec volume %d from %s", ecShardInfo.Id, volumeServer) + for _, thisNode := range thisNodes { + for _, diskInfo := range thisNode.info.DiskInfos { + for _, ecShardInfo := range diskInfo.EcShardInfos { + hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange) + if err != nil { + fmt.Fprintf(writer, "move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) + } + if !hasMoved { + if skipNonMoveable { + fmt.Fprintf(writer, "failed to move away ec volume %d from %s\n", ecShardInfo.Id, volumeServer) + } else { + return fmt.Errorf("failed to move away ec volume %d from %s", ecShardInfo.Id, volumeServer) + } } } } @@ -220,10 +217,16 @@ func (c *commandVolumeServerEvacuate) moveAwayOneNormalVolume(commandEnv *Comman return } -func nodesOtherThan(volumeServers []*Node, thisServer string) (thisNode *Node, otherNodes []*Node) { +func (c *commandVolumeServerEvacuate) nodesOtherThan(volumeServers []*Node, thisServer string) (thisNodes []*Node, otherNodes []*Node) { for _, node := range volumeServers { - if node.info.Id == thisServer { - thisNode = node + if node.info.Id == thisServer || (c.volumeRack != "" && node.rack == c.volumeRack) { + thisNodes = append(thisNodes, node) + continue + } + if c.volumeRack != "" && c.volumeRack == node.rack { + continue + } + if c.targetServer != "" && c.targetServer != node.info.Id { continue } otherNodes = append(otherNodes, node) @@ -231,10 +234,16 @@ func nodesOtherThan(volumeServers []*Node, thisServer string) (thisNode *Node, o return } -func ecNodesOtherThan(volumeServers []*EcNode, thisServer string) (thisNode *EcNode, otherNodes []*EcNode) { +func (c *commandVolumeServerEvacuate) ecNodesOtherThan(volumeServers []*EcNode, thisServer string) (thisNodes []*EcNode, otherNodes []*EcNode) { for _, node := range volumeServers { - if node.info.Id == thisServer { - thisNode = node + if node.info.Id == thisServer || (c.volumeRack != "" && string(node.rack) == c.volumeRack) { + thisNodes = append(thisNodes, node) + continue + } + if c.volumeRack != "" && c.volumeRack == string(node.rack) { + continue + } + if c.targetServer != "" && c.targetServer != node.info.Id { continue } otherNodes = append(otherNodes, node) From ee95d23a22d55a12662c1ebc8e2292b61f505bb0 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Tue, 12 Jul 2022 11:56:58 +0500 Subject: [PATCH 03/12] help rack --- weed/shell/command_volume_server_evacuate.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index 37fb29b14..dad8d8626 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -48,7 +48,7 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, vsEvacuateCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) volumeServer := vsEvacuateCommand.String("node", "", ": of the volume server") - volumeRack := vsEvacuateCommand.String("rack", "", "rack for then volume servers") + volumeRack := vsEvacuateCommand.String("rack", "", "source rack for the volume servers") targetServer := vsEvacuateCommand.String("target", "", ": of target volume") skipNonMoveable := vsEvacuateCommand.Bool("skipNonMoveable", false, "skip volumes that can not be moved") applyChange := vsEvacuateCommand.Bool("force", false, "actually apply the changes") From 8372721a62f2809ce99aaaf31f4bad5bbf2d99b1 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Tue, 12 Jul 2022 13:47:21 +0500 Subject: [PATCH 04/12] update topologyInfo --- weed/shell/command_volume_server_evacuate.go | 32 +++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index dad8d8626..195cc2699 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -11,6 +11,7 @@ import ( "golang.org/x/exp/slices" "io" "os" + "time" ) func init() { @@ -18,6 +19,7 @@ func init() { } type commandVolumeServerEvacuate struct { + topologyInfo *master_pb.TopologyInfo targetServer string volumeRack string } @@ -58,12 +60,12 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, } infoAboutSimulationMode(writer, *applyChange, "-force") - if err = commandEnv.confirmIsLocked(args); err != nil { + if err = commandEnv.confirmIsLocked(args); err != nil && *applyChange { return } - if *volumeServer == "" { - return fmt.Errorf("need to specify volume server by -node=:") + if *volumeServer == "" && *volumeRack == "" { + return fmt.Errorf("need to specify volume server by -node=: or source rack") } if *targetServer != "" { c.targetServer = *targetServer @@ -88,25 +90,33 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn // list all the volumes // collect topology information - topologyInfo, _, err := collectTopologyInfo(commandEnv, 0) + c.topologyInfo, _, err = collectTopologyInfo(commandEnv, 0) if err != nil { return err } - if err := c.evacuateNormalVolumes(commandEnv, topologyInfo, volumeServer, skipNonMoveable, applyChange, writer); err != nil { + go func() { + for { + if topologyInfo, _, err := collectTopologyInfo(commandEnv, 5*time.Minute); err != nil { + c.topologyInfo = topologyInfo + } + } + }() + + if err := c.evacuateNormalVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err } - if err := c.evacuateEcVolumes(commandEnv, topologyInfo, volumeServer, skipNonMoveable, applyChange, writer); err != nil { + if err := c.evacuateEcVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err } return nil } -func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { +func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this volume server - volumeServers := collectVolumeServersByDc(topologyInfo, "") + volumeServers := collectVolumeServersByDc(c.topologyInfo, "") thisNodes, otherNodes := c.nodesOtherThan(volumeServers, volumeServer) if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster", volumeServer) @@ -115,7 +125,7 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE // move away normal volumes for _, thisNode := range thisNodes { for _, diskInfo := range thisNode.info.DiskInfos { - volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) + volumeReplicas, _ := collectVolumeReplicaLocations(c.topologyInfo) for _, vol := range diskInfo.VolumeInfos { hasMoved, err := c.moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) if err != nil { @@ -136,9 +146,9 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE return nil } -func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { +func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this ec volume server - ecNodes, _ := collectEcVolumeServersByDc(topologyInfo, "") + ecNodes, _ := collectEcVolumeServersByDc(c.topologyInfo, "") thisNodes, otherNodes := c.ecNodesOtherThan(ecNodes, volumeServer) if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster\n", volumeServer) From 6622240df70c04b381b7f6daaf9dd07494b10701 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Tue, 12 Jul 2022 14:56:34 +0500 Subject: [PATCH 05/12] fix TestVolumeServerEvacuate --- weed/shell/command_volume_server_evacuate_test.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate_test.go b/weed/shell/command_volume_server_evacuate_test.go index 2cdb94a60..4563f38ba 100644 --- a/weed/shell/command_volume_server_evacuate_test.go +++ b/weed/shell/command_volume_server_evacuate_test.go @@ -6,12 +6,11 @@ import ( ) func TestVolumeServerEvacuate(t *testing.T) { - topologyInfo := parseOutput(topoData) + c := commandVolumeServerEvacuate{} + c.topologyInfo = parseOutput(topoData) volumeServer := "192.168.1.4:8080" - - c := commandVolumeServerEvacuate{} - if err := c.evacuateNormalVolumes(nil, topologyInfo, volumeServer, true, false, os.Stdout); err != nil { + if err := c.evacuateNormalVolumes(nil, volumeServer, true, false, os.Stdout); err != nil { t.Errorf("evacuate: %v", err) } From 4d5144e50d39fb33aea688f9c1bc5f0f3711c8c0 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Fri, 15 Jul 2022 13:51:08 +0500 Subject: [PATCH 06/12] clouse background update --- weed/shell/command_volume_server_evacuate.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index 195cc2699..3b0c8381b 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -14,6 +14,8 @@ import ( "time" ) +const topologyInfoUpdateInterval = 5 * time.Minute + func init() { Commands = append(Commands, &commandVolumeServerEvacuate{}) } @@ -95,13 +97,22 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn return err } + stopchan := make(chan struct{}) go func() { for { - if topologyInfo, _, err := collectTopologyInfo(commandEnv, 5*time.Minute); err != nil { - c.topologyInfo = topologyInfo + select { + default: + if topologyInfo, _, err := collectTopologyInfo(commandEnv, topologyInfoUpdateInterval); err != nil { + c.topologyInfo = topologyInfo + } else { + fmt.Fprintf(writer, "update topologyInfo %v", err) + } + case <-stopchan: + return } } }() + defer close(stopchan) if err := c.evacuateNormalVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err @@ -127,7 +138,7 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE for _, diskInfo := range thisNode.info.DiskInfos { volumeReplicas, _ := collectVolumeReplicaLocations(c.topologyInfo) for _, vol := range diskInfo.VolumeInfos { - hasMoved, err := c.moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) + hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) if err != nil { fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) } @@ -204,7 +215,7 @@ func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv return } -func (c *commandVolumeServerEvacuate) moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, vol *master_pb.VolumeInformationMessage, thisNode *Node, otherNodes []*Node, applyChange bool) (hasMoved bool, err error) { +func moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, vol *master_pb.VolumeInformationMessage, thisNode *Node, otherNodes []*Node, applyChange bool) (hasMoved bool, err error) { fn := capacityByFreeVolumeCount(types.ToDiskType(vol.DiskType)) for _, n := range otherNodes { n.selectVolumes(func(v *master_pb.VolumeInformationMessage) bool { From 3c2774ec3de28fc3c5fdff4168cffbe31feabdb9 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 18 Jul 2022 01:46:31 +0500 Subject: [PATCH 07/12] fix update topologyInfo --- weed/shell/command_volume_server_evacuate.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index 3b0c8381b..0595ef308 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -103,9 +103,9 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn select { default: if topologyInfo, _, err := collectTopologyInfo(commandEnv, topologyInfoUpdateInterval); err != nil { - c.topologyInfo = topologyInfo - } else { fmt.Fprintf(writer, "update topologyInfo %v", err) + } else { + c.topologyInfo = topologyInfo } case <-stopchan: return From 2b4112e462d48af926caf6431a3de6ef256afcae Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 18 Jul 2022 11:32:28 +0500 Subject: [PATCH 08/12] update otherNodes --- weed/shell/command_volume_server_evacuate.go | 40 +++++++++++--------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index 0595ef308..7d50b7f81 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -24,6 +24,7 @@ type commandVolumeServerEvacuate struct { topologyInfo *master_pb.TopologyInfo targetServer string volumeRack string + otherNodes []*Node } func (c *commandVolumeServerEvacuate) Name() string { @@ -97,22 +98,27 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn return err } - stopchan := make(chan struct{}) - go func() { - for { - select { - default: - if topologyInfo, _, err := collectTopologyInfo(commandEnv, topologyInfoUpdateInterval); err != nil { - fmt.Fprintf(writer, "update topologyInfo %v", err) - } else { - c.topologyInfo = topologyInfo + if applyChange { + stopchan := make(chan struct{}) + go func() { + for { + select { + default: + if topologyInfo, _, err := collectTopologyInfo(commandEnv, topologyInfoUpdateInterval); err != nil { + fmt.Fprintf(writer, "update topologyInfo %v", err) + } else { + c.topologyInfo = topologyInfo + _, c.otherNodes = c.nodesOtherThan( + collectVolumeServersByDc(c.topologyInfo, ""), volumeServer) + fmt.Fprintf(writer, "topologyInfo updated %v\n", len(c.otherNodes)) + } + case <-stopchan: + return } - case <-stopchan: - return } - } - }() - defer close(stopchan) + }() + defer close(stopchan) + } if err := c.evacuateNormalVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err @@ -128,7 +134,8 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this volume server volumeServers := collectVolumeServersByDc(c.topologyInfo, "") - thisNodes, otherNodes := c.nodesOtherThan(volumeServers, volumeServer) + var thisNodes []*Node + thisNodes, c.otherNodes = c.nodesOtherThan(volumeServers, volumeServer) if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster", volumeServer) } @@ -138,7 +145,7 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE for _, diskInfo := range thisNode.info.DiskInfos { volumeReplicas, _ := collectVolumeReplicaLocations(c.topologyInfo) for _, vol := range diskInfo.VolumeInfos { - hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) + hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, c.otherNodes, applyChange) if err != nil { fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) } @@ -152,7 +159,6 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE } } } - } return nil } From 73a0dea16bb72a9f8b8d89cd53ce3f92092af5fb Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 18 Jul 2022 16:27:02 +0500 Subject: [PATCH 09/12] sync update topologyInfo --- weed/shell/command_volume_server_evacuate.go | 45 +++++++++----------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index 7d50b7f81..d1c474a76 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -24,7 +24,6 @@ type commandVolumeServerEvacuate struct { topologyInfo *master_pb.TopologyInfo targetServer string volumeRack string - otherNodes []*Node } func (c *commandVolumeServerEvacuate) Name() string { @@ -98,28 +97,6 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn return err } - if applyChange { - stopchan := make(chan struct{}) - go func() { - for { - select { - default: - if topologyInfo, _, err := collectTopologyInfo(commandEnv, topologyInfoUpdateInterval); err != nil { - fmt.Fprintf(writer, "update topologyInfo %v", err) - } else { - c.topologyInfo = topologyInfo - _, c.otherNodes = c.nodesOtherThan( - collectVolumeServersByDc(c.topologyInfo, ""), volumeServer) - fmt.Fprintf(writer, "topologyInfo updated %v\n", len(c.otherNodes)) - } - case <-stopchan: - return - } - } - }() - defer close(stopchan) - } - if err := c.evacuateNormalVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err } @@ -134,18 +111,34 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this volume server volumeServers := collectVolumeServersByDc(c.topologyInfo, "") - var thisNodes []*Node - thisNodes, c.otherNodes = c.nodesOtherThan(volumeServers, volumeServer) + thisNodes, otherNodes := c.nodesOtherThan(volumeServers, volumeServer) if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster", volumeServer) } // move away normal volumes + ticker := time.NewTicker(topologyInfoUpdateInterval) for _, thisNode := range thisNodes { for _, diskInfo := range thisNode.info.DiskInfos { + if applyChange { + select { + case <-ticker.C: + if topologyInfo, _, err := collectTopologyInfo(commandEnv, 0); err != nil { + fmt.Fprintf(writer, "update topologyInfo %v", err) + } else { + _, otherNodesNew := c.nodesOtherThan( + collectVolumeServersByDc(topologyInfo, ""), volumeServer) + if len(otherNodesNew) > 0 { + otherNodes = otherNodesNew + c.topologyInfo = topologyInfo + fmt.Fprintf(writer, "topologyInfo updated %v\n", len(otherNodes)) + } + } + } + } volumeReplicas, _ := collectVolumeReplicaLocations(c.topologyInfo) for _, vol := range diskInfo.VolumeInfos { - hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, c.otherNodes, applyChange) + hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) if err != nil { fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) } From d422e7769c7f52cd250c3bf8ced279cfef8c8ef0 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Mon, 18 Jul 2022 16:38:19 +0500 Subject: [PATCH 10/12] ticker.Stop --- weed/shell/command_volume_server_evacuate.go | 1 + 1 file changed, 1 insertion(+) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index d1c474a76..c9df2c79a 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -118,6 +118,7 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE // move away normal volumes ticker := time.NewTicker(topologyInfoUpdateInterval) + defer ticker.Stop() for _, thisNode := range thisNodes { for _, diskInfo := range thisNode.info.DiskInfos { if applyChange { From 11e393dbe796ea3adf56e9fac9852dd8bb07ac2a Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Wed, 20 Jul 2022 00:45:13 +0500 Subject: [PATCH 11/12] err msg with duplicated local subscription detected move to log level 1 https://github.com/chrislusf/seaweedfs/issues/3320 --- weed/filer/meta_aggregator.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/weed/filer/meta_aggregator.go b/weed/filer/meta_aggregator.go index c672ce342..5799e247e 100644 --- a/weed/filer/meta_aggregator.go +++ b/weed/filer/meta_aggregator.go @@ -7,6 +7,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/pb/master_pb" "github.com/chrislusf/seaweedfs/weed/util" "io" + "strings" "sync" "time" @@ -99,7 +100,11 @@ func (ma *MetaAggregator) loopSubscribeToOneFiler(f *Filer, self pb.ServerAddres return } if err != nil { - glog.V(0).Infof("subscribing remote %s meta change: %v", peer, err) + errLvl := glog.Level(0) + if strings.Contains(err.Error(), "duplicated local subscription detected") { + errLvl = glog.Level(1) + } + glog.V(errLvl).Infof("subscribing remote %s meta change: %v", peer, err) } if lastTsNs < nextLastTsNs { lastTsNs = nextLastTsNs From d3f7c09c03168fc0c29dd19c90dd9d2c86b2bda3 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Wed, 20 Jul 2022 00:54:23 +0500 Subject: [PATCH 12/12] remove ticker update the topology before each file --- weed/shell/command_volume_server_evacuate.go | 26 +++++++------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index c9df2c79a..f72d73230 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -11,11 +11,8 @@ import ( "golang.org/x/exp/slices" "io" "os" - "time" ) -const topologyInfoUpdateInterval = 5 * time.Minute - func init() { Commands = append(Commands, &commandVolumeServerEvacuate{}) } @@ -117,23 +114,18 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE } // move away normal volumes - ticker := time.NewTicker(topologyInfoUpdateInterval) - defer ticker.Stop() for _, thisNode := range thisNodes { for _, diskInfo := range thisNode.info.DiskInfos { if applyChange { - select { - case <-ticker.C: - if topologyInfo, _, err := collectTopologyInfo(commandEnv, 0); err != nil { - fmt.Fprintf(writer, "update topologyInfo %v", err) - } else { - _, otherNodesNew := c.nodesOtherThan( - collectVolumeServersByDc(topologyInfo, ""), volumeServer) - if len(otherNodesNew) > 0 { - otherNodes = otherNodesNew - c.topologyInfo = topologyInfo - fmt.Fprintf(writer, "topologyInfo updated %v\n", len(otherNodes)) - } + if topologyInfo, _, err := collectTopologyInfo(commandEnv, 0); err != nil { + fmt.Fprintf(writer, "update topologyInfo %v", err) + } else { + _, otherNodesNew := c.nodesOtherThan( + collectVolumeServersByDc(topologyInfo, ""), volumeServer) + if len(otherNodesNew) > 0 { + otherNodes = otherNodesNew + c.topologyInfo = topologyInfo + fmt.Fprintf(writer, "topologyInfo updated %v\n", len(otherNodes)) } } }