diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go index ffbee0302..f72d73230 100644 --- a/weed/shell/command_volume_server_evacuate.go +++ b/weed/shell/command_volume_server_evacuate.go @@ -18,7 +18,9 @@ func init() { } type commandVolumeServerEvacuate struct { + topologyInfo *master_pb.TopologyInfo targetServer string + volumeRack string } func (c *commandVolumeServerEvacuate) Name() string { @@ -47,7 +49,8 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, vsEvacuateCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) volumeServer := vsEvacuateCommand.String("node", "", ": of the volume server") - c.targetServer = *vsEvacuateCommand.String("target", "", ": of target volume") + volumeRack := vsEvacuateCommand.String("rack", "", "source rack for the volume servers") + targetServer := vsEvacuateCommand.String("target", "", ": of target volume") skipNonMoveable := vsEvacuateCommand.Bool("skipNonMoveable", false, "skip volumes that can not be moved") applyChange := vsEvacuateCommand.Bool("force", false, "actually apply the changes") retryCount := vsEvacuateCommand.Int("retry", 0, "how many times to retry") @@ -56,12 +59,18 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, } infoAboutSimulationMode(writer, *applyChange, "-force") - if err = commandEnv.confirmIsLocked(args); err != nil { + if err = commandEnv.confirmIsLocked(args); err != nil && *applyChange { return } - if *volumeServer == "" { - return fmt.Errorf("need to specify volume server by -node=:") + if *volumeServer == "" && *volumeRack == "" { + return fmt.Errorf("need to specify volume server by -node=: or source rack") + } + if *targetServer != "" { + c.targetServer = *targetServer + } + if *volumeRack != "" { + c.volumeRack = *volumeRack } for i := 0; i < *retryCount+1; i++ { if err = c.volumeServerEvacuate(commandEnv, *volumeServer, *skipNonMoveable, *applyChange, writer); err == nil { @@ -80,44 +89,59 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn // list all the volumes // collect topology information - topologyInfo, _, err := collectTopologyInfo(commandEnv, 0) + c.topologyInfo, _, err = collectTopologyInfo(commandEnv, 0) if err != nil { return err } - if err := c.evacuateNormalVolumes(commandEnv, topologyInfo, volumeServer, skipNonMoveable, applyChange, writer); err != nil { + if err := c.evacuateNormalVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err } - if err := c.evacuateEcVolumes(commandEnv, topologyInfo, volumeServer, skipNonMoveable, applyChange, writer); err != nil { + if err := c.evacuateEcVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { return err } return nil } -func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { +func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this volume server - volumeServers := collectVolumeServersByDc(topologyInfo, "") - thisNode, otherNodes := nodesOtherThan(volumeServers, volumeServer) - if thisNode == nil { + volumeServers := collectVolumeServersByDc(c.topologyInfo, "") + thisNodes, otherNodes := c.nodesOtherThan(volumeServers, volumeServer) + if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster", volumeServer) } // move away normal volumes - volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) - for _, diskInfo := range thisNode.info.DiskInfos { - for _, vol := range diskInfo.VolumeInfos { - hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) - if err != nil { - return fmt.Errorf("move away volume %d from %s: %v", vol.Id, volumeServer, err) - } - if !hasMoved { - if skipNonMoveable { - replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(vol.ReplicaPlacement)) - fmt.Fprintf(writer, "skipping non moveable volume %d replication:%s\n", vol.Id, replicaPlacement.String()) + for _, thisNode := range thisNodes { + for _, diskInfo := range thisNode.info.DiskInfos { + if applyChange { + if topologyInfo, _, err := collectTopologyInfo(commandEnv, 0); err != nil { + fmt.Fprintf(writer, "update topologyInfo %v", err) } else { - return fmt.Errorf("failed to move volume %d from %s", vol.Id, volumeServer) + _, otherNodesNew := c.nodesOtherThan( + collectVolumeServersByDc(topologyInfo, ""), volumeServer) + if len(otherNodesNew) > 0 { + otherNodes = otherNodesNew + c.topologyInfo = topologyInfo + fmt.Fprintf(writer, "topologyInfo updated %v\n", len(otherNodes)) + } + } + } + volumeReplicas, _ := collectVolumeReplicaLocations(c.topologyInfo) + for _, vol := range diskInfo.VolumeInfos { + hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) + if err != nil { + fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) + } + if !hasMoved { + if skipNonMoveable { + replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(vol.ReplicaPlacement)) + fmt.Fprintf(writer, "skipping non moveable volume %d replication:%s\n", vol.Id, replicaPlacement.String()) + } else { + return fmt.Errorf("failed to move volume %d from %s", vol.Id, volumeServer) + } } } } @@ -125,26 +149,28 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE return nil } -func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { +func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { // find this ec volume server - ecNodes, _ := collectEcVolumeServersByDc(topologyInfo, "") - thisNode, otherNodes := ecNodesOtherThan(ecNodes, volumeServer) - if thisNode == nil { + ecNodes, _ := collectEcVolumeServersByDc(c.topologyInfo, "") + thisNodes, otherNodes := c.ecNodesOtherThan(ecNodes, volumeServer) + if len(thisNodes) == 0 { return fmt.Errorf("%s is not found in this cluster\n", volumeServer) } // move away ec volumes - for _, diskInfo := range thisNode.info.DiskInfos { - for _, ecShardInfo := range diskInfo.EcShardInfos { - hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange) - if err != nil { - return fmt.Errorf("move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) - } - if !hasMoved { - if skipNonMoveable { - fmt.Fprintf(writer, "failed to move away ec volume %d from %s\n", ecShardInfo.Id, volumeServer) - } else { - return fmt.Errorf("failed to move away ec volume %d from %s", ecShardInfo.Id, volumeServer) + for _, thisNode := range thisNodes { + for _, diskInfo := range thisNode.info.DiskInfos { + for _, ecShardInfo := range diskInfo.EcShardInfos { + hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange) + if err != nil { + fmt.Fprintf(writer, "move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) + } + if !hasMoved { + if skipNonMoveable { + fmt.Fprintf(writer, "failed to move away ec volume %d from %s\n", ecShardInfo.Id, volumeServer) + } else { + return fmt.Errorf("failed to move away ec volume %d from %s", ecShardInfo.Id, volumeServer) + } } } } @@ -160,9 +186,6 @@ func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv }) for i := 0; i < len(otherNodes); i++ { emptyNode := otherNodes[i] - if c.targetServer != "" && c.targetServer != emptyNode.info.Id { - continue - } collectionPrefix := "" if ecShardInfo.Collection != "" { collectionPrefix = ecShardInfo.Collection + "_" @@ -207,10 +230,16 @@ func moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][ return } -func nodesOtherThan(volumeServers []*Node, thisServer string) (thisNode *Node, otherNodes []*Node) { +func (c *commandVolumeServerEvacuate) nodesOtherThan(volumeServers []*Node, thisServer string) (thisNodes []*Node, otherNodes []*Node) { for _, node := range volumeServers { - if node.info.Id == thisServer { - thisNode = node + if node.info.Id == thisServer || (c.volumeRack != "" && node.rack == c.volumeRack) { + thisNodes = append(thisNodes, node) + continue + } + if c.volumeRack != "" && c.volumeRack == node.rack { + continue + } + if c.targetServer != "" && c.targetServer != node.info.Id { continue } otherNodes = append(otherNodes, node) @@ -218,10 +247,16 @@ func nodesOtherThan(volumeServers []*Node, thisServer string) (thisNode *Node, o return } -func ecNodesOtherThan(volumeServers []*EcNode, thisServer string) (thisNode *EcNode, otherNodes []*EcNode) { +func (c *commandVolumeServerEvacuate) ecNodesOtherThan(volumeServers []*EcNode, thisServer string) (thisNodes []*EcNode, otherNodes []*EcNode) { for _, node := range volumeServers { - if node.info.Id == thisServer { - thisNode = node + if node.info.Id == thisServer || (c.volumeRack != "" && string(node.rack) == c.volumeRack) { + thisNodes = append(thisNodes, node) + continue + } + if c.volumeRack != "" && c.volumeRack == string(node.rack) { + continue + } + if c.targetServer != "" && c.targetServer != node.info.Id { continue } otherNodes = append(otherNodes, node) diff --git a/weed/shell/command_volume_server_evacuate_test.go b/weed/shell/command_volume_server_evacuate_test.go index 2cdb94a60..4563f38ba 100644 --- a/weed/shell/command_volume_server_evacuate_test.go +++ b/weed/shell/command_volume_server_evacuate_test.go @@ -6,12 +6,11 @@ import ( ) func TestVolumeServerEvacuate(t *testing.T) { - topologyInfo := parseOutput(topoData) + c := commandVolumeServerEvacuate{} + c.topologyInfo = parseOutput(topoData) volumeServer := "192.168.1.4:8080" - - c := commandVolumeServerEvacuate{} - if err := c.evacuateNormalVolumes(nil, topologyInfo, volumeServer, true, false, os.Stdout); err != nil { + if err := c.evacuateNormalVolumes(nil, volumeServer, true, false, os.Stdout); err != nil { t.Errorf("evacuate: %v", err) }