|
|
@ -18,7 +18,9 @@ func init() { |
|
|
|
} |
|
|
|
|
|
|
|
// commandVolumeServerEvacuate carries the state shared by the evacuate
// command's methods, so it does not have to be threaded through each call.
type commandVolumeServerEvacuate struct { |
|
|
|
// topologyInfo is the topology returned by collectTopologyInfo. The
// evacuation methods read it from here, and evacuateNormalVolumes replaces
// it when a refreshed topology still yields candidate destination nodes.
topologyInfo *master_pb.TopologyInfo |
|
|
|
// targetServer is the value of the -target flag. When non-empty, only the
// node whose info.Id equals it is kept as a destination.
targetServer string |
|
|
|
// volumeRack is the value of the -rack flag. When non-empty, nodes whose
// rack equals it are selected as sources and excluded from destinations.
volumeRack string |
|
|
|
} |
|
|
|
|
|
|
|
func (c *commandVolumeServerEvacuate) Name() string { |
|
|
@ -47,7 +49,8 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, |
|
|
|
|
|
|
|
vsEvacuateCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) |
|
|
|
volumeServer := vsEvacuateCommand.String("node", "", "<host>:<port> of the volume server") |
|
|
|
c.targetServer = *vsEvacuateCommand.String("target", "", "<host>:<port> of target volume") |
|
|
|
volumeRack := vsEvacuateCommand.String("rack", "", "source rack for the volume servers") |
|
|
|
targetServer := vsEvacuateCommand.String("target", "", "<host>:<port> of target volume") |
|
|
|
skipNonMoveable := vsEvacuateCommand.Bool("skipNonMoveable", false, "skip volumes that can not be moved") |
|
|
|
applyChange := vsEvacuateCommand.Bool("force", false, "actually apply the changes") |
|
|
|
retryCount := vsEvacuateCommand.Int("retry", 0, "how many times to retry") |
|
|
@ -56,12 +59,18 @@ func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, |
|
|
|
} |
|
|
|
infoAboutSimulationMode(writer, *applyChange, "-force") |
|
|
|
|
|
|
|
if err = commandEnv.confirmIsLocked(args); err != nil { |
|
|
|
if err = commandEnv.confirmIsLocked(args); err != nil && *applyChange { |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if *volumeServer == "" { |
|
|
|
return fmt.Errorf("need to specify volume server by -node=<host>:<port>") |
|
|
|
if *volumeServer == "" && *volumeRack == "" { |
|
|
|
return fmt.Errorf("need to specify volume server by -node=<host>:<port> or source rack") |
|
|
|
} |
|
|
|
if *targetServer != "" { |
|
|
|
c.targetServer = *targetServer |
|
|
|
} |
|
|
|
if *volumeRack != "" { |
|
|
|
c.volumeRack = *volumeRack |
|
|
|
} |
|
|
|
for i := 0; i < *retryCount+1; i++ { |
|
|
|
if err = c.volumeServerEvacuate(commandEnv, *volumeServer, *skipNonMoveable, *applyChange, writer); err == nil { |
|
|
@ -80,44 +89,59 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn |
|
|
|
|
|
|
|
// list all the volumes
|
|
|
|
// collect topology information
|
|
|
|
topologyInfo, _, err := collectTopologyInfo(commandEnv, 0) |
|
|
|
c.topologyInfo, _, err = collectTopologyInfo(commandEnv, 0) |
|
|
|
if err != nil { |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
if err := c.evacuateNormalVolumes(commandEnv, topologyInfo, volumeServer, skipNonMoveable, applyChange, writer); err != nil { |
|
|
|
if err := c.evacuateNormalVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
if err := c.evacuateEcVolumes(commandEnv, topologyInfo, volumeServer, skipNonMoveable, applyChange, writer); err != nil { |
|
|
|
if err := c.evacuateEcVolumes(commandEnv, volumeServer, skipNonMoveable, applyChange, writer); err != nil { |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { |
|
|
|
func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { |
|
|
|
// find this volume server
|
|
|
|
volumeServers := collectVolumeServersByDc(topologyInfo, "") |
|
|
|
thisNode, otherNodes := nodesOtherThan(volumeServers, volumeServer) |
|
|
|
if thisNode == nil { |
|
|
|
volumeServers := collectVolumeServersByDc(c.topologyInfo, "") |
|
|
|
thisNodes, otherNodes := c.nodesOtherThan(volumeServers, volumeServer) |
|
|
|
if len(thisNodes) == 0 { |
|
|
|
return fmt.Errorf("%s is not found in this cluster", volumeServer) |
|
|
|
} |
|
|
|
|
|
|
|
// move away normal volumes
|
|
|
|
volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) |
|
|
|
for _, diskInfo := range thisNode.info.DiskInfos { |
|
|
|
for _, vol := range diskInfo.VolumeInfos { |
|
|
|
hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) |
|
|
|
if err != nil { |
|
|
|
return fmt.Errorf("move away volume %d from %s: %v", vol.Id, volumeServer, err) |
|
|
|
} |
|
|
|
if !hasMoved { |
|
|
|
if skipNonMoveable { |
|
|
|
replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(vol.ReplicaPlacement)) |
|
|
|
fmt.Fprintf(writer, "skipping non moveable volume %d replication:%s\n", vol.Id, replicaPlacement.String()) |
|
|
|
for _, thisNode := range thisNodes { |
|
|
|
for _, diskInfo := range thisNode.info.DiskInfos { |
|
|
|
if applyChange { |
|
|
|
if topologyInfo, _, err := collectTopologyInfo(commandEnv, 0); err != nil { |
|
|
|
fmt.Fprintf(writer, "update topologyInfo %v", err) |
|
|
|
} else { |
|
|
|
return fmt.Errorf("failed to move volume %d from %s", vol.Id, volumeServer) |
|
|
|
_, otherNodesNew := c.nodesOtherThan( |
|
|
|
collectVolumeServersByDc(topologyInfo, ""), volumeServer) |
|
|
|
if len(otherNodesNew) > 0 { |
|
|
|
otherNodes = otherNodesNew |
|
|
|
c.topologyInfo = topologyInfo |
|
|
|
fmt.Fprintf(writer, "topologyInfo updated %v\n", len(otherNodes)) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
volumeReplicas, _ := collectVolumeReplicaLocations(c.topologyInfo) |
|
|
|
for _, vol := range diskInfo.VolumeInfos { |
|
|
|
hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) |
|
|
|
if err != nil { |
|
|
|
fmt.Fprintf(writer, "move away volume %d from %s: %v", vol.Id, volumeServer, err) |
|
|
|
} |
|
|
|
if !hasMoved { |
|
|
|
if skipNonMoveable { |
|
|
|
replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(vol.ReplicaPlacement)) |
|
|
|
fmt.Fprintf(writer, "skipping non moveable volume %d replication:%s\n", vol.Id, replicaPlacement.String()) |
|
|
|
} else { |
|
|
|
return fmt.Errorf("failed to move volume %d from %s", vol.Id, volumeServer) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
@ -125,26 +149,28 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE |
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { |
|
|
|
func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { |
|
|
|
// find this ec volume server
|
|
|
|
ecNodes, _ := collectEcVolumeServersByDc(topologyInfo, "") |
|
|
|
thisNode, otherNodes := ecNodesOtherThan(ecNodes, volumeServer) |
|
|
|
if thisNode == nil { |
|
|
|
ecNodes, _ := collectEcVolumeServersByDc(c.topologyInfo, "") |
|
|
|
thisNodes, otherNodes := c.ecNodesOtherThan(ecNodes, volumeServer) |
|
|
|
if len(thisNodes) == 0 { |
|
|
|
return fmt.Errorf("%s is not found in this cluster\n", volumeServer) |
|
|
|
} |
|
|
|
|
|
|
|
// move away ec volumes
|
|
|
|
for _, diskInfo := range thisNode.info.DiskInfos { |
|
|
|
for _, ecShardInfo := range diskInfo.EcShardInfos { |
|
|
|
hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange) |
|
|
|
if err != nil { |
|
|
|
return fmt.Errorf("move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) |
|
|
|
} |
|
|
|
if !hasMoved { |
|
|
|
if skipNonMoveable { |
|
|
|
fmt.Fprintf(writer, "failed to move away ec volume %d from %s\n", ecShardInfo.Id, volumeServer) |
|
|
|
} else { |
|
|
|
return fmt.Errorf("failed to move away ec volume %d from %s", ecShardInfo.Id, volumeServer) |
|
|
|
for _, thisNode := range thisNodes { |
|
|
|
for _, diskInfo := range thisNode.info.DiskInfos { |
|
|
|
for _, ecShardInfo := range diskInfo.EcShardInfos { |
|
|
|
hasMoved, err := c.moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange) |
|
|
|
if err != nil { |
|
|
|
fmt.Fprintf(writer, "move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) |
|
|
|
} |
|
|
|
if !hasMoved { |
|
|
|
if skipNonMoveable { |
|
|
|
fmt.Fprintf(writer, "failed to move away ec volume %d from %s\n", ecShardInfo.Id, volumeServer) |
|
|
|
} else { |
|
|
|
return fmt.Errorf("failed to move away ec volume %d from %s", ecShardInfo.Id, volumeServer) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
@ -160,9 +186,6 @@ func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv |
|
|
|
}) |
|
|
|
for i := 0; i < len(otherNodes); i++ { |
|
|
|
emptyNode := otherNodes[i] |
|
|
|
if c.targetServer != "" && c.targetServer != emptyNode.info.Id { |
|
|
|
continue |
|
|
|
} |
|
|
|
collectionPrefix := "" |
|
|
|
if ecShardInfo.Collection != "" { |
|
|
|
collectionPrefix = ecShardInfo.Collection + "_" |
|
|
@ -207,10 +230,16 @@ func moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][ |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
func nodesOtherThan(volumeServers []*Node, thisServer string) (thisNode *Node, otherNodes []*Node) { |
|
|
|
func (c *commandVolumeServerEvacuate) nodesOtherThan(volumeServers []*Node, thisServer string) (thisNodes []*Node, otherNodes []*Node) { |
|
|
|
for _, node := range volumeServers { |
|
|
|
if node.info.Id == thisServer { |
|
|
|
thisNode = node |
|
|
|
if node.info.Id == thisServer || (c.volumeRack != "" && node.rack == c.volumeRack) { |
|
|
|
thisNodes = append(thisNodes, node) |
|
|
|
continue |
|
|
|
} |
|
|
|
if c.volumeRack != "" && c.volumeRack == node.rack { |
|
|
|
continue |
|
|
|
} |
|
|
|
if c.targetServer != "" && c.targetServer != node.info.Id { |
|
|
|
continue |
|
|
|
} |
|
|
|
otherNodes = append(otherNodes, node) |
|
|
@ -218,10 +247,16 @@ func nodesOtherThan(volumeServers []*Node, thisServer string) (thisNode *Node, o |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
func ecNodesOtherThan(volumeServers []*EcNode, thisServer string) (thisNode *EcNode, otherNodes []*EcNode) { |
|
|
|
func (c *commandVolumeServerEvacuate) ecNodesOtherThan(volumeServers []*EcNode, thisServer string) (thisNodes []*EcNode, otherNodes []*EcNode) { |
|
|
|
for _, node := range volumeServers { |
|
|
|
if node.info.Id == thisServer { |
|
|
|
thisNode = node |
|
|
|
if node.info.Id == thisServer || (c.volumeRack != "" && string(node.rack) == c.volumeRack) { |
|
|
|
thisNodes = append(thisNodes, node) |
|
|
|
continue |
|
|
|
} |
|
|
|
if c.volumeRack != "" && c.volumeRack == string(node.rack) { |
|
|
|
continue |
|
|
|
} |
|
|
|
if c.targetServer != "" && c.targetServer != node.info.Id { |
|
|
|
continue |
|
|
|
} |
|
|
|
otherNodes = append(otherNodes, node) |
|
|
|