@ -156,21 +156,29 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE
}
func ( c * commandVolumeServerEvacuate ) evacuateEcVolumes ( commandEnv * CommandEnv , volumeServer string , skipNonMoveable , applyChange bool , writer io . Writer ) error {
// find this ec volume server
// Evacuate EC volumes for all disk types
// We need to handle each disk type separately because shards should be moved to nodes with the same disk type
// We collect topology once at the start and track capacity changes ourselves
// (via freeEcSlot decrement after each move) rather than repeatedly refreshing,
// which would give a false sense of correctness since topology could be stale.
ecNodes , _ := collectEcVolumeServersByDc ( c . topologyInfo , "" )
diskTypes := [ ] types . DiskType { types . HardDriveType , types . SsdType }
for _ , diskType := range diskTypes {
ecNodes , _ := collectEcVolumeServersByDc ( c . topologyInfo , "" , diskType )
thisNodes , otherNodes := c . ecNodesOtherThan ( ecNodes , volumeServer )
if len ( thisNodes ) == 0 {
return fmt . Errorf ( "%s is not found in this cluster\n" , volumeServer )
// This server doesn't have EC shards for this disk type, skip
continue
}
// move away ec volumes
// move away ec volumes for this disk type
for _ , thisNode := range thisNodes {
for _ , diskInfo := range thisNode . info . DiskInfos {
diskInfo , found := thisNode . info . DiskInfos [ string ( diskType ) ]
if ! found {
continue
}
for _ , ecShardInfo := range diskInfo . EcShardInfos {
hasMoved , err := c . moveAwayOneEcVolume ( commandEnv , ecShardInfo , thisNode , otherNodes , applyChange , writer )
hasMoved , err := c . moveAwayOneEcVolume ( commandEnv , ecShardInfo , thisNode , otherNodes , applyChange , diskType , writer )
if err != nil {
fmt . Fprintf ( writer , "move away volume %d from %s: %v\n" , ecShardInfo . Id , volumeServer , err )
}
@ -187,7 +195,7 @@ func (c *commandVolumeServerEvacuate) evacuateEcVolumes(commandEnv *CommandEnv,
return nil
}
func ( c * commandVolumeServerEvacuate ) moveAwayOneEcVolume ( commandEnv * CommandEnv , ecShardInfo * master_pb . VolumeEcShardInformationMessage , thisNode * EcNode , otherNodes [ ] * EcNode , applyChange bool , writer io . Writer ) ( hasMoved bool , err error ) {
func ( c * commandVolumeServerEvacuate ) moveAwayOneEcVolume ( commandEnv * CommandEnv , ecShardInfo * master_pb . VolumeEcShardInformationMessage , thisNode * EcNode , otherNodes [ ] * EcNode , applyChange bool , diskType types . DiskType , writer io . Writer ) ( hasMoved bool , err error ) {
for _ , shardId := range erasure_coding . ShardBits ( ecShardInfo . EcIndexBits ) . ShardIds ( ) {
// Sort by: 1) fewest shards of this volume, 2) most free EC slots
@ -217,13 +225,14 @@ func (c *commandVolumeServerEvacuate) moveAwayOneEcVolume(commandEnv *CommandEnv
collectionPrefix = ecShardInfo . Collection + "_"
}
vid := needle . VolumeId ( ecShardInfo . Id )
destDiskId := pickBestDiskOnNode ( emptyNode , vid )
// For evacuation, prefer same disk type but allow fallback to other types
destDiskId := pickBestDiskOnNode ( emptyNode , vid , diskType , false )
if destDiskId > 0 {
fmt . Fprintf ( writer , "moving ec volume %s%d.%d %s => %s (disk %d)\n" , collectionPrefix , ecShardInfo . Id , shardId , thisNode . info . Id , emptyNode . info . Id , destDiskId )
} else {
fmt . Fprintf ( writer , "moving ec volume %s%d.%d %s => %s\n" , collectionPrefix , ecShardInfo . Id , shardId , thisNode . info . Id , emptyNode . info . Id )
}
err = moveMountedShardToEcNode ( commandEnv , thisNode , ecShardInfo . Collection , vid , shardId , emptyNode , destDiskId , applyChange )
err = moveMountedShardToEcNode ( commandEnv , thisNode , ecShardInfo . Collection , vid , shardId , emptyNode , destDiskId , applyChange , diskType )
if err != nil {
hasMoved = false
return