package shell

import (
	"context"
	"errors"
	"flag"
	"fmt"
	"io"
	"path/filepath"
	"sync"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage/types"
	"github.com/seaweedfs/seaweedfs/weed/wdclient"

	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
)

func init() {
	Commands = append(Commands, &commandVolumeTierMove{})
}

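// volumeTierMoveJob describes one pending move: the volume to move and the
// volume server currently holding the replica to copy from.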
type volumeTierMoveJob struct {
	src pb.ServerAddress
	vid needle.VolumeId
}

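// commandVolumeTierMove moves volumes from one disk type to another. It keeps
// one job queue per destination volume server so that copies to different
// servers run in parallel, while activeServers serializes moves that read from
// the same source server.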
type commandVolumeTierMove struct {
	activeServers sync.Map
	queues        map[pb.ServerAddress]chan volumeTierMoveJob
	//activeServers     map[pb.ServerAddress]struct{}
	//activeServersLock sync.Mutex
	//activeServersCond *sync.Cond
}

func (c *commandVolumeTierMove) Name() string {
	return "volume.tier.move"
}

func (c *commandVolumeTierMove) Help() string {
	return `change a volume from one disk type to another

	volume.tier.move -fromDiskType=hdd -toDiskType=ssd [-collectionPattern=""] [-fullPercent=95] [-quietFor=1h] [-parallelLimit=4] [-toReplication=XYZ]

	Even if the volume is replicated, only one replica will be moved and the remaining replicas will be dropped.
	So "volume.fix.replication" and "volume.balance" should be run afterwards.

`
}

func (c *commandVolumeTierMove) HasTag(CommandTag) bool {
	return false
}

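// Do parses the command flags, collects the volumes on the source tier that are
// full and quiet enough, and fans the moves out to one worker goroutine per
// destination volume server.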
func (c *commandVolumeTierMove) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {

	tierCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
	collectionPattern := tierCommand.String("collectionPattern", "", "match with wildcard characters '*' and '?'")
	fullPercentage := tierCommand.Float64("fullPercent", 95, "the volume reaches the percentage of max volume size")
	quietPeriod := tierCommand.Duration("quietFor", 24*time.Hour, "select volumes without writes for this period")
	source := tierCommand.String("fromDiskType", "", "the source disk type")
	target := tierCommand.String("toDiskType", "", "the target disk type")
	parallelLimit := tierCommand.Int("parallelLimit", 0, "limit the number of parallel copying jobs")
	applyChange := tierCommand.Bool("force", false, "actually apply the changes")
	ioBytePerSecond := tierCommand.Int64("ioBytePerSecond", 0, "limit the speed of move")
	replicationString := tierCommand.String("toReplication", "", "the new target replication setting")

	if err = tierCommand.Parse(args); err != nil {
		return nil
	}
	infoAboutSimulationMode(writer, *applyChange, "-force")

	if err = commandEnv.confirmIsLocked(args); err != nil {
		return
	}

	fromDiskType := types.ToDiskType(*source)
	toDiskType := types.ToDiskType(*target)

	if fromDiskType == toDiskType {
		return fmt.Errorf("source tier %s is the same as target tier %s", fromDiskType, toDiskType)
	}

	// collect topology information
	topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
	if err != nil {
		return err
	}

	// collect all volumes that should change
	volumeIds, err := collectVolumeIdsForTierChange(topologyInfo, volumeSizeLimitMb, fromDiskType, *collectionPattern, *fullPercentage, *quietPeriod)
	if err != nil {
		return err
	}
	fmt.Printf("tier move volumes: %v\n", volumeIds)

	_, allLocations := collectVolumeReplicaLocations(topologyInfo)
	allLocations = filterLocationsByDiskType(allLocations, toDiskType)
	keepDataNodesSorted(allLocations, toDiskType)

	if len(allLocations) > 0 && *parallelLimit > 0 && *parallelLimit < len(allLocations) {
		allLocations = allLocations[:*parallelLimit]
	}

	// one worker goroutine and one job queue per destination volume server
	wg := sync.WaitGroup{}
	bufferLen := len(allLocations)
	c.queues = make(map[pb.ServerAddress]chan volumeTierMoveJob)

	for _, dst := range allLocations {
		destServerAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
		c.queues[destServerAddress] = make(chan volumeTierMoveJob, bufferLen)

		wg.Add(1)
		go func(dst location, jobs <-chan volumeTierMoveJob, applyChanges bool) {
			defer wg.Done()
			for job := range jobs {
				fmt.Fprintf(writer, "moving volume %d from %s to %s with disk type %s ...\n", job.vid, job.src, dst.dataNode.Id, toDiskType.ReadableString())

				locations, found := commandEnv.MasterClient.GetLocationsClone(uint32(job.vid))
				if !found {
					fmt.Fprintf(writer, "volume %d not found\n", job.vid)
					continue
				}

				unlock := c.Lock(job.src)

				if applyChanges {
					if err := c.doMoveOneVolume(commandEnv, writer, job.vid, toDiskType, locations, job.src, dst, *ioBytePerSecond, replicationString); err != nil {
						fmt.Fprintf(writer, "move volume %d %s => %s: %v\n", job.vid, job.src, dst.dataNode.Id, err)
					}
				}
				unlock()
			}
		}(dst, c.queues[destServerAddress], *applyChange)
	}

	// enqueue the moves, rotating the destination list to spread the load
	for _, vid := range volumeIds {
		if err = c.doVolumeTierMove(commandEnv, writer, vid, toDiskType, allLocations); err != nil {
			fmt.Printf("tier move volume %d: %v\n", vid, err)
		}
		allLocations = rotateDataNodes(allLocations)
	}
	for key := range c.queues {
		close(c.queues[key])
	}

	wg.Wait()

	return nil
}

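// Lock acquires a per-server mutex so that only one move involving the given
// server runs at a time; the returned function releases the mutex.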
func (c *commandVolumeTierMove) Lock(key pb.ServerAddress) func() {
	value, _ := c.activeServers.LoadOrStore(key, &sync.Mutex{})
	mtx := value.(*sync.Mutex)
	mtx.Lock()

	return func() { mtx.Unlock() }
}

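// filterLocationsByDiskType keeps only the data nodes that have at least one
// disk of the requested disk type.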
func filterLocationsByDiskType(dataNodes []location, diskType types.DiskType) (ret []location) {
	for _, loc := range dataNodes {
		_, found := loc.dataNode.DiskInfos[string(diskType)]
		if found {
			ret = append(ret, loc)
		}
	}
	return
}

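// rotateDataNodes moves the first destination to the end of the list so that
// successive volumes are spread round-robin across the destinations.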
func rotateDataNodes(dataNodes []location) []location {
	if len(dataNodes) > 0 {
		return append(dataNodes[1:], dataNodes[0])
	}
	return dataNodes
}

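// isOneOf reports whether the given server already holds one of the volume's
// replica locations.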
func isOneOf(server string, locations []wdclient.Location) bool {
	for _, loc := range locations {
		if server == loc.Url {
			return true
		}
	}
	return false
}

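// doVolumeTierMove picks the first destination server that has free volume
// slots on the target disk type and does not already hold a replica, adjusts
// its projected volume count, and enqueues the move job for that server's worker.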
func (c *commandVolumeTierMove) doVolumeTierMove(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, allLocations []location) (err error) {
	// find volume location
	locations, found := commandEnv.MasterClient.GetLocationsClone(uint32(vid))
	if !found {
		return fmt.Errorf("volume %d not found", vid)
	}

	// find one server with the most empty volume slots with target disk type
	hasFoundTarget := false
	fn := capacityByFreeVolumeCount(toDiskType)
	for _, dst := range allLocations {
		if fn(dst.dataNode) > 0 && !hasFoundTarget {
			// ask the volume server to replicate the volume
			if isOneOf(dst.dataNode.Id, locations) {
				continue
			}
			var sourceVolumeServer pb.ServerAddress
			for _, loc := range locations {
				if loc.Url != dst.dataNode.Id {
					sourceVolumeServer = loc.ServerAddress()
				}
			}
			if sourceVolumeServer == "" {
				continue
			}
			hasFoundTarget = true

			// adjust volume count
			addVolumeCount(dst.dataNode.DiskInfos[string(toDiskType)], 1)

			destServerAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
			c.queues[destServerAddress] <- volumeTierMoveJob{sourceVolumeServer, vid}
		}
	}

	if !hasFoundTarget {
		fmt.Fprintf(writer, "can not find disk type %s for volume %d\n", toDiskType.ReadableString(), vid)
	}

	return nil
}

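// doMoveOneVolume marks all replicas read-only, live-moves one replica to the
// destination server on the target disk type, optionally updates the moved
// volume's replication setting, and finally deletes the leftover replicas.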
func (c *commandVolumeTierMove) doMoveOneVolume(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, locations []wdclient.Location, sourceVolumeServer pb.ServerAddress, dst location, ioBytePerSecond int64, replicationString *string) (err error) {

	if !commandEnv.isLocked() {
		return fmt.Errorf("lock is lost")
	}

	// mark all replicas as read only
	if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, false, false); err != nil {
		return fmt.Errorf("mark volume %d as readonly on %s: %v", vid, locations[0].Url, err)
	}
	newAddress := pb.NewServerAddressFromDataNode(dst.dataNode)

	if moveErr := LiveMoveVolume(commandEnv.option.GrpcDialOption, writer, vid, sourceVolumeServer, newAddress, 5*time.Second, toDiskType.ReadableString(), ioBytePerSecond, true); moveErr != nil {
		// the move failed: mark all replicas as writable again so the volume stays usable
		if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, true, false); err != nil {
			glog.Errorf("mark volume %d as writable on %s: %v", vid, locations[0].Url, err)
		}

		return fmt.Errorf("move volume %d %s => %s : %v", vid, locations[0].Url, dst.dataNode.Id, moveErr)
	}

	// if the move is successful and a replication setting was given, alter the moved volume's replication setting
	if *replicationString != "" {
		err = operation.WithVolumeServerClient(false, newAddress, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
			resp, configureErr := volumeServerClient.VolumeConfigure(context.Background(), &volume_server_pb.VolumeConfigureRequest{
				VolumeId:    uint32(vid),
				Replication: *replicationString,
			})
			if configureErr != nil {
				return configureErr
			}
			if resp.Error != "" {
				return errors.New(resp.Error)
			}
			return nil
		})
		if err != nil {
			glog.Errorf("update volume %d replication on %s: %v", vid, locations[0].Url, err)
		}
	}

	// remove the remaining replicas
	for _, loc := range locations {
		if loc.Url != dst.dataNode.Id && loc.ServerAddress() != sourceVolumeServer {
			if err = deleteVolume(commandEnv.option.GrpcDialOption, vid, loc.ServerAddress(), false); err != nil {
				fmt.Fprintf(writer, "failed to delete volume %d on %s: %v\n", vid, loc.Url, err)
			}
			// no need to reduce the volume count here: these nodes are fuller and will not be candidates to move to
		}
	}
	return nil
}

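// collectVolumeIdsForTierChange returns the ids of volumes on the source tier
// that match the collection pattern, have had no writes for the quiet period,
// and exceed the fullness threshold.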
func collectVolumeIdsForTierChange(topologyInfo *master_pb.TopologyInfo, volumeSizeLimitMb uint64, sourceTier types.DiskType, collectionPattern string, fullPercentage float64, quietPeriod time.Duration) (vids []needle.VolumeId, err error) {

	quietSeconds := int64(quietPeriod / time.Second)
	nowUnixSeconds := time.Now().Unix()

	fmt.Printf("collect %s volumes quiet for: %d seconds\n", sourceTier, quietSeconds)

	vidMap := make(map[uint32]bool)
	eachDataNode(topologyInfo, func(dc DataCenterId, rack RackId, dn *master_pb.DataNodeInfo) {
		for _, diskInfo := range dn.DiskInfos {
			for _, v := range diskInfo.VolumeInfos {
				// check collection name pattern
				if collectionPattern != "" {
					matched, err := filepath.Match(collectionPattern, v.Collection)
					if err != nil {
						return
					}
					if !matched {
						continue
					}
				}

				if v.ModifiedAtSecond+quietSeconds < nowUnixSeconds && types.ToDiskType(v.DiskType) == sourceTier {
					if float64(v.Size) > fullPercentage/100*float64(volumeSizeLimitMb)*1024*1024 {
						vidMap[v.Id] = true
					}
				}
			}
		}
	})

	for vid := range vidMap {
		vids = append(vids, needle.VolumeId(vid))
	}

	return
}