package topology

import (
	"context"
	"io"
	"sync"
	"sync/atomic"
	"time"

	"google.golang.org/grpc"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
	"github.com/seaweedfs/seaweedfs/weed/util"
)

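// batchVacuumVolumeCheck asks every replica of the volume, in parallel, for its garbage ratio
// and collects the locations whose ratio reaches garbageThreshold. It returns those locations
// together with a flag that is true only when every replica answered in time without error.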
func (t *Topology) batchVacuumVolumeCheck(grpcDialOption grpc.DialOption, vid needle.VolumeId,
	locationlist *VolumeLocationList, garbageThreshold float64) (*VolumeLocationList, bool) {
	// each goroutine reports the replica index if the garbage ratio is high enough, or -1 otherwise
	ch := make(chan int, locationlist.Length())
	errCount := int32(0)
	for index, dn := range locationlist.list {
		go func(index int, url pb.ServerAddress, vid needle.VolumeId) {
			err := operation.WithVolumeServerClient(false, url, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
				resp, err := volumeServerClient.VacuumVolumeCheck(context.Background(), &volume_server_pb.VacuumVolumeCheckRequest{
					VolumeId: uint32(vid),
				})
				if err != nil {
					atomic.AddInt32(&errCount, 1)
					ch <- -1
					return err
				}
				if resp.GarbageRatio >= garbageThreshold {
					ch <- index
				} else {
					ch <- -1
				}
				return nil
			})
			if err != nil {
				glog.V(0).Infof("Checking vacuuming %d on %s: %v", vid, url, err)
			}
		}(index, dn.ServerAddress(), vid)
	}
	vacuumLocationList := NewVolumeLocationList()

	// the timeout scales with the configured volume size limit
	waitTimeout := time.NewTimer(time.Minute * time.Duration(t.volumeSizeLimit/1024/1024/1000+1))
	defer waitTimeout.Stop()

	for range locationlist.list {
		select {
		case index := <-ch:
			if index != -1 {
				vacuumLocationList.list = append(vacuumLocationList.list, locationlist.list[index])
			}
		case <-waitTimeout.C:
			return vacuumLocationList, false
		}
	}
	return vacuumLocationList, errCount == 0 && len(vacuumLocationList.list) > 0
}

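// batchVacuumVolumeCompact removes the volume from the writable list and asks every replica in
// locationlist to compact it, streaming progress reports back. It returns true only if all
// replicas finish compaction before the timeout.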
func (t *Topology) batchVacuumVolumeCompact(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId,
	locationlist *VolumeLocationList, preallocate int64) bool {
	vl.accessLock.Lock()
	vl.removeFromWritable(vid)
	vl.accessLock.Unlock()

	ch := make(chan bool, locationlist.Length())
	for index, dn := range locationlist.list {
		go func(index int, url pb.ServerAddress, vid needle.VolumeId) {
			glog.V(0).Infoln(index, "Start vacuuming", vid, "on", url)
			err := operation.WithVolumeServerClient(true, url, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
				stream, err := volumeServerClient.VacuumVolumeCompact(context.Background(), &volume_server_pb.VacuumVolumeCompactRequest{
					VolumeId:    uint32(vid),
					Preallocate: preallocate,
				})
				if err != nil {
					return err
				}

				// consume the progress stream until the volume server closes it
				for {
					resp, recvErr := stream.Recv()
					if recvErr != nil {
						if recvErr == io.EOF {
							break
						} else {
							return recvErr
						}
					}
					glog.V(0).Infof("%d vacuum %d on %s processed %d bytes, loadAvg %.02f%%",
						index, vid, url, resp.ProcessedBytes, resp.LoadAvg_1M*100)
				}
				return nil
			})
			if err != nil {
				glog.Errorf("Error when vacuuming %d on %s: %v", vid, url, err)
				ch <- false
			} else {
				glog.V(0).Infof("Complete vacuuming %d on %s", vid, url)
				ch <- true
			}
		}(index, dn.ServerAddress(), vid)
	}
	isVacuumSuccess := true

	// compaction is slower than the check, so allow three times the check timeout
	waitTimeout := time.NewTimer(3 * time.Minute * time.Duration(t.volumeSizeLimit/1024/1024/1000+1))
	defer waitTimeout.Stop()

	for range locationlist.list {
		select {
		case canCommit := <-ch:
			isVacuumSuccess = isVacuumSuccess && canCommit
		case <-waitTimeout.C:
			return false
		}
	}
	return isVacuumSuccess
}

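// batchVacuumVolumeCommit commits the compacted data on every vacuumed replica, then checks the
// status of the remaining replicas so the volume is only marked available again with the correct
// read-only and full-capacity flags.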
func (t *Topology) batchVacuumVolumeCommit(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, vacuumLocationList, locationList *VolumeLocationList) bool {
	isCommitSuccess := true
	isReadOnly := false
	isFullCapacity := false
	for _, dn := range vacuumLocationList.list {
		glog.V(0).Infoln("Start Committing vacuum", vid, "on", dn.Url())
		err := operation.WithVolumeServerClient(false, dn.ServerAddress(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
			resp, err := volumeServerClient.VacuumVolumeCommit(context.Background(), &volume_server_pb.VacuumVolumeCommitRequest{
				VolumeId: uint32(vid),
			})
			if resp != nil {
				if resp.IsReadOnly {
					isReadOnly = true
				}
				if resp.VolumeSize > t.volumeSizeLimit {
					isFullCapacity = true
				}
			}
			return err
		})
		if err != nil {
			glog.Errorf("Error when committing vacuum %d on %s: %v", vid, dn.Url(), err)
			isCommitSuccess = false
		} else {
			glog.V(0).Infof("Complete Committing vacuum %d on %s", vid, dn.Url())
		}
	}

	// also check the status of the replicas that were not vacuumed
	if len(locationList.list) > len(vacuumLocationList.list) {
		for _, dn := range locationList.list {
			isFound := false
			for _, dnVacuum := range vacuumLocationList.list {
				if dn.id == dnVacuum.id {
					isFound = true
					break
				}
			}
			if !isFound {
				err := operation.WithVolumeServerClient(false, dn.ServerAddress(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
					resp, err := volumeServerClient.VolumeStatus(context.Background(), &volume_server_pb.VolumeStatusRequest{
						VolumeId: uint32(vid),
					})
					if resp != nil {
						if resp.IsReadOnly {
							isReadOnly = true
						}
						if resp.VolumeSize > t.volumeSizeLimit {
							isFullCapacity = true
						}
					}
					return err
				})
				if err != nil {
					glog.Errorf("Error when checking volume %d status on %s: %v", vid, dn.Url(), err)
					// mark the volume read-only, since its state is unknown
					isReadOnly = true
				}
			}
		}
	}

	if isCommitSuccess {
		// record the vacuum time of the volume
		vl.accessLock.Lock()
		vl.vacuumedVolumes[vid] = time.Now()
		vl.accessLock.Unlock()

		for _, dn := range vacuumLocationList.list {
			vl.SetVolumeAvailable(dn, vid, isReadOnly, isFullCapacity)
		}
	}
	return isCommitSuccess
}

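// batchVacuumVolumeCleanup asks every replica to clean up the temporary data left behind by a
// failed compaction.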
func (t *Topology) batchVacuumVolumeCleanup(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, locationlist *VolumeLocationList) {
	for _, dn := range locationlist.list {
		glog.V(0).Infoln("Start cleaning up", vid, "on", dn.Url())
		err := operation.WithVolumeServerClient(false, dn.ServerAddress(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
			_, err := volumeServerClient.VacuumVolumeCleanup(context.Background(), &volume_server_pb.VacuumVolumeCleanupRequest{
				VolumeId: uint32(vid),
			})
			return err
		})
		if err != nil {
			glog.Errorf("Error when cleaning up vacuum %d on %s: %v", vid, dn.Url(), err)
		} else {
			glog.V(0).Infof("Complete cleaning up vacuum %d on %s", vid, dn.Url())
		}
	}
}

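// Vacuum walks the topology and vacuums volumes whose garbage ratio reaches garbageThreshold.
// If volumeId is non-zero, only that volume is considered; otherwise every volume layout in the
// matching collection(s) is processed. Only one vacuum run can be active at a time.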
func (t *Topology) Vacuum(grpcDialOption grpc.DialOption, garbageThreshold float64, maxParallelVacuumPerServer int, volumeId uint32, collection string, preallocate int64, automatic bool) {

	// if a vacuum is already in progress, return immediately
	swapped := atomic.CompareAndSwapInt64(&t.vacuumLockCounter, 0, 1)
	if !swapped {
		glog.V(0).Infof("Vacuum is already running")
		return
	}
	defer atomic.StoreInt64(&t.vacuumLockCounter, 0)

	// now only one vacuum process is going on

	glog.V(1).Infof("Start vacuum on demand with threshold: %f collection: %s volumeId: %d",
		garbageThreshold, collection, volumeId)
	for _, col := range t.collectionMap.Items() {
		c := col.(*Collection)
		if collection != "" && collection != c.Name {
			continue
		}
		for _, vl := range c.storageType2VolumeLayout.Items() {
			if vl != nil {
				volumeLayout := vl.(*VolumeLayout)
				if volumeId > 0 {
					// vacuum a single volume
					vid := needle.VolumeId(volumeId)
					volumeLayout.accessLock.RLock()
					locationList, ok := volumeLayout.vid2location[vid]
					volumeLayout.accessLock.RUnlock()
					if ok {
						t.vacuumOneVolumeId(grpcDialOption, volumeLayout, c, garbageThreshold, locationList, vid, preallocate)
					}
				} else {
					// vacuum all volumes in this layout
					t.vacuumOneVolumeLayout(grpcDialOption, volumeLayout, c, garbageThreshold, maxParallelVacuumPerServer, preallocate, automatic)
				}
			}
			if automatic && t.isDisableVacuum {
				break
			}
		}
		if automatic && t.isDisableVacuum {
			glog.V(0).Infof("Vacuum is disabled")
			break
		}
	}
}

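// vacuumOneVolumeLayout vacuums all volumes in the layout while keeping at most
// maxParallelVacuumPerServer concurrent vacuums per volume server. Volumes that cannot acquire a
// slot on all of their servers are kept pending and retried in later passes.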
func (t *Topology) vacuumOneVolumeLayout(grpcDialOption grpc.DialOption, volumeLayout *VolumeLayout, c *Collection, garbageThreshold float64, maxParallelVacuumPerServer int, preallocate int64, automatic bool) {

	volumeLayout.accessLock.RLock()
	todoVolumeMap := make(map[needle.VolumeId]*VolumeLocationList)
	for vid, locationList := range volumeLayout.vid2location {
		todoVolumeMap[vid] = locationList.Copy()
	}
	volumeLayout.accessLock.RUnlock()

	// per-volume-server limiter: each server starts with maxParallelVacuumPerServer slots
	limiter := make(map[NodeId]int)
	var limiterLock sync.Mutex
	for _, locationList := range todoVolumeMap {
		for _, dn := range locationList.list {
			if _, ok := limiter[dn.Id()]; !ok {
				limiter[dn.Id()] = maxParallelVacuumPerServer
			}
		}
	}

	executor := util.NewLimitedConcurrentExecutor(100)

	var wg sync.WaitGroup

	for len(todoVolumeMap) > 0 {
		pendingVolumeMap := make(map[needle.VolumeId]*VolumeLocationList)
		for vid, locationList := range todoVolumeMap {
			// a volume can only be vacuumed when every server hosting a replica has a free slot
			hasEnoughQuota := true
			for _, dn := range locationList.list {
				limiterLock.Lock()
				quota := limiter[dn.Id()]
				limiterLock.Unlock()
				if quota <= 0 {
					hasEnoughQuota = false
					break
				}
			}
			if !hasEnoughQuota {
				pendingVolumeMap[vid] = locationList
				continue
			}

			// debit the quota
			for _, dn := range locationList.list {
				limiterLock.Lock()
				limiter[dn.Id()]--
				limiterLock.Unlock()
			}

			wg.Add(1)
			executor.Execute(func() {
				defer wg.Done()
				t.vacuumOneVolumeId(grpcDialOption, volumeLayout, c, garbageThreshold, locationList, vid, preallocate)
				// credit the quota
				for _, dn := range locationList.list {
					limiterLock.Lock()
					limiter[dn.Id()]++
					limiterLock.Unlock()
				}
			})
			if automatic && t.isDisableVacuum {
				break
			}
		}
		if automatic && t.isDisableVacuum {
			break
		}
		// if no volume made progress in this pass, wait before retrying
		if len(todoVolumeMap) == len(pendingVolumeMap) {
			time.Sleep(10 * time.Second)
		}
		todoVolumeMap = pendingVolumeMap
	}

	wg.Wait()
}

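// vacuumOneVolumeId checks a single volume and, when its garbage ratio is high enough, compacts
// and commits it, or cleans up after a failed compaction. Read-only and under-replicated volumes
// are skipped.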
func (t *Topology) vacuumOneVolumeId(grpcDialOption grpc.DialOption, volumeLayout *VolumeLayout, c *Collection, garbageThreshold float64, locationList *VolumeLocationList, vid needle.VolumeId, preallocate int64) {
	volumeLayout.accessLock.RLock()
	isReadOnly := volumeLayout.readonlyVolumes.IsTrue(vid)
	isEnoughCopies := volumeLayout.enoughCopies(vid)
	volumeLayout.accessLock.RUnlock()

	if isReadOnly {
		return
	}
	if !isEnoughCopies {
		glog.Warningf("skip vacuuming: not enough copies for volume:%d", vid)
		return
	}

	glog.V(1).Infof("check vacuum on collection:%s volume:%d", c.Name, vid)
	if vacuumLocationList, needVacuum := t.batchVacuumVolumeCheck(
		grpcDialOption, vid, locationList, garbageThreshold); needVacuum {
		if t.batchVacuumVolumeCompact(grpcDialOption, volumeLayout, vid, vacuumLocationList, preallocate) {
			t.batchVacuumVolumeCommit(grpcDialOption, volumeLayout, vid, vacuumLocationList, locationList)
		} else {
			t.batchVacuumVolumeCleanup(grpcDialOption, volumeLayout, vid, vacuumLocationList)
		}
	}
}