2 changed files with 717 additions and 0 deletions
@ -0,0 +1,539 @@ |
|||
package shell |
|||
|
|||
import ( |
|||
"context" |
|||
"fmt" |
|||
"github.com/chrislusf/seaweedfs/weed/glog" |
|||
"github.com/chrislusf/seaweedfs/weed/operation" |
|||
"github.com/chrislusf/seaweedfs/weed/pb/master_pb" |
|||
"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" |
|||
"github.com/chrislusf/seaweedfs/weed/storage" |
|||
"github.com/chrislusf/seaweedfs/weed/storage/needle" |
|||
"google.golang.org/grpc" |
|||
"io" |
|||
"math/rand" |
|||
"sort" |
|||
"strings" |
|||
"sync" |
|||
) |
|||
|
|||
// init is where the replication health commands would be registered with the
// shell's command list. Registration is commented out for now — presumably
// the feature is not yet ready to be exposed; confirm before enabling.
func init() {
	//commands = append(commands, &commandReplicationHealthChecker{})
	//commands = append(commands, &commandReplicationHealthRepair{})
}
|||
|
|||
// commandReplicationHealthChecker is the shell command that reports volumes
// whose replicas disagree with each other.
type commandReplicationHealthChecker struct {
	args       []string    // raw arguments passed to Do
	commandEnv *commandEnv // shell environment (master client, grpc options)
	writer     io.Writer   // destination for user-visible output
	checker    *ReplicationHealthChecker // underlying checker implementation
}
|||
|
|||
// Name returns the shell command name used to invoke the checker.
func (c *commandReplicationHealthChecker) Name() string {
	return "volume.check.replication"
}
|||
|
|||
func (c *commandReplicationHealthChecker) Help() string { |
|||
return ` |
|||
` |
|||
} |
|||
|
|||
func (c *commandReplicationHealthChecker) Do(args []string, commandEnv *commandEnv, writer io.Writer) (err error) { |
|||
c.writer = writer |
|||
c.commandEnv = commandEnv |
|||
c.args = args |
|||
c.checker = NewReplicationHealthChecker(context.Background(), commandEnv.option.GrpcDialOption) |
|||
return nil |
|||
} |
|||
// commandReplicationHealthRepair is the shell command that detects and then
// repairs volumes whose replicas disagree with each other.
type commandReplicationHealthRepair struct {
	args       []string    // raw arguments passed to Do
	commandEnv *commandEnv // shell environment (master client, grpc options)
	writer     io.Writer   // destination for user-visible output
	repair     *ReplicationHealthRepair // underlying repair implementation
}
|||
|
|||
// Name returns the shell command name used to invoke the repair.
func (c *commandReplicationHealthRepair) Name() string {
	return "volume.repair.replication"
}
|||
|
|||
func (c *commandReplicationHealthRepair) Help() string { |
|||
return ` |
|||
` |
|||
} |
|||
|
|||
func (c *commandReplicationHealthRepair) Do(args []string, commandEnv *commandEnv, writer io.Writer) (err error) { |
|||
c.args = args |
|||
c.commandEnv = commandEnv |
|||
c.writer = writer |
|||
ctx := context.Background() |
|||
c.repair = NewReplicationHealthRepair(ctx, commandEnv.option.GrpcDialOption) |
|||
|
|||
var resp *master_pb.VolumeListResponse |
|||
if err := c.commandEnv.masterClient.WithClient(ctx, func(client master_pb.SeaweedClient) error { |
|||
var err error |
|||
resp, err = client.VolumeList(ctx, &master_pb.VolumeListRequest{}) |
|||
return err |
|||
}); err != nil { |
|||
return err |
|||
} |
|||
|
|||
// invocation the commandReplicationHealthChecker and get the error replications
|
|||
checker := NewReplicationHealthChecker(ctx, c.commandEnv.option.GrpcDialOption) |
|||
eVids, err := checker.Check(resp.TopologyInfo) |
|||
if err != nil { |
|||
writer.Write([]byte(err.Error())) |
|||
return err |
|||
} |
|||
|
|||
// repair them
|
|||
successVids, failedVids, err := c.repair.Repair(resp.TopologyInfo, eVids) |
|||
if err != nil { |
|||
str := fmt.Sprintf("repair volume:%v replication failed.\n", failedVids) |
|||
writer.Write([]byte(str)) |
|||
} else { |
|||
str := fmt.Sprintf("repair volue:%v replication success.\n", successVids) |
|||
writer.Write([]byte(str)) |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
/////////////////////////////////////////////////////////////////////////
|
|||
// ReplicationHealthChecker detects volumes whose replicas are out of sync.
// NOTE(review): storing a context.Context inside a struct is discouraged in
// Go — consider passing ctx to Check instead; confirm before refactoring.
type ReplicationHealthChecker struct {
	grpcDialOption grpc.DialOption // dial option used for all volume server calls
	context        context.Context // context applied to every grpc request
}
|||
|
|||
func NewReplicationHealthChecker(ctx context.Context, grpcOption grpc.DialOption) *ReplicationHealthChecker { |
|||
return &ReplicationHealthChecker{grpcDialOption: grpcOption, context: ctx} |
|||
} |
|||
|
|||
/**
double check :
1st, get information of volume from topology;
2nd, get the latest information of volume from every data node
*/
// Check returns the ids of volumes whose replicas disagree with each other,
// or (nil, nil) when every volume is healthy. Candidates are first screened
// from the master's topology snapshot, then each candidate is re-validated
// against the live volume servers so that volumes that were merely stale in
// the master's view are filtered out.
//
// NOTE(review): getUnhealthyVolumeIds scales its limit argument by 1024*1024
// (its parameter is named volumeSizeLimitMB), yet VolumeSizeLimitBytes is
// passed both there and — unscaled — to isHealthyVolumeReplications below.
// One of the two call sites looks wrong; confirm the unit of that field.
func (r *ReplicationHealthChecker) Check(topologyInfo *master_pb.TopologyInfo) ([]uint32, error) {
	volInfoMap, vol2LocsMap := getVolumeInfo(topologyInfo)
	if (nil == volInfoMap) || (nil == vol2LocsMap) || (len(volInfoMap) <= 0) || (len(vol2LocsMap) <= 0) {
		return nil, fmt.Errorf("get volume info from topology failed")
	}

	// 1st pass: candidates according to the master's topology snapshot
	errVids := getUnhealthyVolumeIds(volInfoMap, vol2LocsMap, topologyInfo.VolumeSizeLimitBytes)
	if nil == errVids || (len(errVids) <= 0) {
		glog.V(4).Infof("no error replications")
		return nil, nil
	}

	// get the latest volume file status from every data node
	newErrVids := make([]uint32, 0, len(errVids))
	for _, eVid := range errVids {
		eVidUrls := getVolumeUrls(vol2LocsMap[eVid])
		fileStats, err := getVolumeFileStatus(r.grpcDialOption, r.context, eVid, eVidUrls)
		if err != nil {
			// a single unreachable replica aborts the whole check
			glog.Error(err)
			return nil, err
		}
		// normalize the per-server responses for comparison
		vInfos := make([]*ReplicaInformation, 0, len(fileStats))
		for _, i := range fileStats {
			vInfos = append(vInfos, &ReplicaInformation{
				Size:                   i.fileStat.Size,
				FileCount:              i.fileStat.FileCount,
				ReadOnly:               i.fileStat.ReadOnly,
				CompactRevision:        i.fileStat.CompactRevision,
				LastCompactIndexOffset: i.fileStat.LastCompactIndexOffset,
			})
		}
		if isHealthyVolumeReplications(vInfos, topologyInfo.VolumeSizeLimitBytes) {
			continue
		}
		newErrVids = append(newErrVids, eVid)
	}
	return newErrVids, nil
}
|||
|
|||
/////////////////////////////////////////////////////////////////////////
|
|||
// ReplicationHealthRepair brings out-of-sync volume replicas back in sync
// by compacting them and copying data from a healthy replica.
// NOTE(review): same ctx-in-struct concern as ReplicationHealthChecker.
type ReplicationHealthRepair struct {
	grpcDialOption grpc.DialOption // dial option used for all volume server calls
	context        context.Context // context applied to every grpc request
}
|||
|
|||
func NewReplicationHealthRepair(ctx context.Context, grpcOption grpc.DialOption) *ReplicationHealthRepair { |
|||
return &ReplicationHealthRepair{grpcDialOption: grpcOption, context: ctx,} |
|||
} |
|||
|
|||
/**
repair the unhealthy replications,
*/
// Repair tries to bring the replicas of every volume in errVids back in
// sync, in two passes:
//  1. compact every replica of each (writable) volume — any compaction
//     failure aborts the whole repair;
//  2. re-read each volume's file status, pick the replicas with the highest
//     file count as the source of truth, and VolumeCopy onto the lagging
//     replicas from a randomly chosen healthy one.
// It returns the per-volume outcome lists; err is non-nil when at least one
// volume could not be repaired.
func (r *ReplicationHealthRepair) Repair(topologyInfo *master_pb.TopologyInfo, errVids []uint32) (success, failed []uint32, err error) {
	volInfoMap, vol2LocsMap := getVolumeInfo(topologyInfo)
	if (nil == volInfoMap) || (nil == vol2LocsMap) || (len(volInfoMap) <= 0) || (len(vol2LocsMap) <= 0) {
		return nil, errVids, fmt.Errorf("get volume info from topology failed")
	}

	// pass 1: compact all replicas so file counts reflect live needles
	for _, eVid := range errVids {
		if isReadOnlyVolume(volInfoMap[eVid]) {
			continue // skip the read-only volume compacting
		}
		glog.V(4).Infof("begin compact all the replications of volume:%v", eVid)
		eVidUrls := getVolumeUrls(vol2LocsMap[eVid])
		if tryBatchCompactVolume(r.context, r.grpcDialOption, needle.VolumeId(eVid), eVidUrls) == false {
			// abort entirely: a failed compaction makes file counts unreliable
			err := fmt.Errorf("compact all the replications of volume:%v", eVid)
			glog.Error(err)
			return nil, errVids, err
		}
		glog.V(4).Infof("success compact all the replications of volume:%v", eVid)
	}

	// pass 2: resync lagging replicas from a healthy one
	for _, eVid := range errVids {
		eVidUrls := getVolumeUrls(vol2LocsMap[eVid])
		fileStats, err := getVolumeFileStatus(r.grpcDialOption, r.context, eVid, eVidUrls)
		if err != nil {
			glog.Error(err)
			failed = append(failed, eVid)
			continue
		}
		okUrls, errUrls := filterErrorReplication(fileStats)
		if len(errUrls) == 0 {
			success = append(success, eVid) // no need repair
			continue
		}

		// replica placement / ttl are taken from the first replica's info
		info := volInfoMap[eVid][0]
		ttl := needle.LoadTTLFromUint32(info.Ttl).String()
		rp, err := storage.NewReplicaPlacementFromByte(byte(info.ReplicaPlacement))
		if err != nil {
			failed = append(failed, eVid)
			glog.Errorf("vid:%v, parse replicaPlacement failed, %d", eVid, info.ReplicaPlacement)
			continue
		}

		// copy onto each lagging replica from a random healthy source
		syncSuccess := true
		for _, errUrl := range errUrls {
			okUrl := okUrls[rand.Intn(len(okUrls))]
			req := &volume_server_pb.VolumeCopyRequest{
				VolumeId:       uint32(info.Id),
				Collection:     info.Collection,
				Replication:    rp.String(),
				Ttl:            ttl,
				SourceDataNode: okUrl,
			}
			err = syncReplication(r.grpcDialOption, errUrl, req)
			if nil != err {
				syncSuccess = false
				glog.Errorf("sync replication from %s to %s failed, %v", okUrl, errUrl, err)
			}
		}
		if syncSuccess {
			success = append(success, eVid)
		} else {
			failed = append(failed, eVid)
		}
	}

	if len(failed) > 0 {
		err = fmt.Errorf("there are some volumes health repair failed")
	}
	return
}
|||
|
|||
// ReplicaFileStatus pairs one replica's volume server url with the file
// status that server reported for the replica.
type ReplicaFileStatus struct {
	url      string              // volume server url hosting the replica
	fileStat *ReplicaInformation // normalized status of the replica's data file
}
|||
|
|||
/** |
|||
get information of volume from every volume node concurrently |
|||
*/ |
|||
func getVolumeFileStatus(grpcDialOption grpc.DialOption, ctx context.Context, vid uint32, volumeUrls []string) (fileStatuses []*ReplicaFileStatus, err error) { |
|||
type ResponsePair struct { |
|||
url string |
|||
status *volume_server_pb.ReadVolumeFileStatusResponse |
|||
err error |
|||
} |
|||
|
|||
var wg sync.WaitGroup |
|||
resultChan := make(chan ResponsePair, len(volumeUrls)) |
|||
wg.Add(len(volumeUrls)) |
|||
getFileStatFunc := func(url string, volumeId uint32) { |
|||
defer wg.Done() |
|||
glog.V(4).Infof("volumeId:%v, location:%v", volumeId, url) |
|||
err := operation.WithVolumeServerClient(url, grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { |
|||
req := &volume_server_pb.ReadVolumeFileStatusRequest{ |
|||
VolumeId: uint32(volumeId), |
|||
} |
|||
respTmp, err := client.ReadVolumeFileStatus(ctx, req) |
|||
resultChan <- ResponsePair{ |
|||
url: url, |
|||
status: respTmp, |
|||
err: err, |
|||
} |
|||
return nil |
|||
}) |
|||
if nil != err { |
|||
glog.Error(err) |
|||
} |
|||
} |
|||
for _, url := range volumeUrls { |
|||
go getFileStatFunc(url, vid) |
|||
} |
|||
|
|||
go func() { // close channel
|
|||
wg.Wait() |
|||
close(resultChan) |
|||
}() |
|||
|
|||
var errs []string |
|||
for result := range resultChan { |
|||
if result.err == nil { |
|||
fileStatuses = append(fileStatuses, &ReplicaFileStatus{ |
|||
url: result.url, |
|||
fileStat: &ReplicaInformation{ |
|||
Size: result.status.DatFileSize, |
|||
FileCount: result.status.FileCount, |
|||
ReadOnly: false, |
|||
CompactRevision: 0, |
|||
LastCompactIndexOffset: 0, |
|||
}}) |
|||
continue |
|||
} |
|||
tmp := fmt.Sprintf("url : %s, error : %v", result.url, result.err) |
|||
errs = append(errs, tmp) |
|||
} |
|||
|
|||
if len(fileStatuses) == len(volumeUrls) { |
|||
return fileStatuses, nil |
|||
} |
|||
err = fmt.Errorf("get volume[%v] replication status failed, err : %s", vid, strings.Join(errs, "; ")) |
|||
return nil, err |
|||
} |
|||
|
|||
/** |
|||
<see the class mapMetric and needleMap> : |
|||
the file count is the total count of the volume received from user clients |
|||
*/ |
|||
func filterErrorReplication(vInfo []*ReplicaFileStatus) (okUrls, errUrls []string) { |
|||
sort.Slice(vInfo, func(i, j int) bool { |
|||
return vInfo[i].fileStat.FileCount > vInfo[j].fileStat.FileCount |
|||
}) |
|||
if vInfo[0].fileStat.FileCount != vInfo[len(vInfo)-1].fileStat.FileCount { |
|||
okFileCounter := vInfo[0].fileStat.FileCount |
|||
for _, v := range vInfo { |
|||
if okFileCounter == v.fileStat.FileCount { |
|||
okUrls = append(okUrls, v.url) |
|||
} else { |
|||
errUrls = append(errUrls, v.url) |
|||
} |
|||
} |
|||
return |
|||
} |
|||
return |
|||
} |
|||
|
|||
// execute the compact transaction
|
|||
func compactVolume(ctx context.Context, grpcDialOption grpc.DialOption, volumeUrl string, vid needle.VolumeId) bool { |
|||
glog.V(0).Infoln("Start vacuuming", vid, "on", volumeUrl) |
|||
err := operation.WithVolumeServerClient(volumeUrl, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { |
|||
_, err := volumeServerClient.VacuumVolumeCompact(ctx, &volume_server_pb.VacuumVolumeCompactRequest{ |
|||
VolumeId: uint32(vid), |
|||
}) |
|||
return err |
|||
}) |
|||
if err != nil { |
|||
glog.Errorf("Error when vacuuming %d on %s: %v", vid, volumeUrl, err) |
|||
return false |
|||
} |
|||
glog.V(0).Infof("Complete vacuuming volume:%v on %s", vid, volumeUrl) |
|||
return true |
|||
} |
|||
|
|||
// commit the compact transaction when compactVolume() return true
|
|||
func commitCompactedVolume(ctx context.Context, grpcDialOption grpc.DialOption, volumeUrl string, vid needle.VolumeId) bool { |
|||
err := operation.WithVolumeServerClient(volumeUrl, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { |
|||
_, err := volumeServerClient.VacuumVolumeCommit(ctx, &volume_server_pb.VacuumVolumeCommitRequest{ |
|||
VolumeId: uint32(vid), |
|||
}) |
|||
return err |
|||
}) |
|||
if err != nil { |
|||
glog.Errorf("Error when committing vacuum %d on %s: %v", vid, volumeUrl, err) |
|||
return false |
|||
} |
|||
glog.V(0).Infof("Complete Committing vacuum %d on %s", vid, volumeUrl) |
|||
return true |
|||
} |
|||
|
|||
// rollback the compact transaction when compactVolume return false
// cleanupCompactedVolume discards the partial compaction artifacts on
// volumeUrl after a failed compact.
// NOTE(review): this returns false even when the cleanup RPC succeeds (see
// the final return) — tryCompactVolume relies on that to report the overall
// compact attempt as failed, so do not change it to return true in isolation.
func cleanupCompactedVolume(ctx context.Context, grpcDialOption grpc.DialOption, volumeUrl string, vid needle.VolumeId) bool {
	glog.V(0).Infoln("Start cleaning up", vid, "on", volumeUrl)
	err := operation.WithVolumeServerClient(volumeUrl, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
		_, err := volumeServerClient.VacuumVolumeCleanup(ctx, &volume_server_pb.VacuumVolumeCleanupRequest{
			VolumeId: uint32(vid),
		})
		return err
	})
	if err != nil {
		glog.Errorf("Error when cleaning up vacuum %d on %s: %v", vid, volumeUrl, err)
		return false
	}
	glog.V(0).Infof("Complete cleaning up vacuum %d on %s", vid, volumeUrl)
	// intentionally false: the compaction this cleans up after has failed
	return false
}
|||
|
|||
func tryCompactVolume(ctx context.Context, grpcDialOption grpc.DialOption, vid needle.VolumeId, volumeUrl string) bool { |
|||
if compactVolume(ctx, grpcDialOption, volumeUrl, vid) == false { |
|||
return cleanupCompactedVolume(ctx, grpcDialOption, volumeUrl, vid) |
|||
} |
|||
return commitCompactedVolume(ctx, grpcDialOption, volumeUrl, vid) |
|||
} |
|||
|
|||
// tryBatchCompactVolume compacts every replica of vid (one goroutine per
// url) and returns true only when all replicas compacted and committed
// successfully.
func tryBatchCompactVolume(ctx context.Context, grpcDialOption grpc.DialOption, vid needle.VolumeId, urls []string) bool {
	// resultChan only ever carries failures; it is unbuffered, which is safe
	// because the collector loop below runs in this goroutine while a helper
	// goroutine closes the channel after all workers finish.
	resultChan := make(chan error)
	var wg sync.WaitGroup
	wg.Add(len(urls))
	for _, url := range urls {
		go func(volumeUrl string) {
			defer wg.Done()
			if tryCompactVolume(ctx, grpcDialOption, vid, volumeUrl) == false {
				resultChan <- fmt.Errorf("url:%s", volumeUrl)
			}
		}(url)
	}

	go func() { // close channel
		wg.Wait()
		close(resultChan)
	}()

	// collect the per-url failures, if any
	var errs []string
	for result := range resultChan {
		if result != nil {
			errs = append(errs, result.Error())
		}
	}
	if len(errs) > 0 {
		glog.Errorf("consist volume:%v compact reversion failed, %s", vid, strings.Join(errs, "; "))
		return false
	}
	return true
}
|||
|
|||
func getVolumeUrls(locs []*master_pb.DataNodeInfo) []string { |
|||
eVidUrls := make([]string, 0, len(locs)) |
|||
for _, loc := range locs { |
|||
eVidUrls = append(eVidUrls, loc.Url) |
|||
} |
|||
return eVidUrls |
|||
} |
|||
|
|||
// ReplicaInformation is the subset of a replica's state that is compared
// across replicas to decide whether a volume is healthy.
type ReplicaInformation struct {
	Size                   uint64 // data file size in bytes
	FileCount              uint64 // count of files the volume received from user clients
	ReadOnly               bool
	CompactRevision        uint32 // vacuum/compact generation counter
	LastCompactIndexOffset uint64 // collected but currently not compared (see isHealthyVolumeReplications)
}
|||
|
|||
// getUnhealthyVolumeIds screens the topology snapshot and returns the ids of
// volumes whose replicas disagree on size, file count or compact revision.
// NOTE(review): the limit parameter is named volumeSizeLimitMB and is scaled
// by 1024*1024 below, but Check passes topologyInfo.VolumeSizeLimitBytes
// here (and unscaled elsewhere) — confirm the unit of that field.
func getUnhealthyVolumeIds(volInfoMap map[uint32][]*master_pb.VolumeInformationMessage,
	vol2LocsMap map[uint32][]*master_pb.DataNodeInfo, volumeSizeLimitMB uint64) []uint32 {
	errVids := make([]uint32, 0, len(vol2LocsMap))
	for vid, info := range volInfoMap {
		// normalize each replica's message; LastCompactIndexOffset is not
		// available from the topology snapshot and stays zero here.
		vInfos := make([]*ReplicaInformation, 0, len(info))
		for _, i := range info {
			vInfos = append(vInfos, &ReplicaInformation{
				Size:            i.Size,
				FileCount:       i.FileCount,
				ReadOnly:        i.ReadOnly,
				CompactRevision: i.CompactRevision,
			})
		}
		if isHealthyVolumeReplications(vInfos, volumeSizeLimitMB*1024*1024) {
			glog.V(4).Infof("the volume:%v has %d same replication, need not repair", vid, len(info))
			continue
		}
		errVids = append(errVids, vid)
	}
	return errVids
}
|||
|
|||
func isHealthyVolumeReplications(volInfo []*ReplicaInformation, volumeSizeLimit uint64) bool { |
|||
fileSizeSet := make(map[uint64]bool) |
|||
fileCountSet := make(map[uint64]bool) |
|||
compactVersionSet := make(map[uint32]bool) |
|||
compactOffsetSet := make(map[uint64]bool) |
|||
//lastModifiedSet := make(map[uint64]bool)
|
|||
var oneFileSize uint64 = 0 |
|||
for _, v := range volInfo { |
|||
fileCountSet[v.FileCount] = true |
|||
//lastModifiedSet[v.] = true
|
|||
fileSizeSet[v.Size] = true |
|||
oneFileSize = v.Size |
|||
compactVersionSet[v.CompactRevision] = true |
|||
compactOffsetSet[v.LastCompactIndexOffset] = true |
|||
} |
|||
|
|||
if (len(fileSizeSet) == 1) && (oneFileSize >= volumeSizeLimit) && (len(fileCountSet) == 1) { |
|||
return true |
|||
} |
|||
|
|||
if len(fileCountSet) != 1 { |
|||
return false |
|||
} |
|||
if len(fileSizeSet) != 1 { |
|||
return false |
|||
} |
|||
if len(compactVersionSet) != 1 { |
|||
return false |
|||
} |
|||
//if len(compactOffsetSet) != 1 {
|
|||
// return false
|
|||
//}
|
|||
return true |
|||
} |
|||
|
|||
func isReadOnlyVolume(replicaInfo []*master_pb.VolumeInformationMessage) bool { |
|||
readOnlySet := make(map[bool]bool) |
|||
for _, info := range replicaInfo { |
|||
readOnlySet[info.ReadOnly] = true |
|||
} |
|||
if _, exist := readOnlySet[true]; exist { |
|||
return len(readOnlySet) == 1 |
|||
} |
|||
return false |
|||
} |
|||
|
|||
// syncReplication asks the volume server at destUrl to copy the volume from
// req.SourceDataNode via the VolumeCopy RPC (the destination pulls the data
// from the healthy replica).
// NOTE(review): the error is both logged here and returned — the caller in
// Repair logs it again, so failures appear twice in the logs.
func syncReplication(grpcDialOption grpc.DialOption, destUrl string, req *volume_server_pb.VolumeCopyRequest) error {
	ctx := context.Background()
	err := operation.WithVolumeServerClient(destUrl, grpcDialOption,
		func(client volume_server_pb.VolumeServerClient) error {
			if _, err := client.VolumeCopy(ctx, req); err != nil {
				glog.Errorf("sync replication failed, %v", err)
				return err
			}
			return nil
		})
	return err
}
|||
|
|||
func getVolumeInfo(topo *master_pb.TopologyInfo) (map[uint32][]*master_pb.VolumeInformationMessage, map[uint32][]*master_pb.DataNodeInfo) { |
|||
volInfoMap := make(map[uint32][]*master_pb.VolumeInformationMessage) |
|||
vol2LocsMap := make(map[uint32][]*master_pb.DataNodeInfo) |
|||
IterateVolumes(topo, func(dc *master_pb.DataCenterInfo, rack *master_pb.RackInfo, dataNode *master_pb.DataNodeInfo, vol *master_pb.VolumeInformationMessage) { |
|||
volInfoMap[vol.Id] = append(volInfoMap[vol.Id], vol) |
|||
vol2LocsMap[vol.Id] = append(vol2LocsMap[vol.Id], dataNode) |
|||
}) |
|||
|
|||
return volInfoMap, vol2LocsMap |
|||
} |
|||
|
|||
// IterateVolumes walks the topology tree (data center -> rack -> data node
// -> volume) and invokes callBack once per volume replica with its full
// location context.
func IterateVolumes(topo *master_pb.TopologyInfo,
	callBack func(dc *master_pb.DataCenterInfo, rack *master_pb.RackInfo, dataNode *master_pb.DataNodeInfo, vol *master_pb.VolumeInformationMessage)) {
	for _, dc := range topo.DataCenterInfos {
		for _, rack := range dc.RackInfos {
			for _, dn := range rack.DataNodeInfos {
				for _, vol := range dn.VolumeInfos {
					callBack(dc, rack, dn, vol)
				}
			}
		}
	}
}
|||
@ -0,0 +1,178 @@ |
|||
package shell |
|||
|
|||
import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"strconv"
	"strings"
	"testing"

	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
	"github.com/chrislusf/seaweedfs/weed/sequence"
	"github.com/chrislusf/seaweedfs/weed/storage"
	"github.com/chrislusf/seaweedfs/weed/storage/needle"
	"github.com/chrislusf/seaweedfs/weed/topology"
	"github.com/chrislusf/seaweedfs/weed/wdclient"
	"google.golang.org/grpc"
)
|||
|
|||
// topologyLayout is the JSON fixture consumed by setup: data centers contain
// racks, racks contain servers, and each server lists its volumes
// ({"id", "size"}) plus a max volume count ("limit"). Several volume ids
// (2-6) appear on more than one server so the replication checker has
// replica sets to compare; volume 3 has differing sizes on purpose.
var topologyLayout = `
{
  "dc1":{
    "rack1":{
      "server111":{
        "volumes":[
          {"id":1, "size":12312},
          {"id":2, "size":12312},
          {"id":3, "size":12312}
        ],
        "limit":3
      },
      "server112":{
        "volumes":[
          {"id":4, "size":12312},
          {"id":5, "size":12312},
          {"id":6, "size":12312}
        ],
        "limit":10
      }
    },
    "rack2":{
      "server121":{
        "volumes":[
          {"id":4, "size":12312},
          {"id":5, "size":12312},
          {"id":6, "size":12312}
        ],
        "limit":4
      },
      "server122":{
        "volumes":[],
        "limit":4
      },
      "server123":{
        "volumes":[
          {"id":2, "size":12312},
          {"id":3, "size":12311},
          {"id":4, "size":12312}
        ],
        "limit":5
      }
    }
  },
  "dc3":{
  }
}
`
|||
|
|||
// setup builds an in-memory topology from the JSON layout string: it creates
// data centers, racks and data nodes, registers each node's volumes, and
// raises the node's max volume count to the configured "limit".
// Unmarshal errors are only printed; the fixture is assumed to be valid.
func setup(topologyLayout string) *topology.Topology {
	var data interface{}
	err := json.Unmarshal([]byte(topologyLayout), &data)
	if err != nil {
		fmt.Println("error:", err)
	}
	fmt.Println("data:", data)

	//need to connect all nodes first before server adding volumes
	var portT int
	topo := topology.NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5)
	mTopology := data.(map[string]interface{})
	for dcKey, dcValue := range mTopology {
		dc := topology.NewDataCenter(dcKey)
		dcMap := dcValue.(map[string]interface{})
		topo.LinkChildNode(dc)
		for rackKey, rackValue := range dcMap {
			rack := topology.NewRack(rackKey)
			rackMap := rackValue.(map[string]interface{})
			dc.LinkChildNode(rack)
			for serverKey, serverValue := range rackMap {
				server := topology.NewDataNode(serverKey)
				server.Ip = "localhost"
				// give every fake server a distinct even port
				portT += 2
				server.Port = portT
				server.PublicUrl = server.Ip + ":" + strconv.FormatUint(uint64(server.Port), 10)
				serverMap := serverValue.(map[string]interface{})
				rack.LinkChildNode(server)
				for _, v := range serverMap["volumes"].([]interface{}) {
					m := v.(map[string]interface{})
					// JSON numbers decode as float64; convert explicitly
					vi := storage.VolumeInfo{
						Id:               needle.VolumeId(int64(m["id"].(float64))),
						Size:             uint64(m["size"].(float64)),
						Version:          needle.CurrentVersion,
						ReplicaPlacement: &storage.ReplicaPlacement{1, 0, 0},
						Ttl:              needle.EMPTY_TTL,
						Collection:       "",
					}
					server.AddOrUpdateVolume(vi)
				}
				server.UpAdjustMaxVolumeCountDelta(int64(serverMap["limit"].(float64)))
			}
		}
	}

	return topo
}
|||
|
|||
// TestGetVolumeList verifies that the in-memory topology built by setup can
// be converted to a master_pb TopologyInfo message.
func TestGetVolumeList(t *testing.T) {
	topo := setup(topologyLayout)
	topoInfo := topo.ToTopologyInfo()
	if nil == topoInfo {
		t.Errorf("failed.")
	}
}
|||
|
|||
func TestReplicationHealthChecker_GetErrorReplications(t *testing.T) { |
|||
topo := setup(topologyLayout) |
|||
topoInfo := topo.ToTopologyInfo() |
|||
if nil == topoInfo { |
|||
t.Errorf("failed.") |
|||
} |
|||
|
|||
checker := NewReplicationHealthChecker(context.Background(), grpc.EmptyDialOption{}) |
|||
errVids, err := checker.Check(topoInfo) |
|||
if err != nil { |
|||
t.Error(err) |
|||
return |
|||
} else { |
|||
fmt.Printf("error vids : %v\n", errVids) |
|||
} |
|||
} |
|||
|
|||
// this UT need mock or real seaweedfs service
|
|||
func TestReplicationHealthChecker_GetErrorReplications2(t *testing.T) { |
|||
masters := "localhost:9633,localhost:9733,localhost:9833" |
|||
ctx := context.Background() |
|||
masterClient := wdclient.NewMasterClient(ctx, grpc.WithInsecure(), "shell", strings.Split(masters, ",")) |
|||
go masterClient.KeepConnectedToMaster() |
|||
masterClient.WaitUntilConnected() |
|||
|
|||
var resp *master_pb.VolumeListResponse |
|||
if err := masterClient.WithClient(ctx, func(client master_pb.SeaweedClient) error { |
|||
var err error |
|||
resp, err = client.VolumeList(ctx, &master_pb.VolumeListRequest{}) |
|||
return err |
|||
}); err != nil { |
|||
t.Error(err) |
|||
} |
|||
|
|||
//respBytes, err := json.Marshal(resp)
|
|||
//if err != nil {
|
|||
// t.Error(err)
|
|||
//}
|
|||
//t.Log(string(respBytes[:]))
|
|||
|
|||
checker := NewReplicationHealthChecker(ctx, grpc.WithInsecure()) |
|||
errVids, err := checker.Check(resp.TopologyInfo) |
|||
if err != nil { |
|||
t.Error(err) |
|||
return |
|||
} |
|||
fmt.Printf("error vids : %v\n", errVids) |
|||
|
|||
repair := NewReplicationHealthRepair(ctx, grpc.WithInsecure()) |
|||
success, failed, err := repair.Repair(resp.TopologyInfo, errVids) |
|||
if err != nil { |
|||
t.Error(err) |
|||
} |
|||
fmt.Printf("success:%v, failed:%v", success, failed) |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue