diff --git a/weed/shell/command_volume_check_disk.go b/weed/shell/command_volume_check_disk.go index 64b510383..5a0d46869 100644 --- a/weed/shell/command_volume_check_disk.go +++ b/weed/shell/command_volume_check_disk.go @@ -49,6 +49,7 @@ func (c *commandVolumeCheckDisk) Do(args []string, commandEnv *CommandEnv, write slowMode := fsckCommand.Bool("slow", false, "slow mode checks all replicas even file counts are the same") verbose := fsckCommand.Bool("v", false, "verbose mode") applyChanges := fsckCommand.Bool("force", false, "apply the fix") + nonRepairThreshold := fsckCommand.Float64("nonRepairThreshold", 0.3, "repair when missing keys is not more than this limit") if err = fsckCommand.Parse(args); err != nil { return nil } @@ -101,10 +102,10 @@ func (c *commandVolumeCheckDisk) Do(args []string, commandEnv *CommandEnv, write } // find and make up the differnces - if err := c.doVolumeCheckDisk(aDB, bDB, a, b, *verbose, writer, *applyChanges); err != nil { + if err := c.doVolumeCheckDisk(aDB, bDB, a, b, *verbose, writer, *applyChanges, *nonRepairThreshold); err != nil { return err } - if err := c.doVolumeCheckDisk(bDB, aDB, b, a, *verbose, writer, *applyChanges); err != nil { + if err := c.doVolumeCheckDisk(bDB, aDB, b, a, *verbose, writer, *applyChanges, *nonRepairThreshold); err != nil { return err } replicas = replicas[1:] @@ -114,7 +115,7 @@ func (c *commandVolumeCheckDisk) Do(args []string, commandEnv *CommandEnv, write return nil } -func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_map.MemDb, source, target *VolumeReplica, verbose bool, writer io.Writer, applyChanges bool) error { +func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_map.MemDb, source, target *VolumeReplica, verbose bool, writer io.Writer, applyChanges bool, nonRepairThreshold float64) error { // find missing keys // hash join, can be more efficient @@ -130,6 +131,17 @@ func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_m fmt.Fprintf(writer, "volume %d %s has %d entries, %s missed %d entries\n", source.info.Id, source.location.dataNode.Id, counter, target.location.dataNode.Id, len(missingNeedles)) + if counter == 0 || len(missingNeedles) == 0 { + return nil + } + + missingNeedlesFraction := float64(len(missingNeedles)) / float64(counter) + if missingNeedlesFraction > nonRepairThreshold { + return fmt.Errorf( + "failed to start repair volume %d, percentage of missing keys is greater than the threshold: %.2f > %.2f", + source.info.Id, missingNeedlesFraction, nonRepairThreshold) + } + for _, needleValue := range missingNeedles { needleBlob, err := c.readSourceNeedleBlob(source.location.dataNode.Id, source.info.Id, needleValue) @@ -151,7 +163,6 @@ func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_m } - return nil } @@ -161,8 +172,8 @@ func (c *commandVolumeCheckDisk) readSourceNeedleBlob(sourceVolumeServer string, resp, err := client.ReadNeedleBlob(context.Background(), &volume_server_pb.ReadNeedleBlobRequest{ VolumeId: volumeId, NeedleId: uint64(needleValue.Key), - Offset: needleValue.Offset.ToActualOffset(), - Size: int32(needleValue.Size), + Offset: needleValue.Offset.ToActualOffset(), + Size: int32(needleValue.Size), }) if err != nil { return err @@ -177,9 +188,9 @@ func (c *commandVolumeCheckDisk) writeNeedleBlobToTarget(targetVolumeServer stri return operation.WithVolumeServerClient(targetVolumeServer, c.env.option.GrpcDialOption, func(client volume_server_pb.VolumeServerClient) error { _, err := client.WriteNeedleBlob(context.Background(), &volume_server_pb.WriteNeedleBlobRequest{ - VolumeId: volumeId, - NeedleId: uint64(needleValue.Key), - Size: int32(needleValue.Size), + VolumeId: volumeId, + NeedleId: uint64(needleValue.Key), + Size: int32(needleValue.Size), NeedleBlob: needleBlob, }) return err