Add volume.scrub and ec.scrub shell commands to scrub regular & EC volumes on demand. (#8188)
* Implement RPC skeleton for regular/EC volume scrubbing. See https://github.com/seaweedfs/seaweedfs/issues/8018 for details.
* Add `volume.scrub` and `ec.scrub` shell commands to scrub regular & EC volumes on demand. For example:

```
> ec.scrub --full
Scrubbing 10.200.17.13:9005 (1/10)...
Scrubbing 10.200.17.13:9001 (2/10)...
Scrubbing 10.200.17.13:9008 (3/10)...
Scrubbing 10.200.17.13:9009 (4/10)...
Scrubbing 10.200.17.13:9004 (5/10)...
Scrubbing 10.200.17.13:9010 (6/10)...
Scrubbing 10.200.17.13:9007 (7/10)...
Scrubbing 10.200.17.13:9002 (8/10)...
Scrubbing 10.200.17.13:9003 (9/10)...
Scrubbing 10.200.17.13:9006 (10/10)...
Scrubbed 20 EC files and 20 volumes on 10 nodes

Got scrub failures on 1 EC volumes and 2 EC shards :(
Affected volumes: 10.200.17.13:9005:1
Details:
    [10.200.17.13:9005] expected 551041 bytes for needle 6, got 551072
    [10.200.17.13:9005] needles in volume file (1) don't match index entries (173) for volume 1
```
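The diff below contains only the shell-side commands; the RPC skeleton itself (the volume server proto changes and handlers) is referenced but not shown. The request/response shapes can still be inferred from the client calls in the new files. A rough Go sketch of those shapes, for orientation only — field names follow the getters used below, while the enum values and types such as `ShardId` are guesses; the real definitions are generated into the `volume_server_pb` package:

```go
// Package sketch illustrates the inferred scrub RPC message shapes.
// These are NOT the real generated types from volume_server_pb.
package sketch

// VolumeScrubMode selects how thorough the scrub is.
type VolumeScrubMode int32

const (
    VolumeScrubMode_INDEX VolumeScrubMode = iota // check needles against index entries
    VolumeScrubMode_FULL                         // additionally verify stored contents
)

// ScrubEcVolumeRequest mirrors what ec.scrub sends to each volume server
// (volume.scrub sends the analogous ScrubVolumeRequest).
type ScrubEcVolumeRequest struct {
    Mode      VolumeScrubMode
    VolumeIds []uint32 // empty means "scrub every (EC) volume on this server"
}

// ScrubEcVolumeResponse mirrors what the shell command aggregates.
// ScrubVolumeResponse is the same minus the shard-level fields.
type ScrubEcVolumeResponse struct {
    TotalVolumes     uint64
    TotalFiles       uint64
    BrokenVolumeIds  []uint32
    BrokenShardInfos []BrokenShardInfo
    Details          []string // human-readable failure descriptions
}

// BrokenShardInfo identifies a failing shard within an EC volume.
type BrokenShardInfo struct {
    VolumeId uint32
    ShardId  uint32 // exact type unknown; only formatted with %v in the shell code
}
```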
3 changed files with 324 additions and 1 deletion:

- weed/shell/command_ec_common.go (19 lines changed)
- weed/shell/command_ec_scrub.go (156 lines changed)
- weed/shell/command_volume_scrub.go (150 lines changed)
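The command_ec_common.go change is not shown below; presumably it adds the shared `collectDataNodes` helper that both new commands call to enumerate volume servers when no `-node` list is given.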
weed/shell/command_ec_scrub.go (new file, +156 lines):

```go
package shell

import (
    "context"
    "flag"
    "fmt"
    "io"
    "strconv"
    "strings"

    "github.com/seaweedfs/seaweedfs/weed/operation"
    "github.com/seaweedfs/seaweedfs/weed/pb"
    "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
    "google.golang.org/grpc"
)

func init() {
    Commands = append(Commands, &commandEcVolumeScrub{})
}

type commandEcVolumeScrub struct {
    env               *CommandEnv
    volumeServerAddrs []pb.ServerAddress
    volumeIDs         []uint32
    mode              volume_server_pb.VolumeScrubMode
    grpcDialOption    grpc.DialOption
}

func (c *commandEcVolumeScrub) Name() string {
    return "ec.scrub"
}

func (c *commandEcVolumeScrub) Help() string {
    return `scrubs EC volume contents on volume servers.

    Supports either scrubbing only needle data, or deep scrubbing file contents as well.

    Scrubbing can be limited to specific EC volume IDs for specific volume servers.
    By default, all volume IDs across all servers are processed.
`
}

func (c *commandEcVolumeScrub) HasTag(CommandTag) bool {
    return false
}

func (c *commandEcVolumeScrub) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
    volScrubCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
    nodesStr := volScrubCommand.String("node", "", "comma-separated list of volume server <host>:<port> (optional)")
    volumeIDsStr := volScrubCommand.String("volumeId", "", "comma-separated EC volume IDs to process (optional)")
    // TODO: switch default mode to LOCAL, once implemented.
    mode := volScrubCommand.String("mode", "INDEX", "scrubbing mode (INDEX/FULL)")
    // TODO: add per-node parallelization

    if err = volScrubCommand.Parse(args); err != nil {
        return err
    }
    if err = commandEnv.confirmIsLocked(args); err != nil {
        return
    }

    c.volumeServerAddrs = []pb.ServerAddress{}
    if *nodesStr != "" {
        for _, addr := range strings.Split(*nodesStr, ",") {
            c.volumeServerAddrs = append(c.volumeServerAddrs, pb.ServerAddress(addr))
        }
    } else {
        dns, err := collectDataNodes(commandEnv, 0)
        if err != nil {
            return err
        }
        for _, dn := range dns {
            c.volumeServerAddrs = append(c.volumeServerAddrs, pb.ServerAddress(dn.Address))
        }
    }

    c.volumeIDs = []uint32{}
    if *volumeIDsStr != "" {
        for _, vids := range strings.Split(*volumeIDsStr, ",") {
            vids = strings.TrimSpace(vids)
            if vids == "" {
                continue
            }
            if vid, err := strconv.ParseUint(vids, 10, 32); err == nil {
                c.volumeIDs = append(c.volumeIDs, uint32(vid))
            } else {
                return fmt.Errorf("invalid volume ID %q", vids)
            }
        }
    }

    switch strings.ToUpper(*mode) {
    case "INDEX":
        c.mode = volume_server_pb.VolumeScrubMode_INDEX
    case "FULL":
        c.mode = volume_server_pb.VolumeScrubMode_FULL
    default:
        return fmt.Errorf("unsupported scrubbing mode %q", *mode)
    }
    fmt.Fprintf(writer, "using %s mode\n", c.mode.String())
    c.env = commandEnv

    return c.scrubEcVolumes(writer)
}

func (c *commandEcVolumeScrub) scrubEcVolumes(writer io.Writer) error {
    var brokenVolumesStr, brokenShardsStr []string
    var details []string
    var totalVolumes, brokenVolumes, brokenShards, totalFiles uint64

    for i, addr := range c.volumeServerAddrs {
        fmt.Fprintf(writer, "Scrubbing %s (%d/%d)...\n", addr.String(), i+1, len(c.volumeServerAddrs))

        err := operation.WithVolumeServerClient(false, addr, c.env.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
            res, err := volumeServerClient.ScrubEcVolume(context.Background(), &volume_server_pb.ScrubEcVolumeRequest{
                Mode:      c.mode,
                VolumeIds: c.volumeIDs,
            })
            if err != nil {
                return err
            }

            totalVolumes += res.GetTotalVolumes()
            totalFiles += res.GetTotalFiles()
            brokenVolumes += uint64(len(res.GetBrokenVolumeIds()))
            brokenShards += uint64(len(res.GetBrokenShardInfos()))
            for _, d := range res.GetDetails() {
                details = append(details, fmt.Sprintf("[%s] %s", addr, d))
            }
            for _, vid := range res.GetBrokenVolumeIds() {
                brokenVolumesStr = append(brokenVolumesStr, fmt.Sprintf("%s:%v", addr, vid))
            }
            for _, si := range res.GetBrokenShardInfos() {
                brokenShardsStr = append(brokenShardsStr, fmt.Sprintf("%s:%v:%v", addr, si.VolumeId, si.ShardId))
            }

            return nil
        })
        if err != nil {
            return err
        }
    }

    fmt.Fprintf(writer, "Scrubbed %d EC files and %d volumes on %d nodes\n", totalFiles, totalVolumes, len(c.volumeServerAddrs))
    if brokenVolumes != 0 {
        fmt.Fprintf(writer, "\nGot scrub failures on %d EC volumes and %d EC shards :(\n", brokenVolumes, brokenShards)
        fmt.Fprintf(writer, "Affected volumes: %s\n", strings.Join(brokenVolumesStr, ", "))
        if len(brokenShardsStr) != 0 {
            fmt.Fprintf(writer, "Affected shards: %s\n", strings.Join(brokenShardsStr, ", "))
        }
        if len(details) != 0 {
            fmt.Fprintf(writer, "Details:\n\t%s\n", strings.Join(details, "\n\t"))
        }
    }
    return nil
}
```
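The `// TODO: add per-node parallelization` above is left open in this PR: both commands walk the volume servers sequentially. If that TODO were picked up later, one possible shape is sketched below (illustrative only, not part of this change; `scrubNode` is a hypothetical stand-in for the per-node `operation.WithVolumeServerClient` call, and the shared counters and slices would additionally need a mutex):

```go
package shell

import (
    "github.com/seaweedfs/seaweedfs/weed/pb"
    "golang.org/x/sync/errgroup"
)

// scrubNode is a hypothetical stand-in for the per-node scrub above:
// dial the volume server at addr, run the Scrub(Ec)Volume RPC, return any error.
func scrubNode(addr pb.ServerAddress) error {
    // ... operation.WithVolumeServerClient(...) as in scrubEcVolumes ...
    return nil
}

// scrubNodesConcurrently fans the per-node scrubs out with a bounded number
// of in-flight RPCs and returns the first error encountered.
func scrubNodesConcurrently(addrs []pb.ServerAddress, parallel int) error {
    var g errgroup.Group
    g.SetLimit(parallel) // cap concurrent volume-server calls
    for _, addr := range addrs {
        addr := addr // capture the loop variable for the goroutine
        g.Go(func() error { return scrubNode(addr) })
    }
    return g.Wait()
}
```

Progress output to the shell writer and the aggregated totals/details would likewise need synchronization before a concurrent version could replace the sequential loop.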
weed/shell/command_volume_scrub.go (new file, +150 lines):

```go
package shell

import (
    "context"
    "flag"
    "fmt"
    "io"
    "strconv"
    "strings"

    "github.com/seaweedfs/seaweedfs/weed/operation"
    "github.com/seaweedfs/seaweedfs/weed/pb"
    "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
    "google.golang.org/grpc"
)

func init() {
    Commands = append(Commands, &commandVolumeScrub{})
}

type commandVolumeScrub struct {
    env               *CommandEnv
    volumeServerAddrs []pb.ServerAddress
    volumeIDs         []uint32
    mode              volume_server_pb.VolumeScrubMode
    grpcDialOption    grpc.DialOption
}

func (c *commandVolumeScrub) Name() string {
    return "volume.scrub"
}

func (c *commandVolumeScrub) Help() string {
    return `scrubs volume contents on volume servers.

    Supports either scrubbing only needle data, or deep scrubbing file contents as well.

    Scrubbing can be limited to specific volume IDs for specific volume servers.
    By default, all volume IDs across all servers are processed.
`
}

func (c *commandVolumeScrub) HasTag(CommandTag) bool {
    return false
}

func (c *commandVolumeScrub) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
    volScrubCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
    nodesStr := volScrubCommand.String("node", "", "comma-separated list of volume server <host>:<port> (optional)")
    volumeIDsStr := volScrubCommand.String("volumeId", "", "comma-separated volume IDs to process (optional)")
    // TODO: switch default mode to LOCAL, once implemented.
    mode := volScrubCommand.String("mode", "INDEX", "scrubbing mode (INDEX/FULL)")
    // TODO: add per-node parallelization

    if err = volScrubCommand.Parse(args); err != nil {
        return err
    }
    if err = commandEnv.confirmIsLocked(args); err != nil {
        return
    }

    c.volumeServerAddrs = []pb.ServerAddress{}
    if *nodesStr != "" {
        for _, addr := range strings.Split(*nodesStr, ",") {
            c.volumeServerAddrs = append(c.volumeServerAddrs, pb.ServerAddress(addr))
        }
    } else {
        dns, err := collectDataNodes(commandEnv, 0)
        if err != nil {
            return err
        }
        for _, dn := range dns {
            c.volumeServerAddrs = append(c.volumeServerAddrs, pb.ServerAddress(dn.Address))
        }
    }

    c.volumeIDs = []uint32{}
    if *volumeIDsStr != "" {
        for _, vids := range strings.Split(*volumeIDsStr, ",") {
            vids = strings.TrimSpace(vids)
            if vids == "" {
                continue
            }
            if vid, err := strconv.ParseUint(vids, 10, 32); err == nil {
                c.volumeIDs = append(c.volumeIDs, uint32(vid))
            } else {
                return fmt.Errorf("invalid volume ID %q", vids)
            }
        }
    }

    switch strings.ToUpper(*mode) {
    case "INDEX":
        c.mode = volume_server_pb.VolumeScrubMode_INDEX
    case "FULL":
        c.mode = volume_server_pb.VolumeScrubMode_FULL
    default:
        return fmt.Errorf("unsupported scrubbing mode %q", *mode)
    }
    fmt.Fprintf(writer, "using %s mode\n", c.mode.String())
    c.env = commandEnv

    return c.scrubVolumes(writer)
}

func (c *commandVolumeScrub) scrubVolumes(writer io.Writer) error {
    var brokenVolumesStr []string
    var details []string
    var totalVolumes, brokenVolumes, totalFiles uint64

    for i, addr := range c.volumeServerAddrs {
        fmt.Fprintf(writer, "Scrubbing %s (%d/%d)...\n", addr.String(), i+1, len(c.volumeServerAddrs))

        err := operation.WithVolumeServerClient(false, addr, c.env.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
            res, err := volumeServerClient.ScrubVolume(context.Background(), &volume_server_pb.ScrubVolumeRequest{
                Mode:      c.mode,
                VolumeIds: c.volumeIDs,
            })
            if err != nil {
                return err
            }

            totalVolumes += res.GetTotalVolumes()
            totalFiles += res.GetTotalFiles()
            brokenVolumes += uint64(len(res.GetBrokenVolumeIds()))
            for _, d := range res.GetDetails() {
                details = append(details, fmt.Sprintf("[%s] %s", addr, d))
            }
            for _, vid := range res.GetBrokenVolumeIds() {
                brokenVolumesStr = append(brokenVolumesStr, fmt.Sprintf("%s:%v", addr, vid))
            }

            return nil
        })
        if err != nil {
            return err
        }
    }

    fmt.Fprintf(writer, "Scrubbed %d files and %d volumes on %d nodes\n", totalFiles, totalVolumes, len(c.volumeServerAddrs))
    if brokenVolumes != 0 {
        fmt.Fprintf(writer, "\nGot scrub failures on %d volumes :(\n", brokenVolumes)
        fmt.Fprintf(writer, "Affected volumes: %s\n", strings.Join(brokenVolumesStr, ", "))
        if len(details) != 0 {
            fmt.Fprintf(writer, "Details:\n\t%s\n", strings.Join(details, "\n\t"))
        }
    }
    return nil
}
```
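Given the flags defined above, a deep scrub of everything would be `volume.scrub -mode=FULL`, while an index-only scrub of selected EC volumes on a single server would look like `ec.scrub -node=<host>:<port> -volumeId=1,2 -mode=INDEX` (host, port, and volume IDs are placeholders).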