
Give `cluster.status` detailed file metrics for regular volumes (#7791)

* Implement a `weed shell` command to return a status overview of the cluster.

Detailed file information will be implemented in a follow-up MR. Note also
that masters are currently not reporting back EC shard sizes correctly, via
`master_pb.VolumeEcShardInformationMessage.shard_sizes`.

For example:

```
> status

cluster:
	id:       topo
	status:   LOCKED
	nodes:    10
	topology: 1 DC(s), 1 disk(s) on 1 rack(s)

volumes:
	total:    3 volumes on 1 collections
	max size: 31457280000 bytes
	regular:  2/80 volumes on 6 replicas, 6 writable (100.00%), 0 read-only (0.00%)
	EC:       1 EC volumes on 14 shards (14.00 shards/volume)

storage:
	total:           186024424 bytes
	regular volumes: 186024424 bytes
	EC volumes:      0 bytes
	raw:             558073152 bytes on volume replicas, 0 bytes on EC shard files
```

* Humanize output for `weed shell` by default.

Makes things more readable :) (see the formatting sketch after the examples below)

```
> cluster.status

cluster:
	id:       topo
	status:   LOCKED
	nodes:    10
	topology: 1 DC, 10 disks on 1 rack

volumes:
	total:    3 volumes, 1 collection
	max size: 32 GB
	regular:  2/80 volumes on 6 replicas, 6 writable (100%), 0 read-only (0%)
	EC:       1 EC volume on 14 shards (14 shards/volume)

storage:
	total:           172 MB
	regular volumes: 172 MB
	EC volumes:      0 B
	raw:             516 MB on volume replicas, 0 B on EC shards
```

```
> cluster.status --humanize=false

cluster:
	id:       topo
	status:   LOCKED
	nodes:    10
	topology: 1 DC(s), 10 disk(s) on 1 rack(s)

volumes:
	total:    3 volume(s), 1 collection(s)
	max size: 31457280000 byte(s)
	regular:  2/80 volume(s) on 6 replica(s), 5 writable (83.33%), 1 read-only (16.67%)
	EC:       1 EC volume(s) on 14 shard(s) (14.00 shards/volume)

storage:
	total:           172128072 byte(s)
	regular volumes: 172128072 byte(s)
	EC volumes:      0 byte(s)
	raw:             516384216 byte(s) on volume replicas, 0 byte(s) on EC shards
```
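
For reference, the humanized style comes from the `github.com/dustin/go-humanize` package imported in the diff below. A minimal sketch of the idea, with a hypothetical `render` helper (an illustration of the formatting approach, not the committed code):

```go
package main

import (
	"fmt"

	"github.com/dustin/go-humanize"
	"github.com/dustin/go-humanize/english"
)

// render shows the two output styles: humanized output uses go-humanize
// for byte sizes and pluralization, raw output keeps exact numbers with
// a generic "(s)" suffix.
func render(volumes int, totalBytes uint64, humanized bool) string {
	if !humanized {
		return fmt.Sprintf("%d volume(s), %d byte(s)", volumes, totalBytes)
	}
	return fmt.Sprintf("%d %s, %s",
		volumes,
		english.PluralWord(volumes, "volume", ""), // "volume" / "volumes"
		humanize.Bytes(totalBytes))                // e.g. "172 MB"
}

func main() {
	fmt.Println(render(3, 172128072, true))  // 3 volumes, 172 MB
	fmt.Println(render(3, 172128072, false)) // 3 volume(s), 172128072 byte(s)
}
```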

Also adds unit tests and reshuffles test file handling for clarity.

* `cluster.status`: Add detailed file metrics for regular volumes.
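
Per-volume file counts are derived by summing each replica's counters and dividing by the replica count, so replicated volumes are only counted once; the "raw" lines keep the per-replica totals. A condensed, runnable sketch of that aggregation, with trimmed versions of the `VolumeReplicaStats`/`RegularVolumeStats` types from the diff below (the `aggregate` helper is illustrative, not the committed code):

```go
package main

import "fmt"

// Trimmed versions of the types added in the diff below.
type VolumeReplicaStats struct {
	Files, FilesDeleted, TotalSize uint64
}
type RegularVolumeStats map[uint32][]*VolumeReplicaStats

// aggregate sums each volume's replica counters and averages them over the
// replica count, so a volume replicated N times is only counted once.
func aggregate(stats RegularVolumeStats) (files, deleted, size uint64) {
	for _, replicas := range stats {
		rc := uint64(len(replicas))
		var f, d, s uint64
		for _, r := range replicas {
			f += r.Files
			d += r.FilesDeleted
			s += r.TotalSize
		}
		files += f / rc
		deleted += d / rc
		size += s / rc
	}
	return
}

func main() {
	stats := RegularVolumeStats{
		1: {
			{Files: 159, FilesDeleted: 8, TotalSize: 89762704},
			{Files: 159, FilesDeleted: 8, TotalSize: 89762704},
		},
	}
	f, d, s := aggregate(stats)
	fmt.Printf("%d files, %d deleted, %d bytes\n", f, d, s) // 159 files, 8 deleted, 89762704 bytes
}
```
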
Commit 6a1b9ce8cd on master, authored by Lisandro Pin and committed via GitHub.

Changed files:
  1. weed/shell/command_cluster_status.go (188)
  2. weed/shell/command_cluster_status_test.go (75)

weed/shell/command_cluster_status.go

@@ -1,13 +1,19 @@
package shell
import (
"context"
"flag"
"fmt"
"math"
"strings"
"sync"
"github.com/dustin/go-humanize"
"github.com/dustin/go-humanize/english"
"github.com/seaweedfs/seaweedfs/weed/operation"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
@@ -18,15 +24,29 @@ func init() {
Commands = append(Commands, &commandClusterStatus{})
}
// Map of volume_id -> [volume replicas] with stat details.
type VolumeReplicaStats struct {
Id string
VolumeId uint32
Files uint64
FilesDeleted uint64
TotalSize uint64
}
type RegularVolumeStats map[uint32][]*VolumeReplicaStats
type commandClusterStatus struct{}
type ClusterStatusPrinter struct {
writer io.Writer
humanize bool
locked bool
collections []string
topology *master_pb.TopologyInfo
volumeSizeLimitMb uint64
writer io.Writer
writerMu sync.Mutex
humanize bool
maxParallelization int
locked bool
collections []string
topology *master_pb.TopologyInfo
volumeSizeLimitMb uint64
regularVolumeStats RegularVolumeStats
}
func (c *commandClusterStatus) Name() string {
@@ -44,6 +64,8 @@ func (c *commandClusterStatus) HasTag(CommandTag) bool {
func (c *commandClusterStatus) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
flags := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
humanize := flags.Bool("humanize", true, "human-readable output")
includeFiles := flags.Bool("files", false, "include detailed file metrics, from all volume servers")
maxParallelization := flags.Int("maxParallelization", DefaultMaxParallelization, "run up to X tasks in parallel, whenever possible")
if err = flags.Parse(args); err != nil {
return err
@@ -59,14 +81,21 @@ func (c *commandClusterStatus) Do(args []string, commandEnv *CommandEnv, writer
}
sp := &ClusterStatusPrinter{
writer: writer,
humanize: *humanize,
writer: writer,
humanize: *humanize,
maxParallelization: *maxParallelization,
locked: commandEnv.isLocked(),
collections: collections,
topology: topology,
volumeSizeLimitMb: volumeSizeLimitMb,
}
if *includeFiles {
if err := sp.loadFileStats(commandEnv); err != nil {
return err
}
}
sp.Print()
return nil
@@ -83,11 +112,19 @@ func (sp *ClusterStatusPrinter) int(n int) string {
return sp.uint64(uint64(n))
}
func (sp *ClusterStatusPrinter) plural(n int, str string) string {
func (sp *ClusterStatusPrinter) uint64Plural(n uint64, str string) string {
if !sp.humanize {
return fmt.Sprintf("%s(s)", str)
}
return english.PluralWord(n, str, "")
uin := math.MaxInt
if n < math.MaxInt {
uin = int(n)
}
return english.PluralWord(int(uin), str, "")
}
func (sp *ClusterStatusPrinter) plural(n int, str string) string {
return sp.uint64Plural(uint64(n), str)
}
func (sp *ClusterStatusPrinter) bytes(b uint64) string {
@@ -128,16 +165,90 @@ func (sp *ClusterStatusPrinter) intPct(a, b int) string {
}
func (sp *ClusterStatusPrinter) write(format string, a ...any) {
fmt.Fprintf(sp.writer, strings.TrimRight(format, "\r\n "), a...)
fmt.Fprint(sp.writer, "\n")
sp.writerMu.Lock()
defer sp.writerMu.Unlock()
format = strings.TrimRight(format, " ")
if len(format) == 0 {
format = "\n"
}
fmt.Fprintf(sp.writer, format, a...)
last := format[len(format)-1:]
if last != "\n" && last != "\r" {
fmt.Fprint(sp.writer, "\n")
}
}
// TODO: add option to collect detailed file stats
func (sp *ClusterStatusPrinter) Print() {
sp.write("")
sp.printClusterInfo()
sp.printVolumeInfo()
sp.printStorageInfo()
sp.printFilesInfo()
}
// TODO: collect stats for EC volumes as well
func (sp *ClusterStatusPrinter) loadFileStats(commandEnv *CommandEnv) error {
sp.regularVolumeStats = RegularVolumeStats{}
var mu sync.Mutex
var progressTotal, progressDone uint64
ewg := NewErrorWaitGroup(sp.maxParallelization)
for _, dci := range sp.topology.DataCenterInfos {
for _, ri := range dci.RackInfos {
for _, dni := range ri.DataNodeInfos {
for _, d := range dni.DiskInfos {
mu.Lock()
progressTotal += uint64(len(d.VolumeInfos))
mu.Unlock()
for _, v := range d.VolumeInfos {
ewg.Add(func() error {
// Collect regular volume stats
err := operation.WithVolumeServerClient(false, pb.NewServerAddressWithGrpcPort(dni.Id, int(dni.GrpcPort)), commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
resp, reqErr := volumeServerClient.VolumeStatus(context.Background(), &volume_server_pb.VolumeStatusRequest{
VolumeId: uint32(v.Id),
})
if reqErr != nil {
return reqErr
}
mu.Lock()
defer mu.Unlock()
if resp != nil {
if _, ok := sp.regularVolumeStats[v.Id]; !ok {
sp.regularVolumeStats[v.Id] = []*VolumeReplicaStats{}
}
sp.regularVolumeStats[v.Id] = append(sp.regularVolumeStats[v.Id], &VolumeReplicaStats{
Id: dni.Id,
VolumeId: v.Id,
Files: resp.FileCount,
FilesDeleted: resp.FileDeletedCount,
TotalSize: resp.VolumeSize,
})
}
progressDone++
return nil
})
if err != nil {
return err
}
mu.Lock()
sp.write("collecting file stats: %s \r", sp.uint64Pct(progressDone, progressTotal))
mu.Unlock()
return nil
})
}
}
}
}
}
err := ewg.Wait()
sp.write("")
return err
}
func (sp *ClusterStatusPrinter) printClusterInfo() {
@@ -277,3 +388,52 @@ func (sp *ClusterStatusPrinter) printStorageInfo() {
sp.write("\traw: %s on volume replicas, %s on EC shards", sp.bytes(rawVolumeSize), sp.bytes(rawEcVolumeSize))
sp.write("")
}
func (sp *ClusterStatusPrinter) printFilesInfo() {
if len(sp.regularVolumeStats) == 0 {
return
}
var regularFilesTotal, regularFilesDeleted, regularFilesSize uint64
var regularFilesTotalRaw, regularFilesDeletedRaw, regularFilesSizeRaw uint64
for _, replicaStats := range sp.regularVolumeStats {
rc := uint64(len(replicaStats))
var volumeFilesTotal, volumeFilesSize, volumeFilesDeleted uint64
for _, rs := range replicaStats {
regularFilesTotalRaw += rs.Files
regularFilesSizeRaw += rs.TotalSize
regularFilesDeletedRaw += rs.FilesDeleted
volumeFilesTotal += rs.Files
volumeFilesSize += rs.TotalSize
volumeFilesDeleted += rs.FilesDeleted
}
regularFilesTotal += (volumeFilesTotal / rc)
regularFilesSize += (volumeFilesSize / rc)
regularFilesDeleted += (volumeFilesDeleted / rc)
}
regularFiles := regularFilesTotal - regularFilesDeleted
regularFilesRaw := regularFilesTotalRaw - regularFilesDeletedRaw
var avgFileSize uint64
if regularFilesTotal != 0 {
avgFileSize = regularFilesSize / regularFilesTotal
}
sp.write("files:")
sp.write("\tregular: %s %s, %s readable (%s), %s deleted (%s), avg %s per file",
sp.uint64(regularFilesTotal), sp.uint64Plural(regularFilesTotal, "file"),
sp.uint64(regularFiles), sp.uint64Pct(regularFiles, regularFilesTotal),
sp.uint64(regularFilesDeleted), sp.uint64Pct(regularFilesDeleted, regularFilesTotal),
sp.bytes(avgFileSize))
sp.write("\tregular raw: %s %s, %s readable (%s), %s deleted (%s), %s total",
sp.uint64(regularFilesTotalRaw), sp.uint64Plural(regularFilesTotalRaw, "file"),
sp.uint64(regularFilesRaw), sp.uint64Pct(regularFilesRaw, regularFilesTotalRaw),
sp.uint64(regularFilesDeletedRaw), sp.uint64Pct(regularFilesDeletedRaw, regularFilesTotalRaw),
sp.bytes(regularFilesSizeRaw))
sp.write("\tEC: [no data]")
sp.write("\tEC raw: [no data]")
sp.write("")
}
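
For context, `loadFileStats` above issues one `VolumeStatus` request per volume replica, with at most `maxParallelization` requests in flight. A self-contained sketch of that bounded fan-out pattern, using `golang.org/x/sync/errgroup` as a stand-in for the `ErrorWaitGroup` helper and a placeholder `fetchVolumeStatus` function (illustrative, not the committed code):

```go
package main

import (
	"context"
	"fmt"
	"sync"

	"golang.org/x/sync/errgroup"
)

// fetchVolumeStatus is a placeholder for the VolumeStatus gRPC call made in
// loadFileStats; it is not a SeaweedFS API.
func fetchVolumeStatus(ctx context.Context, volumeID uint32) (files uint64, err error) {
	return 100 + uint64(volumeID), nil
}

// collect queries all volumes with at most maxParallelization requests in
// flight, guarding the shared result map with a mutex.
func collect(ctx context.Context, volumeIDs []uint32, maxParallelization int) (map[uint32]uint64, error) {
	g, ctx := errgroup.WithContext(ctx)
	g.SetLimit(maxParallelization)

	var mu sync.Mutex
	out := make(map[uint32]uint64, len(volumeIDs))
	for _, id := range volumeIDs {
		id := id
		g.Go(func() error {
			files, err := fetchVolumeStatus(ctx, id)
			if err != nil {
				return err
			}
			mu.Lock()
			defer mu.Unlock()
			out[id] = files
			return nil
		})
	}
	return out, g.Wait()
}

func main() {
	stats, err := collect(context.Background(), []uint32{1, 2, 3}, 2)
	fmt.Println(stats, err)
}
```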

weed/shell/command_cluster_status_test.go

@@ -138,3 +138,78 @@ func TestPrintStorageInfo(t *testing.T) {
}
}
}
func TestPrintFilesInfo(t *testing.T) {
testCases := []struct {
regularVolumeStats RegularVolumeStats
humanize bool
want string
}{
{
regularVolumeStats: RegularVolumeStats{
1: []*VolumeReplicaStats{
&VolumeReplicaStats{Id: "10.200.17.13:9001", VolumeId: 1, Files: 159, FilesDeleted: 8, TotalSize: 89762704},
&VolumeReplicaStats{Id: "10.200.17.13:9002", VolumeId: 1, Files: 159, FilesDeleted: 8, TotalSize: 89762704},
&VolumeReplicaStats{Id: "10.200.17.13:9008", VolumeId: 1, Files: 159, FilesDeleted: 8, TotalSize: 89762704},
},
2: []*VolumeReplicaStats{
&VolumeReplicaStats{Id: "10.200.17.13:9003", VolumeId: 2, Files: 192, FilesDeleted: 21, TotalSize: 93788632},
&VolumeReplicaStats{Id: "10.200.17.13:9004", VolumeId: 2, Files: 192, FilesDeleted: 21, TotalSize: 93788632},
&VolumeReplicaStats{Id: "10.200.17.13:9005", VolumeId: 2, Files: 192, FilesDeleted: 21, TotalSize: 93788632},
},
3: []*VolumeReplicaStats{
&VolumeReplicaStats{Id: "10.200.17.13:9001", VolumeId: 3, Files: 149, FilesDeleted: 0, TotalSize: 81643872},
&VolumeReplicaStats{Id: "10.200.17.13:9006", VolumeId: 3, Files: 149, FilesDeleted: 0, TotalSize: 81643872},
&VolumeReplicaStats{Id: "10.200.17.13:9009", VolumeId: 3, Files: 149, FilesDeleted: 0, TotalSize: 81643872},
},
},
humanize: false,
want: `files:
regular: 500 file(s), 471 readable (94.20%), 29 deleted (5.80%), avg 530390 byte(s) per file
regular raw: 1500 file(s), 1413 readable (94.20%), 87 deleted (5.80%), 795585624 byte(s) total
EC: [no data]
EC raw: [no data]
`,
},
{
regularVolumeStats: RegularVolumeStats{
1: []*VolumeReplicaStats{
&VolumeReplicaStats{Id: "10.200.17.13:9001", VolumeId: 1, Files: 184, FilesDeleted: 33, TotalSize: 79187475},
&VolumeReplicaStats{Id: "10.200.17.13:9008", VolumeId: 1, Files: 184, FilesDeleted: 33, TotalSize: 79187475},
},
2: []*VolumeReplicaStats{
&VolumeReplicaStats{Id: "10.200.17.13:9004", VolumeId: 2, Files: 245, FilesDeleted: 4, TotalSize: 89501070},
&VolumeReplicaStats{Id: "10.200.17.13:9005", VolumeId: 2, Files: 245, FilesDeleted: 4, TotalSize: 89501070},
},
3: []*VolumeReplicaStats{
&VolumeReplicaStats{Id: "10.200.17.13:9006", VolumeId: 3, Files: 171, FilesDeleted: 12, TotalSize: 124049530},
&VolumeReplicaStats{Id: "10.200.17.13:9009", VolumeId: 3, Files: 171, FilesDeleted: 12, TotalSize: 124049530},
},
},
humanize: true,
want: `files:
regular: 600 files, 551 readable (91.83%), 49 deleted (8.16%), avg 488 kB per file
regular raw: 1,200 files, 1,102 readable (91.83%), 98 deleted (8.16%), 586 MB total
EC: [no data]
EC raw: [no data]
`,
},
}
for i, tc := range testCases {
var buf bytes.Buffer
sp := &ClusterStatusPrinter{
writer: &buf,
humanize: tc.humanize,
regularVolumeStats: tc.regularVolumeStats,
}
sp.printFilesInfo()
got := buf.String()
if got != tc.want {
t.Errorf("#%d: got %v, want %v", i, got, tc.want)
}
}
}