
Update `cluster.status` to resolve file details on EC volumes. (#8268)

Also parallelizes the file-metrics collection queries when the `--files`
flag is specified, and improves the readability of the command's output:

```
> cluster.status --files
collecting file stats: 100%

cluster:
	id:       topo
	status:   LOCKED
	nodes:    10
	topology: 1 DC, 10 disks on 1 rack

volumes:
	total:    3 volumes, 1 collection
	max size: 32 GB
	regular:  1/80 volume on 3 replicas, 3 writable (100%), 0 read-only (0%)
	EC:       2 EC volumes on 28 shards (14 shards/volume)

storage:
	total:           269 MB (522 MB raw, 193.95%)
	regular volumes: 91 MB (272 MB raw, 300%)
	EC volumes:      178 MB (250 MB raw, 140%)

files:
	total:   363 files, 300 readable (82.64%), 63 deleted (17.35%), avg 522 kB per file
	regular: 168 files, 105 readable (62.5%), 63 deleted (37.5%), avg 540 kB per file
	EC:      195 files, 195 readable (100%), 0 deleted (0%), avg 506 kB per file
```
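To make the summary arithmetic explicit, here is a small, purely illustrative Go snippet (hypothetical variable names, not code from the command) that reproduces the `files: total` line of the sample output from the per-category counts:

```
package main

import "fmt"

func main() {
	// Per-category counts copied from the sample output above.
	regularTotal, regularDeleted := uint64(168), uint64(63)
	ecTotal, ecDeleted := uint64(195), uint64(0)

	total := regularTotal + ecTotal                  // 363 files
	readable := total - (regularDeleted + ecDeleted) // 300 readable

	fmt.Printf("%d files, %d readable (%.2f%%)\n",
		total, readable, 100*float64(readable)/float64(total))
	// prints: 363 files, 300 readable (82.64%)
}
```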
Lisandro Pin (committed by GitHub) · commit f400fb44a0 · pull/8273/head
Changed files:
  1. weed/shell/command_cluster_status.go (170 lines changed)
  2. weed/shell/command_cluster_status_test.go (54 lines changed)

weed/shell/command_cluster_status.go

@@ -33,7 +33,17 @@ type VolumeReplicaStats struct {
FilesDeleted uint64
TotalSize uint64
}
type RegularVolumeStats map[uint32][]*VolumeReplicaStats
type RegularVolumesStats map[uint32][]*VolumeReplicaStats
// Map of ec_volume_id -> stat details.
type EcVolumeStats struct {
VolumeId uint32
Files uint64
FilesDeleted uint64
TotalSize uint64
}
type EcVolumesStats map[uint32]*EcVolumeStats
type commandClusterStatus struct{}
type ClusterStatusPrinter struct {
@@ -42,11 +52,12 @@ type ClusterStatusPrinter struct {
humanize bool
maxParallelization int
locked bool
collections []string
topology *master_pb.TopologyInfo
volumeSizeLimitMb uint64
regularVolumeStats RegularVolumeStats
locked bool
collections []string
topology *master_pb.TopologyInfo
volumeSizeLimitMb uint64
regularVolumesStats RegularVolumesStats
ecVolumesStats EcVolumesStats
}
func (c *commandClusterStatus) Name() string {
@@ -137,7 +148,12 @@ func (sp *ClusterStatusPrinter) bytes(b uint64) string {
func (sp *ClusterStatusPrinter) uint64Ratio(a, b uint64) string {
var p float64
if b != 0 {
p = float64(a) / float64(b)
if a%b == 0 {
// Avoid float precision issues on integer ratios.
p = float64(a / b)
} else {
p = float64(a) / float64(b)
}
}
if !sp.humanize {
return fmt.Sprintf("%.02f", p)
@@ -151,8 +167,14 @@ func (sp *ClusterStatusPrinter) intRatio(a, b int) string {
func (sp *ClusterStatusPrinter) uint64Pct(a, b uint64) string {
var p float64
if b != 0 {
p = 100 * float64(a) / float64(b)
if a%b == 0 {
// avoid float rounding errors on exact ratios
p = float64(a / b * 100)
} else {
p = 100 * float64(a) / float64(b)
}
}
if !sp.humanize {
return fmt.Sprintf("%.02f%%", p)
@@ -188,21 +210,31 @@ func (sp *ClusterStatusPrinter) Print() {
sp.printFilesInfo()
}
// TODO: collect stats for EC volumes as well
func (sp *ClusterStatusPrinter) loadFileStats(commandEnv *CommandEnv) error {
sp.regularVolumeStats = RegularVolumeStats{}
sp.regularVolumesStats = RegularVolumesStats{}
sp.ecVolumesStats = EcVolumesStats{}
var mu sync.Mutex
var progressTotal, progressDone uint64
ewg := NewErrorWaitGroup(sp.maxParallelization)
updateProgress := func() {
mu.Lock()
defer mu.Unlock()
progressDone++
sp.write("collecting file stats: %s \r", sp.uint64Pct(progressDone, progressTotal))
}
for _, dci := range sp.topology.DataCenterInfos {
for _, ri := range dci.RackInfos {
for _, dni := range ri.DataNodeInfos {
for _, d := range dni.DiskInfos {
mu.Lock()
progressTotal += uint64(len(d.VolumeInfos))
progressTotal += uint64(len(d.EcShardInfos))
mu.Unlock()
for _, v := range d.VolumeInfos {
ewg.Add(func() error {
// Collect regular volume stats
@@ -217,10 +249,10 @@ func (sp *ClusterStatusPrinter) loadFileStats(commandEnv *CommandEnv) error {
mu.Lock()
defer mu.Unlock()
if resp != nil {
if _, ok := sp.regularVolumeStats[v.Id]; !ok {
sp.regularVolumeStats[v.Id] = []*VolumeReplicaStats{}
if _, ok := sp.regularVolumesStats[v.Id]; !ok {
sp.regularVolumesStats[v.Id] = []*VolumeReplicaStats{}
}
sp.regularVolumeStats[v.Id] = append(sp.regularVolumeStats[v.Id], &VolumeReplicaStats{
sp.regularVolumesStats[v.Id] = append(sp.regularVolumesStats[v.Id], &VolumeReplicaStats{
Id: dni.Id,
VolumeId: v.Id,
Files: resp.FileCount,
@@ -228,17 +260,51 @@ func (sp *ClusterStatusPrinter) loadFileStats(commandEnv *CommandEnv) error {
TotalSize: resp.VolumeSize,
})
}
progressDone++
return nil
})
if err != nil {
return err
}
updateProgress()
return err
})
}
for _, eci := range d.EcShardInfos {
ewg.Add(func() error {
// Collect EC shard stats
var err error
mu.Lock()
sp.write("collecting file stats: %s \r", sp.uint64Pct(progressDone, progressTotal))
_, ok := sp.ecVolumesStats[eci.Id]
mu.Unlock()
return nil
if ok {
// this EC volume has been already processed, likely on a different node
return nil
}
err = operation.WithVolumeServerClient(false, pb.NewServerAddressWithGrpcPort(dni.Id, int(dni.GrpcPort)), commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
resp, reqErr := volumeServerClient.VolumeEcShardsInfo(context.Background(), &volume_server_pb.VolumeEcShardsInfoRequest{
VolumeId: uint32(eci.Id),
})
if reqErr != nil {
return reqErr
}
mu.Lock()
defer mu.Unlock()
if resp != nil {
sp.ecVolumesStats[eci.Id] = &EcVolumeStats{
VolumeId: eci.Id,
Files: resp.FileCount,
FilesDeleted: resp.FileDeletedCount,
TotalSize: resp.VolumeSize,
}
}
return nil
})
updateProgress()
return err
})
}
}
@@ -380,32 +446,27 @@ func (sp *ClusterStatusPrinter) printStorageInfo() {
ecVolumeSize += s
}
totalSize := volumeSize + ecVolumeSize
totalRawSize := rawVolumeSize + rawEcVolumeSize
sp.write("storage:")
sp.write("\ttotal: %s", sp.bytes(totalSize))
sp.write("\tregular volumes: %s", sp.bytes(volumeSize))
sp.write("\tEC volumes: %s", sp.bytes(ecVolumeSize))
sp.write("\traw: %s on volume replicas, %s on EC shards", sp.bytes(rawVolumeSize), sp.bytes(rawEcVolumeSize))
sp.write("\ttotal: %s (%s raw, %s)", sp.bytes(totalSize), sp.bytes(totalRawSize), sp.uint64Pct(totalRawSize, totalSize))
sp.write("\tregular volumes: %s (%s raw, %s)", sp.bytes(volumeSize), sp.bytes(rawVolumeSize), sp.uint64Pct(rawVolumeSize, volumeSize))
sp.write("\tEC volumes: %s (%s raw, %s)", sp.bytes(ecVolumeSize), sp.bytes(rawEcVolumeSize), sp.uint64Pct(rawEcVolumeSize, ecVolumeSize))
sp.write("")
}
func (sp *ClusterStatusPrinter) printFilesInfo() {
if len(sp.regularVolumeStats) == 0 {
if len(sp.regularVolumesStats) == 0 && len(sp.ecVolumesStats) == 0 {
return
}
var regularFilesTotal, regularFilesDeleted, regularFilesSize uint64
var regularFilesTotalRaw, regularFilesDeletedRaw, regularFilesSizeRaw uint64
for _, replicaStats := range sp.regularVolumeStats {
for _, replicaStats := range sp.regularVolumesStats {
rc := uint64(len(replicaStats))
var volumeFilesTotal, volumeFilesSize, volumeFilesDeleted uint64
for _, rs := range replicaStats {
regularFilesTotalRaw += rs.Files
regularFilesSizeRaw += rs.TotalSize
regularFilesDeletedRaw += rs.FilesDeleted
volumeFilesTotal += rs.Files
volumeFilesSize += rs.TotalSize
volumeFilesDeleted += rs.FilesDeleted
@@ -414,26 +475,49 @@ func (sp *ClusterStatusPrinter) printFilesInfo() {
regularFilesSize += (volumeFilesSize / rc)
regularFilesDeleted += (volumeFilesDeleted / rc)
}
regularFiles := regularFilesTotal - regularFilesDeleted
regularFilesRaw := regularFilesTotalRaw - regularFilesDeletedRaw
var avgFileSize uint64
var avgRegularFileSize uint64
if regularFilesTotal != 0 {
avgFileSize = regularFilesSize / regularFilesTotal
avgRegularFileSize = regularFilesSize / regularFilesTotal
}
var ecFilesTotal, ecFilesDeleted, ecFilesSize uint64
for _, ecStats := range sp.ecVolumesStats {
ecFilesTotal += ecStats.Files
ecFilesSize += ecStats.TotalSize
ecFilesDeleted += ecStats.FilesDeleted
}
ecFiles := ecFilesTotal - ecFilesDeleted
var avgEcFileSize uint64
if ecFilesTotal != 0 {
avgEcFileSize = ecFilesSize / ecFilesTotal
}
files := regularFiles + ecFiles
filesDeleted := regularFilesDeleted + ecFilesDeleted
filesTotal := regularFilesTotal + ecFilesTotal
filesSize := regularFilesSize + ecFilesSize
var avgFileSize uint64
if filesTotal != 0 {
avgFileSize = filesSize / filesTotal
}
sp.write("files:")
sp.write("\tregular: %s %s, %s readable (%s), %s deleted (%s), avg %s per file",
sp.write("\ttotal: %s %s, %s readable (%s), %s deleted (%s), avg %s per file",
sp.uint64(filesTotal), sp.uint64Plural(filesTotal, "file"),
sp.uint64(files), sp.uint64Pct(files, filesTotal),
sp.uint64(filesDeleted), sp.uint64Pct(filesDeleted, filesTotal),
sp.bytes(avgFileSize))
sp.write("\tregular: %s %s, %s readable (%s), %s deleted (%s), avg %s per file",
sp.uint64(regularFilesTotal), sp.uint64Plural(regularFilesTotal, "file"),
sp.uint64(regularFiles), sp.uint64Pct(regularFiles, regularFilesTotal),
sp.uint64(regularFilesDeleted), sp.uint64Pct(regularFilesDeleted, regularFilesTotal),
sp.bytes(avgFileSize))
sp.write("\tregular raw: %s %s, %s readable (%s), %s deleted (%s), %s total",
sp.uint64(regularFilesTotalRaw), sp.uint64Plural(regularFilesTotalRaw, "file"),
sp.uint64(regularFilesRaw), sp.uint64Pct(regularFilesRaw, regularFilesTotalRaw),
sp.uint64(regularFilesDeletedRaw), sp.uint64Pct(regularFilesDeletedRaw, regularFilesTotalRaw),
sp.bytes(regularFilesSizeRaw))
sp.write("\tEC: [no data]")
sp.write("\tEC raw: [no data]")
sp.bytes(avgRegularFileSize))
sp.write("\tEC: %s %s, %s readable (%s), %s deleted (%s), avg %s per file",
sp.uint64(ecFilesTotal), sp.uint64Plural(ecFilesTotal, "file"),
sp.uint64(ecFiles), sp.uint64Pct(ecFiles, ecFilesTotal),
sp.uint64(ecFilesDeleted), sp.uint64Pct(ecFilesDeleted, ecFilesTotal),
sp.bytes(avgEcFileSize))
sp.write("")
}
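
For readers skimming the hunk above: the EC path queries each EC volume only once, even though its shards show up under several data nodes. Below is a minimal standalone sketch of that dedup-under-mutex pattern, with a hypothetical `shardInfo` slice and `queryEcVolume` helper standing in for the topology walk and the `VolumeEcShardsInfo` gRPC call; the real command drives this through its ErrorWaitGroup rather than bare goroutines.

```
package main

import (
	"fmt"
	"sync"
)

// EcVolumeStats mirrors the per-volume stats struct added in the diff above.
type EcVolumeStats struct {
	VolumeId     uint32
	Files        uint64
	FilesDeleted uint64
	TotalSize    uint64
}

// shardInfo is a hypothetical stand-in for the (data node, EC volume) pairs
// the command encounters while walking the master topology.
type shardInfo struct {
	Node     string
	VolumeId uint32
}

// queryEcVolume is a hypothetical stand-in for the VolumeEcShardsInfo gRPC call.
func queryEcVolume(node string, id uint32) *EcVolumeStats {
	return &EcVolumeStats{VolumeId: id}
}

// collectEcStats queries every EC volume once, no matter how many nodes hold its shards.
func collectEcStats(shards []shardInfo) map[uint32]*EcVolumeStats {
	var mu sync.Mutex
	var wg sync.WaitGroup
	stats := map[uint32]*EcVolumeStats{}
	for _, s := range shards {
		s := s
		wg.Add(1)
		go func() {
			defer wg.Done()
			mu.Lock()
			_, seen := stats[s.VolumeId]
			mu.Unlock()
			if seen {
				return // already processed via a shard on another node
			}
			res := queryEcVolume(s.Node, s.VolumeId)
			mu.Lock()
			stats[s.VolumeId] = res
			mu.Unlock()
		}()
	}
	wg.Wait()
	return stats
}

func main() {
	shards := []shardInfo{{"nodeA:8080", 10}, {"nodeB:8080", 10}, {"nodeC:8080", 11}}
	fmt.Println(len(collectEcStats(shards))) // 2 EC volumes, despite 3 shard entries
}
```

As in the command, the existence check and the final write each take the mutex, so the worst case is a duplicate query whose result overwrites an identical entry.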

weed/shell/command_cluster_status_test.go

@@ -104,20 +104,18 @@ func TestPrintStorageInfo(t *testing.T) {
{
testTopology2, true,
`storage:
total: 5.9 TB
regular volumes: 5.9 TB
EC volumes: 0 B
raw: 18 TB on volume replicas, 0 B on EC shards
total: 5.9 TB (18 TB raw, 299.97%)
regular volumes: 5.9 TB (18 TB raw, 299.97%)
EC volumes: 0 B (0 B raw, 0%)
`,
},
{
testTopology2, false,
`storage:
total: 5892610895448 byte(s)
regular volumes: 5892610895448 byte(s)
EC volumes: 0 byte(s)
raw: 17676186754616 byte(s) on volume replicas, 0 byte(s) on EC shards
total: 5892610895448 byte(s) (17676186754616 byte(s) raw, 299.97%)
regular volumes: 5892610895448 byte(s) (17676186754616 byte(s) raw, 299.97%)
EC volumes: 0 byte(s) (0 byte(s) raw, 0.00%)
`,
},
@@ -141,12 +139,13 @@ func TestPrintFilesInfo(t *testing.T) {
func TestPrintFilesInfo(t *testing.T) {
testCases := []struct {
regularVolumeStats RegularVolumeStats
humanize bool
want string
regularVolumesStats RegularVolumesStats
ecVolumesStats EcVolumesStats
humanize bool
want string
}{
{
regularVolumeStats: RegularVolumeStats{
regularVolumesStats: RegularVolumesStats{
1: []*VolumeReplicaStats{
&VolumeReplicaStats{Id: "10.200.17.13:9001", VolumeId: 1, Files: 159, FilesDeleted: 8, TotalSize: 89762704},
&VolumeReplicaStats{Id: "10.200.17.13:9002", VolumeId: 1, Files: 159, FilesDeleted: 8, TotalSize: 89762704},
@@ -163,17 +162,20 @@ func TestPrintFilesInfo(t *testing.T) {
&VolumeReplicaStats{Id: "10.200.17.13:9009", VolumeId: 3, Files: 149, FilesDeleted: 0, TotalSize: 81643872},
},
},
ecVolumesStats: EcVolumesStats{
10: &EcVolumeStats{VolumeId: 10, Files: 30, FilesDeleted: 0, TotalSize: 34879032},
11: &EcVolumeStats{VolumeId: 11, Files: 55, FilesDeleted: 5, TotalSize: 55540341},
},
humanize: false,
want: `files:
regular: 500 file(s), 471 readable (94.20%), 29 deleted (5.80%), avg 530390 byte(s) per file
regular raw: 1500 file(s), 1413 readable (94.20%), 87 deleted (5.80%), 795585624 byte(s) total
EC: [no data]
EC raw: [no data]
total: 585 file(s), 551 readable (94.19%), 34 deleted (5.81%), avg 607888 byte(s) per file
regular: 500 file(s), 471 readable (94.20%), 29 deleted (5.80%), avg 530390 byte(s) per file
EC: 85 file(s), 80 readable (94.12%), 5 deleted (5.88%), avg 1063757 byte(s) per file
`,
},
{
regularVolumeStats: RegularVolumeStats{
regularVolumesStats: RegularVolumesStats{
1: []*VolumeReplicaStats{
&VolumeReplicaStats{Id: "10.200.17.13:9001", VolumeId: 1, Files: 184, FilesDeleted: 33, TotalSize: 79187475},
&VolumeReplicaStats{Id: "10.200.17.13:9008", VolumeId: 1, Files: 184, FilesDeleted: 33, TotalSize: 79187475},
@@ -187,12 +189,15 @@ func TestPrintFilesInfo(t *testing.T) {
&VolumeReplicaStats{Id: "10.200.17.13:9009", VolumeId: 3, Files: 171, FilesDeleted: 12, TotalSize: 124049530},
},
},
ecVolumesStats: EcVolumesStats{
20: &EcVolumeStats{VolumeId: 20, Files: 22, FilesDeleted: 10, TotalSize: 27328233},
30: &EcVolumeStats{VolumeId: 30, Files: 16, FilesDeleted: 11, TotalSize: 11193827},
},
humanize: true,
want: `files:
regular: 600 files, 551 readable (91.83%), 49 deleted (8.16%), avg 488 kB per file
regular raw: 1,200 files, 1,102 readable (91.83%), 98 deleted (8.16%), 586 MB total
EC: [no data]
EC raw: [no data]
total: 638 files, 568 readable (89.02%), 70 deleted (10.97%), avg 519 kB per file
regular: 600 files, 551 readable (91.83%), 49 deleted (8.16%), avg 488 kB per file
EC: 38 files, 17 readable (44.73%), 21 deleted (55.26%), avg 1.0 MB per file
`,
},
@@ -201,9 +206,10 @@ func TestPrintFilesInfo(t *testing.T) {
for i, tc := range testCases {
var buf bytes.Buffer
sp := &ClusterStatusPrinter{
writer: &buf,
humanize: tc.humanize,
regularVolumeStats: tc.regularVolumeStats,
writer: &buf,
humanize: tc.humanize,
regularVolumesStats: tc.regularVolumesStats,
ecVolumesStats: tc.ecVolumesStats,
}
sp.printFilesInfo()
got := buf.String()
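
Finally, the new `0.00%` and `299.97%` expectations in the storage test above come straight through the non-humanized percentage formatting. A tiny standalone approximation of it, only mirroring the integer-ratio special case added to `uint64Pct` (not the command's full formatting logic):

```
package main

import "fmt"

// pct mirrors the integer-ratio special case added to uint64Pct in the diff:
// exact ratios stay in integer arithmetic, everything else falls back to floats.
func pct(a, b uint64) string {
	var p float64
	if b != 0 {
		if a%b == 0 {
			p = float64(a / b * 100)
		} else {
			p = 100 * float64(a) / float64(b)
		}
	}
	return fmt.Sprintf("%.02f%%", p)
}

func main() {
	fmt.Println(pct(17676186754616, 5892610895448)) // 299.97%, as in the test expectations
	fmt.Println(pct(0, 0))                          // 0.00%
	fmt.Println(pct(4, 2))                          // 200.00%, exact-ratio path
}
```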
