package shell

import (
	"context"
	"flag"
	"fmt"
	"io"
	"math"
	"strings"
	"sync"

	"github.com/dustin/go-humanize"
	"github.com/dustin/go-humanize/english"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
)

func init() {
	Commands = append(Commands, &commandClusterStatus{})
}

// VolumeReplicaStats holds the stat details of a single replica of a regular volume,
// as reported by the volume server that hosts it.
type VolumeReplicaStats struct {
	Id       string
	VolumeId uint32

	Files        uint64
	FilesDeleted uint64
	TotalSize    uint64
}

// RegularVolumeStats is a map of volume_id -> [volume replicas] with stat details.
type RegularVolumeStats map[uint32][]*VolumeReplicaStats

type commandClusterStatus struct{}

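// ClusterStatusPrinter gathers cluster topology, collection, and volume statistics
// and renders the cluster.status report to writer.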
type ClusterStatusPrinter struct {
	writer             io.Writer
	writerMu           sync.Mutex
	humanize           bool
	maxParallelization int

	locked             bool
	collections        []string
	topology           *master_pb.TopologyInfo
	volumeSizeLimitMb  uint64
	regularVolumeStats RegularVolumeStats
}

func (c *commandClusterStatus) Name() string {
	return "cluster.status"
}

func (c *commandClusterStatus) Help() string {
	return `outputs a quick overview of the cluster status`
}

func (c *commandClusterStatus) HasTag(CommandTag) bool {
	return false
}

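// Do parses the command flags, collects the current topology, and prints the report.
// For example, "cluster.status -files -maxParallelization=10" also queries every
// volume server for per-volume file counts (the flag values here are illustrative).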
func (c *commandClusterStatus) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
	flags := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
	humanize := flags.Bool("humanize", true, "human-readable output")
	includeFiles := flags.Bool("files", false, "include detailed file metrics, from all volume servers")
	maxParallelization := flags.Int("maxParallelization", DefaultMaxParallelization, "run up to X tasks in parallel, whenever possible")

	if err = flags.Parse(args); err != nil {
		return err
	}

	collections, err := ListCollectionNames(commandEnv, true, true)
	if err != nil {
		return err
	}
	topology, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
	if err != nil {
		return err
	}

	sp := &ClusterStatusPrinter{
		writer:             writer,
		humanize:           *humanize,
		maxParallelization: *maxParallelization,

		locked:            commandEnv.isLocked(),
		collections:       collections,
		topology:          topology,
		volumeSizeLimitMb: volumeSizeLimitMb,
	}
	if *includeFiles {
		if err := sp.loadFileStats(commandEnv); err != nil {
			return err
		}
	}

	sp.Print()

	return nil
}

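// Formatting helpers: when -humanize is enabled (the default), counts, byte sizes,
// ratios, and percentages are rendered via go-humanize; otherwise they are printed
// as plain numbers.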
func (sp *ClusterStatusPrinter) uint64(n uint64) string {
	if !sp.humanize {
		return fmt.Sprintf("%d", n)
	}
	return humanize.Comma(int64(n))
}

func (sp *ClusterStatusPrinter) int(n int) string {
	return sp.uint64(uint64(n))
}

func (sp *ClusterStatusPrinter) uint64Plural(n uint64, str string) string {
	if !sp.humanize {
		return fmt.Sprintf("%s(s)", str)
	}
	uin := math.MaxInt
	if n < math.MaxInt {
		uin = int(n)
	}
	return english.PluralWord(uin, str, "")
}

func (sp *ClusterStatusPrinter) plural(n int, str string) string {
	return sp.uint64Plural(uint64(n), str)
}

func (sp *ClusterStatusPrinter) bytes(b uint64) string {
	if !sp.humanize {
		return fmt.Sprintf("%d %s", b, sp.plural(int(b), "byte"))
	}
	return humanize.Bytes(b)
}

func (sp *ClusterStatusPrinter) uint64Ratio(a, b uint64) string {
	var p float64
	if b != 0 {
		p = float64(a) / float64(b)
	}
	if !sp.humanize {
		return fmt.Sprintf("%.02f", p)
	}
	return humanize.FtoaWithDigits(p, 2)
}

func (sp *ClusterStatusPrinter) intRatio(a, b int) string {
	return sp.uint64Ratio(uint64(a), uint64(b))
}

func (sp *ClusterStatusPrinter) uint64Pct(a, b uint64) string {
	var p float64
	if b != 0 {
		p = 100 * float64(a) / float64(b)
	}
	if !sp.humanize {
		return fmt.Sprintf("%.02f%%", p)
	}
	return fmt.Sprintf("%s%%", humanize.FtoaWithDigits(p, 2))
}

func (sp *ClusterStatusPrinter) intPct(a, b int) string {
	return sp.uint64Pct(uint64(a), uint64(b))
}

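// write emits a single report line, serializing concurrent writers. Trailing spaces
// are trimmed and a newline is appended unless the format already ends in "\n" or
// "\r" (the latter is used for in-place progress updates).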
func (sp *ClusterStatusPrinter) write(format string, a ...any) {
	sp.writerMu.Lock()
	defer sp.writerMu.Unlock()

	format = strings.TrimRight(format, " ")
	if len(format) == 0 {
		format = "\n"
	}
	fmt.Fprintf(sp.writer, format, a...)

	last := format[len(format)-1:]
	if last != "\n" && last != "\r" {
		fmt.Fprint(sp.writer, "\n")
	}
}

func (sp *ClusterStatusPrinter) Print() {
	sp.write("")
	sp.printClusterInfo()
	sp.printVolumeInfo()
	sp.printStorageInfo()
	sp.printFilesInfo()
}

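// loadFileStats queries every volume server for per-volume file counts and sizes,
// running up to maxParallelization requests at a time and reporting progress as it goes.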
// TODO: collect stats for EC volumes as well
func (sp *ClusterStatusPrinter) loadFileStats(commandEnv *CommandEnv) error {
	sp.regularVolumeStats = RegularVolumeStats{}

	var mu sync.Mutex
	var progressTotal, progressDone uint64
	ewg := NewErrorWaitGroup(sp.maxParallelization)

	for _, dci := range sp.topology.DataCenterInfos {
		for _, ri := range dci.RackInfos {
			for _, dni := range ri.DataNodeInfos {
				for _, d := range dni.DiskInfos {
					mu.Lock()
					progressTotal += uint64(len(d.VolumeInfos))
					mu.Unlock()
					for _, v := range d.VolumeInfos {
						ewg.Add(func() error {
							// Collect regular volume stats
							err := operation.WithVolumeServerClient(false, pb.NewServerAddressWithGrpcPort(dni.Id, int(dni.GrpcPort)), commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
								resp, reqErr := volumeServerClient.VolumeStatus(context.Background(), &volume_server_pb.VolumeStatusRequest{
									VolumeId: uint32(v.Id),
								})
								if reqErr != nil {
									return reqErr
								}

								mu.Lock()
								defer mu.Unlock()
								if resp != nil {
									if _, ok := sp.regularVolumeStats[v.Id]; !ok {
										sp.regularVolumeStats[v.Id] = []*VolumeReplicaStats{}
									}
									sp.regularVolumeStats[v.Id] = append(sp.regularVolumeStats[v.Id], &VolumeReplicaStats{
										Id:           dni.Id,
										VolumeId:     v.Id,
										Files:        resp.FileCount,
										FilesDeleted: resp.FileDeletedCount,
										TotalSize:    resp.VolumeSize,
									})
								}
								progressDone++
								return nil
							})
							if err != nil {
								return err
							}

							mu.Lock()
							sp.write("collecting file stats: %s \r", sp.uint64Pct(progressDone, progressTotal))
							mu.Unlock()
							return nil
						})
					}
				}
			}
		}
	}

	err := ewg.Wait()
	sp.write("")
	return err
}

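// printClusterInfo prints the cluster ID, lock status, and the number of data centers,
// racks, nodes, and disks in the topology.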
func (sp *ClusterStatusPrinter) printClusterInfo() {
	dcs := len(sp.topology.DataCenterInfos)

	racks := 0
	nodes := 0
	disks := 0
	for _, dci := range sp.topology.DataCenterInfos {
		racks += len(dci.RackInfos)
		for _, ri := range dci.RackInfos {
			for _, dni := range ri.DataNodeInfos {
				nodes++
				disks += len(dni.DiskInfos)
			}
		}
	}

	status := "unlocked"
	if sp.locked {
		status = "LOCKED"
	}

	sp.write("cluster:")
	sp.write("\tid: %s", sp.topology.Id)
	sp.write("\tstatus: %s", status)
	sp.write("\tnodes: %s", sp.int(nodes))
	sp.write("\ttopology: %s %s, %s %s on %s %s",
		sp.int(dcs), sp.plural(dcs, "DC"),
		sp.int(disks), sp.plural(disks, "disk"),
		sp.int(racks), sp.plural(racks, "rack"))
	sp.write("")
}

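// printVolumeInfo prints volume counts: total volumes and collections, the configured
// volume size limit, regular volume replicas split by writability, and EC volumes
// with their shard counts.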
func (sp *ClusterStatusPrinter) printVolumeInfo() {
	collections := len(sp.collections)
	var maxVolumes uint64
	volumeIds := map[needle.VolumeId]bool{}
	ecVolumeIds := map[needle.VolumeId]bool{}

	var replicas, roReplicas, rwReplicas, ecShards int

	for _, dci := range sp.topology.DataCenterInfos {
		for _, ri := range dci.RackInfos {
			for _, dni := range ri.DataNodeInfos {
				for _, di := range dni.DiskInfos {
					maxVolumes += uint64(di.MaxVolumeCount)
					for _, vi := range di.VolumeInfos {
						vid := needle.VolumeId(vi.Id)
						volumeIds[vid] = true
						replicas++
						if vi.ReadOnly {
							roReplicas++
						} else {
							rwReplicas++
						}
					}
					for _, eci := range di.EcShardInfos {
						vid := needle.VolumeId(eci.Id)
						ecVolumeIds[vid] = true
						ecShards += erasure_coding.ShardBits(eci.EcIndexBits).ShardIdCount()
					}
				}
			}
		}
	}

	volumes := len(volumeIds)
	ecVolumes := len(ecVolumeIds)
	totalVolumes := volumes + ecVolumes

	sp.write("volumes:")
	sp.write("\ttotal: %s %s, %s %s",
		sp.int(totalVolumes), sp.plural(totalVolumes, "volume"),
		sp.int(collections), sp.plural(collections, "collection"))
	sp.write("\tmax size: %s", sp.bytes(sp.volumeSizeLimitMb*1024*1024))
	sp.write("\tregular: %s/%s %s on %s %s, %s writable (%s), %s read-only (%s)",
		sp.int(volumes), sp.uint64(maxVolumes), sp.plural(volumes, "volume"),
		sp.int(replicas), sp.plural(replicas, "replica"),
		sp.int(rwReplicas), sp.intPct(rwReplicas, replicas),
		sp.int(roReplicas), sp.intPct(roReplicas, replicas))
	sp.write("\tEC: %s EC %s on %s %s (%s shards/volume)",
		sp.int(ecVolumes), sp.plural(ecVolumes, "volume"),
		sp.int(ecShards), sp.plural(ecShards, "shard"),
		sp.intRatio(ecShards, ecVolumes))
	sp.write("")
}

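// printStorageInfo prints logical storage usage (counting each volume once and
// normalizing EC volumes to their data-shard size) as well as the raw bytes occupied
// by volume replicas and EC shards.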
func (sp *ClusterStatusPrinter) printStorageInfo() {
	perVolumeSize := map[needle.VolumeId]uint64{}
	perEcVolumeSize := map[needle.VolumeId]uint64{}
	var rawVolumeSize, rawEcVolumeSize uint64

	for _, dci := range sp.topology.DataCenterInfos {
		for _, ri := range dci.RackInfos {
			for _, dni := range ri.DataNodeInfos {
				for _, di := range dni.DiskInfos {
					for _, vi := range di.VolumeInfos {
						vid := needle.VolumeId(vi.Id)
						perVolumeSize[vid] = vi.Size
						rawVolumeSize += vi.Size
					}
					for _, eci := range di.EcShardInfos {
						vid := needle.VolumeId(eci.Id)
						var size uint64
						for _, ss := range eci.ShardSizes {
							size += uint64(ss)
						}
						perEcVolumeSize[vid] += size
						rawEcVolumeSize += size
					}
				}
			}
		}
	}
	// normalize EC logical volume sizes given shard settings
	for vid := range perEcVolumeSize {
		perEcVolumeSize[vid] = perEcVolumeSize[vid] * erasure_coding.DataShardsCount / erasure_coding.TotalShardsCount
	}

	var volumeSize, ecVolumeSize uint64
	for _, s := range perVolumeSize {
		volumeSize += s
	}
	for _, s := range perEcVolumeSize {
		ecVolumeSize += s
	}
	totalSize := volumeSize + ecVolumeSize

	sp.write("storage:")
	sp.write("\ttotal: %s", sp.bytes(totalSize))
	sp.write("\tregular volumes: %s", sp.bytes(volumeSize))
	sp.write("\tEC volumes: %s", sp.bytes(ecVolumeSize))
	sp.write("\traw: %s on volume replicas, %s on EC shards", sp.bytes(rawVolumeSize), sp.bytes(rawEcVolumeSize))
	sp.write("")
}

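// printFilesInfo prints file counts collected by loadFileStats. Per-volume numbers
// are averaged across replicas for the logical view, while the "raw" lines sum every
// replica as reported.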
func (sp *ClusterStatusPrinter) printFilesInfo() {
	if len(sp.regularVolumeStats) == 0 {
		return
	}

	var regularFilesTotal, regularFilesDeleted, regularFilesSize uint64
	var regularFilesTotalRaw, regularFilesDeletedRaw, regularFilesSizeRaw uint64

	for _, replicaStats := range sp.regularVolumeStats {
		rc := uint64(len(replicaStats))

		var volumeFilesTotal, volumeFilesSize, volumeFilesDeleted uint64
		for _, rs := range replicaStats {
			regularFilesTotalRaw += rs.Files
			regularFilesSizeRaw += rs.TotalSize
			regularFilesDeletedRaw += rs.FilesDeleted

			volumeFilesTotal += rs.Files
			volumeFilesSize += rs.TotalSize
			volumeFilesDeleted += rs.FilesDeleted
		}
		regularFilesTotal += (volumeFilesTotal / rc)
		regularFilesSize += (volumeFilesSize / rc)
		regularFilesDeleted += (volumeFilesDeleted / rc)
	}

	regularFiles := regularFilesTotal - regularFilesDeleted
	regularFilesRaw := regularFilesTotalRaw - regularFilesDeletedRaw
	var avgFileSize uint64
	if regularFilesTotal != 0 {
		avgFileSize = regularFilesSize / regularFilesTotal
	}

	sp.write("files:")
	sp.write("\tregular: %s %s, %s readable (%s), %s deleted (%s), avg %s per file",
		sp.uint64(regularFilesTotal), sp.uint64Plural(regularFilesTotal, "file"),
		sp.uint64(regularFiles), sp.uint64Pct(regularFiles, regularFilesTotal),
		sp.uint64(regularFilesDeleted), sp.uint64Pct(regularFilesDeleted, regularFilesTotal),
		sp.bytes(avgFileSize))
	sp.write("\tregular raw: %s %s, %s readable (%s), %s deleted (%s), %s total",
		sp.uint64(regularFilesTotalRaw), sp.uint64Plural(regularFilesTotalRaw, "file"),
		sp.uint64(regularFilesRaw), sp.uint64Pct(regularFilesRaw, regularFilesTotalRaw),
		sp.uint64(regularFilesDeletedRaw), sp.uint64Pct(regularFilesDeletedRaw, regularFilesTotalRaw),
		sp.bytes(regularFilesSizeRaw))
	sp.write("\tEC: [no data]")
	sp.write("\tEC raw: [no data]")
	sp.write("")
}