diff --git a/weed/shell/command_ec_common.go b/weed/shell/command_ec_common.go index 8bef78394..665daa1b8 100644 --- a/weed/shell/command_ec_common.go +++ b/weed/shell/command_ec_common.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "math/rand/v2" + "regexp" "slices" "sort" "time" @@ -1054,3 +1055,13 @@ func EcBalance(commandEnv *CommandEnv, collections []string, dc string, ecReplic return nil } + +// compileCollectionPattern compiles a regex pattern for collection matching. +// Empty patterns match empty collections only. +func compileCollectionPattern(pattern string) (*regexp.Regexp, error) { + if pattern == "" { + // empty pattern matches empty collection + return regexp.Compile("^$") + } + return regexp.Compile(pattern) +} diff --git a/weed/shell/command_ec_decode.go b/weed/shell/command_ec_decode.go index 673a9a4f2..f1f3bf133 100644 --- a/weed/shell/command_ec_decode.go +++ b/weed/shell/command_ec_decode.go @@ -34,6 +34,11 @@ func (c *commandEcDecode) Help() string { ec.decode [-collection=""] [-volumeId=] + The -collection parameter supports regular expressions for pattern matching: + - Use exact match: ec.decode -collection="^mybucket$" + - Match multiple buckets: ec.decode -collection="bucket.*" + - Match all collections: ec.decode -collection=".*" + ` } @@ -67,8 +72,11 @@ func (c *commandEcDecode) Do(args []string, commandEnv *CommandEnv, writer io.Wr } // apply to all volumes in the collection - volumeIds := collectEcShardIds(topologyInfo, *collection) - fmt.Printf("ec encode volumes: %v\n", volumeIds) + volumeIds, err := collectEcShardIds(topologyInfo, *collection) + if err != nil { + return err + } + fmt.Printf("ec decode volumes: %v\n", volumeIds) for _, vid := range volumeIds { if err = doEcDecode(commandEnv, topologyInfo, *collection, vid); err != nil { return err @@ -240,13 +248,18 @@ func lookupVolumeIds(commandEnv *CommandEnv, volumeIds []string) (volumeIdLocati return resp.VolumeIdLocations, nil } -func collectEcShardIds(topoInfo *master_pb.TopologyInfo, selectedCollection string) (vids []needle.VolumeId) { +func collectEcShardIds(topoInfo *master_pb.TopologyInfo, collectionPattern string) (vids []needle.VolumeId, err error) { + // compile regex pattern for collection matching + collectionRegex, err := compileCollectionPattern(collectionPattern) + if err != nil { + return nil, fmt.Errorf("invalid collection pattern '%s': %v", collectionPattern, err) + } vidMap := make(map[uint32]bool) eachDataNode(topoInfo, func(dc DataCenterId, rack RackId, dn *master_pb.DataNodeInfo) { if diskInfo, found := dn.DiskInfos[string(types.HardDriveType)]; found { for _, v := range diskInfo.EcShardInfos { - if v.Collection == selectedCollection { + if collectionRegex.MatchString(v.Collection) { vidMap[v.Id] = true } } diff --git a/weed/shell/command_ec_encode.go b/weed/shell/command_ec_encode.go index a0794294e..d6b6b17b3 100644 --- a/weed/shell/command_ec_encode.go +++ b/weed/shell/command_ec_encode.go @@ -5,6 +5,7 @@ import ( "flag" "fmt" "io" + "sort" "time" "github.com/seaweedfs/seaweedfs/weed/storage/types" @@ -53,6 +54,11 @@ func (c *commandEcEncode) Help() string { If you only have less than 4 volume servers, with erasure coding, at least you can afford to have 4 corrupted shard files. + The -collection parameter supports regular expressions for pattern matching: + - Use exact match: ec.encode -collection="^mybucket$" + - Match multiple buckets: ec.encode -collection="bucket.*" + - Match all collections: ec.encode -collection=".*" + Options: -verbose: show detailed reasons why volumes are not selected for encoding @@ -112,12 +118,11 @@ func (c *commandEcEncode) Do(args []string, commandEnv *CommandEnv, writer io.Wr volumeIds = append(volumeIds, vid) balanceCollections = collectCollectionsForVolumeIds(topologyInfo, volumeIds) } else { - // apply to all volumes for the given collection - volumeIds, err = collectVolumeIdsForEcEncode(commandEnv, *collection, nil, *fullPercentage, *quietPeriod, *verbose) + // apply to all volumes for the given collection pattern (regex) + volumeIds, balanceCollections, err = collectVolumeIdsForEcEncode(commandEnv, *collection, nil, *fullPercentage, *quietPeriod, *verbose) if err != nil { return err } - balanceCollections = []string{*collection} } // Collect volume locations BEFORE EC encoding starts to avoid race condition @@ -270,7 +275,13 @@ func generateEcShards(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, } -func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, selectedCollection string, sourceDiskType *types.DiskType, fullPercentage float64, quietPeriod time.Duration, verbose bool) (vids []needle.VolumeId, err error) { +func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, collectionPattern string, sourceDiskType *types.DiskType, fullPercentage float64, quietPeriod time.Duration, verbose bool) (vids []needle.VolumeId, matchedCollections []string, err error) { + // compile regex pattern for collection matching + collectionRegex, err := compileCollectionPattern(collectionPattern) + if err != nil { + return nil, nil, fmt.Errorf("invalid collection pattern '%s': %v", collectionPattern, err) + } + // collect topology information topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0) if err != nil { @@ -280,7 +291,7 @@ func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, selectedCollection stri quietSeconds := int64(quietPeriod / time.Second) nowUnixSeconds := time.Now().Unix() - fmt.Printf("collect volumes quiet for: %d seconds and %.1f%% full\n", quietSeconds, fullPercentage) + fmt.Printf("collect volumes with collection pattern '%s', quiet for: %d seconds and %.1f%% full\n", collectionPattern, quietSeconds, fullPercentage) // Statistics for verbose mode var ( @@ -294,6 +305,7 @@ func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, selectedCollection stri ) vidMap := make(map[uint32]bool) + collectionSet := make(map[string]bool) eachDataNode(topologyInfo, func(dc DataCenterId, rack RackId, dn *master_pb.DataNodeInfo) { for _, diskInfo := range dn.DiskInfos { for _, v := range diskInfo.VolumeInfos { @@ -309,16 +321,19 @@ func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, selectedCollection stri continue } - // check collection - if v.Collection != selectedCollection { + // check collection against regex pattern + if !collectionRegex.MatchString(v.Collection) { wrongCollection++ if verbose { - fmt.Printf("skip volume %d on %s: wrong collection (expected: %s, actual: %s)\n", - v.Id, dn.Id, selectedCollection, v.Collection) + fmt.Printf("skip volume %d on %s: collection doesn't match pattern (pattern: %s, actual: %s)\n", + v.Id, dn.Id, collectionPattern, v.Collection) } continue } + // track matched collection + collectionSet[v.Collection] = true + // check disk type if sourceDiskType != nil && types.ToDiskType(v.DiskType) != *sourceDiskType { wrongDiskType++ @@ -393,11 +408,18 @@ func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, selectedCollection stri } } + // Convert collection set to slice + for collection := range collectionSet { + matchedCollections = append(matchedCollections, collection) + } + sort.Strings(matchedCollections) + // Print summary statistics in verbose mode or when no volumes selected if verbose || len(vids) == 0 { fmt.Printf("\nVolume selection summary:\n") fmt.Printf(" Total volumes examined: %d\n", totalVolumes) fmt.Printf(" Selected for encoding: %d\n", len(vids)) + fmt.Printf(" Collections matched: %v\n", matchedCollections) if totalVolumes > 0 { fmt.Printf("\nReasons for exclusion:\n") @@ -405,7 +427,7 @@ func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, selectedCollection stri fmt.Printf(" Remote volumes: %d\n", remoteVolumes) } if wrongCollection > 0 { - fmt.Printf(" Wrong collection: %d\n", wrongCollection) + fmt.Printf(" Collection doesn't match pattern: %d\n", wrongCollection) } if wrongDiskType > 0 { fmt.Printf(" Wrong disk type: %d\n", wrongDiskType) diff --git a/weed/shell/command_volume_balance.go b/weed/shell/command_volume_balance.go index b3c76a172..7f6646d45 100644 --- a/weed/shell/command_volume_balance.go +++ b/weed/shell/command_volume_balance.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "os" + "regexp" "strings" "time" @@ -40,6 +41,14 @@ func (c *commandVolumeBalance) Help() string { volume.balance [-collection ALL_COLLECTIONS|EACH_COLLECTION|] [-force] [-dataCenter=] [-racks=rack_name_one,rack_name_two] [-nodes=192.168.0.1:8080,192.168.0.2:8080] + The -collection parameter supports: + - ALL_COLLECTIONS: balance across all collections + - EACH_COLLECTION: balance each collection separately + - Regular expressions for pattern matching: + * Use exact match: volume.balance -collection="^mybucket$" + * Match multiple buckets: volume.balance -collection="bucket.*" + * Match all user collections: volume.balance -collection="user-.*" + Algorithm: For each type of volume server (different max volume count limit){ @@ -118,12 +127,23 @@ func (c *commandVolumeBalance) Do(args []string, commandEnv *CommandEnv, writer return err } for _, col := range collections { - if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, col); err != nil { + // Use direct string comparison for exact match (more efficient than regex) + if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, nil, col); err != nil { return err } } + } else if *collection == "ALL_COLLECTIONS" { + // Pass nil pattern for all collections + if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, nil, *collection); err != nil { + return err + } } else { - if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, *collection); err != nil { + // Compile user-provided pattern + collectionPattern, err := compileCollectionPattern(*collection) + if err != nil { + return fmt.Errorf("invalid collection pattern '%s': %v", *collection, err) + } + if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, collectionPattern, *collection); err != nil { return err } } @@ -131,24 +151,29 @@ func (c *commandVolumeBalance) Do(args []string, commandEnv *CommandEnv, writer return nil } -func (c *commandVolumeBalance) balanceVolumeServers(diskTypes []types.DiskType, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, collection string) error { - +func (c *commandVolumeBalance) balanceVolumeServers(diskTypes []types.DiskType, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, collectionPattern *regexp.Regexp, collectionName string) error { for _, diskType := range diskTypes { - if err := c.balanceVolumeServersByDiskType(diskType, volumeReplicas, nodes, collection); err != nil { + if err := c.balanceVolumeServersByDiskType(diskType, volumeReplicas, nodes, collectionPattern, collectionName); err != nil { return err } } return nil - } -func (c *commandVolumeBalance) balanceVolumeServersByDiskType(diskType types.DiskType, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, collection string) error { - +func (c *commandVolumeBalance) balanceVolumeServersByDiskType(diskType types.DiskType, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, collectionPattern *regexp.Regexp, collectionName string) error { for _, n := range nodes { n.selectVolumes(func(v *master_pb.VolumeInformationMessage) bool { - if collection != "ALL_COLLECTIONS" { - if v.Collection != collection { - return false + if collectionName != "ALL_COLLECTIONS" { + if collectionPattern != nil { + // Use regex pattern matching + if !collectionPattern.MatchString(v.Collection) { + return false + } + } else { + // Use exact string matching (for EACH_COLLECTION) + if v.Collection != collectionName { + return false + } } } if v.DiskType != string(diskType) { diff --git a/weed/shell/command_volume_balance_test.go b/weed/shell/command_volume_balance_test.go index 3dffb1d7d..99fdf5575 100644 --- a/weed/shell/command_volume_balance_test.go +++ b/weed/shell/command_volume_balance_test.go @@ -256,7 +256,7 @@ func TestBalance(t *testing.T) { volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) diskTypes := collectVolumeDiskTypes(topologyInfo) c := &commandVolumeBalance{} - if err := c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, "ALL_COLLECTIONS"); err != nil { + if err := c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, nil, "ALL_COLLECTIONS"); err != nil { t.Errorf("balance: %v", err) } diff --git a/weed/shell/command_volume_tier_download.go b/weed/shell/command_volume_tier_download.go index 9cea40eb2..4626bd383 100644 --- a/weed/shell/command_volume_tier_download.go +++ b/weed/shell/command_volume_tier_download.go @@ -33,6 +33,11 @@ func (c *commandVolumeTierDownload) Help() string { volume.tier.download [-collection=""] volume.tier.download [-collection=""] -volumeId= + The -collection parameter supports regular expressions for pattern matching: + - Use exact match: volume.tier.download -collection="^mybucket$" + - Match multiple buckets: volume.tier.download -collection="bucket.*" + - Match all collections: volume.tier.download -collection=".*" + e.g.: volume.tier.download -volumeId=7 @@ -73,7 +78,7 @@ func (c *commandVolumeTierDownload) Do(args []string, commandEnv *CommandEnv, wr // apply to all volumes in the collection // reusing collectVolumeIdsForEcEncode for now - volumeIds := collectRemoteVolumes(topologyInfo, *collection) + volumeIds, err := collectRemoteVolumes(topologyInfo, *collection) if err != nil { return err } @@ -87,13 +92,18 @@ func (c *commandVolumeTierDownload) Do(args []string, commandEnv *CommandEnv, wr return nil } -func collectRemoteVolumes(topoInfo *master_pb.TopologyInfo, selectedCollection string) (vids []needle.VolumeId) { +func collectRemoteVolumes(topoInfo *master_pb.TopologyInfo, collectionPattern string) (vids []needle.VolumeId, err error) { + // compile regex pattern for collection matching + collectionRegex, err := compileCollectionPattern(collectionPattern) + if err != nil { + return nil, fmt.Errorf("invalid collection pattern '%s': %v", collectionPattern, err) + } vidMap := make(map[uint32]bool) eachDataNode(topoInfo, func(dc DataCenterId, rack RackId, dn *master_pb.DataNodeInfo) { for _, diskInfo := range dn.DiskInfos { for _, v := range diskInfo.VolumeInfos { - if v.Collection == selectedCollection && v.RemoteStorageKey != "" && v.RemoteStorageName != "" { + if collectionRegex.MatchString(v.Collection) && v.RemoteStorageKey != "" && v.RemoteStorageName != "" { vidMap[v.Id] = true } } diff --git a/weed/shell/command_volume_tier_upload.go b/weed/shell/command_volume_tier_upload.go index cbe6e6f2b..eac47c5fc 100644 --- a/weed/shell/command_volume_tier_upload.go +++ b/weed/shell/command_volume_tier_upload.go @@ -98,7 +98,7 @@ func (c *commandVolumeTierUpload) Do(args []string, commandEnv *CommandEnv, writ // apply to all volumes in the collection // reusing collectVolumeIdsForEcEncode for now - volumeIds, err := collectVolumeIdsForEcEncode(commandEnv, *collection, diskType, *fullPercentage, *quietPeriod, false) + volumeIds, _, err := collectVolumeIdsForEcEncode(commandEnv, *collection, diskType, *fullPercentage, *quietPeriod, false) if err != nil { return err }