
Merge branch 'master' into simplify-worker

pull/7394/head
Mariano Ntrougkas, 1 month ago (committed by GitHub)
commit 293e9a8168
49 changed files (changed line counts in parentheses):

  1. k8s/charts/seaweedfs/values.yaml (2)
  2. weed/admin/dash/ec_shard_management.go (13)
  3. weed/admin/handlers/maintenance_handlers.go (1)
  4. weed/admin/maintenance/maintenance_integration.go (4)
  5. weed/cluster/master_client.go (5)
  6. weed/command/autocomplete.go (28)
  7. weed/mq/broker/broker_grpc_sub.go (1)
  8. weed/mq/kafka/consumer/incremental_rebalancing_test.go (12)
  9. weed/mq/kafka/consumer/rebalance_timeout.go (90)
  10. weed/mq/kafka/consumer/rebalance_timeout_test.go (112)
  11. weed/mq/kafka/consumer_offset/memory_storage.go (1)
  12. weed/mq/kafka/consumer_offset/memory_storage_test.go (1)
  13. weed/mq/kafka/consumer_offset/storage.go (1)
  14. weed/mq/kafka/gateway/test_mock_handler.go (1)
  15. weed/mq/kafka/package.go (2)
  16. weed/mq/kafka/partition_mapping.go (2)
  17. weed/mq/kafka/protocol/describe_cluster.go (2)
  18. weed/mq/kafka/protocol/flexible_versions.go (1)
  19. weed/mq/kafka/protocol/group_introspection.go (18)
  20. weed/mq/kafka/protocol/handler.go (35)
  21. weed/mq/kafka/protocol/offset_storage_adapter.go (1)
  22. weed/mq/kafka/protocol/response_validation_example_test.go (1)
  23. weed/mq/kafka/schema/envelope_test.go (72)
  24. weed/mq/kafka/schema/envelope_varint_test.go (2)
  25. weed/mq/metadata_constants.go (2)
  26. weed/mq/offset/migration.go (54)
  27. weed/mq/schema/flat_schema_utils_test.go (10)
  28. weed/pb/mq_agent_pb/publish_response_test.go (2)
  29. weed/pb/schema_pb/offset_test.go (6)
  30. weed/pb/volume_server.proto (7)
  31. weed/pb/volume_server_pb/volume_server.pb.go (502)
  32. weed/remote_storage/azure/azure_storage_client_test.go (18)
  33. weed/s3api/s3_sse_s3_integration_test.go (2)
  34. weed/s3api/s3_validation_utils.go (10)
  35. weed/server/volume_grpc_erasure_coding.go (64)
  36. weed/shell/command_ec_common.go (14)
  37. weed/shell/command_ec_rebuild.go (3)
  38. weed/storage/disk_location_ec.go (13)
  39. weed/storage/erasure_coding/ec_context.go (46)
  40. weed/storage/erasure_coding/ec_encoder.go (113)
  41. weed/storage/erasure_coding/ec_test.go (19)
  42. weed/storage/erasure_coding/ec_volume.go (28)
  43. weed/storage/erasure_coding/ec_volume_info.go (21)
  44. weed/storage/store_ec.go (3)
  45. weed/topology/topology_ec.go (11)
  46. weed/util/log_buffer/log_buffer_flush_gap_test.go (202)
  47. weed/util/version/constants.go (2)
  48. weed/worker/client.go (1)
  49. weed/worker/tasks/erasure_coding/ec_task.go (3)

k8s/charts/seaweedfs/values.yaml (2)

@@ -873,7 +873,7 @@ filer:
 # anonymousRead: false
 s3:
-enabled: true
+enabled: false
 imageOverride: null
 restartPolicy: null
 replicas: 1

weed/admin/dash/ec_shard_management.go (13)

@@ -68,7 +68,7 @@ func (s *AdminServer) GetClusterEcShards(page int, pageSize int, sortBy string,
 // Create individual shard entries for each shard this server has
 shardBits := ecShardInfo.EcIndexBits
-for shardId := 0; shardId < erasure_coding.TotalShardsCount; shardId++ {
+for shardId := 0; shardId < erasure_coding.MaxShardCount; shardId++ {
 if (shardBits & (1 << uint(shardId))) != 0 {
 // Mark this shard as present for this volume
 volumeShardsMap[volumeId][shardId] = true
@@ -112,6 +112,7 @@ func (s *AdminServer) GetClusterEcShards(page int, pageSize int, sortBy string,
 shardCount := len(shardsPresent)
 // Find which shards are missing for this volume across ALL servers
+// Uses default 10+4 (14 total shards)
 for shardId := 0; shardId < erasure_coding.TotalShardsCount; shardId++ {
 if !shardsPresent[shardId] {
 missingShards = append(missingShards, shardId)
@@ -332,7 +333,7 @@ func (s *AdminServer) GetClusterEcVolumes(page int, pageSize int, sortBy string,
 // Process each shard this server has for this volume
 shardBits := ecShardInfo.EcIndexBits
-for shardId := 0; shardId < erasure_coding.TotalShardsCount; shardId++ {
+for shardId := 0; shardId < erasure_coding.MaxShardCount; shardId++ {
 if (shardBits & (1 << uint(shardId))) != 0 {
 // Record shard location
 volume.ShardLocations[shardId] = node.Id
@@ -392,7 +393,7 @@ func (s *AdminServer) GetClusterEcVolumes(page int, pageSize int, sortBy string,
 for _, volume := range volumeData {
 volume.TotalShards = len(volume.ShardLocations)
-// Find missing shards
+// Find missing shards (default 10+4 = 14 total shards)
 var missingShards []int
 for shardId := 0; shardId < erasure_coding.TotalShardsCount; shardId++ {
 if _, exists := volume.ShardLocations[shardId]; !exists {
@@ -523,7 +524,7 @@ func sortEcVolumes(volumes []EcVolumeWithShards, sortBy string, sortOrder string
 // getShardCount returns the number of shards represented by the bitmap
 func getShardCount(ecIndexBits uint32) int {
 count := 0
-for i := 0; i < erasure_coding.TotalShardsCount; i++ {
+for i := 0; i < erasure_coding.MaxShardCount; i++ {
 if (ecIndexBits & (1 << uint(i))) != 0 {
 count++
 }
@@ -532,6 +533,7 @@ func getShardCount(ecIndexBits uint32) int {
 }
 // getMissingShards returns a slice of missing shard IDs for a volume
+// Assumes default 10+4 EC configuration (14 total shards)
 func getMissingShards(ecIndexBits uint32) []int {
 var missing []int
 for i := 0; i < erasure_coding.TotalShardsCount; i++ {
@@ -614,7 +616,7 @@ func (s *AdminServer) GetEcVolumeDetails(volumeID uint32, sortBy string, sortOrd
 // Create individual shard entries for each shard this server has
 shardBits := ecShardInfo.EcIndexBits
-for shardId := 0; shardId < erasure_coding.TotalShardsCount; shardId++ {
+for shardId := 0; shardId < erasure_coding.MaxShardCount; shardId++ {
 if (shardBits & (1 << uint(shardId))) != 0 {
 ecShard := EcShardWithInfo{
 VolumeID: ecShardInfo.Id,
@@ -698,6 +700,7 @@ func (s *AdminServer) GetEcVolumeDetails(volumeID uint32, sortBy string, sortOrd
 }
 totalUniqueShards := len(foundShards)
+// Check completeness using default 10+4 (14 total shards)
 isComplete := (totalUniqueShards == erasure_coding.TotalShardsCount)
 // Calculate missing shards
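
The TotalShardsCount to MaxShardCount change above widens the bitmap scan so that shards beyond the default 10+4 layout are still detected, while the missing-shard checks keep assuming 14 shards. Below is a minimal, self-contained sketch of that bit-counting logic, assuming TotalShardsCount is 14 (stated in the diff comments) and MaxShardCount is 32 (the width of the uint32 EcIndexBits bitmap); the real constants live in weed/storage/erasure_coding and may differ.

package main

import "fmt"

// Assumed values for illustration only.
const (
	TotalShardsCount = 14 // default 10 data + 4 parity shards
	MaxShardCount    = 32 // widest bitmap a uint32 EcIndexBits can describe
)

// countShards mirrors the getShardCount logic above: scanning up to
// MaxShardCount counts shards of custom EC ratios, not just the first 14.
func countShards(ecIndexBits uint32) int {
	count := 0
	for i := 0; i < MaxShardCount; i++ {
		if ecIndexBits&(1<<uint(i)) != 0 {
			count++
		}
	}
	return count
}

// missingShards mirrors getMissingShards: it still assumes the default
// 10+4 layout, so it only reports gaps within the first 14 shard IDs.
func missingShards(ecIndexBits uint32) []int {
	var missing []int
	for i := 0; i < TotalShardsCount; i++ {
		if ecIndexBits&(1<<uint(i)) == 0 {
			missing = append(missing, i)
		}
	}
	return missing
}

func main() {
	// A server holding shards 0, 1 and 13: bits 0, 1 and 13 set.
	var bits uint32 = 1<<0 | 1<<1 | 1<<13
	fmt.Println(countShards(bits))   // 3
	fmt.Println(missingShards(bits)) // [2 3 4 5 6 7 8 9 10 11 12]
}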

weed/admin/handlers/maintenance_handlers.go (1)

@@ -46,7 +46,6 @@ func (h *MaintenanceHandlers) ShowTaskDetail(c *gin.Context) {
 return
 }
 c.Header("Content-Type", "text/html")
 taskDetailComponent := app.TaskDetail(taskDetail)
 layoutComponent := layout.Layout(c, taskDetailComponent)

weed/admin/maintenance/maintenance_integration.go (4)

@@ -306,25 +306,21 @@ func (s *MaintenanceIntegration) CanScheduleWithTaskSchedulers(task *Maintenance
 return false // Fallback to existing logic for unknown types
 }
 // Convert task objects
 taskObject := s.convertTaskToTaskSystem(task)
 if taskObject == nil {
 return false
 }
 runningTaskObjects := s.convertTasksToTaskSystem(runningTasks)
 workerObjects := s.convertWorkersToTaskSystem(availableWorkers)
 // Get the appropriate scheduler
 scheduler := s.taskRegistry.GetScheduler(taskType)
 if scheduler == nil {
 return false
 }
 canSchedule := scheduler.CanScheduleNow(taskObject, runningTaskObjects, workerObjects)
 return canSchedule

weed/cluster/master_client.go (5)

@@ -16,6 +16,9 @@ func ListExistingPeerUpdates(master pb.ServerAddress, grpcDialOption grpc.DialOp
 ClientType: clientType,
 FilerGroup: filerGroup,
 })
+if err != nil {
+return err
+}
 glog.V(0).Infof("the cluster has %d %s\n", len(resp.ClusterNodes), clientType)
 for _, node := range resp.ClusterNodes {
@@ -26,7 +29,7 @@ func ListExistingPeerUpdates(master pb.ServerAddress, grpcDialOption grpc.DialOp
 CreatedAtNs: node.CreatedAtNs,
 })
 }
-return err
+return nil
 }); grpcErr != nil {
 glog.V(0).Infof("connect to %s: %v", master, grpcErr)
 }
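
The added early return above ensures resp is never used when the ListClusterNodes call fails, and the final return no longer re-reports an error that was already handled. A small sketch of the corrected pattern, with hypothetical stand-in types instead of the generated pb client:

package main

import (
	"errors"
	"fmt"
)

// Hypothetical stand-ins for the generated pb response types.
type clusterNode struct{ Address string }
type listResp struct{ ClusterNodes []*clusterNode }

func listClusterNodes(fail bool) (*listResp, error) {
	if fail {
		return nil, errors.New("master unreachable")
	}
	return &listResp{ClusterNodes: []*clusterNode{{Address: "filer-1:8888"}}}, nil
}

// listPeers shows the fixed flow: check the RPC error before touching
// resp (the old code dereferenced resp first, risking a nil pointer),
// and return nil at the end now that the error path exits early.
func listPeers(fail bool) error {
	resp, err := listClusterNodes(fail)
	if err != nil {
		return err // early return added by the diff above
	}
	fmt.Printf("the cluster has %d nodes\n", len(resp.ClusterNodes))
	for _, node := range resp.ClusterNodes {
		fmt.Println(node.Address)
	}
	return nil
}

func main() {
	fmt.Println(listPeers(false)) // <nil>
	fmt.Println(listPeers(true))  // master unreachable
}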

weed/command/autocomplete.go (28)

@@ -2,11 +2,11 @@ package command
 import (
 "fmt"
+"os"
+"path/filepath"
 "github.com/posener/complete"
 completeinstall "github.com/posener/complete/cmd/install"
 flag "github.com/seaweedfs/seaweedfs/weed/util/fla9"
-"os"
-"path/filepath"
 "runtime"
 )
@@ -53,14 +53,14 @@ func printAutocompleteScript(shell string) bool {
 return false
 }
 switch shell {
 case "bash":
 fmt.Printf("complete -C %q weed\n", binPath)
 case "zsh":
 fmt.Printf("autoload -U +X bashcompinit && bashcompinit\n")
 fmt.Printf("complete -o nospace -C %q weed\n", binPath)
 case "fish":
 fmt.Printf(`function __complete_weed
 set -lx COMP_LINE (commandline -cp)
 test -z (commandline -ct)
 and set COMP_LINE "$COMP_LINE "
@@ -68,10 +68,10 @@ func printAutocompleteScript(shell string) bool {
 end
 complete -f -c weed -a "(__complete_weed)"
 `, binPath)
 default:
 fmt.Fprintf(os.Stderr, "unsupported shell: %s. Supported shells: bash, zsh, fish\n", shell)
 return false
 }
 return true
 }

weed/mq/broker/broker_grpc_sub.go (1)

@@ -272,7 +272,6 @@ subscribeLoop:
 TsNs: logEntry.TsNs,
 }
 if err := stream.Send(&mq_pb.SubscribeMessageResponse{Message: &mq_pb.SubscribeMessageResponse_Data{
 Data: dataMsg,
 }}); err != nil {

weed/mq/kafka/consumer/incremental_rebalancing_test.go (12)

@@ -103,15 +103,15 @@ func TestIncrementalCooperativeAssignmentStrategy_RebalanceWithRevocation(t *tes
 t.Errorf("Expected member-2 to have 0 partitions during revocation, got %d", len(member2Assignments))
 }
 t.Logf("Revocation phase - Member-1: %d partitions, Member-2: %d partitions",
 len(member1Assignments), len(member2Assignments))
 // Simulate time passing and second call (should move to assignment phase)
 time.Sleep(10 * time.Millisecond)
 // Force move to assignment phase by setting timeout to 0
 state.RevocationTimeout = 0
 assignments2 := strategy.Assign(members, topicPartitions)
 // Should complete rebalance
@@ -136,7 +136,7 @@ func TestIncrementalCooperativeAssignmentStrategy_RebalanceWithRevocation(t *tes
 t.Errorf("Expected 4 total partitions after rebalance, got %d", totalFinalPartitions)
 }
 t.Logf("Final assignment - Member-1: %d partitions, Member-2: %d partitions",
 len(member1FinalAssignments), len(member2FinalAssignments))
 }
@@ -239,7 +239,7 @@ func TestIncrementalCooperativeAssignmentStrategy_MultipleTopics(t *testing.T) {
 t.Errorf("Expected partition %s to be assigned", expected)
 }
 }
 // Debug: Print all assigned partitions
 t.Logf("All assigned partitions: %v", allAssignedPartitions)
 }
@@ -390,7 +390,7 @@ func TestIncrementalCooperativeAssignmentStrategy_StateTransitions(t *testing.T)
 // Force timeout to move to assignment phase
 state.RevocationTimeout = 0
 strategy.Assign(members, topicPartitions)
 // Should complete and return to None
 state = strategy.GetRebalanceState()
 if state.Phase != RebalancePhaseNone {

weed/mq/kafka/consumer/rebalance_timeout.go (90)

@@ -24,12 +24,12 @@ func (rtm *RebalanceTimeoutManager) CheckRebalanceTimeouts() {
 for _, group := range rtm.coordinator.groups {
 group.Mu.Lock()
 // Only check timeouts for groups in rebalancing states
 if group.State == GroupStatePreparingRebalance || group.State == GroupStateCompletingRebalance {
 rtm.checkGroupRebalanceTimeout(group, now)
 }
 group.Mu.Unlock()
 }
 }
@@ -37,7 +37,7 @@ func (rtm *RebalanceTimeoutManager) CheckRebalanceTimeouts() {
 // checkGroupRebalanceTimeout checks and handles rebalance timeout for a specific group
 func (rtm *RebalanceTimeoutManager) checkGroupRebalanceTimeout(group *ConsumerGroup, now time.Time) {
 expiredMembers := make([]string, 0)
 for memberID, member := range group.Members {
 // Check if member has exceeded its rebalance timeout
 rebalanceTimeout := time.Duration(member.RebalanceTimeout) * time.Millisecond
@@ -45,21 +45,21 @@ func (rtm *RebalanceTimeoutManager) checkGroupRebalanceTimeout(group *ConsumerGr
 // Use default rebalance timeout if not specified
 rebalanceTimeout = time.Duration(rtm.coordinator.rebalanceTimeoutMs) * time.Millisecond
 }
 // For members in pending state during rebalance, check against join time
 if member.State == MemberStatePending {
 if now.Sub(member.JoinedAt) > rebalanceTimeout {
 expiredMembers = append(expiredMembers, memberID)
 }
 }
 // Also check session timeout as a fallback
 sessionTimeout := time.Duration(member.SessionTimeout) * time.Millisecond
 if now.Sub(member.LastHeartbeat) > sessionTimeout {
 expiredMembers = append(expiredMembers, memberID)
 }
 }
 // Remove expired members and trigger rebalance if necessary
 if len(expiredMembers) > 0 {
 rtm.evictExpiredMembers(group, expiredMembers)
@@ -70,13 +70,13 @@ func (rtm *RebalanceTimeoutManager) checkGroupRebalanceTimeout(group *ConsumerGr
 func (rtm *RebalanceTimeoutManager) evictExpiredMembers(group *ConsumerGroup, expiredMembers []string) {
 for _, memberID := range expiredMembers {
 delete(group.Members, memberID)
 // If the leader was evicted, clear leader
 if group.Leader == memberID {
 group.Leader = ""
 }
 }
 // Update group state based on remaining members
 if len(group.Members) == 0 {
 group.State = GroupStateEmpty
@@ -92,18 +92,18 @@ func (rtm *RebalanceTimeoutManager) evictExpiredMembers(group *ConsumerGroup, ex
 break
 }
 }
 // Reset to preparing rebalance to restart the process
 group.State = GroupStatePreparingRebalance
 group.Generation++
 // Mark remaining members as pending
 for _, member := range group.Members {
 member.State = MemberStatePending
 }
 }
 }
 group.LastActivity = time.Now()
 }
@@ -112,7 +112,7 @@ func (rtm *RebalanceTimeoutManager) IsRebalanceStuck(group *ConsumerGroup, maxRe
 if group.State != GroupStatePreparingRebalance && group.State != GroupStateCompletingRebalance {
 return false
 }
 return time.Since(group.LastActivity) > maxRebalanceDuration
 }
@@ -120,14 +120,14 @@ func (rtm *RebalanceTimeoutManager) IsRebalanceStuck(group *ConsumerGroup, maxRe
 func (rtm *RebalanceTimeoutManager) ForceCompleteRebalance(group *ConsumerGroup) {
 group.Mu.Lock()
 defer group.Mu.Unlock()
 // If stuck in preparing rebalance, move to completing
 if group.State == GroupStatePreparingRebalance {
 group.State = GroupStateCompletingRebalance
 group.LastActivity = time.Now()
 return
 }
 // If stuck in completing rebalance, force to stable
 if group.State == GroupStateCompletingRebalance {
 group.State = GroupStateStable
@@ -145,21 +145,21 @@ func (rtm *RebalanceTimeoutManager) GetRebalanceStatus(groupID string) *Rebalanc
 if group == nil {
 return nil
 }
 group.Mu.RLock()
 defer group.Mu.RUnlock()
 status := &RebalanceStatus{
 GroupID: groupID,
 State: group.State,
 Generation: group.Generation,
 MemberCount: len(group.Members),
 Leader: group.Leader,
 LastActivity: group.LastActivity,
 IsRebalancing: group.State == GroupStatePreparingRebalance || group.State == GroupStateCompletingRebalance,
 RebalanceDuration: time.Since(group.LastActivity),
 }
 // Calculate member timeout status
 now := time.Now()
 for memberID, member := range group.Members {
@@ -171,48 +171,48 @@ func (rtm *RebalanceTimeoutManager) GetRebalanceStatus(groupID string) *Rebalanc
 SessionTimeout: time.Duration(member.SessionTimeout) * time.Millisecond,
 RebalanceTimeout: time.Duration(member.RebalanceTimeout) * time.Millisecond,
 }
 // Calculate time until session timeout
 sessionTimeRemaining := memberStatus.SessionTimeout - now.Sub(member.LastHeartbeat)
 if sessionTimeRemaining < 0 {
 sessionTimeRemaining = 0
 }
 memberStatus.SessionTimeRemaining = sessionTimeRemaining
 // Calculate time until rebalance timeout
 rebalanceTimeRemaining := memberStatus.RebalanceTimeout - now.Sub(member.JoinedAt)
 if rebalanceTimeRemaining < 0 {
 rebalanceTimeRemaining = 0
 }
 memberStatus.RebalanceTimeRemaining = rebalanceTimeRemaining
 status.Members = append(status.Members, memberStatus)
 }
 return status
 }
 // RebalanceStatus represents the current status of a group's rebalance
 type RebalanceStatus struct {
 GroupID string `json:"group_id"`
 State GroupState `json:"state"`
 Generation int32 `json:"generation"`
 MemberCount int `json:"member_count"`
 Leader string `json:"leader"`
 LastActivity time.Time `json:"last_activity"`
 IsRebalancing bool `json:"is_rebalancing"`
 RebalanceDuration time.Duration `json:"rebalance_duration"`
 Members []MemberTimeoutStatus `json:"members"`
 }
 // MemberTimeoutStatus represents timeout status for a group member
 type MemberTimeoutStatus struct {
 MemberID string `json:"member_id"`
 State MemberState `json:"state"`
 LastHeartbeat time.Time `json:"last_heartbeat"`
 JoinedAt time.Time `json:"joined_at"`
 SessionTimeout time.Duration `json:"session_timeout"`
 RebalanceTimeout time.Duration `json:"rebalance_timeout"`
 SessionTimeRemaining time.Duration `json:"session_time_remaining"`
 RebalanceTimeRemaining time.Duration `json:"rebalance_time_remaining"`
 }
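
For reference, the eviction logic shown above (only reformatted in this diff) treats a member as expired when either its rebalance timeout, measured from JoinedAt while the member is pending, or its session timeout, measured from LastHeartbeat, has elapsed, falling back to a coordinator default when no rebalance timeout is set. A self-contained sketch of that decision, using simplified stand-in fields and an assumed 5-minute default (the value the tests below mention):

package main

import (
	"fmt"
	"time"
)

// member is a simplified stand-in for the consumer-group member record.
type member struct {
	RebalanceTimeoutMs int64 // 0 means "use the coordinator default"
	SessionTimeoutMs   int64
	JoinedAt           time.Time
	LastHeartbeat      time.Time
	Pending            bool
}

// Assumed coordinator default, per the test comment "Default rebalance
// timeout is 5 minutes (300000ms)".
const defaultRebalanceTimeout = 5 * time.Minute

// expired mirrors the checkGroupRebalanceTimeout decision: a pending
// member that overran its rebalance timeout, or any member past its
// session timeout, is considered expired and would be evicted.
func expired(m member, now time.Time) bool {
	rebalanceTimeout := time.Duration(m.RebalanceTimeoutMs) * time.Millisecond
	if rebalanceTimeout <= 0 {
		rebalanceTimeout = defaultRebalanceTimeout
	}
	if m.Pending && now.Sub(m.JoinedAt) > rebalanceTimeout {
		return true
	}
	sessionTimeout := time.Duration(m.SessionTimeoutMs) * time.Millisecond
	return now.Sub(m.LastHeartbeat) > sessionTimeout
}

func main() {
	now := time.Now()
	stuck := member{
		RebalanceTimeoutMs: 1000,  // 1 second
		SessionTimeoutMs:   30000, // 30 seconds
		JoinedAt:           now.Add(-2 * time.Second),
		LastHeartbeat:      now,
		Pending:            true,
	}
	fmt.Println(expired(stuck, now)) // true: overran its rebalance timeout
}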

weed/mq/kafka/consumer/rebalance_timeout_test.go (112)

@@ -8,14 +8,14 @@ import (
 func TestRebalanceTimeoutManager_CheckRebalanceTimeouts(t *testing.T) {
 coordinator := NewGroupCoordinator()
 defer coordinator.Close()
 rtm := coordinator.rebalanceTimeoutManager
 // Create a group with a member that has a short rebalance timeout
 group := coordinator.GetOrCreateGroup("test-group")
 group.Mu.Lock()
 group.State = GroupStatePreparingRebalance
 member := &GroupMember{
 ID: "member1",
 ClientID: "client1",
@@ -27,15 +27,15 @@ func TestRebalanceTimeoutManager_CheckRebalanceTimeouts(t *testing.T) {
 }
 group.Members["member1"] = member
 group.Mu.Unlock()
 // Check timeouts - member should be evicted
 rtm.CheckRebalanceTimeouts()
 group.Mu.RLock()
 if len(group.Members) != 0 {
 t.Errorf("Expected member to be evicted due to rebalance timeout, but %d members remain", len(group.Members))
 }
 if group.State != GroupStateEmpty {
 t.Errorf("Expected group state to be Empty after member eviction, got %s", group.State.String())
 }
@@ -45,18 +45,18 @@ func TestRebalanceTimeoutManager_CheckRebalanceTimeouts(t *testing.T) {
 func TestRebalanceTimeoutManager_SessionTimeoutFallback(t *testing.T) {
 coordinator := NewGroupCoordinator()
 defer coordinator.Close()
 rtm := coordinator.rebalanceTimeoutManager
 // Create a group with a member that has exceeded session timeout
 group := coordinator.GetOrCreateGroup("test-group")
 group.Mu.Lock()
 group.State = GroupStatePreparingRebalance
 member := &GroupMember{
 ID: "member1",
 ClientID: "client1",
 SessionTimeout: 1000, // 1 second
 RebalanceTimeout: 30000, // 30 seconds
 State: MemberStatePending,
 LastHeartbeat: time.Now().Add(-2 * time.Second), // Last heartbeat 2 seconds ago
@@ -64,10 +64,10 @@ func TestRebalanceTimeoutManager_SessionTimeoutFallback(t *testing.T) {
 }
 group.Members["member1"] = member
 group.Mu.Unlock()
 // Check timeouts - member should be evicted due to session timeout
 rtm.CheckRebalanceTimeouts()
 group.Mu.RLock()
 if len(group.Members) != 0 {
 t.Errorf("Expected member to be evicted due to session timeout, but %d members remain", len(group.Members))
@@ -78,15 +78,15 @@ func TestRebalanceTimeoutManager_SessionTimeoutFallback(t *testing.T) {
 func TestRebalanceTimeoutManager_LeaderEviction(t *testing.T) {
 coordinator := NewGroupCoordinator()
 defer coordinator.Close()
 rtm := coordinator.rebalanceTimeoutManager
 // Create a group with leader and another member
 group := coordinator.GetOrCreateGroup("test-group")
 group.Mu.Lock()
 group.State = GroupStatePreparingRebalance
 group.Leader = "member1"
 // Leader with expired rebalance timeout
 leader := &GroupMember{
 ID: "member1",
@@ -98,7 +98,7 @@ func TestRebalanceTimeoutManager_LeaderEviction(t *testing.T) {
 JoinedAt: time.Now().Add(-2 * time.Second),
 }
 group.Members["member1"] = leader
 // Another member that's still valid
 member2 := &GroupMember{
 ID: "member2",
@@ -111,19 +111,19 @@ func TestRebalanceTimeoutManager_LeaderEviction(t *testing.T) {
 }
 group.Members["member2"] = member2
 group.Mu.Unlock()
 // Check timeouts - leader should be evicted, new leader selected
 rtm.CheckRebalanceTimeouts()
 group.Mu.RLock()
 if len(group.Members) != 1 {
 t.Errorf("Expected 1 member to remain after leader eviction, got %d", len(group.Members))
 }
 if group.Leader != "member2" {
 t.Errorf("Expected member2 to become new leader, got %s", group.Leader)
 }
 if group.State != GroupStatePreparingRebalance {
 t.Errorf("Expected group to restart rebalancing after leader eviction, got %s", group.State.String())
 }
@@ -133,37 +133,37 @@ func TestRebalanceTimeoutManager_LeaderEviction(t *testing.T) {
 func TestRebalanceTimeoutManager_IsRebalanceStuck(t *testing.T) {
 coordinator := NewGroupCoordinator()
 defer coordinator.Close()
 rtm := coordinator.rebalanceTimeoutManager
 // Create a group that's been rebalancing for a while
 group := coordinator.GetOrCreateGroup("test-group")
 group.Mu.Lock()
 group.State = GroupStatePreparingRebalance
 group.LastActivity = time.Now().Add(-15 * time.Minute) // 15 minutes ago
 group.Mu.Unlock()
 // Check if rebalance is stuck (max 10 minutes)
 maxDuration := 10 * time.Minute
 if !rtm.IsRebalanceStuck(group, maxDuration) {
 t.Error("Expected rebalance to be detected as stuck")
 }
 // Test with a group that's not stuck
 group.Mu.Lock()
 group.LastActivity = time.Now().Add(-5 * time.Minute) // 5 minutes ago
 group.Mu.Unlock()
 if rtm.IsRebalanceStuck(group, maxDuration) {
 t.Error("Expected rebalance to not be detected as stuck")
 }
 // Test with stable group (should not be stuck)
 group.Mu.Lock()
 group.State = GroupStateStable
 group.LastActivity = time.Now().Add(-15 * time.Minute)
 group.Mu.Unlock()
 if rtm.IsRebalanceStuck(group, maxDuration) {
 t.Error("Stable group should not be detected as stuck")
 }
@@ -172,37 +172,37 @@ func TestRebalanceTimeoutManager_IsRebalanceStuck(t *testing.T) {
 func TestRebalanceTimeoutManager_ForceCompleteRebalance(t *testing.T) {
 coordinator := NewGroupCoordinator()
 defer coordinator.Close()
 rtm := coordinator.rebalanceTimeoutManager
 // Test forcing completion from PreparingRebalance
 group := coordinator.GetOrCreateGroup("test-group")
 group.Mu.Lock()
 group.State = GroupStatePreparingRebalance
 member := &GroupMember{
 ID: "member1",
 State: MemberStatePending,
 }
 group.Members["member1"] = member
 group.Mu.Unlock()
 rtm.ForceCompleteRebalance(group)
 group.Mu.RLock()
 if group.State != GroupStateCompletingRebalance {
 t.Errorf("Expected group state to be CompletingRebalance, got %s", group.State.String())
 }
 group.Mu.RUnlock()
 // Test forcing completion from CompletingRebalance
 rtm.ForceCompleteRebalance(group)
 group.Mu.RLock()
 if group.State != GroupStateStable {
 t.Errorf("Expected group state to be Stable, got %s", group.State.String())
 }
 if member.State != MemberStateStable {
 t.Errorf("Expected member state to be Stable, got %s", member.State.String())
 }
@@ -212,15 +212,15 @@ func TestRebalanceTimeoutManager_ForceCompleteRebalance(t *testing.T) {
 func TestRebalanceTimeoutManager_GetRebalanceStatus(t *testing.T) {
 coordinator := NewGroupCoordinator()
 defer coordinator.Close()
 rtm := coordinator.rebalanceTimeoutManager
 // Test with non-existent group
 status := rtm.GetRebalanceStatus("non-existent")
 if status != nil {
 t.Error("Expected nil status for non-existent group")
 }
 // Create a group with members
 group := coordinator.GetOrCreateGroup("test-group")
 group.Mu.Lock()
@@ -228,7 +228,7 @@ func TestRebalanceTimeoutManager_GetRebalanceStatus(t *testing.T) {
 group.Generation = 5
 group.Leader = "member1"
 group.LastActivity = time.Now().Add(-2 * time.Minute)
 member1 := &GroupMember{
 ID: "member1",
 State: MemberStatePending,
@@ -238,7 +238,7 @@ func TestRebalanceTimeoutManager_GetRebalanceStatus(t *testing.T) {
 RebalanceTimeout: 300000, // 5 minutes
 }
 group.Members["member1"] = member1
 member2 := &GroupMember{
 ID: "member2",
 State: MemberStatePending,
@@ -249,48 +249,48 @@ func TestRebalanceTimeoutManager_GetRebalanceStatus(t *testing.T) {
 }
 group.Members["member2"] = member2
 group.Mu.Unlock()
 // Get status
 status = rtm.GetRebalanceStatus("test-group")
 if status == nil {
 t.Fatal("Expected non-nil status")
 }
 if status.GroupID != "test-group" {
 t.Errorf("Expected group ID 'test-group', got %s", status.GroupID)
 }
 if status.State != GroupStatePreparingRebalance {
 t.Errorf("Expected state PreparingRebalance, got %s", status.State.String())
 }
 if status.Generation != 5 {
 t.Errorf("Expected generation 5, got %d", status.Generation)
 }
 if status.MemberCount != 2 {
 t.Errorf("Expected 2 members, got %d", status.MemberCount)
 }
 if status.Leader != "member1" {
 t.Errorf("Expected leader 'member1', got %s", status.Leader)
 }
 if !status.IsRebalancing {
 t.Error("Expected IsRebalancing to be true")
 }
 if len(status.Members) != 2 {
 t.Errorf("Expected 2 member statuses, got %d", len(status.Members))
 }
 // Check member timeout calculations
 for _, memberStatus := range status.Members {
 if memberStatus.SessionTimeRemaining < 0 {
 t.Errorf("Session time remaining should not be negative for member %s", memberStatus.MemberID)
 }
 if memberStatus.RebalanceTimeRemaining < 0 {
 t.Errorf("Rebalance time remaining should not be negative for member %s", memberStatus.MemberID)
 }
@@ -300,14 +300,14 @@ func TestRebalanceTimeoutManager_GetRebalanceStatus(t *testing.T) {
 func TestRebalanceTimeoutManager_DefaultRebalanceTimeout(t *testing.T) {
 coordinator := NewGroupCoordinator()
 defer coordinator.Close()
 rtm := coordinator.rebalanceTimeoutManager
 // Create a group with a member that has no rebalance timeout set (0)
 group := coordinator.GetOrCreateGroup("test-group")
 group.Mu.Lock()
 group.State = GroupStatePreparingRebalance
 member := &GroupMember{
 ID: "member1",
 ClientID: "client1",
@@ -319,10 +319,10 @@ func TestRebalanceTimeoutManager_DefaultRebalanceTimeout(t *testing.T) {
 }
 group.Members["member1"] = member
 group.Mu.Unlock()
 // Default rebalance timeout is 5 minutes (300000ms), so member should be evicted
 rtm.CheckRebalanceTimeouts()
 group.Mu.RLock()
 if len(group.Members) != 0 {
 t.Errorf("Expected member to be evicted using default rebalance timeout, but %d members remain", len(group.Members))

weed/mq/kafka/consumer_offset/memory_storage.go (1)

@@ -142,4 +142,3 @@ func (m *MemoryStorage) Close() error {
 return nil
 }

weed/mq/kafka/consumer_offset/memory_storage_test.go (1)

@@ -206,4 +206,3 @@ func TestMemoryStorageOverwrite(t *testing.T) {
 assert.Equal(t, int64(20), offset)
 assert.Equal(t, "meta2", metadata)
 }

weed/mq/kafka/consumer_offset/storage.go (1)

@@ -56,4 +56,3 @@ var (
 ErrInvalidPartition = fmt.Errorf("invalid partition")
 ErrStorageClosed = fmt.Errorf("storage is closed")
 )

weed/mq/kafka/gateway/test_mock_handler.go (1)

@@ -121,7 +121,6 @@ func (m *mockSeaweedMQHandler) ProduceRecord(ctx context.Context, topicName stri
 offset := m.offsets[topicName][partitionID]
 m.offsets[topicName][partitionID]++
 // Store record
 record := &mockRecord{
 key: key,

weed/mq/kafka/package.go (2)

@@ -9,5 +9,3 @@ package kafka
 // - offset/: Offset management
 // - schema/: Schema registry integration
 // - consumer/: Consumer group coordination

weed/mq/kafka/partition_mapping.go (2)

@@ -51,5 +51,3 @@ func GetRangeSize() int32 {
 func GetMaxKafkaPartitions() int32 {
 return int32(pub_balancer.MaxPartitionCount) / 35 // 72 partitions
 }
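
The comment above says GetMaxKafkaPartitions yields 72, which implies pub_balancer.MaxPartitionCount is around 2520 (2520 / 35 = 72 with integer division). A quick hedged check of that arithmetic, using a local constant standing in for the real pub_balancer value:

package main

import "fmt"

// Assumed from the "// 72 partitions" comment above; the authoritative
// value lives in weed/mq/pub_balancer.
const maxPartitionCount = 2520

func getMaxKafkaPartitions() int32 {
	return int32(maxPartitionCount) / 35
}

func main() {
	fmt.Println(getMaxKafkaPartitions()) // 72
}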

weed/mq/kafka/protocol/describe_cluster.go (2)

@@ -37,7 +37,6 @@ func (h *Handler) handleDescribeCluster(correlationID uint32, apiVersion uint16,
 // Tagged fields at end of request
 // (We don't parse them, just skip)
 // Build response
 response := make([]byte, 0, 256)
@@ -109,6 +108,5 @@ func (h *Handler) handleDescribeCluster(correlationID uint32, apiVersion uint16,
 // Response-level tagged fields (flexible response)
 response = append(response, 0x00) // Empty tagged fields
 return response, nil
 }

weed/mq/kafka/protocol/flexible_versions.go (1)

@@ -268,7 +268,6 @@ func parseCompactString(data []byte) ([]byte, int) {
 return nil, 0
 }
 if actualLength == 0 {
 // Empty string (length was 1)
 return []byte{}, consumed

weed/mq/kafka/protocol/group_introspection.go (18)

@@ -107,13 +107,13 @@ func (h *Handler) describeGroup(groupID string) DescribeGroupsGroup {
 }
 return DescribeGroupsGroup{
 ErrorCode: 0,
 GroupID: groupID,
 State: stateStr,
 ProtocolType: "consumer", // Default protocol type
 Protocol: group.Protocol,
 Members: members,
 AuthorizedOps: []int32{}, // Empty for now
 }
 }
@@ -175,8 +175,8 @@ func (h *Handler) listAllGroups(statesFilter []string) []ListGroupsGroup {
 // Request/Response structures
 type DescribeGroupsRequest struct {
 GroupIDs []string
 IncludeAuthorizedOps bool
 }
 type DescribeGroupsResponse struct {

weed/mq/kafka/protocol/handler.go (35)

@@ -661,7 +661,7 @@ func (h *Handler) HandleConn(ctx context.Context, conn net.Conn) error {
 return
 }
 // Removed V(4) logging from hot path - only log errors and important events
 // Wrap request processing with panic recovery to prevent deadlocks
 // If processRequestSync panics, we MUST still send a response to avoid blocking the response writer
 var response []byte
@@ -881,7 +881,6 @@ func (h *Handler) HandleConn(ctx context.Context, conn net.Conn) error {
 return fmt.Errorf("read message: %w", err)
 }
 // Parse at least the basic header to get API key and correlation ID
 if len(messageBuf) < 8 {
 return fmt.Errorf("message too short")
@@ -890,7 +889,7 @@ func (h *Handler) HandleConn(ctx context.Context, conn net.Conn) error {
 apiKey := binary.BigEndian.Uint16(messageBuf[0:2])
 apiVersion := binary.BigEndian.Uint16(messageBuf[2:4])
 correlationID := binary.BigEndian.Uint32(messageBuf[4:8])
 // Validate API version against what we support
 if err := h.validateAPIVersion(apiKey, apiVersion); err != nil {
 glog.Errorf("API VERSION VALIDATION FAILED: Key=%d (%s), Version=%d, error=%v", apiKey, getAPIName(APIKey(apiKey)), apiVersion, err)
@@ -1050,7 +1049,6 @@ func (h *Handler) processRequestSync(req *kafkaRequest) ([]byte, error) {
 requestStart := time.Now()
 apiName := getAPIName(APIKey(req.apiKey))
 // Only log high-volume requests at V(2), not V(4)
 if glog.V(2) {
 glog.V(2).Infof("[API] %s (key=%d, ver=%d, corr=%d)",
@@ -1589,15 +1587,15 @@ func (h *Handler) HandleMetadataV2(correlationID uint32, requestBody []byte) ([]
 for partitionID := int32(0); partitionID < partitionCount; partitionID++ {
 binary.Write(&buf, binary.BigEndian, int16(0)) // ErrorCode
 binary.Write(&buf, binary.BigEndian, partitionID) // PartitionIndex
 binary.Write(&buf, binary.BigEndian, nodeID) // LeaderID
 // ReplicaNodes array (4 bytes length + nodes)
 binary.Write(&buf, binary.BigEndian, int32(1)) // 1 replica
 binary.Write(&buf, binary.BigEndian, nodeID) // NodeID 1
 // IsrNodes array (4 bytes length + nodes)
 binary.Write(&buf, binary.BigEndian, int32(1)) // 1 ISR node
 binary.Write(&buf, binary.BigEndian, nodeID) // NodeID 1
 }
 }
@@ -1716,15 +1714,15 @@ func (h *Handler) HandleMetadataV3V4(correlationID uint32, requestBody []byte) (
 for partitionID := int32(0); partitionID < partitionCount; partitionID++ {
 binary.Write(&buf, binary.BigEndian, int16(0)) // ErrorCode
 binary.Write(&buf, binary.BigEndian, partitionID) // PartitionIndex
 binary.Write(&buf, binary.BigEndian, nodeID) // LeaderID
 // ReplicaNodes array (4 bytes length + nodes)
 binary.Write(&buf, binary.BigEndian, int32(1)) // 1 replica
 binary.Write(&buf, binary.BigEndian, nodeID) // NodeID 1
 // IsrNodes array (4 bytes length + nodes)
 binary.Write(&buf, binary.BigEndian, int32(1)) // 1 ISR node
 binary.Write(&buf, binary.BigEndian, nodeID) // NodeID 1
 }
 }
@@ -1737,7 +1735,7 @@ func (h *Handler) HandleMetadataV3V4(correlationID uint32, requestBody []byte) (
 }
 if len(response) > 100 {
 }
 return response, nil
 }
@@ -1828,7 +1826,6 @@ func (h *Handler) handleMetadataV5ToV8(correlationID uint32, requestBody []byte,
 // NOTE: Correlation ID is handled by writeResponseWithCorrelationID
 // Do NOT include it in the response body
 // ThrottleTimeMs (4 bytes) - v3+ addition
 binary.Write(&buf, binary.BigEndian, int32(0)) // No throttling
@@ -1896,7 +1893,7 @@ func (h *Handler) handleMetadataV5ToV8(correlationID uint32, requestBody []byte,
 for partitionID := int32(0); partitionID < partitionCount; partitionID++ {
 binary.Write(&buf, binary.BigEndian, int16(0)) // ErrorCode
 binary.Write(&buf, binary.BigEndian, partitionID) // PartitionIndex
 binary.Write(&buf, binary.BigEndian, nodeID) // LeaderID
 // LeaderEpoch (4 bytes) - v7+ addition
 if apiVersion >= 7 {
@@ -1905,11 +1902,11 @@ func (h *Handler) handleMetadataV5ToV8(correlationID uint32, requestBody []byte,
 // ReplicaNodes array (4 bytes length + nodes)
 binary.Write(&buf, binary.BigEndian, int32(1)) // 1 replica
 binary.Write(&buf, binary.BigEndian, nodeID) // NodeID 1
 // IsrNodes array (4 bytes length + nodes)
 binary.Write(&buf, binary.BigEndian, int32(1)) // 1 ISR node
 binary.Write(&buf, binary.BigEndian, nodeID) // NodeID 1
 // OfflineReplicas array (4 bytes length + nodes) - v5+ addition
 binary.Write(&buf, binary.BigEndian, int32(0)) // No offline replicas
@@ -1930,7 +1927,7 @@ func (h *Handler) handleMetadataV5ToV8(correlationID uint32, requestBody []byte,
 }
 if len(response) > 100 {
 }
 return response, nil
 }
@@ -1994,12 +1991,11 @@ func (h *Handler) handleListOffsets(correlationID uint32, apiVersion uint16, req
 // Parse minimal request to understand what's being asked (header already stripped)
 offset := 0
 maxBytes := len(requestBody)
 if maxBytes > 64 {
 maxBytes = 64
 }
 // v1+ has replica_id(4)
 if apiVersion >= 1 {
 if len(requestBody) < offset+4 {
@@ -3930,12 +3926,11 @@ func (h *Handler) handleInitProducerId(correlationID uint32, apiVersion uint16,
 // v2+: transactional_id(NULLABLE_STRING) + transaction_timeout_ms(INT32) + producer_id(INT64) + producer_epoch(INT16)
 // v4+: Uses flexible format with tagged fields
 maxBytes := len(requestBody)
 if maxBytes > 64 {
 maxBytes = 64
 }
 offset := 0
 // Parse transactional_id (NULLABLE_STRING or COMPACT_NULLABLE_STRING for flexible versions)
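
The Metadata hunks above only realign comments around the binary.Write calls; the encoding itself (error code, partition index, leader ID, then length-prefixed replica and ISR arrays, all big-endian) is unchanged. A minimal sketch of that per-partition layout, assuming a single broker whose ID is nodeID; this illustrates the pattern, not the handler's full response:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// writePartition encodes one partition entry the way the non-flexible
// Metadata handlers above do: fixed-width big-endian fields, with the
// replica and ISR lists written as an int32 count followed by node IDs.
func writePartition(buf *bytes.Buffer, partitionID, nodeID int32) {
	binary.Write(buf, binary.BigEndian, int16(0))    // ErrorCode
	binary.Write(buf, binary.BigEndian, partitionID) // PartitionIndex
	binary.Write(buf, binary.BigEndian, nodeID)      // LeaderID

	binary.Write(buf, binary.BigEndian, int32(1)) // ReplicaNodes: 1 entry
	binary.Write(buf, binary.BigEndian, nodeID)
	binary.Write(buf, binary.BigEndian, int32(1)) // IsrNodes: 1 entry
	binary.Write(buf, binary.BigEndian, nodeID)
}

func main() {
	var buf bytes.Buffer
	writePartition(&buf, 0, 1)
	fmt.Printf("% x\n", buf.Bytes()) // 26 bytes: 2+4+4 + 4+4 + 4+4
}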

weed/mq/kafka/protocol/offset_storage_adapter.go (1)

@@ -47,4 +47,3 @@ func (a *offsetStorageAdapter) DeleteGroup(group string) error {
 func (a *offsetStorageAdapter) Close() error {
 return a.storage.Close()
 }

weed/mq/kafka/protocol/response_validation_example_test.go (1)

@@ -140,4 +140,3 @@ func TestMetadataResponseHasBrokers(t *testing.T) {
 t.Logf("✓ Metadata response correctly has %d broker(s)", parsedCount)
 }

72
weed/mq/kafka/schema/envelope_test.go

@ -7,46 +7,46 @@ import (
func TestParseConfluentEnvelope(t *testing.T) {
tests := []struct {
name string
input []byte
expectOK bool
expectID uint32
expectFormat Format
}{
{
name: "valid Avro message",
input: []byte{0x00, 0x00, 0x00, 0x00, 0x01, 0x10, 0x48, 0x65, 0x6c, 0x6c, 0x6f}, // schema ID 1 + "Hello"
expectOK: true,
expectID: 1,
expectFormat: FormatAvro,
},
{
name: "valid message with larger schema ID",
input: []byte{0x00, 0x00, 0x00, 0x04, 0xd2, 0x02, 0x66, 0x6f, 0x6f}, // schema ID 1234 + "foo"
expectOK: true,
expectID: 1234,
expectFormat: FormatAvro,
},
{
name: "too short message",
input: []byte{0x00, 0x00, 0x00},
expectOK: false,
},
{
name: "no magic byte",
input: []byte{0x01, 0x00, 0x00, 0x00, 0x01, 0x48, 0x65, 0x6c, 0x6c, 0x6f},
expectOK: false,
},
{
name: "empty message",
input: []byte{},
expectOK: false,
},
{
name: "minimal valid message",
input: []byte{0x00, 0x00, 0x00, 0x00, 0x01}, // schema ID 1, empty payload
expectOK: true,
expectID: 1,
expectFormat: FormatAvro,
},
}
@ -54,24 +54,24 @@ func TestParseConfluentEnvelope(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
envelope, ok := ParseConfluentEnvelope(tt.input)
if ok != tt.expectOK {
t.Errorf("ParseConfluentEnvelope() ok = %v, want %v", ok, tt.expectOK)
return
}
if !tt.expectOK {
return // No need to check further if we expected failure
}
if envelope.SchemaID != tt.expectID {
t.Errorf("ParseConfluentEnvelope() schemaID = %v, want %v", envelope.SchemaID, tt.expectID)
}
if envelope.Format != tt.expectFormat {
t.Errorf("ParseConfluentEnvelope() format = %v, want %v", envelope.Format, tt.expectFormat)
}
// Verify payload extraction
expectedPayloadLen := len(tt.input) - 5 // 5 bytes for magic + schema ID
if len(envelope.Payload) != expectedPayloadLen {
@ -150,11 +150,11 @@ func TestExtractSchemaID(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
id, ok := ExtractSchemaID(tt.input)
if ok != tt.expectOK {
t.Errorf("ExtractSchemaID() ok = %v, want %v", ok, tt.expectOK)
}
if id != tt.expectID {
t.Errorf("ExtractSchemaID() id = %v, want %v", id, tt.expectID)
}
@ -200,12 +200,12 @@ func TestCreateConfluentEnvelope(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := CreateConfluentEnvelope(tt.format, tt.schemaID, tt.indexes, tt.payload)
if len(result) != len(tt.expected) {
t.Errorf("CreateConfluentEnvelope() length = %v, want %v", len(result), len(tt.expected))
return
}
for i, b := range result {
if b != tt.expected[i] {
t.Errorf("CreateConfluentEnvelope() byte[%d] = %v, want %v", i, b, tt.expected[i])
@ -262,7 +262,7 @@ func TestEnvelopeValidate(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.envelope.Validate()
if (err != nil) != tt.expectErr {
t.Errorf("Envelope.Validate() error = %v, expectErr %v", err, tt.expectErr)
}
@ -297,7 +297,7 @@ func TestEnvelopeMetadata(t *testing.T) {
func BenchmarkParseConfluentEnvelope(b *testing.B) {
// Create a test message
testMsg := make([]byte, 1024)
testMsg[0] = 0x00 // Magic byte
binary.BigEndian.PutUint32(testMsg[1:5], 123) // Schema ID
// Fill rest with dummy data
for i := 5; i < len(testMsg); i++ {

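The test vectors above encode the Confluent wire format: one magic byte 0x00, a 4-byte big-endian schema ID, then the serialized payload. A minimal sketch that reproduces the "larger schema ID" vector; buildEnvelope is an illustrative stand-in, not the package's CreateConfluentEnvelope:

```go
package main

import (
    "encoding/binary"
    "fmt"
)

// buildEnvelope writes the Confluent framing: magic byte, big-endian schema ID, payload.
func buildEnvelope(schemaID uint32, payload []byte) []byte {
    out := make([]byte, 5+len(payload))
    out[0] = 0x00 // magic byte
    binary.BigEndian.PutUint32(out[1:5], schemaID)
    copy(out[5:], payload)
    return out
}

func main() {
    env := buildEnvelope(1234, []byte("foo"))
    fmt.Printf("% x\n", env) // 00 00 00 04 d2 66 6f 6f — the "schema ID 1234" test vector above
}
```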
2
weed/mq/kafka/schema/envelope_varint_test.go

@ -100,7 +100,7 @@ func TestCreateConfluentEnvelopeWithProtobufIndexes(t *testing.T) {
parsed, ok := ParseConfluentEnvelope(envelope)
require.True(t, ok, "Should be able to parse envelope")
assert.Equal(t, tc.schemaID, parsed.SchemaID)
if tc.format == FormatProtobuf && len(tc.indexes) == 0 {
// For Protobuf without indexes, payload should match
assert.Equal(t, tc.payload, parsed.Payload, "Payload should match")

2
weed/mq/metadata_constants.go

@ -17,5 +17,3 @@ const (
// Source file tracking for parquet deduplication
ExtendedAttrSources = "sources" // JSON-encoded list of source log files
)

54
weed/mq/offset/migration.go

@ -118,17 +118,17 @@ func (m *MigrationManager) GetCurrentVersion() (int, error) {
if err != nil { if err != nil {
return 0, fmt.Errorf("failed to create migrations table: %w", err) return 0, fmt.Errorf("failed to create migrations table: %w", err)
} }
var version sql.NullInt64 var version sql.NullInt64
err = m.db.QueryRow("SELECT MAX(version) FROM schema_migrations").Scan(&version) err = m.db.QueryRow("SELECT MAX(version) FROM schema_migrations").Scan(&version)
if err != nil { if err != nil {
return 0, fmt.Errorf("failed to get current version: %w", err) return 0, fmt.Errorf("failed to get current version: %w", err)
} }
if !version.Valid { if !version.Valid {
return 0, nil // No migrations applied yet return 0, nil // No migrations applied yet
} }
return int(version.Int64), nil return int(version.Int64), nil
} }
@ -138,29 +138,29 @@ func (m *MigrationManager) ApplyMigrations() error {
if err != nil { if err != nil {
return fmt.Errorf("failed to get current version: %w", err) return fmt.Errorf("failed to get current version: %w", err)
} }
migrations := GetMigrations() migrations := GetMigrations()
for _, migration := range migrations { for _, migration := range migrations {
if migration.Version <= currentVersion { if migration.Version <= currentVersion {
continue // Already applied continue // Already applied
} }
fmt.Printf("Applying migration %d: %s\n", migration.Version, migration.Description) fmt.Printf("Applying migration %d: %s\n", migration.Version, migration.Description)
// Begin transaction // Begin transaction
tx, err := m.db.Begin() tx, err := m.db.Begin()
if err != nil { if err != nil {
return fmt.Errorf("failed to begin transaction for migration %d: %w", migration.Version, err) return fmt.Errorf("failed to begin transaction for migration %d: %w", migration.Version, err)
} }
// Execute migration SQL // Execute migration SQL
_, err = tx.Exec(migration.SQL) _, err = tx.Exec(migration.SQL)
if err != nil { if err != nil {
tx.Rollback() tx.Rollback()
return fmt.Errorf("failed to execute migration %d: %w", migration.Version, err) return fmt.Errorf("failed to execute migration %d: %w", migration.Version, err)
} }
// Record migration as applied // Record migration as applied
_, err = tx.Exec( _, err = tx.Exec(
"INSERT INTO schema_migrations (version, description, applied_at) VALUES (?, ?, ?)", "INSERT INTO schema_migrations (version, description, applied_at) VALUES (?, ?, ?)",
@ -172,16 +172,16 @@ func (m *MigrationManager) ApplyMigrations() error {
tx.Rollback() tx.Rollback()
return fmt.Errorf("failed to record migration %d: %w", migration.Version, err) return fmt.Errorf("failed to record migration %d: %w", migration.Version, err)
} }
// Commit transaction // Commit transaction
err = tx.Commit() err = tx.Commit()
if err != nil { if err != nil {
return fmt.Errorf("failed to commit migration %d: %w", migration.Version, err) return fmt.Errorf("failed to commit migration %d: %w", migration.Version, err)
} }
fmt.Printf("Successfully applied migration %d\n", migration.Version) fmt.Printf("Successfully applied migration %d\n", migration.Version)
} }
return nil return nil
} }
@ -203,7 +203,7 @@ func (m *MigrationManager) GetAppliedMigrations() ([]AppliedMigration, error) {
return nil, fmt.Errorf("failed to query applied migrations: %w", err) return nil, fmt.Errorf("failed to query applied migrations: %w", err)
} }
defer rows.Close() defer rows.Close()
var migrations []AppliedMigration var migrations []AppliedMigration
for rows.Next() { for rows.Next() {
var migration AppliedMigration var migration AppliedMigration
@ -213,7 +213,7 @@ func (m *MigrationManager) GetAppliedMigrations() ([]AppliedMigration, error) {
} }
migrations = append(migrations, migration) migrations = append(migrations, migration)
} }
return migrations, nil return migrations, nil
} }
@ -223,17 +223,17 @@ func (m *MigrationManager) ValidateSchema() error {
if err != nil { if err != nil {
return fmt.Errorf("failed to get current version: %w", err) return fmt.Errorf("failed to get current version: %w", err)
} }
migrations := GetMigrations() migrations := GetMigrations()
if len(migrations) == 0 { if len(migrations) == 0 {
return nil return nil
} }
latestVersion := migrations[len(migrations)-1].Version latestVersion := migrations[len(migrations)-1].Version
if currentVersion < latestVersion { if currentVersion < latestVersion {
return fmt.Errorf("schema is outdated: current version %d, latest version %d", currentVersion, latestVersion) return fmt.Errorf("schema is outdated: current version %d, latest version %d", currentVersion, latestVersion)
} }
return nil return nil
} }
@ -253,21 +253,21 @@ func getCurrentTimestamp() int64 {
func CreateDatabase(dbPath string) (*sql.DB, error) { func CreateDatabase(dbPath string) (*sql.DB, error) {
// TODO: Support different database types (PostgreSQL, MySQL, etc.) // TODO: Support different database types (PostgreSQL, MySQL, etc.)
// ASSUMPTION: Using SQLite for now, can be extended for other databases // ASSUMPTION: Using SQLite for now, can be extended for other databases
db, err := sql.Open("sqlite3", dbPath) db, err := sql.Open("sqlite3", dbPath)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to open database: %w", err) return nil, fmt.Errorf("failed to open database: %w", err)
} }
// Configure SQLite for better performance
pragmas := []string{
"PRAGMA journal_mode=WAL", // Write-Ahead Logging for better concurrency
"PRAGMA synchronous=NORMAL", // Balance between safety and performance
"PRAGMA cache_size=10000", // Increase cache size
"PRAGMA foreign_keys=ON", // Enable foreign key constraints
"PRAGMA temp_store=MEMORY", // Store temporary tables in memory
}
for _, pragma := range pragmas { for _, pragma := range pragmas {
_, err := db.Exec(pragma) _, err := db.Exec(pragma)
if err != nil { if err != nil {
@ -275,7 +275,7 @@ func CreateDatabase(dbPath string) (*sql.DB, error) {
return nil, fmt.Errorf("failed to set pragma %s: %w", pragma, err) return nil, fmt.Errorf("failed to set pragma %s: %w", pragma, err)
} }
} }
// Apply migrations // Apply migrations
migrationManager := NewMigrationManager(db) migrationManager := NewMigrationManager(db)
err = migrationManager.ApplyMigrations() err = migrationManager.ApplyMigrations()
@ -283,7 +283,7 @@ func CreateDatabase(dbPath string) (*sql.DB, error) {
db.Close() db.Close()
return nil, fmt.Errorf("failed to apply migrations: %w", err) return nil, fmt.Errorf("failed to apply migrations: %w", err)
} }
return db, nil return db, nil
} }

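Putting the pieces above together, a caller would normally go through CreateDatabase and MigrationManager rather than running the SQL directly. A usage sketch; the import path and package name are inferred from the file location, and a driver registering the "sqlite3" name is assumed to be available:

```go
package main

import (
    "log"

    _ "github.com/mattn/go-sqlite3" // assumed: any driver registering "sqlite3" works

    "github.com/seaweedfs/seaweedfs/weed/mq/offset" // import path inferred from weed/mq/offset
)

func main() {
    // CreateDatabase opens SQLite, sets the PRAGMAs, and applies pending migrations.
    db, err := offset.CreateDatabase("/tmp/offsets.db")
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()

    mm := offset.NewMigrationManager(db)
    if err := mm.ValidateSchema(); err != nil {
        log.Fatal(err) // schema older than the latest known migration
    }
    v, err := mm.GetCurrentVersion()
    if err != nil {
        log.Fatal(err)
    }
    log.Printf("schema at version %d", v)
}
```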
10
weed/mq/schema/flat_schema_utils_test.go

@ -248,11 +248,11 @@ func TestValidateKeyColumns(t *testing.T) {
// Helper function to check if string contains substring
func contains(str, substr string) bool {
return len(str) >= len(substr) &&
(len(substr) == 0 || str[len(str)-len(substr):] == substr ||
str[:len(substr)] == substr ||
len(str) > len(substr) && (str[len(str)-len(substr)-1:len(str)-len(substr)] == " " || str[len(str)-len(substr)-1] == ' ') && str[len(str)-len(substr):] == substr ||
findInString(str, substr))
}
func findInString(str, substr string) bool {

2
weed/pb/mq_agent_pb/publish_response_test.go

@ -1,8 +1,8 @@
package mq_agent_pb
import (
"google.golang.org/protobuf/proto"
"testing"
)
func TestPublishRecordResponseSerialization(t *testing.T) {

6
weed/pb/schema_pb/offset_test.go

@ -1,8 +1,8 @@
package schema_pb
import (
"google.golang.org/protobuf/proto"
"testing"
)
func TestOffsetTypeEnums(t *testing.T) {
@ -34,8 +34,8 @@ func TestPartitionOffsetSerialization(t *testing.T) {
RangeStop: 31,
UnixTimeNs: 1234567890,
},
StartTsNs: 1234567890,
StartOffset: 42, // New field
}
// Test proto marshaling/unmarshaling

7
weed/pb/volume_server.proto

@ -525,6 +525,13 @@ message VolumeInfo {
int64 dat_file_size = 5; // store the original dat file size
uint64 expire_at_sec = 6; // expiration time of ec volume
bool read_only = 7;
EcShardConfig ec_shard_config = 8; // EC shard configuration (optional, null = use default 10+4)
}
// EcShardConfig specifies erasure coding shard configuration
message EcShardConfig {
uint32 data_shards = 1; // Number of data shards (e.g., 10)
uint32 parity_shards = 2; // Number of parity shards (e.g., 4)
}
message OldVersionVolumeInfo {
repeated RemoteFile files = 1;

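For reference, this is what the new field looks like from the generated Go types: a hypothetical 6+3 layout; leaving EcShardConfig nil keeps the default 10+4 behaviour, per the comment on field 8 above.

```go
package main

import (
    "fmt"

    "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
)

func main() {
    // Sketch: record a custom shard layout in a VolumeInfo before saving it to .vif.
    vi := &volume_server_pb.VolumeInfo{
        EcShardConfig: &volume_server_pb.EcShardConfig{
            DataShards:   6,
            ParityShards: 3,
        },
    }
    fmt.Println(vi.EcShardConfig.DataShards + vi.EcShardConfig.ParityShards) // 9 total shards
}
```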
502
weed/pb/volume_server_pb/volume_server.pb.go
File diff suppressed because it is too large

18
weed/remote_storage/azure/azure_storage_client_test.go

@ -229,22 +229,22 @@ func TestToMetadata(t *testing.T) {
s3_constants.AmzUserMetaPrefix + "789": []byte("value3"), s3_constants.AmzUserMetaPrefix + "789": []byte("value3"),
}, },
expected: map[string]*string{ expected: map[string]*string{
"_123key": stringPtr("value1"), // starts with digit -> prefix _
"_456_2d_test": stringPtr("value2"), // starts with digit AND has dash
"_789": stringPtr("value3"),
"_123key": stringPtr("value1"), // starts with digit -> prefix _
"_456_2d_test": stringPtr("value2"), // starts with digit AND has dash
"_789": stringPtr("value3"),
}, },
}, },
{ {
name: "uppercase and mixed case keys", name: "uppercase and mixed case keys",
input: map[string][]byte{ input: map[string][]byte{
s3_constants.AmzUserMetaPrefix + "My-Key": []byte("value1"),
s3_constants.AmzUserMetaPrefix + "UPPERCASE": []byte("value2"),
s3_constants.AmzUserMetaPrefix + "MiXeD-CaSe": []byte("value3"),
s3_constants.AmzUserMetaPrefix + "My-Key": []byte("value1"),
s3_constants.AmzUserMetaPrefix + "UPPERCASE": []byte("value2"),
s3_constants.AmzUserMetaPrefix + "MiXeD-CaSe": []byte("value3"),
}, },
expected: map[string]*string{ expected: map[string]*string{
"my_2d_key": stringPtr("value1"), // lowercase + dash -> _2d_
"uppercase": stringPtr("value2"),
"mixed_2d_case": stringPtr("value3"),
"my_2d_key": stringPtr("value1"), // lowercase + dash -> _2d_
"uppercase": stringPtr("value2"),
"mixed_2d_case": stringPtr("value3"),
}, },
}, },
{ {

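The expectations above imply three sanitization rules: lowercase the key, encode '-' as "_2d_", and prefix '_' when the result would start with a digit. A hypothetical re-implementation for illustration only; the actual logic lives in the Azure storage client exercised by this test:

```go
package main

import (
    "fmt"
    "strings"
)

// sanitizeKey mirrors the behaviour the test expectations describe; it is not the repo's code.
func sanitizeKey(key string) string {
    key = strings.ToLower(key)
    key = strings.ReplaceAll(key, "-", "_2d_")
    if key != "" && key[0] >= '0' && key[0] <= '9' {
        key = "_" + key
    }
    return key
}

func main() {
    fmt.Println(sanitizeKey("456-test"))   // _456_2d_test
    fmt.Println(sanitizeKey("MiXeD-CaSe")) // mixed_2d_case
}
```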
2
weed/s3api/s3_sse_s3_integration_test.go

@ -78,7 +78,7 @@ func TestSSES3EndToEndSmallFile(t *testing.T) {
// Step 3: Decrypt (simulates what happens during GET)
// This tests the IV retrieval path for inline files
// First, deserialize metadata from storage
retrievedKeyData := mockEntry.Extended[s3_constants.SeaweedFSSSES3Key]
retrievedKey, err := DeserializeSSES3Metadata(retrievedKeyData, keyManager)

10
weed/s3api/s3_validation_utils.go

@ -71,7 +71,7 @@ func ValidateSSES3Key(sseKey *SSES3Key) error {
if sseKey == nil {
return fmt.Errorf("SSE-S3 key cannot be nil")
}
// Validate key bytes
if sseKey.Key == nil {
return fmt.Errorf("SSE-S3 key bytes cannot be nil")
@ -79,22 +79,22 @@ func ValidateSSES3Key(sseKey *SSES3Key) error {
if len(sseKey.Key) != SSES3KeySize {
return fmt.Errorf("invalid SSE-S3 key size: expected %d bytes, got %d", SSES3KeySize, len(sseKey.Key))
}
// Validate algorithm
if sseKey.Algorithm != SSES3Algorithm {
return fmt.Errorf("invalid SSE-S3 algorithm: expected %q, got %q", SSES3Algorithm, sseKey.Algorithm)
}
// Validate key ID (should not be empty)
if sseKey.KeyID == "" {
return fmt.Errorf("SSE-S3 key ID cannot be empty")
}
// IV validation is optional during key creation - it will be set during encryption
// If IV is set, validate its length
if len(sseKey.IV) > 0 && len(sseKey.IV) != s3_constants.AESBlockSize {
return fmt.Errorf("invalid SSE-S3 IV length: expected %d bytes, got %d", s3_constants.AESBlockSize, len(sseKey.IV))
}
return nil
}

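A usage sketch of the validator above; the field names follow the checks in ValidateSSES3Key, and the key material here is all zeros, for illustration only:

```go
package main

import (
    "log"

    "github.com/seaweedfs/seaweedfs/weed/s3api"
)

func main() {
    key := &s3api.SSES3Key{
        Key:       make([]byte, s3api.SSES3KeySize), // illustrative zero key
        Algorithm: s3api.SSES3Algorithm,
        KeyID:     "example-key-1",
        // IV intentionally left empty: allowed at creation, set during encryption.
    }
    if err := s3api.ValidateSSES3Key(key); err != nil {
        log.Fatal(err)
    }
    log.Println("key passes validation")
}
```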
64
weed/server/volume_grpc_erasure_coding.go

@ -50,20 +50,38 @@ func (vs *VolumeServer) VolumeEcShardsGenerate(ctx context.Context, req *volume_
return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection) return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection)
} }
// Create EC context - prefer existing .vif config if present (for regeneration scenarios)
ecCtx := erasure_coding.NewDefaultECContext(req.Collection, needle.VolumeId(req.VolumeId))
if volumeInfo, _, found, _ := volume_info.MaybeLoadVolumeInfo(baseFileName + ".vif"); found && volumeInfo.EcShardConfig != nil {
ds := int(volumeInfo.EcShardConfig.DataShards)
ps := int(volumeInfo.EcShardConfig.ParityShards)
// Validate and use existing EC config
if ds > 0 && ps > 0 && ds+ps <= erasure_coding.MaxShardCount {
ecCtx.DataShards = ds
ecCtx.ParityShards = ps
glog.V(0).Infof("Using existing EC config for volume %d: %s", req.VolumeId, ecCtx.String())
} else {
glog.Warningf("Invalid EC config in .vif for volume %d (data=%d, parity=%d), using defaults", req.VolumeId, ds, ps)
}
} else {
glog.V(0).Infof("Using default EC config for volume %d: %s", req.VolumeId, ecCtx.String())
}
shouldCleanup := true shouldCleanup := true
defer func() { defer func() {
if !shouldCleanup { if !shouldCleanup {
return return
} }
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
os.Remove(fmt.Sprintf("%s.ec%2d", baseFileName, i))
for i := 0; i < ecCtx.Total(); i++ {
os.Remove(baseFileName + ecCtx.ToExt(i))
} }
os.Remove(v.IndexFileName() + ".ecx") os.Remove(v.IndexFileName() + ".ecx")
}() }()
// write .ec00 ~ .ec13 files
if err := erasure_coding.WriteEcFiles(baseFileName); err != nil {
return nil, fmt.Errorf("WriteEcFiles %s: %v", baseFileName, err)
// write .ec00 ~ .ec[TotalShards-1] files using context
if err := erasure_coding.WriteEcFilesWithContext(baseFileName, ecCtx); err != nil {
return nil, fmt.Errorf("WriteEcFilesWithContext %s: %v", baseFileName, err)
} }
// write .ecx file // write .ecx file
@ -84,6 +102,21 @@ func (vs *VolumeServer) VolumeEcShardsGenerate(ctx context.Context, req *volume_
datSize, _, _ := v.FileStat() datSize, _, _ := v.FileStat()
volumeInfo.DatFileSize = int64(datSize) volumeInfo.DatFileSize = int64(datSize)
// Validate EC configuration before saving to .vif
if ecCtx.DataShards <= 0 || ecCtx.ParityShards <= 0 || ecCtx.Total() > erasure_coding.MaxShardCount {
return nil, fmt.Errorf("invalid EC config before saving: data=%d, parity=%d, total=%d (max=%d)",
ecCtx.DataShards, ecCtx.ParityShards, ecCtx.Total(), erasure_coding.MaxShardCount)
}
// Save EC configuration to VolumeInfo
volumeInfo.EcShardConfig = &volume_server_pb.EcShardConfig{
DataShards: uint32(ecCtx.DataShards),
ParityShards: uint32(ecCtx.ParityShards),
}
glog.V(1).Infof("Saving EC config to .vif for volume %d: %d+%d (total: %d)",
req.VolumeId, ecCtx.DataShards, ecCtx.ParityShards, ecCtx.Total())
if err := volume_info.SaveVolumeInfo(baseFileName+".vif", volumeInfo); err != nil { if err := volume_info.SaveVolumeInfo(baseFileName+".vif", volumeInfo); err != nil {
return nil, fmt.Errorf("SaveVolumeInfo %s: %v", baseFileName, err) return nil, fmt.Errorf("SaveVolumeInfo %s: %v", baseFileName, err)
} }
@ -442,9 +475,10 @@ func (vs *VolumeServer) VolumeEcShardsToVolume(ctx context.Context, req *volume_
glog.V(0).Infof("VolumeEcShardsToVolume: %v", req) glog.V(0).Infof("VolumeEcShardsToVolume: %v", req)
// collect .ec00 ~ .ec09 files
shardFileNames := make([]string, erasure_coding.DataShardsCount)
v, found := vs.store.CollectEcShards(needle.VolumeId(req.VolumeId), shardFileNames)
// Collect all EC shards (NewEcVolume will load EC config from .vif into v.ECContext)
// Use MaxShardCount (32) to support custom EC ratios up to 32 total shards
tempShards := make([]string, erasure_coding.MaxShardCount)
v, found := vs.store.CollectEcShards(needle.VolumeId(req.VolumeId), tempShards)
if !found { if !found {
return nil, fmt.Errorf("ec volume %d not found", req.VolumeId) return nil, fmt.Errorf("ec volume %d not found", req.VolumeId)
} }
@ -453,7 +487,19 @@ func (vs *VolumeServer) VolumeEcShardsToVolume(ctx context.Context, req *volume_
return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection) return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection)
} }
for shardId := 0; shardId < erasure_coding.DataShardsCount; shardId++ {
// Use EC context (already loaded from .vif) to determine data shard count
dataShards := v.ECContext.DataShards
// Defensive validation to prevent panics from corrupted ECContext
if dataShards <= 0 || dataShards > erasure_coding.MaxShardCount {
return nil, fmt.Errorf("invalid data shard count %d for volume %d (must be 1..%d)", dataShards, req.VolumeId, erasure_coding.MaxShardCount)
}
shardFileNames := tempShards[:dataShards]
glog.V(1).Infof("Using EC config from volume %d: %d data shards", req.VolumeId, dataShards)
// Verify all data shards are present
for shardId := 0; shardId < dataShards; shardId++ {
if shardFileNames[shardId] == "" { if shardFileNames[shardId] == "" {
return nil, fmt.Errorf("ec volume %d missing shard %d", req.VolumeId, shardId) return nil, fmt.Errorf("ec volume %d missing shard %d", req.VolumeId, shardId)
} }

14
weed/shell/command_ec_common.go

@ -622,7 +622,8 @@ func (ecb *ecBalancer) deleteDuplicatedEcShards(collection string) error {
func (ecb *ecBalancer) doDeduplicateEcShards(collection string, vid needle.VolumeId, locations []*EcNode) error { func (ecb *ecBalancer) doDeduplicateEcShards(collection string, vid needle.VolumeId, locations []*EcNode) error {
// check whether this volume has ecNodes that are over average // check whether this volume has ecNodes that are over average
shardToLocations := make([][]*EcNode, erasure_coding.TotalShardsCount)
// Use MaxShardCount (32) to support custom EC ratios
shardToLocations := make([][]*EcNode, erasure_coding.MaxShardCount)
for _, ecNode := range locations { for _, ecNode := range locations {
shardBits := findEcVolumeShards(ecNode, vid) shardBits := findEcVolumeShards(ecNode, vid)
for _, shardId := range shardBits.ShardIds() { for _, shardId := range shardBits.ShardIds() {
@ -677,11 +678,16 @@ func countShardsByRack(vid needle.VolumeId, locations []*EcNode) map[string]int
func (ecb *ecBalancer) doBalanceEcShardsAcrossRacks(collection string, vid needle.VolumeId, locations []*EcNode) error { func (ecb *ecBalancer) doBalanceEcShardsAcrossRacks(collection string, vid needle.VolumeId, locations []*EcNode) error {
racks := ecb.racks() racks := ecb.racks()
// calculate average number of shards an ec rack should have for one volume
averageShardsPerEcRack := ceilDivide(erasure_coding.TotalShardsCount, len(racks))
// see the volume's shards are in how many racks, and how many in each rack // see the volume's shards are in how many racks, and how many in each rack
rackToShardCount := countShardsByRack(vid, locations) rackToShardCount := countShardsByRack(vid, locations)
// Calculate actual total shards for this volume (not hardcoded default)
var totalShardsForVolume int
for _, count := range rackToShardCount {
totalShardsForVolume += count
}
// calculate average number of shards an ec rack should have for one volume
averageShardsPerEcRack := ceilDivide(totalShardsForVolume, len(racks))
rackEcNodesWithVid := groupBy(locations, func(ecNode *EcNode) string { rackEcNodesWithVid := groupBy(locations, func(ecNode *EcNode) string {
return string(ecNode.rack) return string(ecNode.rack)
}) })

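A worked example of the new rack average with hypothetical numbers, assuming ceilDivide is a ceiling division:

```go
package main

import "fmt"

func main() {
    // Sum the shards actually reported for this volume, then ceiling-divide by rack count.
    rackToShardCount := map[string]int{"rack-a": 6, "rack-b": 5, "rack-c": 3} // 14 shards total
    totalShardsForVolume := 0
    for _, count := range rackToShardCount {
        totalShardsForVolume += count
    }
    racks := len(rackToShardCount)
    averageShardsPerEcRack := (totalShardsForVolume + racks - 1) / racks
    fmt.Println(totalShardsForVolume, averageShardsPerEcRack) // 14 5
}
```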
3
weed/shell/command_ec_rebuild.go

@ -264,7 +264,8 @@ func (ecShardMap EcShardMap) registerEcNode(ecNode *EcNode, collection string) {
if shardInfo.Collection == collection { if shardInfo.Collection == collection {
existing, found := ecShardMap[needle.VolumeId(shardInfo.Id)] existing, found := ecShardMap[needle.VolumeId(shardInfo.Id)]
if !found { if !found {
existing = make([][]*EcNode, erasure_coding.TotalShardsCount)
// Use MaxShardCount (32) to support custom EC ratios
existing = make([][]*EcNode, erasure_coding.MaxShardCount)
ecShardMap[needle.VolumeId(shardInfo.Id)] = existing ecShardMap[needle.VolumeId(shardInfo.Id)] = existing
} }
for _, shardId := range erasure_coding.ShardBits(shardInfo.EcIndexBits).ShardIds() { for _, shardId := range erasure_coding.ShardBits(shardInfo.EcIndexBits).ShardIds() {

13
weed/storage/disk_location_ec.go

@ -16,7 +16,9 @@ import (
) )
var ( var (
re = regexp.MustCompile(`\.ec[0-9][0-9]`)
// Match .ec00 through .ec999 (currently only .ec00-.ec31 are used)
// Using \d{2,3} for future-proofing if MaxShardCount is ever increased beyond 99
re = regexp.MustCompile(`\.ec\d{2,3}`)
) )
func (l *DiskLocation) FindEcVolume(vid needle.VolumeId) (*erasure_coding.EcVolume, bool) { func (l *DiskLocation) FindEcVolume(vid needle.VolumeId) (*erasure_coding.EcVolume, bool) {
@ -398,8 +400,8 @@ func (l *DiskLocation) validateEcVolume(collection string, vid needle.VolumeId)
var actualShardSize int64 = -1 var actualShardSize int64 = -1
// Count shards and validate they all have the same size (required for Reed-Solomon EC) // Count shards and validate they all have the same size (required for Reed-Solomon EC)
// Shard files (.ec00 - .ec13) are always in l.Directory, not l.IdxDirectory
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
// Check up to MaxShardCount (32) to support custom EC ratios
for i := 0; i < erasure_coding.MaxShardCount; i++ {
shardFileName := baseFileName + erasure_coding.ToExt(i) shardFileName := baseFileName + erasure_coding.ToExt(i)
fi, err := os.Stat(shardFileName) fi, err := os.Stat(shardFileName)
@ -472,8 +474,9 @@ func (l *DiskLocation) removeEcVolumeFiles(collection string, vid needle.VolumeI
removeFile(indexBaseFileName+".ecx", "EC index file") removeFile(indexBaseFileName+".ecx", "EC index file")
removeFile(indexBaseFileName+".ecj", "EC journal file") removeFile(indexBaseFileName+".ecj", "EC journal file")
// Remove all EC shard files (.ec00 ~ .ec13) from data directory
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
// Remove all EC shard files (.ec00 ~ .ec31) from data directory
// Use MaxShardCount (32) to support custom EC ratios
for i := 0; i < erasure_coding.MaxShardCount; i++ {
removeFile(baseFileName+erasure_coding.ToExt(i), "EC shard file") removeFile(baseFileName+erasure_coding.ToExt(i), "EC shard file")
} }
} }

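A quick check of what the widened pattern does and does not match:

```go
package main

import (
    "fmt"
    "regexp"
)

func main() {
    // The updated pattern from disk_location_ec.go: two or three digits after ".ec".
    re := regexp.MustCompile(`\.ec\d{2,3}`)
    fmt.Println(re.MatchString("1.ec00")) // true
    fmt.Println(re.MatchString("1.ec31")) // true  (highest shard id when MaxShardCount is 32)
    fmt.Println(re.MatchString("1.ec7"))  // false (single digit)
    fmt.Println(re.MatchString("1.ecx"))  // false (index file, not a shard)
}
```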
46
weed/storage/erasure_coding/ec_context.go

@ -0,0 +1,46 @@
package erasure_coding
import (
"fmt"
"github.com/klauspost/reedsolomon"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
)
// ECContext encapsulates erasure coding parameters for encoding/decoding operations
type ECContext struct {
DataShards int
ParityShards int
Collection string
VolumeId needle.VolumeId
}
// Total returns the total number of shards (data + parity)
func (ctx *ECContext) Total() int {
return ctx.DataShards + ctx.ParityShards
}
// NewDefaultECContext creates a context with default 10+4 shard configuration
func NewDefaultECContext(collection string, volumeId needle.VolumeId) *ECContext {
return &ECContext{
DataShards: DataShardsCount,
ParityShards: ParityShardsCount,
Collection: collection,
VolumeId: volumeId,
}
}
// CreateEncoder creates a Reed-Solomon encoder for this context
func (ctx *ECContext) CreateEncoder() (reedsolomon.Encoder, error) {
return reedsolomon.New(ctx.DataShards, ctx.ParityShards)
}
// ToExt returns the file extension for a given shard index
func (ctx *ECContext) ToExt(shardIndex int) string {
return fmt.Sprintf(".ec%02d", shardIndex)
}
// String returns a human-readable representation of the EC configuration
func (ctx *ECContext) String() string {
return fmt.Sprintf("%d+%d (total: %d)", ctx.DataShards, ctx.ParityShards, ctx.Total())
}

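A short usage sketch of the new type: the default context plus a hypothetical custom 6+3 layout, the kind of configuration an EcShardConfig in a .vif would produce.

```go
package main

import (
    "fmt"

    "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
    "github.com/seaweedfs/seaweedfs/weed/storage/needle"
)

func main() {
    // Default 10+4 layout.
    def := erasure_coding.NewDefaultECContext("pictures", needle.VolumeId(7))
    fmt.Println(def.String()) // "10+4 (total: 14)"

    // Hypothetical custom layout: 6 data shards, 3 parity shards.
    custom := &erasure_coding.ECContext{
        Collection:   "pictures",
        VolumeId:     needle.VolumeId(7),
        DataShards:   6,
        ParityShards: 3,
    }
    for i := 0; i < custom.Total(); i++ {
        fmt.Print(custom.ToExt(i), " ") // .ec00 .ec01 ... .ec08
    }
    fmt.Println()
}
```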
113
weed/storage/erasure_coding/ec_encoder.go

@ -11,6 +11,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/storage/idx" "github.com/seaweedfs/seaweedfs/weed/storage/idx"
"github.com/seaweedfs/seaweedfs/weed/storage/needle_map" "github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
"github.com/seaweedfs/seaweedfs/weed/storage/types" "github.com/seaweedfs/seaweedfs/weed/storage/types"
"github.com/seaweedfs/seaweedfs/weed/storage/volume_info"
"github.com/seaweedfs/seaweedfs/weed/util" "github.com/seaweedfs/seaweedfs/weed/util"
) )
@ -18,6 +19,7 @@ const (
DataShardsCount = 10 DataShardsCount = 10
ParityShardsCount = 4 ParityShardsCount = 4
TotalShardsCount = DataShardsCount + ParityShardsCount TotalShardsCount = DataShardsCount + ParityShardsCount
MaxShardCount = 32 // Maximum number of shards since ShardBits is uint32 (bits 0-31)
MinTotalDisks = TotalShardsCount/ParityShardsCount + 1 MinTotalDisks = TotalShardsCount/ParityShardsCount + 1
ErasureCodingLargeBlockSize = 1024 * 1024 * 1024 // 1GB ErasureCodingLargeBlockSize = 1024 * 1024 * 1024 // 1GB
ErasureCodingSmallBlockSize = 1024 * 1024 // 1MB ErasureCodingSmallBlockSize = 1024 * 1024 // 1MB
@ -54,20 +56,53 @@ func WriteSortedFileFromIdx(baseFileName string, ext string) (e error) {
return nil return nil
} }
// WriteEcFiles generates .ec00 ~ .ec13 files
// WriteEcFiles generates .ec00 ~ .ec13 files using default EC context
func WriteEcFiles(baseFileName string) error { func WriteEcFiles(baseFileName string) error {
return generateEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
ctx := NewDefaultECContext("", 0)
return WriteEcFilesWithContext(baseFileName, ctx)
}
// WriteEcFilesWithContext generates EC files using the provided context
func WriteEcFilesWithContext(baseFileName string, ctx *ECContext) error {
return generateEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, ctx)
} }
func RebuildEcFiles(baseFileName string) ([]uint32, error) { func RebuildEcFiles(baseFileName string) ([]uint32, error) {
return generateMissingEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
// Attempt to load EC config from .vif file to preserve original configuration
var ctx *ECContext
if volumeInfo, _, found, _ := volume_info.MaybeLoadVolumeInfo(baseFileName + ".vif"); found && volumeInfo.EcShardConfig != nil {
ds := int(volumeInfo.EcShardConfig.DataShards)
ps := int(volumeInfo.EcShardConfig.ParityShards)
// Validate EC config before using it
if ds > 0 && ps > 0 && ds+ps <= MaxShardCount {
ctx = &ECContext{
DataShards: ds,
ParityShards: ps,
}
glog.V(0).Infof("Rebuilding EC files for %s with config from .vif: %s", baseFileName, ctx.String())
} else {
glog.Warningf("Invalid EC config in .vif for %s (data=%d, parity=%d), using default", baseFileName, ds, ps)
ctx = NewDefaultECContext("", 0)
}
} else {
glog.V(0).Infof("Rebuilding EC files for %s with default config", baseFileName)
ctx = NewDefaultECContext("", 0)
}
return RebuildEcFilesWithContext(baseFileName, ctx)
}
// RebuildEcFilesWithContext rebuilds missing EC files using the provided context
func RebuildEcFilesWithContext(baseFileName string, ctx *ECContext) ([]uint32, error) {
return generateMissingEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, ctx)
} }
func ToExt(ecIndex int) string { func ToExt(ecIndex int) string {
return fmt.Sprintf(".ec%02d", ecIndex) return fmt.Sprintf(".ec%02d", ecIndex)
} }
func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) error {
func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64, ctx *ECContext) error {
file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0) file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
if err != nil { if err != nil {
return fmt.Errorf("failed to open dat file: %w", err) return fmt.Errorf("failed to open dat file: %w", err)
@ -79,21 +114,21 @@ func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64,
return fmt.Errorf("failed to stat dat file: %w", err) return fmt.Errorf("failed to stat dat file: %w", err)
} }
glog.V(0).Infof("encodeDatFile %s.dat size:%d", baseFileName, fi.Size())
err = encodeDatFile(fi.Size(), baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
glog.V(0).Infof("encodeDatFile %s.dat size:%d with EC context %s", baseFileName, fi.Size(), ctx.String())
err = encodeDatFile(fi.Size(), baseFileName, bufferSize, largeBlockSize, file, smallBlockSize, ctx)
if err != nil { if err != nil {
return fmt.Errorf("encodeDatFile: %w", err) return fmt.Errorf("encodeDatFile: %w", err)
} }
return nil return nil
} }
func generateMissingEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) (generatedShardIds []uint32, err error) {
func generateMissingEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64, ctx *ECContext) (generatedShardIds []uint32, err error) {
shardHasData := make([]bool, TotalShardsCount)
inputFiles := make([]*os.File, TotalShardsCount)
outputFiles := make([]*os.File, TotalShardsCount)
for shardId := 0; shardId < TotalShardsCount; shardId++ {
shardFileName := baseFileName + ToExt(shardId)
shardHasData := make([]bool, ctx.Total())
inputFiles := make([]*os.File, ctx.Total())
outputFiles := make([]*os.File, ctx.Total())
for shardId := 0; shardId < ctx.Total(); shardId++ {
shardFileName := baseFileName + ctx.ToExt(shardId)
if util.FileExists(shardFileName) { if util.FileExists(shardFileName) {
shardHasData[shardId] = true shardHasData[shardId] = true
inputFiles[shardId], err = os.OpenFile(shardFileName, os.O_RDONLY, 0) inputFiles[shardId], err = os.OpenFile(shardFileName, os.O_RDONLY, 0)
@ -111,14 +146,14 @@ func generateMissingEcFiles(baseFileName string, bufferSize int, largeBlockSize
} }
} }
err = rebuildEcFiles(shardHasData, inputFiles, outputFiles)
err = rebuildEcFiles(shardHasData, inputFiles, outputFiles, ctx)
if err != nil { if err != nil {
return nil, fmt.Errorf("rebuildEcFiles: %w", err) return nil, fmt.Errorf("rebuildEcFiles: %w", err)
} }
return return
} }
func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File, ctx *ECContext) error {
bufferSize := int64(len(buffers[0])) bufferSize := int64(len(buffers[0]))
if bufferSize == 0 { if bufferSize == 0 {
@ -131,7 +166,7 @@ func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize i
} }
for b := int64(0); b < batchCount; b++ { for b := int64(0); b < batchCount; b++ {
err := encodeDataOneBatch(file, enc, startOffset+b*bufferSize, blockSize, buffers, outputs)
err := encodeDataOneBatch(file, enc, startOffset+b*bufferSize, blockSize, buffers, outputs, ctx)
if err != nil { if err != nil {
return err return err
} }
@ -140,9 +175,9 @@ func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize i
return nil return nil
} }
func openEcFiles(baseFileName string, forRead bool) (files []*os.File, err error) {
for i := 0; i < TotalShardsCount; i++ {
fname := baseFileName + ToExt(i)
func openEcFiles(baseFileName string, forRead bool, ctx *ECContext) (files []*os.File, err error) {
for i := 0; i < ctx.Total(); i++ {
fname := baseFileName + ctx.ToExt(i)
openOption := os.O_TRUNC | os.O_CREATE | os.O_WRONLY openOption := os.O_TRUNC | os.O_CREATE | os.O_WRONLY
if forRead { if forRead {
openOption = os.O_RDONLY openOption = os.O_RDONLY
@ -164,10 +199,10 @@ func closeEcFiles(files []*os.File) {
} }
} }
func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File, ctx *ECContext) error {
// read data into buffers // read data into buffers
for i := 0; i < DataShardsCount; i++ {
for i := 0; i < ctx.DataShards; i++ {
n, err := file.ReadAt(buffers[i], startOffset+blockSize*int64(i)) n, err := file.ReadAt(buffers[i], startOffset+blockSize*int64(i))
if err != nil { if err != nil {
if err != io.EOF { if err != io.EOF {
@ -186,7 +221,7 @@ func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blo
return err return err
} }
for i := 0; i < TotalShardsCount; i++ {
for i := 0; i < ctx.Total(); i++ {
_, err := outputs[i].Write(buffers[i]) _, err := outputs[i].Write(buffers[i])
if err != nil { if err != nil {
return err return err
@ -196,53 +231,57 @@ func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blo
return nil return nil
} }
func encodeDatFile(remainingSize int64, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
func encodeDatFile(remainingSize int64, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64, ctx *ECContext) error {
var processedSize int64 var processedSize int64
enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
enc, err := ctx.CreateEncoder()
if err != nil { if err != nil {
return fmt.Errorf("failed to create encoder: %w", err) return fmt.Errorf("failed to create encoder: %w", err)
} }
buffers := make([][]byte, TotalShardsCount)
buffers := make([][]byte, ctx.Total())
for i := range buffers { for i := range buffers {
buffers[i] = make([]byte, bufferSize) buffers[i] = make([]byte, bufferSize)
} }
outputs, err := openEcFiles(baseFileName, false)
outputs, err := openEcFiles(baseFileName, false, ctx)
defer closeEcFiles(outputs) defer closeEcFiles(outputs)
if err != nil { if err != nil {
return fmt.Errorf("failed to open ec files %s: %v", baseFileName, err) return fmt.Errorf("failed to open ec files %s: %v", baseFileName, err)
} }
for remainingSize > largeBlockSize*DataShardsCount {
err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs)
// Pre-calculate row sizes to avoid redundant calculations in loops
largeRowSize := largeBlockSize * int64(ctx.DataShards)
smallRowSize := smallBlockSize * int64(ctx.DataShards)
for remainingSize >= largeRowSize {
err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs, ctx)
if err != nil { if err != nil {
return fmt.Errorf("failed to encode large chunk data: %w", err) return fmt.Errorf("failed to encode large chunk data: %w", err)
} }
remainingSize -= largeBlockSize * DataShardsCount
processedSize += largeBlockSize * DataShardsCount
remainingSize -= largeRowSize
processedSize += largeRowSize
} }
for remainingSize > 0 { for remainingSize > 0 {
err = encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs)
err = encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs, ctx)
if err != nil { if err != nil {
return fmt.Errorf("failed to encode small chunk data: %w", err) return fmt.Errorf("failed to encode small chunk data: %w", err)
} }
remainingSize -= smallBlockSize * DataShardsCount
processedSize += smallBlockSize * DataShardsCount
remainingSize -= smallRowSize
processedSize += smallRowSize
} }
return nil return nil
} }
func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*os.File) error {
func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*os.File, ctx *ECContext) error {
enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
enc, err := ctx.CreateEncoder()
if err != nil { if err != nil {
return fmt.Errorf("failed to create encoder: %w", err) return fmt.Errorf("failed to create encoder: %w", err)
} }
buffers := make([][]byte, TotalShardsCount)
buffers := make([][]byte, ctx.Total())
for i := range buffers { for i := range buffers {
if shardHasData[i] { if shardHasData[i] {
buffers[i] = make([]byte, ErasureCodingSmallBlockSize) buffers[i] = make([]byte, ErasureCodingSmallBlockSize)
@ -254,7 +293,7 @@ func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*o
for { for {
// read the input data from files // read the input data from files
for i := 0; i < TotalShardsCount; i++ {
for i := 0; i < ctx.Total(); i++ {
if shardHasData[i] { if shardHasData[i] {
n, _ := inputFiles[i].ReadAt(buffers[i], startOffset) n, _ := inputFiles[i].ReadAt(buffers[i], startOffset)
if n == 0 { if n == 0 {
@ -278,7 +317,7 @@ func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*o
} }
// write the data to output files // write the data to output files
for i := 0; i < TotalShardsCount; i++ {
for i := 0; i < ctx.Total(); i++ {
if !shardHasData[i] { if !shardHasData[i] {
n, _ := outputFiles[i].WriteAt(buffers[i][:inputBufferDataSize], startOffset) n, _ := outputFiles[i].WriteAt(buffers[i][:inputBufferDataSize], startOffset)
if inputBufferDataSize != n { if inputBufferDataSize != n {

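The encoding loop above consumes the .dat file in "rows" of dataShards blocks: large rows while a full row remains, then small rows for the tail. A back-of-the-envelope sketch with a hypothetical 25 GiB file and the default 10 data shards:

```go
package main

import "fmt"

func main() {
    const (
        dataShards = 10
        largeBlock = int64(1024 * 1024 * 1024) // ErasureCodingLargeBlockSize (1GB)
        smallBlock = int64(1024 * 1024)        // ErasureCodingSmallBlockSize (1MB)
    )
    datSize := int64(25) * 1024 * 1024 * 1024 // hypothetical 25 GiB .dat file

    largeRowSize := largeBlock * dataShards // 10 GiB consumed per large row
    smallRowSize := smallBlock * dataShards // 10 MiB consumed per small row

    largeRows := datSize / largeRowSize                   // 2 full large rows (20 GiB)
    rest := datSize % largeRowSize                        // 5 GiB left over
    smallRows := (rest + smallRowSize - 1) / smallRowSize // 512 small rows, last one zero-padded if short

    fmt.Println(largeRows, smallRows) // 2 512
}
```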
19
weed/storage/erasure_coding/ec_test.go

@ -23,7 +23,10 @@ func TestEncodingDecoding(t *testing.T) {
bufferSize := 50 bufferSize := 50
baseFileName := "1" baseFileName := "1"
err := generateEcFiles(baseFileName, bufferSize, largeBlockSize, smallBlockSize)
// Create default EC context for testing
ctx := NewDefaultECContext("", 0)
err := generateEcFiles(baseFileName, bufferSize, largeBlockSize, smallBlockSize, ctx)
if err != nil { if err != nil {
t.Logf("generateEcFiles: %v", err) t.Logf("generateEcFiles: %v", err)
} }
@ -33,16 +36,16 @@ func TestEncodingDecoding(t *testing.T) {
t.Logf("WriteSortedFileFromIdx: %v", err) t.Logf("WriteSortedFileFromIdx: %v", err)
} }
err = validateFiles(baseFileName)
err = validateFiles(baseFileName, ctx)
if err != nil { if err != nil {
t.Logf("WriteSortedFileFromIdx: %v", err) t.Logf("WriteSortedFileFromIdx: %v", err)
} }
removeGeneratedFiles(baseFileName)
removeGeneratedFiles(baseFileName, ctx)
} }
func validateFiles(baseFileName string) error {
func validateFiles(baseFileName string, ctx *ECContext) error {
nm, err := readNeedleMap(baseFileName) nm, err := readNeedleMap(baseFileName)
if err != nil { if err != nil {
return fmt.Errorf("readNeedleMap: %v", err) return fmt.Errorf("readNeedleMap: %v", err)
@ -60,7 +63,7 @@ func validateFiles(baseFileName string) error {
return fmt.Errorf("failed to stat dat file: %v", err) return fmt.Errorf("failed to stat dat file: %v", err)
} }
ecFiles, err := openEcFiles(baseFileName, true)
ecFiles, err := openEcFiles(baseFileName, true, ctx)
if err != nil { if err != nil {
return fmt.Errorf("error opening ec files: %w", err) return fmt.Errorf("error opening ec files: %w", err)
} }
@ -184,9 +187,9 @@ func readFromFile(file *os.File, data []byte, ecFileOffset int64) (err error) {
return return
} }
func removeGeneratedFiles(baseFileName string) {
for i := 0; i < DataShardsCount+ParityShardsCount; i++ {
fname := fmt.Sprintf("%s.ec%02d", baseFileName, i)
func removeGeneratedFiles(baseFileName string, ctx *ECContext) {
for i := 0; i < ctx.Total(); i++ {
fname := baseFileName + ctx.ToExt(i)
os.Remove(fname) os.Remove(fname)
} }
os.Remove(baseFileName + ".ecx") os.Remove(baseFileName + ".ecx")

28
weed/storage/erasure_coding/ec_volume.go

@ -41,7 +41,8 @@ type EcVolume struct {
ecjFileAccessLock sync.Mutex
diskType types.DiskType
datFileSize int64
ExpireAtSec uint64 //ec volume destroy time, calculated from the ec volume was created
ECContext *ECContext // EC encoding parameters
}
func NewEcVolume(diskType types.DiskType, dir string, dirIdx string, collection string, vid needle.VolumeId) (ev *EcVolume, err error) { func NewEcVolume(diskType types.DiskType, dir string, dirIdx string, collection string, vid needle.VolumeId) (ev *EcVolume, err error) {
@ -73,9 +74,32 @@ func NewEcVolume(diskType types.DiskType, dir string, dirIdx string, collection
ev.Version = needle.Version(volumeInfo.Version) ev.Version = needle.Version(volumeInfo.Version)
ev.datFileSize = volumeInfo.DatFileSize ev.datFileSize = volumeInfo.DatFileSize
ev.ExpireAtSec = volumeInfo.ExpireAtSec ev.ExpireAtSec = volumeInfo.ExpireAtSec
// Initialize EC context from .vif if present; fallback to defaults
if volumeInfo.EcShardConfig != nil {
ds := int(volumeInfo.EcShardConfig.DataShards)
ps := int(volumeInfo.EcShardConfig.ParityShards)
// Validate shard counts to prevent zero or invalid values
if ds <= 0 || ps <= 0 || ds+ps > MaxShardCount {
glog.Warningf("Invalid EC config in VolumeInfo for volume %d (data=%d, parity=%d), using defaults", vid, ds, ps)
ev.ECContext = NewDefaultECContext(collection, vid)
} else {
ev.ECContext = &ECContext{
Collection: collection,
VolumeId: vid,
DataShards: ds,
ParityShards: ps,
}
glog.V(1).Infof("Loaded EC config from VolumeInfo for volume %d: %s", vid, ev.ECContext.String())
}
} else {
ev.ECContext = NewDefaultECContext(collection, vid)
}
} else { } else {
glog.Warningf("vif file not found,volumeId:%d, filename:%s", vid, dataBaseFileName) glog.Warningf("vif file not found,volumeId:%d, filename:%s", vid, dataBaseFileName)
volume_info.SaveVolumeInfo(dataBaseFileName+".vif", &volume_server_pb.VolumeInfo{Version: uint32(ev.Version)}) volume_info.SaveVolumeInfo(dataBaseFileName+".vif", &volume_server_pb.VolumeInfo{Version: uint32(ev.Version)})
ev.ECContext = NewDefaultECContext(collection, vid)
} }
ev.ShardLocations = make(map[ShardId][]pb.ServerAddress) ev.ShardLocations = make(map[ShardId][]pb.ServerAddress)
@ -260,7 +284,7 @@ func (ev *EcVolume) LocateEcShardNeedleInterval(version needle.Version, offset i
if ev.datFileSize > 0 { if ev.datFileSize > 0 {
// To get the correct LargeBlockRowsCount // To get the correct LargeBlockRowsCount
// use datFileSize to calculate the shardSize to match the EC encoding logic. // use datFileSize to calculate the shardSize to match the EC encoding logic.
shardSize = ev.datFileSize / DataShardsCount
shardSize = ev.datFileSize / int64(ev.ECContext.DataShards)
} }
// calculate the locations in the ec shards // calculate the locations in the ec shards
intervals = LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, shardSize, offset, types.Size(needle.GetActualSize(size, version))) intervals = LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, shardSize, offset, types.Size(needle.GetActualSize(size, version)))

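With a custom layout loaded from the .vif, the per-shard extent used by LocateEcShardNeedleInterval scales with the configured data shard count. A tiny worked example (hypothetical 12 GiB volume with 6 data shards):

```go
package main

import "fmt"

func main() {
    datFileSize := int64(12) * 1024 * 1024 * 1024 // original .dat size recorded in the .vif
    dataShards := int64(6)                        // from EcShardConfig instead of the default 10
    shardSize := datFileSize / dataShards
    fmt.Println(shardSize) // 2147483648, i.e. 2 GiB of original data per shard
}
```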
21
weed/storage/erasure_coding/ec_volume_info.go

@ -87,7 +87,7 @@ func (ecInfo *EcVolumeInfo) Minus(other *EcVolumeInfo) *EcVolumeInfo {
// Copy shard sizes for remaining shards // Copy shard sizes for remaining shards
retIndex := 0 retIndex := 0
for shardId := ShardId(0); shardId < TotalShardsCount && retIndex < len(ret.ShardSizes); shardId++ {
for shardId := ShardId(0); shardId < ShardId(MaxShardCount) && retIndex < len(ret.ShardSizes); shardId++ {
if ret.ShardBits.HasShardId(shardId) { if ret.ShardBits.HasShardId(shardId) {
if size, exists := ecInfo.GetShardSize(shardId); exists { if size, exists := ecInfo.GetShardSize(shardId); exists {
ret.ShardSizes[retIndex] = size ret.ShardSizes[retIndex] = size
@ -119,19 +119,28 @@ func (ecInfo *EcVolumeInfo) ToVolumeEcShardInformationMessage() (ret *master_pb.
type ShardBits uint32 // use bits to indicate the shard id, use 32 bits just for possible future extension type ShardBits uint32 // use bits to indicate the shard id, use 32 bits just for possible future extension
func (b ShardBits) AddShardId(id ShardId) ShardBits { func (b ShardBits) AddShardId(id ShardId) ShardBits {
if id >= MaxShardCount {
return b // Reject out-of-range shard IDs
}
return b | (1 << id) return b | (1 << id)
} }
func (b ShardBits) RemoveShardId(id ShardId) ShardBits { func (b ShardBits) RemoveShardId(id ShardId) ShardBits {
if id >= MaxShardCount {
return b // Reject out-of-range shard IDs
}
return b &^ (1 << id) return b &^ (1 << id)
} }
func (b ShardBits) HasShardId(id ShardId) bool { func (b ShardBits) HasShardId(id ShardId) bool {
if id >= MaxShardCount {
return false // Out-of-range shard IDs are never present
}
return b&(1<<id) > 0 return b&(1<<id) > 0
} }
func (b ShardBits) ShardIds() (ret []ShardId) { func (b ShardBits) ShardIds() (ret []ShardId) {
for i := ShardId(0); i < TotalShardsCount; i++ {
for i := ShardId(0); i < ShardId(MaxShardCount); i++ {
if b.HasShardId(i) { if b.HasShardId(i) {
ret = append(ret, i) ret = append(ret, i)
} }
@ -140,7 +149,7 @@ func (b ShardBits) ShardIds() (ret []ShardId) {
} }
func (b ShardBits) ToUint32Slice() (ret []uint32) { func (b ShardBits) ToUint32Slice() (ret []uint32) {
for i := uint32(0); i < TotalShardsCount; i++ {
for i := uint32(0); i < uint32(MaxShardCount); i++ {
if b.HasShardId(ShardId(i)) { if b.HasShardId(ShardId(i)) {
ret = append(ret, i) ret = append(ret, i)
} }
@ -164,6 +173,8 @@ func (b ShardBits) Plus(other ShardBits) ShardBits {
} }
func (b ShardBits) MinusParityShards() ShardBits { func (b ShardBits) MinusParityShards() ShardBits {
// Removes parity shards from the bit mask
// Assumes default 10+4 EC layout where parity shards are IDs 10-13
for i := DataShardsCount; i < TotalShardsCount; i++ { for i := DataShardsCount; i < TotalShardsCount; i++ {
b = b.RemoveShardId(ShardId(i)) b = b.RemoveShardId(ShardId(i))
} }
@ -205,7 +216,7 @@ func (b ShardBits) IndexToShardId(index int) (shardId ShardId, found bool) {
} }
currentIndex := 0 currentIndex := 0
for i := ShardId(0); i < TotalShardsCount; i++ {
for i := ShardId(0); i < ShardId(MaxShardCount); i++ {
if b.HasShardId(i) { if b.HasShardId(i) {
if currentIndex == index { if currentIndex == index {
return i, true return i, true
@ -234,7 +245,7 @@ func (ecInfo *EcVolumeInfo) resizeShardSizes(prevShardBits ShardBits) {
// Copy existing sizes to new positions based on current ShardBits // Copy existing sizes to new positions based on current ShardBits
if len(ecInfo.ShardSizes) > 0 { if len(ecInfo.ShardSizes) > 0 {
newIndex := 0 newIndex := 0
for shardId := ShardId(0); shardId < TotalShardsCount && newIndex < expectedLength; shardId++ {
for shardId := ShardId(0); shardId < ShardId(MaxShardCount) && newIndex < expectedLength; shardId++ {
if ecInfo.ShardBits.HasShardId(shardId) { if ecInfo.ShardBits.HasShardId(shardId) {
// Try to find the size for this shard in the old array using previous ShardBits // Try to find the size for this shard in the old array using previous ShardBits
if oldIndex, found := prevShardBits.ShardIdToIndex(shardId); found && oldIndex < len(ecInfo.ShardSizes) { if oldIndex, found := prevShardBits.ShardIdToIndex(shardId); found && oldIndex < len(ecInfo.ShardSizes) {

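A small sketch of the bounds-checked bitmask semantics above, using the exported methods as shown in the diff:

```go
package main

import (
    "fmt"

    "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
)

func main() {
    // ShardBits is a uint32 bitmask, one bit per shard id (0 through 31, since MaxShardCount is 32).
    var b erasure_coding.ShardBits
    b = b.AddShardId(0).AddShardId(3).AddShardId(31)
    b = b.AddShardId(40) // out of range for the 32-bit mask: silently ignored

    fmt.Println(b.HasShardId(3))  // true
    fmt.Println(b.HasShardId(40)) // false
    fmt.Println(b.ShardIds())     // [0 3 31]
}
```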
3
weed/storage/store_ec.go

@ -350,7 +350,8 @@ func (s *Store) recoverOneRemoteEcShardInterval(needleId types.NeedleId, ecVolum
return 0, false, fmt.Errorf("failed to create encoder: %w", err) return 0, false, fmt.Errorf("failed to create encoder: %w", err)
} }
bufs := make([][]byte, erasure_coding.TotalShardsCount)
// Use MaxShardCount to support custom EC ratios up to 32 shards
bufs := make([][]byte, erasure_coding.MaxShardCount)
var wg sync.WaitGroup var wg sync.WaitGroup
ecVolume.ShardLocationsLock.RLock() ecVolume.ShardLocationsLock.RLock()

11
weed/topology/topology_ec.go

@ -10,7 +10,8 @@ import (
type EcShardLocations struct {
Collection string
Locations [erasure_coding.TotalShardsCount][]*DataNode
// Use MaxShardCount (32) to support custom EC ratios
Locations [erasure_coding.MaxShardCount][]*DataNode
}
func (t *Topology) SyncDataNodeEcShards(shardInfos []*master_pb.VolumeEcShardInformationMessage, dn *DataNode) (newShards, deletedShards []*erasure_coding.EcVolumeInfo) {
@ -90,6 +91,10 @@ func NewEcShardLocations(collection string) *EcShardLocations {
}
func (loc *EcShardLocations) AddShard(shardId erasure_coding.ShardId, dn *DataNode) (added bool) {
// Defensive bounds check to prevent panic with out-of-range shard IDs
if int(shardId) >= erasure_coding.MaxShardCount {
return false
}
dataNodes := loc.Locations[shardId]
for _, n := range dataNodes {
if n.Id() == dn.Id() {
@ -101,6 +106,10 @@ func (loc *EcShardLocations) DeleteShard(shardId erasure_coding.ShardId, dn *DataNo
}
func (loc *EcShardLocations) DeleteShard(shardId erasure_coding.ShardId, dn *DataNode) (deleted bool) {
// Defensive bounds check to prevent panic with out-of-range shard IDs
if int(shardId) >= erasure_coding.MaxShardCount {
return false
}
dataNodes := loc.Locations[shardId]
foundIndex := -1
for index, n := range dataNodes {
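Because Locations is a fixed-size array indexed directly by shard id, the guard added above is what keeps an oversized id from panicking. A self-contained sketch of the same pattern (shardLocations, locations and addShard are illustrative names, not the SeaweedFS API):

    package main

    import "fmt"

    const maxShardCount = 32

    type shardLocations struct {
        locations [maxShardCount][]string // indexed by shard id; servers holding each shard
    }

    func (l *shardLocations) addShard(shardId int, server string) bool {
        if shardId < 0 || shardId >= maxShardCount { // defensive bounds check
            return false
        }
        l.locations[shardId] = append(l.locations[shardId], server)
        return true
    }

    func main() {
        var l shardLocations
        fmt.Println(l.addShard(20, "dn1"), l.addShard(40, "dn2")) // true false
    }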

202
weed/util/log_buffer/log_buffer_flush_gap_test.go

@ -15,10 +15,11 @@ import (
// are lost in the gap between flushed disk data and in-memory buffer.
//
// OBSERVED BEHAVIOR FROM LOGS:
// Request offset: 1764
// Disk contains: 1000-1763 (764 messages)
// Memory buffer starts at: 1800
// Gap: 1764-1799 (36 messages) ← MISSING!
//
// Request offset: 1764
// Disk contains: 1000-1763 (764 messages)
// Memory buffer starts at: 1800
// Gap: 1764-1799 (36 messages) ← MISSING!
//
// This test verifies:
// 1. All messages sent to buffer are accounted for
@ -27,46 +28,46 @@ func TestFlushOffsetGap_ReproduceDataLoss(t *testing.T) {
func TestFlushOffsetGap_ReproduceDataLoss(t *testing.T) {
var flushedMessages []*filer_pb.LogEntry
var flushMu sync.Mutex
flushFn := func(logBuffer *LogBuffer, startTime, stopTime time.Time, buf []byte, minOffset, maxOffset int64) {
t.Logf("FLUSH: minOffset=%d maxOffset=%d size=%d bytes", minOffset, maxOffset, len(buf))
// Parse and store flushed messages
flushMu.Lock()
defer flushMu.Unlock()
// Parse buffer to extract messages
parsedCount := 0
for pos := 0; pos+4 < len(buf); {
if pos+4 > len(buf) {
break
}
size := uint32(buf[pos])<<24 | uint32(buf[pos+1])<<16 | uint32(buf[pos+2])<<8 | uint32(buf[pos+3])
if pos+4+int(size) > len(buf) {
break
}
entryData := buf[pos+4 : pos+4+int(size)]
logEntry := &filer_pb.LogEntry{}
if err := proto.Unmarshal(entryData, logEntry); err == nil {
flushedMessages = append(flushedMessages, logEntry)
parsedCount++
}
pos += 4 + int(size)
}
t.Logf(" Parsed %d messages from flush buffer", parsedCount)
}
logBuffer := NewLogBuffer("test", 100*time.Millisecond, flushFn, nil, nil)
defer logBuffer.ShutdownLogBuffer()
// Send 100 messages
messageCount := 100
t.Logf("Sending %d messages...", messageCount)
for i := 0; i < messageCount; i++ {
logBuffer.AddToBuffer(&mq_pb.DataMessage{
Key: []byte(fmt.Sprintf("key-%d", i)),
@ -74,11 +75,11 @@ func TestFlushOffsetGap_ReproduceDataLoss(t *testing.T) {
TsNs: time.Now().UnixNano(),
})
}
// Force flush multiple times to simulate real workload
t.Logf("Forcing flush...")
logBuffer.ForceFlush()
// Add more messages after flush
for i := messageCount; i < messageCount+50; i++ {
logBuffer.AddToBuffer(&mq_pb.DataMessage{
@ -87,18 +88,18 @@ func TestFlushOffsetGap_ReproduceDataLoss(t *testing.T) {
TsNs: time.Now().UnixNano(),
})
}
// Force another flush
logBuffer.ForceFlush()
time.Sleep(200 * time.Millisecond) // Wait for flush to complete
// Now check the buffer state
logBuffer.RLock()
bufferStartOffset := logBuffer.bufferStartOffset
currentOffset := logBuffer.offset
pos := logBuffer.pos
logBuffer.RUnlock()
flushMu.Lock()
flushedCount := len(flushedMessages)
var maxFlushedOffset int64 = -1
@ -108,23 +109,23 @@ func TestFlushOffsetGap_ReproduceDataLoss(t *testing.T) {
maxFlushedOffset = flushedMessages[flushedCount-1].Offset
}
flushMu.Unlock()
t.Logf("\nBUFFER STATE AFTER FLUSH:")
t.Logf(" bufferStartOffset: %d", bufferStartOffset)
t.Logf(" currentOffset (HWM): %d", currentOffset)
t.Logf(" pos (bytes in buffer): %d", pos)
t.Logf(" Messages sent: %d (offsets 0-%d)", messageCount+50, messageCount+49)
t.Logf(" Messages flushed to disk: %d (offsets %d-%d)", flushedCount, minFlushedOffset, maxFlushedOffset)
// CRITICAL CHECK: Is there a gap between flushed data and memory buffer?
if flushedCount > 0 && maxFlushedOffset >= 0 {
gap := bufferStartOffset - (maxFlushedOffset + 1)
t.Logf("\nOFFSET CONTINUITY CHECK:")
t.Logf(" Last flushed offset: %d", maxFlushedOffset)
t.Logf(" Buffer starts at: %d", bufferStartOffset)
t.Logf(" Gap: %d offsets", gap)
if gap > 0 {
t.Errorf("❌ CRITICAL BUG REPRODUCED: OFFSET GAP DETECTED!")
t.Errorf(" Disk has offsets %d-%d", minFlushedOffset, maxFlushedOffset)
@ -137,22 +138,22 @@ func TestFlushOffsetGap_ReproduceDataLoss(t *testing.T) {
} else {
t.Logf("✅ PASS: No gap detected - offsets are continuous")
}
// Check if we can read all expected offsets
t.Logf("\nREADABILITY CHECK:")
for testOffset := int64(0); testOffset < currentOffset; testOffset += 10 {
// Try to read from buffer
requestPosition := NewMessagePositionFromOffset(testOffset)
buf, _, err := logBuffer.ReadFromBuffer(requestPosition)
isReadable := (buf != nil && len(buf.Bytes()) > 0) || err == ResumeFromDiskError
status := "✅"
if !isReadable && err == nil {
status = "❌ NOT READABLE"
}
t.Logf(" Offset %d: %s (buf=%v, err=%v)", testOffset, status, buf != nil, err)
// If offset is in the gap, it should fail to read
if flushedCount > 0 && testOffset > maxFlushedOffset && testOffset < bufferStartOffset {
if isReadable {
@ -163,19 +164,19 @@ func TestFlushOffsetGap_ReproduceDataLoss(t *testing.T) {
}
}
}
// Check that all sent messages are accounted for
expectedMessageCount := messageCount + 50
messagesInMemory := int(currentOffset - bufferStartOffset)
totalAccountedFor := flushedCount + messagesInMemory
t.Logf("\nMESSAGE ACCOUNTING:")
t.Logf(" Expected: %d messages", expectedMessageCount)
t.Logf(" Flushed to disk: %d", flushedCount)
t.Logf(" In memory buffer: %d (offset range %d-%d)", messagesInMemory, bufferStartOffset, currentOffset-1)
t.Logf(" Total accounted for: %d", totalAccountedFor)
t.Logf(" Missing: %d messages", expectedMessageCount-totalAccountedFor)
if totalAccountedFor < expectedMessageCount {
t.Errorf("❌ DATA LOSS CONFIRMED: %d messages are missing!", expectedMessageCount-totalAccountedFor)
} else {
@ -188,23 +189,23 @@ func TestFlushOffsetGap_ReproduceDataLoss(t *testing.T) {
func TestFlushOffsetGap_CheckPrevBuffers(t *testing.T) {
var flushCount int
var flushMu sync.Mutex
flushFn := func(logBuffer *LogBuffer, startTime, stopTime time.Time, buf []byte, minOffset, maxOffset int64) {
flushMu.Lock()
flushCount++
count := flushCount
flushMu.Unlock()
t.Logf("FLUSH #%d: minOffset=%d maxOffset=%d size=%d bytes", count, minOffset, maxOffset, len(buf))
}
logBuffer := NewLogBuffer("test", 100*time.Millisecond, flushFn, nil, nil)
defer logBuffer.ShutdownLogBuffer()
// Send messages in batches with flushes in between
for batch := 0; batch < 5; batch++ {
t.Logf("\nBatch %d:", batch)
// Send 20 messages
for i := 0; i < 20; i++ {
offset := int64(batch*20 + i)
@ -214,28 +215,28 @@ func TestFlushOffsetGap_CheckPrevBuffers(t *testing.T) {
TsNs: time.Now().UnixNano(),
})
}
// Check state before flush
logBuffer.RLock()
beforeFlushOffset := logBuffer.offset
beforeFlushStart := logBuffer.bufferStartOffset
logBuffer.RUnlock()
// Force flush
logBuffer.ForceFlush()
time.Sleep(50 * time.Millisecond)
// Check state after flush
logBuffer.RLock()
afterFlushOffset := logBuffer.offset
afterFlushStart := logBuffer.bufferStartOffset
prevBufferCount := len(logBuffer.prevBuffers.buffers)
// Check prevBuffers state
t.Logf(" Before flush: offset=%d, bufferStartOffset=%d", beforeFlushOffset, beforeFlushStart)
t.Logf(" After flush: offset=%d, bufferStartOffset=%d, prevBuffers=%d",
afterFlushOffset, afterFlushStart, prevBufferCount)
// Check each prevBuffer
for i, prevBuf := range logBuffer.prevBuffers.buffers {
if prevBuf.size > 0 {
@ -244,7 +245,7 @@ func TestFlushOffsetGap_CheckPrevBuffers(t *testing.T) {
}
}
logBuffer.RUnlock()
// CRITICAL: Check if bufferStartOffset advanced correctly
expectedNewStart := beforeFlushOffset
if afterFlushStart != expectedNewStart {
@ -261,10 +262,10 @@ func TestFlushOffsetGap_CheckPrevBuffers(t *testing.T) {
func TestFlushOffsetGap_ConcurrentWriteAndFlush(t *testing.T) {
var allFlushedOffsets []int64
var flushMu sync.Mutex
flushFn := func(logBuffer *LogBuffer, startTime, stopTime time.Time, buf []byte, minOffset, maxOffset int64) {
t.Logf("FLUSH: offsets %d-%d (%d bytes)", minOffset, maxOffset, len(buf))
flushMu.Lock()
// Record the offset range that was flushed
for offset := minOffset; offset <= maxOffset; offset++ {
@ -272,13 +273,13 @@ func TestFlushOffsetGap_ConcurrentWriteAndFlush(t *testing.T) {
}
flushMu.Unlock()
}
logBuffer := NewLogBuffer("test", 50*time.Millisecond, flushFn, nil, nil)
defer logBuffer.ShutdownLogBuffer()
// Concurrently write messages and force flushes
var wg sync.WaitGroup
// Writer goroutine
wg.Add(1)
go func() {
@ -294,7 +295,7 @@ func TestFlushOffsetGap_ConcurrentWriteAndFlush(t *testing.T) {
}
}
}()
// Flusher goroutine
wg.Add(1)
go func() {
@ -304,31 +305,31 @@ func TestFlushOffsetGap_ConcurrentWriteAndFlush(t *testing.T) {
logBuffer.ForceFlush()
}
}()
wg.Wait()
time.Sleep(200 * time.Millisecond) // Wait for final flush
// Check final state
logBuffer.RLock()
finalOffset := logBuffer.offset
finalBufferStart := logBuffer.bufferStartOffset
logBuffer.RUnlock()
flushMu.Lock()
flushedCount := len(allFlushedOffsets)
flushMu.Unlock()
expectedCount := int(finalOffset)
inMemory := int(finalOffset - finalBufferStart)
totalAccountedFor := flushedCount + inMemory
t.Logf("\nFINAL STATE:")
t.Logf(" Total messages sent: %d (offsets 0-%d)", expectedCount, expectedCount-1)
t.Logf(" Flushed to disk: %d", flushedCount)
t.Logf(" In memory: %d (offsets %d-%d)", inMemory, finalBufferStart, finalOffset-1)
t.Logf(" Total accounted: %d", totalAccountedFor)
t.Logf(" Missing: %d", expectedCount-totalAccountedFor)
if totalAccountedFor < expectedCount {
t.Errorf("❌ DATA LOSS in concurrent scenario: %d messages missing!", expectedCount-totalAccountedFor)
}
@ -344,7 +345,7 @@ func TestFlushOffsetGap_ProductionScenario(t *testing.T) {
messages []*filer_pb.LogEntry
}
var flushMu sync.Mutex
flushFn := func(logBuffer *LogBuffer, startTime, stopTime time.Time, buf []byte, minOffset, maxOffset int64) {
// Parse messages from buffer
messages := []*filer_pb.LogEntry{}
@ -360,7 +361,7 @@ func TestFlushOffsetGap_ProductionScenario(t *testing.T) {
}
pos += 4 + int(size)
}
flushMu.Lock()
flushedData = append(flushedData, struct {
minOffset int64
@ -368,17 +369,17 @@ func TestFlushOffsetGap_ProductionScenario(t *testing.T) {
messages []*filer_pb.LogEntry
}{minOffset, maxOffset, messages})
flushMu.Unlock()
t.Logf("FLUSH: minOffset=%d maxOffset=%d, parsed %d messages", minOffset, maxOffset, len(messages))
}
logBuffer := NewLogBuffer("test", time.Hour, flushFn, nil, nil)
defer logBuffer.ShutdownLogBuffer()
// Simulate broker behavior: assign Kafka offsets and add to buffer
// This is what PublishWithOffset() does
nextKafkaOffset := int64(0)
// Round 1: Add 50 messages with Kafka offsets 0-49
t.Logf("\n=== ROUND 1: Adding messages 0-49 ===")
for i := 0; i < 50; i++ {
@ -391,7 +392,7 @@ func TestFlushOffsetGap_ProductionScenario(t *testing.T) {
logBuffer.AddLogEntryToBuffer(logEntry)
nextKafkaOffset++
}
// Check buffer state before flush
logBuffer.RLock()
beforeFlushOffset := logBuffer.offset
@ -399,11 +400,11 @@ func TestFlushOffsetGap_ProductionScenario(t *testing.T) {
logBuffer.RUnlock()
t.Logf("Before flush: logBuffer.offset=%d, bufferStartOffset=%d, nextKafkaOffset=%d",
beforeFlushOffset, beforeFlushStart, nextKafkaOffset)
// Flush
logBuffer.ForceFlush()
time.Sleep(100 * time.Millisecond)
// Check buffer state after flush
logBuffer.RLock()
afterFlushOffset := logBuffer.offset
@ -411,7 +412,7 @@ func TestFlushOffsetGap_ProductionScenario(t *testing.T) {
logBuffer.RUnlock()
t.Logf("After flush: logBuffer.offset=%d, bufferStartOffset=%d",
afterFlushOffset, afterFlushStart)
// Round 2: Add another 50 messages with Kafka offsets 50-99
t.Logf("\n=== ROUND 2: Adding messages 50-99 ===")
for i := 0; i < 50; i++ {
@ -424,20 +425,20 @@ func TestFlushOffsetGap_ProductionScenario(t *testing.T) {
logBuffer.AddLogEntryToBuffer(logEntry)
nextKafkaOffset++
}
logBuffer.ForceFlush()
time.Sleep(100 * time.Millisecond)
// Verification: Check if all Kafka offsets are accounted for
flushMu.Lock()
t.Logf("\n=== VERIFICATION ===")
t.Logf("Expected Kafka offsets: 0-%d", nextKafkaOffset-1)
allOffsets := make(map[int64]bool)
for flushIdx, flush := range flushedData {
t.Logf("Flush #%d: minOffset=%d, maxOffset=%d, messages=%d",
flushIdx, flush.minOffset, flush.maxOffset, len(flush.messages))
for _, msg := range flush.messages {
if allOffsets[msg.Offset] {
t.Errorf(" ❌ DUPLICATE: Offset %d appears multiple times!", msg.Offset)
@ -446,7 +447,7 @@ func TestFlushOffsetGap_ProductionScenario(t *testing.T) {
}
}
flushMu.Unlock()
// Check for missing offsets
missingOffsets := []int64{}
for expectedOffset := int64(0); expectedOffset < nextKafkaOffset; expectedOffset++ {
@ -454,7 +455,7 @@ func TestFlushOffsetGap_ProductionScenario(t *testing.T) {
missingOffsets = append(missingOffsets, expectedOffset)
}
}
if len(missingOffsets) > 0 {
t.Errorf("\n❌ MISSING OFFSETS DETECTED: %d offsets missing", len(missingOffsets))
if len(missingOffsets) <= 20 {
@ -466,18 +467,18 @@ func TestFlushOffsetGap_ProductionScenario(t *testing.T) {
} else {
t.Logf("\n✅ SUCCESS: All %d Kafka offsets accounted for (0-%d)", nextKafkaOffset, nextKafkaOffset-1)
}
// Check buffer offset consistency
logBuffer.RLock()
finalOffset := logBuffer.offset
finalBufferStart := logBuffer.bufferStartOffset
logBuffer.RUnlock()
t.Logf("\nFinal buffer state:")
t.Logf(" logBuffer.offset: %d", finalOffset)
t.Logf(" bufferStartOffset: %d", finalBufferStart)
t.Logf(" Expected (nextKafkaOffset): %d", nextKafkaOffset)
if finalOffset != nextKafkaOffset {
t.Errorf("❌ logBuffer.offset mismatch: expected %d, got %d", nextKafkaOffset, finalOffset)
}
@ -488,12 +489,12 @@ func TestFlushOffsetGap_ProductionScenario(t *testing.T) {
func TestFlushOffsetGap_ConcurrentReadDuringFlush(t *testing.T) {
var flushedOffsets []int64
var flushMu sync.Mutex
readFromDiskFn := func(startPosition MessagePosition, stopTsNs int64, eachLogEntryFn EachLogEntryFuncType) (MessagePosition, bool, error) {
// Simulate reading from disk - return flushed offsets
flushMu.Lock()
defer flushMu.Unlock()
for _, offset := range flushedOffsets {
if offset >= startPosition.Offset {
logEntry := &filer_pb.LogEntry{
@ -510,12 +511,12 @@ func TestFlushOffsetGap_ConcurrentReadDuringFlush(t *testing.T) {
}
return startPosition, false, nil
}
flushFn := func(logBuffer *LogBuffer, startTime, stopTime time.Time, buf []byte, minOffset, maxOffset int64) {
// Parse and store flushed offsets
flushMu.Lock()
defer flushMu.Unlock()
for pos := 0; pos+4 < len(buf); {
size := uint32(buf[pos])<<24 | uint32(buf[pos+1])<<16 | uint32(buf[pos+2])<<8 | uint32(buf[pos+3])
if pos+4+int(size) > len(buf) {
@ -528,14 +529,14 @@ func TestFlushOffsetGap_ConcurrentReadDuringFlush(t *testing.T) {
}
pos += 4 + int(size)
}
t.Logf("FLUSH: Stored %d offsets to disk (minOffset=%d, maxOffset=%d)",
len(flushedOffsets), minOffset, maxOffset)
}
logBuffer := NewLogBuffer("test", time.Hour, flushFn, readFromDiskFn, nil)
defer logBuffer.ShutdownLogBuffer()
// Add 100 messages
t.Logf("Adding 100 messages...")
for i := int64(0); i < 100; i++ {
@ -547,32 +548,32 @@ func TestFlushOffsetGap_ConcurrentReadDuringFlush(t *testing.T) {
}
logBuffer.AddLogEntryToBuffer(logEntry)
}
// Flush (moves data to disk)
t.Logf("Flushing...")
logBuffer.ForceFlush()
time.Sleep(100 * time.Millisecond)
// Now try to read all messages using ReadMessagesAtOffset
t.Logf("\nReading messages from offset 0...")
messages, nextOffset, hwm, endOfPartition, err := logBuffer.ReadMessagesAtOffset(0, 1000, 1024*1024)
t.Logf("Read result: messages=%d, nextOffset=%d, hwm=%d, endOfPartition=%v, err=%v",
len(messages), nextOffset, hwm, endOfPartition, err)
// Verify all offsets can be read
readOffsets := make(map[int64]bool)
for _, msg := range messages {
readOffsets[msg.Offset] = true
}
missingOffsets := []int64{}
for expectedOffset := int64(0); expectedOffset < 100; expectedOffset++ {
if !readOffsets[expectedOffset] {
missingOffsets = append(missingOffsets, expectedOffset)
}
}
if len(missingOffsets) > 0 {
t.Errorf("❌ MISSING OFFSETS after flush: %d offsets cannot be read", len(missingOffsets))
if len(missingOffsets) <= 20 {
@ -590,29 +591,29 @@ func TestFlushOffsetGap_ConcurrentReadDuringFlush(t *testing.T) {
func TestFlushOffsetGap_ForceFlushAdvancesBuffer(t *testing.T) {
flushedRanges := []struct{ min, max int64 }{}
var flushMu sync.Mutex
flushFn := func(logBuffer *LogBuffer, startTime, stopTime time.Time, buf []byte, minOffset, maxOffset int64) {
flushMu.Lock()
flushedRanges = append(flushedRanges, struct{ min, max int64 }{minOffset, maxOffset})
flushMu.Unlock()
t.Logf("FLUSH: offsets %d-%d", minOffset, maxOffset)
}
logBuffer := NewLogBuffer("test", time.Hour, flushFn, nil, nil) // Long interval, manual flush only
defer logBuffer.ShutdownLogBuffer()
// Send messages, flush, check state - repeat
for round := 0; round < 3; round++ {
t.Logf("\n=== ROUND %d ===", round)
// Check state before adding messages
logBuffer.RLock()
beforeOffset := logBuffer.offset
beforeStart := logBuffer.bufferStartOffset
logBuffer.RUnlock()
t.Logf("Before adding: offset=%d, bufferStartOffset=%d", beforeOffset, beforeStart)
// Add 10 messages
for i := 0; i < 10; i++ {
logBuffer.AddToBuffer(&mq_pb.DataMessage{
@ -621,28 +622,28 @@ func TestFlushOffsetGap_ForceFlushAdvancesBuffer(t *testing.T) {
TsNs: time.Now().UnixNano(),
})
}
// Check state after adding
logBuffer.RLock()
afterAddOffset := logBuffer.offset
afterAddStart := logBuffer.bufferStartOffset
logBuffer.RUnlock()
t.Logf("After adding: offset=%d, bufferStartOffset=%d", afterAddOffset, afterAddStart)
// Force flush
t.Logf("Forcing flush...")
logBuffer.ForceFlush()
time.Sleep(100 * time.Millisecond)
// Check state after flush
logBuffer.RLock()
afterFlushOffset := logBuffer.offset
afterFlushStart := logBuffer.bufferStartOffset
logBuffer.RUnlock()
t.Logf("After flush: offset=%d, bufferStartOffset=%d", afterFlushOffset, afterFlushStart)
// CRITICAL CHECK: bufferStartOffset should advance to where offset was before flush
if afterFlushStart != afterAddOffset {
t.Errorf("❌ FLUSH BUG: bufferStartOffset did NOT advance correctly!")
@ -653,19 +654,19 @@ func TestFlushOffsetGap_ForceFlushAdvancesBuffer(t *testing.T) {
t.Logf("✅ bufferStartOffset correctly advanced to %d", afterFlushStart)
}
}
// Final verification: check all offset ranges are continuous
flushMu.Lock()
t.Logf("\n=== FLUSHED RANGES ===")
for i, r := range flushedRanges {
t.Logf("Flush #%d: offsets %d-%d", i, r.min, r.max)
// Check continuity with previous flush
if i > 0 {
prevMax := flushedRanges[i-1].max
currentMin := r.min
gap := currentMin - (prevMax + 1)
if gap > 0 {
t.Errorf("❌ GAP between flush #%d and #%d: %d offsets missing!", i-1, i, gap)
} else if gap < 0 {
@ -677,4 +678,3 @@ func TestFlushOffsetGap_ForceFlushAdvancesBuffer(t *testing.T) {
}
flushMu.Unlock()
}
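These tests all assert the same accounting invariant: every assigned offset must be either on disk or still in memory, and the in-memory buffer must resume exactly one past the last flushed offset. A tiny sketch of that arithmetic (standalone, not part of the test file), using the numbers from the observed logs quoted at the top of the file:

    package main

    import "fmt"

    // offsetGap mirrors the check in the tests: a healthy buffer resumes at
    // maxFlushedOffset+1, so any positive result means lost offsets.
    func offsetGap(maxFlushedOffset, bufferStartOffset int64) int64 {
        return bufferStartOffset - (maxFlushedOffset + 1)
    }

    func main() {
        // Values from the logged failure: disk ends at 1763, memory resumes at 1800.
        fmt.Println(offsetGap(1763, 1800)) // 36 missing offsets (1764-1799)
    }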

2
weed/util/version/constants.go

@ -8,7 +8,7 @@ import (
var (
MAJOR_VERSION = int32(3)
MINOR_VERSION = int32(98)
MINOR_VERSION = int32(99)
VERSION_NUMBER = fmt.Sprintf("%d.%02d", MAJOR_VERSION, MINOR_VERSION)
VERSION = util.SizeLimit + " " + VERSION_NUMBER
COMMIT = ""

1
weed/worker/client.go

@ -18,7 +18,6 @@ type GrpcAdminClient struct {
adminAddress string
workerID string
dialOption grpc.DialOption
comms clientChannels
// Reconnection parameters

3
weed/worker/tasks/erasure_coding/ec_task.go

@ -374,7 +374,8 @@ func (t *ErasureCodingTask) generateEcShardsLocally(localFiles map[string]string
var generatedShards []string
var totalShardSize int64
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
// Check up to MaxShardCount (32) to support custom EC ratios
for i := 0; i < erasure_coding.MaxShardCount; i++ {
shardFile := fmt.Sprintf("%s.ec%02d", baseName, i)
if info, err := os.Stat(shardFile); err == nil {
shardKey := fmt.Sprintf("ec%02d", i)
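Probing for .ec00 through .ec31 files, rather than stopping at the default shard count, is what lets the worker pick up shards produced by custom ratios. A rough standalone sketch of that probing loop (baseName, the /tmp path, and the file layout are assumptions for illustration, not the worker's actual configuration):

    package main

    import (
        "fmt"
        "os"
    )

    const maxShardCount = 32

    func main() {
        baseName := "/tmp/vol1" // hypothetical volume base path
        for i := 0; i < maxShardCount; i++ {
            shardFile := fmt.Sprintf("%s.ec%02d", baseName, i)
            if info, err := os.Stat(shardFile); err == nil { // only report shards that exist
                fmt.Printf("found %s (%d bytes)\n", shardFile, info.Size())
            }
        }
    }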
