// isGenerationCompatible reports whether a request carrying
// requestedGeneration may be served by an EC volume whose generation is
// actualGeneration.
//
// A requested generation of 0 is the default/legacy value and is accepted
// against any actual generation, so callers that do not set a generation
// keep working in a mixed-version cluster. Any non-zero requested generation
// must equal the actual generation exactly.
func isGenerationCompatible(actualGeneration, requestedGeneration uint32) bool {
	// Either the caller did not pin a generation, or it pinned the one we have.
	return requestedGeneration == 0 || requestedGeneration == actualGeneration
}
+273,23 @@ func (s *Store) cachedLookupEcShardLocations(ecVolume *erasure_coding.EcVolume) ecVolume.ShardLocationsLock.Lock() for _, shardIdLocations := range resp.ShardIdLocations { - // Validate that the returned generation matches our request - if shardIdLocations.Generation != ecVolume.Generation { + // Mixed-version compatibility: be more flexible with generation matching + // If we requested generation 0 or if the response has generation 0 (older master), + // be more permissive to support rolling upgrades + generationMatches := shardIdLocations.Generation == ecVolume.Generation + mixedVersionCompatible := (ecVolume.Generation == 0 || shardIdLocations.Generation == 0) + + if !generationMatches && !mixedVersionCompatible { glog.Warningf("received shard locations for generation %d but requested generation %d for volume %d shard %d", shardIdLocations.Generation, ecVolume.Generation, ecVolume.VolumeId, shardIdLocations.ShardId) continue // skip mismatched generation shards } + if !generationMatches && mixedVersionCompatible { + glog.V(1).Infof("accepting shard locations with generation mismatch for mixed-version compatibility: volume %d shard %d response_gen=%d requested_gen=%d", + ecVolume.VolumeId, shardIdLocations.ShardId, shardIdLocations.Generation, ecVolume.Generation) + } + shardId := erasure_coding.ShardId(shardIdLocations.ShardId) delete(ecVolume.ShardLocations, shardId) for _, loc := range shardIdLocations.Locations { diff --git a/weed/topology/topology_ec.go b/weed/topology/topology_ec.go index fa31ebfe4..570c7b062 100644 --- a/weed/topology/topology_ec.go +++ b/weed/topology/topology_ec.go @@ -307,35 +307,44 @@ func (t *Topology) ListEcVolumesWithActiveGeneration() map[needle.VolumeId]uint3 } // LookupEcShardsWithFallback looks up EC shards for a volume with intelligent fallback +// This function provides mixed-version cluster compatibility by falling back gracefully // If no specific generation is requested (generation == 0), it uses the active 
generation // If the requested/active generation is not found, it falls back to generation 0 func (t *Topology) LookupEcShardsWithFallback(vid needle.VolumeId, requestedGeneration uint32) (locations *EcShardLocations, actualGeneration uint32, found bool) { // Determine target generation targetGeneration := requestedGeneration if requestedGeneration == 0 { - // Use active generation if available + // Use active generation if available (new behavior) if activeGen, exists := t.GetEcActiveGeneration(vid); exists { targetGeneration = activeGen + glog.V(4).Infof("LookupEcShardsWithFallback: using active generation %d for volume %d", activeGen, vid) } } // Try the target generation first if locations, found = t.LookupEcShards(vid, targetGeneration); found { + if targetGeneration != requestedGeneration { + glog.V(3).Infof("LookupEcShardsWithFallback: found volume %d generation %d (requested %d)", vid, targetGeneration, requestedGeneration) + } return locations, targetGeneration, true } - // If requested specific generation and not found, don't fallback + // If requested specific generation and not found, don't fallback for strict clients if requestedGeneration != 0 { + glog.V(2).Infof("LookupEcShardsWithFallback: volume %d generation %d not found, no fallback for specific request", vid, requestedGeneration) return nil, 0, false } - // Fallback to generation 0 if target generation wasn't found + // Mixed-version compatibility: fallback to generation 0 if target generation wasn't found + // This helps during rolling upgrades when some shards might not have generation info yet if targetGeneration != 0 { if locations, found = t.LookupEcShards(vid, 0); found { + glog.V(2).Infof("LookupEcShardsWithFallback: falling back to generation 0 for volume %d (target generation %d not found)", vid, targetGeneration) return locations, 0, true } } + glog.V(2).Infof("LookupEcShardsWithFallback: volume %d not found in any generation", vid) return nil, 0, false }