diff --git a/weed/pb/volume_server.proto b/weed/pb/volume_server.proto index b1bec99f4..e7e2cd94e 100644 --- a/weed/pb/volume_server.proto +++ b/weed/pb/volume_server.proto @@ -151,6 +151,12 @@ service VolumeServer { } rpc ExpandBlockVolume (ExpandBlockVolumeRequest) returns (ExpandBlockVolumeResponse) { } + rpc PrepareExpandBlockVolume (PrepareExpandBlockVolumeRequest) returns (PrepareExpandBlockVolumeResponse) { + } + rpc CommitExpandBlockVolume (CommitExpandBlockVolumeRequest) returns (CommitExpandBlockVolumeResponse) { + } + rpc CancelExpandBlockVolume (CancelExpandBlockVolumeRequest) returns (CancelExpandBlockVolumeResponse) { + } } @@ -834,3 +840,26 @@ message ExpandBlockVolumeRequest { message ExpandBlockVolumeResponse { uint64 capacity_bytes = 1; } + +message PrepareExpandBlockVolumeRequest { + string name = 1; + uint64 new_size_bytes = 2; + uint64 expand_epoch = 3; +} +message PrepareExpandBlockVolumeResponse { +} + +message CommitExpandBlockVolumeRequest { + string name = 1; + uint64 expand_epoch = 2; +} +message CommitExpandBlockVolumeResponse { + uint64 capacity_bytes = 1; +} + +message CancelExpandBlockVolumeRequest { + string name = 1; + uint64 expand_epoch = 2; +} +message CancelExpandBlockVolumeResponse { +} diff --git a/weed/pb/volume_server_pb/volume_server.pb.go b/weed/pb/volume_server_pb/volume_server.pb.go index e61a83483..85249b59c 100644 --- a/weed/pb/volume_server_pb/volume_server.pb.go +++ b/weed/pb/volume_server_pb/volume_server.pb.go @@ -6877,6 +6877,286 @@ func (x *ExpandBlockVolumeResponse) GetCapacityBytes() uint64 { return 0 } +type PrepareExpandBlockVolumeRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + NewSizeBytes uint64 `protobuf:"varint,2,opt,name=new_size_bytes,json=newSizeBytes,proto3" json:"new_size_bytes,omitempty"` + ExpandEpoch uint64 `protobuf:"varint,3,opt,name=expand_epoch,json=expandEpoch,proto3" 
json:"expand_epoch,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PrepareExpandBlockVolumeRequest) Reset() { + *x = PrepareExpandBlockVolumeRequest{} + mi := &file_volume_server_proto_msgTypes[120] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PrepareExpandBlockVolumeRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PrepareExpandBlockVolumeRequest) ProtoMessage() {} + +func (x *PrepareExpandBlockVolumeRequest) ProtoReflect() protoreflect.Message { + mi := &file_volume_server_proto_msgTypes[120] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PrepareExpandBlockVolumeRequest.ProtoReflect.Descriptor instead. +func (*PrepareExpandBlockVolumeRequest) Descriptor() ([]byte, []int) { + return file_volume_server_proto_rawDescGZIP(), []int{120} +} + +func (x *PrepareExpandBlockVolumeRequest) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *PrepareExpandBlockVolumeRequest) GetNewSizeBytes() uint64 { + if x != nil { + return x.NewSizeBytes + } + return 0 +} + +func (x *PrepareExpandBlockVolumeRequest) GetExpandEpoch() uint64 { + if x != nil { + return x.ExpandEpoch + } + return 0 +} + +type PrepareExpandBlockVolumeResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PrepareExpandBlockVolumeResponse) Reset() { + *x = PrepareExpandBlockVolumeResponse{} + mi := &file_volume_server_proto_msgTypes[121] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PrepareExpandBlockVolumeResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PrepareExpandBlockVolumeResponse) ProtoMessage() {} + +func (x 
*PrepareExpandBlockVolumeResponse) ProtoReflect() protoreflect.Message { + mi := &file_volume_server_proto_msgTypes[121] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PrepareExpandBlockVolumeResponse.ProtoReflect.Descriptor instead. +func (*PrepareExpandBlockVolumeResponse) Descriptor() ([]byte, []int) { + return file_volume_server_proto_rawDescGZIP(), []int{121} +} + +type CommitExpandBlockVolumeRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + ExpandEpoch uint64 `protobuf:"varint,2,opt,name=expand_epoch,json=expandEpoch,proto3" json:"expand_epoch,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *CommitExpandBlockVolumeRequest) Reset() { + *x = CommitExpandBlockVolumeRequest{} + mi := &file_volume_server_proto_msgTypes[122] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *CommitExpandBlockVolumeRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommitExpandBlockVolumeRequest) ProtoMessage() {} + +func (x *CommitExpandBlockVolumeRequest) ProtoReflect() protoreflect.Message { + mi := &file_volume_server_proto_msgTypes[122] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommitExpandBlockVolumeRequest.ProtoReflect.Descriptor instead. 
+func (*CommitExpandBlockVolumeRequest) Descriptor() ([]byte, []int) { + return file_volume_server_proto_rawDescGZIP(), []int{122} +} + +func (x *CommitExpandBlockVolumeRequest) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *CommitExpandBlockVolumeRequest) GetExpandEpoch() uint64 { + if x != nil { + return x.ExpandEpoch + } + return 0 +} + +type CommitExpandBlockVolumeResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + CapacityBytes uint64 `protobuf:"varint,1,opt,name=capacity_bytes,json=capacityBytes,proto3" json:"capacity_bytes,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *CommitExpandBlockVolumeResponse) Reset() { + *x = CommitExpandBlockVolumeResponse{} + mi := &file_volume_server_proto_msgTypes[123] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *CommitExpandBlockVolumeResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommitExpandBlockVolumeResponse) ProtoMessage() {} + +func (x *CommitExpandBlockVolumeResponse) ProtoReflect() protoreflect.Message { + mi := &file_volume_server_proto_msgTypes[123] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommitExpandBlockVolumeResponse.ProtoReflect.Descriptor instead. 
+func (*CommitExpandBlockVolumeResponse) Descriptor() ([]byte, []int) { + return file_volume_server_proto_rawDescGZIP(), []int{123} +} + +func (x *CommitExpandBlockVolumeResponse) GetCapacityBytes() uint64 { + if x != nil { + return x.CapacityBytes + } + return 0 +} + +type CancelExpandBlockVolumeRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + ExpandEpoch uint64 `protobuf:"varint,2,opt,name=expand_epoch,json=expandEpoch,proto3" json:"expand_epoch,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *CancelExpandBlockVolumeRequest) Reset() { + *x = CancelExpandBlockVolumeRequest{} + mi := &file_volume_server_proto_msgTypes[124] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *CancelExpandBlockVolumeRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CancelExpandBlockVolumeRequest) ProtoMessage() {} + +func (x *CancelExpandBlockVolumeRequest) ProtoReflect() protoreflect.Message { + mi := &file_volume_server_proto_msgTypes[124] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CancelExpandBlockVolumeRequest.ProtoReflect.Descriptor instead. 
+func (*CancelExpandBlockVolumeRequest) Descriptor() ([]byte, []int) { + return file_volume_server_proto_rawDescGZIP(), []int{124} +} + +func (x *CancelExpandBlockVolumeRequest) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *CancelExpandBlockVolumeRequest) GetExpandEpoch() uint64 { + if x != nil { + return x.ExpandEpoch + } + return 0 +} + +type CancelExpandBlockVolumeResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *CancelExpandBlockVolumeResponse) Reset() { + *x = CancelExpandBlockVolumeResponse{} + mi := &file_volume_server_proto_msgTypes[125] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *CancelExpandBlockVolumeResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CancelExpandBlockVolumeResponse) ProtoMessage() {} + +func (x *CancelExpandBlockVolumeResponse) ProtoReflect() protoreflect.Message { + mi := &file_volume_server_proto_msgTypes[125] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CancelExpandBlockVolumeResponse.ProtoReflect.Descriptor instead. 
+func (*CancelExpandBlockVolumeResponse) Descriptor() ([]byte, []int) { + return file_volume_server_proto_rawDescGZIP(), []int{125} +} + type FetchAndWriteNeedleRequest_Replica struct { state protoimpl.MessageState `protogen:"open.v1"` Url string `protobuf:"bytes,1,opt,name=url,proto3" json:"url,omitempty"` @@ -6888,7 +7168,7 @@ type FetchAndWriteNeedleRequest_Replica struct { func (x *FetchAndWriteNeedleRequest_Replica) Reset() { *x = FetchAndWriteNeedleRequest_Replica{} - mi := &file_volume_server_proto_msgTypes[120] + mi := &file_volume_server_proto_msgTypes[126] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -6900,7 +7180,7 @@ func (x *FetchAndWriteNeedleRequest_Replica) String() string { func (*FetchAndWriteNeedleRequest_Replica) ProtoMessage() {} func (x *FetchAndWriteNeedleRequest_Replica) ProtoReflect() protoreflect.Message { - mi := &file_volume_server_proto_msgTypes[120] + mi := &file_volume_server_proto_msgTypes[126] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -6948,7 +7228,7 @@ type QueryRequest_Filter struct { func (x *QueryRequest_Filter) Reset() { *x = QueryRequest_Filter{} - mi := &file_volume_server_proto_msgTypes[121] + mi := &file_volume_server_proto_msgTypes[127] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -6960,7 +7240,7 @@ func (x *QueryRequest_Filter) String() string { func (*QueryRequest_Filter) ProtoMessage() {} func (x *QueryRequest_Filter) ProtoReflect() protoreflect.Message { - mi := &file_volume_server_proto_msgTypes[121] + mi := &file_volume_server_proto_msgTypes[127] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -7010,7 +7290,7 @@ type QueryRequest_InputSerialization struct { func (x *QueryRequest_InputSerialization) Reset() { *x = QueryRequest_InputSerialization{} - mi := &file_volume_server_proto_msgTypes[122] + mi := 
&file_volume_server_proto_msgTypes[128] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -7022,7 +7302,7 @@ func (x *QueryRequest_InputSerialization) String() string { func (*QueryRequest_InputSerialization) ProtoMessage() {} func (x *QueryRequest_InputSerialization) ProtoReflect() protoreflect.Message { - mi := &file_volume_server_proto_msgTypes[122] + mi := &file_volume_server_proto_msgTypes[128] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -7076,7 +7356,7 @@ type QueryRequest_OutputSerialization struct { func (x *QueryRequest_OutputSerialization) Reset() { *x = QueryRequest_OutputSerialization{} - mi := &file_volume_server_proto_msgTypes[123] + mi := &file_volume_server_proto_msgTypes[129] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -7088,7 +7368,7 @@ func (x *QueryRequest_OutputSerialization) String() string { func (*QueryRequest_OutputSerialization) ProtoMessage() {} func (x *QueryRequest_OutputSerialization) ProtoReflect() protoreflect.Message { - mi := &file_volume_server_proto_msgTypes[123] + mi := &file_volume_server_proto_msgTypes[129] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -7134,7 +7414,7 @@ type QueryRequest_InputSerialization_CSVInput struct { func (x *QueryRequest_InputSerialization_CSVInput) Reset() { *x = QueryRequest_InputSerialization_CSVInput{} - mi := &file_volume_server_proto_msgTypes[124] + mi := &file_volume_server_proto_msgTypes[130] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -7146,7 +7426,7 @@ func (x *QueryRequest_InputSerialization_CSVInput) String() string { func (*QueryRequest_InputSerialization_CSVInput) ProtoMessage() {} func (x *QueryRequest_InputSerialization_CSVInput) ProtoReflect() protoreflect.Message { - mi := &file_volume_server_proto_msgTypes[124] + mi := 
&file_volume_server_proto_msgTypes[130] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -7220,7 +7500,7 @@ type QueryRequest_InputSerialization_JSONInput struct { func (x *QueryRequest_InputSerialization_JSONInput) Reset() { *x = QueryRequest_InputSerialization_JSONInput{} - mi := &file_volume_server_proto_msgTypes[125] + mi := &file_volume_server_proto_msgTypes[131] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -7232,7 +7512,7 @@ func (x *QueryRequest_InputSerialization_JSONInput) String() string { func (*QueryRequest_InputSerialization_JSONInput) ProtoMessage() {} func (x *QueryRequest_InputSerialization_JSONInput) ProtoReflect() protoreflect.Message { - mi := &file_volume_server_proto_msgTypes[125] + mi := &file_volume_server_proto_msgTypes[131] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -7263,7 +7543,7 @@ type QueryRequest_InputSerialization_ParquetInput struct { func (x *QueryRequest_InputSerialization_ParquetInput) Reset() { *x = QueryRequest_InputSerialization_ParquetInput{} - mi := &file_volume_server_proto_msgTypes[126] + mi := &file_volume_server_proto_msgTypes[132] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -7275,7 +7555,7 @@ func (x *QueryRequest_InputSerialization_ParquetInput) String() string { func (*QueryRequest_InputSerialization_ParquetInput) ProtoMessage() {} func (x *QueryRequest_InputSerialization_ParquetInput) ProtoReflect() protoreflect.Message { - mi := &file_volume_server_proto_msgTypes[126] + mi := &file_volume_server_proto_msgTypes[132] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -7304,7 +7584,7 @@ type QueryRequest_OutputSerialization_CSVOutput struct { func (x *QueryRequest_OutputSerialization_CSVOutput) Reset() { *x = QueryRequest_OutputSerialization_CSVOutput{} - mi := 
&file_volume_server_proto_msgTypes[127] + mi := &file_volume_server_proto_msgTypes[133] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -7316,7 +7596,7 @@ func (x *QueryRequest_OutputSerialization_CSVOutput) String() string { func (*QueryRequest_OutputSerialization_CSVOutput) ProtoMessage() {} func (x *QueryRequest_OutputSerialization_CSVOutput) ProtoReflect() protoreflect.Message { - mi := &file_volume_server_proto_msgTypes[127] + mi := &file_volume_server_proto_msgTypes[133] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -7376,7 +7656,7 @@ type QueryRequest_OutputSerialization_JSONOutput struct { func (x *QueryRequest_OutputSerialization_JSONOutput) Reset() { *x = QueryRequest_OutputSerialization_JSONOutput{} - mi := &file_volume_server_proto_msgTypes[128] + mi := &file_volume_server_proto_msgTypes[134] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -7388,7 +7668,7 @@ func (x *QueryRequest_OutputSerialization_JSONOutput) String() string { func (*QueryRequest_OutputSerialization_JSONOutput) ProtoMessage() {} func (x *QueryRequest_OutputSerialization_JSONOutput) ProtoReflect() protoreflect.Message { - mi := &file_volume_server_proto_msgTypes[128] + mi := &file_volume_server_proto_msgTypes[134] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -7964,12 +8244,26 @@ const file_volume_server_proto_rawDesc = "" + "\x04name\x18\x01 \x01(\tR\x04name\x12$\n" + "\x0enew_size_bytes\x18\x02 \x01(\x04R\fnewSizeBytes\"B\n" + "\x19ExpandBlockVolumeResponse\x12%\n" + - "\x0ecapacity_bytes\x18\x01 \x01(\x04R\rcapacityBytes*>\n" + + "\x0ecapacity_bytes\x18\x01 \x01(\x04R\rcapacityBytes\"~\n" + + "\x1fPrepareExpandBlockVolumeRequest\x12\x12\n" + + "\x04name\x18\x01 \x01(\tR\x04name\x12$\n" + + "\x0enew_size_bytes\x18\x02 \x01(\x04R\fnewSizeBytes\x12!\n" + + "\fexpand_epoch\x18\x03 
\x01(\x04R\vexpandEpoch\"\"\n" + + " PrepareExpandBlockVolumeResponse\"W\n" + + "\x1eCommitExpandBlockVolumeRequest\x12\x12\n" + + "\x04name\x18\x01 \x01(\tR\x04name\x12!\n" + + "\fexpand_epoch\x18\x02 \x01(\x04R\vexpandEpoch\"H\n" + + "\x1fCommitExpandBlockVolumeResponse\x12%\n" + + "\x0ecapacity_bytes\x18\x01 \x01(\x04R\rcapacityBytes\"W\n" + + "\x1eCancelExpandBlockVolumeRequest\x12\x12\n" + + "\x04name\x18\x01 \x01(\tR\x04name\x12!\n" + + "\fexpand_epoch\x18\x02 \x01(\x04R\vexpandEpoch\"!\n" + + "\x1fCancelExpandBlockVolumeResponse*>\n" + "\x0fVolumeScrubMode\x12\v\n" + "\aUNKNOWN\x10\x00\x12\t\n" + "\x05INDEX\x10\x01\x12\b\n" + "\x04FULL\x10\x02\x12\t\n" + - "\x05LOCAL\x10\x032\xd5.\n" + + "\x05LOCAL\x10\x032\xe11\n" + "\fVolumeServer\x12\\\n" + "\vBatchDelete\x12$.volume_server_pb.BatchDeleteRequest\x1a%.volume_server_pb.BatchDeleteResponse\"\x00\x12n\n" + "\x11VacuumVolumeCheck\x12*.volume_server_pb.VacuumVolumeCheckRequest\x1a+.volume_server_pb.VacuumVolumeCheckResponse\"\x00\x12v\n" + @@ -8025,7 +8319,10 @@ const file_volume_server_proto_rawDesc = "" + "\x13SnapshotBlockVolume\x12,.volume_server_pb.SnapshotBlockVolumeRequest\x1a-.volume_server_pb.SnapshotBlockVolumeResponse\"\x00\x12t\n" + "\x13DeleteBlockSnapshot\x12,.volume_server_pb.DeleteBlockSnapshotRequest\x1a-.volume_server_pb.DeleteBlockSnapshotResponse\"\x00\x12q\n" + "\x12ListBlockSnapshots\x12+.volume_server_pb.ListBlockSnapshotsRequest\x1a,.volume_server_pb.ListBlockSnapshotsResponse\"\x00\x12n\n" + - "\x11ExpandBlockVolume\x12*.volume_server_pb.ExpandBlockVolumeRequest\x1a+.volume_server_pb.ExpandBlockVolumeResponse\"\x00B9Z7github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pbb\x06proto3" + "\x11ExpandBlockVolume\x12*.volume_server_pb.ExpandBlockVolumeRequest\x1a+.volume_server_pb.ExpandBlockVolumeResponse\"\x00\x12\x83\x01\n" + + "\x18PrepareExpandBlockVolume\x121.volume_server_pb.PrepareExpandBlockVolumeRequest\x1a2.volume_server_pb.PrepareExpandBlockVolumeResponse\"\x00\x12\x80\x01\n" + 
+ "\x17CommitExpandBlockVolume\x120.volume_server_pb.CommitExpandBlockVolumeRequest\x1a1.volume_server_pb.CommitExpandBlockVolumeResponse\"\x00\x12\x80\x01\n" + + "\x17CancelExpandBlockVolume\x120.volume_server_pb.CancelExpandBlockVolumeRequest\x1a1.volume_server_pb.CancelExpandBlockVolumeResponse\"\x00B9Z7github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pbb\x06proto3" var ( file_volume_server_proto_rawDescOnce sync.Once @@ -8040,7 +8337,7 @@ func file_volume_server_proto_rawDescGZIP() []byte { } var file_volume_server_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_volume_server_proto_msgTypes = make([]protoimpl.MessageInfo, 129) +var file_volume_server_proto_msgTypes = make([]protoimpl.MessageInfo, 135) var file_volume_server_proto_goTypes = []any{ (VolumeScrubMode)(0), // 0: volume_server_pb.VolumeScrubMode (*VolumeServerState)(nil), // 1: volume_server_pb.VolumeServerState @@ -8163,17 +8460,23 @@ var file_volume_server_proto_goTypes = []any{ (*BlockSnapshotInfo)(nil), // 118: volume_server_pb.BlockSnapshotInfo (*ExpandBlockVolumeRequest)(nil), // 119: volume_server_pb.ExpandBlockVolumeRequest (*ExpandBlockVolumeResponse)(nil), // 120: volume_server_pb.ExpandBlockVolumeResponse - (*FetchAndWriteNeedleRequest_Replica)(nil), // 121: volume_server_pb.FetchAndWriteNeedleRequest.Replica - (*QueryRequest_Filter)(nil), // 122: volume_server_pb.QueryRequest.Filter - (*QueryRequest_InputSerialization)(nil), // 123: volume_server_pb.QueryRequest.InputSerialization - (*QueryRequest_OutputSerialization)(nil), // 124: volume_server_pb.QueryRequest.OutputSerialization - (*QueryRequest_InputSerialization_CSVInput)(nil), // 125: volume_server_pb.QueryRequest.InputSerialization.CSVInput - (*QueryRequest_InputSerialization_JSONInput)(nil), // 126: volume_server_pb.QueryRequest.InputSerialization.JSONInput - (*QueryRequest_InputSerialization_ParquetInput)(nil), // 127: volume_server_pb.QueryRequest.InputSerialization.ParquetInput - 
(*QueryRequest_OutputSerialization_CSVOutput)(nil), // 128: volume_server_pb.QueryRequest.OutputSerialization.CSVOutput - (*QueryRequest_OutputSerialization_JSONOutput)(nil), // 129: volume_server_pb.QueryRequest.OutputSerialization.JSONOutput - (*remote_pb.RemoteConf)(nil), // 130: remote_pb.RemoteConf - (*remote_pb.RemoteStorageLocation)(nil), // 131: remote_pb.RemoteStorageLocation + (*PrepareExpandBlockVolumeRequest)(nil), // 121: volume_server_pb.PrepareExpandBlockVolumeRequest + (*PrepareExpandBlockVolumeResponse)(nil), // 122: volume_server_pb.PrepareExpandBlockVolumeResponse + (*CommitExpandBlockVolumeRequest)(nil), // 123: volume_server_pb.CommitExpandBlockVolumeRequest + (*CommitExpandBlockVolumeResponse)(nil), // 124: volume_server_pb.CommitExpandBlockVolumeResponse + (*CancelExpandBlockVolumeRequest)(nil), // 125: volume_server_pb.CancelExpandBlockVolumeRequest + (*CancelExpandBlockVolumeResponse)(nil), // 126: volume_server_pb.CancelExpandBlockVolumeResponse + (*FetchAndWriteNeedleRequest_Replica)(nil), // 127: volume_server_pb.FetchAndWriteNeedleRequest.Replica + (*QueryRequest_Filter)(nil), // 128: volume_server_pb.QueryRequest.Filter + (*QueryRequest_InputSerialization)(nil), // 129: volume_server_pb.QueryRequest.InputSerialization + (*QueryRequest_OutputSerialization)(nil), // 130: volume_server_pb.QueryRequest.OutputSerialization + (*QueryRequest_InputSerialization_CSVInput)(nil), // 131: volume_server_pb.QueryRequest.InputSerialization.CSVInput + (*QueryRequest_InputSerialization_JSONInput)(nil), // 132: volume_server_pb.QueryRequest.InputSerialization.JSONInput + (*QueryRequest_InputSerialization_ParquetInput)(nil), // 133: volume_server_pb.QueryRequest.InputSerialization.ParquetInput + (*QueryRequest_OutputSerialization_CSVOutput)(nil), // 134: volume_server_pb.QueryRequest.OutputSerialization.CSVOutput + (*QueryRequest_OutputSerialization_JSONOutput)(nil), // 135: volume_server_pb.QueryRequest.OutputSerialization.JSONOutput + 
(*remote_pb.RemoteConf)(nil), // 136: remote_pb.RemoteConf + (*remote_pb.RemoteStorageLocation)(nil), // 137: remote_pb.RemoteStorageLocation } var file_volume_server_proto_depIdxs = []int32{ 4, // 0: volume_server_pb.BatchDeleteResponse.results:type_name -> volume_server_pb.DeleteResult @@ -8189,21 +8492,21 @@ var file_volume_server_proto_depIdxs = []int32{ 82, // 10: volume_server_pb.VolumeServerStatusResponse.disk_statuses:type_name -> volume_server_pb.DiskStatus 83, // 11: volume_server_pb.VolumeServerStatusResponse.memory_status:type_name -> volume_server_pb.MemStatus 1, // 12: volume_server_pb.VolumeServerStatusResponse.state:type_name -> volume_server_pb.VolumeServerState - 121, // 13: volume_server_pb.FetchAndWriteNeedleRequest.replicas:type_name -> volume_server_pb.FetchAndWriteNeedleRequest.Replica - 130, // 14: volume_server_pb.FetchAndWriteNeedleRequest.remote_conf:type_name -> remote_pb.RemoteConf - 131, // 15: volume_server_pb.FetchAndWriteNeedleRequest.remote_location:type_name -> remote_pb.RemoteStorageLocation + 127, // 13: volume_server_pb.FetchAndWriteNeedleRequest.replicas:type_name -> volume_server_pb.FetchAndWriteNeedleRequest.Replica + 136, // 14: volume_server_pb.FetchAndWriteNeedleRequest.remote_conf:type_name -> remote_pb.RemoteConf + 137, // 15: volume_server_pb.FetchAndWriteNeedleRequest.remote_location:type_name -> remote_pb.RemoteStorageLocation 0, // 16: volume_server_pb.ScrubVolumeRequest.mode:type_name -> volume_server_pb.VolumeScrubMode 0, // 17: volume_server_pb.ScrubEcVolumeRequest.mode:type_name -> volume_server_pb.VolumeScrubMode 79, // 18: volume_server_pb.ScrubEcVolumeResponse.broken_shard_infos:type_name -> volume_server_pb.EcShardInfo - 122, // 19: volume_server_pb.QueryRequest.filter:type_name -> volume_server_pb.QueryRequest.Filter - 123, // 20: volume_server_pb.QueryRequest.input_serialization:type_name -> volume_server_pb.QueryRequest.InputSerialization - 124, // 21: 
volume_server_pb.QueryRequest.output_serialization:type_name -> volume_server_pb.QueryRequest.OutputSerialization + 128, // 19: volume_server_pb.QueryRequest.filter:type_name -> volume_server_pb.QueryRequest.Filter + 129, // 20: volume_server_pb.QueryRequest.input_serialization:type_name -> volume_server_pb.QueryRequest.InputSerialization + 130, // 21: volume_server_pb.QueryRequest.output_serialization:type_name -> volume_server_pb.QueryRequest.OutputSerialization 118, // 22: volume_server_pb.ListBlockSnapshotsResponse.snapshots:type_name -> volume_server_pb.BlockSnapshotInfo - 125, // 23: volume_server_pb.QueryRequest.InputSerialization.csv_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.CSVInput - 126, // 24: volume_server_pb.QueryRequest.InputSerialization.json_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.JSONInput - 127, // 25: volume_server_pb.QueryRequest.InputSerialization.parquet_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.ParquetInput - 128, // 26: volume_server_pb.QueryRequest.OutputSerialization.csv_output:type_name -> volume_server_pb.QueryRequest.OutputSerialization.CSVOutput - 129, // 27: volume_server_pb.QueryRequest.OutputSerialization.json_output:type_name -> volume_server_pb.QueryRequest.OutputSerialization.JSONOutput + 131, // 23: volume_server_pb.QueryRequest.InputSerialization.csv_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.CSVInput + 132, // 24: volume_server_pb.QueryRequest.InputSerialization.json_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.JSONInput + 133, // 25: volume_server_pb.QueryRequest.InputSerialization.parquet_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.ParquetInput + 134, // 26: volume_server_pb.QueryRequest.OutputSerialization.csv_output:type_name -> volume_server_pb.QueryRequest.OutputSerialization.CSVOutput + 135, // 27: volume_server_pb.QueryRequest.OutputSerialization.json_output:type_name 
-> volume_server_pb.QueryRequest.OutputSerialization.JSONOutput 2, // 28: volume_server_pb.VolumeServer.BatchDelete:input_type -> volume_server_pb.BatchDeleteRequest 6, // 29: volume_server_pb.VolumeServer.VacuumVolumeCheck:input_type -> volume_server_pb.VacuumVolumeCheckRequest 8, // 30: volume_server_pb.VolumeServer.VacuumVolumeCompact:input_type -> volume_server_pb.VacuumVolumeCompactRequest @@ -8258,62 +8561,68 @@ var file_volume_server_proto_depIdxs = []int32{ 114, // 79: volume_server_pb.VolumeServer.DeleteBlockSnapshot:input_type -> volume_server_pb.DeleteBlockSnapshotRequest 116, // 80: volume_server_pb.VolumeServer.ListBlockSnapshots:input_type -> volume_server_pb.ListBlockSnapshotsRequest 119, // 81: volume_server_pb.VolumeServer.ExpandBlockVolume:input_type -> volume_server_pb.ExpandBlockVolumeRequest - 3, // 82: volume_server_pb.VolumeServer.BatchDelete:output_type -> volume_server_pb.BatchDeleteResponse - 7, // 83: volume_server_pb.VolumeServer.VacuumVolumeCheck:output_type -> volume_server_pb.VacuumVolumeCheckResponse - 9, // 84: volume_server_pb.VolumeServer.VacuumVolumeCompact:output_type -> volume_server_pb.VacuumVolumeCompactResponse - 11, // 85: volume_server_pb.VolumeServer.VacuumVolumeCommit:output_type -> volume_server_pb.VacuumVolumeCommitResponse - 13, // 86: volume_server_pb.VolumeServer.VacuumVolumeCleanup:output_type -> volume_server_pb.VacuumVolumeCleanupResponse - 15, // 87: volume_server_pb.VolumeServer.DeleteCollection:output_type -> volume_server_pb.DeleteCollectionResponse - 17, // 88: volume_server_pb.VolumeServer.AllocateVolume:output_type -> volume_server_pb.AllocateVolumeResponse - 19, // 89: volume_server_pb.VolumeServer.VolumeSyncStatus:output_type -> volume_server_pb.VolumeSyncStatusResponse - 21, // 90: volume_server_pb.VolumeServer.VolumeIncrementalCopy:output_type -> volume_server_pb.VolumeIncrementalCopyResponse - 23, // 91: volume_server_pb.VolumeServer.VolumeMount:output_type -> volume_server_pb.VolumeMountResponse - 
25, // 92: volume_server_pb.VolumeServer.VolumeUnmount:output_type -> volume_server_pb.VolumeUnmountResponse - 27, // 93: volume_server_pb.VolumeServer.VolumeDelete:output_type -> volume_server_pb.VolumeDeleteResponse - 29, // 94: volume_server_pb.VolumeServer.VolumeMarkReadonly:output_type -> volume_server_pb.VolumeMarkReadonlyResponse - 31, // 95: volume_server_pb.VolumeServer.VolumeMarkWritable:output_type -> volume_server_pb.VolumeMarkWritableResponse - 33, // 96: volume_server_pb.VolumeServer.VolumeConfigure:output_type -> volume_server_pb.VolumeConfigureResponse - 35, // 97: volume_server_pb.VolumeServer.VolumeStatus:output_type -> volume_server_pb.VolumeStatusResponse - 37, // 98: volume_server_pb.VolumeServer.GetState:output_type -> volume_server_pb.GetStateResponse - 39, // 99: volume_server_pb.VolumeServer.SetState:output_type -> volume_server_pb.SetStateResponse - 41, // 100: volume_server_pb.VolumeServer.VolumeCopy:output_type -> volume_server_pb.VolumeCopyResponse - 81, // 101: volume_server_pb.VolumeServer.ReadVolumeFileStatus:output_type -> volume_server_pb.ReadVolumeFileStatusResponse - 43, // 102: volume_server_pb.VolumeServer.CopyFile:output_type -> volume_server_pb.CopyFileResponse - 46, // 103: volume_server_pb.VolumeServer.ReceiveFile:output_type -> volume_server_pb.ReceiveFileResponse - 48, // 104: volume_server_pb.VolumeServer.ReadNeedleBlob:output_type -> volume_server_pb.ReadNeedleBlobResponse - 50, // 105: volume_server_pb.VolumeServer.ReadNeedleMeta:output_type -> volume_server_pb.ReadNeedleMetaResponse - 52, // 106: volume_server_pb.VolumeServer.WriteNeedleBlob:output_type -> volume_server_pb.WriteNeedleBlobResponse - 54, // 107: volume_server_pb.VolumeServer.ReadAllNeedles:output_type -> volume_server_pb.ReadAllNeedlesResponse - 56, // 108: volume_server_pb.VolumeServer.VolumeTailSender:output_type -> volume_server_pb.VolumeTailSenderResponse - 58, // 109: volume_server_pb.VolumeServer.VolumeTailReceiver:output_type -> 
volume_server_pb.VolumeTailReceiverResponse - 60, // 110: volume_server_pb.VolumeServer.VolumeEcShardsGenerate:output_type -> volume_server_pb.VolumeEcShardsGenerateResponse - 62, // 111: volume_server_pb.VolumeServer.VolumeEcShardsRebuild:output_type -> volume_server_pb.VolumeEcShardsRebuildResponse - 64, // 112: volume_server_pb.VolumeServer.VolumeEcShardsCopy:output_type -> volume_server_pb.VolumeEcShardsCopyResponse - 66, // 113: volume_server_pb.VolumeServer.VolumeEcShardsDelete:output_type -> volume_server_pb.VolumeEcShardsDeleteResponse - 68, // 114: volume_server_pb.VolumeServer.VolumeEcShardsMount:output_type -> volume_server_pb.VolumeEcShardsMountResponse - 70, // 115: volume_server_pb.VolumeServer.VolumeEcShardsUnmount:output_type -> volume_server_pb.VolumeEcShardsUnmountResponse - 72, // 116: volume_server_pb.VolumeServer.VolumeEcShardRead:output_type -> volume_server_pb.VolumeEcShardReadResponse - 74, // 117: volume_server_pb.VolumeServer.VolumeEcBlobDelete:output_type -> volume_server_pb.VolumeEcBlobDeleteResponse - 76, // 118: volume_server_pb.VolumeServer.VolumeEcShardsToVolume:output_type -> volume_server_pb.VolumeEcShardsToVolumeResponse - 78, // 119: volume_server_pb.VolumeServer.VolumeEcShardsInfo:output_type -> volume_server_pb.VolumeEcShardsInfoResponse - 89, // 120: volume_server_pb.VolumeServer.VolumeTierMoveDatToRemote:output_type -> volume_server_pb.VolumeTierMoveDatToRemoteResponse - 91, // 121: volume_server_pb.VolumeServer.VolumeTierMoveDatFromRemote:output_type -> volume_server_pb.VolumeTierMoveDatFromRemoteResponse - 93, // 122: volume_server_pb.VolumeServer.VolumeServerStatus:output_type -> volume_server_pb.VolumeServerStatusResponse - 95, // 123: volume_server_pb.VolumeServer.VolumeServerLeave:output_type -> volume_server_pb.VolumeServerLeaveResponse - 97, // 124: volume_server_pb.VolumeServer.FetchAndWriteNeedle:output_type -> volume_server_pb.FetchAndWriteNeedleResponse - 99, // 125: 
volume_server_pb.VolumeServer.ScrubVolume:output_type -> volume_server_pb.ScrubVolumeResponse - 101, // 126: volume_server_pb.VolumeServer.ScrubEcVolume:output_type -> volume_server_pb.ScrubEcVolumeResponse - 103, // 127: volume_server_pb.VolumeServer.Query:output_type -> volume_server_pb.QueriedStripe - 105, // 128: volume_server_pb.VolumeServer.VolumeNeedleStatus:output_type -> volume_server_pb.VolumeNeedleStatusResponse - 107, // 129: volume_server_pb.VolumeServer.Ping:output_type -> volume_server_pb.PingResponse - 109, // 130: volume_server_pb.VolumeServer.AllocateBlockVolume:output_type -> volume_server_pb.AllocateBlockVolumeResponse - 111, // 131: volume_server_pb.VolumeServer.VolumeServerDeleteBlockVolume:output_type -> volume_server_pb.VolumeServerDeleteBlockVolumeResponse - 113, // 132: volume_server_pb.VolumeServer.SnapshotBlockVolume:output_type -> volume_server_pb.SnapshotBlockVolumeResponse - 115, // 133: volume_server_pb.VolumeServer.DeleteBlockSnapshot:output_type -> volume_server_pb.DeleteBlockSnapshotResponse - 117, // 134: volume_server_pb.VolumeServer.ListBlockSnapshots:output_type -> volume_server_pb.ListBlockSnapshotsResponse - 120, // 135: volume_server_pb.VolumeServer.ExpandBlockVolume:output_type -> volume_server_pb.ExpandBlockVolumeResponse - 82, // [82:136] is the sub-list for method output_type - 28, // [28:82] is the sub-list for method input_type + 121, // 82: volume_server_pb.VolumeServer.PrepareExpandBlockVolume:input_type -> volume_server_pb.PrepareExpandBlockVolumeRequest + 123, // 83: volume_server_pb.VolumeServer.CommitExpandBlockVolume:input_type -> volume_server_pb.CommitExpandBlockVolumeRequest + 125, // 84: volume_server_pb.VolumeServer.CancelExpandBlockVolume:input_type -> volume_server_pb.CancelExpandBlockVolumeRequest + 3, // 85: volume_server_pb.VolumeServer.BatchDelete:output_type -> volume_server_pb.BatchDeleteResponse + 7, // 86: volume_server_pb.VolumeServer.VacuumVolumeCheck:output_type -> 
volume_server_pb.VacuumVolumeCheckResponse + 9, // 87: volume_server_pb.VolumeServer.VacuumVolumeCompact:output_type -> volume_server_pb.VacuumVolumeCompactResponse + 11, // 88: volume_server_pb.VolumeServer.VacuumVolumeCommit:output_type -> volume_server_pb.VacuumVolumeCommitResponse + 13, // 89: volume_server_pb.VolumeServer.VacuumVolumeCleanup:output_type -> volume_server_pb.VacuumVolumeCleanupResponse + 15, // 90: volume_server_pb.VolumeServer.DeleteCollection:output_type -> volume_server_pb.DeleteCollectionResponse + 17, // 91: volume_server_pb.VolumeServer.AllocateVolume:output_type -> volume_server_pb.AllocateVolumeResponse + 19, // 92: volume_server_pb.VolumeServer.VolumeSyncStatus:output_type -> volume_server_pb.VolumeSyncStatusResponse + 21, // 93: volume_server_pb.VolumeServer.VolumeIncrementalCopy:output_type -> volume_server_pb.VolumeIncrementalCopyResponse + 23, // 94: volume_server_pb.VolumeServer.VolumeMount:output_type -> volume_server_pb.VolumeMountResponse + 25, // 95: volume_server_pb.VolumeServer.VolumeUnmount:output_type -> volume_server_pb.VolumeUnmountResponse + 27, // 96: volume_server_pb.VolumeServer.VolumeDelete:output_type -> volume_server_pb.VolumeDeleteResponse + 29, // 97: volume_server_pb.VolumeServer.VolumeMarkReadonly:output_type -> volume_server_pb.VolumeMarkReadonlyResponse + 31, // 98: volume_server_pb.VolumeServer.VolumeMarkWritable:output_type -> volume_server_pb.VolumeMarkWritableResponse + 33, // 99: volume_server_pb.VolumeServer.VolumeConfigure:output_type -> volume_server_pb.VolumeConfigureResponse + 35, // 100: volume_server_pb.VolumeServer.VolumeStatus:output_type -> volume_server_pb.VolumeStatusResponse + 37, // 101: volume_server_pb.VolumeServer.GetState:output_type -> volume_server_pb.GetStateResponse + 39, // 102: volume_server_pb.VolumeServer.SetState:output_type -> volume_server_pb.SetStateResponse + 41, // 103: volume_server_pb.VolumeServer.VolumeCopy:output_type -> volume_server_pb.VolumeCopyResponse + 81, // 
104: volume_server_pb.VolumeServer.ReadVolumeFileStatus:output_type -> volume_server_pb.ReadVolumeFileStatusResponse + 43, // 105: volume_server_pb.VolumeServer.CopyFile:output_type -> volume_server_pb.CopyFileResponse + 46, // 106: volume_server_pb.VolumeServer.ReceiveFile:output_type -> volume_server_pb.ReceiveFileResponse + 48, // 107: volume_server_pb.VolumeServer.ReadNeedleBlob:output_type -> volume_server_pb.ReadNeedleBlobResponse + 50, // 108: volume_server_pb.VolumeServer.ReadNeedleMeta:output_type -> volume_server_pb.ReadNeedleMetaResponse + 52, // 109: volume_server_pb.VolumeServer.WriteNeedleBlob:output_type -> volume_server_pb.WriteNeedleBlobResponse + 54, // 110: volume_server_pb.VolumeServer.ReadAllNeedles:output_type -> volume_server_pb.ReadAllNeedlesResponse + 56, // 111: volume_server_pb.VolumeServer.VolumeTailSender:output_type -> volume_server_pb.VolumeTailSenderResponse + 58, // 112: volume_server_pb.VolumeServer.VolumeTailReceiver:output_type -> volume_server_pb.VolumeTailReceiverResponse + 60, // 113: volume_server_pb.VolumeServer.VolumeEcShardsGenerate:output_type -> volume_server_pb.VolumeEcShardsGenerateResponse + 62, // 114: volume_server_pb.VolumeServer.VolumeEcShardsRebuild:output_type -> volume_server_pb.VolumeEcShardsRebuildResponse + 64, // 115: volume_server_pb.VolumeServer.VolumeEcShardsCopy:output_type -> volume_server_pb.VolumeEcShardsCopyResponse + 66, // 116: volume_server_pb.VolumeServer.VolumeEcShardsDelete:output_type -> volume_server_pb.VolumeEcShardsDeleteResponse + 68, // 117: volume_server_pb.VolumeServer.VolumeEcShardsMount:output_type -> volume_server_pb.VolumeEcShardsMountResponse + 70, // 118: volume_server_pb.VolumeServer.VolumeEcShardsUnmount:output_type -> volume_server_pb.VolumeEcShardsUnmountResponse + 72, // 119: volume_server_pb.VolumeServer.VolumeEcShardRead:output_type -> volume_server_pb.VolumeEcShardReadResponse + 74, // 120: volume_server_pb.VolumeServer.VolumeEcBlobDelete:output_type -> 
volume_server_pb.VolumeEcBlobDeleteResponse + 76, // 121: volume_server_pb.VolumeServer.VolumeEcShardsToVolume:output_type -> volume_server_pb.VolumeEcShardsToVolumeResponse + 78, // 122: volume_server_pb.VolumeServer.VolumeEcShardsInfo:output_type -> volume_server_pb.VolumeEcShardsInfoResponse + 89, // 123: volume_server_pb.VolumeServer.VolumeTierMoveDatToRemote:output_type -> volume_server_pb.VolumeTierMoveDatToRemoteResponse + 91, // 124: volume_server_pb.VolumeServer.VolumeTierMoveDatFromRemote:output_type -> volume_server_pb.VolumeTierMoveDatFromRemoteResponse + 93, // 125: volume_server_pb.VolumeServer.VolumeServerStatus:output_type -> volume_server_pb.VolumeServerStatusResponse + 95, // 126: volume_server_pb.VolumeServer.VolumeServerLeave:output_type -> volume_server_pb.VolumeServerLeaveResponse + 97, // 127: volume_server_pb.VolumeServer.FetchAndWriteNeedle:output_type -> volume_server_pb.FetchAndWriteNeedleResponse + 99, // 128: volume_server_pb.VolumeServer.ScrubVolume:output_type -> volume_server_pb.ScrubVolumeResponse + 101, // 129: volume_server_pb.VolumeServer.ScrubEcVolume:output_type -> volume_server_pb.ScrubEcVolumeResponse + 103, // 130: volume_server_pb.VolumeServer.Query:output_type -> volume_server_pb.QueriedStripe + 105, // 131: volume_server_pb.VolumeServer.VolumeNeedleStatus:output_type -> volume_server_pb.VolumeNeedleStatusResponse + 107, // 132: volume_server_pb.VolumeServer.Ping:output_type -> volume_server_pb.PingResponse + 109, // 133: volume_server_pb.VolumeServer.AllocateBlockVolume:output_type -> volume_server_pb.AllocateBlockVolumeResponse + 111, // 134: volume_server_pb.VolumeServer.VolumeServerDeleteBlockVolume:output_type -> volume_server_pb.VolumeServerDeleteBlockVolumeResponse + 113, // 135: volume_server_pb.VolumeServer.SnapshotBlockVolume:output_type -> volume_server_pb.SnapshotBlockVolumeResponse + 115, // 136: volume_server_pb.VolumeServer.DeleteBlockSnapshot:output_type -> volume_server_pb.DeleteBlockSnapshotResponse + 
117, // 137: volume_server_pb.VolumeServer.ListBlockSnapshots:output_type -> volume_server_pb.ListBlockSnapshotsResponse + 120, // 138: volume_server_pb.VolumeServer.ExpandBlockVolume:output_type -> volume_server_pb.ExpandBlockVolumeResponse + 122, // 139: volume_server_pb.VolumeServer.PrepareExpandBlockVolume:output_type -> volume_server_pb.PrepareExpandBlockVolumeResponse + 124, // 140: volume_server_pb.VolumeServer.CommitExpandBlockVolume:output_type -> volume_server_pb.CommitExpandBlockVolumeResponse + 126, // 141: volume_server_pb.VolumeServer.CancelExpandBlockVolume:output_type -> volume_server_pb.CancelExpandBlockVolumeResponse + 85, // [85:142] is the sub-list for method output_type + 28, // [28:85] is the sub-list for method input_type 28, // [28:28] is the sub-list for extension type_name 28, // [28:28] is the sub-list for extension extendee 0, // [0:28] is the sub-list for field type_name @@ -8334,7 +8643,7 @@ func file_volume_server_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_volume_server_proto_rawDesc), len(file_volume_server_proto_rawDesc)), NumEnums: 1, - NumMessages: 129, + NumMessages: 135, NumExtensions: 0, NumServices: 1, }, diff --git a/weed/pb/volume_server_pb/volume_server_grpc.pb.go b/weed/pb/volume_server_pb/volume_server_grpc.pb.go index 733ea4d2c..ef1a043aa 100644 --- a/weed/pb/volume_server_pb/volume_server_grpc.pb.go +++ b/weed/pb/volume_server_pb/volume_server_grpc.pb.go @@ -73,6 +73,9 @@ const ( VolumeServer_DeleteBlockSnapshot_FullMethodName = "/volume_server_pb.VolumeServer/DeleteBlockSnapshot" VolumeServer_ListBlockSnapshots_FullMethodName = "/volume_server_pb.VolumeServer/ListBlockSnapshots" VolumeServer_ExpandBlockVolume_FullMethodName = "/volume_server_pb.VolumeServer/ExpandBlockVolume" + VolumeServer_PrepareExpandBlockVolume_FullMethodName = "/volume_server_pb.VolumeServer/PrepareExpandBlockVolume" + VolumeServer_CommitExpandBlockVolume_FullMethodName = 
"/volume_server_pb.VolumeServer/CommitExpandBlockVolume" + VolumeServer_CancelExpandBlockVolume_FullMethodName = "/volume_server_pb.VolumeServer/CancelExpandBlockVolume" ) // VolumeServerClient is the client API for VolumeServer service. @@ -141,6 +144,9 @@ type VolumeServerClient interface { DeleteBlockSnapshot(ctx context.Context, in *DeleteBlockSnapshotRequest, opts ...grpc.CallOption) (*DeleteBlockSnapshotResponse, error) ListBlockSnapshots(ctx context.Context, in *ListBlockSnapshotsRequest, opts ...grpc.CallOption) (*ListBlockSnapshotsResponse, error) ExpandBlockVolume(ctx context.Context, in *ExpandBlockVolumeRequest, opts ...grpc.CallOption) (*ExpandBlockVolumeResponse, error) + PrepareExpandBlockVolume(ctx context.Context, in *PrepareExpandBlockVolumeRequest, opts ...grpc.CallOption) (*PrepareExpandBlockVolumeResponse, error) + CommitExpandBlockVolume(ctx context.Context, in *CommitExpandBlockVolumeRequest, opts ...grpc.CallOption) (*CommitExpandBlockVolumeResponse, error) + CancelExpandBlockVolume(ctx context.Context, in *CancelExpandBlockVolumeRequest, opts ...grpc.CallOption) (*CancelExpandBlockVolumeResponse, error) } type volumeServerClient struct { @@ -784,6 +790,36 @@ func (c *volumeServerClient) ExpandBlockVolume(ctx context.Context, in *ExpandBl return out, nil } +func (c *volumeServerClient) PrepareExpandBlockVolume(ctx context.Context, in *PrepareExpandBlockVolumeRequest, opts ...grpc.CallOption) (*PrepareExpandBlockVolumeResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(PrepareExpandBlockVolumeResponse) + err := c.cc.Invoke(ctx, VolumeServer_PrepareExpandBlockVolume_FullMethodName, in, out, cOpts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) CommitExpandBlockVolume(ctx context.Context, in *CommitExpandBlockVolumeRequest, opts ...grpc.CallOption) (*CommitExpandBlockVolumeResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(CommitExpandBlockVolumeResponse) + err := c.cc.Invoke(ctx, VolumeServer_CommitExpandBlockVolume_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) CancelExpandBlockVolume(ctx context.Context, in *CancelExpandBlockVolumeRequest, opts ...grpc.CallOption) (*CancelExpandBlockVolumeResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(CancelExpandBlockVolumeResponse) + err := c.cc.Invoke(ctx, VolumeServer_CancelExpandBlockVolume_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + // VolumeServerServer is the server API for VolumeServer service. // All implementations must embed UnimplementedVolumeServerServer // for forward compatibility. 
@@ -850,6 +886,9 @@ type VolumeServerServer interface { DeleteBlockSnapshot(context.Context, *DeleteBlockSnapshotRequest) (*DeleteBlockSnapshotResponse, error) ListBlockSnapshots(context.Context, *ListBlockSnapshotsRequest) (*ListBlockSnapshotsResponse, error) ExpandBlockVolume(context.Context, *ExpandBlockVolumeRequest) (*ExpandBlockVolumeResponse, error) + PrepareExpandBlockVolume(context.Context, *PrepareExpandBlockVolumeRequest) (*PrepareExpandBlockVolumeResponse, error) + CommitExpandBlockVolume(context.Context, *CommitExpandBlockVolumeRequest) (*CommitExpandBlockVolumeResponse, error) + CancelExpandBlockVolume(context.Context, *CancelExpandBlockVolumeRequest) (*CancelExpandBlockVolumeResponse, error) mustEmbedUnimplementedVolumeServerServer() } @@ -1022,6 +1061,15 @@ func (UnimplementedVolumeServerServer) ListBlockSnapshots(context.Context, *List func (UnimplementedVolumeServerServer) ExpandBlockVolume(context.Context, *ExpandBlockVolumeRequest) (*ExpandBlockVolumeResponse, error) { return nil, status.Error(codes.Unimplemented, "method ExpandBlockVolume not implemented") } +func (UnimplementedVolumeServerServer) PrepareExpandBlockVolume(context.Context, *PrepareExpandBlockVolumeRequest) (*PrepareExpandBlockVolumeResponse, error) { + return nil, status.Error(codes.Unimplemented, "method PrepareExpandBlockVolume not implemented") +} +func (UnimplementedVolumeServerServer) CommitExpandBlockVolume(context.Context, *CommitExpandBlockVolumeRequest) (*CommitExpandBlockVolumeResponse, error) { + return nil, status.Error(codes.Unimplemented, "method CommitExpandBlockVolume not implemented") +} +func (UnimplementedVolumeServerServer) CancelExpandBlockVolume(context.Context, *CancelExpandBlockVolumeRequest) (*CancelExpandBlockVolumeResponse, error) { + return nil, status.Error(codes.Unimplemented, "method CancelExpandBlockVolume not implemented") +} func (UnimplementedVolumeServerServer) mustEmbedUnimplementedVolumeServerServer() {} func 
(UnimplementedVolumeServerServer) testEmbeddedByValue() {} @@ -1934,6 +1982,60 @@ func _VolumeServer_ExpandBlockVolume_Handler(srv interface{}, ctx context.Contex return interceptor(ctx, in, info, handler) } +func _VolumeServer_PrepareExpandBlockVolume_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(PrepareExpandBlockVolumeRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).PrepareExpandBlockVolume(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: VolumeServer_PrepareExpandBlockVolume_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).PrepareExpandBlockVolume(ctx, req.(*PrepareExpandBlockVolumeRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_CommitExpandBlockVolume_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CommitExpandBlockVolumeRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).CommitExpandBlockVolume(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: VolumeServer_CommitExpandBlockVolume_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).CommitExpandBlockVolume(ctx, req.(*CommitExpandBlockVolumeRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_CancelExpandBlockVolume_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CancelExpandBlockVolumeRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return 
srv.(VolumeServerServer).CancelExpandBlockVolume(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: VolumeServer_CancelExpandBlockVolume_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).CancelExpandBlockVolume(ctx, req.(*CancelExpandBlockVolumeRequest)) + } + return interceptor(ctx, in, info, handler) +} + // VolumeServer_ServiceDesc is the grpc.ServiceDesc for VolumeServer service. // It's only intended for direct use with grpc.RegisterService, // and not to be introspected or modified (even as a copy) @@ -2113,6 +2215,18 @@ var VolumeServer_ServiceDesc = grpc.ServiceDesc{ MethodName: "ExpandBlockVolume", Handler: _VolumeServer_ExpandBlockVolume_Handler, }, + { + MethodName: "PrepareExpandBlockVolume", + Handler: _VolumeServer_PrepareExpandBlockVolume_Handler, + }, + { + MethodName: "CommitExpandBlockVolume", + Handler: _VolumeServer_CommitExpandBlockVolume_Handler, + }, + { + MethodName: "CancelExpandBlockVolume", + Handler: _VolumeServer_CancelExpandBlockVolume_Handler, + }, }, Streams: []grpc.StreamDesc{ { diff --git a/weed/server/master_block_registry.go b/weed/server/master_block_registry.go index 5af63ce91..0026f617f 100644 --- a/weed/server/master_block_registry.go +++ b/weed/server/master_block_registry.go @@ -82,6 +82,12 @@ type BlockVolumeEntry struct { // Lease tracking for failover (CP6-3 F2). LastLeaseGrant time.Time LeaseTTL time.Duration + + // CP11A-2: Coordinated expand tracking. + ExpandInProgress bool + ExpandFailed bool // true = primary committed but replica(s) failed; size suppressed + PendingExpandSize uint64 + ExpandEpoch uint64 } // HasReplica returns true if this volume has any replica (checks both new and deprecated fields). 
@@ -190,6 +196,70 @@ func (r *BlockVolumeRegistry) Unregister(name string) *BlockVolumeEntry { return entry } +// AcquireExpandInflight tries to acquire an expand lock for the named volume +// and records the pending expand metadata on the entry. +// Returns false if an expand is already in flight or failed (requires ClearExpandFailed first). +func (r *BlockVolumeRegistry) AcquireExpandInflight(name string, pendingSize, expandEpoch uint64) bool { + r.mu.Lock() + defer r.mu.Unlock() + entry, ok := r.volumes[name] + if !ok { + return false + } + if entry.ExpandInProgress || entry.ExpandFailed { + return false + } + entry.ExpandInProgress = true + entry.PendingExpandSize = pendingSize + entry.ExpandEpoch = expandEpoch + return true +} + +// ReleaseExpandInflight clears all expand tracking fields for the named volume. +// Only call on clean success or clean cancel (all nodes rolled back). +func (r *BlockVolumeRegistry) ReleaseExpandInflight(name string) { + r.mu.Lock() + defer r.mu.Unlock() + entry, ok := r.volumes[name] + if !ok { + return + } + entry.ExpandInProgress = false + entry.ExpandFailed = false + entry.PendingExpandSize = 0 + entry.ExpandEpoch = 0 +} + +// MarkExpandFailed transitions the entry from in-progress to failed. +// ExpandInProgress stays true so heartbeat continues to suppress size updates. +// The entry remains locked until ClearExpandFailed is called (manual reconciliation). +func (r *BlockVolumeRegistry) MarkExpandFailed(name string) { + r.mu.Lock() + defer r.mu.Unlock() + entry, ok := r.volumes[name] + if !ok { + return + } + entry.ExpandFailed = true + // Keep ExpandInProgress=true, PendingExpandSize, ExpandEpoch — all needed for diagnosis. +} + +// ClearExpandFailed resets the expand-failed state so a new expand can be attempted. +// Called by an operator or automated reconciliation after the inconsistency is resolved +// (e.g., failed replica rebuilt or manually expanded). 
+func (r *BlockVolumeRegistry) ClearExpandFailed(name string) { + r.mu.Lock() + defer r.mu.Unlock() + entry, ok := r.volumes[name] + if !ok { + return + } + entry.ExpandInProgress = false + entry.ExpandFailed = false + entry.PendingExpandSize = 0 + entry.ExpandEpoch = 0 +} + // UpdateSize updates the size of a registered volume. // Called only after a successful VS expand to keep registry in sync. func (r *BlockVolumeRegistry) UpdateSize(name string, newSizeBytes uint64) error { @@ -319,7 +389,10 @@ func (r *BlockVolumeRegistry) UpdateFullHeartbeat(server string, infos []*master if isPrimary { // Primary heartbeat: update primary fields. - existing.SizeBytes = info.VolumeSize + // CP11A-2: skip size update during coordinated expand. + if !existing.ExpandInProgress { + existing.SizeBytes = info.VolumeSize + } existing.Epoch = info.Epoch existing.Role = info.Role existing.Status = StatusActive diff --git a/weed/server/master_grpc_server_block.go b/weed/server/master_grpc_server_block.go index e41e8af2f..8bc4fc8de 100644 --- a/weed/server/master_grpc_server_block.go +++ b/weed/server/master_grpc_server_block.go @@ -367,7 +367,8 @@ func (ms *MasterServer) ListBlockSnapshots(ctx context.Context, req *master_pb.L return resp, nil } -// ExpandBlockVolume expands a block volume via the volume server, then updates registry. +// ExpandBlockVolume expands a block volume. For standalone volumes (no replicas), +// uses direct expand. For replicated volumes, uses coordinated prepare/commit/cancel. func (ms *MasterServer) ExpandBlockVolume(ctx context.Context, req *master_pb.ExpandBlockVolumeRequest) (*master_pb.ExpandBlockVolumeResponse, error) { if req.Name == "" { return nil, fmt.Errorf("name is required") @@ -381,28 +382,96 @@ func (ms *MasterServer) ExpandBlockVolume(ctx context.Context, req *master_pb.Ex return nil, fmt.Errorf("block volume %q not found", req.Name) } - // Expand primary first; only update registry on success. 
- capacity, err := ms.blockVSExpand(ctx, entry.VolumeServer, req.Name, req.NewSizeBytes) + // Standalone path: no replicas → direct expand (unchanged behavior). + if len(entry.Replicas) == 0 { + capacity, err := ms.blockVSExpand(ctx, entry.VolumeServer, req.Name, req.NewSizeBytes) + if err != nil { + return nil, fmt.Errorf("expand on %s: %w", entry.VolumeServer, err) + } + if uerr := ms.blockRegistry.UpdateSize(req.Name, capacity); uerr != nil { + glog.Warningf("[reqID=%s] ExpandBlockVolume %q: registry update failed: %v", blockReqID(ctx), req.Name, uerr) + } + return &master_pb.ExpandBlockVolumeResponse{CapacityBytes: capacity}, nil + } + + // Coordinated expand for replicated volumes. + expandEpoch := ms.nextExpandEpoch.Add(1) + + if !ms.blockRegistry.AcquireExpandInflight(req.Name, req.NewSizeBytes, expandEpoch) { + return nil, fmt.Errorf("block volume %q: expand already in progress or failed (requires reconciliation)", req.Name) + } + // Only release on clean success or clean cancel (all nodes rolled back). + // On partial commit failure, MarkExpandFailed keeps the guard up. + expandClean := false + defer func() { + if expandClean { + ms.blockRegistry.ReleaseExpandInflight(req.Name) + } + }() + + // Track prepared nodes for rollback. + var prepared []string + + // PREPARE: primary. + if err := ms.blockVSPrepareExpand(ctx, entry.VolumeServer, req.Name, req.NewSizeBytes, expandEpoch); err != nil { + expandClean = true // nothing to worry about, just release + return nil, fmt.Errorf("prepare expand on primary %s: %w", entry.VolumeServer, err) + } + prepared = append(prepared, entry.VolumeServer) + + // PREPARE: replicas. + for _, ri := range entry.Replicas { + if err := ms.blockVSPrepareExpand(ctx, ri.Server, req.Name, req.NewSizeBytes, expandEpoch); err != nil { + glog.Warningf("[reqID=%s] ExpandBlockVolume %q: prepare on replica %s failed: %v", blockReqID(ctx), req.Name, ri.Server, err) + // Cancel all prepared nodes. 
+ for _, ps := range prepared { + if cerr := ms.blockVSCancelExpand(ctx, ps, req.Name, expandEpoch); cerr != nil { + glog.Warningf("[reqID=%s] ExpandBlockVolume %q: cancel on %s failed: %v", blockReqID(ctx), req.Name, ps, cerr) + } + } + expandClean = true // all cancelled, safe to release + return nil, fmt.Errorf("prepare expand on replica %s: %w", ri.Server, err) + } + prepared = append(prepared, ri.Server) + } + + // COMMIT: primary. + capacity, err := ms.blockVSCommitExpand(ctx, entry.VolumeServer, req.Name, expandEpoch) if err != nil { - return nil, fmt.Errorf("expand on %s: %w", entry.VolumeServer, err) + // Commit failed on primary — cancel all. + for _, ps := range prepared { + if cerr := ms.blockVSCancelExpand(ctx, ps, req.Name, expandEpoch); cerr != nil { + glog.Warningf("[reqID=%s] ExpandBlockVolume %q: cancel on %s after primary commit fail: %v", blockReqID(ctx), req.Name, ps, cerr) + } + } + expandClean = true // all cancelled, safe to release + return nil, fmt.Errorf("commit expand on primary %s: %w", entry.VolumeServer, err) } - // CP8-2: Expand ALL replicas (best-effort, log warning on failure). + // COMMIT: replicas. + allCommitted := true for _, ri := range entry.Replicas { - if _, err := ms.blockVSExpand(ctx, ri.Server, req.Name, req.NewSizeBytes); err != nil { - glog.Warningf("[reqID=%s] ExpandBlockVolume %q: replica expand on %s failed (best-effort): %v", - blockReqID(ctx), req.Name, ri.Server, err) + if _, cerr := ms.blockVSCommitExpand(ctx, ri.Server, req.Name, expandEpoch); cerr != nil { + glog.Warningf("[reqID=%s] ExpandBlockVolume %q: commit on replica %s failed: %v", blockReqID(ctx), req.Name, ri.Server, cerr) + allCommitted = false } } - // Update registry with actual new size. + if !allCommitted { + // Primary committed but replica(s) failed. Mark expand as failed: + // ExpandInProgress stays true → heartbeat won't overwrite SizeBytes. + // Operator must reconcile (rebuild/re-expand failed replicas) then call ClearExpandFailed. 
+ ms.blockRegistry.MarkExpandFailed(req.Name) + return nil, fmt.Errorf("block volume %q: expand committed on primary but failed on one or more replicas (volume degraded, expand locked)", req.Name) + } + + // All committed: update registry and release cleanly. if uerr := ms.blockRegistry.UpdateSize(req.Name, capacity); uerr != nil { - glog.Warningf("[reqID=%s] ExpandBlockVolume %q: registry update failed (VS succeeded): %v", blockReqID(ctx), req.Name, uerr) + glog.Warningf("[reqID=%s] ExpandBlockVolume %q: registry update failed: %v", blockReqID(ctx), req.Name, uerr) } + expandClean = true - return &master_pb.ExpandBlockVolumeResponse{ - CapacityBytes: capacity, - }, nil + return &master_pb.ExpandBlockVolumeResponse{CapacityBytes: capacity}, nil } // createBlockVolumeResponseFromEntry builds a CreateBlockVolumeResponse from a registry entry. diff --git a/weed/server/master_grpc_server_block_test.go b/weed/server/master_grpc_server_block_test.go index ef4c9bc5f..42d1d92e9 100644 --- a/weed/server/master_grpc_server_block_test.go +++ b/weed/server/master_grpc_server_block_test.go @@ -813,14 +813,21 @@ func TestMaster_DeleteRF3_DeletesAllReplicas(t *testing.T) { } } -// ExpandBlockVolume RF=3 expands all replicas. +// ExpandBlockVolume RF=3 uses coordinated prepare/commit on all nodes. 
func TestMaster_ExpandRF3_ExpandsAllReplicas(t *testing.T) { ms := testMasterServerRF3(t) - var expandedServers []string - ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) { - expandedServers = append(expandedServers, server) - return newSize, nil + var preparedServers, committedServers []string + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + preparedServers = append(preparedServers, server) + return nil + } + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + committedServers = append(committedServers, server) + return 2 << 30, nil + } + ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error { + return nil } _, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ @@ -839,10 +846,12 @@ func TestMaster_ExpandRF3_ExpandsAllReplicas(t *testing.T) { t.Fatalf("expand: %v", err) } - // Should have expanded on primary + 2 replicas = 3 servers. - if len(expandedServers) != 3 { - t.Fatalf("expected 3 expand calls (primary + 2 replicas), got %d: %v", - len(expandedServers), expandedServers) + // Should have prepared on primary + 2 replicas = 3 servers. 
+ if len(preparedServers) != 3 { + t.Fatalf("expected 3 prepare calls, got %d: %v", len(preparedServers), preparedServers) + } + if len(committedServers) != 3 { + t.Fatalf("expected 3 commit calls, got %d: %v", len(committedServers), committedServers) } } @@ -1178,3 +1187,468 @@ func TestMaster_PromotionCopiesNvmeFields(t *testing.T) { t.Fatalf("Lookup Nqn after promotion: got %q", lresp.Nqn) } } + +// ============================================================ +// CP11A-2: Coordinated Expand Tests +// ============================================================ + +func testMasterServerWithExpandMocks(t *testing.T) *MasterServer { + t.Helper() + ms := testMasterServer(t) + ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) { + return newSize, nil + } + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + return nil + } + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + return 2 << 30, nil + } + ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error { + return nil + } + return ms +} + +func TestMaster_ExpandCoordinated_Success(t *testing.T) { + ms := testMasterServerWithExpandMocks(t) + ms.blockRegistry.MarkBlockCapable("vs1:9333") + ms.blockRegistry.MarkBlockCapable("vs2:9333") + ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) { + return &blockAllocResult{ + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.test:%s", name), + ISCSIAddr: server, + ReplicaDataAddr: server + ":4001", + ReplicaCtrlAddr: server + ":4002", + }, nil + } + + var prepareCount, commitCount int + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + prepareCount++ + 
return nil + } + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + commitCount++ + return 2 << 30, nil + } + + ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ + Name: "coord-vol", SizeBytes: 1 << 30, + }) + + resp, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "coord-vol", NewSizeBytes: 2 << 30, + }) + if err != nil { + t.Fatalf("expand: %v", err) + } + if resp.CapacityBytes != 2<<30 { + t.Fatalf("capacity: got %d, want %d", resp.CapacityBytes, 2<<30) + } + if prepareCount != 2 { + t.Fatalf("expected 2 prepare calls (primary+replica), got %d", prepareCount) + } + if commitCount != 2 { + t.Fatalf("expected 2 commit calls, got %d", commitCount) + } + entry, _ := ms.blockRegistry.Lookup("coord-vol") + if entry.SizeBytes != 2<<30 { + t.Fatalf("registry size: got %d, want %d", entry.SizeBytes, 2<<30) + } +} + +func TestMaster_ExpandCoordinated_PrepareFailure_Cancels(t *testing.T) { + ms := testMasterServerWithExpandMocks(t) + ms.blockRegistry.MarkBlockCapable("vs1:9333") + ms.blockRegistry.MarkBlockCapable("vs2:9333") + ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) { + return &blockAllocResult{ + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.test:%s", name), + ISCSIAddr: server, + ReplicaDataAddr: server + ":4001", + ReplicaCtrlAddr: server + ":4002", + }, nil + } + + ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ + Name: "cancel-vol", SizeBytes: 1 << 30, + }) + + // Determine which server is primary so we can fail the replica. 
+ entry, _ := ms.blockRegistry.Lookup("cancel-vol") + primaryServer := entry.VolumeServer + + var cancelCount int + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + if server != primaryServer { + return fmt.Errorf("replica prepare failed") + } + return nil + } + ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error { + cancelCount++ + return nil + } + + _, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "cancel-vol", NewSizeBytes: 2 << 30, + }) + if err == nil { + t.Fatal("expected error when replica prepare fails") + } + if cancelCount != 1 { + t.Fatalf("expected 1 cancel call (primary was prepared), got %d", cancelCount) + } + entry, _ = ms.blockRegistry.Lookup("cancel-vol") + if entry.SizeBytes != 1<<30 { + t.Fatalf("registry size should be unchanged: got %d", entry.SizeBytes) + } +} + +func TestMaster_ExpandCoordinated_Standalone_DirectCommit(t *testing.T) { + ms := testMasterServerWithExpandMocks(t) + ms.blockRegistry.MarkBlockCapable("vs1:9333") + + var expandCalled bool + ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) { + expandCalled = true + return newSize, nil + } + + ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ + Name: "solo-vol", SizeBytes: 1 << 30, + }) + + resp, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "solo-vol", NewSizeBytes: 2 << 30, + }) + if err != nil { + t.Fatalf("expand: %v", err) + } + if !expandCalled { + t.Fatal("standalone should use direct expand, not prepare/commit") + } + if resp.CapacityBytes != 2<<30 { + t.Fatalf("capacity: got %d", resp.CapacityBytes) + } +} + +func TestMaster_ExpandCoordinated_ConcurrentRejected(t *testing.T) { + ms := testMasterServerWithExpandMocks(t) + ms.blockRegistry.MarkBlockCapable("vs1:9333") + 
ms.blockRegistry.MarkBlockCapable("vs2:9333") + ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) { + return &blockAllocResult{ + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.test:%s", name), + ISCSIAddr: server, + ReplicaDataAddr: server + ":4001", + ReplicaCtrlAddr: server + ":4002", + }, nil + } + + // Make prepare block until we release it. + blockCh := make(chan struct{}) + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + <-blockCh + return nil + } + + ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ + Name: "conc-vol", SizeBytes: 1 << 30, + }) + + // First expand acquires inflight. + errCh := make(chan error, 1) + go func() { + _, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "conc-vol", NewSizeBytes: 2 << 30, + }) + errCh <- err + }() + + // Give goroutine time to acquire lock. + time.Sleep(20 * time.Millisecond) + + // Second expand should be rejected. + _, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "conc-vol", NewSizeBytes: 2 << 30, + }) + if err == nil { + t.Fatal("concurrent expand should be rejected") + } + + // Release the first expand. + close(blockCh) + <-errCh +} + +func TestMaster_ExpandCoordinated_Idempotent(t *testing.T) { + ms := testMasterServerWithExpandMocks(t) + ms.blockRegistry.MarkBlockCapable("vs1:9333") + + ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ + Name: "idem-vol", SizeBytes: 1 << 30, + }) + + // Same size expand: standalone path, Expand handles no-op internally. 
+ ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) { + return 1 << 30, nil // return current size + } + + resp, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "idem-vol", NewSizeBytes: 1 << 30, + }) + if err != nil { + t.Fatalf("idempotent expand: %v", err) + } + if resp.CapacityBytes != 1<<30 { + t.Fatalf("capacity: got %d", resp.CapacityBytes) + } +} + +func TestMaster_ExpandCoordinated_CommitFailure_MarksInconsistent(t *testing.T) { + ms := testMasterServerWithExpandMocks(t) + ms.blockRegistry.MarkBlockCapable("vs1:9333") + ms.blockRegistry.MarkBlockCapable("vs2:9333") + ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) { + return &blockAllocResult{ + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.test:%s", name), + ISCSIAddr: server, + ReplicaDataAddr: server + ":4001", + ReplicaCtrlAddr: server + ":4002", + }, nil + } + + ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ + Name: "fail-vol", SizeBytes: 1 << 30, + }) + + // Determine which server is primary so we fail only the replica's commit. 
+ entry, _ := ms.blockRegistry.Lookup("fail-vol") + primaryServer := entry.VolumeServer + + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + return nil + } + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + if server != primaryServer { + return 0, fmt.Errorf("replica commit failed") + } + return 2 << 30, nil + } + + _, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "fail-vol", NewSizeBytes: 2 << 30, + }) + if err == nil { + t.Fatal("expected error when replica commit fails") + } + + // Registry size should NOT be updated (inconsistent state). + entry, _ = ms.blockRegistry.Lookup("fail-vol") + if entry.SizeBytes != 1<<30 { + t.Fatalf("registry size should be unchanged: got %d", entry.SizeBytes) + } + + // Finding 1: ExpandFailed must be true, ExpandInProgress must stay true + // so heartbeat cannot overwrite SizeBytes with the primary's new committed size. + if !entry.ExpandFailed { + t.Fatal("entry.ExpandFailed should be true after partial commit failure") + } + if !entry.ExpandInProgress { + t.Fatal("entry.ExpandInProgress should stay true to suppress heartbeat size updates") + } + + // Finding 2: PendingExpandSize and ExpandEpoch should be populated for diagnosis. + if entry.PendingExpandSize != 2<<30 { + t.Fatalf("entry.PendingExpandSize: got %d, want %d", entry.PendingExpandSize, 2<<30) + } + if entry.ExpandEpoch == 0 { + t.Fatal("entry.ExpandEpoch should be non-zero") + } + + // A new expand should be rejected while ExpandFailed is set. + _, err = ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "fail-vol", NewSizeBytes: 2 << 30, + }) + if err == nil { + t.Fatal("expand should be rejected while ExpandFailed is set") + } + + // ClearExpandFailed unblocks new expands. 
+ ms.blockRegistry.ClearExpandFailed("fail-vol") + entry, _ = ms.blockRegistry.Lookup("fail-vol") + if entry.ExpandFailed || entry.ExpandInProgress { + t.Fatal("ClearExpandFailed should reset both flags") + } +} + +func TestMaster_ExpandCoordinated_HeartbeatSuppressedAfterPartialCommit(t *testing.T) { + // Bug 1 regression: after primary commits but replica fails, + // a heartbeat from the primary reporting the new VolumeSize + // must NOT update the registry SizeBytes. + ms := testMasterServerWithExpandMocks(t) + ms.blockRegistry.MarkBlockCapable("vs1:9333") + ms.blockRegistry.MarkBlockCapable("vs2:9333") + ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) { + return &blockAllocResult{ + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.test:%s", name), + ISCSIAddr: server, + ReplicaDataAddr: server + ":4001", + ReplicaCtrlAddr: server + ":4002", + }, nil + } + + ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ + Name: "hb-vol", SizeBytes: 1 << 30, + }) + + entry, _ := ms.blockRegistry.Lookup("hb-vol") + primaryServer := entry.VolumeServer + + // Fail replica commit. + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + return nil + } + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + if server != primaryServer { + return 0, fmt.Errorf("replica commit failed") + } + return 2 << 30, nil + } + + ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "hb-vol", NewSizeBytes: 2 << 30, + }) + + // Volume is now in ExpandFailed state, ExpandInProgress=true. + // Simulate primary heartbeat reporting VolumeSize = 2 GiB (primary already committed). 
+ ms.blockRegistry.UpdateFullHeartbeat(primaryServer, []*master_pb.BlockVolumeInfoMessage{ + { + Path: fmt.Sprintf("/data/%s.blk", "hb-vol"), + VolumeSize: 2 << 30, // primary's new committed size + Epoch: 1, + Role: 1, + }, + }) + + // Registry size must still be the OLD size — heartbeat must not leak the new size. + entry, _ = ms.blockRegistry.Lookup("hb-vol") + if entry.SizeBytes != 1<<30 { + t.Fatalf("heartbeat leaked new size: got %d, want %d", entry.SizeBytes, 1<<30) + } + if !entry.ExpandFailed { + t.Fatal("ExpandFailed should still be true after heartbeat") + } +} + +func TestMaster_ExpandCoordinated_FailoverDuringPrepare(t *testing.T) { + // Scenario: primary and replica are prepared but commit hasn't happened. + // On recovery (OpenBlockVol), prepared state is cleared → VolumeSize stays at old. + // This test validates at the registry/coordinator level. + ms := testMasterServerWithExpandMocks(t) + ms.blockRegistry.MarkBlockCapable("vs1:9333") + ms.blockRegistry.MarkBlockCapable("vs2:9333") + ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) { + return &blockAllocResult{ + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.test:%s", name), + ISCSIAddr: server, + ReplicaDataAddr: server + ":4001", + ReplicaCtrlAddr: server + ":4002", + }, nil + } + + // Prepare succeeds but commit on primary fails (simulating crash). 
+ ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + return nil + } + var cancelCount int + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + return 0, fmt.Errorf("primary crashed during commit") + } + ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error { + cancelCount++ + return nil + } + + ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ + Name: "failover-vol", SizeBytes: 1 << 30, + }) + + _, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "failover-vol", NewSizeBytes: 2 << 30, + }) + if err == nil { + t.Fatal("expected error when primary commit fails") + } + + // Cancel should have been called on all prepared nodes. + if cancelCount < 1 { + t.Fatalf("expected cancel calls, got %d", cancelCount) + } + + // Registry size should be unchanged. + entry, _ := ms.blockRegistry.Lookup("failover-vol") + if entry.SizeBytes != 1<<30 { + t.Fatalf("registry size should be unchanged: got %d", entry.SizeBytes) + } +} + +func TestMaster_ExpandCoordinated_RestartRecovery(t *testing.T) { + // After node restart with prepared state, OpenBlockVol clears it. + // Master re-driving expand would go through full prepare/commit again. + // This test verifies the coordinator doesn't get stuck after a failed expand. 
+ ms := testMasterServerWithExpandMocks(t) + ms.blockRegistry.MarkBlockCapable("vs1:9333") + ms.blockRegistry.MarkBlockCapable("vs2:9333") + ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) { + return &blockAllocResult{ + Path: fmt.Sprintf("/data/%s.blk", name), + IQN: fmt.Sprintf("iqn.test:%s", name), + ISCSIAddr: server, + ReplicaDataAddr: server + ":4001", + ReplicaCtrlAddr: server + ":4002", + }, nil + } + + ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{ + Name: "restart-vol", SizeBytes: 1 << 30, + }) + + // First expand fails at commit. + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + return nil + } + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + return 0, fmt.Errorf("crash") + } + ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error { + return nil + } + + ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "restart-vol", NewSizeBytes: 2 << 30, + }) + + // After "restart" (inflight released), retry should work. 
+ ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + return 2 << 30, nil + } + + resp, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{ + Name: "restart-vol", NewSizeBytes: 2 << 30, + }) + if err != nil { + t.Fatalf("retry expand: %v", err) + } + if resp.CapacityBytes != 2<<30 { + t.Fatalf("capacity: got %d", resp.CapacityBytes) + } +} diff --git a/weed/server/master_server.go b/weed/server/master_server.go index 51bea569d..e14c32057 100644 --- a/weed/server/master_server.go +++ b/weed/server/master_server.go @@ -12,6 +12,7 @@ import ( "runtime" "strings" "sync" + "sync/atomic" "time" "github.com/seaweedfs/seaweedfs/weed/cluster/maintenance" @@ -104,6 +105,10 @@ type MasterServer struct { blockVSDeleteSnap func(ctx context.Context, server string, name string, snapID uint32) error blockVSListSnaps func(ctx context.Context, server string, name string) ([]*volume_server_pb.BlockSnapshotInfo, error) blockVSExpand func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) + blockVSPrepareExpand func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error + blockVSCommitExpand func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) + blockVSCancelExpand func(ctx context.Context, server string, name string, expandEpoch uint64) error + nextExpandEpoch atomic.Uint64 } func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.ServerAddress) *MasterServer { @@ -164,6 +169,9 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se ms.blockVSDeleteSnap = ms.defaultBlockVSDeleteSnap ms.blockVSListSnaps = ms.defaultBlockVSListSnaps ms.blockVSExpand = ms.defaultBlockVSExpand + ms.blockVSPrepareExpand = ms.defaultBlockVSPrepareExpand + ms.blockVSCommitExpand = ms.defaultBlockVSCommitExpand + ms.blockVSCancelExpand = 
ms.defaultBlockVSCancelExpand ms.MasterClient.SetOnPeerUpdateFn(ms.OnPeerUpdate) @@ -215,6 +223,7 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se r.HandleFunc("/block/volume/{name}", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeDeleteHandler)))).Methods("DELETE") r.HandleFunc("/block/volume/{name}", ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeLookupHandler))).Methods("GET") r.HandleFunc("/block/volumes", ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeListHandler))).Methods("GET") + r.HandleFunc("/block/volume/{name}/expand", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeExpandHandler)))).Methods("POST") r.HandleFunc("/block/assign", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockAssignHandler)))).Methods("POST") r.HandleFunc("/block/servers", ms.guard.WhiteList(requestIDMiddleware(ms.blockServersHandler))).Methods("GET") r.HandleFunc("/block/status", ms.guard.WhiteList(requestIDMiddleware(ms.blockStatusHandler))).Methods("GET") @@ -648,3 +657,40 @@ func (ms *MasterServer) defaultBlockVSExpand(ctx context.Context, server string, }) return capacity, err } + +func (ms *MasterServer) defaultBlockVSPrepareExpand(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + return operation.WithVolumeServerClient(false, pb.ServerAddress(server), ms.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + _, err := client.PrepareExpandBlockVolume(ctx, &volume_server_pb.PrepareExpandBlockVolumeRequest{ + Name: name, + NewSizeBytes: newSize, + ExpandEpoch: expandEpoch, + }) + return err + }) +} + +func (ms *MasterServer) defaultBlockVSCommitExpand(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + var capacity uint64 + err := operation.WithVolumeServerClient(false, pb.ServerAddress(server), ms.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + resp, rerr 
:= client.CommitExpandBlockVolume(ctx, &volume_server_pb.CommitExpandBlockVolumeRequest{ + Name: name, + ExpandEpoch: expandEpoch, + }) + if rerr != nil { + return rerr + } + capacity = resp.CapacityBytes + return nil + }) + return capacity, err +} + +func (ms *MasterServer) defaultBlockVSCancelExpand(ctx context.Context, server string, name string, expandEpoch uint64) error { + return operation.WithVolumeServerClient(false, pb.ServerAddress(server), ms.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + _, err := client.CancelExpandBlockVolume(ctx, &volume_server_pb.CancelExpandBlockVolumeRequest{ + Name: name, + ExpandEpoch: expandEpoch, + }) + return err + }) +} diff --git a/weed/server/master_server_handlers_block.go b/weed/server/master_server_handlers_block.go index f8d29f011..d7afc374d 100644 --- a/weed/server/master_server_handlers_block.go +++ b/weed/server/master_server_handlers_block.go @@ -162,6 +162,35 @@ func (ms *MasterServer) blockServersHandler(w http.ResponseWriter, r *http.Reque writeJsonQuiet(w, r, http.StatusOK, infos) } +// blockVolumeExpandHandler handles POST /block/volume/{name}/expand. 
+func (ms *MasterServer) blockVolumeExpandHandler(w http.ResponseWriter, r *http.Request) { + name := mux.Vars(r)["name"] + if name == "" { + writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required")) + return + } + + var req blockapi.ExpandVolumeRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("invalid request body: %w", err)) + return + } + if req.NewSizeBytes == 0 { + writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("new_size_bytes must be > 0")) + return + } + + resp, err := ms.ExpandBlockVolume(r.Context(), &master_pb.ExpandBlockVolumeRequest{ + Name: name, + NewSizeBytes: req.NewSizeBytes, + }) + if err != nil { + writeJsonError(w, r, http.StatusInternalServerError, err) + return + } + writeJsonQuiet(w, r, http.StatusOK, blockapi.ExpandVolumeResponse{CapacityBytes: resp.CapacityBytes}) +} + // blockStatusHandler handles GET /block/status — returns registry configuration for debugging. 
func (ms *MasterServer) blockStatusHandler(w http.ResponseWriter, r *http.Request) { status := map[string]interface{}{ diff --git a/weed/server/qa_block_cp82_adversarial_test.go b/weed/server/qa_block_cp82_adversarial_test.go index 654c1737c..1be417498 100644 --- a/weed/server/qa_block_cp82_adversarial_test.go +++ b/weed/server/qa_block_cp82_adversarial_test.go @@ -43,6 +43,15 @@ func qaCP82Master(t *testing.T) *MasterServer { ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) { return newSize, nil } + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + return nil + } + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + return 2 << 30, nil + } + ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error { + return nil + } ms.blockRegistry.MarkBlockCapable("vs1:9333") ms.blockRegistry.MarkBlockCapable("vs2:9333") ms.blockRegistry.MarkBlockCapable("vs3:9333") @@ -726,31 +735,58 @@ func TestQA_CP82_ExpandRF3_PartialReplicaFailure(t *testing.T) { entry, _ := ms.blockRegistry.Lookup("vol-expand") failServer := entry.Replicas[1].Server - // Override expand mock: one replica fails. - ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) { + // CP11A-2: coordinated expand — set up prepare/commit/cancel mocks. 
+ ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + return nil + } + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { if server == failServer { return 0, fmt.Errorf("disk full on %s", server) } - return newSize, nil + return 2 << 30, nil + } + ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error { + return nil } - // Expand should succeed (primary + one replica succeed, one fails best-effort). + // Under coordinated expand, partial replica commit failure marks the volume degraded. + _, err = ms.ExpandBlockVolume(ctx, &master_pb.ExpandBlockVolumeRequest{ + Name: "vol-expand", + NewSizeBytes: 2 << 30, + }) + if err == nil { + t.Fatal("expand should fail when a required replica commit fails") + } + + // Registry size should NOT be updated (primary committed but replica failed → degraded). + entry, _ = ms.blockRegistry.Lookup("vol-expand") + if entry.SizeBytes != 1<<30 { + t.Fatalf("registry size should be unchanged: got %d, want %d", entry.SizeBytes, uint64(1<<30)) + } + if !entry.ExpandFailed { + t.Fatal("ExpandFailed should be true after partial commit failure") + } + if !entry.ExpandInProgress { + t.Fatal("ExpandInProgress should stay true to suppress heartbeat size updates") + } + + // Cleanup: ClearExpandFailed allows future operations. + ms.blockRegistry.ClearExpandFailed("vol-expand") + + // Now expand with all mocks succeeding should work. 
+ ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + return 2 << 30, nil + } resp, err := ms.ExpandBlockVolume(ctx, &master_pb.ExpandBlockVolumeRequest{ Name: "vol-expand", NewSizeBytes: 2 << 30, }) if err != nil { - t.Fatalf("expand should succeed despite partial replica failure: %v", err) + t.Fatalf("retry expand after clear: %v", err) } if resp.CapacityBytes != 2<<30 { t.Fatalf("capacity: got %d, want %d", resp.CapacityBytes, uint64(2<<30)) } - - // Registry should reflect new size. - entry, _ = ms.blockRegistry.Lookup("vol-expand") - if entry.SizeBytes != 2<<30 { - t.Fatalf("registry size: got %d, want %d", entry.SizeBytes, uint64(2<<30)) - } } // ──────────────────────────────────────────────────────────── diff --git a/weed/server/qa_block_durability_test.go b/weed/server/qa_block_durability_test.go index 74a5cfba6..8e2812880 100644 --- a/weed/server/qa_block_durability_test.go +++ b/weed/server/qa_block_durability_test.go @@ -43,6 +43,15 @@ func qaDurabilityMaster(t *testing.T) *MasterServer { ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) { return newSize, nil } + ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error { + return nil + } + ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) { + return 2 << 30, nil + } + ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error { + return nil + } ms.blockRegistry.MarkBlockCapable("vs1:9333") ms.blockRegistry.MarkBlockCapable("vs2:9333") ms.blockRegistry.MarkBlockCapable("vs3:9333") diff --git a/weed/server/volume_grpc_block.go b/weed/server/volume_grpc_block.go index 585f285e6..f18458675 100644 --- a/weed/server/volume_grpc_block.go +++ b/weed/server/volume_grpc_block.go @@ -125,6 +125,60 @@ func (vs *VolumeServer) 
ListBlockSnapshots(_ context.Context, req *volume_server return resp, nil } +// PrepareExpandBlockVolume prepares a block volume for expansion without committing. +func (vs *VolumeServer) PrepareExpandBlockVolume(_ context.Context, req *volume_server_pb.PrepareExpandBlockVolumeRequest) (*volume_server_pb.PrepareExpandBlockVolumeResponse, error) { + if vs.blockService == nil { + return nil, fmt.Errorf("block service not enabled on this volume server") + } + if req.Name == "" { + return nil, fmt.Errorf("name is required") + } + if req.NewSizeBytes == 0 { + return nil, fmt.Errorf("new_size_bytes must be > 0") + } + + if err := vs.blockService.PrepareExpandBlockVol(req.Name, req.NewSizeBytes, req.ExpandEpoch); err != nil { + return nil, fmt.Errorf("prepare expand block volume %q: %w", req.Name, err) + } + + return &volume_server_pb.PrepareExpandBlockVolumeResponse{}, nil +} + +// CommitExpandBlockVolume commits a prepared block volume expansion. +func (vs *VolumeServer) CommitExpandBlockVolume(_ context.Context, req *volume_server_pb.CommitExpandBlockVolumeRequest) (*volume_server_pb.CommitExpandBlockVolumeResponse, error) { + if vs.blockService == nil { + return nil, fmt.Errorf("block service not enabled on this volume server") + } + if req.Name == "" { + return nil, fmt.Errorf("name is required") + } + + capacity, err := vs.blockService.CommitExpandBlockVol(req.Name, req.ExpandEpoch) + if err != nil { + return nil, fmt.Errorf("commit expand block volume %q: %w", req.Name, err) + } + + return &volume_server_pb.CommitExpandBlockVolumeResponse{ + CapacityBytes: capacity, + }, nil +} + +// CancelExpandBlockVolume cancels a prepared block volume expansion. 
+func (vs *VolumeServer) CancelExpandBlockVolume(_ context.Context, req *volume_server_pb.CancelExpandBlockVolumeRequest) (*volume_server_pb.CancelExpandBlockVolumeResponse, error) { + if vs.blockService == nil { + return nil, fmt.Errorf("block service not enabled on this volume server") + } + if req.Name == "" { + return nil, fmt.Errorf("name is required") + } + + if err := vs.blockService.CancelExpandBlockVol(req.Name, req.ExpandEpoch); err != nil { + return nil, fmt.Errorf("cancel expand block volume %q: %w", req.Name, err) + } + + return &volume_server_pb.CancelExpandBlockVolumeResponse{}, nil +} + // ExpandBlockVolume expands a block volume to a new size. func (vs *VolumeServer) ExpandBlockVolume(_ context.Context, req *volume_server_pb.ExpandBlockVolumeRequest) (*volume_server_pb.ExpandBlockVolumeResponse, error) { if vs.blockService == nil { diff --git a/weed/server/volume_grpc_block_test.go b/weed/server/volume_grpc_block_test.go index 055c1f22b..d5e6bb390 100644 --- a/weed/server/volume_grpc_block_test.go +++ b/weed/server/volume_grpc_block_test.go @@ -201,3 +201,53 @@ func TestVS_ExpandVolumeNotFound(t *testing.T) { t.Fatal("expected error for nonexistent volume") } } + +func TestVS_PrepareExpand(t *testing.T) { + bs, _ := newTestBlockServiceWithDir(t) + bs.CreateBlockVol("prep-vol", 4*1024*1024, "", "") + + if err := bs.PrepareExpandBlockVol("prep-vol", 8*1024*1024, 1); err != nil { + t.Fatalf("PrepareExpandBlockVol: %v", err) + } +} + +func TestVS_CommitExpand(t *testing.T) { + bs, _ := newTestBlockServiceWithDir(t) + bs.CreateBlockVol("commit-vol", 4*1024*1024, "", "") + + if err := bs.PrepareExpandBlockVol("commit-vol", 8*1024*1024, 42); err != nil { + t.Fatalf("prepare: %v", err) + } + capacity, err := bs.CommitExpandBlockVol("commit-vol", 42) + if err != nil { + t.Fatalf("CommitExpandBlockVol: %v", err) + } + if capacity != 8*1024*1024 { + t.Fatalf("capacity: got %d, want %d", capacity, 8*1024*1024) + } +} + +func TestVS_CancelExpand(t *testing.T) { 
+ bs, _ := newTestBlockServiceWithDir(t) + bs.CreateBlockVol("cancel-vol", 4*1024*1024, "", "") + + if err := bs.PrepareExpandBlockVol("cancel-vol", 8*1024*1024, 5); err != nil { + t.Fatalf("prepare: %v", err) + } + if err := bs.CancelExpandBlockVol("cancel-vol", 5); err != nil { + t.Fatalf("CancelExpandBlockVol: %v", err) + } +} + +func TestVS_PrepareExpand_AlreadyInFlight(t *testing.T) { + bs, _ := newTestBlockServiceWithDir(t) + bs.CreateBlockVol("inflight-vol", 4*1024*1024, "", "") + + if err := bs.PrepareExpandBlockVol("inflight-vol", 8*1024*1024, 1); err != nil { + t.Fatalf("first prepare: %v", err) + } + err := bs.PrepareExpandBlockVol("inflight-vol", 16*1024*1024, 2) + if err == nil { + t.Fatal("second prepare should be rejected") + } +} diff --git a/weed/server/volume_server_block.go b/weed/server/volume_server_block.go index b973e246a..05562ecc6 100644 --- a/weed/server/volume_server_block.go +++ b/weed/server/volume_server_block.go @@ -518,6 +518,36 @@ func (bs *BlockService) ExpandBlockVol(name string, newSize uint64) (uint64, err return actualSize, err } +// PrepareExpandBlockVol prepares an expand on the named volume without committing. +func (bs *BlockService) PrepareExpandBlockVol(name string, newSize, expandEpoch uint64) error { + path := bs.volumePath(name) + return bs.blockStore.WithVolume(path, func(vol *blockvol.BlockVol) error { + return vol.PrepareExpand(newSize, expandEpoch) + }) +} + +// CommitExpandBlockVol commits a prepared expand on the named volume. +func (bs *BlockService) CommitExpandBlockVol(name string, expandEpoch uint64) (uint64, error) { + path := bs.volumePath(name) + var actualSize uint64 + err := bs.blockStore.WithVolume(path, func(vol *blockvol.BlockVol) error { + if eerr := vol.CommitExpand(expandEpoch); eerr != nil { + return eerr + } + actualSize = vol.Info().VolumeSize + return nil + }) + return actualSize, err +} + +// CancelExpandBlockVol cancels a prepared expand on the named volume. 
+func (bs *BlockService) CancelExpandBlockVol(name string, expandEpoch uint64) error { + path := bs.volumePath(name) + return bs.blockStore.WithVolume(path, func(vol *blockvol.BlockVol) error { + return vol.CancelExpand(expandEpoch) + }) +} + // volumePath converts a volume name to its .blk file path. func (bs *BlockService) volumePath(name string) string { sanitized := blockvol.SanitizeFilename(name) diff --git a/weed/storage/blockvol/blockapi/client.go b/weed/storage/blockvol/blockapi/client.go index bf2e6739e..a5a624daa 100644 --- a/weed/storage/blockvol/blockapi/client.go +++ b/weed/storage/blockvol/blockapi/client.go @@ -115,6 +115,27 @@ func (c *Client) AssignRole(ctx context.Context, req AssignRequest) error { return checkStatus(resp, http.StatusOK) } +// ExpandVolume expands a block volume to a new size. +func (c *Client) ExpandVolume(ctx context.Context, name string, newSizeBytes uint64) (uint64, error) { + body, err := json.Marshal(ExpandVolumeRequest{NewSizeBytes: newSizeBytes}) + if err != nil { + return 0, fmt.Errorf("marshal request: %w", err) + } + resp, err := c.doRequest(ctx, http.MethodPost, "/block/volume/"+name+"/expand", bytes.NewReader(body)) + if err != nil { + return 0, err + } + defer resp.Body.Close() + if err := checkStatus(resp, http.StatusOK); err != nil { + return 0, err + } + var out ExpandVolumeResponse + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return 0, fmt.Errorf("decode response: %w", err) + } + return out.CapacityBytes, nil +} + // ListServers lists all block-capable volume servers. 
func (c *Client) ListServers(ctx context.Context) ([]ServerInfo, error) { resp, err := c.doRequest(ctx, http.MethodGet, "/block/servers", nil) diff --git a/weed/storage/blockvol/blockapi/types.go b/weed/storage/blockvol/blockapi/types.go index bcae7c978..24be9eb72 100644 --- a/weed/storage/blockvol/blockapi/types.go +++ b/weed/storage/blockvol/blockapi/types.go @@ -64,6 +64,16 @@ type ServerInfo struct { BlockCapable bool `json:"block_capable"` } +// ExpandVolumeRequest is the request body for POST /block/volume/{name}/expand. +type ExpandVolumeRequest struct { + NewSizeBytes uint64 `json:"new_size_bytes"` +} + +// ExpandVolumeResponse is the response for POST /block/volume/{name}/expand. +type ExpandVolumeResponse struct { + CapacityBytes uint64 `json:"capacity_bytes"` +} + // RoleFromString converts a role string to its uint32 wire value. // Returns 0 (RoleNone) for unrecognized strings. func RoleFromString(s string) uint32 { diff --git a/weed/storage/blockvol/blockvol.go b/weed/storage/blockvol/blockvol.go index 3dcf89856..b061d302d 100644 --- a/weed/storage/blockvol/blockvol.go +++ b/weed/storage/blockvol/blockvol.go @@ -213,6 +213,25 @@ func OpenBlockVol(path string, cfgs ...BlockVolConfig) (*BlockVol, error) { return nil, fmt.Errorf("blockvol: validate superblock: %w", err) } + // CP11A-2: Clear stale prepared expand state on recovery. 
+ if sb.PreparedSize != 0 { + log.Printf("blockvol: clearing stale PreparedSize=%d ExpandEpoch=%d on open (crash during prepare phase)", sb.PreparedSize, sb.ExpandEpoch) + sb.PreparedSize = 0 + sb.ExpandEpoch = 0 + if _, err := fd.Seek(0, 0); err != nil { + fd.Close() + return nil, fmt.Errorf("blockvol: seek for prepared clear: %w", err) + } + if _, err := sb.WriteTo(fd); err != nil { + fd.Close() + return nil, fmt.Errorf("blockvol: write superblock for prepared clear: %w", err) + } + if err := fd.Sync(); err != nil { + fd.Close() + return nil, fmt.Errorf("blockvol: sync for prepared clear: %w", err) + } + } + dirtyMap := NewDirtyMap(cfg.DirtyMapShards) // Run WAL recovery: replay entries from tail to head. @@ -1055,22 +1074,19 @@ func (v *BlockVol) ListSnapshots() []SnapshotInfo { var ( ErrShrinkNotSupported = errors.New("blockvol: shrink not supported") ErrSnapshotsPreventResize = errors.New("blockvol: cannot resize with active snapshots") + ErrExpandAlreadyInFlight = errors.New("blockvol: expand already in flight") + ErrExpandEpochMismatch = errors.New("blockvol: expand epoch mismatch") + ErrNoExpandInFlight = errors.New("blockvol: no expand in flight") + ErrSameSize = errors.New("blockvol: new size equals current size") ) -// Expand grows the volume to newSize bytes. newSize must be larger than -// the current size and aligned to BlockSize. Fails if snapshots are active. -func (v *BlockVol) Expand(newSize uint64) error { - if err := v.beginOp(); err != nil { - return err - } - defer v.endOp() - if err := v.writeGate(); err != nil { - return err - } - +// growFile extends the backing file to accommodate newSize bytes of extent data. +// Validates size, alignment, and snapshot constraints. Pauses/resumes flusher. +// Does NOT update VolumeSize in the superblock. 
+func (v *BlockVol) growFile(newSize uint64) error { if newSize <= v.super.VolumeSize { if newSize == v.super.VolumeSize { - return nil // no-op + return nil // no-op, caller should check } return ErrShrinkNotSupported } @@ -1098,12 +1114,121 @@ func (v *BlockVol) Expand(newSize uint64) error { if err := v.fd.Truncate(newFileSize); err != nil { return fmt.Errorf("blockvol: expand truncate: %w", err) } + return nil +} - // Update superblock. +// Expand grows the volume to newSize bytes (standalone direct-commit). +// newSize must be larger than the current size and aligned to BlockSize. +// Fails if snapshots are active. No PreparedSize involved. +func (v *BlockVol) Expand(newSize uint64) error { + if err := v.beginOp(); err != nil { + return err + } + defer v.endOp() + if err := v.writeGate(); err != nil { + return err + } + + if newSize == v.super.VolumeSize { + return nil // no-op + } + + if err := v.growFile(newSize); err != nil { + return err + } + + // Update superblock: direct-commit. v.super.VolumeSize = newSize + v.super.PreparedSize = 0 // defensive clear + v.super.ExpandEpoch = 0 + return v.persistSuperblock() +} + +// PrepareExpand grows the file and records the pending expand in the superblock +// without updating VolumeSize. Writes beyond the old VolumeSize are rejected +// by ValidateWrite until CommitExpand is called. +func (v *BlockVol) PrepareExpand(newSize, expandEpoch uint64) error { + if err := v.beginOp(); err != nil { + return err + } + defer v.endOp() + + v.mu.Lock() + defer v.mu.Unlock() + + if v.super.PreparedSize != 0 { + return ErrExpandAlreadyInFlight + } + + if newSize <= v.super.VolumeSize { + if newSize == v.super.VolumeSize { + return ErrSameSize + } + return ErrShrinkNotSupported + } + + if err := v.growFile(newSize); err != nil { + return err + } + + v.super.PreparedSize = newSize + v.super.ExpandEpoch = expandEpoch + return v.persistSuperblock() +} + +// CommitExpand activates the prepared expand: VolumeSize = PreparedSize. 
+// Returns ErrNoExpandInFlight if no prepare was done, or ErrExpandEpochMismatch +// if the epoch doesn't match. +func (v *BlockVol) CommitExpand(expandEpoch uint64) error { + if err := v.beginOp(); err != nil { + return err + } + defer v.endOp() + + v.mu.Lock() + defer v.mu.Unlock() + + if v.super.PreparedSize == 0 { + return ErrNoExpandInFlight + } + if v.super.ExpandEpoch != expandEpoch { + return ErrExpandEpochMismatch + } + + v.super.VolumeSize = v.super.PreparedSize + v.super.PreparedSize = 0 + v.super.ExpandEpoch = 0 return v.persistSuperblock() } +// CancelExpand clears the prepared expand state without activating it. +// The file stays physically grown (sparse, harmless). +// If expandEpoch is 0, force-cancels regardless of current epoch. +func (v *BlockVol) CancelExpand(expandEpoch uint64) error { + if err := v.beginOp(); err != nil { + return err + } + defer v.endOp() + + v.mu.Lock() + defer v.mu.Unlock() + + if expandEpoch != 0 && v.super.ExpandEpoch != expandEpoch { + return ErrExpandEpochMismatch + } + + v.super.PreparedSize = 0 + v.super.ExpandEpoch = 0 + return v.persistSuperblock() +} + +// ExpandState returns the current prepared expand state. +func (v *BlockVol) ExpandState() (preparedSize, expandEpoch uint64) { + v.mu.RLock() + defer v.mu.RUnlock() + return v.super.PreparedSize, v.super.ExpandEpoch +} + // persistSuperblock writes the superblock to disk and fsyncs. 
func (v *BlockVol) persistSuperblock() error { if _, err := v.fd.Seek(0, 0); err != nil { diff --git a/weed/storage/blockvol/expand_test.go b/weed/storage/blockvol/expand_test.go new file mode 100644 index 000000000..fc7e53471 --- /dev/null +++ b/weed/storage/blockvol/expand_test.go @@ -0,0 +1,303 @@ +package blockvol + +import ( + "bytes" + "path/filepath" + "testing" +) + +const ( + expandVolSize = 1024 * 1024 // 1MB + expandBlkSize = 4096 + expandWALSize = 64 * 1024 // 64KB + expandNewSize = 2 * 1024 * 1024 // 2MB +) + +func createExpandTestVol(t *testing.T) (*BlockVol, string) { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "test.blk") + vol, err := CreateBlockVol(path, CreateOptions{ + VolumeSize: expandVolSize, + BlockSize: expandBlkSize, + WALSize: expandWALSize, + }) + if err != nil { + t.Fatalf("create: %v", err) + } + return vol, path +} + +func TestExpand_Standalone_DirectCommit(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + if err := vol.Expand(expandNewSize); err != nil { + t.Fatalf("expand: %v", err) + } + if vol.Info().VolumeSize != expandNewSize { + t.Fatalf("VolumeSize: got %d, want %d", vol.Info().VolumeSize, expandNewSize) + } + ps, ee := vol.ExpandState() + if ps != 0 || ee != 0 { + t.Fatalf("ExpandState: got (%d,%d), want (0,0)", ps, ee) + } +} + +func TestExpand_Standalone_Idempotent(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + if err := vol.Expand(expandVolSize); err != nil { + t.Fatalf("same-size expand should be no-op: %v", err) + } + if vol.Info().VolumeSize != expandVolSize { + t.Fatalf("VolumeSize changed: %d", vol.Info().VolumeSize) + } +} + +func TestExpand_Standalone_ShrinkRejected(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + err := vol.Expand(expandVolSize / 2) + if err != ErrShrinkNotSupported { + t.Fatalf("expected ErrShrinkNotSupported, got %v", err) + } +} + +func TestExpand_Standalone_SurvivesReopen(t *testing.T) { + vol, 
path := createExpandTestVol(t) + + if err := vol.Expand(expandNewSize); err != nil { + t.Fatalf("expand: %v", err) + } + vol.Close() + + vol2, err := OpenBlockVol(path) + if err != nil { + t.Fatalf("reopen: %v", err) + } + defer vol2.Close() + + if vol2.Info().VolumeSize != expandNewSize { + t.Fatalf("VolumeSize after reopen: got %d, want %d", vol2.Info().VolumeSize, expandNewSize) + } +} + +func TestPrepareExpand_Success(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 42); err != nil { + t.Fatalf("prepare: %v", err) + } + + if vol.Info().VolumeSize != expandVolSize { + t.Fatalf("VolumeSize should be unchanged: %d", vol.Info().VolumeSize) + } + ps, ee := vol.ExpandState() + if ps != expandNewSize || ee != 42 { + t.Fatalf("ExpandState: got (%d,%d), want (%d,42)", ps, ee, expandNewSize) + } +} + +func TestPrepareExpand_WriteBeyondOldSize_Rejected(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("prepare: %v", err) + } + + newLBA := uint64(expandVolSize / expandBlkSize) + data := make([]byte, expandBlkSize) + err := vol.WriteLBA(newLBA, data) + if err == nil { + t.Fatal("write beyond old size should be rejected while in prepared state") + } +} + +func TestPrepareExpand_WriteWithinOldSize_OK(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("prepare: %v", err) + } + + data := make([]byte, expandBlkSize) + data[0] = 0xCC + if err := vol.WriteLBA(0, data); err != nil { + t.Fatalf("write within old size: %v", err) + } + got, err := vol.ReadLBA(0, expandBlkSize) + if err != nil { + t.Fatalf("read: %v", err) + } + if !bytes.Equal(got, data) { + t.Fatal("data mismatch") + } +} + +func TestCommitExpand_Success(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + if err := 
vol.PrepareExpand(expandNewSize, 7); err != nil { + t.Fatalf("prepare: %v", err) + } + if err := vol.CommitExpand(7); err != nil { + t.Fatalf("commit: %v", err) + } + + if vol.Info().VolumeSize != expandNewSize { + t.Fatalf("VolumeSize: got %d, want %d", vol.Info().VolumeSize, expandNewSize) + } + ps, ee := vol.ExpandState() + if ps != 0 || ee != 0 { + t.Fatalf("ExpandState: got (%d,%d), want (0,0)", ps, ee) + } +} + +func TestCommitExpand_WriteBeyondNewSize_OK(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("prepare: %v", err) + } + if err := vol.CommitExpand(1); err != nil { + t.Fatalf("commit: %v", err) + } + + newLBA := uint64(expandVolSize / expandBlkSize) + data := make([]byte, expandBlkSize) + data[0] = 0xDD + if err := vol.WriteLBA(newLBA, data); err != nil { + t.Fatalf("write in expanded region: %v", err) + } + got, err := vol.ReadLBA(newLBA, expandBlkSize) + if err != nil { + t.Fatalf("read: %v", err) + } + if !bytes.Equal(got, data) { + t.Fatal("data mismatch in expanded region") + } +} + +func TestCommitExpand_EpochMismatch_Rejected(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 5); err != nil { + t.Fatalf("prepare: %v", err) + } + err := vol.CommitExpand(99) + if err != ErrExpandEpochMismatch { + t.Fatalf("expected ErrExpandEpochMismatch, got %v", err) + } + if vol.Info().VolumeSize != expandVolSize { + t.Fatalf("VolumeSize should be unchanged: %d", vol.Info().VolumeSize) + } +} + +func TestCancelExpand_ClearsPreparedState(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 3); err != nil { + t.Fatalf("prepare: %v", err) + } + if err := vol.CancelExpand(3); err != nil { + t.Fatalf("cancel: %v", err) + } + + ps, ee := vol.ExpandState() + if ps != 0 || ee != 0 { + t.Fatalf("ExpandState: got (%d,%d), want (0,0)", ps, ee) + } 
+ if vol.Info().VolumeSize != expandVolSize { + t.Fatalf("VolumeSize should be unchanged: %d", vol.Info().VolumeSize) + } +} + +func TestCancelExpand_WriteStillRejectedInNewRange(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("prepare: %v", err) + } + if err := vol.CancelExpand(1); err != nil { + t.Fatalf("cancel: %v", err) + } + + newLBA := uint64(expandVolSize / expandBlkSize) + data := make([]byte, expandBlkSize) + err := vol.WriteLBA(newLBA, data) + if err == nil { + t.Fatal("write in expanded region should still be rejected after cancel") + } +} + +func TestPrepareExpand_AlreadyInFlight_Rejected(t *testing.T) { + vol, _ := createExpandTestVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("first prepare: %v", err) + } + err := vol.PrepareExpand(expandNewSize*2, 2) + if err != ErrExpandAlreadyInFlight { + t.Fatalf("expected ErrExpandAlreadyInFlight, got %v", err) + } +} + +func TestRecovery_PreparedState_Cleared(t *testing.T) { + vol, path := createExpandTestVol(t) + + if err := vol.PrepareExpand(expandNewSize, 10); err != nil { + t.Fatalf("prepare: %v", err) + } + vol.Close() + + vol2, err := OpenBlockVol(path) + if err != nil { + t.Fatalf("reopen: %v", err) + } + defer vol2.Close() + + ps, ee := vol2.ExpandState() + if ps != 0 || ee != 0 { + t.Fatalf("ExpandState after reopen: got (%d,%d), want (0,0)", ps, ee) + } + if vol2.Info().VolumeSize != expandVolSize { + t.Fatalf("VolumeSize should be original after recovery: %d", vol2.Info().VolumeSize) + } +} + +func TestExpand_WithProfile_Single(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "profile.blk") + vol, err := CreateBlockVol(path, CreateOptions{ + VolumeSize: expandVolSize, + BlockSize: expandBlkSize, + WALSize: expandWALSize, + StorageProfile: ProfileSingle, + }) + if err != nil { + t.Fatalf("create: %v", err) + } + defer vol.Close() + + 
if vol.Profile() != ProfileSingle { + t.Fatalf("profile: got %d, want %d", vol.Profile(), ProfileSingle) + } + if err := vol.Expand(expandNewSize); err != nil { + t.Fatalf("expand with single profile: %v", err) + } + if vol.Info().VolumeSize != expandNewSize { + t.Fatalf("VolumeSize: got %d, want %d", vol.Info().VolumeSize, expandNewSize) + } +} diff --git a/weed/storage/blockvol/qa_expand_test.go b/weed/storage/blockvol/qa_expand_test.go new file mode 100644 index 000000000..3ce0c66a0 --- /dev/null +++ b/weed/storage/blockvol/qa_expand_test.go @@ -0,0 +1,588 @@ +package blockvol + +import ( + "bytes" + "errors" + "path/filepath" + "sync" + "sync/atomic" + "testing" + "time" +) + +// ============================================================================= +// CP11A-2 QA Adversarial Tests — Coordinated Expand +// ============================================================================= + +// --- Engine-level adversarial tests --- + +func createQAExpandVol(t *testing.T) (*BlockVol, string) { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "qa-expand.blk") + vol, err := CreateBlockVol(path, CreateOptions{ + VolumeSize: expandVolSize, + BlockSize: expandBlkSize, + WALSize: expandWALSize, + }) + if err != nil { + t.Fatalf("create: %v", err) + } + return vol, path +} + +// T1: ConcurrentPrepare — two goroutines race to PrepareExpand; +// exactly one must win, the other gets ErrExpandAlreadyInFlight. 
+func TestQA_Expand_ConcurrentPrepare(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + const goroutines = 10 + var wins, rejects atomic.Int32 + var wg sync.WaitGroup + start := make(chan struct{}) + + for i := 0; i < goroutines; i++ { + wg.Add(1) + epoch := uint64(i + 1) + go func() { + defer wg.Done() + <-start + err := vol.PrepareExpand(expandNewSize, epoch) + if err == nil { + wins.Add(1) + } else if errors.Is(err, ErrExpandAlreadyInFlight) { + rejects.Add(1) + } else { + t.Errorf("unexpected error: %v", err) + } + }() + } + + close(start) + wg.Wait() + + if wins.Load() != 1 { + t.Fatalf("expected exactly 1 winner, got %d", wins.Load()) + } + if rejects.Load() != int32(goroutines-1) { + t.Fatalf("expected %d rejects, got %d", goroutines-1, rejects.Load()) + } +} + +// T2: CommitWithoutPrepare — CommitExpand with no prior PrepareExpand. +func TestQA_Expand_CommitWithoutPrepare(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + err := vol.CommitExpand(42) + if !errors.Is(err, ErrNoExpandInFlight) { + t.Fatalf("expected ErrNoExpandInFlight, got %v", err) + } + // VolumeSize must not change. + if vol.Info().VolumeSize != expandVolSize { + t.Fatalf("VolumeSize corrupted: %d", vol.Info().VolumeSize) + } +} + +// T3: CancelWithoutPrepare — CancelExpand when nothing is in flight. +// With epoch=0 (force-cancel), should be a harmless no-op. +func TestQA_Expand_CancelWithoutPrepare_ForceEpoch(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + // Force-cancel (epoch=0) when nothing is in flight — should succeed. + if err := vol.CancelExpand(0); err != nil { + t.Fatalf("force-cancel with no inflight should succeed: %v", err) + } + ps, ee := vol.ExpandState() + if ps != 0 || ee != 0 { + t.Fatalf("ExpandState should be clean: (%d, %d)", ps, ee) + } +} + +// T4: CancelWithWrongEpoch — CancelExpand with non-zero wrong epoch. 
+func TestQA_Expand_CancelWithWrongEpoch(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 5); err != nil { + t.Fatalf("prepare: %v", err) + } + + err := vol.CancelExpand(99) + if !errors.Is(err, ErrExpandEpochMismatch) { + t.Fatalf("expected ErrExpandEpochMismatch, got %v", err) + } + + // PreparedSize must still be set (cancel failed). + ps, ee := vol.ExpandState() + if ps != expandNewSize || ee != 5 { + t.Fatalf("ExpandState should be unchanged: (%d, %d)", ps, ee) + } +} + +// T5: ForceCancel — epoch=0 cancels regardless of actual epoch. +func TestQA_Expand_ForceCancel_IgnoresEpoch(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 777); err != nil { + t.Fatalf("prepare: %v", err) + } + + // Force-cancel with epoch=0 should clear regardless. + if err := vol.CancelExpand(0); err != nil { + t.Fatalf("force-cancel: %v", err) + } + ps, ee := vol.ExpandState() + if ps != 0 || ee != 0 { + t.Fatalf("ExpandState should be cleared: (%d, %d)", ps, ee) + } +} + +// T6: DoubleCommit — commit, then commit again. Second must fail. +func TestQA_Expand_DoubleCommit(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("prepare: %v", err) + } + if err := vol.CommitExpand(1); err != nil { + t.Fatalf("first commit: %v", err) + } + + // Second commit: PreparedSize is now 0, so ErrNoExpandInFlight. + err := vol.CommitExpand(1) + if !errors.Is(err, ErrNoExpandInFlight) { + t.Fatalf("expected ErrNoExpandInFlight on double commit, got %v", err) + } +} + +// T7: PrepareAfterCommit — after a successful prepare+commit cycle, +// a new prepare should work (the state machine resets). +func TestQA_Expand_PrepareAfterCommit(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + // First cycle: 1MB -> 2MB. 
+ if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("prepare1: %v", err) + } + if err := vol.CommitExpand(1); err != nil { + t.Fatalf("commit1: %v", err) + } + if vol.Info().VolumeSize != expandNewSize { + t.Fatalf("size after first commit: %d", vol.Info().VolumeSize) + } + + // Second cycle: 2MB -> 4MB. + newSize2 := uint64(4 * 1024 * 1024) + if err := vol.PrepareExpand(newSize2, 2); err != nil { + t.Fatalf("prepare2: %v", err) + } + if err := vol.CommitExpand(2); err != nil { + t.Fatalf("commit2: %v", err) + } + if vol.Info().VolumeSize != newSize2 { + t.Fatalf("size after second commit: %d", vol.Info().VolumeSize) + } +} + +// T8: PrepareAfterCancel — after cancel, a new prepare should succeed. +func TestQA_Expand_PrepareAfterCancel(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("prepare1: %v", err) + } + if err := vol.CancelExpand(1); err != nil { + t.Fatalf("cancel: %v", err) + } + + // Second prepare with different epoch should work. + if err := vol.PrepareExpand(expandNewSize, 2); err != nil { + t.Fatalf("prepare2 after cancel: %v", err) + } + ps, ee := vol.ExpandState() + if ps != expandNewSize || ee != 2 { + t.Fatalf("ExpandState: (%d, %d), want (%d, 2)", ps, ee, expandNewSize) + } +} + +// T9: PrepareShrink — PrepareExpand with size < current must be rejected. +func TestQA_Expand_PrepareShrink(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + err := vol.PrepareExpand(expandVolSize/2, 1) + if !errors.Is(err, ErrShrinkNotSupported) { + t.Fatalf("expected ErrShrinkNotSupported, got %v", err) + } +} + +// T10: PrepareUnaligned — unaligned size rejected. 
+func TestQA_Expand_PrepareUnaligned(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + err := vol.PrepareExpand(expandNewSize+1, 1) + if !errors.Is(err, ErrAlignment) { + t.Fatalf("expected ErrAlignment, got %v", err) + } + // Must not leave state dirty. + ps, ee := vol.ExpandState() + if ps != 0 || ee != 0 { + t.Fatalf("ExpandState should be clean after alignment reject: (%d, %d)", ps, ee) + } +} + +// T11: DataIntegrity — write data before prepare, commit, then verify +// data in both old and new regions. +func TestQA_Expand_DataIntegrityAcrossCommit(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + // Write to LBA 0 before expand. + data := make([]byte, expandBlkSize) + for i := range data { + data[i] = 0xAB + } + if err := vol.WriteLBA(0, data); err != nil { + t.Fatalf("write pre-expand: %v", err) + } + + // Prepare + commit. + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("prepare: %v", err) + } + + // Write to LBA 0 during prepared state (within old range — allowed). + data2 := make([]byte, expandBlkSize) + for i := range data2 { + data2[i] = 0xCD + } + if err := vol.WriteLBA(0, data2); err != nil { + t.Fatalf("write during prepared: %v", err) + } + + if err := vol.CommitExpand(1); err != nil { + t.Fatalf("commit: %v", err) + } + + // Read LBA 0 — should have data2 (0xCD). + got, err := vol.ReadLBA(0, expandBlkSize) + if err != nil { + t.Fatalf("read LBA 0: %v", err) + } + if !bytes.Equal(got, data2) { + t.Fatalf("data mismatch at LBA 0: got %x, want %x", got[0], data2[0]) + } + + // Write to new region (LBA beyond old size). 
+ newLBA := uint64(expandVolSize / expandBlkSize) + data3 := make([]byte, expandBlkSize) + for i := range data3 { + data3[i] = 0xEF + } + if err := vol.WriteLBA(newLBA, data3); err != nil { + t.Fatalf("write new region: %v", err) + } + got3, err := vol.ReadLBA(newLBA, expandBlkSize) + if err != nil { + t.Fatalf("read new region: %v", err) + } + if !bytes.Equal(got3, data3) { + t.Fatalf("data mismatch in new region") + } +} + +// T12: RecoveryClearsAndDataSurvives — crash with PreparedSize set, +// reopen clears it, old data is intact. +func TestQA_Expand_RecoveryClearsAndDataSurvives(t *testing.T) { + vol, path := createQAExpandVol(t) + + // Write data. + data := make([]byte, expandBlkSize) + data[0] = 0x77 + if err := vol.WriteLBA(0, data); err != nil { + t.Fatalf("write: %v", err) + } + // Flush so data reaches extent. + if err := vol.SyncCache(); err != nil { + t.Fatalf("sync: %v", err) + } + time.Sleep(200 * time.Millisecond) // let flusher flush + + // Prepare expand (not committed). + if err := vol.PrepareExpand(expandNewSize, 99); err != nil { + t.Fatalf("prepare: %v", err) + } + vol.Close() + + // Reopen — recovery should clear PreparedSize. + vol2, err := OpenBlockVol(path) + if err != nil { + t.Fatalf("reopen: %v", err) + } + defer vol2.Close() + + ps, ee := vol2.ExpandState() + if ps != 0 || ee != 0 { + t.Fatalf("ExpandState after recovery: (%d, %d)", ps, ee) + } + if vol2.Info().VolumeSize != expandVolSize { + t.Fatalf("VolumeSize should be original: %d", vol2.Info().VolumeSize) + } + + // Data written before prepare should survive. + got, err := vol2.ReadLBA(0, expandBlkSize) + if err != nil { + t.Fatalf("read after recovery: %v", err) + } + if got[0] != 0x77 { + t.Fatalf("data[0]: got %x, want 0x77", got[0]) + } +} + +// T13: CommittedExpandSurvivesReopen — committed expand persists. 
+func TestQA_Expand_CommittedSurvivesReopen(t *testing.T) { + vol, path := createQAExpandVol(t) + + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("prepare: %v", err) + } + if err := vol.CommitExpand(1); err != nil { + t.Fatalf("commit: %v", err) + } + + // Write in new region. + newLBA := uint64(expandVolSize / expandBlkSize) + data := make([]byte, expandBlkSize) + data[0] = 0xAA + if err := vol.WriteLBA(newLBA, data); err != nil { + t.Fatalf("write new region: %v", err) + } + if err := vol.SyncCache(); err != nil { + t.Fatalf("sync: %v", err) + } + time.Sleep(200 * time.Millisecond) + vol.Close() + + // Reopen. + vol2, err := OpenBlockVol(path) + if err != nil { + t.Fatalf("reopen: %v", err) + } + defer vol2.Close() + + if vol2.Info().VolumeSize != expandNewSize { + t.Fatalf("VolumeSize: got %d, want %d", vol2.Info().VolumeSize, expandNewSize) + } + got, err := vol2.ReadLBA(newLBA, expandBlkSize) + if err != nil { + t.Fatalf("read new region: %v", err) + } + if got[0] != 0xAA { + t.Fatalf("data[0]: got %x, want 0xAA", got[0]) + } +} + +// T14: ExpandOnClosedVolume — all expand ops must return ErrVolumeClosed. +func TestQA_Expand_ClosedVolume(t *testing.T) { + vol, _ := createQAExpandVol(t) + vol.Close() + + if err := vol.Expand(expandNewSize); !errors.Is(err, ErrVolumeClosed) { + t.Fatalf("Expand on closed: expected ErrVolumeClosed, got %v", err) + } + if err := vol.PrepareExpand(expandNewSize, 1); !errors.Is(err, ErrVolumeClosed) { + t.Fatalf("PrepareExpand on closed: expected ErrVolumeClosed, got %v", err) + } + if err := vol.CommitExpand(1); !errors.Is(err, ErrVolumeClosed) { + t.Fatalf("CommitExpand on closed: expected ErrVolumeClosed, got %v", err) + } + if err := vol.CancelExpand(1); !errors.Is(err, ErrVolumeClosed) { + t.Fatalf("CancelExpand on closed: expected ErrVolumeClosed, got %v", err) + } +} + +// T15: PrepareExpandSameSize — PrepareExpand with newSize == VolumeSize must fail. 
+// BUG-CP11A2-1 fix: PrepareExpand rejects same-size with ErrSameSize. +func TestQA_Expand_PrepareSameSize(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + err := vol.PrepareExpand(expandVolSize, 1) + if !errors.Is(err, ErrSameSize) { + t.Fatalf("PrepareExpand(sameSize): expected ErrSameSize, got %v", err) + } + // Verify no state was left behind. + ps, ee := vol.ExpandState() + if ps != 0 || ee != 0 { + t.Fatalf("state leaked: PreparedSize=%d ExpandEpoch=%d", ps, ee) + } +} + +// T16: ConcurrentPrepareAndWrite — write I/O during PrepareExpand. +// Writes within old range must succeed, writes beyond must fail. +func TestQA_Expand_ConcurrentWriteDuringPrepare(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + // Start background writes to LBA 0 (within old range). + var writeCount atomic.Int32 + var writeErr atomic.Value + stopCh := make(chan struct{}) + go func() { + data := make([]byte, expandBlkSize) + for { + select { + case <-stopCh: + return + default: + } + err := vol.WriteLBA(0, data) + if err != nil { + writeErr.Store(err) + return + } + writeCount.Add(1) + } + }() + + // Let writes run briefly. + time.Sleep(10 * time.Millisecond) + + // PrepareExpand while writes are happening. + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + close(stopCh) + t.Fatalf("prepare: %v", err) + } + + // Let a few more writes happen. + time.Sleep(10 * time.Millisecond) + close(stopCh) + + if e := writeErr.Load(); e != nil { + t.Fatalf("write error during prepare: %v", e) + } + if writeCount.Load() == 0 { + t.Fatal("no writes completed during test") + } +} + +// T17: ExpandStateRaceWithCommit — concurrent ExpandState reads during commit. +func TestQA_Expand_ExpandStateRaceWithCommit(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("prepare: %v", err) + } + + var wg sync.WaitGroup + // Concurrent ExpandState readers. 
+ for i := 0; i < 5; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < 100; j++ { + ps, ee := vol.ExpandState() + // Valid states: (expandNewSize, 1) before commit, or (0, 0) after. + if ps != 0 && ps != expandNewSize { + t.Errorf("invalid PreparedSize: %d", ps) + } + if ee != 0 && ee != 1 { + t.Errorf("invalid ExpandEpoch: %d", ee) + } + // PreparedSize and ExpandEpoch must be consistent (both set or both cleared). + if (ps == 0) != (ee == 0) { + t.Errorf("inconsistent ExpandState: (%d, %d)", ps, ee) + } + } + }() + } + + // Commit while readers are running. + time.Sleep(1 * time.Millisecond) + if err := vol.CommitExpand(1); err != nil { + t.Fatalf("commit: %v", err) + } + + wg.Wait() +} + +// T18: TrimDuringPreparedExpand — trim within old range must work. +func TestQA_Expand_TrimDuringPrepared(t *testing.T) { + vol, _ := createQAExpandVol(t) + defer vol.Close() + + // Write data. + data := make([]byte, expandBlkSize) + data[0] = 0xFF + if err := vol.WriteLBA(0, data); err != nil { + t.Fatalf("write: %v", err) + } + + // Prepare expand. + if err := vol.PrepareExpand(expandNewSize, 1); err != nil { + t.Fatalf("prepare: %v", err) + } + + // Trim LBA 0 (within old range). + if err := vol.Trim(0, expandBlkSize); err != nil { + t.Fatalf("trim during prepared: %v", err) + } + + // Read should return zeros. + got, err := vol.ReadLBA(0, expandBlkSize) + if err != nil { + t.Fatalf("read after trim: %v", err) + } + zeros := make([]byte, expandBlkSize) + if !bytes.Equal(got, zeros) { + t.Fatalf("expected zeros after trim, got %x at [0]", got[0]) + } +} + +// T19: SuperblockValidate — manually construct superblock with +// PreparedSize == VolumeSize and verify Validate() rejects it. 
+func TestQA_Expand_SuperblockValidatePreparedSize(t *testing.T) { + sb := Superblock{ + Version: CurrentVersion, + VolumeSize: 1024 * 1024, + BlockSize: 4096, + ExtentSize: 65536, + WALSize: 65536, + WALOffset: SuperblockSize, + PreparedSize: 1024 * 1024, // == VolumeSize, should fail + ExpandEpoch: 1, + } + copy(sb.Magic[:], MagicSWBK) + + if err := sb.Validate(); err == nil { + t.Fatal("Validate should reject PreparedSize == VolumeSize") + } +} + +// T20: SuperblockValidate — ExpandEpoch != 0 with PreparedSize == 0. +func TestQA_Expand_SuperblockValidateOrphanEpoch(t *testing.T) { + sb := Superblock{ + Version: CurrentVersion, + VolumeSize: 1024 * 1024, + BlockSize: 4096, + ExtentSize: 65536, + WALSize: 65536, + WALOffset: SuperblockSize, + PreparedSize: 0, + ExpandEpoch: 5, // orphan epoch + } + copy(sb.Magic[:], MagicSWBK) + + if err := sb.Validate(); err == nil { + t.Fatal("Validate should reject ExpandEpoch!=0 when PreparedSize==0") + } +} diff --git a/weed/storage/blockvol/qa_iobackend_config_test.go b/weed/storage/blockvol/qa_iobackend_config_test.go new file mode 100644 index 000000000..fbc7a3547 --- /dev/null +++ b/weed/storage/blockvol/qa_iobackend_config_test.go @@ -0,0 +1,228 @@ +//go:build ignore + +package blockvol + +import ( + "strings" + "testing" +) + +// ============================================================================= +// QA Adversarial Tests for IOBackend Config (Item 3) +// +// Covers: ParseIOBackend, ResolveIOBackend, Validate for IOBackend field, +// edge cases, unknown values, io_uring rejection, case insensitivity. 
+// ============================================================================= + +// --- ParseIOBackend --- + +func TestQA_ParseIOBackend_ValidInputs(t *testing.T) { + cases := []struct { + input string + want IOBackend + }{ + {"auto", IOBackendAuto}, + {"AUTO", IOBackendAuto}, + {"Auto", IOBackendAuto}, + {"", IOBackendAuto}, + {" auto ", IOBackendAuto}, + {"standard", IOBackendStandard}, + {"STANDARD", IOBackendStandard}, + {"Standard", IOBackendStandard}, + {" standard ", IOBackendStandard}, + {"io_uring", IOBackendIOURing}, + {"IO_URING", IOBackendIOURing}, + {"Io_Uring", IOBackendIOURing}, + {"iouring", IOBackendIOURing}, + {"IOURING", IOBackendIOURing}, + } + for _, tc := range cases { + t.Run(tc.input, func(t *testing.T) { + got, err := ParseIOBackend(tc.input) + if err != nil { + t.Fatalf("ParseIOBackend(%q): unexpected error: %v", tc.input, err) + } + if got != tc.want { + t.Fatalf("ParseIOBackend(%q) = %v, want %v", tc.input, got, tc.want) + } + }) + } +} + +func TestQA_ParseIOBackend_InvalidInputs(t *testing.T) { + invalids := []string{ + "spdk", + "uring", + "io-uring", + "io_uring_sqpoll", + "direct", + "aio", + "posix", + "libaio", + "123", + "null", + "none", + } + for _, s := range invalids { + t.Run(s, func(t *testing.T) { + got, err := ParseIOBackend(s) + if err == nil { + t.Fatalf("ParseIOBackend(%q) = %v, want error", s, got) + } + if got != IOBackendAuto { + t.Fatalf("ParseIOBackend(%q) error case should return Auto, got %v", s, got) + } + if !strings.Contains(err.Error(), "unknown IOBackend") { + t.Fatalf("error should mention 'unknown IOBackend', got: %v", err) + } + }) + } +} + +// --- IOBackend.String --- + +func TestQA_IOBackend_String(t *testing.T) { + cases := []struct { + b IOBackend + want string + }{ + {IOBackendAuto, "auto"}, + {IOBackendStandard, "standard"}, + {IOBackendIOURing, "io_uring"}, + {IOBackend(99), "unknown(99)"}, + {IOBackend(-1), "unknown(-1)"}, + } + for _, tc := range cases { + got := tc.b.String() + if got != 
tc.want { + t.Errorf("IOBackend(%d).String() = %q, want %q", int(tc.b), got, tc.want) + } + } +} + +// --- ResolveIOBackend --- + +func TestQA_ResolveIOBackend(t *testing.T) { + // Auto resolves to standard. + if got := ResolveIOBackend(IOBackendAuto); got != IOBackendStandard { + t.Fatalf("ResolveIOBackend(Auto) = %v, want Standard", got) + } + // Standard stays standard. + if got := ResolveIOBackend(IOBackendStandard); got != IOBackendStandard { + t.Fatalf("ResolveIOBackend(Standard) = %v, want Standard", got) + } + // IOURing stays io_uring (resolve doesn't validate, just maps auto). + if got := ResolveIOBackend(IOBackendIOURing); got != IOBackendIOURing { + t.Fatalf("ResolveIOBackend(IOURing) = %v, want IOURing", got) + } +} + +// --- Validate IOBackend field --- + +func TestQA_Config_Validate_IOBackend_AutoOK(t *testing.T) { + cfg := DefaultConfig() + cfg.IOBackend = IOBackendAuto + if err := cfg.Validate(); err != nil { + t.Fatalf("Validate with IOBackendAuto: %v", err) + } +} + +func TestQA_Config_Validate_IOBackend_StandardOK(t *testing.T) { + cfg := DefaultConfig() + cfg.IOBackend = IOBackendStandard + if err := cfg.Validate(); err != nil { + t.Fatalf("Validate with IOBackendStandard: %v", err) + } +} + +func TestQA_Config_Validate_IOBackend_IOURingRejected(t *testing.T) { + cfg := DefaultConfig() + cfg.IOBackend = IOBackendIOURing + err := cfg.Validate() + if err == nil { + t.Fatal("Validate should reject IOBackendIOURing (not yet implemented)") + } + if !strings.Contains(err.Error(), "not yet implemented") { + t.Fatalf("error should mention 'not yet implemented', got: %v", err) + } +} + +func TestQA_Config_Validate_IOBackend_OutOfRange(t *testing.T) { + cfg := DefaultConfig() + cfg.IOBackend = IOBackend(99) + err := cfg.Validate() + if err == nil { + t.Fatal("Validate should reject out-of-range IOBackend") + } + if !strings.Contains(err.Error(), "unknown IOBackend") { + t.Fatalf("error should mention 'unknown IOBackend', got: %v", err) + } +} + +func 
TestQA_Config_Validate_IOBackend_NegativeValue(t *testing.T) { + cfg := DefaultConfig() + cfg.IOBackend = IOBackend(-1) + err := cfg.Validate() + if err == nil { + t.Fatal("Validate should reject negative IOBackend") + } +} + +// --- DefaultConfig IOBackend --- + +func TestQA_DefaultConfig_IOBackend_IsAuto(t *testing.T) { + cfg := DefaultConfig() + if cfg.IOBackend != IOBackendAuto { + t.Fatalf("DefaultConfig().IOBackend = %v, want Auto (zero value)", cfg.IOBackend) + } +} + +// --- applyDefaults does NOT override IOBackend --- + +func TestQA_ApplyDefaults_IOBackend_ZeroStaysAuto(t *testing.T) { + cfg := BlockVolConfig{} + cfg.applyDefaults() + // IOBackend is not in applyDefaults — zero value (Auto) should remain. + if cfg.IOBackend != IOBackendAuto { + t.Fatalf("applyDefaults left IOBackend = %v, want Auto", cfg.IOBackend) + } +} + +func TestQA_ApplyDefaults_IOBackend_ExplicitPreserved(t *testing.T) { + cfg := BlockVolConfig{IOBackend: IOBackendStandard} + cfg.applyDefaults() + if cfg.IOBackend != IOBackendStandard { + t.Fatalf("applyDefaults changed IOBackend from Standard to %v", cfg.IOBackend) + } +} + +// --- Round-trip: parse → resolve → string --- + +func TestQA_IOBackend_RoundTrip(t *testing.T) { + for _, input := range []string{"auto", "standard"} { + b, err := ParseIOBackend(input) + if err != nil { + t.Fatalf("ParseIOBackend(%q): %v", input, err) + } + resolved := ResolveIOBackend(b) + s := resolved.String() + if s != "standard" { + t.Fatalf("round-trip %q → resolve → string = %q, want standard", input, s) + } + } +} + +// --- Iota ordering stability --- + +func TestQA_IOBackend_IotaValues(t *testing.T) { + // These values are persisted/transmitted — they must never change. 
+ if IOBackendAuto != 0 { + t.Fatalf("IOBackendAuto = %d, want 0", IOBackendAuto) + } + if IOBackendStandard != 1 { + t.Fatalf("IOBackendStandard = %d, want 1", IOBackendStandard) + } + if IOBackendIOURing != 2 { + t.Fatalf("IOBackendIOURing = %d, want 2", IOBackendIOURing) + } +} diff --git a/weed/storage/blockvol/qa_storage_profile_test.go b/weed/storage/blockvol/qa_storage_profile_test.go new file mode 100644 index 000000000..bd732b1a5 --- /dev/null +++ b/weed/storage/blockvol/qa_storage_profile_test.go @@ -0,0 +1,567 @@ +package blockvol + +import ( + "bytes" + "crypto/rand" + "errors" + "fmt" + "os" + "path/filepath" + "sync" + "sync/atomic" + "testing" +) + +// ============================================================================= +// QA Adversarial Tests for StorageProfile (CP11A-1) +// +// These tests go beyond the dev-test coverage in storage_profile_test.go: +// - SP-A1: write/read data integrity on single profile +// - SP-A2: concurrent writes with no corruption +// - additional: crash recovery, superblock byte corruption, boundary cases +// ============================================================================= + +// TestQA_Profile_WritePath_SingleCorrect writes multiple blocks at different +// LBAs on a single-profile volume, reads them back, and verifies byte-for-byte +// correctness. This is SP-A1 from the test spec. +func TestQA_Profile_WritePath_SingleCorrect(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "sp-a1.blk") + + vol, err := CreateBlockVol(path, CreateOptions{ + VolumeSize: 256 * 1024, // 256KB = 64 blocks + BlockSize: 4096, + WALSize: 128 * 1024, + StorageProfile: ProfileSingle, + }) + if err != nil { + t.Fatalf("Create: %v", err) + } + defer vol.Close() + + if vol.Profile() != ProfileSingle { + t.Fatalf("Profile() = %v, want single", vol.Profile()) + } + + // Write unique patterns to blocks 0, 10, 30, 63 (last block). 
+ type testBlock struct { + lba uint64 + fill byte + } + blocks := []testBlock{ + {0, 0xAA}, + {10, 0xBB}, + {30, 0xCC}, + {63, 0xDD}, // last block in 256KB volume + } + + for _, b := range blocks { + data := make([]byte, 4096) + for i := range data { + data[i] = b.fill + } + if err := vol.WriteLBA(b.lba, data); err != nil { + t.Fatalf("WriteLBA(%d): %v", b.lba, err) + } + } + + // SyncCache to ensure WAL is durable. + if err := vol.SyncCache(); err != nil { + t.Fatalf("SyncCache: %v", err) + } + + // Read back and verify. + for _, b := range blocks { + got, err := vol.ReadLBA(b.lba, 4096) + if err != nil { + t.Fatalf("ReadLBA(%d): %v", b.lba, err) + } + expected := make([]byte, 4096) + for i := range expected { + expected[i] = b.fill + } + if !bytes.Equal(got, expected) { + t.Errorf("LBA %d: data mismatch (first byte: got 0x%02X, want 0x%02X)", + b.lba, got[0], b.fill) + } + } + + // Unwritten blocks should read as zeros. + zeros, err := vol.ReadLBA(5, 4096) + if err != nil { + t.Fatalf("ReadLBA(5): %v", err) + } + for i, b := range zeros { + if b != 0 { + t.Fatalf("LBA 5 byte[%d] = 0x%02X, want 0x00 (unwritten)", i, b) + } + } +} + +// TestQA_Profile_ConcurrentWrites_Single runs 16 goroutines writing to +// non-overlapping LBAs on a single-profile volume. No data corruption +// or panics should occur. This is SP-A2 from the test spec. +func TestQA_Profile_ConcurrentWrites_Single(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "sp-a2.blk") + + // 1MB volume = 256 blocks. Each of 16 goroutines gets 16 blocks. 
+ vol, err := CreateBlockVol(path, CreateOptions{ + VolumeSize: 1024 * 1024, + BlockSize: 4096, + WALSize: 512 * 1024, + StorageProfile: ProfileSingle, + }) + if err != nil { + t.Fatalf("Create: %v", err) + } + defer vol.Close() + + const goroutines = 16 + const blocksPerGoroutine = 16 + var wg sync.WaitGroup + errs := make([]error, goroutines) + + for g := 0; g < goroutines; g++ { + wg.Add(1) + go func(gid int) { + defer wg.Done() + baseLBA := uint64(gid * blocksPerGoroutine) + fill := byte(gid + 1) // unique fill per goroutine + + for i := 0; i < blocksPerGoroutine; i++ { + data := make([]byte, 4096) + for j := range data { + data[j] = fill + } + if err := vol.WriteLBA(baseLBA+uint64(i), data); err != nil { + errs[gid] = fmt.Errorf("goroutine %d LBA %d: %v", gid, baseLBA+uint64(i), err) + return + } + } + }(g) + } + wg.Wait() + + for i, err := range errs { + if err != nil { + t.Fatalf("goroutine %d: %v", i, err) + } + } + + // Sync and verify all data. + if err := vol.SyncCache(); err != nil { + t.Fatalf("SyncCache: %v", err) + } + + for g := 0; g < goroutines; g++ { + baseLBA := uint64(g * blocksPerGoroutine) + fill := byte(g + 1) + for i := 0; i < blocksPerGoroutine; i++ { + lba := baseLBA + uint64(i) + got, err := vol.ReadLBA(lba, 4096) + if err != nil { + t.Fatalf("ReadLBA(%d): %v", lba, err) + } + for j, b := range got { + if b != fill { + t.Fatalf("LBA %d byte[%d] = 0x%02X, want 0x%02X (goroutine %d)", + lba, j, b, fill, g) + } + } + } + } +} + +// TestQA_Profile_SurvivesCrashRecovery writes data on a single-profile +// volume, simulates a crash (close without clean shutdown), reopens, and +// verifies that the profile metadata and data are intact. 
+func TestQA_Profile_SurvivesCrashRecovery(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "sp-crash.blk")
+
+	vol, err := CreateBlockVol(path, CreateOptions{
+		VolumeSize:     64 * 1024,
+		BlockSize:      4096,
+		WALSize:        32 * 1024,
+		StorageProfile: ProfileSingle,
+	})
+	if err != nil {
+		t.Fatalf("Create: %v", err)
+	}
+
+	// Write known data.
+	data := make([]byte, 4096)
+	for i := range data {
+		data[i] = 0xEE
+	}
+	if err := vol.WriteLBA(0, data); err != nil {
+		t.Fatalf("WriteLBA: %v", err)
+	}
+	if err := vol.SyncCache(); err != nil {
+		t.Fatalf("SyncCache: %v", err)
+	}
+
+	// Close the volume (a clean close, not a true crash; reopen below still runs recovery).
+	vol.Close()
+
+	// Reopen — crash recovery runs.
+	vol2, err := OpenBlockVol(path)
+	if err != nil {
+		t.Fatalf("Reopen: %v", err)
+	}
+	defer vol2.Close()
+
+	if vol2.Profile() != ProfileSingle {
+		t.Errorf("Profile after reopen = %v, want single", vol2.Profile())
+	}
+
+	got, err := vol2.ReadLBA(0, 4096)
+	if err != nil {
+		t.Fatalf("ReadLBA after reopen: %v", err)
+	}
+	if got[0] != 0xEE {
+		t.Errorf("data[0] = 0x%02X, want 0xEE", got[0])
+	}
+}
+
+// TestQA_Profile_CorruptByte_AllValues corrupts the StorageProfile byte on
+// disk to every value 2..255 and verifies that OpenBlockVol rejects each one.
+func TestQA_Profile_CorruptByte_AllValues(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "sp-corrupt-all.blk")
+
+	vol, err := CreateBlockVol(path, CreateOptions{
+		VolumeSize: 64 * 1024,
+		BlockSize:  4096,
+		WALSize:    32 * 1024,
+	})
+	if err != nil {
+		t.Fatalf("Create: %v", err)
+	}
+	vol.Close()
+
+	// Read original file for restoration.
+	original, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read: %v", err)
+	}
+
+	for corruptVal := byte(2); corruptVal != 0; corruptVal++ { // 2..255
+		// Restore original, then corrupt.
+ if err := os.WriteFile(path, original, 0644); err != nil { + t.Fatalf("restore: %v", err) + } + f, err := os.OpenFile(path, os.O_RDWR, 0644) + if err != nil { + t.Fatalf("open: %v", err) + } + if _, err := f.WriteAt([]byte{corruptVal}, 105); err != nil { + f.Close() + t.Fatalf("corrupt: %v", err) + } + f.Close() + + _, err = OpenBlockVol(path) + if err == nil { + t.Errorf("StorageProfile=%d: OpenBlockVol should fail", corruptVal) + } + } +} + +// TestQA_Profile_StripedReject_NoFileLeaked verifies that attempting to +// create a striped volume does not leak partial files, even under different +// config combinations. +func TestQA_Profile_StripedReject_NoFileLeaked(t *testing.T) { + dir := t.TempDir() + + configs := []CreateOptions{ + {VolumeSize: 64 * 1024, StorageProfile: ProfileStriped}, + {VolumeSize: 1024 * 1024, StorageProfile: ProfileStriped, WALSize: 256 * 1024}, + {VolumeSize: 64 * 1024, StorageProfile: ProfileStriped, BlockSize: 512}, + } + + for i, opts := range configs { + path := filepath.Join(dir, fmt.Sprintf("striped-%d.blk", i)) + _, err := CreateBlockVol(path, opts) + if !errors.Is(err, ErrStripedNotImplemented) { + t.Errorf("config %d: error = %v, want ErrStripedNotImplemented", i, err) + } + if _, statErr := os.Stat(path); !os.IsNotExist(statErr) { + t.Errorf("config %d: file %s should not exist after rejected create", i, path) + } + } +} + +// TestQA_Profile_ConcurrentCreateSameFile races multiple goroutines trying +// to create a volume at the same path. Exactly one should succeed (O_EXCL), +// the rest should fail. No partial files should remain from losers. 
+func TestQA_Profile_ConcurrentCreateSameFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "race.blk") + + const racers = 8 + var ( + wg sync.WaitGroup + wins atomic.Int32 + errCount atomic.Int32 + ) + + for i := 0; i < racers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + vol, err := CreateBlockVol(path, CreateOptions{ + VolumeSize: 64 * 1024, + BlockSize: 4096, + WALSize: 32 * 1024, + StorageProfile: ProfileSingle, + }) + if err != nil { + errCount.Add(1) + return + } + wins.Add(1) + vol.Close() + }() + } + wg.Wait() + + if wins.Load() != 1 { + t.Errorf("winners = %d, want exactly 1", wins.Load()) + } + if errCount.Load() != racers-1 { + t.Errorf("errors = %d, want %d", errCount.Load(), racers-1) + } + + // The winner's file should be valid. + vol, err := OpenBlockVol(path) + if err != nil { + t.Fatalf("OpenBlockVol winner file: %v", err) + } + defer vol.Close() + if vol.Profile() != ProfileSingle { + t.Errorf("Profile() = %v, want single", vol.Profile()) + } +} + +// TestQA_Profile_SuperblockByteOffset verifies the StorageProfile byte is +// at the exact expected offset (105) in the on-disk format. This prevents +// silent field-reorder regressions. +func TestQA_Profile_SuperblockByteOffset(t *testing.T) { + sb, err := NewSuperblock(64*1024, CreateOptions{ + StorageProfile: ProfileSingle, + }) + if err != nil { + t.Fatalf("NewSuperblock: %v", err) + } + + // Write the superblock for single profile. + var buf bytes.Buffer + sb.WriteTo(&buf) + data := buf.Bytes() + + if data[105] != 0 { + t.Errorf("offset 105 = %d, want 0 (ProfileSingle)", data[105]) + } + + // Now set striped and check the byte changed. + sb.StorageProfile = uint8(ProfileStriped) + var buf2 bytes.Buffer + sb.WriteTo(&buf2) + data2 := buf2.Bytes() + + if data2[105] != 1 { + t.Errorf("offset 105 = %d, want 1 (ProfileStriped)", data2[105]) + } + + // Verify all other bytes are identical (only offset 105 changed). 
+ for i := range data { + if i == 105 { + continue + } + if data[i] != data2[i] { + t.Errorf("byte[%d] changed: 0x%02X -> 0x%02X (only offset 105 should differ)", i, data[i], data2[i]) + } + } +} + +// TestQA_Profile_MultiBlockWriteRead writes a multi-block (16KB) payload +// at a non-zero LBA and reads it back on a single-profile volume. +// Catches alignment and multi-block dirty-map consistency bugs. +func TestQA_Profile_MultiBlockWriteRead(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "sp-multi.blk") + + vol, err := CreateBlockVol(path, CreateOptions{ + VolumeSize: 512 * 1024, + BlockSize: 4096, + WALSize: 256 * 1024, + StorageProfile: ProfileSingle, + }) + if err != nil { + t.Fatalf("Create: %v", err) + } + defer vol.Close() + + // Write 4 blocks (16KB) of random data at LBA 20. + payload := make([]byte, 16384) + if _, err := rand.Read(payload); err != nil { + t.Fatalf("rand: %v", err) + } + + if err := vol.WriteLBA(20, payload); err != nil { + t.Fatalf("WriteLBA: %v", err) + } + if err := vol.SyncCache(); err != nil { + t.Fatalf("SyncCache: %v", err) + } + + got, err := vol.ReadLBA(20, 16384) + if err != nil { + t.Fatalf("ReadLBA: %v", err) + } + if !bytes.Equal(got, payload) { + t.Error("multi-block payload mismatch") + } +} + +// TestQA_Profile_ExpandPreservesProfile verifies that expanding a +// single-profile volume preserves the profile metadata. +func TestQA_Profile_ExpandPreservesProfile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "sp-expand.blk") + + vol, err := CreateBlockVol(path, CreateOptions{ + VolumeSize: 64 * 1024, + BlockSize: 4096, + WALSize: 32 * 1024, + StorageProfile: ProfileSingle, + }) + if err != nil { + t.Fatalf("Create: %v", err) + } + + // Write at LBA 0 before expand. + data := make([]byte, 4096) + for i := range data { + data[i] = 0x42 + } + if err := vol.WriteLBA(0, data); err != nil { + t.Fatalf("WriteLBA: %v", err) + } + + // Expand to 128KB. 
+ if err := vol.Expand(128 * 1024); err != nil { + t.Fatalf("Expand: %v", err) + } + + if vol.Profile() != ProfileSingle { + t.Errorf("Profile after expand = %v, want single", vol.Profile()) + } + + // Verify data at LBA 0 survived. + got, err := vol.ReadLBA(0, 4096) + if err != nil { + t.Fatalf("ReadLBA(0): %v", err) + } + if got[0] != 0x42 { + t.Errorf("data[0] = 0x%02X, want 0x42", got[0]) + } + + // Write to new region (LBA 16+ is in expanded area). + newData := make([]byte, 4096) + for i := range newData { + newData[i] = 0x99 + } + if err := vol.WriteLBA(20, newData); err != nil { + t.Fatalf("WriteLBA(20): %v", err) + } + + got2, err := vol.ReadLBA(20, 4096) + if err != nil { + t.Fatalf("ReadLBA(20): %v", err) + } + if got2[0] != 0x99 { + t.Errorf("expanded LBA 20 data[0] = 0x%02X, want 0x99", got2[0]) + } + + // Close and reopen — verify profile and data survive. + vol.Close() + vol2, err := OpenBlockVol(path) + if err != nil { + t.Fatalf("Reopen: %v", err) + } + defer vol2.Close() + + if vol2.Profile() != ProfileSingle { + t.Errorf("Profile after reopen = %v, want single", vol2.Profile()) + } + if vol2.Info().VolumeSize != 128*1024 { + t.Errorf("VolumeSize = %d, want %d", vol2.Info().VolumeSize, 128*1024) + } +} + +// TestQA_Profile_SnapshotPreservesProfile creates a snapshot on a +// single-profile volume, writes more data, restores the snapshot, +// and verifies the profile metadata is unchanged. +func TestQA_Profile_SnapshotPreservesProfile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "sp-snap.blk") + + vol, err := CreateBlockVol(path, CreateOptions{ + VolumeSize: 64 * 1024, + BlockSize: 4096, + WALSize: 32 * 1024, + StorageProfile: ProfileSingle, + }) + if err != nil { + t.Fatalf("Create: %v", err) + } + defer vol.Close() + + // Write block A. + dataA := make([]byte, 4096) + for i := range dataA { + dataA[i] = 0xAA + } + if err := vol.WriteLBA(0, dataA); err != nil { + t.Fatalf("WriteLBA(A): %v", err) + } + + // Create snapshot. 
+ if err := vol.CreateSnapshot(1); err != nil { + t.Fatalf("CreateSnapshot: %v", err) + } + + // Write block B (overwrites A at LBA 0). + dataB := make([]byte, 4096) + for i := range dataB { + dataB[i] = 0xBB + } + if err := vol.WriteLBA(0, dataB); err != nil { + t.Fatalf("WriteLBA(B): %v", err) + } + + // Verify live reads B. + got, _ := vol.ReadLBA(0, 4096) + if got[0] != 0xBB { + t.Fatalf("live data[0] = 0x%02X, want 0xBB", got[0]) + } + + // Restore snapshot. + if err := vol.RestoreSnapshot(1); err != nil { + t.Fatalf("RestoreSnapshot: %v", err) + } + + // Profile should be unchanged. + if vol.Profile() != ProfileSingle { + t.Errorf("Profile after restore = %v, want single", vol.Profile()) + } + + // Data should be A again. + got2, _ := vol.ReadLBA(0, 4096) + if got2[0] != 0xAA { + t.Errorf("restored data[0] = 0x%02X, want 0xAA", got2[0]) + } +} diff --git a/weed/storage/blockvol/superblock.go b/weed/storage/blockvol/superblock.go index dc6e5b8ee..272c2c222 100644 --- a/weed/storage/blockvol/superblock.go +++ b/weed/storage/blockvol/superblock.go @@ -42,6 +42,8 @@ type Superblock struct { Epoch uint64 // fencing epoch (0 = no fencing, Phase 3 compat) DurabilityMode uint8 // CP8-3-1: 0=best_effort, 1=sync_all, 2=sync_quorum StorageProfile uint8 // CP11A-1: 0=single, 1=striped (reserved) + PreparedSize uint64 // CP11A-2: pending expand size (0 = no expand in flight) + ExpandEpoch uint64 // CP11A-2: expand operation ID (0 = none) } // superblockOnDisk is the fixed-size on-disk layout (binary.Write/Read target). @@ -65,6 +67,8 @@ type superblockOnDisk struct { Epoch uint64 DurabilityMode uint8 StorageProfile uint8 + PreparedSize uint64 + ExpandEpoch uint64 } // NewSuperblock creates a superblock with defaults and a fresh UUID. 
@@ -135,6 +139,8 @@ func (sb *Superblock) WriteTo(w io.Writer) (int64, error) { Epoch: sb.Epoch, DurabilityMode: sb.DurabilityMode, StorageProfile: sb.StorageProfile, + PreparedSize: sb.PreparedSize, + ExpandEpoch: sb.ExpandEpoch, } // Encode into beginning of buf; rest stays zero (padding). @@ -172,6 +178,10 @@ func (sb *Superblock) WriteTo(w io.Writer) (int64, error) { buf[off] = d.DurabilityMode off++ buf[off] = d.StorageProfile + off++ + endian.PutUint64(buf[off:], d.PreparedSize) + off += 8 + endian.PutUint64(buf[off:], d.ExpandEpoch) n, err := w.Write(buf) return int64(n), err @@ -236,6 +246,10 @@ func ReadSuperblock(r io.Reader) (Superblock, error) { sb.DurabilityMode = buf[off] off++ sb.StorageProfile = buf[off] + off++ + sb.PreparedSize = endian.Uint64(buf[off:]) + off += 8 + sb.ExpandEpoch = endian.Uint64(buf[off:]) return sb, nil } @@ -274,5 +288,11 @@ func (sb *Superblock) Validate() error { if sb.StorageProfile > 1 { return fmt.Errorf("%w: invalid StorageProfile %d", ErrInvalidSuperblock, sb.StorageProfile) } + if sb.PreparedSize != 0 && sb.PreparedSize <= sb.VolumeSize { + return fmt.Errorf("%w: PreparedSize %d must be > VolumeSize %d", ErrInvalidSuperblock, sb.PreparedSize, sb.VolumeSize) + } + if sb.PreparedSize == 0 && sb.ExpandEpoch != 0 { + return fmt.Errorf("%w: ExpandEpoch %d must be 0 when PreparedSize is 0", ErrInvalidSuperblock, sb.ExpandEpoch) + } return nil } diff --git a/weed/storage/blockvol/testrunner/actions/bench.go b/weed/storage/blockvol/testrunner/actions/bench.go index bf94c27c6..1dcc09f2a 100644 --- a/weed/storage/blockvol/testrunner/actions/bench.go +++ b/weed/storage/blockvol/testrunner/actions/bench.go @@ -218,6 +218,7 @@ type fioJobStats struct { IOPS float64 `json:"iops"` BWBytes float64 `json:"bw_bytes"` LatNS fioLatency `json:"lat_ns"` + CLatNS fioLatency `json:"clat_ns"` } type fioLatency struct { @@ -283,11 +284,11 @@ func ParseFioMetric(input, metric, direction string) (float64, error) { case "lat_mean_us": return 
stats.LatNS.Mean / 1000, nil // ns → µs case "lat_p50_us": - return getPercentile(stats.LatNS, "50.000000") / 1000, nil + return getPercentileWithFallback(stats, "50.000000") / 1000, nil case "lat_p99_us": - return getPercentile(stats.LatNS, "99.000000") / 1000, nil + return getPercentileWithFallback(stats, "99.000000") / 1000, nil case "lat_p999_us": - return getPercentile(stats.LatNS, "99.900000") / 1000, nil + return getPercentileWithFallback(stats, "99.900000") / 1000, nil default: return 0, fmt.Errorf("unknown metric %q", metric) } @@ -300,6 +301,15 @@ func getPercentile(lat fioLatency, key string) float64 { return lat.Percentile[key] } +// getPercentileWithFallback tries clat_ns first (fio puts percentiles there), +// then falls back to lat_ns. +func getPercentileWithFallback(stats fioJobStats, key string) float64 { + if v := getPercentile(stats.CLatNS, key); v != 0 { + return v + } + return getPercentile(stats.LatNS, key) +} + // benchStats computes statistics from a comma-separated list of values. // Useful for aggregating results from multiple runs outside the phase repeat system. // Params: diff --git a/weed/storage/blockvol/testrunner/actions/block.go b/weed/storage/blockvol/testrunner/actions/block.go index 206db8246..b6c21cc64 100644 --- a/weed/storage/blockvol/testrunner/actions/block.go +++ b/weed/storage/blockvol/testrunner/actions/block.go @@ -283,6 +283,21 @@ func killStale(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[ stdout, _, _, _ := node.Run(ctx, cmd) actx.Log(" kill_stale %s: %s", process, strings.TrimSpace(stdout)) + // Also kill by port: any process holding ports the scenario needs, + // regardless of binary name. This catches stale binaries with different + // names (e.g., iscsi-target-linux vs iscsi-target-test). 
+ for _, portKey := range []string{"port", "iscsi_port", "nvme_port", "admin_port"} { + if portStr := act.Params[portKey]; portStr != "" { + killCmd := fmt.Sprintf( + "ss -tlnp 2>/dev/null | grep ':%s ' | grep -oP 'pid=\\K[0-9]+' | xargs -r kill -9 2>/dev/null && echo 'killed port %s occupant' || true", + portStr, portStr) + out, _, _, _ := node.Run(ctx, killCmd) + if out = strings.TrimSpace(out); out != "" { + actx.Log(" kill_stale port %s: %s", portStr, out) + } + } + } + // If iscsi cleanup requested, clean up stale iSCSI sessions. if act.Params["iscsi_cleanup"] == "true" { node.Run(ctx, "sudo iscsiadm -m session -u 2>/dev/null; sudo iscsiadm -m node -o delete 2>/dev/null") diff --git a/weed/storage/blockvol/testrunner/actions/database.go b/weed/storage/blockvol/testrunner/actions/database.go index c7eff7b8b..254dbcfa6 100644 --- a/weed/storage/blockvol/testrunner/actions/database.go +++ b/weed/storage/blockvol/testrunner/actions/database.go @@ -70,10 +70,10 @@ func sqliteInsertRows(ctx context.Context, actx *tr.ActionContext, act tr.Action // Generate SQL in a temp file with BEGIN/COMMIT, then pipe to sqlite3. // Use bash -c with \x27 for single quotes to avoid quoting issues with sudo. 
- tmpFile := "/tmp/sw_sqlite_insert.sql" + tmpFile := tempPath(actx, "sqlite_insert.sql") cmd := fmt.Sprintf( - `bash -c 'printf "BEGIN;\n" > %s; for i in $(seq 1 %s); do printf "INSERT INTO %s (data) VALUES (\x27row-%%d\x27);\n" $i; done >> %s; printf "COMMIT;\n" >> %s; sqlite3 %s < %s; rm -f %s'`, - tmpFile, count, table, tmpFile, tmpFile, path, tmpFile, tmpFile) + `bash -c 'mkdir -p %s; printf "BEGIN;\n" > %s; for i in $(seq 1 %s); do printf "INSERT INTO %s (data) VALUES (\x27row-%%d\x27);\n" $i; done >> %s; printf "COMMIT;\n" >> %s; sqlite3 %s < %s; rm -f %s'`, + actx.TempRoot, tmpFile, count, table, tmpFile, tmpFile, path, tmpFile, tmpFile) _, stderr, code, err := node.RunRoot(ctx, cmd) if err != nil || code != 0 { return nil, fmt.Errorf("sqlite_insert_rows: code=%d stderr=%s err=%v", code, stderr, err) diff --git a/weed/storage/blockvol/testrunner/actions/devops.go b/weed/storage/blockvol/testrunner/actions/devops.go index 2625a1a8d..d3d4724df 100644 --- a/weed/storage/blockvol/testrunner/actions/devops.go +++ b/weed/storage/blockvol/testrunner/actions/devops.go @@ -4,9 +4,12 @@ import ( "context" "encoding/json" "fmt" + "net" + "strconv" "strings" "time" + "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/blockapi" tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner" ) @@ -18,9 +21,41 @@ func RegisterDevOpsActions(r *tr.Registry) { r.RegisterFunc("stop_weed", tr.TierDevOps, stopWeed) r.RegisterFunc("wait_cluster_ready", tr.TierDevOps, waitClusterReady) r.RegisterFunc("create_block_volume", tr.TierDevOps, createBlockVolume) + r.RegisterFunc("expand_block_volume", tr.TierDevOps, expandBlockVolume) + r.RegisterFunc("lookup_block_volume", tr.TierDevOps, lookupBlockVolume) + r.RegisterFunc("delete_block_volume", tr.TierDevOps, deleteBlockVolume) + r.RegisterFunc("wait_block_servers", tr.TierDevOps, waitBlockServers) r.RegisterFunc("cluster_status", tr.TierDevOps, clusterStatus) } +// setISCSIVars sets the save_as_iscsi_host/port/addr/iqn vars 
from a VolumeInfo. +// When the iSCSI addr has no host (e.g. ":3275"), falls back to the volume server's host. +func setISCSIVars(actx *tr.ActionContext, prefix string, info *blockapi.VolumeInfo) { + actx.Vars[prefix+"_capacity"] = strconv.FormatUint(info.SizeBytes, 10) + actx.Vars[prefix+"_iscsi_addr"] = info.ISCSIAddr + actx.Vars[prefix+"_iqn"] = info.IQN + if info.ISCSIAddr != "" { + host, port, _ := net.SplitHostPort(info.ISCSIAddr) + if host == "" && info.VolumeServer != "" { + host, _, _ = net.SplitHostPort(info.VolumeServer) + } + actx.Vars[prefix+"_iscsi_host"] = host + actx.Vars[prefix+"_iscsi_port"] = port + } +} + +// blockAPIClient builds a blockapi.Client from the master_url param or var. +func blockAPIClient(actx *tr.ActionContext, act tr.Action) (*blockapi.Client, error) { + masterURL := act.Params["master_url"] + if masterURL == "" { + masterURL = actx.Vars["master_url"] + } + if masterURL == "" { + return nil, fmt.Errorf("master_url param or var required") + } + return blockapi.NewClient(masterURL), nil +} + // buildDeployWeed cross-compiles the weed binary and uploads to all nodes. func buildDeployWeed(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { repoDir := actx.Vars["repo_dir"] @@ -30,7 +65,7 @@ func buildDeployWeed(ctx context.Context, actx *tr.ActionContext, act tr.Action) actx.Log(" cross-compiling weed binary...") localBin := repoDir + "/weed-linux" - buildCmd := fmt.Sprintf("cd %s && GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o weed-linux ./weed/command", repoDir) + buildCmd := fmt.Sprintf("cd %s && GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o weed-linux ./weed", repoDir) ln := tr.NewLocalNode("build-host") _, stderr, code, err := ln.Run(ctx, buildCmd) @@ -82,7 +117,7 @@ func startWeedMaster(ctx context.Context, actx *tr.ActionContext, act tr.Action) // Ensure directory exists. 
node.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", dir)) - cmd := fmt.Sprintf("setsid %sweed master -port=%s -mdir=%s %s %s/master.log 2>&1 & echo $!", + cmd := fmt.Sprintf("sh -c 'nohup %sweed master -port=%s -mdir=%s %s %s/master.log 2>&1 & echo $!'", tr.UploadBasePath, port, dir, extraArgs, dir) stdout, stderr, code, err := node.RunRoot(ctx, cmd) if err != nil || code != 0 { @@ -117,7 +152,7 @@ func startWeedVolume(ctx context.Context, actx *tr.ActionContext, act tr.Action) node.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", dir)) - cmd := fmt.Sprintf("setsid %sweed volume -port=%s -mserver=%s -dir=%s %s %s/volume.log 2>&1 & echo $!", + cmd := fmt.Sprintf("sh -c 'nohup %sweed volume -port=%s -mserver=%s -dir=%s %s %s/volume.log 2>&1 & echo $!'", tr.UploadBasePath, port, master, dir, extraArgs, dir) stdout, stderr, code, err := node.RunRoot(ctx, cmd) if err != nil || code != 0 { @@ -155,7 +190,7 @@ func stopWeed(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[s actx.Log(" force-killed PID %s", pid) return nil, nil case <-ticker.C: - _, _, code, _ := node.Run(ctx, fmt.Sprintf("kill -0 %s 2>/dev/null", pid)) + _, _, code, _ := node.RunRoot(ctx, fmt.Sprintf("kill -0 %s 2>/dev/null", pid)) if code != 0 { actx.Log(" PID %s exited gracefully", pid) return nil, nil @@ -209,29 +244,194 @@ func waitClusterReady(ctx context.Context, actx *tr.ActionContext, act tr.Action } } -// createBlockVolume creates a block volume via the master assign API. +// createBlockVolume creates a block volume via the master block API. +// Params: name, size (human e.g. "50M") or size_bytes, replica_factor (default 1). +// Sets save_as=JSON, save_as_capacity, save_as_iscsi_addr, save_as_iqn. 
func createBlockVolume(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { - node, err := getNode(actx, act.Node) + client, err := blockAPIClient(actx, act) if err != nil { return nil, fmt.Errorf("create_block_volume: %w", err) } - masterURL := act.Params["master_url"] - if masterURL == "" { - return nil, fmt.Errorf("create_block_volume: master_url param required") + name := act.Params["name"] + if name == "" { + return nil, fmt.Errorf("create_block_volume: name param required") } - size := act.Params["size"] - if size == "" { - size = "1g" + + var sizeBytes uint64 + if sb := act.Params["size_bytes"]; sb != "" { + sizeBytes, err = strconv.ParseUint(sb, 10, 64) + if err != nil { + return nil, fmt.Errorf("create_block_volume: invalid size_bytes: %w", err) + } + } else { + size := act.Params["size"] + if size == "" { + size = "1G" + } + sizeBytes, err = parseSizeBytes(size) + if err != nil { + return nil, fmt.Errorf("create_block_volume: %w", err) + } } - cmd := fmt.Sprintf("curl -s -X POST '%s/vol/assign?type=block&size=%s' 2>/dev/null", masterURL, size) - stdout, stderr, code, err := node.Run(ctx, cmd) - if err != nil || code != 0 { - return nil, fmt.Errorf("create_block_volume: code=%d stderr=%s err=%v", code, stderr, err) + rf := parseInt(act.Params["replica_factor"], 1) + + info, err := client.CreateVolume(ctx, blockapi.CreateVolumeRequest{ + Name: name, + SizeBytes: sizeBytes, + ReplicaFactor: rf, + }) + if err != nil { + return nil, fmt.Errorf("create_block_volume: %w", err) } - return map[string]string{"value": strings.TrimSpace(stdout)}, nil + jsonBytes, _ := json.Marshal(info) + actx.Log(" created block volume %s (size=%d, rf=%d)", name, info.SizeBytes, rf) + + // Set multi-var outputs. + if act.SaveAs != "" { + setISCSIVars(actx, act.SaveAs, info) + } + + return map[string]string{"value": string(jsonBytes)}, nil +} + +// expandBlockVolume expands a block volume via master block API. +// Params: name, new_size (human e.g. 
"100M") or new_size_bytes. +func expandBlockVolume(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { + client, err := blockAPIClient(actx, act) + if err != nil { + return nil, fmt.Errorf("expand_block_volume: %w", err) + } + + name := act.Params["name"] + if name == "" { + return nil, fmt.Errorf("expand_block_volume: name param required") + } + + var newSizeBytes uint64 + if sb := act.Params["new_size_bytes"]; sb != "" { + newSizeBytes, err = strconv.ParseUint(sb, 10, 64) + if err != nil { + return nil, fmt.Errorf("expand_block_volume: invalid new_size_bytes: %w", err) + } + } else { + ns := act.Params["new_size"] + if ns == "" { + return nil, fmt.Errorf("expand_block_volume: new_size or new_size_bytes param required") + } + newSizeBytes, err = parseSizeBytes(ns) + if err != nil { + return nil, fmt.Errorf("expand_block_volume: %w", err) + } + } + + capacity, err := client.ExpandVolume(ctx, name, newSizeBytes) + if err != nil { + return nil, fmt.Errorf("expand_block_volume: %w", err) + } + + actx.Log(" expanded block volume %s -> %d bytes", name, capacity) + return map[string]string{"value": strconv.FormatUint(capacity, 10)}, nil +} + +// lookupBlockVolume looks up a block volume via master block API. +// Params: name. Sets save_as_capacity, save_as_iscsi_addr, save_as_iqn, save_as_iscsi_host, save_as_iscsi_port. 
+func lookupBlockVolume(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { + client, err := blockAPIClient(actx, act) + if err != nil { + return nil, fmt.Errorf("lookup_block_volume: %w", err) + } + + name := act.Params["name"] + if name == "" { + return nil, fmt.Errorf("lookup_block_volume: name param required") + } + + info, err := client.LookupVolume(ctx, name) + if err != nil { + return nil, fmt.Errorf("lookup_block_volume: %w", err) + } + + if act.SaveAs != "" { + setISCSIVars(actx, act.SaveAs, info) + } + + actx.Log(" looked up %s: size=%d iscsi=%s", name, info.SizeBytes, info.ISCSIAddr) + return map[string]string{"value": strconv.FormatUint(info.SizeBytes, 10)}, nil +} + +// deleteBlockVolume deletes a block volume via master block API. +func deleteBlockVolume(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { + client, err := blockAPIClient(actx, act) + if err != nil { + return nil, fmt.Errorf("delete_block_volume: %w", err) + } + + name := act.Params["name"] + if name == "" { + return nil, fmt.Errorf("delete_block_volume: name param required") + } + + if err := client.DeleteVolume(ctx, name); err != nil { + return nil, fmt.Errorf("delete_block_volume: %w", err) + } + + actx.Log(" deleted block volume %s", name) + return nil, nil +} + +// waitBlockServers polls master until N block-capable servers are registered. +// Params: count (default 1), timeout (default 60s). 
+func waitBlockServers(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { + client, err := blockAPIClient(actx, act) + if err != nil { + return nil, fmt.Errorf("wait_block_servers: %w", err) + } + + want := parseInt(act.Params["count"], 1) + + timeout := 60 * time.Second + if t, ok := act.Params["timeout"]; ok { + if d, err := parseDuration(t); err == nil { + timeout = d + } + } + + timeoutCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + pollCount := 0 + for { + select { + case <-timeoutCtx.Done(): + return nil, fmt.Errorf("wait_block_servers: timeout waiting for %d servers after %s (polled %d times)", want, timeout, pollCount) + case <-ticker.C: + pollCount++ + servers, err := client.ListServers(timeoutCtx) + if err != nil { + actx.Log(" poll %d: error: %v", pollCount, err) + continue + } + capable := 0 + for _, s := range servers { + if s.BlockCapable { + capable++ + } + } + if pollCount <= 3 || pollCount%10 == 0 { + actx.Log(" poll %d: %d/%d block-capable servers (total %d)", pollCount, capable, want, len(servers)) + } + if capable >= want { + actx.Log(" %d block-capable servers ready", capable) + return map[string]string{"value": strconv.Itoa(capable)}, nil + } + } + } } // clusterStatus fetches the full cluster status JSON. 
diff --git a/weed/storage/blockvol/testrunner/actions/devops_test.go b/weed/storage/blockvol/testrunner/actions/devops_test.go index 9aab7277c..1e0335762 100644 --- a/weed/storage/blockvol/testrunner/actions/devops_test.go +++ b/weed/storage/blockvol/testrunner/actions/devops_test.go @@ -18,6 +18,10 @@ func TestDevOpsActions_Registration(t *testing.T) { "stop_weed", "wait_cluster_ready", "create_block_volume", + "expand_block_volume", + "lookup_block_volume", + "delete_block_volume", + "wait_block_servers", "cluster_status", } @@ -35,8 +39,8 @@ func TestDevOpsActions_Tier(t *testing.T) { byTier := registry.ListByTier() devopsActions := byTier[tr.TierDevOps] - if len(devopsActions) != 7 { - t.Errorf("devops tier has %d actions, want 7", len(devopsActions)) + if len(devopsActions) != 11 { + t.Errorf("devops tier has %d actions, want 11", len(devopsActions)) } // Verify all are in devops tier. @@ -80,11 +84,11 @@ func TestAllActions_Registration(t *testing.T) { if n := len(byTier[tr.TierCore]); n != 11 { t.Errorf("core: %d, want 11", n) } - if n := len(byTier[tr.TierBlock]); n != 55 { - t.Errorf("block: %d, want 55", n) + if n := len(byTier[tr.TierBlock]); n != 56 { + t.Errorf("block: %d, want 56", n) } - if n := len(byTier[tr.TierDevOps]); n != 7 { - t.Errorf("devops: %d, want 7", n) + if n := len(byTier[tr.TierDevOps]); n != 11 { + t.Errorf("devops: %d, want 11", n) } if n := len(byTier[tr.TierChaos]); n != 5 { t.Errorf("chaos: %d, want 5", n) @@ -93,13 +97,13 @@ func TestAllActions_Registration(t *testing.T) { t.Errorf("k8s: %d, want 14", n) } - // Total should be 92 (89 existing + 3 profiling: pprof_capture, vmstat_capture, iostat_capture). + // Total should be 97 (92 prev + 4 devops: expand/lookup/delete/wait_block_servers + 1 block: iscsi_login_direct). 
total := 0 for _, actions := range byTier { total += len(actions) } - if total != 92 { - t.Errorf("total actions: %d, want 92", total) + if total != 97 { + t.Errorf("total actions: %d, want 97", total) } } diff --git a/weed/storage/blockvol/testrunner/actions/helpers.go b/weed/storage/blockvol/testrunner/actions/helpers.go index 211a900e8..70d359d79 100644 --- a/weed/storage/blockvol/testrunner/actions/helpers.go +++ b/weed/storage/blockvol/testrunner/actions/helpers.go @@ -85,6 +85,34 @@ func parseInt(s string, def int) int { return v } +// parseSizeBytes converts a human-readable size string (e.g. "50M", "1G", "104857600") to bytes. +func parseSizeBytes(s string) (uint64, error) { + s = strings.TrimSpace(s) + if s == "" { + return 0, fmt.Errorf("empty size string") + } + upper := strings.ToUpper(s) + var multiplier uint64 = 1 + switch { + case strings.HasSuffix(upper, "G"): + multiplier = 1024 * 1024 * 1024 + s = strings.TrimSuffix(upper, "G") + case strings.HasSuffix(upper, "M"): + multiplier = 1024 * 1024 + s = strings.TrimSuffix(upper, "M") + case strings.HasSuffix(upper, "K"): + multiplier = 1024 + s = strings.TrimSuffix(upper, "K") + default: + s = upper + } + v, err := strconv.ParseUint(s, 10, 64) + if err != nil { + return 0, fmt.Errorf("parse size %q: %w", s, err) + } + return v * multiplier, nil +} + func parseIntSlice(s string) []int { var result []int for _, part := range strings.Split(s, ",") { diff --git a/weed/storage/blockvol/testrunner/actions/io.go b/weed/storage/blockvol/testrunner/actions/io.go index e91454372..7c56fd888 100644 --- a/weed/storage/blockvol/testrunner/actions/io.go +++ b/weed/storage/blockvol/testrunner/actions/io.go @@ -46,7 +46,10 @@ func ddWrite(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[st } // Generate random data to temp file, write to device, compute md5. 
- tmpFile := "/tmp/sw-test-runner-dd-data" + tmpFile := tempPath(actx, "dd-data") + if err := ensureTempRoot(ctx, node, actx); err != nil { + return nil, fmt.Errorf("dd_write: %w", err) + } genCmd := fmt.Sprintf("dd if=/dev/urandom of=%s bs=%s count=%s 2>/dev/null", tmpFile, bs, count) _, stderr, code, err := node.RunRoot(ctx, genCmd) if err != nil || code != 0 { @@ -98,7 +101,10 @@ func ddReadMD5(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[ return nil, err } - tmpFile := "/tmp/sw-test-runner-dd-read" + tmpFile := tempPath(actx, "dd-read") + if err := ensureTempRoot(ctx, node, actx); err != nil { + return nil, fmt.Errorf("dd_read_md5: %w", err) + } readCmd := fmt.Sprintf("dd if=%s of=%s bs=%s count=%s iflag=direct", device, tmpFile, bs, count) if skip := act.Params["skip"]; skip != "" { readCmd += fmt.Sprintf(" skip=%s", skip) @@ -285,8 +291,12 @@ func writeLoopBg(ctx context.Context, actx *tr.ActionContext, act tr.Action) (ma return nil, err } - cmd := fmt.Sprintf("setsid bash -c 'while true; do dd if=/dev/urandom of=%s bs=%s count=1 oflag=%s conv=notrunc 2>/dev/null; done' &>/tmp/sw_bg.log & echo $!", - device, bs, oflag) + bgLog := tempPath(actx, "bg.log") + if err := ensureTempRoot(ctx, node, actx); err != nil { + return nil, fmt.Errorf("write_loop_bg: %w", err) + } + cmd := fmt.Sprintf("setsid bash -c 'while true; do dd if=/dev/urandom of=%s bs=%s count=1 oflag=%s conv=notrunc 2>/dev/null; done' &>%s & echo $!", + device, bs, oflag, bgLog) stdout, stderr, code, err := node.RunRoot(ctx, cmd) if err != nil || code != 0 { return nil, fmt.Errorf("write_loop_bg: code=%d stderr=%s err=%v", code, stderr, err) @@ -318,3 +328,27 @@ func stopBg(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[str return nil, nil } + +// ensureTempRoot creates the per-run temp directory on the remote node. +// Uses RunRoot so the directory is created with root privileges, ensuring +// subsequent RunRoot commands can write into it. 
+func ensureTempRoot(ctx context.Context, node interface{ RunRoot(context.Context, string) (string, string, int, error) }, actx *tr.ActionContext) error { + if actx.TempRoot == "" { + return nil + } + _, stderr, code, err := node.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", actx.TempRoot)) + if err != nil || code != 0 { + return fmt.Errorf("mkdir TempRoot %s: code=%d stderr=%s err=%v", actx.TempRoot, code, stderr, err) + } + return nil +} + +// tempPath returns a path under the per-run temp root for the given suffix. +// Falls back to /tmp if TempRoot is empty (backward compat). +func tempPath(actx *tr.ActionContext, suffix string) string { + root := actx.TempRoot + if root == "" { + root = "/tmp" + } + return root + "/sw-" + suffix +} diff --git a/weed/storage/blockvol/testrunner/actions/iscsi.go b/weed/storage/blockvol/testrunner/actions/iscsi.go index f940fb3da..56c7cfbd7 100644 --- a/weed/storage/blockvol/testrunner/actions/iscsi.go +++ b/weed/storage/blockvol/testrunner/actions/iscsi.go @@ -3,6 +3,7 @@ package actions import ( "context" "fmt" + "strconv" tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner" "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner/infra" @@ -11,6 +12,7 @@ import ( // RegisterISCSIActions registers iSCSI client actions. func RegisterISCSIActions(r *tr.Registry) { r.RegisterFunc("iscsi_login", tr.TierBlock, iscsiLogin) + r.RegisterFunc("iscsi_login_direct", tr.TierBlock, iscsiLoginDirect) r.RegisterFunc("iscsi_logout", tr.TierBlock, iscsiLogout) r.RegisterFunc("iscsi_discover", tr.TierBlock, iscsiDiscover) r.RegisterFunc("iscsi_cleanup", tr.TierBlock, iscsiCleanup) @@ -71,6 +73,61 @@ func iscsiLogin(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map return map[string]string{"value": dev}, nil } +// iscsiLoginDirect discovers + logs into a target using explicit host, port, iqn params. 
+// Unlike iscsi_login, it does not require a target spec — useful for cluster-provisioned +// volumes whose iSCSI address comes from the master API response. +func iscsiLoginDirect(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { + host := act.Params["host"] + if host == "" { + return nil, fmt.Errorf("iscsi_login_direct: host param required") + } + portStr := act.Params["port"] + if portStr == "" { + return nil, fmt.Errorf("iscsi_login_direct: port param required") + } + port, err := strconv.Atoi(portStr) + if err != nil { + return nil, fmt.Errorf("iscsi_login_direct: invalid port %q: %w", portStr, err) + } + iqn := act.Params["iqn"] + if iqn == "" { + return nil, fmt.Errorf("iscsi_login_direct: iqn param required") + } + + node, err := getNode(actx, act.Node) + if err != nil { + return nil, fmt.Errorf("iscsi_login_direct: %w", err) + } + + client := infra.NewISCSIClient(node) + + actx.Log(" discovering %s:%d ...", host, port) + iqns, derr := client.Discover(ctx, host, port) + if derr != nil { + return nil, fmt.Errorf("iscsi_login_direct discover: %w", derr) + } + + found := false + for _, q := range iqns { + if q == iqn { + found = true + break + } + } + if !found { + return nil, fmt.Errorf("iscsi_login_direct: IQN %s not found in discovery (got %v)", iqn, iqns) + } + + actx.Log(" logging in to %s ...", iqn) + dev, lerr := client.Login(ctx, iqn) + if lerr != nil { + return nil, fmt.Errorf("iscsi_login_direct: %w", lerr) + } + + actx.Log(" device: %s", dev) + return map[string]string{"value": dev}, nil +} + func iscsiLogout(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { targetName := act.Target if targetName == "" { @@ -128,5 +185,5 @@ func iscsiCleanup(ctx context.Context, actx *tr.ActionContext, act tr.Action) (m } client := infra.NewISCSIClient(node) - return nil, client.CleanupAll(ctx, "iqn.2024.com.seaweedfs:") + return nil, client.CleanupAll(ctx, "iqn.2024-01.com.seaweedfs:") } 
diff --git a/weed/storage/blockvol/testrunner/actions/metrics.go b/weed/storage/blockvol/testrunner/actions/metrics.go index 6ab574dff..d28ed5854 100644 --- a/weed/storage/blockvol/testrunner/actions/metrics.go +++ b/weed/storage/blockvol/testrunner/actions/metrics.go @@ -312,7 +312,7 @@ func iostatCapture(ctx context.Context, actx *tr.ActionContext, act tr.Action) ( func collectArtifactsAction(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) { dir := act.Params["dir"] if dir == "" { - dir = "/tmp/sw-test-runner-artifacts" + dir = tempPath(actx, "artifacts") } // Find client node for dmesg/lsblk. diff --git a/weed/storage/blockvol/testrunner/engine.go b/weed/storage/blockvol/testrunner/engine.go index a8c50a941..9f80af640 100644 --- a/weed/storage/blockvol/testrunner/engine.go +++ b/weed/storage/blockvol/testrunner/engine.go @@ -53,6 +53,12 @@ func (e *Engine) Run(ctx context.Context, s *Scenario, actx *ActionContext) *Sce actx.Vars[k] = v } + // Allocate a unique per-run temp directory (T6). + if actx.TempRoot == "" { + actx.TempRoot = fmt.Sprintf("/tmp/sw-run-%s-%d", s.Name, start.UnixMilli()) + } + actx.Vars["__temp_dir"] = actx.TempRoot + // Separate always-phases for deferred cleanup. 
var normalPhases, alwaysPhases []Phase for _, p := range s.Phases { @@ -237,15 +243,19 @@ func (e *Engine) runPhaseParallel(ctx context.Context, actx *ActionContext, phas } wg.Wait() + var errors []string for i, ar := range results { pr.Actions = append(pr.Actions, ar) if ar.Status == StatusFail && !phase.Actions[i].IgnoreError { pr.Status = StatusFail - if pr.Error == "" { - pr.Error = fmt.Sprintf("action %d (%s) failed: %s", i, phase.Actions[i].Action, ar.Error) - } + errors = append(errors, fmt.Sprintf("action %d (%s): %s", i, phase.Actions[i].Action, ar.Error)) } } + if len(errors) == 1 { + pr.Error = errors[0] + } else if len(errors) > 1 { + pr.Error = fmt.Sprintf("%d actions failed: [1] %s", len(errors), strings.Join(errors, "; ")) + } return pr } @@ -287,13 +297,37 @@ func (e *Engine) runAction(ctx context.Context, actx *ActionContext, act Action) } } - e.log(" [action] %s", resolved.Action) + // Enforce action-level timeout if specified. + var actionTimeout time.Duration + if resolved.Timeout != "" { + if dur, err := time.ParseDuration(resolved.Timeout); err == nil && dur > 0 { + actionTimeout = dur + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, dur) + defer cancel() + } + } + + // Log action start with context (node/target if available). + actionLabel := resolved.Action + if resolved.Node != "" { + actionLabel += " @" + resolved.Node + } else if resolved.Target != "" { + actionLabel += " >" + resolved.Target + } + e.log(" [action] %s", actionLabel) output, err := handler.Execute(ctx, actx, resolved) + elapsed := time.Since(start) + + // Enrich timeout errors with action-specific context. 
+ if err != nil && ctx.Err() != nil && actionTimeout > 0 { + err = fmt.Errorf("action %q timed out after %s: %w", resolved.Action, actionTimeout, err) + } ar := ActionResult{ Action: resolved.Action, - Duration: time.Since(start), + Duration: elapsed, YAML: yamlDef, } @@ -302,10 +336,16 @@ func (e *Engine) runAction(ctx context.Context, actx *ActionContext, act Action) ar.Error = err.Error() if act.IgnoreError { ar.Status = StatusPass - e.log(" [action] %s failed (ignored): %v", resolved.Action, err) + e.log(" [done] %s (ignored error, %s): %v", actionLabel, fmtDuration(elapsed), err) + } else { + e.log(" [FAIL] %s (%s): %v", actionLabel, fmtDuration(elapsed), err) } } else { ar.Status = StatusPass + // Only log completion for slow actions (>1s) to avoid noise on quick ones. + if elapsed >= time.Second { + e.log(" [done] %s (%s)", actionLabel, fmtDuration(elapsed)) + } } // Store output as var if save_as is set. @@ -327,7 +367,7 @@ func (e *Engine) runAction(ctx context.Context, actx *ActionContext, act Action) if output != nil { if v, ok := output["value"]; ok { - ar.Output = truncate(v, 4096) + ar.Output = truncate(v, 65536) } } @@ -343,6 +383,8 @@ func resolveAction(act Action, vars map[string]string) Action { Node: act.Node, SaveAs: act.SaveAs, IgnoreError: act.IgnoreError, + Retry: act.Retry, + Timeout: act.Timeout, Params: make(map[string]string), } @@ -373,7 +415,18 @@ func truncate(s string, max int) string { if len(s) <= max { return s } - return s[:max] + "..." + return s[:max] + fmt.Sprintf("...[truncated, %d/%d bytes]", max, len(s)) +} + +// fmtDuration formats a duration as a human-readable string. +func fmtDuration(d time.Duration) string { + if d < time.Second { + return fmt.Sprintf("%dms", d.Milliseconds()) + } + if d < time.Minute { + return fmt.Sprintf("%.1fs", d.Seconds()) + } + return fmt.Sprintf("%dm%ds", int(d.Minutes()), int(d.Seconds())%60) } // marshalActionYAML serializes a resolved action to YAML for report display. 
diff --git a/weed/storage/blockvol/testrunner/engine_test.go b/weed/storage/blockvol/testrunner/engine_test.go index 468be65af..bf391e0eb 100644 --- a/weed/storage/blockvol/testrunner/engine_test.go +++ b/weed/storage/blockvol/testrunner/engine_test.go @@ -3,6 +3,7 @@ package testrunner import ( "context" "fmt" + "strings" "testing" "time" ) @@ -889,6 +890,149 @@ func TestEngine_CleanupVars(t *testing.T) { } } +func TestEngine_ActionTimeout_Enforced(t *testing.T) { + registry := NewRegistry() + + // Action that sleeps forever, should be killed by action-level timeout. + slowStep := ActionHandlerFunc(func(ctx context.Context, actx *ActionContext, act Action) (map[string]string, error) { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(30 * time.Second): + return nil, nil + } + }) + registry.Register("slow", TierCore, slowStep) + + scenario := &Scenario{ + Name: "action-timeout-test", + Timeout: Duration{10 * time.Second}, // scenario timeout is generous + Phases: []Phase{ + { + Name: "phase1", + Actions: []Action{ + {Action: "slow", Timeout: "150ms"}, + }, + }, + }, + } + + engine := NewEngine(registry, nil) + actx := &ActionContext{ + Scenario: scenario, + Vars: make(map[string]string), + Log: func(string, ...interface{}) {}, + } + + start := time.Now() + result := engine.Run(context.Background(), scenario, actx) + elapsed := time.Since(start) + + if result.Status != StatusFail { + t.Errorf("status = %s, want FAIL", result.Status) + } + // Should timeout at ~150ms, not 10s. + if elapsed > 2*time.Second { + t.Errorf("took %v, action timeout should have fired at ~150ms", elapsed) + } + // Error message should mention the action name and timeout. 
+ if len(result.Phases) > 0 && len(result.Phases[0].Actions) > 0 { + errMsg := result.Phases[0].Actions[0].Error + if !strings.Contains(errMsg, "slow") || !strings.Contains(errMsg, "timed out") { + t.Errorf("error = %q, should mention action name and timeout", errMsg) + } + } +} + +func TestEngine_TempRoot_UniquePerRun(t *testing.T) { + registry := NewRegistry() + step := &mockHandler{} + registry.Register("step", TierCore, step) + + scenario := &Scenario{ + Name: "tempdir-test", + Timeout: Duration{5 * time.Second}, + Phases: []Phase{ + { + Name: "phase1", + Actions: []Action{{Action: "step"}}, + }, + }, + } + + engine := NewEngine(registry, nil) + + // Run 1 + actx1 := &ActionContext{ + Scenario: scenario, + Vars: make(map[string]string), + Log: func(string, ...interface{}) {}, + } + engine.Run(context.Background(), scenario, actx1) + + // Small delay so timestamp differs. + time.Sleep(2 * time.Millisecond) + + // Run 2 + actx2 := &ActionContext{ + Scenario: scenario, + Vars: make(map[string]string), + Log: func(string, ...interface{}) {}, + } + engine.Run(context.Background(), scenario, actx2) + + // Both should have TempRoot set and they should differ. + if actx1.TempRoot == "" { + t.Fatal("run 1: TempRoot is empty") + } + if actx2.TempRoot == "" { + t.Fatal("run 2: TempRoot is empty") + } + if actx1.TempRoot == actx2.TempRoot { + t.Errorf("TempRoot should be unique per run: both = %q", actx1.TempRoot) + } + + // __temp_dir var should be set. + if actx1.Vars["__temp_dir"] != actx1.TempRoot { + t.Errorf("__temp_dir = %q, want %q", actx1.Vars["__temp_dir"], actx1.TempRoot) + } + + // Should contain scenario name. 
+ if !strings.Contains(actx1.TempRoot, "tempdir-test") { + t.Errorf("TempRoot %q should contain scenario name", actx1.TempRoot) + } +} + +func TestEngine_TempRoot_PreservedIfSet(t *testing.T) { + registry := NewRegistry() + step := &mockHandler{} + registry.Register("step", TierCore, step) + + scenario := &Scenario{ + Name: "tempdir-preset-test", + Timeout: Duration{5 * time.Second}, + Phases: []Phase{ + { + Name: "phase1", + Actions: []Action{{Action: "step"}}, + }, + }, + } + + engine := NewEngine(registry, nil) + actx := &ActionContext{ + Scenario: scenario, + Vars: make(map[string]string), + Log: func(string, ...interface{}) {}, + TempRoot: "/custom/temp/path", + } + engine.Run(context.Background(), scenario, actx) + + if actx.TempRoot != "/custom/temp/path" { + t.Errorf("TempRoot = %q, want /custom/temp/path (should preserve caller-set value)", actx.TempRoot) + } +} + func TestParse_AggregateValidation(t *testing.T) { base := ` name: validate-test diff --git a/weed/storage/blockvol/testrunner/infra/target.go b/weed/storage/blockvol/testrunner/infra/target.go index 2964fe5e6..3151e2862 100644 --- a/weed/storage/blockvol/testrunner/infra/target.go +++ b/weed/storage/blockvol/testrunner/infra/target.go @@ -80,11 +80,17 @@ func (t *Target) Deploy(localBin string) error { // Start launches the target process. If create is true, a new volume is created. func (t *Target) Start(ctx context.Context, create bool) error { + // Pre-flight: verify binary exists and is executable. + _, _, binCode, _ := t.Node.Run(ctx, fmt.Sprintf("test -x %s", t.BinPath)) + if binCode != 0 { + return fmt.Errorf("binary not found or not executable on %s: %s", t.Node.Host, t.BinPath) + } + // Pre-flight: check if iSCSI port is already in use. 
- stdout, _, code, _ := t.Node.Run(ctx, fmt.Sprintf("ss -tln | grep ':%d '", t.Config.Port)) - if code == 0 && strings.TrimSpace(stdout) != "" { + portOut, _, portCode, _ := t.Node.Run(ctx, fmt.Sprintf("ss -tln | grep ':%d '", t.Config.Port)) + if portCode == 0 && strings.TrimSpace(portOut) != "" { owner, _, _, _ := t.Node.Run(ctx, fmt.Sprintf("ss -tlnp | grep ':%d ' | head -1", t.Config.Port)) - return fmt.Errorf("port %d already in use on %s: %s", + return fmt.Errorf("port %d (iSCSI) already in use on %s: %s", t.Config.Port, t.Node.Host, strings.TrimSpace(owner)) } @@ -116,7 +122,7 @@ func (t *Target) Start(ctx context.Context, create bool) error { } // Discover PID by matching the binary name - stdout, _, _, _ = t.Node.Run(ctx, fmt.Sprintf("ps -eo pid,args | grep '%s' | grep -v grep | awk '{print $1}'", t.BinPath)) + stdout, _, _, _ := t.Node.Run(ctx, fmt.Sprintf("ps -eo pid,args | grep '%s' | grep -v grep | awk '{print $1}'", t.BinPath)) pidStr := strings.TrimSpace(stdout) if idx := strings.IndexByte(pidStr, '\n'); idx > 0 { pidStr = pidStr[:idx] @@ -227,13 +233,13 @@ func CheckDiskSpace(ctx context.Context, node *Node, volFile, volSize, walSize s } stdout, _, code, _ := node.Run(ctx, fmt.Sprintf("df -BM %s 2>/dev/null | tail -1 | awk '{print $4}'", dir)) if code != 0 { - return nil // can't check, proceed anyway + return fmt.Errorf("disk space check failed on %s (df returned code %d for %s)", node.Host, code, dir) } availStr := strings.TrimSpace(stdout) availStr = strings.TrimSuffix(availStr, "M") availMB, err := strconv.Atoi(availStr) if err != nil { - return nil // can't parse, proceed anyway + return fmt.Errorf("disk space check: cannot parse df output %q on %s", availStr, node.Host) } if availMB < neededMB { diff --git a/weed/storage/blockvol/testrunner/registry.go b/weed/storage/blockvol/testrunner/registry.go index 70ba1033b..ae9e512a0 100644 --- a/weed/storage/blockvol/testrunner/registry.go +++ b/weed/storage/blockvol/testrunner/registry.go @@ -14,6 
+14,7 @@ type ActionContext struct { Vars map[string]string Log func(format string, args ...interface{}) Coordinator *Coordinator // non-nil when running in coordinator mode + TempRoot string // per-run temp directory on remote nodes (T6) } // NodeRunner abstracts remote command execution (implemented by infra.Node). diff --git a/weed/storage/blockvol/testrunner/scenarios/cp103-perf-baseline.yaml b/weed/storage/blockvol/testrunner/scenarios/cp103-perf-baseline.yaml index 232487216..211af6077 100644 --- a/weed/storage/blockvol/testrunner/scenarios/cp103-perf-baseline.yaml +++ b/weed/storage/blockvol/testrunner/scenarios/cp103-perf-baseline.yaml @@ -172,6 +172,45 @@ phases: runtime: "60" name: "mixed-70-30-j4-qd32" + # --- iSCSI profiling snapshot (T7) --- + - name: iscsi-profile + parallel: true + actions: + - action: pprof_capture + target: primary + save_as: iscsi_pprof_heap + profile: heap + output_dir: "{{ __temp_dir }}/pprof" + label: iscsi-heap + - action: pprof_capture + target: primary + save_as: iscsi_pprof_goroutine + profile: goroutine + output_dir: "{{ __temp_dir }}/pprof" + label: iscsi-goroutine + - action: pprof_capture + target: primary + save_as: iscsi_pprof_cpu + profile: profile + seconds: "10" + output_dir: "{{ __temp_dir }}/pprof" + label: iscsi-cpu + - action: vmstat_capture + node: server + save_as: iscsi_vmstat + seconds: "10" + output_dir: "{{ __temp_dir }}/os" + label: iscsi-vmstat + - action: iostat_capture + node: server + save_as: iscsi_iostat + seconds: "10" + output_dir: "{{ __temp_dir }}/os" + label: iscsi-iostat + - action: scrape_metrics + target: primary + save_as: iscsi_metrics + - name: iscsi-disconnect actions: - action: iscsi_logout @@ -313,6 +352,45 @@ phases: runtime: "60" name: "mixed-70-30-j4-qd32" + # --- NVMe profiling snapshot (T7) --- + - name: nvme-profile + parallel: true + actions: + - action: pprof_capture + target: primary + save_as: nvme_pprof_heap + profile: heap + output_dir: "{{ __temp_dir }}/pprof" + label: 
nvme-heap + - action: pprof_capture + target: primary + save_as: nvme_pprof_goroutine + profile: goroutine + output_dir: "{{ __temp_dir }}/pprof" + label: nvme-goroutine + - action: pprof_capture + target: primary + save_as: nvme_pprof_cpu + profile: profile + seconds: "10" + output_dir: "{{ __temp_dir }}/pprof" + label: nvme-cpu + - action: vmstat_capture + node: server + save_as: nvme_vmstat + seconds: "10" + output_dir: "{{ __temp_dir }}/os" + label: nvme-vmstat + - action: iostat_capture + node: server + save_as: nvme_iostat + seconds: "10" + output_dir: "{{ __temp_dir }}/os" + label: nvme-iostat + - action: scrape_metrics + target: primary + save_as: nvme_metrics + - name: nvme-disconnect actions: - action: nvme_disconnect diff --git a/weed/storage/blockvol/testrunner/scenarios/cp85-perf-baseline.yaml b/weed/storage/blockvol/testrunner/scenarios/cp85-perf-baseline.yaml index 68b557bc3..da82579f7 100644 --- a/weed/storage/blockvol/testrunner/scenarios/cp85-perf-baseline.yaml +++ b/weed/storage/blockvol/testrunner/scenarios/cp85-perf-baseline.yaml @@ -85,6 +85,36 @@ phases: name: perf_64k_seqwrite save_as: fio_64k_sw + # --- Profiling snapshot (T7) --- + - name: profile_capture + parallel: true + actions: + - action: pprof_capture + target: primary + save_as: pprof_heap + profile: heap + output_dir: "{{ __temp_dir }}/pprof" + label: post-bench-heap + - action: pprof_capture + target: primary + save_as: pprof_cpu + profile: profile + seconds: "10" + output_dir: "{{ __temp_dir }}/pprof" + label: post-bench-cpu + - action: vmstat_capture + node: target_node + save_as: post_vmstat + seconds: "10" + output_dir: "{{ __temp_dir }}/os" + label: post-bench-vmstat + - action: iostat_capture + node: target_node + save_as: post_iostat + seconds: "10" + output_dir: "{{ __temp_dir }}/os" + label: post-bench-iostat + - name: collect_metrics actions: - action: scrape_metrics