Browse Source

feat: CP11A-2 coordinated expand protocol for replicated block volumes

Two-phase prepare/commit/cancel protocol ensures all replicas expand
atomically. Standalone volumes use direct-commit (unchanged behavior).

Engine: PrepareExpand/CommitExpand/CancelExpand with on-disk
PreparedSize+ExpandEpoch in superblock, crash recovery clears stale
prepare state on open, v.mu serializes concurrent expand operations.

Proto: 3 new RPCs (PrepareExpand/CommitExpand/CancelExpandBlockVolume).

Coordinator: expandClean flag pattern — ReleaseExpandInflight only on
clean success or full cancel. Partial replica commit failure calls
MarkExpandFailed (keeps ExpandInProgress=true, suppresses heartbeat
size updates). ClearExpandFailed for manual reconciliation.

Registry: AcquireExpandInflight records PendingExpandSize+ExpandEpoch.
ExpandFailed state blocks new expands until cleared.

Tests: 15 engine + 4 VS + 10 coordinator + heartbeat suppression
regression + updated QA CP82/durability tests with prepare/commit mocks.

Also includes CP11A-1 remaining: QA storage profile tests, QA
io_backend config tests, testrunner perf-baseline scenarios and
coordinated-expand actions.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
feature/sw-block
Ping Qiu 19 hours ago
parent
commit
1b3edd7856
  1. 29
      weed/pb/volume_server.proto
  2. 511
      weed/pb/volume_server_pb/volume_server.pb.go
  3. 114
      weed/pb/volume_server_pb/volume_server_grpc.pb.go
  4. 75
      weed/server/master_block_registry.go
  5. 95
      weed/server/master_grpc_server_block.go
  6. 492
      weed/server/master_grpc_server_block_test.go
  7. 46
      weed/server/master_server.go
  8. 29
      weed/server/master_server_handlers_block.go
  9. 58
      weed/server/qa_block_cp82_adversarial_test.go
  10. 9
      weed/server/qa_block_durability_test.go
  11. 54
      weed/server/volume_grpc_block.go
  12. 50
      weed/server/volume_grpc_block_test.go
  13. 30
      weed/server/volume_server_block.go
  14. 21
      weed/storage/blockvol/blockapi/client.go
  15. 10
      weed/storage/blockvol/blockapi/types.go
  16. 151
      weed/storage/blockvol/blockvol.go
  17. 303
      weed/storage/blockvol/expand_test.go
  18. 588
      weed/storage/blockvol/qa_expand_test.go
  19. 228
      weed/storage/blockvol/qa_iobackend_config_test.go
  20. 567
      weed/storage/blockvol/qa_storage_profile_test.go
  21. 20
      weed/storage/blockvol/superblock.go
  22. 16
      weed/storage/blockvol/testrunner/actions/bench.go
  23. 15
      weed/storage/blockvol/testrunner/actions/block.go
  24. 6
      weed/storage/blockvol/testrunner/actions/database.go
  25. 234
      weed/storage/blockvol/testrunner/actions/devops.go
  26. 22
      weed/storage/blockvol/testrunner/actions/devops_test.go
  27. 28
      weed/storage/blockvol/testrunner/actions/helpers.go
  28. 42
      weed/storage/blockvol/testrunner/actions/io.go
  29. 59
      weed/storage/blockvol/testrunner/actions/iscsi.go
  30. 2
      weed/storage/blockvol/testrunner/actions/metrics.go
  31. 69
      weed/storage/blockvol/testrunner/engine.go
  32. 144
      weed/storage/blockvol/testrunner/engine_test.go
  33. 18
      weed/storage/blockvol/testrunner/infra/target.go
  34. 1
      weed/storage/blockvol/testrunner/registry.go
  35. 78
      weed/storage/blockvol/testrunner/scenarios/cp103-perf-baseline.yaml
  36. 30
      weed/storage/blockvol/testrunner/scenarios/cp85-perf-baseline.yaml

29
weed/pb/volume_server.proto

@@ -151,6 +151,12 @@ service VolumeServer {
}
rpc ExpandBlockVolume (ExpandBlockVolumeRequest) returns (ExpandBlockVolumeResponse) {
}
rpc PrepareExpandBlockVolume (PrepareExpandBlockVolumeRequest) returns (PrepareExpandBlockVolumeResponse) {
}
rpc CommitExpandBlockVolume (CommitExpandBlockVolumeRequest) returns (CommitExpandBlockVolumeResponse) {
}
rpc CancelExpandBlockVolume (CancelExpandBlockVolumeRequest) returns (CancelExpandBlockVolumeResponse) {
}
}
@@ -834,3 +840,26 @@ message ExpandBlockVolumeRequest {
message ExpandBlockVolumeResponse {
uint64 capacity_bytes = 1;
}
// Phase 1 of the coordinated two-phase expand protocol: asks a replica to
// prepare growth of the named block volume to new_size_bytes. expand_epoch
// fences messages from older expand attempts.
message PrepareExpandBlockVolumeRequest {
string name = 1;
uint64 new_size_bytes = 2;
uint64 expand_epoch = 3;
}
// Empty response: success of the RPC itself signals the prepare was accepted.
message PrepareExpandBlockVolumeResponse {
}
// Phase 2 (commit): makes a previously prepared expansion effective on the
// replica identified by name, for the matching expand_epoch.
message CommitExpandBlockVolumeRequest {
string name = 1;
uint64 expand_epoch = 2;
}
message CommitExpandBlockVolumeResponse {
// Resulting volume capacity in bytes after the commit.
uint64 capacity_bytes = 1;
}
// Abort path: discards the prepared (uncommitted) expansion for the matching
// expand_epoch, leaving the volume at its previous size.
message CancelExpandBlockVolumeRequest {
string name = 1;
uint64 expand_epoch = 2;
}
// Empty response: success of the RPC itself signals the cancel was applied.
message CancelExpandBlockVolumeResponse {
}

511
weed/pb/volume_server_pb/volume_server.pb.go

@@ -6877,6 +6877,286 @@ func (x *ExpandBlockVolumeResponse) GetCapacityBytes() uint64 {
return 0
}
// PrepareExpandBlockVolumeRequest is the phase-1 (prepare) message of the
// two-phase block-volume expand protocol: volume Name, target NewSizeBytes,
// and the fencing ExpandEpoch.
// NOTE(review): protoc-gen-go generated code (message index 120) — regenerate
// from volume_server.proto rather than editing by hand.
type PrepareExpandBlockVolumeRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
NewSizeBytes uint64 `protobuf:"varint,2,opt,name=new_size_bytes,json=newSizeBytes,proto3" json:"new_size_bytes,omitempty"`
ExpandEpoch uint64 `protobuf:"varint,3,opt,name=expand_epoch,json=expandEpoch,proto3" json:"expand_epoch,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset restores the message to its zero value and re-registers its message info.
func (x *PrepareExpandBlockVolumeRequest) Reset() {
*x = PrepareExpandBlockVolumeRequest{}
mi := &file_volume_server_proto_msgTypes[120]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns a human-readable rendering of the message.
func (x *PrepareExpandBlockVolumeRequest) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage marks the type as a protobuf message.
func (*PrepareExpandBlockVolumeRequest) ProtoMessage() {}
// ProtoReflect returns the reflective view backing this message.
func (x *PrepareExpandBlockVolumeRequest) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[120]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use PrepareExpandBlockVolumeRequest.ProtoReflect.Descriptor instead.
func (*PrepareExpandBlockVolumeRequest) Descriptor() ([]byte, []int) {
return file_volume_server_proto_rawDescGZIP(), []int{120}
}
// GetName returns the volume name, or "" when the receiver is nil.
func (x *PrepareExpandBlockVolumeRequest) GetName() string {
if x != nil {
return x.Name
}
return ""
}
// GetNewSizeBytes returns the requested size, or 0 when the receiver is nil.
func (x *PrepareExpandBlockVolumeRequest) GetNewSizeBytes() uint64 {
if x != nil {
return x.NewSizeBytes
}
return 0
}
// GetExpandEpoch returns the fencing epoch, or 0 when the receiver is nil.
func (x *PrepareExpandBlockVolumeRequest) GetExpandEpoch() uint64 {
if x != nil {
return x.ExpandEpoch
}
return 0
}
// PrepareExpandBlockVolumeResponse is the (empty) reply to a prepare request;
// RPC success alone signals acceptance.
// NOTE(review): protoc-gen-go generated code (message index 121) — regenerate
// from volume_server.proto rather than editing by hand.
type PrepareExpandBlockVolumeResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset restores the message to its zero value and re-registers its message info.
func (x *PrepareExpandBlockVolumeResponse) Reset() {
*x = PrepareExpandBlockVolumeResponse{}
mi := &file_volume_server_proto_msgTypes[121]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns a human-readable rendering of the message.
func (x *PrepareExpandBlockVolumeResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage marks the type as a protobuf message.
func (*PrepareExpandBlockVolumeResponse) ProtoMessage() {}
// ProtoReflect returns the reflective view backing this message.
func (x *PrepareExpandBlockVolumeResponse) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[121]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use PrepareExpandBlockVolumeResponse.ProtoReflect.Descriptor instead.
func (*PrepareExpandBlockVolumeResponse) Descriptor() ([]byte, []int) {
return file_volume_server_proto_rawDescGZIP(), []int{121}
}
// CommitExpandBlockVolumeRequest is the phase-2 (commit) message of the
// two-phase expand protocol: makes the prepared expansion of volume Name
// effective, fenced by ExpandEpoch.
// NOTE(review): protoc-gen-go generated code (message index 122) — regenerate
// from volume_server.proto rather than editing by hand.
type CommitExpandBlockVolumeRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
ExpandEpoch uint64 `protobuf:"varint,2,opt,name=expand_epoch,json=expandEpoch,proto3" json:"expand_epoch,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset restores the message to its zero value and re-registers its message info.
func (x *CommitExpandBlockVolumeRequest) Reset() {
*x = CommitExpandBlockVolumeRequest{}
mi := &file_volume_server_proto_msgTypes[122]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns a human-readable rendering of the message.
func (x *CommitExpandBlockVolumeRequest) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage marks the type as a protobuf message.
func (*CommitExpandBlockVolumeRequest) ProtoMessage() {}
// ProtoReflect returns the reflective view backing this message.
func (x *CommitExpandBlockVolumeRequest) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[122]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use CommitExpandBlockVolumeRequest.ProtoReflect.Descriptor instead.
func (*CommitExpandBlockVolumeRequest) Descriptor() ([]byte, []int) {
return file_volume_server_proto_rawDescGZIP(), []int{122}
}
// GetName returns the volume name, or "" when the receiver is nil.
func (x *CommitExpandBlockVolumeRequest) GetName() string {
if x != nil {
return x.Name
}
return ""
}
// GetExpandEpoch returns the fencing epoch, or 0 when the receiver is nil.
func (x *CommitExpandBlockVolumeRequest) GetExpandEpoch() uint64 {
if x != nil {
return x.ExpandEpoch
}
return 0
}
// CommitExpandBlockVolumeResponse reports the volume's CapacityBytes after a
// successful commit.
// NOTE(review): protoc-gen-go generated code (message index 123) — regenerate
// from volume_server.proto rather than editing by hand.
type CommitExpandBlockVolumeResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
CapacityBytes uint64 `protobuf:"varint,1,opt,name=capacity_bytes,json=capacityBytes,proto3" json:"capacity_bytes,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset restores the message to its zero value and re-registers its message info.
func (x *CommitExpandBlockVolumeResponse) Reset() {
*x = CommitExpandBlockVolumeResponse{}
mi := &file_volume_server_proto_msgTypes[123]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns a human-readable rendering of the message.
func (x *CommitExpandBlockVolumeResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage marks the type as a protobuf message.
func (*CommitExpandBlockVolumeResponse) ProtoMessage() {}
// ProtoReflect returns the reflective view backing this message.
func (x *CommitExpandBlockVolumeResponse) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[123]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use CommitExpandBlockVolumeResponse.ProtoReflect.Descriptor instead.
func (*CommitExpandBlockVolumeResponse) Descriptor() ([]byte, []int) {
return file_volume_server_proto_rawDescGZIP(), []int{123}
}
// GetCapacityBytes returns the post-commit capacity, or 0 when the receiver is nil.
func (x *CommitExpandBlockVolumeResponse) GetCapacityBytes() uint64 {
if x != nil {
return x.CapacityBytes
}
return 0
}
// CancelExpandBlockVolumeRequest aborts a prepared (uncommitted) expansion of
// volume Name, fenced by ExpandEpoch.
// NOTE(review): protoc-gen-go generated code (message index 124) — regenerate
// from volume_server.proto rather than editing by hand.
type CancelExpandBlockVolumeRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
ExpandEpoch uint64 `protobuf:"varint,2,opt,name=expand_epoch,json=expandEpoch,proto3" json:"expand_epoch,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset restores the message to its zero value and re-registers its message info.
func (x *CancelExpandBlockVolumeRequest) Reset() {
*x = CancelExpandBlockVolumeRequest{}
mi := &file_volume_server_proto_msgTypes[124]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns a human-readable rendering of the message.
func (x *CancelExpandBlockVolumeRequest) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage marks the type as a protobuf message.
func (*CancelExpandBlockVolumeRequest) ProtoMessage() {}
// ProtoReflect returns the reflective view backing this message.
func (x *CancelExpandBlockVolumeRequest) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[124]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use CancelExpandBlockVolumeRequest.ProtoReflect.Descriptor instead.
func (*CancelExpandBlockVolumeRequest) Descriptor() ([]byte, []int) {
return file_volume_server_proto_rawDescGZIP(), []int{124}
}
// GetName returns the volume name, or "" when the receiver is nil.
func (x *CancelExpandBlockVolumeRequest) GetName() string {
if x != nil {
return x.Name
}
return ""
}
// GetExpandEpoch returns the fencing epoch, or 0 when the receiver is nil.
func (x *CancelExpandBlockVolumeRequest) GetExpandEpoch() uint64 {
if x != nil {
return x.ExpandEpoch
}
return 0
}
// CancelExpandBlockVolumeResponse is the (empty) reply to a cancel request;
// RPC success alone signals the cancel was applied.
// NOTE(review): protoc-gen-go generated code (message index 125) — regenerate
// from volume_server.proto rather than editing by hand.
type CancelExpandBlockVolumeResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset restores the message to its zero value and re-registers its message info.
func (x *CancelExpandBlockVolumeResponse) Reset() {
*x = CancelExpandBlockVolumeResponse{}
mi := &file_volume_server_proto_msgTypes[125]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns a human-readable rendering of the message.
func (x *CancelExpandBlockVolumeResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage marks the type as a protobuf message.
func (*CancelExpandBlockVolumeResponse) ProtoMessage() {}
// ProtoReflect returns the reflective view backing this message.
func (x *CancelExpandBlockVolumeResponse) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[125]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use CancelExpandBlockVolumeResponse.ProtoReflect.Descriptor instead.
func (*CancelExpandBlockVolumeResponse) Descriptor() ([]byte, []int) {
return file_volume_server_proto_rawDescGZIP(), []int{125}
}
type FetchAndWriteNeedleRequest_Replica struct {
state protoimpl.MessageState `protogen:"open.v1"`
Url string `protobuf:"bytes,1,opt,name=url,proto3" json:"url,omitempty"`
@ -6888,7 +7168,7 @@ type FetchAndWriteNeedleRequest_Replica struct {
func (x *FetchAndWriteNeedleRequest_Replica) Reset() {
*x = FetchAndWriteNeedleRequest_Replica{}
mi := &file_volume_server_proto_msgTypes[120]
mi := &file_volume_server_proto_msgTypes[126]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@ -6900,7 +7180,7 @@ func (x *FetchAndWriteNeedleRequest_Replica) String() string {
func (*FetchAndWriteNeedleRequest_Replica) ProtoMessage() {}
func (x *FetchAndWriteNeedleRequest_Replica) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[120]
mi := &file_volume_server_proto_msgTypes[126]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@ -6948,7 +7228,7 @@ type QueryRequest_Filter struct {
func (x *QueryRequest_Filter) Reset() {
*x = QueryRequest_Filter{}
mi := &file_volume_server_proto_msgTypes[121]
mi := &file_volume_server_proto_msgTypes[127]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@ -6960,7 +7240,7 @@ func (x *QueryRequest_Filter) String() string {
func (*QueryRequest_Filter) ProtoMessage() {}
func (x *QueryRequest_Filter) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[121]
mi := &file_volume_server_proto_msgTypes[127]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@ -7010,7 +7290,7 @@ type QueryRequest_InputSerialization struct {
func (x *QueryRequest_InputSerialization) Reset() {
*x = QueryRequest_InputSerialization{}
mi := &file_volume_server_proto_msgTypes[122]
mi := &file_volume_server_proto_msgTypes[128]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@ -7022,7 +7302,7 @@ func (x *QueryRequest_InputSerialization) String() string {
func (*QueryRequest_InputSerialization) ProtoMessage() {}
func (x *QueryRequest_InputSerialization) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[122]
mi := &file_volume_server_proto_msgTypes[128]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@ -7076,7 +7356,7 @@ type QueryRequest_OutputSerialization struct {
func (x *QueryRequest_OutputSerialization) Reset() {
*x = QueryRequest_OutputSerialization{}
mi := &file_volume_server_proto_msgTypes[123]
mi := &file_volume_server_proto_msgTypes[129]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@ -7088,7 +7368,7 @@ func (x *QueryRequest_OutputSerialization) String() string {
func (*QueryRequest_OutputSerialization) ProtoMessage() {}
func (x *QueryRequest_OutputSerialization) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[123]
mi := &file_volume_server_proto_msgTypes[129]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@ -7134,7 +7414,7 @@ type QueryRequest_InputSerialization_CSVInput struct {
func (x *QueryRequest_InputSerialization_CSVInput) Reset() {
*x = QueryRequest_InputSerialization_CSVInput{}
mi := &file_volume_server_proto_msgTypes[124]
mi := &file_volume_server_proto_msgTypes[130]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@ -7146,7 +7426,7 @@ func (x *QueryRequest_InputSerialization_CSVInput) String() string {
func (*QueryRequest_InputSerialization_CSVInput) ProtoMessage() {}
func (x *QueryRequest_InputSerialization_CSVInput) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[124]
mi := &file_volume_server_proto_msgTypes[130]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@ -7220,7 +7500,7 @@ type QueryRequest_InputSerialization_JSONInput struct {
func (x *QueryRequest_InputSerialization_JSONInput) Reset() {
*x = QueryRequest_InputSerialization_JSONInput{}
mi := &file_volume_server_proto_msgTypes[125]
mi := &file_volume_server_proto_msgTypes[131]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@ -7232,7 +7512,7 @@ func (x *QueryRequest_InputSerialization_JSONInput) String() string {
func (*QueryRequest_InputSerialization_JSONInput) ProtoMessage() {}
func (x *QueryRequest_InputSerialization_JSONInput) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[125]
mi := &file_volume_server_proto_msgTypes[131]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@ -7263,7 +7543,7 @@ type QueryRequest_InputSerialization_ParquetInput struct {
func (x *QueryRequest_InputSerialization_ParquetInput) Reset() {
*x = QueryRequest_InputSerialization_ParquetInput{}
mi := &file_volume_server_proto_msgTypes[126]
mi := &file_volume_server_proto_msgTypes[132]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@ -7275,7 +7555,7 @@ func (x *QueryRequest_InputSerialization_ParquetInput) String() string {
func (*QueryRequest_InputSerialization_ParquetInput) ProtoMessage() {}
func (x *QueryRequest_InputSerialization_ParquetInput) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[126]
mi := &file_volume_server_proto_msgTypes[132]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@ -7304,7 +7584,7 @@ type QueryRequest_OutputSerialization_CSVOutput struct {
func (x *QueryRequest_OutputSerialization_CSVOutput) Reset() {
*x = QueryRequest_OutputSerialization_CSVOutput{}
mi := &file_volume_server_proto_msgTypes[127]
mi := &file_volume_server_proto_msgTypes[133]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@ -7316,7 +7596,7 @@ func (x *QueryRequest_OutputSerialization_CSVOutput) String() string {
func (*QueryRequest_OutputSerialization_CSVOutput) ProtoMessage() {}
func (x *QueryRequest_OutputSerialization_CSVOutput) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[127]
mi := &file_volume_server_proto_msgTypes[133]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@ -7376,7 +7656,7 @@ type QueryRequest_OutputSerialization_JSONOutput struct {
func (x *QueryRequest_OutputSerialization_JSONOutput) Reset() {
*x = QueryRequest_OutputSerialization_JSONOutput{}
mi := &file_volume_server_proto_msgTypes[128]
mi := &file_volume_server_proto_msgTypes[134]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@ -7388,7 +7668,7 @@ func (x *QueryRequest_OutputSerialization_JSONOutput) String() string {
func (*QueryRequest_OutputSerialization_JSONOutput) ProtoMessage() {}
func (x *QueryRequest_OutputSerialization_JSONOutput) ProtoReflect() protoreflect.Message {
mi := &file_volume_server_proto_msgTypes[128]
mi := &file_volume_server_proto_msgTypes[134]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@ -7964,12 +8244,26 @@ const file_volume_server_proto_rawDesc = "" +
"\x04name\x18\x01 \x01(\tR\x04name\x12$\n" +
"\x0enew_size_bytes\x18\x02 \x01(\x04R\fnewSizeBytes\"B\n" +
"\x19ExpandBlockVolumeResponse\x12%\n" +
"\x0ecapacity_bytes\x18\x01 \x01(\x04R\rcapacityBytes*>\n" +
"\x0ecapacity_bytes\x18\x01 \x01(\x04R\rcapacityBytes\"~\n" +
"\x1fPrepareExpandBlockVolumeRequest\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\x12$\n" +
"\x0enew_size_bytes\x18\x02 \x01(\x04R\fnewSizeBytes\x12!\n" +
"\fexpand_epoch\x18\x03 \x01(\x04R\vexpandEpoch\"\"\n" +
" PrepareExpandBlockVolumeResponse\"W\n" +
"\x1eCommitExpandBlockVolumeRequest\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\x12!\n" +
"\fexpand_epoch\x18\x02 \x01(\x04R\vexpandEpoch\"H\n" +
"\x1fCommitExpandBlockVolumeResponse\x12%\n" +
"\x0ecapacity_bytes\x18\x01 \x01(\x04R\rcapacityBytes\"W\n" +
"\x1eCancelExpandBlockVolumeRequest\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\x12!\n" +
"\fexpand_epoch\x18\x02 \x01(\x04R\vexpandEpoch\"!\n" +
"\x1fCancelExpandBlockVolumeResponse*>\n" +
"\x0fVolumeScrubMode\x12\v\n" +
"\aUNKNOWN\x10\x00\x12\t\n" +
"\x05INDEX\x10\x01\x12\b\n" +
"\x04FULL\x10\x02\x12\t\n" +
"\x05LOCAL\x10\x032\xd5.\n" +
"\x05LOCAL\x10\x032\xe11\n" +
"\fVolumeServer\x12\\\n" +
"\vBatchDelete\x12$.volume_server_pb.BatchDeleteRequest\x1a%.volume_server_pb.BatchDeleteResponse\"\x00\x12n\n" +
"\x11VacuumVolumeCheck\x12*.volume_server_pb.VacuumVolumeCheckRequest\x1a+.volume_server_pb.VacuumVolumeCheckResponse\"\x00\x12v\n" +
@ -8025,7 +8319,10 @@ const file_volume_server_proto_rawDesc = "" +
"\x13SnapshotBlockVolume\x12,.volume_server_pb.SnapshotBlockVolumeRequest\x1a-.volume_server_pb.SnapshotBlockVolumeResponse\"\x00\x12t\n" +
"\x13DeleteBlockSnapshot\x12,.volume_server_pb.DeleteBlockSnapshotRequest\x1a-.volume_server_pb.DeleteBlockSnapshotResponse\"\x00\x12q\n" +
"\x12ListBlockSnapshots\x12+.volume_server_pb.ListBlockSnapshotsRequest\x1a,.volume_server_pb.ListBlockSnapshotsResponse\"\x00\x12n\n" +
"\x11ExpandBlockVolume\x12*.volume_server_pb.ExpandBlockVolumeRequest\x1a+.volume_server_pb.ExpandBlockVolumeResponse\"\x00B9Z7github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pbb\x06proto3"
"\x11ExpandBlockVolume\x12*.volume_server_pb.ExpandBlockVolumeRequest\x1a+.volume_server_pb.ExpandBlockVolumeResponse\"\x00\x12\x83\x01\n" +
"\x18PrepareExpandBlockVolume\x121.volume_server_pb.PrepareExpandBlockVolumeRequest\x1a2.volume_server_pb.PrepareExpandBlockVolumeResponse\"\x00\x12\x80\x01\n" +
"\x17CommitExpandBlockVolume\x120.volume_server_pb.CommitExpandBlockVolumeRequest\x1a1.volume_server_pb.CommitExpandBlockVolumeResponse\"\x00\x12\x80\x01\n" +
"\x17CancelExpandBlockVolume\x120.volume_server_pb.CancelExpandBlockVolumeRequest\x1a1.volume_server_pb.CancelExpandBlockVolumeResponse\"\x00B9Z7github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pbb\x06proto3"
var (
file_volume_server_proto_rawDescOnce sync.Once
@ -8040,7 +8337,7 @@ func file_volume_server_proto_rawDescGZIP() []byte {
}
var file_volume_server_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
var file_volume_server_proto_msgTypes = make([]protoimpl.MessageInfo, 129)
var file_volume_server_proto_msgTypes = make([]protoimpl.MessageInfo, 135)
var file_volume_server_proto_goTypes = []any{
(VolumeScrubMode)(0), // 0: volume_server_pb.VolumeScrubMode
(*VolumeServerState)(nil), // 1: volume_server_pb.VolumeServerState
@ -8163,17 +8460,23 @@ var file_volume_server_proto_goTypes = []any{
(*BlockSnapshotInfo)(nil), // 118: volume_server_pb.BlockSnapshotInfo
(*ExpandBlockVolumeRequest)(nil), // 119: volume_server_pb.ExpandBlockVolumeRequest
(*ExpandBlockVolumeResponse)(nil), // 120: volume_server_pb.ExpandBlockVolumeResponse
(*FetchAndWriteNeedleRequest_Replica)(nil), // 121: volume_server_pb.FetchAndWriteNeedleRequest.Replica
(*QueryRequest_Filter)(nil), // 122: volume_server_pb.QueryRequest.Filter
(*QueryRequest_InputSerialization)(nil), // 123: volume_server_pb.QueryRequest.InputSerialization
(*QueryRequest_OutputSerialization)(nil), // 124: volume_server_pb.QueryRequest.OutputSerialization
(*QueryRequest_InputSerialization_CSVInput)(nil), // 125: volume_server_pb.QueryRequest.InputSerialization.CSVInput
(*QueryRequest_InputSerialization_JSONInput)(nil), // 126: volume_server_pb.QueryRequest.InputSerialization.JSONInput
(*QueryRequest_InputSerialization_ParquetInput)(nil), // 127: volume_server_pb.QueryRequest.InputSerialization.ParquetInput
(*QueryRequest_OutputSerialization_CSVOutput)(nil), // 128: volume_server_pb.QueryRequest.OutputSerialization.CSVOutput
(*QueryRequest_OutputSerialization_JSONOutput)(nil), // 129: volume_server_pb.QueryRequest.OutputSerialization.JSONOutput
(*remote_pb.RemoteConf)(nil), // 130: remote_pb.RemoteConf
(*remote_pb.RemoteStorageLocation)(nil), // 131: remote_pb.RemoteStorageLocation
(*PrepareExpandBlockVolumeRequest)(nil), // 121: volume_server_pb.PrepareExpandBlockVolumeRequest
(*PrepareExpandBlockVolumeResponse)(nil), // 122: volume_server_pb.PrepareExpandBlockVolumeResponse
(*CommitExpandBlockVolumeRequest)(nil), // 123: volume_server_pb.CommitExpandBlockVolumeRequest
(*CommitExpandBlockVolumeResponse)(nil), // 124: volume_server_pb.CommitExpandBlockVolumeResponse
(*CancelExpandBlockVolumeRequest)(nil), // 125: volume_server_pb.CancelExpandBlockVolumeRequest
(*CancelExpandBlockVolumeResponse)(nil), // 126: volume_server_pb.CancelExpandBlockVolumeResponse
(*FetchAndWriteNeedleRequest_Replica)(nil), // 127: volume_server_pb.FetchAndWriteNeedleRequest.Replica
(*QueryRequest_Filter)(nil), // 128: volume_server_pb.QueryRequest.Filter
(*QueryRequest_InputSerialization)(nil), // 129: volume_server_pb.QueryRequest.InputSerialization
(*QueryRequest_OutputSerialization)(nil), // 130: volume_server_pb.QueryRequest.OutputSerialization
(*QueryRequest_InputSerialization_CSVInput)(nil), // 131: volume_server_pb.QueryRequest.InputSerialization.CSVInput
(*QueryRequest_InputSerialization_JSONInput)(nil), // 132: volume_server_pb.QueryRequest.InputSerialization.JSONInput
(*QueryRequest_InputSerialization_ParquetInput)(nil), // 133: volume_server_pb.QueryRequest.InputSerialization.ParquetInput
(*QueryRequest_OutputSerialization_CSVOutput)(nil), // 134: volume_server_pb.QueryRequest.OutputSerialization.CSVOutput
(*QueryRequest_OutputSerialization_JSONOutput)(nil), // 135: volume_server_pb.QueryRequest.OutputSerialization.JSONOutput
(*remote_pb.RemoteConf)(nil), // 136: remote_pb.RemoteConf
(*remote_pb.RemoteStorageLocation)(nil), // 137: remote_pb.RemoteStorageLocation
}
var file_volume_server_proto_depIdxs = []int32{
4, // 0: volume_server_pb.BatchDeleteResponse.results:type_name -> volume_server_pb.DeleteResult
@ -8189,21 +8492,21 @@ var file_volume_server_proto_depIdxs = []int32{
82, // 10: volume_server_pb.VolumeServerStatusResponse.disk_statuses:type_name -> volume_server_pb.DiskStatus
83, // 11: volume_server_pb.VolumeServerStatusResponse.memory_status:type_name -> volume_server_pb.MemStatus
1, // 12: volume_server_pb.VolumeServerStatusResponse.state:type_name -> volume_server_pb.VolumeServerState
121, // 13: volume_server_pb.FetchAndWriteNeedleRequest.replicas:type_name -> volume_server_pb.FetchAndWriteNeedleRequest.Replica
130, // 14: volume_server_pb.FetchAndWriteNeedleRequest.remote_conf:type_name -> remote_pb.RemoteConf
131, // 15: volume_server_pb.FetchAndWriteNeedleRequest.remote_location:type_name -> remote_pb.RemoteStorageLocation
127, // 13: volume_server_pb.FetchAndWriteNeedleRequest.replicas:type_name -> volume_server_pb.FetchAndWriteNeedleRequest.Replica
136, // 14: volume_server_pb.FetchAndWriteNeedleRequest.remote_conf:type_name -> remote_pb.RemoteConf
137, // 15: volume_server_pb.FetchAndWriteNeedleRequest.remote_location:type_name -> remote_pb.RemoteStorageLocation
0, // 16: volume_server_pb.ScrubVolumeRequest.mode:type_name -> volume_server_pb.VolumeScrubMode
0, // 17: volume_server_pb.ScrubEcVolumeRequest.mode:type_name -> volume_server_pb.VolumeScrubMode
79, // 18: volume_server_pb.ScrubEcVolumeResponse.broken_shard_infos:type_name -> volume_server_pb.EcShardInfo
122, // 19: volume_server_pb.QueryRequest.filter:type_name -> volume_server_pb.QueryRequest.Filter
123, // 20: volume_server_pb.QueryRequest.input_serialization:type_name -> volume_server_pb.QueryRequest.InputSerialization
124, // 21: volume_server_pb.QueryRequest.output_serialization:type_name -> volume_server_pb.QueryRequest.OutputSerialization
128, // 19: volume_server_pb.QueryRequest.filter:type_name -> volume_server_pb.QueryRequest.Filter
129, // 20: volume_server_pb.QueryRequest.input_serialization:type_name -> volume_server_pb.QueryRequest.InputSerialization
130, // 21: volume_server_pb.QueryRequest.output_serialization:type_name -> volume_server_pb.QueryRequest.OutputSerialization
118, // 22: volume_server_pb.ListBlockSnapshotsResponse.snapshots:type_name -> volume_server_pb.BlockSnapshotInfo
125, // 23: volume_server_pb.QueryRequest.InputSerialization.csv_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.CSVInput
126, // 24: volume_server_pb.QueryRequest.InputSerialization.json_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.JSONInput
127, // 25: volume_server_pb.QueryRequest.InputSerialization.parquet_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.ParquetInput
128, // 26: volume_server_pb.QueryRequest.OutputSerialization.csv_output:type_name -> volume_server_pb.QueryRequest.OutputSerialization.CSVOutput
129, // 27: volume_server_pb.QueryRequest.OutputSerialization.json_output:type_name -> volume_server_pb.QueryRequest.OutputSerialization.JSONOutput
131, // 23: volume_server_pb.QueryRequest.InputSerialization.csv_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.CSVInput
132, // 24: volume_server_pb.QueryRequest.InputSerialization.json_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.JSONInput
133, // 25: volume_server_pb.QueryRequest.InputSerialization.parquet_input:type_name -> volume_server_pb.QueryRequest.InputSerialization.ParquetInput
134, // 26: volume_server_pb.QueryRequest.OutputSerialization.csv_output:type_name -> volume_server_pb.QueryRequest.OutputSerialization.CSVOutput
135, // 27: volume_server_pb.QueryRequest.OutputSerialization.json_output:type_name -> volume_server_pb.QueryRequest.OutputSerialization.JSONOutput
2, // 28: volume_server_pb.VolumeServer.BatchDelete:input_type -> volume_server_pb.BatchDeleteRequest
6, // 29: volume_server_pb.VolumeServer.VacuumVolumeCheck:input_type -> volume_server_pb.VacuumVolumeCheckRequest
8, // 30: volume_server_pb.VolumeServer.VacuumVolumeCompact:input_type -> volume_server_pb.VacuumVolumeCompactRequest
@ -8258,62 +8561,68 @@ var file_volume_server_proto_depIdxs = []int32{
114, // 79: volume_server_pb.VolumeServer.DeleteBlockSnapshot:input_type -> volume_server_pb.DeleteBlockSnapshotRequest
116, // 80: volume_server_pb.VolumeServer.ListBlockSnapshots:input_type -> volume_server_pb.ListBlockSnapshotsRequest
119, // 81: volume_server_pb.VolumeServer.ExpandBlockVolume:input_type -> volume_server_pb.ExpandBlockVolumeRequest
3, // 82: volume_server_pb.VolumeServer.BatchDelete:output_type -> volume_server_pb.BatchDeleteResponse
7, // 83: volume_server_pb.VolumeServer.VacuumVolumeCheck:output_type -> volume_server_pb.VacuumVolumeCheckResponse
9, // 84: volume_server_pb.VolumeServer.VacuumVolumeCompact:output_type -> volume_server_pb.VacuumVolumeCompactResponse
11, // 85: volume_server_pb.VolumeServer.VacuumVolumeCommit:output_type -> volume_server_pb.VacuumVolumeCommitResponse
13, // 86: volume_server_pb.VolumeServer.VacuumVolumeCleanup:output_type -> volume_server_pb.VacuumVolumeCleanupResponse
15, // 87: volume_server_pb.VolumeServer.DeleteCollection:output_type -> volume_server_pb.DeleteCollectionResponse
17, // 88: volume_server_pb.VolumeServer.AllocateVolume:output_type -> volume_server_pb.AllocateVolumeResponse
19, // 89: volume_server_pb.VolumeServer.VolumeSyncStatus:output_type -> volume_server_pb.VolumeSyncStatusResponse
21, // 90: volume_server_pb.VolumeServer.VolumeIncrementalCopy:output_type -> volume_server_pb.VolumeIncrementalCopyResponse
23, // 91: volume_server_pb.VolumeServer.VolumeMount:output_type -> volume_server_pb.VolumeMountResponse
25, // 92: volume_server_pb.VolumeServer.VolumeUnmount:output_type -> volume_server_pb.VolumeUnmountResponse
27, // 93: volume_server_pb.VolumeServer.VolumeDelete:output_type -> volume_server_pb.VolumeDeleteResponse
29, // 94: volume_server_pb.VolumeServer.VolumeMarkReadonly:output_type -> volume_server_pb.VolumeMarkReadonlyResponse
31, // 95: volume_server_pb.VolumeServer.VolumeMarkWritable:output_type -> volume_server_pb.VolumeMarkWritableResponse
33, // 96: volume_server_pb.VolumeServer.VolumeConfigure:output_type -> volume_server_pb.VolumeConfigureResponse
35, // 97: volume_server_pb.VolumeServer.VolumeStatus:output_type -> volume_server_pb.VolumeStatusResponse
37, // 98: volume_server_pb.VolumeServer.GetState:output_type -> volume_server_pb.GetStateResponse
39, // 99: volume_server_pb.VolumeServer.SetState:output_type -> volume_server_pb.SetStateResponse
41, // 100: volume_server_pb.VolumeServer.VolumeCopy:output_type -> volume_server_pb.VolumeCopyResponse
81, // 101: volume_server_pb.VolumeServer.ReadVolumeFileStatus:output_type -> volume_server_pb.ReadVolumeFileStatusResponse
43, // 102: volume_server_pb.VolumeServer.CopyFile:output_type -> volume_server_pb.CopyFileResponse
46, // 103: volume_server_pb.VolumeServer.ReceiveFile:output_type -> volume_server_pb.ReceiveFileResponse
48, // 104: volume_server_pb.VolumeServer.ReadNeedleBlob:output_type -> volume_server_pb.ReadNeedleBlobResponse
50, // 105: volume_server_pb.VolumeServer.ReadNeedleMeta:output_type -> volume_server_pb.ReadNeedleMetaResponse
52, // 106: volume_server_pb.VolumeServer.WriteNeedleBlob:output_type -> volume_server_pb.WriteNeedleBlobResponse
54, // 107: volume_server_pb.VolumeServer.ReadAllNeedles:output_type -> volume_server_pb.ReadAllNeedlesResponse
56, // 108: volume_server_pb.VolumeServer.VolumeTailSender:output_type -> volume_server_pb.VolumeTailSenderResponse
58, // 109: volume_server_pb.VolumeServer.VolumeTailReceiver:output_type -> volume_server_pb.VolumeTailReceiverResponse
60, // 110: volume_server_pb.VolumeServer.VolumeEcShardsGenerate:output_type -> volume_server_pb.VolumeEcShardsGenerateResponse
62, // 111: volume_server_pb.VolumeServer.VolumeEcShardsRebuild:output_type -> volume_server_pb.VolumeEcShardsRebuildResponse
64, // 112: volume_server_pb.VolumeServer.VolumeEcShardsCopy:output_type -> volume_server_pb.VolumeEcShardsCopyResponse
66, // 113: volume_server_pb.VolumeServer.VolumeEcShardsDelete:output_type -> volume_server_pb.VolumeEcShardsDeleteResponse
68, // 114: volume_server_pb.VolumeServer.VolumeEcShardsMount:output_type -> volume_server_pb.VolumeEcShardsMountResponse
70, // 115: volume_server_pb.VolumeServer.VolumeEcShardsUnmount:output_type -> volume_server_pb.VolumeEcShardsUnmountResponse
72, // 116: volume_server_pb.VolumeServer.VolumeEcShardRead:output_type -> volume_server_pb.VolumeEcShardReadResponse
74, // 117: volume_server_pb.VolumeServer.VolumeEcBlobDelete:output_type -> volume_server_pb.VolumeEcBlobDeleteResponse
76, // 118: volume_server_pb.VolumeServer.VolumeEcShardsToVolume:output_type -> volume_server_pb.VolumeEcShardsToVolumeResponse
78, // 119: volume_server_pb.VolumeServer.VolumeEcShardsInfo:output_type -> volume_server_pb.VolumeEcShardsInfoResponse
89, // 120: volume_server_pb.VolumeServer.VolumeTierMoveDatToRemote:output_type -> volume_server_pb.VolumeTierMoveDatToRemoteResponse
91, // 121: volume_server_pb.VolumeServer.VolumeTierMoveDatFromRemote:output_type -> volume_server_pb.VolumeTierMoveDatFromRemoteResponse
93, // 122: volume_server_pb.VolumeServer.VolumeServerStatus:output_type -> volume_server_pb.VolumeServerStatusResponse
95, // 123: volume_server_pb.VolumeServer.VolumeServerLeave:output_type -> volume_server_pb.VolumeServerLeaveResponse
97, // 124: volume_server_pb.VolumeServer.FetchAndWriteNeedle:output_type -> volume_server_pb.FetchAndWriteNeedleResponse
99, // 125: volume_server_pb.VolumeServer.ScrubVolume:output_type -> volume_server_pb.ScrubVolumeResponse
101, // 126: volume_server_pb.VolumeServer.ScrubEcVolume:output_type -> volume_server_pb.ScrubEcVolumeResponse
103, // 127: volume_server_pb.VolumeServer.Query:output_type -> volume_server_pb.QueriedStripe
105, // 128: volume_server_pb.VolumeServer.VolumeNeedleStatus:output_type -> volume_server_pb.VolumeNeedleStatusResponse
107, // 129: volume_server_pb.VolumeServer.Ping:output_type -> volume_server_pb.PingResponse
109, // 130: volume_server_pb.VolumeServer.AllocateBlockVolume:output_type -> volume_server_pb.AllocateBlockVolumeResponse
111, // 131: volume_server_pb.VolumeServer.VolumeServerDeleteBlockVolume:output_type -> volume_server_pb.VolumeServerDeleteBlockVolumeResponse
113, // 132: volume_server_pb.VolumeServer.SnapshotBlockVolume:output_type -> volume_server_pb.SnapshotBlockVolumeResponse
115, // 133: volume_server_pb.VolumeServer.DeleteBlockSnapshot:output_type -> volume_server_pb.DeleteBlockSnapshotResponse
117, // 134: volume_server_pb.VolumeServer.ListBlockSnapshots:output_type -> volume_server_pb.ListBlockSnapshotsResponse
120, // 135: volume_server_pb.VolumeServer.ExpandBlockVolume:output_type -> volume_server_pb.ExpandBlockVolumeResponse
82, // [82:136] is the sub-list for method output_type
28, // [28:82] is the sub-list for method input_type
121, // 82: volume_server_pb.VolumeServer.PrepareExpandBlockVolume:input_type -> volume_server_pb.PrepareExpandBlockVolumeRequest
123, // 83: volume_server_pb.VolumeServer.CommitExpandBlockVolume:input_type -> volume_server_pb.CommitExpandBlockVolumeRequest
125, // 84: volume_server_pb.VolumeServer.CancelExpandBlockVolume:input_type -> volume_server_pb.CancelExpandBlockVolumeRequest
3, // 85: volume_server_pb.VolumeServer.BatchDelete:output_type -> volume_server_pb.BatchDeleteResponse
7, // 86: volume_server_pb.VolumeServer.VacuumVolumeCheck:output_type -> volume_server_pb.VacuumVolumeCheckResponse
9, // 87: volume_server_pb.VolumeServer.VacuumVolumeCompact:output_type -> volume_server_pb.VacuumVolumeCompactResponse
11, // 88: volume_server_pb.VolumeServer.VacuumVolumeCommit:output_type -> volume_server_pb.VacuumVolumeCommitResponse
13, // 89: volume_server_pb.VolumeServer.VacuumVolumeCleanup:output_type -> volume_server_pb.VacuumVolumeCleanupResponse
15, // 90: volume_server_pb.VolumeServer.DeleteCollection:output_type -> volume_server_pb.DeleteCollectionResponse
17, // 91: volume_server_pb.VolumeServer.AllocateVolume:output_type -> volume_server_pb.AllocateVolumeResponse
19, // 92: volume_server_pb.VolumeServer.VolumeSyncStatus:output_type -> volume_server_pb.VolumeSyncStatusResponse
21, // 93: volume_server_pb.VolumeServer.VolumeIncrementalCopy:output_type -> volume_server_pb.VolumeIncrementalCopyResponse
23, // 94: volume_server_pb.VolumeServer.VolumeMount:output_type -> volume_server_pb.VolumeMountResponse
25, // 95: volume_server_pb.VolumeServer.VolumeUnmount:output_type -> volume_server_pb.VolumeUnmountResponse
27, // 96: volume_server_pb.VolumeServer.VolumeDelete:output_type -> volume_server_pb.VolumeDeleteResponse
29, // 97: volume_server_pb.VolumeServer.VolumeMarkReadonly:output_type -> volume_server_pb.VolumeMarkReadonlyResponse
31, // 98: volume_server_pb.VolumeServer.VolumeMarkWritable:output_type -> volume_server_pb.VolumeMarkWritableResponse
33, // 99: volume_server_pb.VolumeServer.VolumeConfigure:output_type -> volume_server_pb.VolumeConfigureResponse
35, // 100: volume_server_pb.VolumeServer.VolumeStatus:output_type -> volume_server_pb.VolumeStatusResponse
37, // 101: volume_server_pb.VolumeServer.GetState:output_type -> volume_server_pb.GetStateResponse
39, // 102: volume_server_pb.VolumeServer.SetState:output_type -> volume_server_pb.SetStateResponse
41, // 103: volume_server_pb.VolumeServer.VolumeCopy:output_type -> volume_server_pb.VolumeCopyResponse
81, // 104: volume_server_pb.VolumeServer.ReadVolumeFileStatus:output_type -> volume_server_pb.ReadVolumeFileStatusResponse
43, // 105: volume_server_pb.VolumeServer.CopyFile:output_type -> volume_server_pb.CopyFileResponse
46, // 106: volume_server_pb.VolumeServer.ReceiveFile:output_type -> volume_server_pb.ReceiveFileResponse
48, // 107: volume_server_pb.VolumeServer.ReadNeedleBlob:output_type -> volume_server_pb.ReadNeedleBlobResponse
50, // 108: volume_server_pb.VolumeServer.ReadNeedleMeta:output_type -> volume_server_pb.ReadNeedleMetaResponse
52, // 109: volume_server_pb.VolumeServer.WriteNeedleBlob:output_type -> volume_server_pb.WriteNeedleBlobResponse
54, // 110: volume_server_pb.VolumeServer.ReadAllNeedles:output_type -> volume_server_pb.ReadAllNeedlesResponse
56, // 111: volume_server_pb.VolumeServer.VolumeTailSender:output_type -> volume_server_pb.VolumeTailSenderResponse
58, // 112: volume_server_pb.VolumeServer.VolumeTailReceiver:output_type -> volume_server_pb.VolumeTailReceiverResponse
60, // 113: volume_server_pb.VolumeServer.VolumeEcShardsGenerate:output_type -> volume_server_pb.VolumeEcShardsGenerateResponse
62, // 114: volume_server_pb.VolumeServer.VolumeEcShardsRebuild:output_type -> volume_server_pb.VolumeEcShardsRebuildResponse
64, // 115: volume_server_pb.VolumeServer.VolumeEcShardsCopy:output_type -> volume_server_pb.VolumeEcShardsCopyResponse
66, // 116: volume_server_pb.VolumeServer.VolumeEcShardsDelete:output_type -> volume_server_pb.VolumeEcShardsDeleteResponse
68, // 117: volume_server_pb.VolumeServer.VolumeEcShardsMount:output_type -> volume_server_pb.VolumeEcShardsMountResponse
70, // 118: volume_server_pb.VolumeServer.VolumeEcShardsUnmount:output_type -> volume_server_pb.VolumeEcShardsUnmountResponse
72, // 119: volume_server_pb.VolumeServer.VolumeEcShardRead:output_type -> volume_server_pb.VolumeEcShardReadResponse
74, // 120: volume_server_pb.VolumeServer.VolumeEcBlobDelete:output_type -> volume_server_pb.VolumeEcBlobDeleteResponse
76, // 121: volume_server_pb.VolumeServer.VolumeEcShardsToVolume:output_type -> volume_server_pb.VolumeEcShardsToVolumeResponse
78, // 122: volume_server_pb.VolumeServer.VolumeEcShardsInfo:output_type -> volume_server_pb.VolumeEcShardsInfoResponse
89, // 123: volume_server_pb.VolumeServer.VolumeTierMoveDatToRemote:output_type -> volume_server_pb.VolumeTierMoveDatToRemoteResponse
91, // 124: volume_server_pb.VolumeServer.VolumeTierMoveDatFromRemote:output_type -> volume_server_pb.VolumeTierMoveDatFromRemoteResponse
93, // 125: volume_server_pb.VolumeServer.VolumeServerStatus:output_type -> volume_server_pb.VolumeServerStatusResponse
95, // 126: volume_server_pb.VolumeServer.VolumeServerLeave:output_type -> volume_server_pb.VolumeServerLeaveResponse
97, // 127: volume_server_pb.VolumeServer.FetchAndWriteNeedle:output_type -> volume_server_pb.FetchAndWriteNeedleResponse
99, // 128: volume_server_pb.VolumeServer.ScrubVolume:output_type -> volume_server_pb.ScrubVolumeResponse
101, // 129: volume_server_pb.VolumeServer.ScrubEcVolume:output_type -> volume_server_pb.ScrubEcVolumeResponse
103, // 130: volume_server_pb.VolumeServer.Query:output_type -> volume_server_pb.QueriedStripe
105, // 131: volume_server_pb.VolumeServer.VolumeNeedleStatus:output_type -> volume_server_pb.VolumeNeedleStatusResponse
107, // 132: volume_server_pb.VolumeServer.Ping:output_type -> volume_server_pb.PingResponse
109, // 133: volume_server_pb.VolumeServer.AllocateBlockVolume:output_type -> volume_server_pb.AllocateBlockVolumeResponse
111, // 134: volume_server_pb.VolumeServer.VolumeServerDeleteBlockVolume:output_type -> volume_server_pb.VolumeServerDeleteBlockVolumeResponse
113, // 135: volume_server_pb.VolumeServer.SnapshotBlockVolume:output_type -> volume_server_pb.SnapshotBlockVolumeResponse
115, // 136: volume_server_pb.VolumeServer.DeleteBlockSnapshot:output_type -> volume_server_pb.DeleteBlockSnapshotResponse
117, // 137: volume_server_pb.VolumeServer.ListBlockSnapshots:output_type -> volume_server_pb.ListBlockSnapshotsResponse
120, // 138: volume_server_pb.VolumeServer.ExpandBlockVolume:output_type -> volume_server_pb.ExpandBlockVolumeResponse
122, // 139: volume_server_pb.VolumeServer.PrepareExpandBlockVolume:output_type -> volume_server_pb.PrepareExpandBlockVolumeResponse
124, // 140: volume_server_pb.VolumeServer.CommitExpandBlockVolume:output_type -> volume_server_pb.CommitExpandBlockVolumeResponse
126, // 141: volume_server_pb.VolumeServer.CancelExpandBlockVolume:output_type -> volume_server_pb.CancelExpandBlockVolumeResponse
85, // [85:142] is the sub-list for method output_type
28, // [28:85] is the sub-list for method input_type
28, // [28:28] is the sub-list for extension type_name
28, // [28:28] is the sub-list for extension extendee
0, // [0:28] is the sub-list for field type_name
@ -8334,7 +8643,7 @@ func file_volume_server_proto_init() {
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_volume_server_proto_rawDesc), len(file_volume_server_proto_rawDesc)),
NumEnums: 1,
NumMessages: 129,
NumMessages: 135,
NumExtensions: 0,
NumServices: 1,
},

114
weed/pb/volume_server_pb/volume_server_grpc.pb.go

@ -73,6 +73,9 @@ const (
VolumeServer_DeleteBlockSnapshot_FullMethodName = "/volume_server_pb.VolumeServer/DeleteBlockSnapshot"
VolumeServer_ListBlockSnapshots_FullMethodName = "/volume_server_pb.VolumeServer/ListBlockSnapshots"
VolumeServer_ExpandBlockVolume_FullMethodName = "/volume_server_pb.VolumeServer/ExpandBlockVolume"
VolumeServer_PrepareExpandBlockVolume_FullMethodName = "/volume_server_pb.VolumeServer/PrepareExpandBlockVolume"
VolumeServer_CommitExpandBlockVolume_FullMethodName = "/volume_server_pb.VolumeServer/CommitExpandBlockVolume"
VolumeServer_CancelExpandBlockVolume_FullMethodName = "/volume_server_pb.VolumeServer/CancelExpandBlockVolume"
)
// VolumeServerClient is the client API for VolumeServer service.
@ -141,6 +144,9 @@ type VolumeServerClient interface {
DeleteBlockSnapshot(ctx context.Context, in *DeleteBlockSnapshotRequest, opts ...grpc.CallOption) (*DeleteBlockSnapshotResponse, error)
ListBlockSnapshots(ctx context.Context, in *ListBlockSnapshotsRequest, opts ...grpc.CallOption) (*ListBlockSnapshotsResponse, error)
ExpandBlockVolume(ctx context.Context, in *ExpandBlockVolumeRequest, opts ...grpc.CallOption) (*ExpandBlockVolumeResponse, error)
PrepareExpandBlockVolume(ctx context.Context, in *PrepareExpandBlockVolumeRequest, opts ...grpc.CallOption) (*PrepareExpandBlockVolumeResponse, error)
CommitExpandBlockVolume(ctx context.Context, in *CommitExpandBlockVolumeRequest, opts ...grpc.CallOption) (*CommitExpandBlockVolumeResponse, error)
CancelExpandBlockVolume(ctx context.Context, in *CancelExpandBlockVolumeRequest, opts ...grpc.CallOption) (*CancelExpandBlockVolumeResponse, error)
}
type volumeServerClient struct {
@ -784,6 +790,36 @@ func (c *volumeServerClient) ExpandBlockVolume(ctx context.Context, in *ExpandBl
return out, nil
}
// PrepareExpandBlockVolume issues the unary PrepareExpandBlockVolume RPC
// (phase 1 of the coordinated expand protocol) against the volume server.
// NOTE(review): this follows the protoc-gen-go-grpc generated-stub pattern —
// presumably this file is generated; regenerate from the .proto rather than
// hand-editing.
func (c *volumeServerClient) PrepareExpandBlockVolume(ctx context.Context, in *PrepareExpandBlockVolumeRequest, opts ...grpc.CallOption) (*PrepareExpandBlockVolumeResponse, error) {
	// StaticMethod marks the method name as compile-time constant for interceptors.
	cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	out := new(PrepareExpandBlockVolumeResponse)
	err := c.cc.Invoke(ctx, VolumeServer_PrepareExpandBlockVolume_FullMethodName, in, out, cOpts...)
	if err != nil {
		return nil, err
	}
	return out, nil
}
// CommitExpandBlockVolume issues the unary CommitExpandBlockVolume RPC
// (phase 2, commit, of the coordinated expand protocol) against the volume
// server. Generated-stub pattern; do not hand-edit — regenerate instead.
func (c *volumeServerClient) CommitExpandBlockVolume(ctx context.Context, in *CommitExpandBlockVolumeRequest, opts ...grpc.CallOption) (*CommitExpandBlockVolumeResponse, error) {
	// StaticMethod marks the method name as compile-time constant for interceptors.
	cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	out := new(CommitExpandBlockVolumeResponse)
	err := c.cc.Invoke(ctx, VolumeServer_CommitExpandBlockVolume_FullMethodName, in, out, cOpts...)
	if err != nil {
		return nil, err
	}
	return out, nil
}
// CancelExpandBlockVolume issues the unary CancelExpandBlockVolume RPC
// (phase 2, rollback, of the coordinated expand protocol) against the volume
// server. Generated-stub pattern; do not hand-edit — regenerate instead.
func (c *volumeServerClient) CancelExpandBlockVolume(ctx context.Context, in *CancelExpandBlockVolumeRequest, opts ...grpc.CallOption) (*CancelExpandBlockVolumeResponse, error) {
	// StaticMethod marks the method name as compile-time constant for interceptors.
	cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	out := new(CancelExpandBlockVolumeResponse)
	err := c.cc.Invoke(ctx, VolumeServer_CancelExpandBlockVolume_FullMethodName, in, out, cOpts...)
	if err != nil {
		return nil, err
	}
	return out, nil
}
// VolumeServerServer is the server API for VolumeServer service.
// All implementations must embed UnimplementedVolumeServerServer
// for forward compatibility.
@ -850,6 +886,9 @@ type VolumeServerServer interface {
DeleteBlockSnapshot(context.Context, *DeleteBlockSnapshotRequest) (*DeleteBlockSnapshotResponse, error)
ListBlockSnapshots(context.Context, *ListBlockSnapshotsRequest) (*ListBlockSnapshotsResponse, error)
ExpandBlockVolume(context.Context, *ExpandBlockVolumeRequest) (*ExpandBlockVolumeResponse, error)
PrepareExpandBlockVolume(context.Context, *PrepareExpandBlockVolumeRequest) (*PrepareExpandBlockVolumeResponse, error)
CommitExpandBlockVolume(context.Context, *CommitExpandBlockVolumeRequest) (*CommitExpandBlockVolumeResponse, error)
CancelExpandBlockVolume(context.Context, *CancelExpandBlockVolumeRequest) (*CancelExpandBlockVolumeResponse, error)
mustEmbedUnimplementedVolumeServerServer()
}
@ -1022,6 +1061,15 @@ func (UnimplementedVolumeServerServer) ListBlockSnapshots(context.Context, *List
func (UnimplementedVolumeServerServer) ExpandBlockVolume(context.Context, *ExpandBlockVolumeRequest) (*ExpandBlockVolumeResponse, error) {
return nil, status.Error(codes.Unimplemented, "method ExpandBlockVolume not implemented")
}
// Default PrepareExpandBlockVolume stub: servers embedding
// UnimplementedVolumeServerServer return codes.Unimplemented until they
// provide their own implementation (forward-compatibility shim).
func (UnimplementedVolumeServerServer) PrepareExpandBlockVolume(context.Context, *PrepareExpandBlockVolumeRequest) (*PrepareExpandBlockVolumeResponse, error) {
	return nil, status.Error(codes.Unimplemented, "method PrepareExpandBlockVolume not implemented")
}

// Default CommitExpandBlockVolume stub; see PrepareExpandBlockVolume above.
func (UnimplementedVolumeServerServer) CommitExpandBlockVolume(context.Context, *CommitExpandBlockVolumeRequest) (*CommitExpandBlockVolumeResponse, error) {
	return nil, status.Error(codes.Unimplemented, "method CommitExpandBlockVolume not implemented")
}

// Default CancelExpandBlockVolume stub; see PrepareExpandBlockVolume above.
func (UnimplementedVolumeServerServer) CancelExpandBlockVolume(context.Context, *CancelExpandBlockVolumeRequest) (*CancelExpandBlockVolumeResponse, error) {
	return nil, status.Error(codes.Unimplemented, "method CancelExpandBlockVolume not implemented")
}
func (UnimplementedVolumeServerServer) mustEmbedUnimplementedVolumeServerServer() {}
func (UnimplementedVolumeServerServer) testEmbeddedByValue() {}
@ -1934,6 +1982,60 @@ func _VolumeServer_ExpandBlockVolume_Handler(srv interface{}, ctx context.Contex
return interceptor(ctx, in, info, handler)
}
// _VolumeServer_PrepareExpandBlockVolume_Handler is the server-side dispatch
// shim for the PrepareExpandBlockVolume RPC: it decodes the wire request and
// either calls the service implementation directly or routes the call through
// the configured unary interceptor.
func _VolumeServer_PrepareExpandBlockVolume_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	in := new(PrepareExpandBlockVolumeRequest)
	if err := dec(in); err != nil {
		return nil, err
	}
	if interceptor == nil {
		// No interceptor installed: invoke the implementation directly.
		return srv.(VolumeServerServer).PrepareExpandBlockVolume(ctx, in)
	}
	info := &grpc.UnaryServerInfo{
		Server:     srv,
		FullMethod: VolumeServer_PrepareExpandBlockVolume_FullMethodName,
	}
	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
		return srv.(VolumeServerServer).PrepareExpandBlockVolume(ctx, req.(*PrepareExpandBlockVolumeRequest))
	}
	return interceptor(ctx, in, info, handler)
}
// _VolumeServer_CommitExpandBlockVolume_Handler is the server-side dispatch
// shim for the CommitExpandBlockVolume RPC: it decodes the wire request and
// either calls the service implementation directly or routes the call through
// the configured unary interceptor.
func _VolumeServer_CommitExpandBlockVolume_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	in := new(CommitExpandBlockVolumeRequest)
	if err := dec(in); err != nil {
		return nil, err
	}
	if interceptor == nil {
		// No interceptor installed: invoke the implementation directly.
		return srv.(VolumeServerServer).CommitExpandBlockVolume(ctx, in)
	}
	info := &grpc.UnaryServerInfo{
		Server:     srv,
		FullMethod: VolumeServer_CommitExpandBlockVolume_FullMethodName,
	}
	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
		return srv.(VolumeServerServer).CommitExpandBlockVolume(ctx, req.(*CommitExpandBlockVolumeRequest))
	}
	return interceptor(ctx, in, info, handler)
}
// _VolumeServer_CancelExpandBlockVolume_Handler is the server-side dispatch
// shim for the CancelExpandBlockVolume RPC: it decodes the wire request and
// either calls the service implementation directly or routes the call through
// the configured unary interceptor.
func _VolumeServer_CancelExpandBlockVolume_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	in := new(CancelExpandBlockVolumeRequest)
	if err := dec(in); err != nil {
		return nil, err
	}
	if interceptor == nil {
		// No interceptor installed: invoke the implementation directly.
		return srv.(VolumeServerServer).CancelExpandBlockVolume(ctx, in)
	}
	info := &grpc.UnaryServerInfo{
		Server:     srv,
		FullMethod: VolumeServer_CancelExpandBlockVolume_FullMethodName,
	}
	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
		return srv.(VolumeServerServer).CancelExpandBlockVolume(ctx, req.(*CancelExpandBlockVolumeRequest))
	}
	return interceptor(ctx, in, info, handler)
}
// VolumeServer_ServiceDesc is the grpc.ServiceDesc for VolumeServer service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
@ -2113,6 +2215,18 @@ var VolumeServer_ServiceDesc = grpc.ServiceDesc{
MethodName: "ExpandBlockVolume",
Handler: _VolumeServer_ExpandBlockVolume_Handler,
},
{
MethodName: "PrepareExpandBlockVolume",
Handler: _VolumeServer_PrepareExpandBlockVolume_Handler,
},
{
MethodName: "CommitExpandBlockVolume",
Handler: _VolumeServer_CommitExpandBlockVolume_Handler,
},
{
MethodName: "CancelExpandBlockVolume",
Handler: _VolumeServer_CancelExpandBlockVolume_Handler,
},
},
Streams: []grpc.StreamDesc{
{

75
weed/server/master_block_registry.go

@ -82,6 +82,12 @@ type BlockVolumeEntry struct {
// Lease tracking for failover (CP6-3 F2).
LastLeaseGrant time.Time
LeaseTTL time.Duration
// CP11A-2: Coordinated expand tracking.
ExpandInProgress bool
ExpandFailed bool // true = primary committed but replica(s) failed; size suppressed
PendingExpandSize uint64
ExpandEpoch uint64
}
// HasReplica returns true if this volume has any replica (checks both new and deprecated fields).
@ -190,6 +196,70 @@ func (r *BlockVolumeRegistry) Unregister(name string) *BlockVolumeEntry {
return entry
}
// AcquireExpandInflight attempts to take the per-volume expand lock and, on
// success, records the pending expand metadata (target size and epoch) on the
// registry entry. It returns false when the volume is unknown, when another
// expand is already in flight, or when a previous expand is stuck in the
// failed state (which must be cleared via ClearExpandFailed first).
func (r *BlockVolumeRegistry) AcquireExpandInflight(name string, pendingSize, expandEpoch uint64) bool {
	r.mu.Lock()
	defer r.mu.Unlock()

	e, found := r.volumes[name]
	if !found || e.ExpandInProgress || e.ExpandFailed {
		// Unknown volume, or an expand is already tracked — refuse so the
		// caller cannot start a second, conflicting expand.
		return false
	}
	e.ExpandInProgress = true
	e.PendingExpandSize = pendingSize
	e.ExpandEpoch = expandEpoch
	return true
}
// ReleaseExpandInflight drops every expand-tracking field on the named
// volume's entry, releasing the expand lock. Call it only on a clean outcome:
// either all nodes committed successfully or all nodes were rolled back.
// Unknown volume names are ignored.
func (r *BlockVolumeRegistry) ReleaseExpandInflight(name string) {
	r.mu.Lock()
	defer r.mu.Unlock()

	if e, ok := r.volumes[name]; ok {
		e.ExpandInProgress = false
		e.ExpandFailed = false
		e.PendingExpandSize = 0
		e.ExpandEpoch = 0
	}
}
// MarkExpandFailed flags the named volume's expand as failed (e.g. the
// primary committed but one or more replicas did not). ExpandInProgress is
// deliberately left true so heartbeats keep suppressing size updates, and
// PendingExpandSize/ExpandEpoch are preserved for diagnosis. The entry stays
// locked until ClearExpandFailed performs manual reconciliation.
// Unknown volume names are ignored.
func (r *BlockVolumeRegistry) MarkExpandFailed(name string) {
	r.mu.Lock()
	defer r.mu.Unlock()

	if e, ok := r.volumes[name]; ok {
		// Only the failed flag flips; all other expand state is retained.
		e.ExpandFailed = true
	}
}
// ClearExpandFailed wipes the expand-failed state (and all other expand
// tracking) so a fresh expand can be attempted. It is intended for an
// operator or automated reconciliation after the inconsistency is resolved —
// for example once the failed replica has been rebuilt or manually expanded.
// Unknown volume names are ignored.
func (r *BlockVolumeRegistry) ClearExpandFailed(name string) {
	r.mu.Lock()
	defer r.mu.Unlock()

	if e, ok := r.volumes[name]; ok {
		e.ExpandInProgress = false
		e.ExpandFailed = false
		e.PendingExpandSize = 0
		e.ExpandEpoch = 0
	}
}
// UpdateSize updates the size of a registered volume.
// Called only after a successful VS expand to keep registry in sync.
func (r *BlockVolumeRegistry) UpdateSize(name string, newSizeBytes uint64) error {
@ -319,7 +389,10 @@ func (r *BlockVolumeRegistry) UpdateFullHeartbeat(server string, infos []*master
if isPrimary {
// Primary heartbeat: update primary fields.
existing.SizeBytes = info.VolumeSize
// CP11A-2: skip size update during coordinated expand.
if !existing.ExpandInProgress {
existing.SizeBytes = info.VolumeSize
}
existing.Epoch = info.Epoch
existing.Role = info.Role
existing.Status = StatusActive

95
weed/server/master_grpc_server_block.go

@ -367,7 +367,8 @@ func (ms *MasterServer) ListBlockSnapshots(ctx context.Context, req *master_pb.L
return resp, nil
}
// ExpandBlockVolume expands a block volume via the volume server, then updates registry.
// ExpandBlockVolume expands a block volume. For standalone volumes (no replicas),
// uses direct expand. For replicated volumes, uses coordinated prepare/commit/cancel.
func (ms *MasterServer) ExpandBlockVolume(ctx context.Context, req *master_pb.ExpandBlockVolumeRequest) (*master_pb.ExpandBlockVolumeResponse, error) {
if req.Name == "" {
return nil, fmt.Errorf("name is required")
@ -381,28 +382,96 @@ func (ms *MasterServer) ExpandBlockVolume(ctx context.Context, req *master_pb.Ex
return nil, fmt.Errorf("block volume %q not found", req.Name)
}
// Expand primary first; only update registry on success.
capacity, err := ms.blockVSExpand(ctx, entry.VolumeServer, req.Name, req.NewSizeBytes)
// Standalone path: no replicas → direct expand (unchanged behavior).
if len(entry.Replicas) == 0 {
capacity, err := ms.blockVSExpand(ctx, entry.VolumeServer, req.Name, req.NewSizeBytes)
if err != nil {
return nil, fmt.Errorf("expand on %s: %w", entry.VolumeServer, err)
}
if uerr := ms.blockRegistry.UpdateSize(req.Name, capacity); uerr != nil {
glog.Warningf("[reqID=%s] ExpandBlockVolume %q: registry update failed: %v", blockReqID(ctx), req.Name, uerr)
}
return &master_pb.ExpandBlockVolumeResponse{CapacityBytes: capacity}, nil
}
// Coordinated expand for replicated volumes.
expandEpoch := ms.nextExpandEpoch.Add(1)
if !ms.blockRegistry.AcquireExpandInflight(req.Name, req.NewSizeBytes, expandEpoch) {
return nil, fmt.Errorf("block volume %q: expand already in progress or failed (requires reconciliation)", req.Name)
}
// Only release on clean success or clean cancel (all nodes rolled back).
// On partial commit failure, MarkExpandFailed keeps the guard up.
expandClean := false
defer func() {
if expandClean {
ms.blockRegistry.ReleaseExpandInflight(req.Name)
}
}()
// Track prepared nodes for rollback.
var prepared []string
// PREPARE: primary.
if err := ms.blockVSPrepareExpand(ctx, entry.VolumeServer, req.Name, req.NewSizeBytes, expandEpoch); err != nil {
expandClean = true // nothing to worry about, just release
return nil, fmt.Errorf("prepare expand on primary %s: %w", entry.VolumeServer, err)
}
prepared = append(prepared, entry.VolumeServer)
// PREPARE: replicas.
for _, ri := range entry.Replicas {
if err := ms.blockVSPrepareExpand(ctx, ri.Server, req.Name, req.NewSizeBytes, expandEpoch); err != nil {
glog.Warningf("[reqID=%s] ExpandBlockVolume %q: prepare on replica %s failed: %v", blockReqID(ctx), req.Name, ri.Server, err)
// Cancel all prepared nodes.
for _, ps := range prepared {
if cerr := ms.blockVSCancelExpand(ctx, ps, req.Name, expandEpoch); cerr != nil {
glog.Warningf("[reqID=%s] ExpandBlockVolume %q: cancel on %s failed: %v", blockReqID(ctx), req.Name, ps, cerr)
}
}
expandClean = true // all cancelled, safe to release
return nil, fmt.Errorf("prepare expand on replica %s: %w", ri.Server, err)
}
prepared = append(prepared, ri.Server)
}
// COMMIT: primary.
capacity, err := ms.blockVSCommitExpand(ctx, entry.VolumeServer, req.Name, expandEpoch)
if err != nil {
return nil, fmt.Errorf("expand on %s: %w", entry.VolumeServer, err)
// Commit failed on primary — cancel all.
for _, ps := range prepared {
if cerr := ms.blockVSCancelExpand(ctx, ps, req.Name, expandEpoch); cerr != nil {
glog.Warningf("[reqID=%s] ExpandBlockVolume %q: cancel on %s after primary commit fail: %v", blockReqID(ctx), req.Name, ps, cerr)
}
}
expandClean = true // all cancelled, safe to release
return nil, fmt.Errorf("commit expand on primary %s: %w", entry.VolumeServer, err)
}
// CP8-2: Expand ALL replicas (best-effort, log warning on failure).
// COMMIT: replicas.
allCommitted := true
for _, ri := range entry.Replicas {
if _, err := ms.blockVSExpand(ctx, ri.Server, req.Name, req.NewSizeBytes); err != nil {
glog.Warningf("[reqID=%s] ExpandBlockVolume %q: replica expand on %s failed (best-effort): %v",
blockReqID(ctx), req.Name, ri.Server, err)
if _, cerr := ms.blockVSCommitExpand(ctx, ri.Server, req.Name, expandEpoch); cerr != nil {
glog.Warningf("[reqID=%s] ExpandBlockVolume %q: commit on replica %s failed: %v", blockReqID(ctx), req.Name, ri.Server, cerr)
allCommitted = false
}
}
// Update registry with actual new size.
if !allCommitted {
// Primary committed but replica(s) failed. Mark expand as failed:
// ExpandInProgress stays true → heartbeat won't overwrite SizeBytes.
// Operator must reconcile (rebuild/re-expand failed replicas) then call ClearExpandFailed.
ms.blockRegistry.MarkExpandFailed(req.Name)
return nil, fmt.Errorf("block volume %q: expand committed on primary but failed on one or more replicas (volume degraded, expand locked)", req.Name)
}
// All committed: update registry and release cleanly.
if uerr := ms.blockRegistry.UpdateSize(req.Name, capacity); uerr != nil {
glog.Warningf("[reqID=%s] ExpandBlockVolume %q: registry update failed (VS succeeded): %v", blockReqID(ctx), req.Name, uerr)
glog.Warningf("[reqID=%s] ExpandBlockVolume %q: registry update failed: %v", blockReqID(ctx), req.Name, uerr)
}
expandClean = true
return &master_pb.ExpandBlockVolumeResponse{
CapacityBytes: capacity,
}, nil
return &master_pb.ExpandBlockVolumeResponse{CapacityBytes: capacity}, nil
}
// createBlockVolumeResponseFromEntry builds a CreateBlockVolumeResponse from a registry entry.

492
weed/server/master_grpc_server_block_test.go

@ -813,14 +813,21 @@ func TestMaster_DeleteRF3_DeletesAllReplicas(t *testing.T) {
}
}
// ExpandBlockVolume RF=3 expands all replicas.
// ExpandBlockVolume RF=3 uses coordinated prepare/commit on all nodes.
func TestMaster_ExpandRF3_ExpandsAllReplicas(t *testing.T) {
ms := testMasterServerRF3(t)
var expandedServers []string
ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) {
expandedServers = append(expandedServers, server)
return newSize, nil
var preparedServers, committedServers []string
ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
preparedServers = append(preparedServers, server)
return nil
}
ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
committedServers = append(committedServers, server)
return 2 << 30, nil
}
ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error {
return nil
}
_, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
@ -839,10 +846,12 @@ func TestMaster_ExpandRF3_ExpandsAllReplicas(t *testing.T) {
t.Fatalf("expand: %v", err)
}
// Should have expanded on primary + 2 replicas = 3 servers.
if len(expandedServers) != 3 {
t.Fatalf("expected 3 expand calls (primary + 2 replicas), got %d: %v",
len(expandedServers), expandedServers)
// Should have prepared on primary + 2 replicas = 3 servers.
if len(preparedServers) != 3 {
t.Fatalf("expected 3 prepare calls, got %d: %v", len(preparedServers), preparedServers)
}
if len(committedServers) != 3 {
t.Fatalf("expected 3 commit calls, got %d: %v", len(committedServers), committedServers)
}
}
@ -1178,3 +1187,468 @@ func TestMaster_PromotionCopiesNvmeFields(t *testing.T) {
t.Fatalf("Lookup Nqn after promotion: got %q", lresp.Nqn)
}
}
// ============================================================
// CP11A-2: Coordinated Expand Tests
// ============================================================
// testMasterServerWithExpandMocks returns a test master whose expand-related
// volume-server calls are stubbed out: direct expand echoes the requested
// size, prepare and cancel always succeed, and commit reports 2 GiB.
func testMasterServerWithExpandMocks(t *testing.T) *MasterServer {
	t.Helper()
	ms := testMasterServer(t)
	ms.blockVSExpand = func(_ context.Context, _ string, _ string, newSize uint64) (uint64, error) {
		return newSize, nil
	}
	ms.blockVSPrepareExpand = func(_ context.Context, _ string, _ string, _, _ uint64) error {
		return nil
	}
	ms.blockVSCommitExpand = func(_ context.Context, _ string, _ string, _ uint64) (uint64, error) {
		return 2 << 30, nil
	}
	ms.blockVSCancelExpand = func(_ context.Context, _ string, _ string, _ uint64) error {
		return nil
	}
	return ms
}
// TestMaster_ExpandCoordinated_Success: with two block-capable servers the
// coordinated expand runs prepare+commit on both nodes (primary + replica)
// and the registry reflects the new size.
func TestMaster_ExpandCoordinated_Success(t *testing.T) {
	ms := testMasterServerWithExpandMocks(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")
	ms.blockRegistry.MarkBlockCapable("vs2:9333")
	ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) {
		return &blockAllocResult{
			Path:            fmt.Sprintf("/data/%s.blk", name),
			IQN:             fmt.Sprintf("iqn.test:%s", name),
			ISCSIAddr:       server,
			ReplicaDataAddr: server + ":4001",
			ReplicaCtrlAddr: server + ":4002",
		}, nil
	}
	var prepareCount, commitCount int
	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
		prepareCount++
		return nil
	}
	ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
		commitCount++
		return 2 << 30, nil
	}
	// Fail fast on create: a failed create would otherwise surface below as a
	// confusing expand error.
	if _, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name: "coord-vol", SizeBytes: 1 << 30,
	}); err != nil {
		t.Fatalf("create: %v", err)
	}
	resp, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
		Name: "coord-vol", NewSizeBytes: 2 << 30,
	})
	if err != nil {
		t.Fatalf("expand: %v", err)
	}
	if resp.CapacityBytes != 2<<30 {
		t.Fatalf("capacity: got %d, want %d", resp.CapacityBytes, 2<<30)
	}
	if prepareCount != 2 {
		t.Fatalf("expected 2 prepare calls (primary+replica), got %d", prepareCount)
	}
	if commitCount != 2 {
		t.Fatalf("expected 2 commit calls, got %d", commitCount)
	}
	entry, _ := ms.blockRegistry.Lookup("coord-vol")
	if entry.SizeBytes != 2<<30 {
		t.Fatalf("registry size: got %d, want %d", entry.SizeBytes, 2<<30)
	}
}
// TestMaster_ExpandCoordinated_PrepareFailure_Cancels: if a replica's prepare
// fails, the coordinator cancels the already-prepared primary and leaves the
// registry size unchanged.
func TestMaster_ExpandCoordinated_PrepareFailure_Cancels(t *testing.T) {
	ms := testMasterServerWithExpandMocks(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")
	ms.blockRegistry.MarkBlockCapable("vs2:9333")
	ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) {
		return &blockAllocResult{
			Path:            fmt.Sprintf("/data/%s.blk", name),
			IQN:             fmt.Sprintf("iqn.test:%s", name),
			ISCSIAddr:       server,
			ReplicaDataAddr: server + ":4001",
			ReplicaCtrlAddr: server + ":4002",
		}, nil
	}
	// Check the create error so a failed create doesn't masquerade as the
	// expected expand failure below.
	if _, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name: "cancel-vol", SizeBytes: 1 << 30,
	}); err != nil {
		t.Fatalf("create: %v", err)
	}
	// Determine which server is primary so we can fail the replica.
	entry, _ := ms.blockRegistry.Lookup("cancel-vol")
	primaryServer := entry.VolumeServer
	var cancelCount int
	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
		if server != primaryServer {
			return fmt.Errorf("replica prepare failed")
		}
		return nil
	}
	ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error {
		cancelCount++
		return nil
	}
	_, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
		Name: "cancel-vol", NewSizeBytes: 2 << 30,
	})
	if err == nil {
		t.Fatal("expected error when replica prepare fails")
	}
	if cancelCount != 1 {
		t.Fatalf("expected 1 cancel call (primary was prepared), got %d", cancelCount)
	}
	entry, _ = ms.blockRegistry.Lookup("cancel-vol")
	if entry.SizeBytes != 1<<30 {
		t.Fatalf("registry size should be unchanged: got %d", entry.SizeBytes)
	}
}
// TestMaster_ExpandCoordinated_Standalone_DirectCommit: a volume with no
// replicas uses the direct (single-RPC) expand path, not prepare/commit.
func TestMaster_ExpandCoordinated_Standalone_DirectCommit(t *testing.T) {
	ms := testMasterServerWithExpandMocks(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")
	var expandCalled bool
	ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) {
		expandCalled = true
		return newSize, nil
	}
	// Fail fast if create itself errors instead of letting it surface later.
	if _, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name: "solo-vol", SizeBytes: 1 << 30,
	}); err != nil {
		t.Fatalf("create: %v", err)
	}
	resp, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
		Name: "solo-vol", NewSizeBytes: 2 << 30,
	})
	if err != nil {
		t.Fatalf("expand: %v", err)
	}
	if !expandCalled {
		t.Fatal("standalone should use direct expand, not prepare/commit")
	}
	if resp.CapacityBytes != 2<<30 {
		t.Fatalf("capacity: got %d", resp.CapacityBytes)
	}
}
// TestMaster_ExpandCoordinated_ConcurrentRejected: while one expand is in
// flight, a second expand on the same volume is rejected.
//
// The original version used time.Sleep(20ms) to "wait" for the first expand
// to acquire the inflight lock, which is racy under load. Instead the prepare
// mock signals a ready channel the moment it is entered, so the test is
// deterministic.
func TestMaster_ExpandCoordinated_ConcurrentRejected(t *testing.T) {
	ms := testMasterServerWithExpandMocks(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")
	ms.blockRegistry.MarkBlockCapable("vs2:9333")
	ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) {
		return &blockAllocResult{
			Path:            fmt.Sprintf("/data/%s.blk", name),
			IQN:             fmt.Sprintf("iqn.test:%s", name),
			ISCSIAddr:       server,
			ReplicaDataAddr: server + ":4001",
			ReplicaCtrlAddr: server + ":4002",
		}, nil
	}
	// Make prepare block until we release it; signal readiness exactly once
	// (non-blocking send) even though prepare is called for multiple servers.
	blockCh := make(chan struct{})
	ready := make(chan struct{}, 1)
	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
		select {
		case ready <- struct{}{}:
		default:
		}
		<-blockCh
		return nil
	}
	if _, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name: "conc-vol", SizeBytes: 1 << 30,
	}); err != nil {
		t.Fatalf("create: %v", err)
	}
	// First expand acquires inflight.
	errCh := make(chan error, 1)
	go func() {
		_, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
			Name: "conc-vol", NewSizeBytes: 2 << 30,
		})
		errCh <- err
	}()
	// Wait until the first expand has entered prepare (inflight lock held).
	select {
	case <-ready:
	case <-time.After(5 * time.Second):
		t.Fatal("first expand never reached prepare")
	}
	// Second expand should be rejected.
	_, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
		Name: "conc-vol", NewSizeBytes: 2 << 30,
	})
	if err == nil {
		t.Fatal("concurrent expand should be rejected")
	}
	// Release the first expand and wait for it to finish.
	close(blockCh)
	<-errCh
}
// TestMaster_ExpandCoordinated_Idempotent: expanding a standalone volume to
// its current size is a no-op handled inside the direct expand path.
func TestMaster_ExpandCoordinated_Idempotent(t *testing.T) {
	ms := testMasterServerWithExpandMocks(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")
	// Fail fast if create itself errors instead of letting it surface later.
	if _, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name: "idem-vol", SizeBytes: 1 << 30,
	}); err != nil {
		t.Fatalf("create: %v", err)
	}
	// Same size expand: standalone path, Expand handles no-op internally.
	ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) {
		return 1 << 30, nil // return current size
	}
	resp, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
		Name: "idem-vol", NewSizeBytes: 1 << 30,
	})
	if err != nil {
		t.Fatalf("idempotent expand: %v", err)
	}
	if resp.CapacityBytes != 1<<30 {
		t.Fatalf("capacity: got %d", resp.CapacityBytes)
	}
}
// TestMaster_ExpandCoordinated_CommitFailure_MarksInconsistent: when the
// primary commits but a replica's commit fails, the volume is marked
// ExpandFailed (expand stays "in progress" so heartbeats cannot overwrite the
// size), new expands are blocked until ClearExpandFailed is called.
func TestMaster_ExpandCoordinated_CommitFailure_MarksInconsistent(t *testing.T) {
	ms := testMasterServerWithExpandMocks(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")
	ms.blockRegistry.MarkBlockCapable("vs2:9333")
	ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) {
		return &blockAllocResult{
			Path:            fmt.Sprintf("/data/%s.blk", name),
			IQN:             fmt.Sprintf("iqn.test:%s", name),
			ISCSIAddr:       server,
			ReplicaDataAddr: server + ":4001",
			ReplicaCtrlAddr: server + ":4002",
		}, nil
	}
	// Check the create error so a failed create doesn't masquerade as the
	// expected expand failure below.
	if _, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name: "fail-vol", SizeBytes: 1 << 30,
	}); err != nil {
		t.Fatalf("create: %v", err)
	}
	// Determine which server is primary so we fail only the replica's commit.
	entry, _ := ms.blockRegistry.Lookup("fail-vol")
	primaryServer := entry.VolumeServer
	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
		return nil
	}
	ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
		if server != primaryServer {
			return 0, fmt.Errorf("replica commit failed")
		}
		return 2 << 30, nil
	}
	_, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
		Name: "fail-vol", NewSizeBytes: 2 << 30,
	})
	if err == nil {
		t.Fatal("expected error when replica commit fails")
	}
	// Registry size should NOT be updated (inconsistent state).
	entry, _ = ms.blockRegistry.Lookup("fail-vol")
	if entry.SizeBytes != 1<<30 {
		t.Fatalf("registry size should be unchanged: got %d", entry.SizeBytes)
	}
	// Finding 1: ExpandFailed must be true, ExpandInProgress must stay true
	// so heartbeat cannot overwrite SizeBytes with the primary's new committed size.
	if !entry.ExpandFailed {
		t.Fatal("entry.ExpandFailed should be true after partial commit failure")
	}
	if !entry.ExpandInProgress {
		t.Fatal("entry.ExpandInProgress should stay true to suppress heartbeat size updates")
	}
	// Finding 2: PendingExpandSize and ExpandEpoch should be populated for diagnosis.
	if entry.PendingExpandSize != 2<<30 {
		t.Fatalf("entry.PendingExpandSize: got %d, want %d", entry.PendingExpandSize, 2<<30)
	}
	if entry.ExpandEpoch == 0 {
		t.Fatal("entry.ExpandEpoch should be non-zero")
	}
	// A new expand should be rejected while ExpandFailed is set.
	_, err = ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
		Name: "fail-vol", NewSizeBytes: 2 << 30,
	})
	if err == nil {
		t.Fatal("expand should be rejected while ExpandFailed is set")
	}
	// ClearExpandFailed unblocks new expands.
	ms.blockRegistry.ClearExpandFailed("fail-vol")
	entry, _ = ms.blockRegistry.Lookup("fail-vol")
	if entry.ExpandFailed || entry.ExpandInProgress {
		t.Fatal("ClearExpandFailed should reset both flags")
	}
}
// TestMaster_ExpandCoordinated_HeartbeatSuppressedAfterPartialCommit —
// Bug 1 regression: after primary commits but replica fails, a heartbeat
// from the primary reporting the new VolumeSize must NOT update the
// registry SizeBytes.
func TestMaster_ExpandCoordinated_HeartbeatSuppressedAfterPartialCommit(t *testing.T) {
	ms := testMasterServerWithExpandMocks(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")
	ms.blockRegistry.MarkBlockCapable("vs2:9333")
	ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) {
		return &blockAllocResult{
			Path:            fmt.Sprintf("/data/%s.blk", name),
			IQN:             fmt.Sprintf("iqn.test:%s", name),
			ISCSIAddr:       server,
			ReplicaDataAddr: server + ":4001",
			ReplicaCtrlAddr: server + ":4002",
		}, nil
	}
	if _, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name: "hb-vol", SizeBytes: 1 << 30,
	}); err != nil {
		t.Fatalf("create: %v", err)
	}
	entry, _ := ms.blockRegistry.Lookup("hb-vol")
	primaryServer := entry.VolumeServer
	// Fail replica commit.
	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
		return nil
	}
	ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
		if server != primaryServer {
			return 0, fmt.Errorf("replica commit failed")
		}
		return 2 << 30, nil
	}
	// The expand must fail (partial commit); assert it instead of discarding
	// the error, otherwise a silently-succeeding expand would invalidate the
	// rest of the test.
	if _, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
		Name: "hb-vol", NewSizeBytes: 2 << 30,
	}); err == nil {
		t.Fatal("expand should fail on partial replica commit")
	}
	// Volume is now in ExpandFailed state, ExpandInProgress=true.
	// Simulate primary heartbeat reporting VolumeSize = 2 GiB (primary already committed).
	ms.blockRegistry.UpdateFullHeartbeat(primaryServer, []*master_pb.BlockVolumeInfoMessage{
		{
			Path:       fmt.Sprintf("/data/%s.blk", "hb-vol"),
			VolumeSize: 2 << 30, // primary's new committed size
			Epoch:      1,
			Role:       1,
		},
	})
	// Registry size must still be the OLD size — heartbeat must not leak the new size.
	entry, _ = ms.blockRegistry.Lookup("hb-vol")
	if entry.SizeBytes != 1<<30 {
		t.Fatalf("heartbeat leaked new size: got %d, want %d", entry.SizeBytes, 1<<30)
	}
	if !entry.ExpandFailed {
		t.Fatal("ExpandFailed should still be true after heartbeat")
	}
}
// TestMaster_ExpandCoordinated_FailoverDuringPrepare —
// Scenario: primary and replica are prepared but commit hasn't happened.
// On recovery (OpenBlockVol), prepared state is cleared → VolumeSize stays at old.
// This test validates at the registry/coordinator level.
func TestMaster_ExpandCoordinated_FailoverDuringPrepare(t *testing.T) {
	ms := testMasterServerWithExpandMocks(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")
	ms.blockRegistry.MarkBlockCapable("vs2:9333")
	ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) {
		return &blockAllocResult{
			Path:            fmt.Sprintf("/data/%s.blk", name),
			IQN:             fmt.Sprintf("iqn.test:%s", name),
			ISCSIAddr:       server,
			ReplicaDataAddr: server + ":4001",
			ReplicaCtrlAddr: server + ":4002",
		}, nil
	}
	// Prepare succeeds but commit on primary fails (simulating crash).
	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
		return nil
	}
	var cancelCount int
	ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
		return 0, fmt.Errorf("primary crashed during commit")
	}
	ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error {
		cancelCount++
		return nil
	}
	// Check the create error so a failed create doesn't masquerade as the
	// expected expand failure below.
	if _, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name: "failover-vol", SizeBytes: 1 << 30,
	}); err != nil {
		t.Fatalf("create: %v", err)
	}
	_, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
		Name: "failover-vol", NewSizeBytes: 2 << 30,
	})
	if err == nil {
		t.Fatal("expected error when primary commit fails")
	}
	// Cancel should have been called on all prepared nodes.
	if cancelCount < 1 {
		t.Fatalf("expected cancel calls, got %d", cancelCount)
	}
	// Registry size should be unchanged.
	entry, _ := ms.blockRegistry.Lookup("failover-vol")
	if entry.SizeBytes != 1<<30 {
		t.Fatalf("registry size should be unchanged: got %d", entry.SizeBytes)
	}
}
// TestMaster_ExpandCoordinated_RestartRecovery —
// After node restart with prepared state, OpenBlockVol clears it.
// Master re-driving expand would go through full prepare/commit again.
// This test verifies the coordinator doesn't get stuck after a failed expand.
func TestMaster_ExpandCoordinated_RestartRecovery(t *testing.T) {
	ms := testMasterServerWithExpandMocks(t)
	ms.blockRegistry.MarkBlockCapable("vs1:9333")
	ms.blockRegistry.MarkBlockCapable("vs2:9333")
	ms.blockVSAllocate = func(ctx context.Context, server string, name string, sizeBytes uint64, diskType string, durabilityMode string) (*blockAllocResult, error) {
		return &blockAllocResult{
			Path:            fmt.Sprintf("/data/%s.blk", name),
			IQN:             fmt.Sprintf("iqn.test:%s", name),
			ISCSIAddr:       server,
			ReplicaDataAddr: server + ":4001",
			ReplicaCtrlAddr: server + ":4002",
		}, nil
	}
	if _, err := ms.CreateBlockVolume(context.Background(), &master_pb.CreateBlockVolumeRequest{
		Name: "restart-vol", SizeBytes: 1 << 30,
	}); err != nil {
		t.Fatalf("create: %v", err)
	}
	// First expand fails at commit.
	ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
		return nil
	}
	ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
		return 0, fmt.Errorf("crash")
	}
	ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error {
		return nil
	}
	// Assert the simulated crash actually fails the expand instead of
	// discarding the error.
	if _, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
		Name: "restart-vol", NewSizeBytes: 2 << 30,
	}); err == nil {
		t.Fatal("first expand should fail at commit")
	}
	// After "restart" (inflight released), retry should work.
	ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
		return 2 << 30, nil
	}
	resp, err := ms.ExpandBlockVolume(context.Background(), &master_pb.ExpandBlockVolumeRequest{
		Name: "restart-vol", NewSizeBytes: 2 << 30,
	})
	if err != nil {
		t.Fatalf("retry expand: %v", err)
	}
	if resp.CapacityBytes != 2<<30 {
		t.Fatalf("capacity: got %d", resp.CapacityBytes)
	}
}

46
weed/server/master_server.go

@ -12,6 +12,7 @@ import (
"runtime"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/seaweedfs/seaweedfs/weed/cluster/maintenance"
@ -104,6 +105,10 @@ type MasterServer struct {
blockVSDeleteSnap func(ctx context.Context, server string, name string, snapID uint32) error
blockVSListSnaps func(ctx context.Context, server string, name string) ([]*volume_server_pb.BlockSnapshotInfo, error)
blockVSExpand func(ctx context.Context, server string, name string, newSize uint64) (uint64, error)
blockVSPrepareExpand func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error
blockVSCommitExpand func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error)
blockVSCancelExpand func(ctx context.Context, server string, name string, expandEpoch uint64) error
nextExpandEpoch atomic.Uint64
}
func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.ServerAddress) *MasterServer {
@ -164,6 +169,9 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se
ms.blockVSDeleteSnap = ms.defaultBlockVSDeleteSnap
ms.blockVSListSnaps = ms.defaultBlockVSListSnaps
ms.blockVSExpand = ms.defaultBlockVSExpand
ms.blockVSPrepareExpand = ms.defaultBlockVSPrepareExpand
ms.blockVSCommitExpand = ms.defaultBlockVSCommitExpand
ms.blockVSCancelExpand = ms.defaultBlockVSCancelExpand
ms.MasterClient.SetOnPeerUpdateFn(ms.OnPeerUpdate)
@ -215,6 +223,7 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se
r.HandleFunc("/block/volume/{name}", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeDeleteHandler)))).Methods("DELETE")
r.HandleFunc("/block/volume/{name}", ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeLookupHandler))).Methods("GET")
r.HandleFunc("/block/volumes", ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeListHandler))).Methods("GET")
r.HandleFunc("/block/volume/{name}/expand", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockVolumeExpandHandler)))).Methods("POST")
r.HandleFunc("/block/assign", ms.proxyToLeader(ms.guard.WhiteList(requestIDMiddleware(ms.blockAssignHandler)))).Methods("POST")
r.HandleFunc("/block/servers", ms.guard.WhiteList(requestIDMiddleware(ms.blockServersHandler))).Methods("GET")
r.HandleFunc("/block/status", ms.guard.WhiteList(requestIDMiddleware(ms.blockStatusHandler))).Methods("GET")
@ -648,3 +657,40 @@ func (ms *MasterServer) defaultBlockVSExpand(ctx context.Context, server string,
})
return capacity, err
}
// defaultBlockVSPrepareExpand asks the volume server at the given address to
// stage an expand of the named block volume for expandEpoch without committing it.
func (ms *MasterServer) defaultBlockVSPrepareExpand(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
	prepare := func(client volume_server_pb.VolumeServerClient) error {
		req := &volume_server_pb.PrepareExpandBlockVolumeRequest{
			Name:         name,
			NewSizeBytes: newSize,
			ExpandEpoch:  expandEpoch,
		}
		_, callErr := client.PrepareExpandBlockVolume(ctx, req)
		return callErr
	}
	return operation.WithVolumeServerClient(false, pb.ServerAddress(server), ms.grpcDialOption, prepare)
}
// defaultBlockVSCommitExpand commits a previously prepared expand on the
// volume server and returns the capacity the server reports after commit.
func (ms *MasterServer) defaultBlockVSCommitExpand(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
	var capacity uint64
	commit := func(client volume_server_pb.VolumeServerClient) error {
		req := &volume_server_pb.CommitExpandBlockVolumeRequest{
			Name:        name,
			ExpandEpoch: expandEpoch,
		}
		resp, callErr := client.CommitExpandBlockVolume(ctx, req)
		if callErr != nil {
			return callErr
		}
		capacity = resp.CapacityBytes
		return nil
	}
	err := operation.WithVolumeServerClient(false, pb.ServerAddress(server), ms.grpcDialOption, commit)
	return capacity, err
}
// defaultBlockVSCancelExpand aborts a previously prepared expand for
// expandEpoch on the volume server at the given address.
func (ms *MasterServer) defaultBlockVSCancelExpand(ctx context.Context, server string, name string, expandEpoch uint64) error {
	cancel := func(client volume_server_pb.VolumeServerClient) error {
		req := &volume_server_pb.CancelExpandBlockVolumeRequest{
			Name:        name,
			ExpandEpoch: expandEpoch,
		}
		_, callErr := client.CancelExpandBlockVolume(ctx, req)
		return callErr
	}
	return operation.WithVolumeServerClient(false, pb.ServerAddress(server), ms.grpcDialOption, cancel)
}

29
weed/server/master_server_handlers_block.go

@ -162,6 +162,35 @@ func (ms *MasterServer) blockServersHandler(w http.ResponseWriter, r *http.Reque
writeJsonQuiet(w, r, http.StatusOK, infos)
}
// blockVolumeExpandHandler handles POST /block/volume/{name}/expand.
// It validates the path and JSON body, then delegates to the gRPC
// ExpandBlockVolume coordinator and returns the new capacity as JSON.
func (ms *MasterServer) blockVolumeExpandHandler(w http.ResponseWriter, r *http.Request) {
	name := mux.Vars(r)["name"]
	if name == "" {
		writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("name is required"))
		return
	}
	var body blockapi.ExpandVolumeRequest
	if decodeErr := json.NewDecoder(r.Body).Decode(&body); decodeErr != nil {
		writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("invalid request body: %w", decodeErr))
		return
	}
	if body.NewSizeBytes == 0 {
		writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("new_size_bytes must be > 0"))
		return
	}
	grpcReq := &master_pb.ExpandBlockVolumeRequest{
		Name:         name,
		NewSizeBytes: body.NewSizeBytes,
	}
	resp, err := ms.ExpandBlockVolume(r.Context(), grpcReq)
	if err != nil {
		writeJsonError(w, r, http.StatusInternalServerError, err)
		return
	}
	writeJsonQuiet(w, r, http.StatusOK, blockapi.ExpandVolumeResponse{CapacityBytes: resp.CapacityBytes})
}
// blockStatusHandler handles GET /block/status — returns registry configuration for debugging.
func (ms *MasterServer) blockStatusHandler(w http.ResponseWriter, r *http.Request) {
status := map[string]interface{}{

58
weed/server/qa_block_cp82_adversarial_test.go

@ -43,6 +43,15 @@ func qaCP82Master(t *testing.T) *MasterServer {
ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) {
return newSize, nil
}
ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
return nil
}
ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
return 2 << 30, nil
}
ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error {
return nil
}
ms.blockRegistry.MarkBlockCapable("vs1:9333")
ms.blockRegistry.MarkBlockCapable("vs2:9333")
ms.blockRegistry.MarkBlockCapable("vs3:9333")
@ -726,31 +735,58 @@ func TestQA_CP82_ExpandRF3_PartialReplicaFailure(t *testing.T) {
entry, _ := ms.blockRegistry.Lookup("vol-expand")
failServer := entry.Replicas[1].Server
// Override expand mock: one replica fails.
ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) {
// CP11A-2: coordinated expand — set up prepare/commit/cancel mocks.
ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
return nil
}
ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
if server == failServer {
return 0, fmt.Errorf("disk full on %s", server)
}
return newSize, nil
return 2 << 30, nil
}
ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error {
return nil
}
// Expand should succeed (primary + one replica succeed, one fails best-effort).
// Under coordinated expand, partial replica commit failure marks the volume degraded.
_, err = ms.ExpandBlockVolume(ctx, &master_pb.ExpandBlockVolumeRequest{
Name: "vol-expand",
NewSizeBytes: 2 << 30,
})
if err == nil {
t.Fatal("expand should fail when a required replica commit fails")
}
// Registry size should NOT be updated (primary committed but replica failed → degraded).
entry, _ = ms.blockRegistry.Lookup("vol-expand")
if entry.SizeBytes != 1<<30 {
t.Fatalf("registry size should be unchanged: got %d, want %d", entry.SizeBytes, uint64(1<<30))
}
if !entry.ExpandFailed {
t.Fatal("ExpandFailed should be true after partial commit failure")
}
if !entry.ExpandInProgress {
t.Fatal("ExpandInProgress should stay true to suppress heartbeat size updates")
}
// Cleanup: ClearExpandFailed allows future operations.
ms.blockRegistry.ClearExpandFailed("vol-expand")
// Now expand with all mocks succeeding should work.
ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
return 2 << 30, nil
}
resp, err := ms.ExpandBlockVolume(ctx, &master_pb.ExpandBlockVolumeRequest{
Name: "vol-expand",
NewSizeBytes: 2 << 30,
})
if err != nil {
t.Fatalf("expand should succeed despite partial replica failure: %v", err)
t.Fatalf("retry expand after clear: %v", err)
}
if resp.CapacityBytes != 2<<30 {
t.Fatalf("capacity: got %d, want %d", resp.CapacityBytes, uint64(2<<30))
}
// Registry should reflect new size.
entry, _ = ms.blockRegistry.Lookup("vol-expand")
if entry.SizeBytes != 2<<30 {
t.Fatalf("registry size: got %d, want %d", entry.SizeBytes, uint64(2<<30))
}
}
// ────────────────────────────────────────────────────────────

9
weed/server/qa_block_durability_test.go

@ -43,6 +43,15 @@ func qaDurabilityMaster(t *testing.T) *MasterServer {
ms.blockVSExpand = func(ctx context.Context, server string, name string, newSize uint64) (uint64, error) {
return newSize, nil
}
ms.blockVSPrepareExpand = func(ctx context.Context, server string, name string, newSize, expandEpoch uint64) error {
return nil
}
ms.blockVSCommitExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) (uint64, error) {
return 2 << 30, nil
}
ms.blockVSCancelExpand = func(ctx context.Context, server string, name string, expandEpoch uint64) error {
return nil
}
ms.blockRegistry.MarkBlockCapable("vs1:9333")
ms.blockRegistry.MarkBlockCapable("vs2:9333")
ms.blockRegistry.MarkBlockCapable("vs3:9333")

54
weed/server/volume_grpc_block.go

@ -125,6 +125,60 @@ func (vs *VolumeServer) ListBlockSnapshots(_ context.Context, req *volume_server
return resp, nil
}
// PrepareExpandBlockVolume prepares a block volume for expansion without committing.
// Validates that the block service is enabled and the request carries a name
// and a positive new size before delegating to the block service.
func (vs *VolumeServer) PrepareExpandBlockVolume(_ context.Context, req *volume_server_pb.PrepareExpandBlockVolumeRequest) (*volume_server_pb.PrepareExpandBlockVolumeResponse, error) {
	switch {
	case vs.blockService == nil:
		return nil, fmt.Errorf("block service not enabled on this volume server")
	case req.Name == "":
		return nil, fmt.Errorf("name is required")
	case req.NewSizeBytes == 0:
		return nil, fmt.Errorf("new_size_bytes must be > 0")
	}
	if err := vs.blockService.PrepareExpandBlockVol(req.Name, req.NewSizeBytes, req.ExpandEpoch); err != nil {
		return nil, fmt.Errorf("prepare expand block volume %q: %w", req.Name, err)
	}
	return &volume_server_pb.PrepareExpandBlockVolumeResponse{}, nil
}
// CommitExpandBlockVolume commits a prepared block volume expansion and
// returns the resulting capacity in bytes.
func (vs *VolumeServer) CommitExpandBlockVolume(_ context.Context, req *volume_server_pb.CommitExpandBlockVolumeRequest) (*volume_server_pb.CommitExpandBlockVolumeResponse, error) {
	switch {
	case vs.blockService == nil:
		return nil, fmt.Errorf("block service not enabled on this volume server")
	case req.Name == "":
		return nil, fmt.Errorf("name is required")
	}
	capacity, commitErr := vs.blockService.CommitExpandBlockVol(req.Name, req.ExpandEpoch)
	if commitErr != nil {
		return nil, fmt.Errorf("commit expand block volume %q: %w", req.Name, commitErr)
	}
	resp := &volume_server_pb.CommitExpandBlockVolumeResponse{CapacityBytes: capacity}
	return resp, nil
}
// CancelExpandBlockVolume cancels a prepared block volume expansion.
func (vs *VolumeServer) CancelExpandBlockVolume(_ context.Context, req *volume_server_pb.CancelExpandBlockVolumeRequest) (*volume_server_pb.CancelExpandBlockVolumeResponse, error) {
	switch {
	case vs.blockService == nil:
		return nil, fmt.Errorf("block service not enabled on this volume server")
	case req.Name == "":
		return nil, fmt.Errorf("name is required")
	}
	if cancelErr := vs.blockService.CancelExpandBlockVol(req.Name, req.ExpandEpoch); cancelErr != nil {
		return nil, fmt.Errorf("cancel expand block volume %q: %w", req.Name, cancelErr)
	}
	return &volume_server_pb.CancelExpandBlockVolumeResponse{}, nil
}
// ExpandBlockVolume expands a block volume to a new size.
func (vs *VolumeServer) ExpandBlockVolume(_ context.Context, req *volume_server_pb.ExpandBlockVolumeRequest) (*volume_server_pb.ExpandBlockVolumeResponse, error) {
if vs.blockService == nil {

50
weed/server/volume_grpc_block_test.go

@ -201,3 +201,53 @@ func TestVS_ExpandVolumeNotFound(t *testing.T) {
t.Fatal("expected error for nonexistent volume")
}
}
// TestVS_PrepareExpand: preparing an expand on a freshly created volume succeeds.
func TestVS_PrepareExpand(t *testing.T) {
	const (
		initialSize = 4 * 1024 * 1024
		targetSize  = 8 * 1024 * 1024
	)
	bs, _ := newTestBlockServiceWithDir(t)
	bs.CreateBlockVol("prep-vol", initialSize, "", "")
	if err := bs.PrepareExpandBlockVol("prep-vol", targetSize, 1); err != nil {
		t.Fatalf("PrepareExpandBlockVol: %v", err)
	}
}
// TestVS_CommitExpand: commit after prepare returns the new capacity.
func TestVS_CommitExpand(t *testing.T) {
	const (
		initialSize        = 4 * 1024 * 1024
		targetSize         = 8 * 1024 * 1024
		epoch       uint64 = 42
	)
	bs, _ := newTestBlockServiceWithDir(t)
	bs.CreateBlockVol("commit-vol", initialSize, "", "")
	if err := bs.PrepareExpandBlockVol("commit-vol", targetSize, epoch); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	capacity, err := bs.CommitExpandBlockVol("commit-vol", epoch)
	if err != nil {
		t.Fatalf("CommitExpandBlockVol: %v", err)
	}
	if capacity != targetSize {
		t.Fatalf("capacity: got %d, want %d", capacity, targetSize)
	}
}
// TestVS_CancelExpand: cancel after prepare with a matching epoch succeeds.
func TestVS_CancelExpand(t *testing.T) {
	const epoch uint64 = 5
	bs, _ := newTestBlockServiceWithDir(t)
	bs.CreateBlockVol("cancel-vol", 4*1024*1024, "", "")
	if err := bs.PrepareExpandBlockVol("cancel-vol", 8*1024*1024, epoch); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	if err := bs.CancelExpandBlockVol("cancel-vol", epoch); err != nil {
		t.Fatalf("CancelExpandBlockVol: %v", err)
	}
}
// TestVS_PrepareExpand_AlreadyInFlight: a second prepare while one is staged
// must be rejected.
func TestVS_PrepareExpand_AlreadyInFlight(t *testing.T) {
	bs, _ := newTestBlockServiceWithDir(t)
	bs.CreateBlockVol("inflight-vol", 4*1024*1024, "", "")
	if firstErr := bs.PrepareExpandBlockVol("inflight-vol", 8*1024*1024, 1); firstErr != nil {
		t.Fatalf("first prepare: %v", firstErr)
	}
	if secondErr := bs.PrepareExpandBlockVol("inflight-vol", 16*1024*1024, 2); secondErr == nil {
		t.Fatal("second prepare should be rejected")
	}
}

30
weed/server/volume_server_block.go

@ -518,6 +518,36 @@ func (bs *BlockService) ExpandBlockVol(name string, newSize uint64) (uint64, err
return actualSize, err
}
// PrepareExpandBlockVol prepares an expand on the named volume without committing.
func (bs *BlockService) PrepareExpandBlockVol(name string, newSize, expandEpoch uint64) error {
	volPath := bs.volumePath(name)
	prepare := func(vol *blockvol.BlockVol) error {
		return vol.PrepareExpand(newSize, expandEpoch)
	}
	return bs.blockStore.WithVolume(volPath, prepare)
}
// CommitExpandBlockVol commits a prepared expand on the named volume and
// returns the volume's size after the commit.
func (bs *BlockService) CommitExpandBlockVol(name string, expandEpoch uint64) (uint64, error) {
	var capacity uint64
	commit := func(vol *blockvol.BlockVol) error {
		if commitErr := vol.CommitExpand(expandEpoch); commitErr != nil {
			return commitErr
		}
		capacity = vol.Info().VolumeSize
		return nil
	}
	err := bs.blockStore.WithVolume(bs.volumePath(name), commit)
	return capacity, err
}
// CancelExpandBlockVol cancels a prepared expand on the named volume.
// An expandEpoch of 0 force-cancels regardless of the recorded epoch.
func (bs *BlockService) CancelExpandBlockVol(name string, expandEpoch uint64) error {
	return bs.blockStore.WithVolume(bs.volumePath(name), func(vol *blockvol.BlockVol) error {
		return vol.CancelExpand(expandEpoch)
	})
}
// volumePath converts a volume name to its .blk file path.
func (bs *BlockService) volumePath(name string) string {
sanitized := blockvol.SanitizeFilename(name)

21
weed/storage/blockvol/blockapi/client.go

@ -115,6 +115,27 @@ func (c *Client) AssignRole(ctx context.Context, req AssignRequest) error {
return checkStatus(resp, http.StatusOK)
}
// ExpandVolume expands a block volume to a new size and returns the capacity
// in bytes reported by the server after the expand.
func (c *Client) ExpandVolume(ctx context.Context, name string, newSizeBytes uint64) (uint64, error) {
	payload, err := json.Marshal(ExpandVolumeRequest{NewSizeBytes: newSizeBytes})
	if err != nil {
		return 0, fmt.Errorf("marshal request: %w", err)
	}
	resp, err := c.doRequest(ctx, http.MethodPost, "/block/volume/"+name+"/expand", bytes.NewReader(payload))
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()
	if err = checkStatus(resp, http.StatusOK); err != nil {
		return 0, err
	}
	var result ExpandVolumeResponse
	if err = json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return 0, fmt.Errorf("decode response: %w", err)
	}
	return result.CapacityBytes, nil
}
// ListServers lists all block-capable volume servers.
func (c *Client) ListServers(ctx context.Context) ([]ServerInfo, error) {
resp, err := c.doRequest(ctx, http.MethodGet, "/block/servers", nil)

10
weed/storage/blockvol/blockapi/types.go

@ -64,6 +64,16 @@ type ServerInfo struct {
BlockCapable bool `json:"block_capable"`
}
// ExpandVolumeRequest is the request body for POST /block/volume/{name}/expand.
type ExpandVolumeRequest struct {
	// NewSizeBytes is the requested new volume size in bytes.
	NewSizeBytes uint64 `json:"new_size_bytes"`
}

// ExpandVolumeResponse is the response for POST /block/volume/{name}/expand.
type ExpandVolumeResponse struct {
	// CapacityBytes is the volume capacity in bytes after the expand.
	CapacityBytes uint64 `json:"capacity_bytes"`
}
// RoleFromString converts a role string to its uint32 wire value.
// Returns 0 (RoleNone) for unrecognized strings.
func RoleFromString(s string) uint32 {

151
weed/storage/blockvol/blockvol.go

@ -213,6 +213,25 @@ func OpenBlockVol(path string, cfgs ...BlockVolConfig) (*BlockVol, error) {
return nil, fmt.Errorf("blockvol: validate superblock: %w", err)
}
// CP11A-2: Clear stale prepared expand state on recovery.
if sb.PreparedSize != 0 {
log.Printf("blockvol: clearing stale PreparedSize=%d ExpandEpoch=%d on open (crash during prepare phase)", sb.PreparedSize, sb.ExpandEpoch)
sb.PreparedSize = 0
sb.ExpandEpoch = 0
if _, err := fd.Seek(0, 0); err != nil {
fd.Close()
return nil, fmt.Errorf("blockvol: seek for prepared clear: %w", err)
}
if _, err := sb.WriteTo(fd); err != nil {
fd.Close()
return nil, fmt.Errorf("blockvol: write superblock for prepared clear: %w", err)
}
if err := fd.Sync(); err != nil {
fd.Close()
return nil, fmt.Errorf("blockvol: sync for prepared clear: %w", err)
}
}
dirtyMap := NewDirtyMap(cfg.DirtyMapShards)
// Run WAL recovery: replay entries from tail to head.
@ -1055,22 +1074,19 @@ func (v *BlockVol) ListSnapshots() []SnapshotInfo {
// Resize/expand error sentinels. Callers compare with errors.Is.
var (
	// ErrShrinkNotSupported: the requested size is smaller than the current size.
	ErrShrinkNotSupported = errors.New("blockvol: shrink not supported")
	// ErrSnapshotsPreventResize: the volume has active snapshots.
	ErrSnapshotsPreventResize = errors.New("blockvol: cannot resize with active snapshots")
	// ErrExpandAlreadyInFlight: a prepared expand is already recorded.
	ErrExpandAlreadyInFlight = errors.New("blockvol: expand already in flight")
	// ErrExpandEpochMismatch: the supplied epoch does not match the prepared one.
	ErrExpandEpochMismatch = errors.New("blockvol: expand epoch mismatch")
	// ErrNoExpandInFlight: commit/cancel was attempted with no prepared expand.
	ErrNoExpandInFlight = errors.New("blockvol: no expand in flight")
	// ErrSameSize: the requested size equals the current size.
	ErrSameSize = errors.New("blockvol: new size equals current size")
)
// Expand grows the volume to newSize bytes. newSize must be larger than
// the current size and aligned to BlockSize. Fails if snapshots are active.
func (v *BlockVol) Expand(newSize uint64) error {
if err := v.beginOp(); err != nil {
return err
}
defer v.endOp()
if err := v.writeGate(); err != nil {
return err
}
// growFile extends the backing file to accommodate newSize bytes of extent data.
// Validates size, alignment, and snapshot constraints. Pauses/resumes flusher.
// Does NOT update VolumeSize in the superblock.
func (v *BlockVol) growFile(newSize uint64) error {
if newSize <= v.super.VolumeSize {
if newSize == v.super.VolumeSize {
return nil // no-op
return nil // no-op, caller should check
}
return ErrShrinkNotSupported
}
@ -1098,12 +1114,121 @@ func (v *BlockVol) Expand(newSize uint64) error {
if err := v.fd.Truncate(newFileSize); err != nil {
return fmt.Errorf("blockvol: expand truncate: %w", err)
}
return nil
}
// Update superblock.
// Expand grows the volume to newSize bytes (standalone direct-commit).
// newSize must be larger than the current size and aligned to BlockSize.
// Fails if snapshots are active. No PreparedSize involved.
func (v *BlockVol) Expand(newSize uint64) error {
	if err := v.beginOp(); err != nil {
		return err
	}
	defer v.endOp()
	if err := v.writeGate(); err != nil {
		return err
	}
	// Hold v.mu while reading/mutating superblock size fields: PrepareExpand,
	// CommitExpand, CancelExpand and ExpandState all synchronize on this lock,
	// so writing v.super here without it would be a data race. growFile is
	// already called under v.mu by PrepareExpand, so it is safe to call here
	// with the lock held.
	v.mu.Lock()
	defer v.mu.Unlock()
	if newSize == v.super.VolumeSize {
		return nil // no-op
	}
	if err := v.growFile(newSize); err != nil {
		return err
	}
	// Update superblock: direct-commit.
	v.super.VolumeSize = newSize
	v.super.PreparedSize = 0 // defensive clear
	v.super.ExpandEpoch = 0
	return v.persistSuperblock()
}
// PrepareExpand grows the file and records the pending expand in the superblock
// without updating VolumeSize. Writes beyond the old VolumeSize are rejected
// by ValidateWrite until CommitExpand is called.
//
// On persistence failure the in-memory prepared state is rolled back, so a
// caller can retry without hitting a spurious ErrExpandAlreadyInFlight.
func (v *BlockVol) PrepareExpand(newSize, expandEpoch uint64) error {
	if err := v.beginOp(); err != nil {
		return err
	}
	defer v.endOp()
	v.mu.Lock()
	defer v.mu.Unlock()
	if v.super.PreparedSize != 0 {
		return ErrExpandAlreadyInFlight
	}
	if newSize <= v.super.VolumeSize {
		if newSize == v.super.VolumeSize {
			return ErrSameSize
		}
		return ErrShrinkNotSupported
	}
	if err := v.growFile(newSize); err != nil {
		return err
	}
	v.super.PreparedSize = newSize
	v.super.ExpandEpoch = expandEpoch
	if err := v.persistSuperblock(); err != nil {
		// The prepare is not durable; leaving PreparedSize set in memory
		// would diverge from disk and block every retry with
		// ErrExpandAlreadyInFlight. The grown file is harmless (sparse).
		v.super.PreparedSize = 0
		v.super.ExpandEpoch = 0
		return err
	}
	return nil
}
// CommitExpand activates the prepared expand: VolumeSize = PreparedSize.
// Returns ErrNoExpandInFlight if no prepare was done, or ErrExpandEpochMismatch
// if the epoch doesn't match.
//
// On persistence failure the in-memory state is restored to the prepared
// state, keeping memory and disk consistent (the commit is retryable).
func (v *BlockVol) CommitExpand(expandEpoch uint64) error {
	if err := v.beginOp(); err != nil {
		return err
	}
	defer v.endOp()
	v.mu.Lock()
	defer v.mu.Unlock()
	if v.super.PreparedSize == 0 {
		return ErrNoExpandInFlight
	}
	if v.super.ExpandEpoch != expandEpoch {
		return ErrExpandEpochMismatch
	}
	prevSize := v.super.VolumeSize
	prepared := v.super.PreparedSize
	v.super.VolumeSize = prepared
	v.super.PreparedSize = 0
	v.super.ExpandEpoch = 0
	if err := v.persistSuperblock(); err != nil {
		// Commit did not reach disk; revert so in-memory state does not
		// claim a larger volume than what a crash recovery would see.
		v.super.VolumeSize = prevSize
		v.super.PreparedSize = prepared
		v.super.ExpandEpoch = expandEpoch
		return err
	}
	return nil
}
// CancelExpand drops a prepared expand without activating it. The backing
// file keeps its grown length (sparse, so this costs nothing). Passing
// expandEpoch == 0 force-cancels no matter which epoch is recorded.
func (v *BlockVol) CancelExpand(expandEpoch uint64) error {
	if err := v.beginOp(); err != nil {
		return err
	}
	defer v.endOp()
	v.mu.Lock()
	defer v.mu.Unlock()
	if expandEpoch != 0 && expandEpoch != v.super.ExpandEpoch {
		return ErrExpandEpochMismatch
	}
	v.super.PreparedSize = 0
	v.super.ExpandEpoch = 0
	return v.persistSuperblock()
}
// ExpandState reports the currently prepared expand, if any. A result of
// (0, 0) means no expand is in flight.
func (v *BlockVol) ExpandState() (preparedSize, expandEpoch uint64) {
	v.mu.RLock()
	preparedSize, expandEpoch = v.super.PreparedSize, v.super.ExpandEpoch
	v.mu.RUnlock()
	return preparedSize, expandEpoch
}
// persistSuperblock writes the superblock to disk and fsyncs.
func (v *BlockVol) persistSuperblock() error {
if _, err := v.fd.Seek(0, 0); err != nil {

303
weed/storage/blockvol/expand_test.go

@ -0,0 +1,303 @@
package blockvol
import (
"bytes"
"path/filepath"
"testing"
)
// Shared sizing constants for the expand test suite.
const (
	expandVolSize = 1024 * 1024     // 1MB initial volume size
	expandBlkSize = 4096            // logical block size
	expandWALSize = 64 * 1024       // 64KB WAL region
	expandNewSize = 2 * 1024 * 1024 // 2MB expand target
)
// createExpandTestVol creates a fresh 1MB test volume in a temp dir and
// returns it together with its backing file path.
func createExpandTestVol(t *testing.T) (*BlockVol, string) {
	t.Helper()
	path := filepath.Join(t.TempDir(), "test.blk")
	opts := CreateOptions{
		VolumeSize: expandVolSize,
		BlockSize:  expandBlkSize,
		WALSize:    expandWALSize,
	}
	vol, err := CreateBlockVol(path, opts)
	if err != nil {
		t.Fatalf("create: %v", err)
	}
	return vol, path
}
// TestExpand_Standalone_DirectCommit: a plain Expand bumps VolumeSize
// immediately and leaves no prepared state behind.
func TestExpand_Standalone_DirectCommit(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	if err := vol.Expand(expandNewSize); err != nil {
		t.Fatalf("expand: %v", err)
	}
	if vol.Info().VolumeSize != expandNewSize {
		t.Fatalf("VolumeSize: got %d, want %d", vol.Info().VolumeSize, expandNewSize)
	}
	// Direct-commit must not set PreparedSize/ExpandEpoch.
	ps, ee := vol.ExpandState()
	if ps != 0 || ee != 0 {
		t.Fatalf("ExpandState: got (%d,%d), want (0,0)", ps, ee)
	}
}

// TestExpand_Standalone_Idempotent: expanding to the current size is a no-op.
func TestExpand_Standalone_Idempotent(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	if err := vol.Expand(expandVolSize); err != nil {
		t.Fatalf("same-size expand should be no-op: %v", err)
	}
	if vol.Info().VolumeSize != expandVolSize {
		t.Fatalf("VolumeSize changed: %d", vol.Info().VolumeSize)
	}
}

// TestExpand_Standalone_ShrinkRejected: a smaller target size fails with
// ErrShrinkNotSupported.
func TestExpand_Standalone_ShrinkRejected(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	err := vol.Expand(expandVolSize / 2)
	if err != ErrShrinkNotSupported {
		t.Fatalf("expected ErrShrinkNotSupported, got %v", err)
	}
}

// TestExpand_Standalone_SurvivesReopen: a committed expand is durable
// across a close/reopen cycle.
func TestExpand_Standalone_SurvivesReopen(t *testing.T) {
	vol, path := createExpandTestVol(t)
	if err := vol.Expand(expandNewSize); err != nil {
		t.Fatalf("expand: %v", err)
	}
	vol.Close()
	vol2, err := OpenBlockVol(path)
	if err != nil {
		t.Fatalf("reopen: %v", err)
	}
	defer vol2.Close()
	if vol2.Info().VolumeSize != expandNewSize {
		t.Fatalf("VolumeSize after reopen: got %d, want %d", vol2.Info().VolumeSize, expandNewSize)
	}
}
// TestPrepareExpand_Success: a prepare records PreparedSize/ExpandEpoch but
// leaves VolumeSize untouched until commit.
func TestPrepareExpand_Success(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 42); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	if vol.Info().VolumeSize != expandVolSize {
		t.Fatalf("VolumeSize should be unchanged: %d", vol.Info().VolumeSize)
	}
	ps, ee := vol.ExpandState()
	if ps != expandNewSize || ee != 42 {
		t.Fatalf("ExpandState: got (%d,%d), want (%d,42)", ps, ee, expandNewSize)
	}
}

// TestPrepareExpand_WriteBeyondOldSize_Rejected: while prepared (not yet
// committed), the expanded range must still be unwritable.
func TestPrepareExpand_WriteBeyondOldSize_Rejected(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	// First LBA past the old (still-active) size.
	newLBA := uint64(expandVolSize / expandBlkSize)
	data := make([]byte, expandBlkSize)
	err := vol.WriteLBA(newLBA, data)
	if err == nil {
		t.Fatal("write beyond old size should be rejected while in prepared state")
	}
}

// TestPrepareExpand_WriteWithinOldSize_OK: normal I/O within the old range
// keeps working during the prepared phase.
func TestPrepareExpand_WriteWithinOldSize_OK(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	data := make([]byte, expandBlkSize)
	data[0] = 0xCC
	if err := vol.WriteLBA(0, data); err != nil {
		t.Fatalf("write within old size: %v", err)
	}
	got, err := vol.ReadLBA(0, expandBlkSize)
	if err != nil {
		t.Fatalf("read: %v", err)
	}
	if !bytes.Equal(got, data) {
		t.Fatal("data mismatch")
	}
}
// TestCommitExpand_Success: commit activates PreparedSize as the new
// VolumeSize and clears the prepared state.
func TestCommitExpand_Success(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 7); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	if err := vol.CommitExpand(7); err != nil {
		t.Fatalf("commit: %v", err)
	}
	if vol.Info().VolumeSize != expandNewSize {
		t.Fatalf("VolumeSize: got %d, want %d", vol.Info().VolumeSize, expandNewSize)
	}
	ps, ee := vol.ExpandState()
	if ps != 0 || ee != 0 {
		t.Fatalf("ExpandState: got (%d,%d), want (0,0)", ps, ee)
	}
}

// TestCommitExpand_WriteBeyondNewSize_OK: after commit, the expanded region
// accepts reads and writes.
func TestCommitExpand_WriteBeyondNewSize_OK(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	if err := vol.CommitExpand(1); err != nil {
		t.Fatalf("commit: %v", err)
	}
	newLBA := uint64(expandVolSize / expandBlkSize)
	data := make([]byte, expandBlkSize)
	data[0] = 0xDD
	if err := vol.WriteLBA(newLBA, data); err != nil {
		t.Fatalf("write in expanded region: %v", err)
	}
	got, err := vol.ReadLBA(newLBA, expandBlkSize)
	if err != nil {
		t.Fatalf("read: %v", err)
	}
	if !bytes.Equal(got, data) {
		t.Fatal("data mismatch in expanded region")
	}
}

// TestCommitExpand_EpochMismatch_Rejected: committing with the wrong epoch
// fails and leaves the volume size unchanged.
func TestCommitExpand_EpochMismatch_Rejected(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 5); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	err := vol.CommitExpand(99)
	if err != ErrExpandEpochMismatch {
		t.Fatalf("expected ErrExpandEpochMismatch, got %v", err)
	}
	if vol.Info().VolumeSize != expandVolSize {
		t.Fatalf("VolumeSize should be unchanged: %d", vol.Info().VolumeSize)
	}
}
// TestCancelExpand_ClearsPreparedState: cancel wipes the prepared state
// without touching VolumeSize.
func TestCancelExpand_ClearsPreparedState(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 3); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	if err := vol.CancelExpand(3); err != nil {
		t.Fatalf("cancel: %v", err)
	}
	ps, ee := vol.ExpandState()
	if ps != 0 || ee != 0 {
		t.Fatalf("ExpandState: got (%d,%d), want (0,0)", ps, ee)
	}
	if vol.Info().VolumeSize != expandVolSize {
		t.Fatalf("VolumeSize should be unchanged: %d", vol.Info().VolumeSize)
	}
}

// TestCancelExpand_WriteStillRejectedInNewRange: after cancel, the region
// beyond the (unchanged) VolumeSize stays unwritable even though the file
// was physically grown during prepare.
func TestCancelExpand_WriteStillRejectedInNewRange(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	if err := vol.CancelExpand(1); err != nil {
		t.Fatalf("cancel: %v", err)
	}
	newLBA := uint64(expandVolSize / expandBlkSize)
	data := make([]byte, expandBlkSize)
	err := vol.WriteLBA(newLBA, data)
	if err == nil {
		t.Fatal("write in expanded region should still be rejected after cancel")
	}
}

// TestPrepareExpand_AlreadyInFlight_Rejected: a second prepare while one is
// pending must fail with ErrExpandAlreadyInFlight.
func TestPrepareExpand_AlreadyInFlight_Rejected(t *testing.T) {
	vol, _ := createExpandTestVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("first prepare: %v", err)
	}
	err := vol.PrepareExpand(expandNewSize*2, 2)
	if err != ErrExpandAlreadyInFlight {
		t.Fatalf("expected ErrExpandAlreadyInFlight, got %v", err)
	}
}
// TestRecovery_PreparedState_Cleared: a prepare that never committed is
// discarded by crash recovery on reopen (VolumeSize stays original).
func TestRecovery_PreparedState_Cleared(t *testing.T) {
	vol, path := createExpandTestVol(t)
	if err := vol.PrepareExpand(expandNewSize, 10); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	vol.Close()
	vol2, err := OpenBlockVol(path)
	if err != nil {
		t.Fatalf("reopen: %v", err)
	}
	defer vol2.Close()
	ps, ee := vol2.ExpandState()
	if ps != 0 || ee != 0 {
		t.Fatalf("ExpandState after reopen: got (%d,%d), want (0,0)", ps, ee)
	}
	if vol2.Info().VolumeSize != expandVolSize {
		t.Fatalf("VolumeSize should be original after recovery: %d", vol2.Info().VolumeSize)
	}
}

// TestExpand_WithProfile_Single: expand works on a volume created with the
// single-replica storage profile.
func TestExpand_WithProfile_Single(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "profile.blk")
	vol, err := CreateBlockVol(path, CreateOptions{
		VolumeSize:     expandVolSize,
		BlockSize:      expandBlkSize,
		WALSize:        expandWALSize,
		StorageProfile: ProfileSingle,
	})
	if err != nil {
		t.Fatalf("create: %v", err)
	}
	defer vol.Close()
	if vol.Profile() != ProfileSingle {
		t.Fatalf("profile: got %d, want %d", vol.Profile(), ProfileSingle)
	}
	if err := vol.Expand(expandNewSize); err != nil {
		t.Fatalf("expand with single profile: %v", err)
	}
	if vol.Info().VolumeSize != expandNewSize {
		t.Fatalf("VolumeSize: got %d, want %d", vol.Info().VolumeSize, expandNewSize)
	}
}

588
weed/storage/blockvol/qa_expand_test.go

@ -0,0 +1,588 @@
package blockvol
import (
"bytes"
"errors"
"path/filepath"
"sync"
"sync/atomic"
"testing"
"time"
)
// =============================================================================
// CP11A-2 QA Adversarial Tests — Coordinated Expand
// =============================================================================
// --- Engine-level adversarial tests ---
// createQAExpandVol builds a fresh 1MB QA test volume and returns it with
// its backing file path.
func createQAExpandVol(t *testing.T) (*BlockVol, string) {
	t.Helper()
	path := filepath.Join(t.TempDir(), "qa-expand.blk")
	opts := CreateOptions{
		VolumeSize: expandVolSize,
		BlockSize:  expandBlkSize,
		WALSize:    expandWALSize,
	}
	vol, err := CreateBlockVol(path, opts)
	if err != nil {
		t.Fatalf("create: %v", err)
	}
	return vol, path
}
// T1: ConcurrentPrepare — ten goroutines race to PrepareExpand;
// exactly one must win, the others get ErrExpandAlreadyInFlight.
func TestQA_Expand_ConcurrentPrepare(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	const goroutines = 10
	var wins, rejects atomic.Int32
	var wg sync.WaitGroup
	start := make(chan struct{})
	for i := 0; i < goroutines; i++ {
		wg.Add(1)
		epoch := uint64(i + 1)
		go func() {
			defer wg.Done()
			<-start
			err := vol.PrepareExpand(expandNewSize, epoch)
			if err == nil {
				wins.Add(1)
			} else if errors.Is(err, ErrExpandAlreadyInFlight) {
				rejects.Add(1)
			} else {
				// t.Errorf (unlike t.Fatalf) is safe to call from a goroutine.
				t.Errorf("unexpected error: %v", err)
			}
		}()
	}
	close(start)
	wg.Wait()
	if wins.Load() != 1 {
		t.Fatalf("expected exactly 1 winner, got %d", wins.Load())
	}
	if rejects.Load() != int32(goroutines-1) {
		t.Fatalf("expected %d rejects, got %d", goroutines-1, rejects.Load())
	}
}

// T2: CommitWithoutPrepare — CommitExpand with no prior PrepareExpand.
func TestQA_Expand_CommitWithoutPrepare(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	err := vol.CommitExpand(42)
	if !errors.Is(err, ErrNoExpandInFlight) {
		t.Fatalf("expected ErrNoExpandInFlight, got %v", err)
	}
	// VolumeSize must not change.
	if vol.Info().VolumeSize != expandVolSize {
		t.Fatalf("VolumeSize corrupted: %d", vol.Info().VolumeSize)
	}
}

// T3: CancelWithoutPrepare — CancelExpand when nothing is in flight.
// With epoch=0 (force-cancel), should be a harmless no-op.
func TestQA_Expand_CancelWithoutPrepare_ForceEpoch(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	// Force-cancel (epoch=0) when nothing is in flight — should succeed.
	if err := vol.CancelExpand(0); err != nil {
		t.Fatalf("force-cancel with no inflight should succeed: %v", err)
	}
	ps, ee := vol.ExpandState()
	if ps != 0 || ee != 0 {
		t.Fatalf("ExpandState should be clean: (%d, %d)", ps, ee)
	}
}

// T4: CancelWithWrongEpoch — CancelExpand with non-zero wrong epoch.
func TestQA_Expand_CancelWithWrongEpoch(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 5); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	err := vol.CancelExpand(99)
	if !errors.Is(err, ErrExpandEpochMismatch) {
		t.Fatalf("expected ErrExpandEpochMismatch, got %v", err)
	}
	// PreparedSize must still be set (cancel failed).
	ps, ee := vol.ExpandState()
	if ps != expandNewSize || ee != 5 {
		t.Fatalf("ExpandState should be unchanged: (%d, %d)", ps, ee)
	}
}

// T5: ForceCancel — epoch=0 cancels regardless of actual epoch.
func TestQA_Expand_ForceCancel_IgnoresEpoch(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 777); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	// Force-cancel with epoch=0 should clear regardless.
	if err := vol.CancelExpand(0); err != nil {
		t.Fatalf("force-cancel: %v", err)
	}
	ps, ee := vol.ExpandState()
	if ps != 0 || ee != 0 {
		t.Fatalf("ExpandState should be cleared: (%d, %d)", ps, ee)
	}
}
// T6: DoubleCommit — commit, then commit again. Second must fail.
func TestQA_Expand_DoubleCommit(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	if err := vol.CommitExpand(1); err != nil {
		t.Fatalf("first commit: %v", err)
	}
	// Second commit: PreparedSize is now 0, so ErrNoExpandInFlight.
	err := vol.CommitExpand(1)
	if !errors.Is(err, ErrNoExpandInFlight) {
		t.Fatalf("expected ErrNoExpandInFlight on double commit, got %v", err)
	}
}

// T7: PrepareAfterCommit — after a successful prepare+commit cycle,
// a new prepare should work (the state machine resets).
func TestQA_Expand_PrepareAfterCommit(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	// First cycle: 1MB -> 2MB.
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("prepare1: %v", err)
	}
	if err := vol.CommitExpand(1); err != nil {
		t.Fatalf("commit1: %v", err)
	}
	if vol.Info().VolumeSize != expandNewSize {
		t.Fatalf("size after first commit: %d", vol.Info().VolumeSize)
	}
	// Second cycle: 2MB -> 4MB.
	newSize2 := uint64(4 * 1024 * 1024)
	if err := vol.PrepareExpand(newSize2, 2); err != nil {
		t.Fatalf("prepare2: %v", err)
	}
	if err := vol.CommitExpand(2); err != nil {
		t.Fatalf("commit2: %v", err)
	}
	if vol.Info().VolumeSize != newSize2 {
		t.Fatalf("size after second commit: %d", vol.Info().VolumeSize)
	}
}

// T8: PrepareAfterCancel — after cancel, a new prepare should succeed.
func TestQA_Expand_PrepareAfterCancel(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("prepare1: %v", err)
	}
	if err := vol.CancelExpand(1); err != nil {
		t.Fatalf("cancel: %v", err)
	}
	// Second prepare with different epoch should work.
	if err := vol.PrepareExpand(expandNewSize, 2); err != nil {
		t.Fatalf("prepare2 after cancel: %v", err)
	}
	ps, ee := vol.ExpandState()
	if ps != expandNewSize || ee != 2 {
		t.Fatalf("ExpandState: (%d, %d), want (%d, 2)", ps, ee, expandNewSize)
	}
}

// T9: PrepareShrink — PrepareExpand with size < current must be rejected.
func TestQA_Expand_PrepareShrink(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	err := vol.PrepareExpand(expandVolSize/2, 1)
	if !errors.Is(err, ErrShrinkNotSupported) {
		t.Fatalf("expected ErrShrinkNotSupported, got %v", err)
	}
}

// T10: PrepareUnaligned — unaligned size rejected.
func TestQA_Expand_PrepareUnaligned(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	err := vol.PrepareExpand(expandNewSize+1, 1)
	if !errors.Is(err, ErrAlignment) {
		t.Fatalf("expected ErrAlignment, got %v", err)
	}
	// Must not leave state dirty.
	ps, ee := vol.ExpandState()
	if ps != 0 || ee != 0 {
		t.Fatalf("ExpandState should be clean after alignment reject: (%d, %d)", ps, ee)
	}
}
// T11: DataIntegrity — write data before prepare, commit, then verify
// data in both old and new regions.
func TestQA_Expand_DataIntegrityAcrossCommit(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	// Write to LBA 0 before expand.
	data := make([]byte, expandBlkSize)
	for i := range data {
		data[i] = 0xAB
	}
	if err := vol.WriteLBA(0, data); err != nil {
		t.Fatalf("write pre-expand: %v", err)
	}
	// Prepare + commit.
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	// Write to LBA 0 during prepared state (within old range — allowed).
	data2 := make([]byte, expandBlkSize)
	for i := range data2 {
		data2[i] = 0xCD
	}
	if err := vol.WriteLBA(0, data2); err != nil {
		t.Fatalf("write during prepared: %v", err)
	}
	if err := vol.CommitExpand(1); err != nil {
		t.Fatalf("commit: %v", err)
	}
	// Read LBA 0 — should have data2 (0xCD), the last write wins.
	got, err := vol.ReadLBA(0, expandBlkSize)
	if err != nil {
		t.Fatalf("read LBA 0: %v", err)
	}
	if !bytes.Equal(got, data2) {
		t.Fatalf("data mismatch at LBA 0: got %x, want %x", got[0], data2[0])
	}
	// Write to new region (LBA beyond old size).
	newLBA := uint64(expandVolSize / expandBlkSize)
	data3 := make([]byte, expandBlkSize)
	for i := range data3 {
		data3[i] = 0xEF
	}
	if err := vol.WriteLBA(newLBA, data3); err != nil {
		t.Fatalf("write new region: %v", err)
	}
	got3, err := vol.ReadLBA(newLBA, expandBlkSize)
	if err != nil {
		t.Fatalf("read new region: %v", err)
	}
	if !bytes.Equal(got3, data3) {
		t.Fatalf("data mismatch in new region")
	}
}

// T12: RecoveryClearsAndDataSurvives — crash with PreparedSize set,
// reopen clears it, old data is intact.
func TestQA_Expand_RecoveryClearsAndDataSurvives(t *testing.T) {
	vol, path := createQAExpandVol(t)
	// Write data.
	data := make([]byte, expandBlkSize)
	data[0] = 0x77
	if err := vol.WriteLBA(0, data); err != nil {
		t.Fatalf("write: %v", err)
	}
	// Flush so data reaches extent.
	if err := vol.SyncCache(); err != nil {
		t.Fatalf("sync: %v", err)
	}
	// NOTE(review): fixed sleep to let the background flusher run — timing
	// dependent; a flush-completion hook would make this deterministic.
	time.Sleep(200 * time.Millisecond) // let flusher flush
	// Prepare expand (not committed).
	if err := vol.PrepareExpand(expandNewSize, 99); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	vol.Close()
	// Reopen — recovery should clear PreparedSize.
	vol2, err := OpenBlockVol(path)
	if err != nil {
		t.Fatalf("reopen: %v", err)
	}
	defer vol2.Close()
	ps, ee := vol2.ExpandState()
	if ps != 0 || ee != 0 {
		t.Fatalf("ExpandState after recovery: (%d, %d)", ps, ee)
	}
	if vol2.Info().VolumeSize != expandVolSize {
		t.Fatalf("VolumeSize should be original: %d", vol2.Info().VolumeSize)
	}
	// Data written before prepare should survive.
	got, err := vol2.ReadLBA(0, expandBlkSize)
	if err != nil {
		t.Fatalf("read after recovery: %v", err)
	}
	if got[0] != 0x77 {
		t.Fatalf("data[0]: got %x, want 0x77", got[0])
	}
}
// T13: CommittedExpandSurvivesReopen — committed expand persists.
func TestQA_Expand_CommittedSurvivesReopen(t *testing.T) {
	vol, path := createQAExpandVol(t)
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	if err := vol.CommitExpand(1); err != nil {
		t.Fatalf("commit: %v", err)
	}
	// Write in new region.
	newLBA := uint64(expandVolSize / expandBlkSize)
	data := make([]byte, expandBlkSize)
	data[0] = 0xAA
	if err := vol.WriteLBA(newLBA, data); err != nil {
		t.Fatalf("write new region: %v", err)
	}
	if err := vol.SyncCache(); err != nil {
		t.Fatalf("sync: %v", err)
	}
	// NOTE(review): timing-dependent wait for the background flusher.
	time.Sleep(200 * time.Millisecond)
	vol.Close()
	// Reopen.
	vol2, err := OpenBlockVol(path)
	if err != nil {
		t.Fatalf("reopen: %v", err)
	}
	defer vol2.Close()
	if vol2.Info().VolumeSize != expandNewSize {
		t.Fatalf("VolumeSize: got %d, want %d", vol2.Info().VolumeSize, expandNewSize)
	}
	got, err := vol2.ReadLBA(newLBA, expandBlkSize)
	if err != nil {
		t.Fatalf("read new region: %v", err)
	}
	if got[0] != 0xAA {
		t.Fatalf("data[0]: got %x, want 0xAA", got[0])
	}
}

// T14: ExpandOnClosedVolume — all expand ops must return ErrVolumeClosed.
func TestQA_Expand_ClosedVolume(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	vol.Close()
	if err := vol.Expand(expandNewSize); !errors.Is(err, ErrVolumeClosed) {
		t.Fatalf("Expand on closed: expected ErrVolumeClosed, got %v", err)
	}
	if err := vol.PrepareExpand(expandNewSize, 1); !errors.Is(err, ErrVolumeClosed) {
		t.Fatalf("PrepareExpand on closed: expected ErrVolumeClosed, got %v", err)
	}
	if err := vol.CommitExpand(1); !errors.Is(err, ErrVolumeClosed) {
		t.Fatalf("CommitExpand on closed: expected ErrVolumeClosed, got %v", err)
	}
	if err := vol.CancelExpand(1); !errors.Is(err, ErrVolumeClosed) {
		t.Fatalf("CancelExpand on closed: expected ErrVolumeClosed, got %v", err)
	}
}

// T15: PrepareExpandSameSize — PrepareExpand with newSize == VolumeSize must fail.
// BUG-CP11A2-1 fix: PrepareExpand rejects same-size with ErrSameSize.
func TestQA_Expand_PrepareSameSize(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	err := vol.PrepareExpand(expandVolSize, 1)
	if !errors.Is(err, ErrSameSize) {
		t.Fatalf("PrepareExpand(sameSize): expected ErrSameSize, got %v", err)
	}
	// Verify no state was left behind.
	ps, ee := vol.ExpandState()
	if ps != 0 || ee != 0 {
		t.Fatalf("state leaked: PreparedSize=%d ExpandEpoch=%d", ps, ee)
	}
}
// T16: ConcurrentPrepareAndWrite — write I/O during PrepareExpand.
// Writes within old range must succeed, writes beyond must fail.
func TestQA_Expand_ConcurrentWriteDuringPrepare(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	// Start background writes to LBA 0 (within old range).
	var writeCount atomic.Int32
	var writeErr atomic.Value
	stopCh := make(chan struct{})
	// NOTE(review): this goroutine is signalled via stopCh but never joined,
	// so it can briefly outlive the assertions below — consider a WaitGroup.
	go func() {
		data := make([]byte, expandBlkSize)
		for {
			select {
			case <-stopCh:
				return
			default:
			}
			err := vol.WriteLBA(0, data)
			if err != nil {
				writeErr.Store(err)
				return
			}
			writeCount.Add(1)
		}
	}()
	// Let writes run briefly.
	time.Sleep(10 * time.Millisecond)
	// PrepareExpand while writes are happening.
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		close(stopCh)
		t.Fatalf("prepare: %v", err)
	}
	// Let a few more writes happen.
	time.Sleep(10 * time.Millisecond)
	close(stopCh)
	if e := writeErr.Load(); e != nil {
		t.Fatalf("write error during prepare: %v", e)
	}
	if writeCount.Load() == 0 {
		t.Fatal("no writes completed during test")
	}
}

// T17: ExpandStateRaceWithCommit — concurrent ExpandState reads during commit.
func TestQA_Expand_ExpandStateRaceWithCommit(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	var wg sync.WaitGroup
	// Concurrent ExpandState readers.
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 100; j++ {
				ps, ee := vol.ExpandState()
				// Valid states: (expandNewSize, 1) before commit, or (0, 0) after.
				if ps != 0 && ps != expandNewSize {
					t.Errorf("invalid PreparedSize: %d", ps)
				}
				if ee != 0 && ee != 1 {
					t.Errorf("invalid ExpandEpoch: %d", ee)
				}
				// PreparedSize and ExpandEpoch must be consistent (both set or both cleared).
				if (ps == 0) != (ee == 0) {
					t.Errorf("inconsistent ExpandState: (%d, %d)", ps, ee)
				}
			}
		}()
	}
	// Commit while readers are running.
	time.Sleep(1 * time.Millisecond)
	if err := vol.CommitExpand(1); err != nil {
		t.Fatalf("commit: %v", err)
	}
	wg.Wait()
}

// T18: TrimDuringPreparedExpand — trim within old range must work.
func TestQA_Expand_TrimDuringPrepared(t *testing.T) {
	vol, _ := createQAExpandVol(t)
	defer vol.Close()
	// Write data.
	data := make([]byte, expandBlkSize)
	data[0] = 0xFF
	if err := vol.WriteLBA(0, data); err != nil {
		t.Fatalf("write: %v", err)
	}
	// Prepare expand.
	if err := vol.PrepareExpand(expandNewSize, 1); err != nil {
		t.Fatalf("prepare: %v", err)
	}
	// Trim LBA 0 (within old range).
	if err := vol.Trim(0, expandBlkSize); err != nil {
		t.Fatalf("trim during prepared: %v", err)
	}
	// Read should return zeros.
	got, err := vol.ReadLBA(0, expandBlkSize)
	if err != nil {
		t.Fatalf("read after trim: %v", err)
	}
	zeros := make([]byte, expandBlkSize)
	if !bytes.Equal(got, zeros) {
		t.Fatalf("expected zeros after trim, got %x at [0]", got[0])
	}
}

// T19: SuperblockValidate — manually construct superblock with
// PreparedSize == VolumeSize and verify Validate() rejects it.
// (A valid prepared size must be strictly larger than VolumeSize.)
func TestQA_Expand_SuperblockValidatePreparedSize(t *testing.T) {
	sb := Superblock{
		Version:      CurrentVersion,
		VolumeSize:   1024 * 1024,
		BlockSize:    4096,
		ExtentSize:   65536,
		WALSize:      65536,
		WALOffset:    SuperblockSize,
		PreparedSize: 1024 * 1024, // == VolumeSize, should fail
		ExpandEpoch:  1,
	}
	copy(sb.Magic[:], MagicSWBK)
	if err := sb.Validate(); err == nil {
		t.Fatal("Validate should reject PreparedSize == VolumeSize")
	}
}

// T20: SuperblockValidate — ExpandEpoch != 0 with PreparedSize == 0.
// (An epoch without a prepared size is an inconsistent on-disk state.)
func TestQA_Expand_SuperblockValidateOrphanEpoch(t *testing.T) {
	sb := Superblock{
		Version:      CurrentVersion,
		VolumeSize:   1024 * 1024,
		BlockSize:    4096,
		ExtentSize:   65536,
		WALSize:      65536,
		WALOffset:    SuperblockSize,
		PreparedSize: 0,
		ExpandEpoch:  5, // orphan epoch
	}
	copy(sb.Magic[:], MagicSWBK)
	if err := sb.Validate(); err == nil {
		t.Fatal("Validate should reject ExpandEpoch!=0 when PreparedSize==0")
	}
}

228
weed/storage/blockvol/qa_iobackend_config_test.go

@ -0,0 +1,228 @@
//go:build ignore
package blockvol
import (
"strings"
"testing"
)
// =============================================================================
// QA Adversarial Tests for IOBackend Config (Item 3)
//
// Covers: ParseIOBackend, ResolveIOBackend, Validate for IOBackend field,
// edge cases, unknown values, io_uring rejection, case insensitivity.
// =============================================================================
// --- ParseIOBackend ---

// Valid spellings (case-insensitive, whitespace-trimmed, "" = auto) must all
// parse to the expected backend.
func TestQA_ParseIOBackend_ValidInputs(t *testing.T) {
	cases := []struct {
		input string
		want  IOBackend
	}{
		{"auto", IOBackendAuto},
		{"AUTO", IOBackendAuto},
		{"Auto", IOBackendAuto},
		{"", IOBackendAuto},
		{" auto ", IOBackendAuto},
		{"standard", IOBackendStandard},
		{"STANDARD", IOBackendStandard},
		{"Standard", IOBackendStandard},
		{" standard ", IOBackendStandard},
		{"io_uring", IOBackendIOURing},
		{"IO_URING", IOBackendIOURing},
		{"Io_Uring", IOBackendIOURing},
		{"iouring", IOBackendIOURing},
		{"IOURING", IOBackendIOURing},
	}
	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			got, err := ParseIOBackend(tc.input)
			if err != nil {
				t.Fatalf("ParseIOBackend(%q): unexpected error: %v", tc.input, err)
			}
			if got != tc.want {
				t.Fatalf("ParseIOBackend(%q) = %v, want %v", tc.input, got, tc.want)
			}
		})
	}
}

// Unknown backend names must error, fall back to Auto, and mention
// "unknown IOBackend" in the message.
func TestQA_ParseIOBackend_InvalidInputs(t *testing.T) {
	invalids := []string{
		"spdk",
		"uring",
		"io-uring",
		"io_uring_sqpoll",
		"direct",
		"aio",
		"posix",
		"libaio",
		"123",
		"null",
		"none",
	}
	for _, s := range invalids {
		t.Run(s, func(t *testing.T) {
			got, err := ParseIOBackend(s)
			if err == nil {
				t.Fatalf("ParseIOBackend(%q) = %v, want error", s, got)
			}
			if got != IOBackendAuto {
				t.Fatalf("ParseIOBackend(%q) error case should return Auto, got %v", s, got)
			}
			if !strings.Contains(err.Error(), "unknown IOBackend") {
				t.Fatalf("error should mention 'unknown IOBackend', got: %v", err)
			}
		})
	}
}
// --- IOBackend.String ---
// TestQA_IOBackend_String checks the String form of each defined backend
// plus two out-of-range values, which must render as "unknown(N)".
func TestQA_IOBackend_String(t *testing.T) {
	for _, c := range []struct {
		backend IOBackend
		want    string
	}{
		{IOBackendAuto, "auto"},
		{IOBackendStandard, "standard"},
		{IOBackendIOURing, "io_uring"},
		{IOBackend(99), "unknown(99)"},
		{IOBackend(-1), "unknown(-1)"},
	} {
		if got := c.backend.String(); got != c.want {
			t.Errorf("IOBackend(%d).String() = %q, want %q", int(c.backend), got, c.want)
		}
	}
}
// --- ResolveIOBackend ---
// TestQA_ResolveIOBackend pins the resolution mapping: only Auto is
// rewritten (to Standard); concrete backends pass through unchanged, and
// Resolve performs no validation of its own.
func TestQA_ResolveIOBackend(t *testing.T) {
	// Auto is the one value that maps to something else: standard.
	if r := ResolveIOBackend(IOBackendAuto); r != IOBackendStandard {
		t.Fatalf("ResolveIOBackend(Auto) = %v, want Standard", r)
	}
	// A concrete backend is returned untouched.
	if r := ResolveIOBackend(IOBackendStandard); r != IOBackendStandard {
		t.Fatalf("ResolveIOBackend(Standard) = %v, want Standard", r)
	}
	// io_uring also passes through — Resolve does not reject it.
	if r := ResolveIOBackend(IOBackendIOURing); r != IOBackendIOURing {
		t.Fatalf("ResolveIOBackend(IOURing) = %v, want IOURing", r)
	}
}
// --- Validate IOBackend field ---
// TestQA_Config_Validate_IOBackend_AutoOK: the default Auto backend is
// always accepted by Validate.
func TestQA_Config_Validate_IOBackend_AutoOK(t *testing.T) {
	conf := DefaultConfig()
	conf.IOBackend = IOBackendAuto
	if err := conf.Validate(); err != nil {
		t.Fatalf("Validate with IOBackendAuto: %v", err)
	}
}
// TestQA_Config_Validate_IOBackend_StandardOK: the explicit standard
// backend is accepted by Validate.
func TestQA_Config_Validate_IOBackend_StandardOK(t *testing.T) {
	conf := DefaultConfig()
	conf.IOBackend = IOBackendStandard
	if err := conf.Validate(); err != nil {
		t.Fatalf("Validate with IOBackendStandard: %v", err)
	}
}
// TestQA_Config_Validate_IOBackend_IOURingRejected: io_uring is a known
// value but not implemented yet, so Validate must reject it with a message
// that says so.
func TestQA_Config_Validate_IOBackend_IOURingRejected(t *testing.T) {
	conf := DefaultConfig()
	conf.IOBackend = IOBackendIOURing
	switch err := conf.Validate(); {
	case err == nil:
		t.Fatal("Validate should reject IOBackendIOURing (not yet implemented)")
	case !strings.Contains(err.Error(), "not yet implemented"):
		t.Fatalf("error should mention 'not yet implemented', got: %v", err)
	}
}
// TestQA_Config_Validate_IOBackend_OutOfRange: a value past the defined
// enum range must be rejected as an unknown backend.
func TestQA_Config_Validate_IOBackend_OutOfRange(t *testing.T) {
	conf := DefaultConfig()
	conf.IOBackend = IOBackend(99)
	switch err := conf.Validate(); {
	case err == nil:
		t.Fatal("Validate should reject out-of-range IOBackend")
	case !strings.Contains(err.Error(), "unknown IOBackend"):
		t.Fatalf("error should mention 'unknown IOBackend', got: %v", err)
	}
}
// TestQA_Config_Validate_IOBackend_NegativeValue: a negative backend value
// (possible via unchecked casts) must not slip through Validate.
func TestQA_Config_Validate_IOBackend_NegativeValue(t *testing.T) {
	conf := DefaultConfig()
	conf.IOBackend = IOBackend(-1)
	if conf.Validate() == nil {
		t.Fatal("Validate should reject negative IOBackend")
	}
}
// --- DefaultConfig IOBackend ---
// TestQA_DefaultConfig_IOBackend_IsAuto: DefaultConfig must leave IOBackend
// at the Auto zero value.
func TestQA_DefaultConfig_IOBackend_IsAuto(t *testing.T) {
	if got := DefaultConfig().IOBackend; got != IOBackendAuto {
		t.Fatalf("DefaultConfig().IOBackend = %v, want Auto (zero value)", got)
	}
}
// --- applyDefaults does NOT override IOBackend ---
// TestQA_ApplyDefaults_IOBackend_ZeroStaysAuto: applyDefaults does not
// touch IOBackend, so the zero value (Auto) must survive it.
func TestQA_ApplyDefaults_IOBackend_ZeroStaysAuto(t *testing.T) {
	var conf BlockVolConfig
	conf.applyDefaults()
	if conf.IOBackend != IOBackendAuto {
		t.Fatalf("applyDefaults left IOBackend = %v, want Auto", conf.IOBackend)
	}
}
// TestQA_ApplyDefaults_IOBackend_ExplicitPreserved: an explicitly chosen
// backend must not be overwritten by applyDefaults.
func TestQA_ApplyDefaults_IOBackend_ExplicitPreserved(t *testing.T) {
	conf := BlockVolConfig{IOBackend: IOBackendStandard}
	conf.applyDefaults()
	if conf.IOBackend != IOBackendStandard {
		t.Fatalf("applyDefaults changed IOBackend from Standard to %v", conf.IOBackend)
	}
}
// --- Round-trip: parse → resolve → string ---
// TestQA_IOBackend_RoundTrip parses "auto" and "standard", resolves each,
// and stringifies the result. Both must end at "standard" because Auto
// resolves to the standard backend.
func TestQA_IOBackend_RoundTrip(t *testing.T) {
	for _, spelling := range []string{"auto", "standard"} {
		parsed, err := ParseIOBackend(spelling)
		if err != nil {
			t.Fatalf("ParseIOBackend(%q): %v", spelling, err)
		}
		if got := ResolveIOBackend(parsed).String(); got != "standard" {
			t.Fatalf("round-trip %q → resolve → string = %q, want standard", spelling, got)
		}
	}
}
// --- Iota ordering stability ---
// TestQA_IOBackend_IotaValues pins the numeric values of the IOBackend
// constants to 0/1/2. Per the in-test comment these values are
// persisted/transmitted, so any reordering of the iota block is a
// wire/on-disk compatibility break — this test exists to catch that.
func TestQA_IOBackend_IotaValues(t *testing.T) {
	// These values are persisted/transmitted — they must never change.
	if IOBackendAuto != 0 {
		t.Fatalf("IOBackendAuto = %d, want 0", IOBackendAuto)
	}
	if IOBackendStandard != 1 {
		t.Fatalf("IOBackendStandard = %d, want 1", IOBackendStandard)
	}
	if IOBackendIOURing != 2 {
		t.Fatalf("IOBackendIOURing = %d, want 2", IOBackendIOURing)
	}
}

567
weed/storage/blockvol/qa_storage_profile_test.go

@ -0,0 +1,567 @@
package blockvol
import (
"bytes"
"crypto/rand"
"errors"
"fmt"
"os"
"path/filepath"
"sync"
"sync/atomic"
"testing"
)
// =============================================================================
// QA Adversarial Tests for StorageProfile (CP11A-1)
//
// These tests go beyond the dev-test coverage in storage_profile_test.go:
// - SP-A1: write/read data integrity on single profile
// - SP-A2: concurrent writes with no corruption
// - additional: crash recovery, superblock byte corruption, boundary cases
// =============================================================================
// TestQA_Profile_WritePath_SingleCorrect writes multiple blocks at different
// LBAs on a single-profile volume, reads them back, and verifies byte-for-byte
// correctness. This is SP-A1 from the test spec.
func TestQA_Profile_WritePath_SingleCorrect(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "sp-a1.blk")
	vol, err := CreateBlockVol(path, CreateOptions{
		VolumeSize:     256 * 1024, // 256KB = 64 blocks
		BlockSize:      4096,
		WALSize:        128 * 1024,
		StorageProfile: ProfileSingle,
	})
	if err != nil {
		t.Fatalf("Create: %v", err)
	}
	defer vol.Close()
	if vol.Profile() != ProfileSingle {
		t.Fatalf("Profile() = %v, want single", vol.Profile())
	}
	// Distinct fill pattern per LBA, including the last block of the volume.
	writes := []struct {
		lba  uint64
		fill byte
	}{
		{0, 0xAA},
		{10, 0xBB},
		{30, 0xCC},
		{63, 0xDD}, // last block in 256KB volume
	}
	for _, w := range writes {
		if err := vol.WriteLBA(w.lba, bytes.Repeat([]byte{w.fill}, 4096)); err != nil {
			t.Fatalf("WriteLBA(%d): %v", w.lba, err)
		}
	}
	// SyncCache to ensure WAL is durable before reading back.
	if err := vol.SyncCache(); err != nil {
		t.Fatalf("SyncCache: %v", err)
	}
	// Every written block must read back exactly as written.
	for _, w := range writes {
		got, err := vol.ReadLBA(w.lba, 4096)
		if err != nil {
			t.Fatalf("ReadLBA(%d): %v", w.lba, err)
		}
		if want := bytes.Repeat([]byte{w.fill}, 4096); !bytes.Equal(got, want) {
			t.Errorf("LBA %d: data mismatch (first byte: got 0x%02X, want 0x%02X)",
				w.lba, got[0], w.fill)
		}
	}
	// An untouched block must come back all-zero.
	zeros, err := vol.ReadLBA(5, 4096)
	if err != nil {
		t.Fatalf("ReadLBA(5): %v", err)
	}
	for i, b := range zeros {
		if b != 0 {
			t.Fatalf("LBA 5 byte[%d] = 0x%02X, want 0x00 (unwritten)", i, b)
		}
	}
}
// TestQA_Profile_ConcurrentWrites_Single runs 16 goroutines writing to
// non-overlapping LBAs on a single-profile volume. No data corruption
// or panics should occur. This is SP-A2 from the test spec.
func TestQA_Profile_ConcurrentWrites_Single(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "sp-a2.blk")
	// 1MB volume = 256 blocks. Each of 16 goroutines gets 16 blocks.
	vol, err := CreateBlockVol(path, CreateOptions{
		VolumeSize:     1024 * 1024,
		BlockSize:      4096,
		WALSize:        512 * 1024,
		StorageProfile: ProfileSingle,
	})
	if err != nil {
		t.Fatalf("Create: %v", err)
	}
	defer vol.Close()
	const (
		workers         = 16
		blocksPerWorker = 16
	)
	// One error slot per worker: disjoint indices, no lock needed.
	results := make([]error, workers)
	var wg sync.WaitGroup
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			base := uint64(id * blocksPerWorker)
			fill := byte(id + 1) // unique fill per goroutine
			for off := 0; off < blocksPerWorker; off++ {
				payload := bytes.Repeat([]byte{fill}, 4096)
				if err := vol.WriteLBA(base+uint64(off), payload); err != nil {
					results[id] = fmt.Errorf("goroutine %d LBA %d: %v", id, base+uint64(off), err)
					return
				}
			}
		}(w)
	}
	wg.Wait()
	for i, err := range results {
		if err != nil {
			t.Fatalf("goroutine %d: %v", i, err)
		}
	}
	// Sync and verify all data.
	if err := vol.SyncCache(); err != nil {
		t.Fatalf("SyncCache: %v", err)
	}
	for w := 0; w < workers; w++ {
		base := uint64(w * blocksPerWorker)
		fill := byte(w + 1)
		for off := 0; off < blocksPerWorker; off++ {
			lba := base + uint64(off)
			got, err := vol.ReadLBA(lba, 4096)
			if err != nil {
				t.Fatalf("ReadLBA(%d): %v", lba, err)
			}
			for j, b := range got {
				if b != fill {
					t.Fatalf("LBA %d byte[%d] = 0x%02X, want 0x%02X (goroutine %d)",
						lba, j, b, fill, w)
				}
			}
		}
	}
}
// TestQA_Profile_SurvivesCrashRecovery writes data on a single-profile
// volume, closes it, reopens it, and verifies that the profile metadata
// and data are intact.
//
// NOTE(review): the test name and the original comment claim a crash is
// simulated, but the code below calls vol.Close() — a clean shutdown.
// If OpenBlockVol's crash-recovery path is the intended target, this
// should skip Close (or kill the file handle) instead — TODO confirm.
func TestQA_Profile_SurvivesCrashRecovery(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "sp-crash.blk")
	vol, err := CreateBlockVol(path, CreateOptions{
		VolumeSize:     64 * 1024,
		BlockSize:      4096,
		WALSize:        32 * 1024,
		StorageProfile: ProfileSingle,
	})
	if err != nil {
		t.Fatalf("Create: %v", err)
	}
	// Write known data (a full 4KB block of 0xEE at LBA 0).
	data := make([]byte, 4096)
	for i := range data {
		data[i] = 0xEE
	}
	if err := vol.WriteLBA(0, data); err != nil {
		t.Fatalf("WriteLBA: %v", err)
	}
	// Make the write durable before closing.
	if err := vol.SyncCache(); err != nil {
		t.Fatalf("SyncCache: %v", err)
	}
	// Close normally (see NOTE above — this is a clean close, not a crash).
	vol.Close()
	// Reopen — recovery/validation runs on open.
	vol2, err := OpenBlockVol(path)
	if err != nil {
		t.Fatalf("Reopen: %v", err)
	}
	defer vol2.Close()
	// Profile metadata must survive the close/reopen cycle.
	if vol2.Profile() != ProfileSingle {
		t.Errorf("Profile after reopen = %v, want single", vol2.Profile())
	}
	// And the data written before the close must still be readable.
	got, err := vol2.ReadLBA(0, 4096)
	if err != nil {
		t.Fatalf("ReadLBA after reopen: %v", err)
	}
	if got[0] != 0xEE {
		t.Errorf("data[0] = 0x%02X, want 0xEE", got[0])
	}
}
// TestQA_Profile_CorruptByte_AllValues corrupts the StorageProfile byte on
// disk to every value 2..255 and verifies that OpenBlockVol rejects each one.
func TestQA_Profile_CorruptByte_AllValues(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "sp-corrupt-all.blk")
	vol, err := CreateBlockVol(path, CreateOptions{
		VolumeSize: 64 * 1024,
		BlockSize:  4096,
		WALSize:    32 * 1024,
	})
	if err != nil {
		t.Fatalf("Create: %v", err)
	}
	vol.Close()
	// Keep a pristine copy so every iteration starts from a valid image.
	original, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("read: %v", err)
	}
	// Iterate with an int to avoid the byte-wraparound loop idiom; the
	// covered range is identical (2..255 inclusive).
	for v := 2; v <= 255; v++ {
		corruptVal := byte(v)
		// Restore the valid image, then flip the profile byte at offset 105.
		if err := os.WriteFile(path, original, 0644); err != nil {
			t.Fatalf("restore: %v", err)
		}
		f, err := os.OpenFile(path, os.O_RDWR, 0644)
		if err != nil {
			t.Fatalf("open: %v", err)
		}
		if _, err := f.WriteAt([]byte{corruptVal}, 105); err != nil {
			f.Close()
			t.Fatalf("corrupt: %v", err)
		}
		f.Close()
		if _, err := OpenBlockVol(path); err == nil {
			t.Errorf("StorageProfile=%d: OpenBlockVol should fail", corruptVal)
		}
	}
}
// TestQA_Profile_StripedReject_NoFileLeaked verifies that attempting to
// create a striped volume does not leak partial files, even under different
// config combinations.
func TestQA_Profile_StripedReject_NoFileLeaked(t *testing.T) {
	dir := t.TempDir()
	for i, opts := range []CreateOptions{
		{VolumeSize: 64 * 1024, StorageProfile: ProfileStriped},
		{VolumeSize: 1024 * 1024, StorageProfile: ProfileStriped, WALSize: 256 * 1024},
		{VolumeSize: 64 * 1024, StorageProfile: ProfileStriped, BlockSize: 512},
	} {
		path := filepath.Join(dir, fmt.Sprintf("striped-%d.blk", i))
		// Create must fail with the sentinel error...
		if _, err := CreateBlockVol(path, opts); !errors.Is(err, ErrStripedNotImplemented) {
			t.Errorf("config %d: error = %v, want ErrStripedNotImplemented", i, err)
		}
		// ...and must not leave any file behind.
		if _, statErr := os.Stat(path); !os.IsNotExist(statErr) {
			t.Errorf("config %d: file %s should not exist after rejected create", i, path)
		}
	}
}
// TestQA_Profile_ConcurrentCreateSameFile races multiple goroutines trying
// to create a volume at the same path. Exactly one should succeed (O_EXCL),
// the rest should fail. No partial files should remain from losers.
func TestQA_Profile_ConcurrentCreateSameFile(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "race.blk")
	const racers = 8
	var (
		wg        sync.WaitGroup
		succeeded atomic.Int32
		failed    atomic.Int32
	)
	for i := 0; i < racers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			v, err := CreateBlockVol(path, CreateOptions{
				VolumeSize:     64 * 1024,
				BlockSize:      4096,
				WALSize:        32 * 1024,
				StorageProfile: ProfileSingle,
			})
			if err != nil {
				failed.Add(1)
				return
			}
			succeeded.Add(1)
			v.Close()
		}()
	}
	wg.Wait()
	if n := succeeded.Load(); n != 1 {
		t.Errorf("winners = %d, want exactly 1", n)
	}
	if n := failed.Load(); n != racers-1 {
		t.Errorf("errors = %d, want %d", n, racers-1)
	}
	// The winner's file should be valid.
	vol, err := OpenBlockVol(path)
	if err != nil {
		t.Fatalf("OpenBlockVol winner file: %v", err)
	}
	defer vol.Close()
	if vol.Profile() != ProfileSingle {
		t.Errorf("Profile() = %v, want single", vol.Profile())
	}
}
// TestQA_Profile_SuperblockByteOffset verifies the StorageProfile byte is
// at the exact expected offset (105) in the on-disk format. This prevents
// silent field-reorder regressions.
//
// Fixes over the previous version: both sb.WriteTo error returns were
// ignored, so a failed/short encode would surface as a confusing offset
// mismatch; and the byte-wise comparison assumed equal lengths.
func TestQA_Profile_SuperblockByteOffset(t *testing.T) {
	sb, err := NewSuperblock(64*1024, CreateOptions{
		StorageProfile: ProfileSingle,
	})
	if err != nil {
		t.Fatalf("NewSuperblock: %v", err)
	}
	// Write the superblock for single profile.
	var buf bytes.Buffer
	if _, err := sb.WriteTo(&buf); err != nil {
		t.Fatalf("WriteTo (single): %v", err)
	}
	data := buf.Bytes()
	if data[105] != 0 {
		t.Errorf("offset 105 = %d, want 0 (ProfileSingle)", data[105])
	}
	// Now set striped and check the byte changed.
	sb.StorageProfile = uint8(ProfileStriped)
	var buf2 bytes.Buffer
	if _, err := sb.WriteTo(&buf2); err != nil {
		t.Fatalf("WriteTo (striped): %v", err)
	}
	data2 := buf2.Bytes()
	if data2[105] != 1 {
		t.Errorf("offset 105 = %d, want 1 (ProfileStriped)", data2[105])
	}
	// Guard the byte-wise loop below against an encoding-length change.
	if len(data) != len(data2) {
		t.Fatalf("encoding length changed: %d vs %d", len(data), len(data2))
	}
	// Verify all other bytes are identical (only offset 105 changed).
	for i := range data {
		if i == 105 {
			continue
		}
		if data[i] != data2[i] {
			t.Errorf("byte[%d] changed: 0x%02X -> 0x%02X (only offset 105 should differ)", i, data[i], data2[i])
		}
	}
}
// TestQA_Profile_MultiBlockWriteRead writes a multi-block (16KB) payload
// at a non-zero LBA and reads it back on a single-profile volume.
// Catches alignment and multi-block dirty-map consistency bugs.
func TestQA_Profile_MultiBlockWriteRead(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "sp-multi.blk")
	vol, err := CreateBlockVol(path, CreateOptions{
		VolumeSize:     512 * 1024,
		BlockSize:      4096,
		WALSize:        256 * 1024,
		StorageProfile: ProfileSingle,
	})
	if err != nil {
		t.Fatalf("Create: %v", err)
	}
	defer vol.Close()
	// 4 blocks (16KB) of random bytes, written in a single call at LBA 20.
	payload := make([]byte, 16384)
	if _, err := rand.Read(payload); err != nil {
		t.Fatalf("rand: %v", err)
	}
	if err := vol.WriteLBA(20, payload); err != nil {
		t.Fatalf("WriteLBA: %v", err)
	}
	if err := vol.SyncCache(); err != nil {
		t.Fatalf("SyncCache: %v", err)
	}
	readBack, err := vol.ReadLBA(20, 16384)
	if err != nil {
		t.Fatalf("ReadLBA: %v", err)
	}
	if !bytes.Equal(readBack, payload) {
		t.Error("multi-block payload mismatch")
	}
}
// TestQA_Profile_ExpandPreservesProfile verifies that expanding a
// single-profile volume preserves the profile metadata.
func TestQA_Profile_ExpandPreservesProfile(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "sp-expand.blk")
	vol, err := CreateBlockVol(path, CreateOptions{
		VolumeSize:     64 * 1024,
		BlockSize:      4096,
		WALSize:        32 * 1024,
		StorageProfile: ProfileSingle,
	})
	if err != nil {
		t.Fatalf("Create: %v", err)
	}
	// Seed LBA 0 before the expand so we can prove it survives.
	if err := vol.WriteLBA(0, bytes.Repeat([]byte{0x42}, 4096)); err != nil {
		t.Fatalf("WriteLBA: %v", err)
	}
	// Expand to 128KB.
	if err := vol.Expand(128 * 1024); err != nil {
		t.Fatalf("Expand: %v", err)
	}
	if vol.Profile() != ProfileSingle {
		t.Errorf("Profile after expand = %v, want single", vol.Profile())
	}
	// Pre-expand data must be intact.
	got, err := vol.ReadLBA(0, 4096)
	if err != nil {
		t.Fatalf("ReadLBA(0): %v", err)
	}
	if got[0] != 0x42 {
		t.Errorf("data[0] = 0x%02X, want 0x42", got[0])
	}
	// LBA 20 lies in the newly added region (LBA 16+) — it must be writable.
	if err := vol.WriteLBA(20, bytes.Repeat([]byte{0x99}, 4096)); err != nil {
		t.Fatalf("WriteLBA(20): %v", err)
	}
	got2, err := vol.ReadLBA(20, 4096)
	if err != nil {
		t.Fatalf("ReadLBA(20): %v", err)
	}
	if got2[0] != 0x99 {
		t.Errorf("expanded LBA 20 data[0] = 0x%02X, want 0x99", got2[0])
	}
	// Close and reopen — profile and the new size must persist.
	vol.Close()
	vol2, err := OpenBlockVol(path)
	if err != nil {
		t.Fatalf("Reopen: %v", err)
	}
	defer vol2.Close()
	if vol2.Profile() != ProfileSingle {
		t.Errorf("Profile after reopen = %v, want single", vol2.Profile())
	}
	if vol2.Info().VolumeSize != 128*1024 {
		t.Errorf("VolumeSize = %d, want %d", vol2.Info().VolumeSize, 128*1024)
	}
}
// TestQA_Profile_SnapshotPreservesProfile creates a snapshot on a
// single-profile volume, writes more data, restores the snapshot,
// and verifies the profile metadata is unchanged.
//
// Fix over the previous version: both post-write ReadLBA calls ignored
// their error (`got, _ := ...`) and then indexed got[0], which panics on a
// nil slice instead of reporting a clean test failure. Errors are now
// checked before indexing.
func TestQA_Profile_SnapshotPreservesProfile(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "sp-snap.blk")
	vol, err := CreateBlockVol(path, CreateOptions{
		VolumeSize:     64 * 1024,
		BlockSize:      4096,
		WALSize:        32 * 1024,
		StorageProfile: ProfileSingle,
	})
	if err != nil {
		t.Fatalf("Create: %v", err)
	}
	defer vol.Close()
	// Write block A.
	dataA := make([]byte, 4096)
	for i := range dataA {
		dataA[i] = 0xAA
	}
	if err := vol.WriteLBA(0, dataA); err != nil {
		t.Fatalf("WriteLBA(A): %v", err)
	}
	// Create snapshot.
	if err := vol.CreateSnapshot(1); err != nil {
		t.Fatalf("CreateSnapshot: %v", err)
	}
	// Write block B (overwrites A at LBA 0).
	dataB := make([]byte, 4096)
	for i := range dataB {
		dataB[i] = 0xBB
	}
	if err := vol.WriteLBA(0, dataB); err != nil {
		t.Fatalf("WriteLBA(B): %v", err)
	}
	// Verify live reads B.
	got, err := vol.ReadLBA(0, 4096)
	if err != nil {
		t.Fatalf("ReadLBA(live): %v", err)
	}
	if got[0] != 0xBB {
		t.Fatalf("live data[0] = 0x%02X, want 0xBB", got[0])
	}
	// Restore snapshot.
	if err := vol.RestoreSnapshot(1); err != nil {
		t.Fatalf("RestoreSnapshot: %v", err)
	}
	// Profile should be unchanged.
	if vol.Profile() != ProfileSingle {
		t.Errorf("Profile after restore = %v, want single", vol.Profile())
	}
	// Data should be A again.
	got2, err := vol.ReadLBA(0, 4096)
	if err != nil {
		t.Fatalf("ReadLBA(restored): %v", err)
	}
	if got2[0] != 0xAA {
		t.Errorf("restored data[0] = 0x%02X, want 0xAA", got2[0])
	}
}

20
weed/storage/blockvol/superblock.go

@ -42,6 +42,8 @@ type Superblock struct {
Epoch uint64 // fencing epoch (0 = no fencing, Phase 3 compat)
DurabilityMode uint8 // CP8-3-1: 0=best_effort, 1=sync_all, 2=sync_quorum
StorageProfile uint8 // CP11A-1: 0=single, 1=striped (reserved)
PreparedSize uint64 // CP11A-2: pending expand size (0 = no expand in flight)
ExpandEpoch uint64 // CP11A-2: expand operation ID (0 = none)
}
// superblockOnDisk is the fixed-size on-disk layout (binary.Write/Read target).
@ -65,6 +67,8 @@ type superblockOnDisk struct {
Epoch uint64
DurabilityMode uint8
StorageProfile uint8
PreparedSize uint64
ExpandEpoch uint64
}
// NewSuperblock creates a superblock with defaults and a fresh UUID.
@ -135,6 +139,8 @@ func (sb *Superblock) WriteTo(w io.Writer) (int64, error) {
Epoch: sb.Epoch,
DurabilityMode: sb.DurabilityMode,
StorageProfile: sb.StorageProfile,
PreparedSize: sb.PreparedSize,
ExpandEpoch: sb.ExpandEpoch,
}
// Encode into beginning of buf; rest stays zero (padding).
@ -172,6 +178,10 @@ func (sb *Superblock) WriteTo(w io.Writer) (int64, error) {
buf[off] = d.DurabilityMode
off++
buf[off] = d.StorageProfile
off++
endian.PutUint64(buf[off:], d.PreparedSize)
off += 8
endian.PutUint64(buf[off:], d.ExpandEpoch)
n, err := w.Write(buf)
return int64(n), err
@ -236,6 +246,10 @@ func ReadSuperblock(r io.Reader) (Superblock, error) {
sb.DurabilityMode = buf[off]
off++
sb.StorageProfile = buf[off]
off++
sb.PreparedSize = endian.Uint64(buf[off:])
off += 8
sb.ExpandEpoch = endian.Uint64(buf[off:])
return sb, nil
}
@ -274,5 +288,11 @@ func (sb *Superblock) Validate() error {
if sb.StorageProfile > 1 {
return fmt.Errorf("%w: invalid StorageProfile %d", ErrInvalidSuperblock, sb.StorageProfile)
}
if sb.PreparedSize != 0 && sb.PreparedSize <= sb.VolumeSize {
return fmt.Errorf("%w: PreparedSize %d must be > VolumeSize %d", ErrInvalidSuperblock, sb.PreparedSize, sb.VolumeSize)
}
if sb.PreparedSize == 0 && sb.ExpandEpoch != 0 {
return fmt.Errorf("%w: ExpandEpoch %d must be 0 when PreparedSize is 0", ErrInvalidSuperblock, sb.ExpandEpoch)
}
return nil
}

16
weed/storage/blockvol/testrunner/actions/bench.go

@ -218,6 +218,7 @@ type fioJobStats struct {
IOPS float64 `json:"iops"`
BWBytes float64 `json:"bw_bytes"`
LatNS fioLatency `json:"lat_ns"`
CLatNS fioLatency `json:"clat_ns"`
}
type fioLatency struct {
@ -283,11 +284,11 @@ func ParseFioMetric(input, metric, direction string) (float64, error) {
case "lat_mean_us":
return stats.LatNS.Mean / 1000, nil // ns → µs
case "lat_p50_us":
return getPercentile(stats.LatNS, "50.000000") / 1000, nil
return getPercentileWithFallback(stats, "50.000000") / 1000, nil
case "lat_p99_us":
return getPercentile(stats.LatNS, "99.000000") / 1000, nil
return getPercentileWithFallback(stats, "99.000000") / 1000, nil
case "lat_p999_us":
return getPercentile(stats.LatNS, "99.900000") / 1000, nil
return getPercentileWithFallback(stats, "99.900000") / 1000, nil
default:
return 0, fmt.Errorf("unknown metric %q", metric)
}
@ -300,6 +301,15 @@ func getPercentile(lat fioLatency, key string) float64 {
return lat.Percentile[key]
}
// getPercentileWithFallback returns the requested latency percentile,
// preferring clat_ns (where fio reports percentiles) and falling back to
// lat_ns when clat_ns has no value (zero) for the key.
func getPercentileWithFallback(stats fioJobStats, key string) float64 {
	if clat := getPercentile(stats.CLatNS, key); clat != 0 {
		return clat
	}
	return getPercentile(stats.LatNS, key)
}
// benchStats computes statistics from a comma-separated list of values.
// Useful for aggregating results from multiple runs outside the phase repeat system.
// Params:

15
weed/storage/blockvol/testrunner/actions/block.go

@ -283,6 +283,21 @@ func killStale(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[
stdout, _, _, _ := node.Run(ctx, cmd)
actx.Log(" kill_stale %s: %s", process, strings.TrimSpace(stdout))
// Also kill by port: any process holding ports the scenario needs,
// regardless of binary name. This catches stale binaries with different
// names (e.g., iscsi-target-linux vs iscsi-target-test).
for _, portKey := range []string{"port", "iscsi_port", "nvme_port", "admin_port"} {
if portStr := act.Params[portKey]; portStr != "" {
killCmd := fmt.Sprintf(
"ss -tlnp 2>/dev/null | grep ':%s ' | grep -oP 'pid=\\K[0-9]+' | xargs -r kill -9 2>/dev/null && echo 'killed port %s occupant' || true",
portStr, portStr)
out, _, _, _ := node.Run(ctx, killCmd)
if out = strings.TrimSpace(out); out != "" {
actx.Log(" kill_stale port %s: %s", portStr, out)
}
}
}
// If iscsi cleanup requested, clean up stale iSCSI sessions.
if act.Params["iscsi_cleanup"] == "true" {
node.Run(ctx, "sudo iscsiadm -m session -u 2>/dev/null; sudo iscsiadm -m node -o delete 2>/dev/null")

6
weed/storage/blockvol/testrunner/actions/database.go

@ -70,10 +70,10 @@ func sqliteInsertRows(ctx context.Context, actx *tr.ActionContext, act tr.Action
// Generate SQL in a temp file with BEGIN/COMMIT, then pipe to sqlite3.
// Use bash -c with \x27 for single quotes to avoid quoting issues with sudo.
tmpFile := "/tmp/sw_sqlite_insert.sql"
tmpFile := tempPath(actx, "sqlite_insert.sql")
cmd := fmt.Sprintf(
`bash -c 'printf "BEGIN;\n" > %s; for i in $(seq 1 %s); do printf "INSERT INTO %s (data) VALUES (\x27row-%%d\x27);\n" $i; done >> %s; printf "COMMIT;\n" >> %s; sqlite3 %s < %s; rm -f %s'`,
tmpFile, count, table, tmpFile, tmpFile, path, tmpFile, tmpFile)
`bash -c 'mkdir -p %s; printf "BEGIN;\n" > %s; for i in $(seq 1 %s); do printf "INSERT INTO %s (data) VALUES (\x27row-%%d\x27);\n" $i; done >> %s; printf "COMMIT;\n" >> %s; sqlite3 %s < %s; rm -f %s'`,
actx.TempRoot, tmpFile, count, table, tmpFile, tmpFile, path, tmpFile, tmpFile)
_, stderr, code, err := node.RunRoot(ctx, cmd)
if err != nil || code != 0 {
return nil, fmt.Errorf("sqlite_insert_rows: code=%d stderr=%s err=%v", code, stderr, err)

234
weed/storage/blockvol/testrunner/actions/devops.go

@ -4,9 +4,12 @@ import (
"context"
"encoding/json"
"fmt"
"net"
"strconv"
"strings"
"time"
"github.com/seaweedfs/seaweedfs/weed/storage/blockvol/blockapi"
tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner"
)
@ -18,9 +21,41 @@ func RegisterDevOpsActions(r *tr.Registry) {
r.RegisterFunc("stop_weed", tr.TierDevOps, stopWeed)
r.RegisterFunc("wait_cluster_ready", tr.TierDevOps, waitClusterReady)
r.RegisterFunc("create_block_volume", tr.TierDevOps, createBlockVolume)
r.RegisterFunc("expand_block_volume", tr.TierDevOps, expandBlockVolume)
r.RegisterFunc("lookup_block_volume", tr.TierDevOps, lookupBlockVolume)
r.RegisterFunc("delete_block_volume", tr.TierDevOps, deleteBlockVolume)
r.RegisterFunc("wait_block_servers", tr.TierDevOps, waitBlockServers)
r.RegisterFunc("cluster_status", tr.TierDevOps, clusterStatus)
}
// setISCSIVars sets the save_as_iscsi_host/port/addr/iqn vars from a VolumeInfo.
// When the iSCSI addr has no host (e.g. ":3275"), falls back to the volume server's host.
//
// NOTE(review): when info.ISCSIAddr is empty, the _iscsi_host/_iscsi_port
// vars are not written at all — a stale value from a previous action could
// persist in actx.Vars. Confirm callers reset or tolerate this.
func setISCSIVars(actx *tr.ActionContext, prefix string, info *blockapi.VolumeInfo) {
	// Always-published outputs: capacity, raw addr, and IQN.
	actx.Vars[prefix+"_capacity"] = strconv.FormatUint(info.SizeBytes, 10)
	actx.Vars[prefix+"_iscsi_addr"] = info.ISCSIAddr
	actx.Vars[prefix+"_iqn"] = info.IQN
	if info.ISCSIAddr != "" {
		// Best-effort split: SplitHostPort errors are deliberately ignored;
		// on a malformed addr both host and port come back empty.
		host, port, _ := net.SplitHostPort(info.ISCSIAddr)
		if host == "" && info.VolumeServer != "" {
			host, _, _ = net.SplitHostPort(info.VolumeServer)
		}
		actx.Vars[prefix+"_iscsi_host"] = host
		actx.Vars[prefix+"_iscsi_port"] = port
	}
}
// blockAPIClient builds a blockapi.Client from the master_url param or var.
// The per-action param takes precedence over the scenario-level var.
func blockAPIClient(actx *tr.ActionContext, act tr.Action) (*blockapi.Client, error) {
	url := act.Params["master_url"]
	if url == "" {
		url = actx.Vars["master_url"]
	}
	if url == "" {
		return nil, fmt.Errorf("master_url param or var required")
	}
	return blockapi.NewClient(url), nil
}
// buildDeployWeed cross-compiles the weed binary and uploads to all nodes.
func buildDeployWeed(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
repoDir := actx.Vars["repo_dir"]
@ -30,7 +65,7 @@ func buildDeployWeed(ctx context.Context, actx *tr.ActionContext, act tr.Action)
actx.Log(" cross-compiling weed binary...")
localBin := repoDir + "/weed-linux"
buildCmd := fmt.Sprintf("cd %s && GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o weed-linux ./weed/command", repoDir)
buildCmd := fmt.Sprintf("cd %s && GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o weed-linux ./weed", repoDir)
ln := tr.NewLocalNode("build-host")
_, stderr, code, err := ln.Run(ctx, buildCmd)
@ -82,7 +117,7 @@ func startWeedMaster(ctx context.Context, actx *tr.ActionContext, act tr.Action)
// Ensure directory exists.
node.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", dir))
cmd := fmt.Sprintf("setsid %sweed master -port=%s -mdir=%s %s </dev/null >%s/master.log 2>&1 & echo $!",
cmd := fmt.Sprintf("sh -c 'nohup %sweed master -port=%s -mdir=%s %s </dev/null >%s/master.log 2>&1 & echo $!'",
tr.UploadBasePath, port, dir, extraArgs, dir)
stdout, stderr, code, err := node.RunRoot(ctx, cmd)
if err != nil || code != 0 {
@ -117,7 +152,7 @@ func startWeedVolume(ctx context.Context, actx *tr.ActionContext, act tr.Action)
node.RunRoot(ctx, fmt.Sprintf("mkdir -p %s", dir))
cmd := fmt.Sprintf("setsid %sweed volume -port=%s -mserver=%s -dir=%s %s </dev/null >%s/volume.log 2>&1 & echo $!",
cmd := fmt.Sprintf("sh -c 'nohup %sweed volume -port=%s -mserver=%s -dir=%s %s </dev/null >%s/volume.log 2>&1 & echo $!'",
tr.UploadBasePath, port, master, dir, extraArgs, dir)
stdout, stderr, code, err := node.RunRoot(ctx, cmd)
if err != nil || code != 0 {
@ -155,7 +190,7 @@ func stopWeed(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[s
actx.Log(" force-killed PID %s", pid)
return nil, nil
case <-ticker.C:
_, _, code, _ := node.Run(ctx, fmt.Sprintf("kill -0 %s 2>/dev/null", pid))
_, _, code, _ := node.RunRoot(ctx, fmt.Sprintf("kill -0 %s 2>/dev/null", pid))
if code != 0 {
actx.Log(" PID %s exited gracefully", pid)
return nil, nil
@ -209,29 +244,194 @@ func waitClusterReady(ctx context.Context, actx *tr.ActionContext, act tr.Action
}
}
// createBlockVolume creates a block volume via the master assign API.
// createBlockVolume creates a block volume via the master block API.
// Params: name, size (human e.g. "50M") or size_bytes, replica_factor (default 1).
// Sets save_as=JSON, save_as_capacity, save_as_iscsi_addr, save_as_iqn.
func createBlockVolume(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
node, err := getNode(actx, act.Node)
client, err := blockAPIClient(actx, act)
if err != nil {
return nil, fmt.Errorf("create_block_volume: %w", err)
}
masterURL := act.Params["master_url"]
if masterURL == "" {
return nil, fmt.Errorf("create_block_volume: master_url param required")
name := act.Params["name"]
if name == "" {
return nil, fmt.Errorf("create_block_volume: name param required")
}
size := act.Params["size"]
if size == "" {
size = "1g"
var sizeBytes uint64
if sb := act.Params["size_bytes"]; sb != "" {
sizeBytes, err = strconv.ParseUint(sb, 10, 64)
if err != nil {
return nil, fmt.Errorf("create_block_volume: invalid size_bytes: %w", err)
}
} else {
size := act.Params["size"]
if size == "" {
size = "1G"
}
sizeBytes, err = parseSizeBytes(size)
if err != nil {
return nil, fmt.Errorf("create_block_volume: %w", err)
}
}
cmd := fmt.Sprintf("curl -s -X POST '%s/vol/assign?type=block&size=%s' 2>/dev/null", masterURL, size)
stdout, stderr, code, err := node.Run(ctx, cmd)
if err != nil || code != 0 {
return nil, fmt.Errorf("create_block_volume: code=%d stderr=%s err=%v", code, stderr, err)
rf := parseInt(act.Params["replica_factor"], 1)
info, err := client.CreateVolume(ctx, blockapi.CreateVolumeRequest{
Name: name,
SizeBytes: sizeBytes,
ReplicaFactor: rf,
})
if err != nil {
return nil, fmt.Errorf("create_block_volume: %w", err)
}
return map[string]string{"value": strings.TrimSpace(stdout)}, nil
jsonBytes, _ := json.Marshal(info)
actx.Log(" created block volume %s (size=%d, rf=%d)", name, info.SizeBytes, rf)
// Set multi-var outputs.
if act.SaveAs != "" {
setISCSIVars(actx, act.SaveAs, info)
}
return map[string]string{"value": string(jsonBytes)}, nil
}
// expandBlockVolume expands a block volume via master block API.
// Params: name, new_size (human e.g. "100M") or new_size_bytes.
// Returns the resulting capacity (bytes) in the "value" output.
func expandBlockVolume(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
	client, err := blockAPIClient(actx, act)
	if err != nil {
		return nil, fmt.Errorf("expand_block_volume: %w", err)
	}
	name := act.Params["name"]
	if name == "" {
		return nil, fmt.Errorf("expand_block_volume: name param required")
	}
	// Explicit byte count takes precedence over the human-readable size.
	var target uint64
	switch {
	case act.Params["new_size_bytes"] != "":
		target, err = strconv.ParseUint(act.Params["new_size_bytes"], 10, 64)
		if err != nil {
			return nil, fmt.Errorf("expand_block_volume: invalid new_size_bytes: %w", err)
		}
	case act.Params["new_size"] != "":
		target, err = parseSizeBytes(act.Params["new_size"])
		if err != nil {
			return nil, fmt.Errorf("expand_block_volume: %w", err)
		}
	default:
		return nil, fmt.Errorf("expand_block_volume: new_size or new_size_bytes param required")
	}
	capacity, err := client.ExpandVolume(ctx, name, target)
	if err != nil {
		return nil, fmt.Errorf("expand_block_volume: %w", err)
	}
	actx.Log(" expanded block volume %s -> %d bytes", name, capacity)
	return map[string]string{"value": strconv.FormatUint(capacity, 10)}, nil
}
// lookupBlockVolume looks up a block volume via master block API.
// Params: name. Sets save_as_capacity, save_as_iscsi_addr, save_as_iqn, save_as_iscsi_host, save_as_iscsi_port.
// Returns the volume size (bytes) in the "value" output.
func lookupBlockVolume(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
	api, err := blockAPIClient(actx, act)
	if err != nil {
		return nil, fmt.Errorf("lookup_block_volume: %w", err)
	}
	volName := act.Params["name"]
	if volName == "" {
		return nil, fmt.Errorf("lookup_block_volume: name param required")
	}
	vi, err := api.LookupVolume(ctx, volName)
	if err != nil {
		return nil, fmt.Errorf("lookup_block_volume: %w", err)
	}
	// Export the multi-var outputs only when the caller asked for save_as.
	if act.SaveAs != "" {
		setISCSIVars(actx, act.SaveAs, vi)
	}
	actx.Log(" looked up %s: size=%d iscsi=%s", volName, vi.SizeBytes, vi.ISCSIAddr)
	return map[string]string{"value": strconv.FormatUint(vi.SizeBytes, 10)}, nil
}
// deleteBlockVolume deletes a block volume via master block API.
func deleteBlockVolume(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
	client, err := blockAPIClient(actx, act)
	if err != nil {
		return nil, fmt.Errorf("delete_block_volume: %w", err)
	}
	volName := act.Params["name"]
	if volName == "" {
		return nil, fmt.Errorf("delete_block_volume: name param required")
	}
	if delErr := client.DeleteVolume(ctx, volName); delErr != nil {
		return nil, fmt.Errorf("delete_block_volume: %w", delErr)
	}
	actx.Log(" deleted block volume %s", volName)
	return nil, nil
}
// waitBlockServers polls master until N block-capable servers are registered.
// Params: count (default 1), timeout (default 60s).
func waitBlockServers(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
	client, err := blockAPIClient(actx, act)
	if err != nil {
		return nil, fmt.Errorf("wait_block_servers: %w", err)
	}
	want := parseInt(act.Params["count"], 1)
	// Default deadline; overridden only when a parseable timeout param is present.
	timeout := 60 * time.Second
	if raw, ok := act.Params["timeout"]; ok {
		if parsed, perr := parseDuration(raw); perr == nil {
			timeout = parsed
		}
	}
	timeoutCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()
	ticker := time.NewTicker(2 * time.Second)
	defer ticker.Stop()
	polls := 0
	for {
		select {
		case <-timeoutCtx.Done():
			return nil, fmt.Errorf("wait_block_servers: timeout waiting for %d servers after %s (polled %d times)", want, timeout, polls)
		case <-ticker.C:
			polls++
			servers, listErr := client.ListServers(timeoutCtx)
			if listErr != nil {
				actx.Log(" poll %d: error: %v", polls, listErr)
				continue
			}
			capable := 0
			for _, srv := range servers {
				if srv.BlockCapable {
					capable++
				}
			}
			// Log the first few polls, then every 10th, to keep output readable.
			if polls <= 3 || polls%10 == 0 {
				actx.Log(" poll %d: %d/%d block-capable servers (total %d)", polls, capable, want, len(servers))
			}
			if capable >= want {
				actx.Log(" %d block-capable servers ready", capable)
				return map[string]string{"value": strconv.Itoa(capable)}, nil
			}
		}
	}
}
// clusterStatus fetches the full cluster status JSON.

22
weed/storage/blockvol/testrunner/actions/devops_test.go

@ -18,6 +18,10 @@ func TestDevOpsActions_Registration(t *testing.T) {
"stop_weed",
"wait_cluster_ready",
"create_block_volume",
"expand_block_volume",
"lookup_block_volume",
"delete_block_volume",
"wait_block_servers",
"cluster_status",
}
@ -35,8 +39,8 @@ func TestDevOpsActions_Tier(t *testing.T) {
byTier := registry.ListByTier()
devopsActions := byTier[tr.TierDevOps]
if len(devopsActions) != 7 {
t.Errorf("devops tier has %d actions, want 7", len(devopsActions))
if len(devopsActions) != 11 {
t.Errorf("devops tier has %d actions, want 11", len(devopsActions))
}
// Verify all are in devops tier.
@ -80,11 +84,11 @@ func TestAllActions_Registration(t *testing.T) {
if n := len(byTier[tr.TierCore]); n != 11 {
t.Errorf("core: %d, want 11", n)
}
if n := len(byTier[tr.TierBlock]); n != 55 {
t.Errorf("block: %d, want 55", n)
if n := len(byTier[tr.TierBlock]); n != 56 {
t.Errorf("block: %d, want 56", n)
}
if n := len(byTier[tr.TierDevOps]); n != 7 {
t.Errorf("devops: %d, want 7", n)
if n := len(byTier[tr.TierDevOps]); n != 11 {
t.Errorf("devops: %d, want 11", n)
}
if n := len(byTier[tr.TierChaos]); n != 5 {
t.Errorf("chaos: %d, want 5", n)
@ -93,13 +97,13 @@ func TestAllActions_Registration(t *testing.T) {
t.Errorf("k8s: %d, want 14", n)
}
// Total should be 92 (89 existing + 3 profiling: pprof_capture, vmstat_capture, iostat_capture).
// Total should be 97 (92 prev + 4 devops: expand/lookup/delete/wait_block_servers + 1 block: iscsi_login_direct).
total := 0
for _, actions := range byTier {
total += len(actions)
}
if total != 92 {
t.Errorf("total actions: %d, want 92", total)
if total != 97 {
t.Errorf("total actions: %d, want 97", total)
}
}

28
weed/storage/blockvol/testrunner/actions/helpers.go

@ -85,6 +85,34 @@ func parseInt(s string, def int) int {
return v
}
// parseSizeBytes converts a human-readable size string (e.g. "50M", "1G", "104857600") to bytes.
// Suffixes K/M/G (case-insensitive) denote binary multiples (1024-based); a bare
// number is taken as bytes. Returns an error for empty input, non-numeric values,
// or sizes whose byte count would overflow uint64.
func parseSizeBytes(s string) (uint64, error) {
	s = strings.TrimSpace(s)
	if s == "" {
		return 0, fmt.Errorf("empty size string")
	}
	upper := strings.ToUpper(s)
	var multiplier uint64 = 1
	switch {
	case strings.HasSuffix(upper, "G"):
		multiplier = 1024 * 1024 * 1024
		s = strings.TrimSuffix(upper, "G")
	case strings.HasSuffix(upper, "M"):
		multiplier = 1024 * 1024
		s = strings.TrimSuffix(upper, "M")
	case strings.HasSuffix(upper, "K"):
		multiplier = 1024
		s = strings.TrimSuffix(upper, "K")
	default:
		s = upper
	}
	v, err := strconv.ParseUint(s, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("parse size %q: %w", s, err)
	}
	// Guard against silent wrap-around when applying the suffix multiplier
	// (e.g. "18446744073709551615G" would otherwise return a tiny value).
	if multiplier > 1 && v > ^uint64(0)/multiplier {
		return 0, fmt.Errorf("size %q overflows uint64", s)
	}
	return v * multiplier, nil
}
func parseIntSlice(s string) []int {
var result []int
for _, part := range strings.Split(s, ",") {

42
weed/storage/blockvol/testrunner/actions/io.go

@ -46,7 +46,10 @@ func ddWrite(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[st
}
// Generate random data to temp file, write to device, compute md5.
tmpFile := "/tmp/sw-test-runner-dd-data"
tmpFile := tempPath(actx, "dd-data")
if err := ensureTempRoot(ctx, node, actx); err != nil {
return nil, fmt.Errorf("dd_write: %w", err)
}
genCmd := fmt.Sprintf("dd if=/dev/urandom of=%s bs=%s count=%s 2>/dev/null", tmpFile, bs, count)
_, stderr, code, err := node.RunRoot(ctx, genCmd)
if err != nil || code != 0 {
@ -98,7 +101,10 @@ func ddReadMD5(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[
return nil, err
}
tmpFile := "/tmp/sw-test-runner-dd-read"
tmpFile := tempPath(actx, "dd-read")
if err := ensureTempRoot(ctx, node, actx); err != nil {
return nil, fmt.Errorf("dd_read_md5: %w", err)
}
readCmd := fmt.Sprintf("dd if=%s of=%s bs=%s count=%s iflag=direct", device, tmpFile, bs, count)
if skip := act.Params["skip"]; skip != "" {
readCmd += fmt.Sprintf(" skip=%s", skip)
@ -285,8 +291,12 @@ func writeLoopBg(ctx context.Context, actx *tr.ActionContext, act tr.Action) (ma
return nil, err
}
cmd := fmt.Sprintf("setsid bash -c 'while true; do dd if=/dev/urandom of=%s bs=%s count=1 oflag=%s conv=notrunc 2>/dev/null; done' &>/tmp/sw_bg.log & echo $!",
device, bs, oflag)
bgLog := tempPath(actx, "bg.log")
if err := ensureTempRoot(ctx, node, actx); err != nil {
return nil, fmt.Errorf("write_loop_bg: %w", err)
}
cmd := fmt.Sprintf("setsid bash -c 'while true; do dd if=/dev/urandom of=%s bs=%s count=1 oflag=%s conv=notrunc 2>/dev/null; done' &>%s & echo $!",
device, bs, oflag, bgLog)
stdout, stderr, code, err := node.RunRoot(ctx, cmd)
if err != nil || code != 0 {
return nil, fmt.Errorf("write_loop_bg: code=%d stderr=%s err=%v", code, stderr, err)
@ -318,3 +328,27 @@ func stopBg(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[str
return nil, nil
}
// ensureTempRoot creates the per-run temp directory on the remote node.
// Uses RunRoot so the directory is created with root privileges, ensuring
// subsequent RunRoot commands can write into it.
func ensureTempRoot(ctx context.Context, node interface{ RunRoot(context.Context, string) (string, string, int, error) }, actx *tr.ActionContext) error {
	// Nothing to create when no per-run temp root was allocated.
	if actx.TempRoot == "" {
		return nil
	}
	mkdirCmd := fmt.Sprintf("mkdir -p %s", actx.TempRoot)
	if _, stderr, code, err := node.RunRoot(ctx, mkdirCmd); err != nil || code != 0 {
		return fmt.Errorf("mkdir TempRoot %s: code=%d stderr=%s err=%v", actx.TempRoot, code, stderr, err)
	}
	return nil
}
// tempPath returns a path under the per-run temp root for the given suffix.
// Falls back to /tmp if TempRoot is empty (backward compat).
func tempPath(actx *tr.ActionContext, suffix string) string {
	if actx.TempRoot != "" {
		return actx.TempRoot + "/sw-" + suffix
	}
	return "/tmp/sw-" + suffix
}

59
weed/storage/blockvol/testrunner/actions/iscsi.go

@ -3,6 +3,7 @@ package actions
import (
"context"
"fmt"
"strconv"
tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner"
"github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner/infra"
@ -11,6 +12,7 @@ import (
// RegisterISCSIActions registers iSCSI client actions.
func RegisterISCSIActions(r *tr.Registry) {
r.RegisterFunc("iscsi_login", tr.TierBlock, iscsiLogin)
r.RegisterFunc("iscsi_login_direct", tr.TierBlock, iscsiLoginDirect)
r.RegisterFunc("iscsi_logout", tr.TierBlock, iscsiLogout)
r.RegisterFunc("iscsi_discover", tr.TierBlock, iscsiDiscover)
r.RegisterFunc("iscsi_cleanup", tr.TierBlock, iscsiCleanup)
@ -71,6 +73,61 @@ func iscsiLogin(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map
return map[string]string{"value": dev}, nil
}
// iscsiLoginDirect discovers + logs into a target using explicit host, port, iqn params.
// Unlike iscsi_login, it does not require a target spec — useful for cluster-provisioned
// volumes whose iSCSI address comes from the master API response.
func iscsiLoginDirect(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
	// Validate the three required connection params up front.
	host := act.Params["host"]
	if host == "" {
		return nil, fmt.Errorf("iscsi_login_direct: host param required")
	}
	portStr := act.Params["port"]
	if portStr == "" {
		return nil, fmt.Errorf("iscsi_login_direct: port param required")
	}
	port, err := strconv.Atoi(portStr)
	if err != nil {
		return nil, fmt.Errorf("iscsi_login_direct: invalid port %q: %w", portStr, err)
	}
	iqn := act.Params["iqn"]
	if iqn == "" {
		return nil, fmt.Errorf("iscsi_login_direct: iqn param required")
	}
	node, err := getNode(actx, act.Node)
	if err != nil {
		return nil, fmt.Errorf("iscsi_login_direct: %w", err)
	}
	client := infra.NewISCSIClient(node)
	actx.Log(" discovering %s:%d ...", host, port)
	iqns, derr := client.Discover(ctx, host, port)
	if derr != nil {
		return nil, fmt.Errorf("iscsi_login_direct discover: %w", derr)
	}
	// The requested IQN must appear in the discovery response before login.
	known := false
	for i := range iqns {
		if iqns[i] == iqn {
			known = true
			break
		}
	}
	if !known {
		return nil, fmt.Errorf("iscsi_login_direct: IQN %s not found in discovery (got %v)", iqn, iqns)
	}
	actx.Log(" logging in to %s ...", iqn)
	dev, lerr := client.Login(ctx, iqn)
	if lerr != nil {
		return nil, fmt.Errorf("iscsi_login_direct: %w", lerr)
	}
	actx.Log(" device: %s", dev)
	return map[string]string{"value": dev}, nil
}
func iscsiLogout(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
targetName := act.Target
if targetName == "" {
@ -128,5 +185,5 @@ func iscsiCleanup(ctx context.Context, actx *tr.ActionContext, act tr.Action) (m
}
client := infra.NewISCSIClient(node)
return nil, client.CleanupAll(ctx, "iqn.2024.com.seaweedfs:")
return nil, client.CleanupAll(ctx, "iqn.2024-01.com.seaweedfs:")
}

2
weed/storage/blockvol/testrunner/actions/metrics.go

@ -312,7 +312,7 @@ func iostatCapture(ctx context.Context, actx *tr.ActionContext, act tr.Action) (
func collectArtifactsAction(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
dir := act.Params["dir"]
if dir == "" {
dir = "/tmp/sw-test-runner-artifacts"
dir = tempPath(actx, "artifacts")
}
// Find client node for dmesg/lsblk.

69
weed/storage/blockvol/testrunner/engine.go

@ -53,6 +53,12 @@ func (e *Engine) Run(ctx context.Context, s *Scenario, actx *ActionContext) *Sce
actx.Vars[k] = v
}
// Allocate a unique per-run temp directory (T6).
if actx.TempRoot == "" {
actx.TempRoot = fmt.Sprintf("/tmp/sw-run-%s-%d", s.Name, start.UnixMilli())
}
actx.Vars["__temp_dir"] = actx.TempRoot
// Separate always-phases for deferred cleanup.
var normalPhases, alwaysPhases []Phase
for _, p := range s.Phases {
@ -237,15 +243,19 @@ func (e *Engine) runPhaseParallel(ctx context.Context, actx *ActionContext, phas
}
wg.Wait()
var errors []string
for i, ar := range results {
pr.Actions = append(pr.Actions, ar)
if ar.Status == StatusFail && !phase.Actions[i].IgnoreError {
pr.Status = StatusFail
if pr.Error == "" {
pr.Error = fmt.Sprintf("action %d (%s) failed: %s", i, phase.Actions[i].Action, ar.Error)
}
errors = append(errors, fmt.Sprintf("action %d (%s): %s", i, phase.Actions[i].Action, ar.Error))
}
}
if len(errors) == 1 {
pr.Error = errors[0]
} else if len(errors) > 1 {
pr.Error = fmt.Sprintf("%d actions failed: [1] %s", len(errors), strings.Join(errors, "; "))
}
return pr
}
@ -287,13 +297,37 @@ func (e *Engine) runAction(ctx context.Context, actx *ActionContext, act Action)
}
}
e.log(" [action] %s", resolved.Action)
// Enforce action-level timeout if specified.
var actionTimeout time.Duration
if resolved.Timeout != "" {
if dur, err := time.ParseDuration(resolved.Timeout); err == nil && dur > 0 {
actionTimeout = dur
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, dur)
defer cancel()
}
}
// Log action start with context (node/target if available).
actionLabel := resolved.Action
if resolved.Node != "" {
actionLabel += " @" + resolved.Node
} else if resolved.Target != "" {
actionLabel += " >" + resolved.Target
}
e.log(" [action] %s", actionLabel)
output, err := handler.Execute(ctx, actx, resolved)
elapsed := time.Since(start)
// Enrich timeout errors with action-specific context.
if err != nil && ctx.Err() != nil && actionTimeout > 0 {
err = fmt.Errorf("action %q timed out after %s: %w", resolved.Action, actionTimeout, err)
}
ar := ActionResult{
Action: resolved.Action,
Duration: time.Since(start),
Duration: elapsed,
YAML: yamlDef,
}
@ -302,10 +336,16 @@ func (e *Engine) runAction(ctx context.Context, actx *ActionContext, act Action)
ar.Error = err.Error()
if act.IgnoreError {
ar.Status = StatusPass
e.log(" [action] %s failed (ignored): %v", resolved.Action, err)
e.log(" [done] %s (ignored error, %s): %v", actionLabel, fmtDuration(elapsed), err)
} else {
e.log(" [FAIL] %s (%s): %v", actionLabel, fmtDuration(elapsed), err)
}
} else {
ar.Status = StatusPass
// Only log completion for slow actions (>1s) to avoid noise on quick ones.
if elapsed >= time.Second {
e.log(" [done] %s (%s)", actionLabel, fmtDuration(elapsed))
}
}
// Store output as var if save_as is set.
@ -327,7 +367,7 @@ func (e *Engine) runAction(ctx context.Context, actx *ActionContext, act Action)
if output != nil {
if v, ok := output["value"]; ok {
ar.Output = truncate(v, 4096)
ar.Output = truncate(v, 65536)
}
}
@ -343,6 +383,8 @@ func resolveAction(act Action, vars map[string]string) Action {
Node: act.Node,
SaveAs: act.SaveAs,
IgnoreError: act.IgnoreError,
Retry: act.Retry,
Timeout: act.Timeout,
Params: make(map[string]string),
}
@ -373,7 +415,18 @@ func truncate(s string, max int) string {
if len(s) <= max {
return s
}
return s[:max] + "..."
return s[:max] + fmt.Sprintf("...[truncated, %d/%d bytes]", max, len(s))
}
// fmtDuration formats a duration as a human-readable string.
func fmtDuration(d time.Duration) string {
if d < time.Second {
return fmt.Sprintf("%dms", d.Milliseconds())
}
if d < time.Minute {
return fmt.Sprintf("%.1fs", d.Seconds())
}
return fmt.Sprintf("%dm%ds", int(d.Minutes()), int(d.Seconds())%60)
}
// marshalActionYAML serializes a resolved action to YAML for report display.

144
weed/storage/blockvol/testrunner/engine_test.go

@ -3,6 +3,7 @@ package testrunner
import (
"context"
"fmt"
"strings"
"testing"
"time"
)
@ -889,6 +890,149 @@ func TestEngine_CleanupVars(t *testing.T) {
}
}
func TestEngine_ActionTimeout_Enforced(t *testing.T) {
	reg := NewRegistry()
	// Handler that blocks until its context is cancelled; the per-action
	// timeout must fire long before the 30s sleep would complete.
	blocker := ActionHandlerFunc(func(ctx context.Context, actx *ActionContext, act Action) (map[string]string, error) {
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-time.After(30 * time.Second):
			return nil, nil
		}
	})
	reg.Register("slow", TierCore, blocker)
	scenario := &Scenario{
		Name:    "action-timeout-test",
		Timeout: Duration{10 * time.Second}, // scenario timeout is generous
		Phases: []Phase{
			{
				Name:    "phase1",
				Actions: []Action{{Action: "slow", Timeout: "150ms"}},
			},
		},
	}
	eng := NewEngine(reg, nil)
	actx := &ActionContext{
		Scenario: scenario,
		Vars:     make(map[string]string),
		Log:      func(string, ...interface{}) {},
	}
	began := time.Now()
	res := eng.Run(context.Background(), scenario, actx)
	took := time.Since(began)
	if res.Status != StatusFail {
		t.Errorf("status = %s, want FAIL", res.Status)
	}
	// The 150ms action timeout should end the run well before the 10s scenario timeout.
	if took > 2*time.Second {
		t.Errorf("took %v, action timeout should have fired at ~150ms", took)
	}
	// The failure must be attributed to the action and labeled as a timeout.
	if len(res.Phases) > 0 && len(res.Phases[0].Actions) > 0 {
		msg := res.Phases[0].Actions[0].Error
		if !strings.Contains(msg, "slow") || !strings.Contains(msg, "timed out") {
			t.Errorf("error = %q, should mention action name and timeout", msg)
		}
	}
}
func TestEngine_TempRoot_UniquePerRun(t *testing.T) {
	reg := NewRegistry()
	reg.Register("step", TierCore, &mockHandler{})
	scenario := &Scenario{
		Name:    "tempdir-test",
		Timeout: Duration{5 * time.Second},
		Phases: []Phase{
			{Name: "phase1", Actions: []Action{{Action: "step"}}},
		},
	}
	eng := NewEngine(reg, nil)
	// Helper: a fresh context per run, so each Run allocates its own TempRoot.
	newCtx := func() *ActionContext {
		return &ActionContext{
			Scenario: scenario,
			Vars:     make(map[string]string),
			Log:      func(string, ...interface{}) {},
		}
	}
	first := newCtx()
	eng.Run(context.Background(), scenario, first)
	// Small delay so timestamp differs.
	time.Sleep(2 * time.Millisecond)
	second := newCtx()
	eng.Run(context.Background(), scenario, second)
	// Both runs must allocate a TempRoot, and the two must differ.
	if first.TempRoot == "" {
		t.Fatal("run 1: TempRoot is empty")
	}
	if second.TempRoot == "" {
		t.Fatal("run 2: TempRoot is empty")
	}
	if first.TempRoot == second.TempRoot {
		t.Errorf("TempRoot should be unique per run: both = %q", first.TempRoot)
	}
	// The engine must also expose the temp dir via the __temp_dir var.
	if first.Vars["__temp_dir"] != first.TempRoot {
		t.Errorf("__temp_dir = %q, want %q", first.Vars["__temp_dir"], first.TempRoot)
	}
	// The path should embed the scenario name for easy identification.
	if !strings.Contains(first.TempRoot, "tempdir-test") {
		t.Errorf("TempRoot %q should contain scenario name", first.TempRoot)
	}
}
func TestEngine_TempRoot_PreservedIfSet(t *testing.T) {
	reg := NewRegistry()
	reg.Register("step", TierCore, &mockHandler{})
	scenario := &Scenario{
		Name:    "tempdir-preset-test",
		Timeout: Duration{5 * time.Second},
		Phases: []Phase{
			{Name: "phase1", Actions: []Action{{Action: "step"}}},
		},
	}
	actx := &ActionContext{
		Scenario: scenario,
		Vars:     make(map[string]string),
		Log:      func(string, ...interface{}) {},
		TempRoot: "/custom/temp/path",
	}
	NewEngine(reg, nil).Run(context.Background(), scenario, actx)
	// A caller-provided TempRoot must survive the run untouched.
	if actx.TempRoot != "/custom/temp/path" {
		t.Errorf("TempRoot = %q, want /custom/temp/path (should preserve caller-set value)", actx.TempRoot)
	}
}
func TestParse_AggregateValidation(t *testing.T) {
base := `
name: validate-test

18
weed/storage/blockvol/testrunner/infra/target.go

@ -80,11 +80,17 @@ func (t *Target) Deploy(localBin string) error {
// Start launches the target process. If create is true, a new volume is created.
func (t *Target) Start(ctx context.Context, create bool) error {
// Pre-flight: verify binary exists and is executable.
_, _, binCode, _ := t.Node.Run(ctx, fmt.Sprintf("test -x %s", t.BinPath))
if binCode != 0 {
return fmt.Errorf("binary not found or not executable on %s: %s", t.Node.Host, t.BinPath)
}
// Pre-flight: check if iSCSI port is already in use.
stdout, _, code, _ := t.Node.Run(ctx, fmt.Sprintf("ss -tln | grep ':%d '", t.Config.Port))
if code == 0 && strings.TrimSpace(stdout) != "" {
portOut, _, portCode, _ := t.Node.Run(ctx, fmt.Sprintf("ss -tln | grep ':%d '", t.Config.Port))
if portCode == 0 && strings.TrimSpace(portOut) != "" {
owner, _, _, _ := t.Node.Run(ctx, fmt.Sprintf("ss -tlnp | grep ':%d ' | head -1", t.Config.Port))
return fmt.Errorf("port %d already in use on %s: %s",
return fmt.Errorf("port %d (iSCSI) already in use on %s: %s",
t.Config.Port, t.Node.Host, strings.TrimSpace(owner))
}
@ -116,7 +122,7 @@ func (t *Target) Start(ctx context.Context, create bool) error {
}
// Discover PID by matching the binary name
stdout, _, _, _ = t.Node.Run(ctx, fmt.Sprintf("ps -eo pid,args | grep '%s' | grep -v grep | awk '{print $1}'", t.BinPath))
stdout, _, _, _ := t.Node.Run(ctx, fmt.Sprintf("ps -eo pid,args | grep '%s' | grep -v grep | awk '{print $1}'", t.BinPath))
pidStr := strings.TrimSpace(stdout)
if idx := strings.IndexByte(pidStr, '\n'); idx > 0 {
pidStr = pidStr[:idx]
@ -227,13 +233,13 @@ func CheckDiskSpace(ctx context.Context, node *Node, volFile, volSize, walSize s
}
stdout, _, code, _ := node.Run(ctx, fmt.Sprintf("df -BM %s 2>/dev/null | tail -1 | awk '{print $4}'", dir))
if code != 0 {
return nil // can't check, proceed anyway
return fmt.Errorf("disk space check failed on %s (df returned code %d for %s)", node.Host, code, dir)
}
availStr := strings.TrimSpace(stdout)
availStr = strings.TrimSuffix(availStr, "M")
availMB, err := strconv.Atoi(availStr)
if err != nil {
return nil // can't parse, proceed anyway
return fmt.Errorf("disk space check: cannot parse df output %q on %s", availStr, node.Host)
}
if availMB < neededMB {

1
weed/storage/blockvol/testrunner/registry.go

@ -14,6 +14,7 @@ type ActionContext struct {
Vars map[string]string
Log func(format string, args ...interface{})
Coordinator *Coordinator // non-nil when running in coordinator mode
TempRoot string // per-run temp directory on remote nodes (T6)
}
// NodeRunner abstracts remote command execution (implemented by infra.Node).

78
weed/storage/blockvol/testrunner/scenarios/cp103-perf-baseline.yaml

@ -172,6 +172,45 @@ phases:
runtime: "60"
name: "mixed-70-30-j4-qd32"
# --- iSCSI profiling snapshot (T7) ---
- name: iscsi-profile
parallel: true
actions:
- action: pprof_capture
target: primary
save_as: iscsi_pprof_heap
profile: heap
output_dir: "{{ __temp_dir }}/pprof"
label: iscsi-heap
- action: pprof_capture
target: primary
save_as: iscsi_pprof_goroutine
profile: goroutine
output_dir: "{{ __temp_dir }}/pprof"
label: iscsi-goroutine
- action: pprof_capture
target: primary
save_as: iscsi_pprof_cpu
profile: profile
seconds: "10"
output_dir: "{{ __temp_dir }}/pprof"
label: iscsi-cpu
- action: vmstat_capture
node: server
save_as: iscsi_vmstat
seconds: "10"
output_dir: "{{ __temp_dir }}/os"
label: iscsi-vmstat
- action: iostat_capture
node: server
save_as: iscsi_iostat
seconds: "10"
output_dir: "{{ __temp_dir }}/os"
label: iscsi-iostat
- action: scrape_metrics
target: primary
save_as: iscsi_metrics
- name: iscsi-disconnect
actions:
- action: iscsi_logout
@ -313,6 +352,45 @@ phases:
runtime: "60"
name: "mixed-70-30-j4-qd32"
# --- NVMe profiling snapshot (T7) ---
- name: nvme-profile
parallel: true
actions:
- action: pprof_capture
target: primary
save_as: nvme_pprof_heap
profile: heap
output_dir: "{{ __temp_dir }}/pprof"
label: nvme-heap
- action: pprof_capture
target: primary
save_as: nvme_pprof_goroutine
profile: goroutine
output_dir: "{{ __temp_dir }}/pprof"
label: nvme-goroutine
- action: pprof_capture
target: primary
save_as: nvme_pprof_cpu
profile: profile
seconds: "10"
output_dir: "{{ __temp_dir }}/pprof"
label: nvme-cpu
- action: vmstat_capture
node: server
save_as: nvme_vmstat
seconds: "10"
output_dir: "{{ __temp_dir }}/os"
label: nvme-vmstat
- action: iostat_capture
node: server
save_as: nvme_iostat
seconds: "10"
output_dir: "{{ __temp_dir }}/os"
label: nvme-iostat
- action: scrape_metrics
target: primary
save_as: nvme_metrics
- name: nvme-disconnect
actions:
- action: nvme_disconnect

30
weed/storage/blockvol/testrunner/scenarios/cp85-perf-baseline.yaml

@ -85,6 +85,36 @@ phases:
name: perf_64k_seqwrite
save_as: fio_64k_sw
# --- Profiling snapshot (T7) ---
- name: profile_capture
parallel: true
actions:
- action: pprof_capture
target: primary
save_as: pprof_heap
profile: heap
output_dir: "{{ __temp_dir }}/pprof"
label: post-bench-heap
- action: pprof_capture
target: primary
save_as: pprof_cpu
profile: profile
seconds: "10"
output_dir: "{{ __temp_dir }}/pprof"
label: post-bench-cpu
- action: vmstat_capture
node: target_node
save_as: post_vmstat
seconds: "10"
output_dir: "{{ __temp_dir }}/os"
label: post-bench-vmstat
- action: iostat_capture
node: target_node
save_as: post_iostat
seconds: "10"
output_dir: "{{ __temp_dir }}/os"
label: post-bench-iostat
- name: collect_metrics
actions:
- action: scrape_metrics

Loading…
Cancel
Save