diff --git a/test/volume_server/framework/volume_fixture.go b/test/volume_server/framework/volume_fixture.go index f6229f3f8..98529a5c0 100644 --- a/test/volume_server/framework/volume_fixture.go +++ b/test/volume_server/framework/volume_fixture.go @@ -5,6 +5,8 @@ import ( "context" "fmt" "net/http" + "os" + "path/filepath" "testing" "time" @@ -45,6 +47,39 @@ func UploadBytes(t testing.TB, client *http.Client, volumeURL, fid string, data return DoRequest(t, client, req) } +// CorruptIndexFile appends garbage bytes to a volume's .idx file on disk so +// that CheckIndexFile detects a size mismatch during scrub. +func CorruptIndexFile(t testing.TB, baseDir string, volumeID uint32) { + t.Helper() + idxPath := filepath.Join(baseDir, "volume", fmt.Sprintf("%d.idx", volumeID)) + f, err := os.OpenFile(idxPath, os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + t.Fatalf("open idx file for corruption: %v", err) + } + defer f.Close() + if _, err := f.Write([]byte{0xDE, 0xAD}); err != nil { + t.Fatalf("corrupt idx file: %v", err) + } +} + +// EnableMaintenanceMode puts the volume server into maintenance mode. +func EnableMaintenanceMode(t testing.TB, ctx context.Context, client volume_server_pb.VolumeServerClient) { + t.Helper() + stateResp, err := client.GetState(ctx, &volume_server_pb.GetStateRequest{}) + if err != nil { + t.Fatalf("GetState failed: %v", err) + } + _, err = client.SetState(ctx, &volume_server_pb.SetStateRequest{ + State: &volume_server_pb.VolumeServerState{ + Maintenance: true, + Version: stateResp.GetState().GetVersion(), + }, + }) + if err != nil { + t.Fatalf("SetState maintenance=true failed: %v", err) + } +} + func ReadBytes(t testing.TB, client *http.Client, volumeURL, fid string) *http.Response { t.Helper() diff --git a/test/volume_server/grpc/admin_readonly_collection_test.go b/test/volume_server/grpc/admin_readonly_collection_test.go index 6aba413c6..36d2d2f31 100644 --- a/test/volume_server/grpc/admin_readonly_collection_test.go +++ b/test/volume_server/grpc/admin_readonly_collection_test.go @@ -97,9 +97,13 @@ func TestVolumeMarkReadonlyWritableErrorPaths(t *testing.T) { conn, grpcClient := framework.DialVolumeServer(t, clusterHarness.VolumeGRPCAddress()) defer conn.Close() + const volumeID = uint32(75) + framework.AllocateVolume(t, grpcClient, volumeID, "") + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() + // non-existent volumes should return "not found" _, err := grpcClient.VolumeMarkReadonly(ctx, &volume_server_pb.VolumeMarkReadonlyRequest{VolumeId: 98771, Persist: true}) if err == nil || !strings.Contains(err.Error(), "not found") { t.Fatalf("VolumeMarkReadonly missing-volume error mismatch: %v", err) @@ -110,18 +114,30 @@ func TestVolumeMarkReadonlyWritableErrorPaths(t *testing.T) { t.Fatalf("VolumeMarkWritable missing-volume error mismatch: %v", err) } - stateResp, err := grpcClient.GetState(ctx, &volume_server_pb.GetStateRequest{}) - if err != nil { - t.Fatalf("GetState failed: %v", err) + // enter maintenance mode + framework.EnableMaintenanceMode(t, ctx, grpcClient) + + // existing volume in maintenance mode should return "maintenance mode" error + _, err = grpcClient.VolumeMarkReadonly(ctx, &volume_server_pb.VolumeMarkReadonlyRequest{VolumeId: volumeID, Persist: true}) + if err == nil || !strings.Contains(err.Error(), "maintenance mode") { + t.Fatalf("VolumeMarkReadonly maintenance error mismatch: %v", err) } - _, err = grpcClient.SetState(ctx, &volume_server_pb.SetStateRequest{ - State: &volume_server_pb.VolumeServerState{ - Maintenance: true, - Version: stateResp.GetState().GetVersion(), - }, - }) - if err != nil { - t.Fatalf("SetState maintenance=true failed: %v", err) + + _, err = grpcClient.VolumeMarkWritable(ctx, &volume_server_pb.VolumeMarkWritableRequest{VolumeId: volumeID}) + if err == nil || !strings.Contains(err.Error(), "maintenance mode") { + t.Fatalf("VolumeMarkWritable maintenance error mismatch: %v", err) + } + + // non-existent volume in maintenance mode should still return "not found" + // (volume lookup happens before maintenance check) + _, err = grpcClient.VolumeMarkReadonly(ctx, &volume_server_pb.VolumeMarkReadonlyRequest{VolumeId: 98773, Persist: true}) + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Fatalf("VolumeMarkReadonly missing-volume in maintenance error mismatch: %v", err) + } + + _, err = grpcClient.VolumeMarkWritable(ctx, &volume_server_pb.VolumeMarkWritableRequest{VolumeId: 98774}) + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Fatalf("VolumeMarkWritable missing-volume in maintenance error mismatch: %v", err) } } diff --git a/test/volume_server/grpc/scrub_query_test.go b/test/volume_server/grpc/scrub_query_test.go index 46b67e0ca..9ddfddead 100644 --- a/test/volume_server/grpc/scrub_query_test.go +++ b/test/volume_server/grpc/scrub_query_test.go @@ -385,3 +385,149 @@ func TestQueryCookieMismatchReturnsEOFNoResults(t *testing.T) { t.Fatalf("Query cookie mismatch expected EOF with no streamed records, got: %v", err) } } + +func TestScrubVolumeMarkBrokenReadonlyHealthyVolume(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + clusterHarness := framework.StartSingleVolumeCluster(t, matrix.P1()) + conn, grpcClient := framework.DialVolumeServer(t, clusterHarness.VolumeGRPCAddress()) + defer conn.Close() + + const volumeID = uint32(76) + framework.AllocateVolume(t, grpcClient, volumeID, "") + + httpClient := framework.NewHTTPClient() + framework.UploadBytes(t, httpClient, clusterHarness.VolumeAdminURL(), framework.NewFileID(volumeID, 1, 1), []byte("healthy data")) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + resp, err := grpcClient.ScrubVolume(ctx, &volume_server_pb.ScrubVolumeRequest{ + VolumeIds: []uint32{volumeID}, + Mode: volume_server_pb.VolumeScrubMode_INDEX, + MarkBrokenVolumesReadonly: true, + }) + if err != nil { + t.Fatalf("ScrubVolume with MarkBrokenVolumesReadonly on healthy volume failed: %v", err) + } + if len(resp.GetBrokenVolumeIds()) != 0 { + t.Fatalf("expected no broken volumes, got %v", resp.GetBrokenVolumeIds()) + } + + statusResp, err := grpcClient.VolumeStatus(ctx, &volume_server_pb.VolumeStatusRequest{VolumeId: volumeID}) + if err != nil { + t.Fatalf("VolumeStatus failed: %v", err) + } + if statusResp.GetIsReadOnly() { + t.Fatalf("healthy volume should not be read-only after scrub with MarkBrokenVolumesReadonly") + } +} + +func TestScrubVolumeMarkBrokenReadonlyCorruptVolume(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + clusterHarness := framework.StartSingleVolumeCluster(t, matrix.P1()) + conn, grpcClient := framework.DialVolumeServer(t, clusterHarness.VolumeGRPCAddress()) + defer conn.Close() + + const volumeID = uint32(77) + framework.AllocateVolume(t, grpcClient, volumeID, "") + + httpClient := framework.NewHTTPClient() + framework.UploadBytes(t, httpClient, clusterHarness.VolumeAdminURL(), framework.NewFileID(volumeID, 1, 1), []byte("test data")) + + framework.CorruptIndexFile(t, clusterHarness.BaseDir(), volumeID) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + // scrub without the flag: broken volume reported but not marked read-only + resp, err := grpcClient.ScrubVolume(ctx, &volume_server_pb.ScrubVolumeRequest{ + VolumeIds: []uint32{volumeID}, + Mode: volume_server_pb.VolumeScrubMode_INDEX, + }) + if err != nil { + t.Fatalf("ScrubVolume without flag failed: %v", err) + } + if len(resp.GetBrokenVolumeIds()) == 0 { + t.Fatalf("expected broken volume after corruption") + } + + statusResp, err := grpcClient.VolumeStatus(ctx, &volume_server_pb.VolumeStatusRequest{VolumeId: volumeID}) + if err != nil { + t.Fatalf("VolumeStatus after scrub without flag failed: %v", err) + } + if statusResp.GetIsReadOnly() { + t.Fatalf("volume should not be read-only when MarkBrokenVolumesReadonly is false") + } + + // scrub with the flag: broken volume should now be marked read-only + resp, err = grpcClient.ScrubVolume(ctx, &volume_server_pb.ScrubVolumeRequest{ + VolumeIds: []uint32{volumeID}, + Mode: volume_server_pb.VolumeScrubMode_INDEX, + MarkBrokenVolumesReadonly: true, + }) + if err != nil { + t.Fatalf("ScrubVolume with MarkBrokenVolumesReadonly failed: %v", err) + } + if len(resp.GetBrokenVolumeIds()) == 0 { + t.Fatalf("expected broken volume after corruption with flag") + } + + found := false + for _, d := range resp.GetDetails() { + if strings.Contains(d, "is now read-only") { + found = true + break + } + } + if !found { + t.Fatalf("expected 'is now read-only' in details, got: %v", resp.GetDetails()) + } + + statusResp, err = grpcClient.VolumeStatus(ctx, &volume_server_pb.VolumeStatusRequest{VolumeId: volumeID}) + if err != nil { + t.Fatalf("VolumeStatus after MarkBrokenVolumesReadonly scrub failed: %v", err) + } + if !statusResp.GetIsReadOnly() { + t.Fatalf("broken volume should be read-only after MarkBrokenVolumesReadonly scrub") + } +} + +func TestScrubVolumeMarkBrokenReadonlyInMaintenanceMode(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + clusterHarness := framework.StartSingleVolumeCluster(t, matrix.P1()) + conn, grpcClient := framework.DialVolumeServer(t, clusterHarness.VolumeGRPCAddress()) + defer conn.Close() + + const volumeID = uint32(78) + framework.AllocateVolume(t, grpcClient, volumeID, "") + + httpClient := framework.NewHTTPClient() + framework.UploadBytes(t, httpClient, clusterHarness.VolumeAdminURL(), framework.NewFileID(volumeID, 1, 1), []byte("test data")) + + framework.CorruptIndexFile(t, clusterHarness.BaseDir(), volumeID) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + framework.EnableMaintenanceMode(t, ctx, grpcClient) + + // scrub with the flag in maintenance mode: makeVolumeReadonly should fail + // and ScrubVolume should propagate the error + _, err := grpcClient.ScrubVolume(ctx, &volume_server_pb.ScrubVolumeRequest{ + VolumeIds: []uint32{volumeID}, + Mode: volume_server_pb.VolumeScrubMode_INDEX, + MarkBrokenVolumesReadonly: true, + }) + if err == nil || !strings.Contains(err.Error(), "maintenance mode") { + t.Fatalf("ScrubVolume with MarkBrokenVolumesReadonly in maintenance mode error mismatch: %v", err) + } +}