From ff84ef880d3c7e4f1a534153d6d42ec93af0a029 Mon Sep 17 00:00:00 2001 From: Plamen Nikolov Date: Tue, 24 Feb 2026 09:45:08 +0200 Subject: [PATCH] fix(s3api): make ListObjectsV1 namespaced and prevent marker-echo pagination loops (#8409) * fix(s3api): make ListObjectsV1 namespaced and stop marker-echo pagination loops * test(s3api): harden marker-echo coverage and align V1 encoding tag * test(s3api): cover encoded marker matching and trim redundant setup * refactor(s3api): tighten V1 list helper visibility and test mock docs --- weed/s3api/s3api_object_handlers_list.go | 80 ++++++++++- weed/s3api/s3api_object_handlers_list_test.go | 134 ++++++++++++++++++ 2 files changed, 213 insertions(+), 1 deletion(-) diff --git a/weed/s3api/s3api_object_handlers_list.go b/weed/s3api/s3api_object_handlers_list.go index 8a52c91f1..d32cbb415 100644 --- a/weed/s3api/s3api_object_handlers_list.go +++ b/weed/s3api/s3api_object_handlers_list.go @@ -47,6 +47,37 @@ type ListBucketResultV2 struct { StartAfter string `xml:"StartAfter,omitempty"` } +type listBucketResultV1 struct { + XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"` + Metadata []MetadataEntry `xml:"Metadata,omitempty"` + Name string `xml:"Name"` + Prefix string `xml:"Prefix"` + Marker string `xml:"Marker"` + NextMarker string `xml:"NextMarker,omitempty"` + MaxKeys int `xml:"MaxKeys"` + Delimiter string `xml:"Delimiter,omitempty"` + IsTruncated bool `xml:"IsTruncated"` + Contents []ListEntry `xml:"Contents,omitempty"` + CommonPrefixes []PrefixEntry `xml:"CommonPrefixes,omitempty"` + EncodingType string `xml:"EncodingType,omitempty"` +} + +func toListBucketResultV1(in ListBucketResult) listBucketResultV1 { + return listBucketResultV1{ + Metadata: in.Metadata, + Name: in.Name, + Prefix: in.Prefix, + Marker: in.Marker, + NextMarker: in.NextMarker, + MaxKeys: in.MaxKeys, + Delimiter: in.Delimiter, + IsTruncated: in.IsTruncated, + Contents: in.Contents, + CommonPrefixes: in.CommonPrefixes, + EncodingType: in.EncodingType, + } +} + func (s3a *S3ApiServer) ListObjectsV2Handler(w http.ResponseWriter, r *http.Request) { // https://docs.aws.amazon.com/AmazonS3/latest/API/v2-RESTBucketGET.html @@ -148,6 +179,7 @@ func (s3a *S3ApiServer) ListObjectsV1Handler(w http.ResponseWriter, r *http.Requ s3err.WriteErrorResponse(w, r, s3err.ErrInternalError) return } + sanitizeV1MarkerEcho(&response, marker, encodingTypeUrl) if len(response.Contents) == 0 { if exists, existErr := s3a.bucketExists(bucket); existErr == nil && !exists { @@ -157,7 +189,47 @@ func (s3a *S3ApiServer) ListObjectsV1Handler(w http.ResponseWriter, r *http.Requ } glog.V(3).Infof("ListObjectsV1Handler response: %+v", response) - writeSuccessResponseXML(w, r, response) + writeSuccessResponseXML(w, r, toListBucketResultV1(response)) +} + +func sanitizeV1MarkerEcho(response *ListBucketResult, marker string, encodingTypeUrl bool) { + if marker == "" { + return + } + + markerCandidates := map[string]struct{}{ + marker: {}, + strings.TrimPrefix(marker, "/"): {}, + } + if encodingTypeUrl { + escapedMarker := urlPathEscape(strings.TrimPrefix(marker, "/")) + markerCandidates[escapedMarker] = struct{}{} + } + matchesMarker := func(v string) bool { + if _, ok := markerCandidates[v]; ok { + return true + } + _, ok := markerCandidates[strings.TrimPrefix(v, "/")] + return ok + } + + if len(response.Contents) > 0 { + filtered := response.Contents[:0] + for _, content := range response.Contents { + if matchesMarker(content.Key) { + continue + } + filtered = append(filtered, content) + } + response.Contents = filtered + } + + // doListFilerEntries advances nextMarker to the last emitted entry and skips + // the marker in exclusive mode. So NextMarker==marker indicates no progress. + if matchesMarker(response.NextMarker) && len(response.Contents) == 0 && len(response.CommonPrefixes) == 0 { + response.NextMarker = "" + response.IsTruncated = false + } } func (s3a *S3ApiServer) listFilerEntries(bucket string, originalPrefix string, maxKeys uint16, originalMarker string, delimiter string, encodingTypeUrl bool, fetchOwner bool) (response ListBucketResult, err error) { @@ -543,6 +615,12 @@ func (s3a *S3ApiServer) doListFilerEntries(client filer_pb.SeaweedFilerClient, d } } entry := resp.Entry + // listFilerEntries always calls doListFilerEntries with inclusiveStartFrom=false + // (S3 marker semantics are exclusive), but keep the guard explicit to preserve + // behavior if inclusive callers are introduced in the future. + if !inclusiveStartFrom && marker != "" && entry.Name == marker { + continue + } if cursor.maxKeys <= 0 { cursor.isTruncated = true diff --git a/weed/s3api/s3api_object_handlers_list_test.go b/weed/s3api/s3api_object_handlers_list_test.go index 480ace4ae..ba34f802b 100644 --- a/weed/s3api/s3api_object_handlers_list_test.go +++ b/weed/s3api/s3api_object_handlers_list_test.go @@ -64,6 +64,48 @@ func (c *testFilerClient) ListEntries(ctx context.Context, in *filer_pb.ListEntr return &testListEntriesStream{entries: entries}, nil } +type markerEchoFilerClient struct { + filer_pb.SeaweedFilerClient + entriesByDir map[string][]*filer_pb.Entry + returnFollowing bool +} + +// markerEchoFilerClient intentionally ignores request Limit/InclusiveStartFrom +// and simulates a backend that may echo StartFromFileName. entriesByDir controls +// returned entries; returnFollowing controls whether ListEntries returns only the +// echoed marker or the echoed marker plus following entries. +func (c *markerEchoFilerClient) ListEntries(ctx context.Context, in *filer_pb.ListEntriesRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[filer_pb.ListEntriesResponse], error) { + entries := c.entriesByDir[in.Directory] + ensureEntryAttributes(entries) + if in.StartFromFileName == "" { + return &testListEntriesStream{entries: entries}, nil + } + + for i, e := range entries { + if e.Name == in.StartFromFileName { + // Emulate buggy backend behavior: return marker again even when exclusive. + if c.returnFollowing { + echoAndFollowing := entries[i:] + return &testListEntriesStream{entries: echoAndFollowing}, nil + } + return &testListEntriesStream{entries: []*filer_pb.Entry{e}}, nil + } + } + + return &testListEntriesStream{entries: nil}, nil +} + +func ensureEntryAttributes(entries []*filer_pb.Entry) { + for _, entry := range entries { + if entry == nil { + continue + } + if entry.Attributes == nil { + entry.Attributes = &filer_pb.FuseAttributes{} + } + } +} + func TestListObjectsHandler(t *testing.T) { // https://docs.aws.amazon.com/AmazonS3/latest/API/v2-RESTBucketGET.html @@ -96,6 +138,20 @@ func TestListObjectsHandler(t *testing.T) { } } +func TestListObjectsV1NamespaceResponse(t *testing.T) { + response := ListBucketResult{ + Name: "test_container", + Prefix: "", + Marker: "", + NextMarker: "", + MaxKeys: 1000, + IsTruncated: false, + } + + encoded := string(s3err.EncodeXMLResponse(toListBucketResultV1(response))) + assert.Contains(t, encoded, ``) +} + func Test_normalizePrefixMarker(t *testing.T) { type args struct { prefix string @@ -253,6 +309,52 @@ func TestDoListFilerEntries_BucketRootPrefixSlashDelimiterSlash_ListsDirectories assert.Contains(t, seen, "Veeam") } +func TestDoListFilerEntries_ExclusiveStartSkipsMarkerEcho(t *testing.T) { + s3a := &S3ApiServer{} + client := &markerEchoFilerClient{ + entriesByDir: map[string][]*filer_pb.Entry{ + "/buckets/test-bucket": { + {Name: "file.txt", Attributes: &filer_pb.FuseAttributes{}}, + {Name: "test.txt", Attributes: &filer_pb.FuseAttributes{}}, + }, + }, + } + + cursor := &ListingCursor{maxKeys: 1000} + var seen []string + nextMarker, err := s3a.doListFilerEntries(client, "/buckets/test-bucket", "", cursor, "test.txt", "", false, "test-bucket", func(dir string, entry *filer_pb.Entry) { + seen = append(seen, entry.Name) + }) + + assert.NoError(t, err) + assert.Empty(t, seen, "marker entry should not be returned in exclusive mode") + assert.Equal(t, "", nextMarker, "next marker should be empty when only marker echo is returned") +} + +func TestDoListFilerEntries_ExclusiveStartSkipsMarkerEchoWithSubsequentEntries(t *testing.T) { + s3a := &S3ApiServer{} + client := &markerEchoFilerClient{ + entriesByDir: map[string][]*filer_pb.Entry{ + "/buckets/test-bucket": { + {Name: "file.txt", Attributes: &filer_pb.FuseAttributes{}}, + {Name: "test.txt", Attributes: &filer_pb.FuseAttributes{}}, + {Name: "zebra.txt", Attributes: &filer_pb.FuseAttributes{}}, + }, + }, + returnFollowing: true, + } + + cursor := &ListingCursor{maxKeys: 1000} + var seen []string + nextMarker, err := s3a.doListFilerEntries(client, "/buckets/test-bucket", "", cursor, "test.txt", "", false, "test-bucket", func(dir string, entry *filer_pb.Entry) { + seen = append(seen, entry.Name) + }) + + assert.NoError(t, err) + assert.Equal(t, []string{"zebra.txt"}, seen, "marker should be skipped while subsequent entries are returned") + assert.Equal(t, "zebra.txt", nextMarker) +} + func TestAllowUnorderedWithDelimiterValidation(t *testing.T) { t.Run("should return error when allow-unordered=true and delimiter are both present", func(t *testing.T) { // Create a request with both allow-unordered=true and delimiter @@ -329,6 +431,38 @@ func TestAllowUnorderedWithDelimiterValidation(t *testing.T) { }) } +func TestSanitizeV1MarkerEcho_NoProgressGuard(t *testing.T) { + response := ListBucketResult{ + Marker: "test.txt", + NextMarker: "test.txt", + IsTruncated: true, + Contents: []ListEntry{ + {Key: "test.txt"}, + }, + } + + sanitizeV1MarkerEcho(&response, "test.txt", false) + + assert.Empty(t, response.Contents) + assert.Equal(t, "", response.NextMarker) + assert.False(t, response.IsTruncated) + + response2 := ListBucketResult{ + Marker: "test file.txt", + NextMarker: "test%20file.txt", + IsTruncated: true, + Contents: []ListEntry{ + {Key: "test%20file.txt"}, + }, + } + + sanitizeV1MarkerEcho(&response2, "test file.txt", true) + + assert.Empty(t, response2.Contents) + assert.Equal(t, "", response2.NextMarker) + assert.False(t, response2.IsTruncated) +} + // TestMaxKeysParameterValidation tests the validation of max-keys parameter func TestMaxKeysParameterValidation(t *testing.T) { t.Run("valid max-keys values should work", func(t *testing.T) {