Browse Source
fix: S3 remote storage cold-cache read fails with 'size reported but no content available' (#7817)
fix: S3 remote storage cold-cache read fails with 'size reported but no content available' (#7817)
fix: S3 remote storage cold-cache read fails with 'size reported but no content available' (#7815) When a remote-only entry's initial caching attempt times out or fails, streamFromVolumeServers() now detects this case and retries caching synchronously before streaming, similar to how the filer server handles remote-only entries. Changes: - Modified streamFromVolumeServers() to check entry.IsInRemoteOnly() before treating missing chunks as a data integrity error - Added doCacheRemoteObject() as the core caching function (calls filer gRPC) - Added buildRemoteObjectPath() helper to reduce code duplication - Refactored cacheRemoteObjectWithDedup() and cacheRemoteObjectForStreaming() to reuse the shared functions - Added integration tests for remote storage scenarios Fixes https://github.com/seaweedfs/seaweedfs/issues/7815 (pull/7821/head)
committed by
GitHub
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 364 additions and 40 deletions
@ -0,0 +1,273 @@ |
|||||
|
package s3api |
||||
|
|
||||
|
import ( |
||||
|
"strings" |
||||
|
"testing" |
||||
|
|
||||
|
"github.com/seaweedfs/seaweedfs/weed/filer" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" |
||||
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" |
||||
|
"github.com/stretchr/testify/assert" |
||||
|
) |
||||
|
|
||||
|
// TestIsInRemoteOnly tests the IsInRemoteOnly method on filer_pb.Entry
|
||||
|
func TestIsInRemoteOnly(t *testing.T) { |
||||
|
tests := []struct { |
||||
|
name string |
||||
|
entry *filer_pb.Entry |
||||
|
expected bool |
||||
|
}{ |
||||
|
{ |
||||
|
name: "remote-only entry with no chunks", |
||||
|
entry: &filer_pb.Entry{ |
||||
|
Name: "remote-file.txt", |
||||
|
Chunks: nil, |
||||
|
RemoteEntry: &filer_pb.RemoteEntry{ |
||||
|
RemoteSize: 1024, |
||||
|
}, |
||||
|
}, |
||||
|
expected: true, |
||||
|
}, |
||||
|
{ |
||||
|
name: "remote entry with chunks (cached)", |
||||
|
entry: &filer_pb.Entry{ |
||||
|
Name: "cached-file.txt", |
||||
|
Chunks: []*filer_pb.FileChunk{ |
||||
|
{FileId: "1,abc123", Size: 1024, Offset: 0}, |
||||
|
}, |
||||
|
RemoteEntry: &filer_pb.RemoteEntry{ |
||||
|
RemoteSize: 1024, |
||||
|
}, |
||||
|
}, |
||||
|
expected: false, |
||||
|
}, |
||||
|
{ |
||||
|
name: "local file with chunks (not remote)", |
||||
|
entry: &filer_pb.Entry{ |
||||
|
Name: "local-file.txt", |
||||
|
Chunks: []*filer_pb.FileChunk{ |
||||
|
{FileId: "1,abc123", Size: 1024, Offset: 0}, |
||||
|
}, |
||||
|
RemoteEntry: nil, |
||||
|
}, |
||||
|
expected: false, |
||||
|
}, |
||||
|
{ |
||||
|
name: "empty remote entry (size 0)", |
||||
|
entry: &filer_pb.Entry{ |
||||
|
Name: "empty-remote.txt", |
||||
|
Chunks: nil, |
||||
|
RemoteEntry: &filer_pb.RemoteEntry{ |
||||
|
RemoteSize: 0, |
||||
|
}, |
||||
|
}, |
||||
|
expected: false, |
||||
|
}, |
||||
|
{ |
||||
|
name: "no chunks but nil RemoteEntry", |
||||
|
entry: &filer_pb.Entry{ |
||||
|
Name: "empty-local.txt", |
||||
|
Chunks: nil, |
||||
|
RemoteEntry: nil, |
||||
|
}, |
||||
|
expected: false, |
||||
|
}, |
||||
|
} |
||||
|
|
||||
|
for _, tt := range tests { |
||||
|
t.Run(tt.name, func(t *testing.T) { |
||||
|
result := tt.entry.IsInRemoteOnly() |
||||
|
assert.Equal(t, tt.expected, result, |
||||
|
"IsInRemoteOnly() for %s should return %v", tt.name, tt.expected) |
||||
|
}) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// TestRemoteOnlyEntryDetection tests that the streamFromVolumeServers logic
|
||||
|
// correctly distinguishes between remote-only entries and data integrity errors
|
||||
|
func TestRemoteOnlyEntryDetection(t *testing.T) { |
||||
|
tests := []struct { |
||||
|
name string |
||||
|
entry *filer_pb.Entry |
||||
|
shouldBeRemote bool |
||||
|
shouldBeDataError bool |
||||
|
shouldBeEmpty bool |
||||
|
}{ |
||||
|
{ |
||||
|
name: "remote-only entry (no chunks, has remote entry)", |
||||
|
entry: &filer_pb.Entry{ |
||||
|
Name: "remote-file.txt", |
||||
|
Chunks: nil, |
||||
|
Attributes: &filer_pb.FuseAttributes{ |
||||
|
FileSize: 1024, |
||||
|
}, |
||||
|
RemoteEntry: &filer_pb.RemoteEntry{ |
||||
|
RemoteSize: 1024, |
||||
|
}, |
||||
|
}, |
||||
|
shouldBeRemote: true, |
||||
|
shouldBeDataError: false, |
||||
|
shouldBeEmpty: false, |
||||
|
}, |
||||
|
{ |
||||
|
name: "data integrity error (no chunks, no remote, has size)", |
||||
|
entry: &filer_pb.Entry{ |
||||
|
Name: "corrupt-file.txt", |
||||
|
Chunks: nil, |
||||
|
Attributes: &filer_pb.FuseAttributes{ |
||||
|
FileSize: 1024, |
||||
|
}, |
||||
|
RemoteEntry: nil, |
||||
|
}, |
||||
|
shouldBeRemote: false, |
||||
|
shouldBeDataError: true, |
||||
|
shouldBeEmpty: false, |
||||
|
}, |
||||
|
{ |
||||
|
name: "empty local file (no chunks, no remote, size 0)", |
||||
|
entry: &filer_pb.Entry{ |
||||
|
Name: "empty-file.txt", |
||||
|
Chunks: nil, |
||||
|
Attributes: &filer_pb.FuseAttributes{ |
||||
|
FileSize: 0, |
||||
|
}, |
||||
|
RemoteEntry: nil, |
||||
|
}, |
||||
|
shouldBeRemote: false, |
||||
|
shouldBeDataError: false, |
||||
|
shouldBeEmpty: true, |
||||
|
}, |
||||
|
{ |
||||
|
name: "normal file with chunks", |
||||
|
entry: &filer_pb.Entry{ |
||||
|
Name: "normal-file.txt", |
||||
|
Chunks: []*filer_pb.FileChunk{ |
||||
|
{FileId: "1,abc123", Size: 1024, Offset: 0}, |
||||
|
}, |
||||
|
Attributes: &filer_pb.FuseAttributes{ |
||||
|
FileSize: 1024, |
||||
|
}, |
||||
|
RemoteEntry: nil, |
||||
|
}, |
||||
|
shouldBeRemote: false, |
||||
|
shouldBeDataError: false, |
||||
|
shouldBeEmpty: false, |
||||
|
}, |
||||
|
} |
||||
|
|
||||
|
for _, tt := range tests { |
||||
|
t.Run(tt.name, func(t *testing.T) { |
||||
|
chunks := tt.entry.GetChunks() |
||||
|
totalSize := int64(filer.FileSize(tt.entry)) |
||||
|
|
||||
|
if len(chunks) == 0 { |
||||
|
// This mirrors the logic in streamFromVolumeServers
|
||||
|
if tt.entry.IsInRemoteOnly() { |
||||
|
assert.True(t, tt.shouldBeRemote, |
||||
|
"Entry should be detected as remote-only") |
||||
|
} else if totalSize > 0 && len(tt.entry.Content) == 0 { |
||||
|
assert.True(t, tt.shouldBeDataError, |
||||
|
"Entry should be detected as data integrity error") |
||||
|
} else { |
||||
|
assert.True(t, tt.shouldBeEmpty, |
||||
|
"Entry should be detected as empty") |
||||
|
} |
||||
|
} else { |
||||
|
assert.False(t, tt.shouldBeRemote, "Entry with chunks should not be remote-only") |
||||
|
assert.False(t, tt.shouldBeDataError, "Entry with chunks should not be data error") |
||||
|
assert.False(t, tt.shouldBeEmpty, "Entry with chunks should not be empty") |
||||
|
} |
||||
|
}) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// TestVersionedRemoteObjectPathBuilding tests that the path building logic
|
||||
|
// correctly handles versioned objects stored in .versions/ directory
|
||||
|
func TestVersionedRemoteObjectPathBuilding(t *testing.T) { |
||||
|
bucketsPath := "/buckets" |
||||
|
|
||||
|
tests := []struct { |
||||
|
name string |
||||
|
bucket string |
||||
|
object string |
||||
|
versionId string |
||||
|
expectedDir string |
||||
|
expectedName string |
||||
|
}{ |
||||
|
{ |
||||
|
name: "non-versioned object (empty versionId)", |
||||
|
bucket: "mybucket", |
||||
|
object: "myobject.txt", |
||||
|
versionId: "", |
||||
|
expectedDir: "/buckets/mybucket", |
||||
|
expectedName: "myobject.txt", |
||||
|
}, |
||||
|
{ |
||||
|
name: "null version", |
||||
|
bucket: "mybucket", |
||||
|
object: "myobject.txt", |
||||
|
versionId: "null", |
||||
|
expectedDir: "/buckets/mybucket", |
||||
|
expectedName: "myobject.txt", |
||||
|
}, |
||||
|
{ |
||||
|
name: "specific version", |
||||
|
bucket: "mybucket", |
||||
|
object: "myobject.txt", |
||||
|
versionId: "abc123", |
||||
|
expectedDir: "/buckets/mybucket/myobject.txt" + s3_constants.VersionsFolder, |
||||
|
expectedName: "v_abc123", |
||||
|
}, |
||||
|
{ |
||||
|
name: "nested object with version", |
||||
|
bucket: "mybucket", |
||||
|
object: "folder/subfolder/file.txt", |
||||
|
versionId: "xyz789", |
||||
|
expectedDir: "/buckets/mybucket/folder/subfolder/file.txt" + s3_constants.VersionsFolder, |
||||
|
expectedName: "v_xyz789", |
||||
|
}, |
||||
|
{ |
||||
|
name: "object with leading slash and version", |
||||
|
bucket: "mybucket", |
||||
|
object: "/path/to/file.txt", |
||||
|
versionId: "ver456", |
||||
|
expectedDir: "/buckets/mybucket/path/to/file.txt" + s3_constants.VersionsFolder, |
||||
|
expectedName: "v_ver456", |
||||
|
}, |
||||
|
} |
||||
|
|
||||
|
for _, tt := range tests { |
||||
|
t.Run(tt.name, func(t *testing.T) { |
||||
|
var dir, name string |
||||
|
|
||||
|
// This mirrors the logic in cacheRemoteObjectForStreaming
|
||||
|
if tt.versionId != "" && tt.versionId != "null" { |
||||
|
// Versioned object path
|
||||
|
normalizedObject := strings.TrimPrefix(removeDuplicateSlashesTest(tt.object), "/") |
||||
|
dir = bucketsPath + "/" + tt.bucket + "/" + normalizedObject + s3_constants.VersionsFolder |
||||
|
name = "v_" + tt.versionId |
||||
|
} else { |
||||
|
// Non-versioned path (simplified - just for testing)
|
||||
|
dir = bucketsPath + "/" + tt.bucket |
||||
|
normalizedObject := strings.TrimPrefix(removeDuplicateSlashesTest(tt.object), "/") |
||||
|
if idx := strings.LastIndex(normalizedObject, "/"); idx > 0 { |
||||
|
dir = dir + "/" + normalizedObject[:idx] |
||||
|
name = normalizedObject[idx+1:] |
||||
|
} else { |
||||
|
name = normalizedObject |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
assert.Equal(t, tt.expectedDir, dir, "Directory path should match") |
||||
|
assert.Equal(t, tt.expectedName, name, "Name should match") |
||||
|
}) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// removeDuplicateSlashesTest is a test helper that mirrors production code.
// It collapses every run of consecutive '/' characters in s into a single '/'
// in one pass over the input.
func removeDuplicateSlashesTest(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	prevSlash := false
	for _, r := range s {
		if r == '/' {
			// Drop this slash if the previous emitted rune was also a slash.
			if prevSlash {
				continue
			}
			prevSlash = true
		} else {
			prevSlash = false
		}
		b.WriteRune(r)
	}
	return b.String()
}
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue