s3: fix remote object not caching (#7790)
* s3: fix remote object not caching

* s3: address review comments for remote object caching
  - Fix leading slash in object name by using strings.TrimPrefix
  - Return cached entry from CacheRemoteObjectToLocalCluster to get updated local chunk locations
  - Reuse existing helper function instead of inline gRPC call

* s3/filer: add singleflight deduplication for remote object caching
  - Add singleflight.Group to FilerServer to deduplicate concurrent cache operations
  - Wrap CacheRemoteObjectToLocalCluster with singleflight to ensure only one caching operation runs per object when multiple clients request the same file
  - Add early-return check for already-cached objects
  - S3 API calls filer gRPC with timeout and graceful fallback on error
  - Clear negative bucket cache when bucket is created via weed shell
  - Add integration tests for remote cache with singleflight deduplication

  This benefits all clients (S3, HTTP, Hadoop) accessing remote-mounted objects by preventing redundant cache operations and improving concurrent access performance.

  Fixes: https://github.com/seaweedfs/seaweedfs/discussions/7599

* fix: data race in concurrent remote object caching
  - Add mutex to protect chunks slice from concurrent append
  - Add mutex to protect fetchAndWriteErr from concurrent read/write
  - Fix incorrect error check (was checking assignResult.Error instead of parseErr)
  - Rename inner variable to avoid shadowing fetchAndWriteErr

* fix: address code review comments
  - Remove duplicate remote caching block in GetObjectHandler, keep only singleflight version
  - Add mutex protection for concurrent chunk slice and error access (data race fix)
  - Use lazy initialization for S3 client in tests to avoid panic during package load
  - Fix markdown linting: add language specifier to code fence, blank lines around tables
  - Add 'all' target to Makefile as alias for test-with-server
  - Remove unused 'util' import

* style: remove emojis from test files

* fix: add defensive checks and sort chunks by offset
  - Add nil check and type assertion check for singleflight result
  - Sort chunks by offset after concurrent fetching to maintain file order

* fix: improve test diagnostics and path normalization
  - runWeedShell now returns error for better test diagnostics
  - Add all targets to .PHONY in Makefile (logs-primary, logs-remote, health)
  - Strip leading slash from normalizedObject to avoid double slashes in path

---------

Co-authored-by: chrislu <chris.lu@gmail.com>
Co-authored-by: Chris Lu <chrislusf@users.noreply.github.com>
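The pattern described above, as a minimal self-contained sketch using `golang.org/x/sync/singleflight` (the package this fix names). The `server`, `cacheRemoteObject`, and `chunk` names are illustrative stand-ins, not the actual SeaweedFS types:

```go
// Sketch of the singleflight deduplication plus the data-race fix described
// above: concurrent chunk appends are mutex-protected, then sorted by offset.
package main

import (
	"fmt"
	"sort"
	"sync"

	"golang.org/x/sync/singleflight"
)

type chunk struct {
	offset int64
	size   int
}

type server struct {
	group  singleflight.Group // deduplicates concurrent cache operations per object
	cached sync.Map           // path -> []chunk; stands in for the filer's metadata store
}

// cacheRemoteObject ensures the object at path is cached locally. Concurrent
// callers for the same path share one fetch (singleflight), and an
// early-return check skips work when the object is already cached.
func (s *server) cacheRemoteObject(path string) ([]chunk, error) {
	if v, ok := s.cached.Load(path); ok {
		return v.([]chunk), nil // already cached: early return
	}
	v, err, _ := s.group.Do(path, func() (interface{}, error) {
		var (
			mu     sync.Mutex
			chunks []chunk
			wg     sync.WaitGroup
		)
		// Fetch remote chunks concurrently; the mutex guards the shared slice.
		for i := 0; i < 4; i++ {
			wg.Add(1)
			go func(part int) {
				defer wg.Done()
				c := chunk{offset: int64(part) * 1024, size: 1024} // placeholder fetch
				mu.Lock()
				chunks = append(chunks, c)
				mu.Unlock()
			}(i)
		}
		wg.Wait()
		// Concurrent appends finish out of order; restore file order by offset.
		sort.Slice(chunks, func(i, j int) bool { return chunks[i].offset < chunks[j].offset })
		s.cached.Store(path, chunks)
		return chunks, nil
	})
	if err != nil {
		return nil, err
	}
	result, ok := v.([]chunk) // defensive type assertion, as in the fix
	if !ok {
		return nil, fmt.Errorf("unexpected result type %T", v)
	}
	return result, nil
}

func main() {
	s := &server{}
	chunks, _ := s.cacheRemoteObject("/buckets/remotemounted/hello.txt")
	fmt.Println(len(chunks), "chunks cached")
}
```

`Group.Do` collapses concurrent calls that share a key into a single execution and hands every waiter the same result, which is why the ten-way concurrent read in the tests below triggers only one cache fill.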
13 changed files with 992 additions and 36 deletions
- .github/workflows/s3-go-tests.yml (55 changes)
- .gitignore (4 changes)
- test/s3/remote_cache/Makefile (210 changes)
- test/s3/remote_cache/README.md (157 changes)
- test/s3/remote_cache/remote_cache_test.go (375 changes)
- weed/filer/read_remote.go (16 changes)
- weed/s3api/auth_credentials_subscribe.go (3 changes)
- weed/s3api/bucket_metadata.go (9 changes)
- weed/s3api/s3api_object_handlers.go (67 changes)
- weed/s3api/s3api_server.go (2 changes)
- weed/server/filer_grpc_server_remote.go (124 changes)
- weed/server/filer_server.go (4 changes)
- weed/shell/command_remote_cache.go (2 changes)
test/s3/remote_cache/Makefile
@@ -0,0 +1,210 @@
# Remote Storage Cache Integration Tests
# Tests the remote object caching functionality with singleflight deduplication
# Uses two SeaweedFS instances: primary (with caching) and secondary (as remote storage)

.PHONY: all help build-weed check-deps start-remote stop-remote start-primary stop-primary \
	setup-remote test test-with-server clean logs logs-primary logs-remote health

all: test-with-server

# Configuration
WEED_BINARY := ../../../weed/weed_binary

# Primary SeaweedFS (the one being tested - has remote caching)
PRIMARY_S3_PORT := 8333
PRIMARY_FILER_PORT := 8888
PRIMARY_MASTER_PORT := 9333
PRIMARY_VOLUME_PORT := 8080
PRIMARY_METRICS_PORT := 9324
PRIMARY_DIR := ./test-primary-data

# Secondary SeaweedFS (acts as "remote" S3 storage)
REMOTE_S3_PORT := 8334
REMOTE_FILER_PORT := 8889
REMOTE_MASTER_PORT := 9334
REMOTE_VOLUME_PORT := 8081
REMOTE_METRICS_PORT := 9325
REMOTE_DIR := ./test-remote-data

# Test configuration
TEST_TIMEOUT := 10m
TEST_PATTERN := TestRemoteCache

# Buckets
REMOTE_BUCKET := remotesourcebucket

# Default target
help:
	@echo "Remote Storage Cache Integration Tests"
	@echo ""
	@echo "Uses two SeaweedFS instances:"
	@echo "  - Primary (port $(PRIMARY_S3_PORT)): Being tested, has remote caching"
	@echo "  - Remote (port $(REMOTE_S3_PORT)): Acts as remote S3 storage"
	@echo ""
	@echo "Available targets:"
	@echo "  help              - Show this help message"
	@echo "  build-weed        - Build the SeaweedFS binary"
	@echo "  check-deps        - Check dependencies"
	@echo "  start-remote      - Start remote SeaweedFS (secondary)"
	@echo "  stop-remote       - Stop remote SeaweedFS"
	@echo "  start-primary     - Start primary SeaweedFS"
	@echo "  stop-primary      - Stop primary SeaweedFS"
	@echo "  setup-remote      - Configure remote storage mount"
	@echo "  test              - Run tests (assumes servers are running)"
	@echo "  test-with-server  - Start servers, run tests, stop servers"
	@echo "  clean             - Clean up all resources"
	@echo "  logs              - Show server logs"

# Build the SeaweedFS binary
build-weed:
	@echo "Building SeaweedFS binary..."
	@cd ../../../weed && go build -o weed_binary .
	@chmod +x $(WEED_BINARY)
	@echo "SeaweedFS binary built"

check-deps: build-weed
	@echo "Checking dependencies..."
	@command -v go >/dev/null 2>&1 || (echo "Go is required" && exit 1)
	@test -f $(WEED_BINARY) || (echo "SeaweedFS binary not found" && exit 1)
	@echo "All dependencies available"

# Start remote SeaweedFS (acts as the "remote" S3 storage)
start-remote: check-deps
	@echo "Starting remote SeaweedFS (secondary instance)..."
	@rm -f remote-server.pid
	@mkdir -p $(REMOTE_DIR)
	@$(WEED_BINARY) server \
		-s3 \
		-s3.port=$(REMOTE_S3_PORT) \
		-s3.allowDeleteBucketNotEmpty=true \
		-filer \
		-filer.port=$(REMOTE_FILER_PORT) \
		-master.port=$(REMOTE_MASTER_PORT) \
		-volume.port=$(REMOTE_VOLUME_PORT) \
		-master.volumeSizeLimitMB=50 \
		-volume.max=100 \
		-dir=$(REMOTE_DIR) \
		-volume.preStopSeconds=1 \
		-metricsPort=$(REMOTE_METRICS_PORT) \
		> remote-weed.log 2>&1 & echo $$! > remote-server.pid
	@echo "Waiting for remote SeaweedFS to start..."
	@for i in $$(seq 1 60); do \
		if curl -s http://localhost:$(REMOTE_S3_PORT) >/dev/null 2>&1; then \
			echo "Remote SeaweedFS started on port $(REMOTE_S3_PORT)"; \
			exit 0; \
		fi; \
		sleep 1; \
	done; \
	echo "ERROR: Remote SeaweedFS failed to start"; \
	cat remote-weed.log; \
	exit 1

stop-remote:
	@echo "Stopping remote SeaweedFS..."
	@if [ -f remote-server.pid ]; then \
		kill -TERM $$(cat remote-server.pid) 2>/dev/null || true; \
		sleep 2; \
		kill -KILL $$(cat remote-server.pid) 2>/dev/null || true; \
		rm -f remote-server.pid; \
	fi
	@echo "Remote SeaweedFS stopped"

# Start primary SeaweedFS (the one being tested)
start-primary: check-deps
	@echo "Starting primary SeaweedFS..."
	@rm -f primary-server.pid
	@mkdir -p $(PRIMARY_DIR)
	@$(WEED_BINARY) server \
		-s3 \
		-s3.port=$(PRIMARY_S3_PORT) \
		-s3.allowDeleteBucketNotEmpty=true \
		-filer \
		-filer.port=$(PRIMARY_FILER_PORT) \
		-master.port=$(PRIMARY_MASTER_PORT) \
		-volume.port=$(PRIMARY_VOLUME_PORT) \
		-master.volumeSizeLimitMB=50 \
		-volume.max=100 \
		-dir=$(PRIMARY_DIR) \
		-volume.preStopSeconds=1 \
		-metricsPort=$(PRIMARY_METRICS_PORT) \
		> primary-weed.log 2>&1 & echo $$! > primary-server.pid
	@echo "Waiting for primary SeaweedFS to start..."
	@for i in $$(seq 1 60); do \
		if curl -s http://localhost:$(PRIMARY_S3_PORT) >/dev/null 2>&1; then \
			echo "Primary SeaweedFS started on port $(PRIMARY_S3_PORT)"; \
			exit 0; \
		fi; \
		sleep 1; \
	done; \
	echo "ERROR: Primary SeaweedFS failed to start"; \
	cat primary-weed.log; \
	exit 1

stop-primary:
	@echo "Stopping primary SeaweedFS..."
	@if [ -f primary-server.pid ]; then \
		kill -TERM $$(cat primary-server.pid) 2>/dev/null || true; \
		sleep 2; \
		kill -KILL $$(cat primary-server.pid) 2>/dev/null || true; \
		rm -f primary-server.pid; \
	fi
	@echo "Primary SeaweedFS stopped"

# Create bucket on remote and configure remote storage mount on primary
setup-remote:
	@echo "Creating bucket on remote SeaweedFS..."
	@curl -s -X PUT "http://localhost:$(REMOTE_S3_PORT)/$(REMOTE_BUCKET)" || echo "Bucket may already exist"
	@sleep 1
	@echo "Configuring remote storage on primary..."
	@printf 'remote.configure -name=seaweedremote -type=s3 -s3.access_key=any -s3.secret_key=any -s3.endpoint=http://localhost:$(REMOTE_S3_PORT) -s3.region=us-east-1\nexit\n' | $(WEED_BINARY) shell -master=localhost:$(PRIMARY_MASTER_PORT) 2>&1 || echo "remote.configure done"
	@sleep 2
	@echo "Mounting remote bucket on primary..."
	@printf 'remote.mount -dir=/buckets/remotemounted -remote=seaweedremote/$(REMOTE_BUCKET) -nonempty\nexit\n' | $(WEED_BINARY) shell -master=localhost:$(PRIMARY_MASTER_PORT) 2>&1 || echo "remote.mount done"
	@sleep 1
	@echo "Remote storage configured"

# Run tests
test: check-deps
	@echo "Running remote cache tests..."
	@go test -v -timeout=$(TEST_TIMEOUT) -run "$(TEST_PATTERN)" .
	@echo "Tests completed"

# Full test workflow
test-with-server: start-remote start-primary
	@sleep 3
	@$(MAKE) setup-remote || (echo "Remote setup failed" && $(MAKE) stop-primary stop-remote && exit 1)
	@sleep 2
	@echo "Running remote cache tests..."
	@$(MAKE) test || (echo "Tests failed" && tail -50 primary-weed.log && $(MAKE) stop-primary stop-remote && exit 1)
	@$(MAKE) stop-primary stop-remote
	@echo "All tests passed"

# Show logs
logs:
	@echo "=== Primary SeaweedFS Logs ==="
	@if [ -f primary-weed.log ]; then tail -50 primary-weed.log; else echo "No log file"; fi
	@echo ""
	@echo "=== Remote SeaweedFS Logs ==="
	@if [ -f remote-weed.log ]; then tail -50 remote-weed.log; else echo "No log file"; fi

logs-primary:
	@if [ -f primary-weed.log ]; then tail -f primary-weed.log; else echo "No log file"; fi

logs-remote:
	@if [ -f remote-weed.log ]; then tail -f remote-weed.log; else echo "No log file"; fi

# Clean up
clean:
	@$(MAKE) stop-primary
	@$(MAKE) stop-remote
	@rm -f primary-weed.log remote-weed.log primary-server.pid remote-server.pid
	@rm -rf $(PRIMARY_DIR) $(REMOTE_DIR)
	@rm -f remote_cache.test
	@go clean -testcache
	@echo "Cleanup completed"

# Health check
health:
	@echo "Checking server status..."
	@curl -s http://localhost:$(PRIMARY_S3_PORT) >/dev/null 2>&1 && echo "Primary S3 ($(PRIMARY_S3_PORT)): UP" || echo "Primary S3 ($(PRIMARY_S3_PORT)): DOWN"
	@curl -s http://localhost:$(REMOTE_S3_PORT) >/dev/null 2>&1 && echo "Remote S3 ($(REMOTE_S3_PORT)): UP" || echo "Remote S3 ($(REMOTE_S3_PORT)): DOWN"
test/s3/remote_cache/README.md
@@ -0,0 +1,157 @@
# Remote Object Cache Integration Tests

This directory contains integration tests for the remote object caching feature with singleflight deduplication.

## Test Flow

Each test follows this pattern:

1. **Write to local** - Upload data to primary SeaweedFS (local storage)
2. **Uncache** - Push data to remote storage and remove local chunks
3. **Read** - Read data (triggers caching from remote back to local)

This tests the full remote caching workflow including singleflight deduplication.
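For orientation, the same flow as one standalone program, condensed from the test helpers in `remote_cache_test.go` below. Ports, bucket, and the `remote.uncache` shell command match this test setup; the sketch assumes a built `weed` binary on the PATH:

```go
// One write -> uncache -> read round trip against the primary instance.
package main

import (
	"bytes"
	"fmt"
	"io"
	"os/exec"
	"strings"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/credentials"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
)

func main() {
	sess := session.Must(session.NewSession(&aws.Config{
		Region:           aws.String("us-east-1"),
		Endpoint:         aws.String("http://localhost:8333"), // primary S3
		Credentials:      credentials.NewStaticCredentials("any", "any", ""),
		DisableSSL:       aws.Bool(true),
		S3ForcePathStyle: aws.Bool(true),
	}))
	client := s3.New(sess)

	// 1. Write to local (primary).
	if _, err := client.PutObject(&s3.PutObjectInput{
		Bucket: aws.String("remotemounted"),
		Key:    aws.String("hello.txt"),
		Body:   bytes.NewReader([]byte("hello remote cache")),
	}); err != nil {
		panic(err)
	}

	// 2. Uncache: push to remote, drop local chunks (via weed shell).
	cmd := exec.Command("weed", "shell", "-master=localhost:9333")
	cmd.Stdin = strings.NewReader("remote.uncache -dir=/buckets/remotemounted -include=hello.txt\nexit\n")
	if out, err := cmd.CombinedOutput(); err != nil {
		panic(fmt.Sprintf("%v: %s", err, out))
	}

	// 3. Read: triggers caching from remote back to local.
	resp, err := client.GetObject(&s3.GetObjectInput{
		Bucket: aws.String("remotemounted"),
		Key:    aws.String("hello.txt"),
	})
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	data, _ := io.ReadAll(resp.Body)
	fmt.Printf("read back %d bytes\n", len(data))
}
```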
## Architecture

```text
┌──────────────────────────────────────────────────────────────────┐
│                           Test Client                            │
│                                                                  │
│   1. PUT data to primary SeaweedFS                               │
│   2. remote.cache.uncache (push to remote, purge local)          │
│   3. GET data (triggers caching from remote)                     │
│   4. Verify singleflight deduplication                           │
└─────────────────────────────────┬────────────────────────────────┘
                                  │
                ┌─────────────────┴─────────────────┐
                ▼                                   ▼
┌────────────────────────────────────┐  ┌────────────────────────────────┐
│      Primary SeaweedFS             │  │      Remote SeaweedFS          │
│      (port 8333)                   │  │      (port 8334)               │
│                                    │  │                                │
│  - Being tested                    │  │  - Acts as "remote" S3         │
│  - Has remote storage mounted      │─▶│  - Receives uncached data      │
│  - Caches remote objects           │  │  - Serves data for caching     │
│  - Singleflight deduplication      │  │                                │
└────────────────────────────────────┘  └────────────────────────────────┘
```

## What's Being Tested

1. **Basic Remote Caching**: Write → Uncache → Read workflow
2. **Singleflight Deduplication**: Concurrent reads only trigger ONE caching operation
3. **Large Object Caching**: 5MB files cache correctly
4. **Range Requests**: Partial reads work with cached objects
5. **Not Found Handling**: Proper error for non-existent objects

## Quick Start

### Run Full Test Suite (Recommended)

```bash
# Build SeaweedFS, start both servers, run tests, stop servers
make test-with-server
```

### Manual Steps

```bash
# 1. Build SeaweedFS binary
make build-weed

# 2. Start remote SeaweedFS (acts as "remote" storage)
make start-remote

# 3. Start primary SeaweedFS (the one being tested)
make start-primary

# 4. Configure remote storage mount
make setup-remote

# 5. Run tests
make test

# 6. Clean up
make clean
```

## Configuration

### Primary SeaweedFS (Being Tested)

| Service | Port |
|---------|------|
| S3 API  | 8333 |
| Filer   | 8888 |
| Master  | 9333 |
| Volume  | 8080 |

### Remote SeaweedFS (Remote Storage)

| Service | Port |
|---------|------|
| S3 API  | 8334 |
| Filer   | 8889 |
| Master  | 9334 |
| Volume  | 8081 |

## Makefile Targets

```bash
make help             # Show all available targets
make build-weed       # Build SeaweedFS binary
make start-remote     # Start remote SeaweedFS
make start-primary    # Start primary SeaweedFS
make setup-remote     # Configure remote storage mount
make test             # Run tests
make test-with-server # Full automated test workflow
make logs             # Show server logs
make health           # Check server status
make clean            # Stop servers and clean up
```

## Test Details

### TestRemoteCacheBasic

Basic workflow test:

1. Write object to primary (local)
2. Uncache (push to remote, remove local chunks)
3. Read (triggers caching from remote)
4. Read again (from local cache - should be faster)

### TestRemoteCacheConcurrent

Singleflight deduplication test:

1. Write 1MB object
2. Uncache to remote
3. Launch 10 concurrent reads
4. All should succeed with correct data
5. Only ONE caching operation should run (singleflight)

### TestRemoteCacheLargeObject

Large file test (5MB) to verify chunked transfer works correctly.
### TestRemoteCacheRangeRequest

Tests that HTTP range requests work correctly after caching.

### TestRemoteCacheNotFound

Tests proper error handling for non-existent objects.

## Troubleshooting

### View logs

```bash
make logs          # Show recent logs from both servers
make logs-primary  # Follow primary logs in real-time
make logs-remote   # Follow remote logs in real-time
```

### Check server health

```bash
make health
```

### Clean up and retry

```bash
make clean
make test-with-server
```
test/s3/remote_cache/remote_cache_test.go
@@ -0,0 +1,375 @@
package remote_cache

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"os"
	"os/exec"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/credentials"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// Test configuration
// Uses two SeaweedFS instances:
//   - Primary: The one being tested (has remote caching)
//   - Remote: Acts as the "remote" S3 storage
const (
	// Primary SeaweedFS
	primaryEndpoint   = "http://localhost:8333"
	primaryMasterPort = "9333"

	// Remote SeaweedFS (acts as remote storage)
	remoteEndpoint = "http://localhost:8334"

	// Credentials (anonymous access for testing)
	accessKey = "any"
	secretKey = "any"

	// Bucket name - mounted on primary as remote storage
	testBucket = "remotemounted"

	// Path to weed binary
	weedBinary = "../../../weed/weed_binary"
)

var (
	primaryClient     *s3.S3
	primaryClientOnce sync.Once
)

func getPrimaryClient() *s3.S3 {
	primaryClientOnce.Do(func() {
		primaryClient = createS3Client(primaryEndpoint)
	})
	return primaryClient
}

func createS3Client(endpoint string) *s3.S3 {
	sess, err := session.NewSession(&aws.Config{
		Region:           aws.String("us-east-1"),
		Endpoint:         aws.String(endpoint),
		Credentials:      credentials.NewStaticCredentials(accessKey, secretKey, ""),
		DisableSSL:       aws.Bool(!strings.HasPrefix(endpoint, "https")),
		S3ForcePathStyle: aws.Bool(true),
	})
	if err != nil {
		panic(fmt.Sprintf("failed to create session: %v", err))
	}
	return s3.New(sess)
}

// skipIfNotRunning skips the test if the servers aren't running
func skipIfNotRunning(t *testing.T) {
	resp, err := http.Get(primaryEndpoint)
	if err != nil {
		t.Skipf("Primary SeaweedFS not running at %s: %v", primaryEndpoint, err)
	}
	resp.Body.Close()

	resp, err = http.Get(remoteEndpoint)
	if err != nil {
		t.Skipf("Remote SeaweedFS not running at %s: %v", remoteEndpoint, err)
	}
	resp.Body.Close()
}

// runWeedShell executes a weed shell command
func runWeedShell(t *testing.T, command string) (string, error) {
	cmd := exec.Command(weedBinary, "shell", "-master=localhost:"+primaryMasterPort)
	cmd.Stdin = strings.NewReader(command + "\nexit\n")
	output, err := cmd.CombinedOutput()
	if err != nil {
		t.Logf("weed shell command '%s' failed: %v, output: %s", command, err, string(output))
		return string(output), err
	}
	return string(output), nil
}

// uploadToPrimary uploads an object to the primary SeaweedFS (local write)
func uploadToPrimary(t *testing.T, key string, data []byte) {
	_, err := getPrimaryClient().PutObject(&s3.PutObjectInput{
		Bucket: aws.String(testBucket),
		Key:    aws.String(key),
		Body:   bytes.NewReader(data),
	})
	require.NoError(t, err, "failed to upload to primary SeaweedFS")
}

// getFromPrimary gets an object from primary SeaweedFS
func getFromPrimary(t *testing.T, key string) []byte {
	resp, err := getPrimaryClient().GetObject(&s3.GetObjectInput{
		Bucket: aws.String(testBucket),
		Key:    aws.String(key),
	})
	require.NoError(t, err, "failed to get from primary SeaweedFS")
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	require.NoError(t, err, "failed to read response body")
	return data
}

// syncToRemote syncs local data to remote storage
func syncToRemote(t *testing.T) {
	t.Log("Syncing to remote storage...")
	output, err := runWeedShell(t, "remote.cache.uncache -dir=/buckets/"+testBucket+" -include=*")
	if err != nil {
		t.Logf("syncToRemote warning: %v", err)
	}
	t.Log(output)
	time.Sleep(1 * time.Second)
}

// uncacheLocal purges the local cache, forcing data to be fetched from remote
func uncacheLocal(t *testing.T, pattern string) {
	t.Logf("Purging local cache for pattern: %s", pattern)
	output, err := runWeedShell(t, fmt.Sprintf("remote.uncache -dir=/buckets/%s -include=%s", testBucket, pattern))
	if err != nil {
		t.Logf("uncacheLocal warning: %v", err)
	}
	t.Log(output)
	time.Sleep(500 * time.Millisecond)
}

// TestRemoteCacheBasic tests the basic caching workflow:
//  1. Write to local
//  2. Uncache (push to remote, remove local chunks)
//  3. Read (triggers caching from remote)
func TestRemoteCacheBasic(t *testing.T) {
	skipIfNotRunning(t)

	testKey := fmt.Sprintf("test-basic-%d.txt", time.Now().UnixNano())
	testData := []byte("Hello, this is test data for remote caching!")

	// Step 1: Write to local
	t.Log("Step 1: Writing object to primary SeaweedFS (local)...")
	uploadToPrimary(t, testKey, testData)

	// Verify it's readable
	result := getFromPrimary(t, testKey)
	assert.Equal(t, testData, result, "initial read mismatch")

	// Step 2: Uncache - push to remote and remove local chunks
	t.Log("Step 2: Uncaching (pushing to remote, removing local chunks)...")
	uncacheLocal(t, testKey)

	// Step 3: Read - this should trigger caching from remote
	t.Log("Step 3: Reading object (should trigger caching from remote)...")
	start := time.Now()
	result = getFromPrimary(t, testKey)
	firstReadDuration := time.Since(start)

	assert.Equal(t, testData, result, "data mismatch after cache")
	t.Logf("First read (from remote) took %v", firstReadDuration)

	// Step 4: Read again - should be from local cache
	t.Log("Step 4: Reading again (should be from local cache)...")
	start = time.Now()
	result = getFromPrimary(t, testKey)
	secondReadDuration := time.Since(start)

	assert.Equal(t, testData, result, "data mismatch on cached read")
	t.Logf("Second read (from cache) took %v", secondReadDuration)

	t.Log("Basic caching test passed")
}

// TestRemoteCacheConcurrent tests that concurrent reads of the same
// remote object only trigger ONE caching operation (singleflight deduplication)
func TestRemoteCacheConcurrent(t *testing.T) {
	skipIfNotRunning(t)

	testKey := fmt.Sprintf("test-concurrent-%d.txt", time.Now().UnixNano())
	// Use larger data to make caching take measurable time
	testData := make([]byte, 1024*1024) // 1MB
	for i := range testData {
		testData[i] = byte(i % 256)
	}

	// Step 1: Write to local
	t.Log("Step 1: Writing 1MB object to primary SeaweedFS...")
	uploadToPrimary(t, testKey, testData)

	// Verify it's readable
	result := getFromPrimary(t, testKey)
	assert.Equal(t, len(testData), len(result), "initial size mismatch")

	// Step 2: Uncache
	t.Log("Step 2: Uncaching (pushing to remote)...")
	uncacheLocal(t, testKey)

	// Step 3: Launch many concurrent reads - singleflight should deduplicate
	numRequests := 10
	var wg sync.WaitGroup
	var successCount atomic.Int32
	var errorCount atomic.Int32
	results := make(chan []byte, numRequests)

	t.Logf("Step 3: Launching %d concurrent requests...", numRequests)
	startTime := time.Now()

	for i := 0; i < numRequests; i++ {
		wg.Add(1)
		go func(idx int) {
			defer wg.Done()
			resp, err := getPrimaryClient().GetObject(&s3.GetObjectInput{
				Bucket: aws.String(testBucket),
				Key:    aws.String(testKey),
			})
			if err != nil {
				t.Logf("Request %d failed: %v", idx, err)
				errorCount.Add(1)
				return
			}
			defer resp.Body.Close()

			data, err := io.ReadAll(resp.Body)
			if err != nil {
				t.Logf("Request %d read failed: %v", idx, err)
				errorCount.Add(1)
				return
			}

			results <- data
			successCount.Add(1)
		}(i)
	}

	wg.Wait()
	close(results)
	totalDuration := time.Since(startTime)

	t.Logf("All %d requests completed in %v", numRequests, totalDuration)
	t.Logf("Successful: %d, Failed: %d", successCount.Load(), errorCount.Load())

	// Verify all successful requests returned correct data
	for data := range results {
		assert.Equal(t, len(testData), len(data), "data length mismatch")
	}

	// All requests should succeed
	assert.Equal(t, int32(numRequests), successCount.Load(), "some requests failed")
	assert.Equal(t, int32(0), errorCount.Load(), "no requests should fail")

	t.Log("Concurrent caching test passed")
}

// TestRemoteCacheLargeObject tests caching of larger objects
func TestRemoteCacheLargeObject(t *testing.T) {
	skipIfNotRunning(t)

	testKey := fmt.Sprintf("test-large-%d.bin", time.Now().UnixNano())
	// 5MB object
	testData := make([]byte, 5*1024*1024)
	for i := range testData {
		testData[i] = byte(i % 256)
	}

	// Step 1: Write to local
	t.Log("Step 1: Writing 5MB object to primary SeaweedFS...")
	uploadToPrimary(t, testKey, testData)

	// Verify it's readable
	result := getFromPrimary(t, testKey)
	assert.Equal(t, len(testData), len(result), "initial size mismatch")

	// Step 2: Uncache
	t.Log("Step 2: Uncaching...")
	uncacheLocal(t, testKey)

	// Step 3: Read from remote
	t.Log("Step 3: Reading 5MB object (should cache from remote)...")
	start := time.Now()
	result = getFromPrimary(t, testKey)
	duration := time.Since(start)

	assert.Equal(t, len(testData), len(result), "size mismatch")
	assert.Equal(t, testData, result, "data mismatch")
	t.Logf("Large object cached in %v", duration)

	t.Log("Large object caching test passed")
}

// TestRemoteCacheRangeRequest tests that range requests work after caching
func TestRemoteCacheRangeRequest(t *testing.T) {
	skipIfNotRunning(t)

	testKey := fmt.Sprintf("test-range-%d.txt", time.Now().UnixNano())
	testData := []byte("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")

	// Write, uncache, then test range request
	t.Log("Writing and uncaching object...")
	uploadToPrimary(t, testKey, testData)
	uncacheLocal(t, testKey)

	// Range request should work and trigger caching
	t.Log("Testing range request (bytes 10-19)...")
	resp, err := getPrimaryClient().GetObject(&s3.GetObjectInput{
		Bucket: aws.String(testBucket),
		Key:    aws.String(testKey),
		Range:  aws.String("bytes=10-19"),
	})
	require.NoError(t, err)
	defer resp.Body.Close()

	rangeData, err := io.ReadAll(resp.Body)
	require.NoError(t, err)

	expected := testData[10:20] // "ABCDEFGHIJ"
	assert.Equal(t, expected, rangeData, "range data mismatch")
	t.Logf("Range request returned: %s", string(rangeData))

	t.Log("Range request test passed")
}

// TestRemoteCacheNotFound tests that non-existent objects return proper errors
func TestRemoteCacheNotFound(t *testing.T) {
	skipIfNotRunning(t)

	testKey := fmt.Sprintf("non-existent-object-%d", time.Now().UnixNano())

	_, err := getPrimaryClient().GetObject(&s3.GetObjectInput{
		Bucket: aws.String(testBucket),
		Key:    aws.String(testKey),
	})

	assert.Error(t, err, "should get error for non-existent object")
	t.Logf("Got expected error: %v", err)

	t.Log("Not found test passed")
}

// TestMain sets up and tears down the test environment
func TestMain(m *testing.M) {
	if !isServerRunning(primaryEndpoint) {
		fmt.Println("WARNING: Primary SeaweedFS not running at", primaryEndpoint)
		fmt.Println("         Run 'make test-with-server' to start servers automatically")
	}
	if !isServerRunning(remoteEndpoint) {
		fmt.Println("WARNING: Remote SeaweedFS not running at", remoteEndpoint)
		fmt.Println("         Run 'make test-with-server' to start servers automatically")
	}

	os.Exit(m.Run())
}

func isServerRunning(url string) bool {
	resp, err := http.Get(url)
	if err != nil {
		return false
	}
	resp.Body.Close()
	return true
}