From 7064ad420df549b488183fb0e9e0f281c6dd0a2b Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Thu, 25 Dec 2025 11:00:54 -0800 Subject: [PATCH] Refactor S3 integration tests to use weed mini (#7877) * Refactor S3 integration tests to use weed mini * Fix weed mini flags for sse and parquet tests * Fix IAM test startup: remove -iam.config flag from weed mini * Enhance logging in IAM Makefile to debug startup failure * Simplify weed mini flags and checks in S3 tests (IAM, Parquet, SSE, Copying) * Simplify weed mini flags and checks in all S3 tests * Fix IAM tests: use -s3.iam.config for weed mini * Replace timeout command with portable loop in IAM Makefile * Standardize portable loop-based readiness checks in all S3 Makefiles * Define SERVER_DIR in retention Makefile * Fix versioning and retention Makefiles: remove unsupported weed mini flags * fix filer_group test * fix cors * emojis * fix sse * fix retention * fixes * fix * fixes * fix parquet * fixes * fix * clean up * avoid duplicated debug server * Update .gitignore * simplify * clean up * add credentials * bind * delay * Update Makefile * Update Makefile * check ready * delay * update remote credentials * Update Makefile * clean up * kill * Update Makefile * update credentials --- .gitignore | 2 + test/fuse_integration/Makefile | 18 +- test/s3/compatibility/run.sh | 5 +- test/s3/copying/Makefile | 48 ++--- test/s3/cors/Makefile | 46 ++--- test/s3/filer_group/Makefile | 59 +++--- test/s3/filer_group/test_config.json | 1 - test/s3/iam/Makefile | 99 ++++------ .../parquet/CROSS_FILESYSTEM_COMPATIBILITY.md | 172 ------------------ test/s3/parquet/FINAL_ROOT_CAUSE_ANALYSIS.md | 58 ------ test/s3/parquet/MINIO_DIRECTORY_HANDLING.md | 70 ------- test/s3/parquet/Makefile | 164 ++++------------- test/s3/parquet/TEST_COVERAGE.md | 46 ----- .../s3/parquet/test_implicit_directory_fix.py | 1 + test/s3/remote_cache/Makefile | 36 +--- test/s3/remote_cache/remote_cache_test.go | 15 +- test/s3/retention/Makefile | 31 ++-- test/s3/sse/Makefile | 115 +++--------- test/s3/tagging/Makefile | 36 +--- test/s3/versioning/Makefile | 67 +++---- weed/command/mini.go | 4 +- 21 files changed, 240 insertions(+), 853 deletions(-) delete mode 100644 test/s3/parquet/CROSS_FILESYSTEM_COMPATIBILITY.md delete mode 100644 test/s3/parquet/FINAL_ROOT_CAUSE_ANALYSIS.md delete mode 100644 test/s3/parquet/MINIO_DIRECTORY_HANDLING.md delete mode 100644 test/s3/parquet/TEST_COVERAGE.md diff --git a/.gitignore b/.gitignore index 91fa9391d..b895a8f08 100644 --- a/.gitignore +++ b/.gitignore @@ -131,3 +131,5 @@ coverage.out test/s3/remote_cache/remote-server.pid test/s3/remote_cache/primary-server.pid /test/erasure_coding/filerldb2 +/test/s3/cors/test-mini-data +/test/s3/filer_group/test-volume-data diff --git a/test/fuse_integration/Makefile b/test/fuse_integration/Makefile index fe2ad690b..3c1e68d59 100644 --- a/test/fuse_integration/Makefile +++ b/test/fuse_integration/Makefile @@ -12,20 +12,20 @@ COVERAGE_FILE := coverage.out # Check if weed binary exists check-binary: @if [ ! 
-f "$(WEED_BINARY)" ]; then \ - echo "❌ SeaweedFS binary not found at $(WEED_BINARY)"; \ + echo "ERROR SeaweedFS binary not found at $(WEED_BINARY)"; \ echo " Please run 'make' in the root directory first"; \ exit 1; \ fi - @echo "✅ SeaweedFS binary found" + @echo "OK SeaweedFS binary found" # Check FUSE installation check-fuse: @if command -v fusermount >/dev/null 2>&1; then \ - echo "✅ FUSE is installed (Linux)"; \ + echo "OK FUSE is installed (Linux)"; \ elif command -v umount >/dev/null 2>&1 && [ "$$(uname)" = "Darwin" ]; then \ - echo "✅ FUSE is available (macOS)"; \ + echo "OK FUSE is available (macOS)"; \ else \ - echo "❌ FUSE not found. Please install:"; \ + echo "ERROR FUSE not found. Please install:"; \ echo " Ubuntu/Debian: sudo apt-get install fuse"; \ echo " CentOS/RHEL: sudo yum install fuse"; \ echo " macOS: brew install macfuse"; \ @@ -36,8 +36,8 @@ check-fuse: check-go: @go version | grep -q "go1\.[2-9][0-9]" || \ go version | grep -q "go1\.2[1-9]" || \ - (echo "❌ Go $(GO_VERSION)+ required. Current: $$(go version)" && exit 1) - @echo "✅ Go version check passed" + (echo "ERROR Go $(GO_VERSION)+ required. Current: $$(go version)" && exit 1) + @echo "OK Go version check passed" # Verify all prerequisites check-prereqs: check-go check-fuse @@ -45,9 +45,9 @@ check-prereqs: check-go check-fuse # Build the SeaweedFS binary (if needed) build: - @echo "🔨 Building SeaweedFS..." + @echo "Building SeaweedFS..." cd ../.. && make - @echo "✅ Build complete" + @echo "OK Build complete" # Initialize go module (if needed) init-module: diff --git a/test/s3/compatibility/run.sh b/test/s3/compatibility/run.sh index adfee1366..844435d69 100755 --- a/test/s3/compatibility/run.sh +++ b/test/s3/compatibility/run.sh @@ -22,10 +22,7 @@ docker stop $CONTAINER_NAME || echo "already stopped" ulimit -n 10000 # Start weed w/ filer + s3 in the background -$WEED_BIN server \ - -filer \ - -s3 \ - -volume.max 0 \ +$WEED_BIN mini \ -master.volumeSizeLimitMB 5 \ -dir "$(pwd)/tmp" \ 1>&2>weed.log & diff --git a/test/s3/copying/Makefile b/test/s3/copying/Makefile index 225798900..1a5c98c01 100644 --- a/test/s3/copying/Makefile +++ b/test/s3/copying/Makefile @@ -63,30 +63,24 @@ start-seaweedfs: check-binary @pkill -f "weed volume" || true @pkill -f "weed filer" || true @pkill -f "weed s3" || true + @pkill -f "weed mini" || true @sleep 2 # Create necessary directories - @mkdir -p /tmp/seaweedfs-test-copying-master - @mkdir -p /tmp/seaweedfs-test-copying-volume + @mkdir -p /tmp/seaweedfs-test-copying - # Start master server with volume size limit - @nohup $(SEAWEEDFS_BINARY) master -port=$(MASTER_PORT) -mdir=/tmp/seaweedfs-test-copying-master -volumeSizeLimitMB=$(VOLUME_MAX_SIZE_MB) -ip=127.0.0.1 -peers=none > /tmp/seaweedfs-master.log 2>&1 & - @sleep 3 - - # Start volume server - @nohup $(SEAWEEDFS_BINARY) volume -port=$(VOLUME_PORT) -master=127.0.0.1:$(MASTER_PORT) -dir=/tmp/seaweedfs-test-copying-volume -ip=127.0.0.1 > /tmp/seaweedfs-volume.log 2>&1 & - @sleep 3 - - # Start filer server (using standard SeaweedFS gRPC port convention: HTTP port + 10000) - @nohup $(SEAWEEDFS_BINARY) filer -port=$(FILER_PORT) -port.grpc=$$(( $(FILER_PORT) + 10000 )) -master=127.0.0.1:$(MASTER_PORT) -ip=127.0.0.1 > /tmp/seaweedfs-filer.log 2>&1 & - @sleep 3 - - # Create S3 configuration - @echo '{"identities":[{"name":"$(ACCESS_KEY)","credentials":[{"accessKey":"$(ACCESS_KEY)","secretKey":"$(SECRET_KEY)"}],"actions":["Admin","Read","Write"]}]}' > /tmp/seaweedfs-s3.json - - # Start S3 server - @nohup $(SEAWEEDFS_BINARY) s3 
-port=$(S3_PORT) -filer=127.0.0.1:$(FILER_PORT) -config=/tmp/seaweedfs-s3.json -ip.bind=127.0.0.1 > /tmp/seaweedfs-s3.log 2>&1 &
-	@sleep 5
+	# Start weed mini with S3 configuration
+	@echo "Starting weed mini with dir=/tmp/seaweedfs-test-copying"
+	@echo '{"identities":[{"name":"$(ACCESS_KEY)","credentials":[{"accessKey":"$(ACCESS_KEY)","secretKey":"$(SECRET_KEY)"}],"actions":["Admin","Read","Write"]}]}' > /tmp/seaweedfs-s3.json
+	@nohup $(SEAWEEDFS_BINARY) mini \
+		-dir=/tmp/seaweedfs-test-copying \
+		-s3.port=$(S3_PORT) \
+		-s3.config=/tmp/seaweedfs-s3.json \
+		-ip=127.0.0.1 \
+		> /tmp/seaweedfs-mini.log 2>&1 & echo $$! > /tmp/weed-mini.pid
+	@sleep 5
 
 	# Wait for S3 service to be ready
 	@echo "$(YELLOW)Waiting for S3 service to be ready...$(NC)"
@@ -98,16 +92,12 @@ start-seaweedfs: check-binary
 		echo "Waiting for S3 service... ($$i/30)"; \
 		sleep 1; \
 	done
-
-	# Additional wait for filer gRPC to be ready
-	@echo "$(YELLOW)Waiting for filer gRPC to be ready...$(NC)"
-	@sleep 2
-	@echo "$(GREEN)SeaweedFS server started successfully$(NC)"
-	@echo "Master: http://localhost:$(MASTER_PORT)"
-	@echo "Volume: http://localhost:$(VOLUME_PORT)"
-	@echo "Filer: http://localhost:$(FILER_PORT)"
+	# Additional wait for filer gRPC to be ready
+	@echo "$(YELLOW)Waiting for filer gRPC to be ready...$(NC)"
+	@sleep 2
+	@echo "$(GREEN)SeaweedFS server started successfully$(NC)"
+	@echo "Mini Log: /tmp/seaweedfs-mini.log"
 	@echo "S3: http://localhost:$(S3_PORT)"
-	@echo "Volume Max Size: $(VOLUME_MAX_SIZE_MB)MB"
 
 stop-seaweedfs:
 	@echo "$(YELLOW)Stopping SeaweedFS server...$(NC)"
diff --git a/test/s3/cors/Makefile b/test/s3/cors/Makefile
index 3164d1341..4a1db781e 100644
--- a/test/s3/cors/Makefile
+++ b/test/s3/cors/Makefile
@@ -11,6 +11,7 @@ VOLUME_PORT := 8080
 FILER_PORT := 8888
 TEST_TIMEOUT := 10m
 TEST_PATTERN := TestCORS
+SERVER_DIR := test-mini-data
+S3_CONFIG := ../../../docker/compose/s3.json
 
 # Default target
 help:
@@ -41,21 +42,21 @@ build-weed:
 	@echo "Building SeaweedFS binary..."
 	@cd ../../../weed && go build -o weed_binary .
 	@chmod +x $(WEED_BINARY)
-	@echo "✅ SeaweedFS binary built at $(WEED_BINARY)"
+	@echo "OK SeaweedFS binary built at $(WEED_BINARY)"
 
 check-deps: build-weed
 	@echo "Checking dependencies..."
-	@echo "🔍 DEBUG: Checking Go installation..."
+	@echo "DEBUG: Checking Go installation..."
 	@command -v go >/dev/null 2>&1 || (echo "Go is required but not installed" && exit 1)
-	@echo "🔍 DEBUG: Go version: $$(go version)"
-	@echo "🔍 DEBUG: Checking binary at $(WEED_BINARY)..."
+	@echo "DEBUG: Go version: $$(go version)"
+	@echo "DEBUG: Checking binary at $(WEED_BINARY)..."
 	@test -f $(WEED_BINARY) || (echo "SeaweedFS binary not found at $(WEED_BINARY)" && exit 1)
 	@echo "🔍 DEBUG: Binary size: $$(ls -lh $(WEED_BINARY) | awk '{print $$5}')"
 	@echo "🔍 DEBUG: Binary permissions: $$(ls -la $(WEED_BINARY) | awk '{print $$1}')"
 	@echo "🔍 DEBUG: Checking Go module dependencies..."
 	@go list -m github.com/aws/aws-sdk-go-v2 >/dev/null 2>&1 || (echo "AWS SDK Go v2 not found. Run 'go mod tidy'." && exit 1)
 	@go list -m github.com/stretchr/testify >/dev/null 2>&1 || (echo "Testify not found. Run 'go mod tidy'." && exit 1)
-	@echo "✅ All dependencies are available"
+	@echo "OK All dependencies are available"
 
 # Start SeaweedFS server for testing
 start-server: check-deps
@@ -77,23 +78,26 @@ start-server: check-deps
 	@echo "🔍 DEBUG: Checking config file at ../../../docker/compose/s3.json"
 	@ls -la ../../../docker/compose/s3.json || echo "⚠️ Config file not found, continuing without it"
 	@echo "🔍 DEBUG: Creating volume directory..."
-	@mkdir -p ./test-volume-data
-	@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
-	@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
-	@$(WEED_BINARY) server \
-		-debug \
-		-s3 \
+	@mkdir -p $(SERVER_DIR)
+	@echo "🔍 DEBUG: Launching SeaweedFS S3 server in background..."
+	@echo "🔍 DEBUG: Command: $(WEED_BINARY) mini -dir=$(SERVER_DIR) -s3.port=$(S3_PORT) -s3.config=$(S3_CONFIG)"
+	@$(WEED_BINARY) mini \
+		-dir=$(SERVER_DIR) \
 		-s3.port=$(S3_PORT) \
-		-s3.allowDeleteBucketNotEmpty=true \
-		-s3.config=../../../docker/compose/s3.json \
-		-filer \
-		-filer.maxMB=64 \
-		-master.volumeSizeLimitMB=50 \
-		-volume.max=100 \
-		-dir=./test-volume-data \
-		-volume.preStopSeconds=1 \
-		-metricsPort=9324 \
-		> weed-test.log 2>&1 & echo $$! > weed-server.pid
+		-s3.config=$(S3_CONFIG) \
+		> weed-test.log 2>&1 & \
+		echo $$! > weed-server.pid
+
+	@echo "Waiting for S3 server to be ready..."
+	@for i in $$(seq 1 30); do \
+		if echo | nc -z localhost $(S3_PORT); then \
+			echo "S3 server is ready!"; \
+			exit 0; \
+		fi; \
+		sleep 1; \
+	done; \
+	echo "S3 server failed to start"; \
+	exit 1
 	@echo "🔍 DEBUG: Server PID: $$(cat weed-server.pid 2>/dev/null || echo 'PID file not found')"
 	@echo "🔍 DEBUG: Checking if PID is still running..."
 	@sleep 2
diff --git a/test/s3/filer_group/Makefile b/test/s3/filer_group/Makefile
index df243d2b0..21d5c050a 100644
--- a/test/s3/filer_group/Makefile
+++ b/test/s3/filer_group/Makefile
@@ -16,6 +16,8 @@ TEST_PATTERN := Test
 
 # Filer group configuration
 FILER_GROUP := testgroup
+SERVER_DIR := ./test-volume-data
+S3_CONFIG := ../../../docker/compose/s3.json
 
 # Default target
 help:
@@ -44,7 +46,7 @@ build-weed:
 	@echo "Building SeaweedFS binary..."
 	@cd ../../../weed && go build -o weed_binary .
 	@chmod +x $(WEED_BINARY)
-	@echo "✅ SeaweedFS binary built at $(WEED_BINARY)"
+	@echo "OK SeaweedFS binary built at $(WEED_BINARY)"
 
 check-deps: build-weed
 	@echo "Checking dependencies..."
@@ -53,51 +55,40 @@ check-deps: build-weed
 	@test -f $(WEED_BINARY) || (echo "SeaweedFS binary not found at $(WEED_BINARY)" && exit 1)
 	@go list -m github.com/aws/aws-sdk-go-v2 >/dev/null 2>&1 || (echo "AWS SDK Go v2 not found. Run 'go mod tidy'." && exit 1)
 	@go list -m github.com/stretchr/testify >/dev/null 2>&1 || (echo "Testify not found. Run 'go mod tidy'." && exit 1)
-	@echo "✅ All dependencies are available"
+	@echo "OK All dependencies are available"
 
 # Start SeaweedFS server with filer group configured
 start-server: check-deps
 	@echo "Starting SeaweedFS server with filer group: $(FILER_GROUP)..."
	@rm -f weed-server.pid
-	@mkdir -p ./test-volume-data
+	@mkdir -p $(SERVER_DIR)
 	@if netstat -tlnp 2>/dev/null | grep $(S3_PORT) >/dev/null; then \
-		echo "⚠️ Port $(S3_PORT) is already in use"; \
+		echo "WARNING: Port $(S3_PORT) is already in use"; \
 		exit 1; \
 	fi
 	@echo "Launching SeaweedFS server with filer group $(FILER_GROUP)..."
- @$(WEED_BINARY) server \ + @export AWS_ACCESS_KEY_ID=some_access_key1 && \ + export AWS_SECRET_ACCESS_KEY=some_secret_key1 && \ + $(WEED_BINARY) mini \ -debug \ - -s3 \ + -dir=$(SERVER_DIR) \ -s3.port=$(S3_PORT) \ - -s3.allowDeleteBucketNotEmpty=true \ - -s3.config=../../../docker/compose/s3.json \ - -filer \ - -filer.maxMB=64 \ + -s3.config=$(S3_CONFIG) \ -filer.filerGroup=$(FILER_GROUP) \ - -master.volumeSizeLimitMB=50 \ - -volume.max=100 \ - -dir=./test-volume-data \ - -volume.preStopSeconds=1 \ - -metricsPort=9325 \ - > weed-test.log 2>&1 & echo $$! > weed-server.pid - @echo "Server PID: $$(cat weed-server.pid 2>/dev/null || echo 'PID file not found')" - @echo "Waiting for server to start (up to 90 seconds)..." - @for i in $$(seq 1 90); do \ - if curl -s http://localhost:$(S3_PORT) >/dev/null 2>&1; then \ - echo "✅ SeaweedFS server started successfully on port $(S3_PORT) with filer group $(FILER_GROUP)"; \ + > weed-server.log 2>&1 & \ + echo $$! > weed-server.pid + + @echo "Waiting for S3 server to be ready..." + @for i in $$(seq 1 30); do \ + if echo | nc -z localhost $(S3_PORT); then \ + echo "S3 server is ready!"; \ exit 0; \ fi; \ - if [ $$i -eq 30 ]; then \ - echo "⚠️ Server taking longer than expected (30s), checking logs..."; \ - if [ -f weed-test.log ]; then \ - tail -20 weed-test.log; \ - fi; \ - fi; \ sleep 1; \ done; \ - echo "❌ Server failed to start within 90 seconds"; \ - if [ -f weed-test.log ]; then \ - cat weed-test.log; \ + echo "❌ Server failed to start within 30 seconds"; \ + if [ -f weed-server.log ]; then \ + cat weed-server.log; \ fi; \ exit 1 @@ -126,9 +117,9 @@ stop-server: # Show server logs logs: - @if test -f weed-test.log; then \ + @if test -f weed-server.log; then \ echo "=== SeaweedFS Server Logs ==="; \ - tail -f weed-test.log; \ + tail -f weed-server.log; \ else \ echo "No log file found. Server may not be running."; \ fi @@ -146,7 +137,7 @@ test-with-server: start-server @echo "Test pattern: $(TEST_PATTERN)" @echo "Test timeout: $(TEST_TIMEOUT)" @trap "$(MAKE) stop-server" EXIT; \ - $(MAKE) test || (echo "❌ Tests failed, showing server logs:" && echo "=== Last 50 lines of server logs ===" && tail -50 weed-test.log && echo "=== End of server logs ===" && exit 1) + $(MAKE) test || (echo "❌ Tests failed, showing server logs:" && echo "=== Last 50 lines of server logs ===" && tail -50 weed-server.log && echo "=== End of server logs ===" && exit 1) @$(MAKE) stop-server @echo "✅ Tests completed and server stopped" @@ -154,7 +145,7 @@ test-with-server: start-server clean: @echo "Cleaning up test artifacts..." 
@$(MAKE) stop-server - @rm -f weed-test*.log weed-server.pid + @rm -f weed-server.log weed-test*.log weed-server.pid @rm -rf test-volume-data/ @go clean -testcache @echo "✅ Cleanup completed" diff --git a/test/s3/filer_group/test_config.json b/test/s3/filer_group/test_config.json index 34a4e5d66..05e5c6912 100644 --- a/test/s3/filer_group/test_config.json +++ b/test/s3/filer_group/test_config.json @@ -1,6 +1,5 @@ { "s3_endpoint": "http://localhost:8333", - "master_address": "localhost:9333", "access_key": "some_access_key1", "secret_key": "some_secret_key1", "region": "us-east-1", diff --git a/test/s3/iam/Makefile b/test/s3/iam/Makefile index 7a3f8f950..5113b6b57 100644 --- a/test/s3/iam/Makefile +++ b/test/s3/iam/Makefile @@ -19,6 +19,7 @@ MASTER_PID_FILE = /tmp/weed-master.pid VOLUME_PID_FILE = /tmp/weed-volume.pid FILER_PID_FILE = /tmp/weed-filer.pid S3_PID_FILE = /tmp/weed-s3.pid +MINI_PID_FILE = /tmp/weed-mini.pid help: ## Show this help message @echo "SeaweedFS S3 IAM Integration Tests" @@ -49,80 +50,54 @@ test: clean setup start-services run-tests stop-services ## Run complete IAM int test-quick: run-tests ## Run tests assuming services are already running run-tests: ## Execute the Go tests - @echo "🧪 Running S3 IAM Integration Tests..." + @echo "Running S3 IAM Integration Tests..." go test -v -timeout $(TEST_TIMEOUT) ./... setup: ## Setup test environment - @echo "🔧 Setting up test environment..." + @echo "Setting up test environment..." @mkdir -p test-volume-data/filerldb2 @mkdir -p test-volume-data/m9333 start-services: ## Start SeaweedFS services for testing - @echo "🚀 Starting SeaweedFS services..." - @echo "Starting master server..." - @$(WEED_BINARY) master -port=$(MASTER_PORT) \ - -mdir=test-volume-data/m9333 \ - -peers=none > weed-master.log 2>&1 & \ - echo $$! > $(MASTER_PID_FILE) - - @echo "Waiting for master server to be ready..." - @timeout 60 bash -c 'until curl -s http://localhost:$(MASTER_PORT)/cluster/status > /dev/null 2>&1; do echo "Waiting for master server..."; sleep 2; done' || (echo "❌ Master failed to start, checking logs..." && tail -20 weed-master.log && exit 1) - @echo "✅ Master server is ready" - - @echo "Starting volume server..." - @$(WEED_BINARY) volume -port=$(VOLUME_PORT) \ - -ip=localhost \ - -dataCenter=dc1 -rack=rack1 \ + @echo "Starting SeaweedFS services using weed mini..." + @mkdir -p test-volume-data + @$(WEED_BINARY) mini \ -dir=test-volume-data \ - -max=100 \ - -master=localhost:$(MASTER_PORT) > weed-volume.log 2>&1 & \ - echo $$! > $(VOLUME_PID_FILE) - - @echo "Waiting for volume server to be ready..." - @timeout 60 bash -c 'until curl -s http://localhost:$(VOLUME_PORT)/status > /dev/null 2>&1; do echo "Waiting for volume server..."; sleep 2; done' || (echo "❌ Volume server failed to start, checking logs..." && tail -20 weed-volume.log && exit 1) - @echo "✅ Volume server is ready" - - @echo "Starting filer server..." - @$(WEED_BINARY) filer -port=$(FILER_PORT) \ - -defaultStoreDir=test-volume-data/filerldb2 \ - -master=localhost:$(MASTER_PORT) > weed-filer.log 2>&1 & \ - echo $$! > $(FILER_PID_FILE) - - @echo "Waiting for filer server to be ready..." - @timeout 60 bash -c 'until curl -s http://localhost:$(FILER_PORT)/status > /dev/null 2>&1; do echo "Waiting for filer server..."; sleep 2; done' || (echo "❌ Filer failed to start, checking logs..." && tail -20 weed-filer.log && exit 1) - @echo "✅ Filer server is ready" - - @echo "Starting S3 API server with IAM..." 
- @$(WEED_BINARY) -v=3 s3 -port=$(S3_PORT) \ - -filer=localhost:$(FILER_PORT) \ - -config=test_config.json \ - -iam.config=$(CURDIR)/iam_config.json > weed-s3.log 2>&1 & \ - echo $$! > $(S3_PID_FILE) + -s3.port=$(S3_PORT) \ + -s3.config=test_config.json \ + -s3.iam.config=$(CURDIR)/iam_config.json \ + > weed-mini.log 2>&1 & \ + echo $$! > $(MINI_PID_FILE) - @echo "Waiting for S3 API server to be ready..." - @timeout 60 bash -c 'until curl -s http://localhost:$(S3_PORT) > /dev/null 2>&1; do echo "Waiting for S3 API server..."; sleep 2; done' || (echo "❌ S3 API failed to start, checking logs..." && tail -20 weed-s3.log && exit 1) - @echo "✅ S3 API server is ready" - - @echo "✅ All services started and ready" + @echo "Waiting for services to be ready..." + @$(MAKE) wait-for-services + @echo "OK All services started and ready" wait-for-services: ## Wait for all services to be ready - @echo "⏳ Waiting for services to be ready..." - @echo "Checking master server..." - @timeout 30 bash -c 'until curl -s http://localhost:$(MASTER_PORT)/cluster/status > /dev/null; do sleep 1; done' || (echo "❌ Master failed to start" && exit 1) - - @echo "Checking filer server..." - @timeout 30 bash -c 'until curl -s http://localhost:$(FILER_PORT)/status > /dev/null; do sleep 1; done' || (echo "❌ Filer failed to start" && exit 1) - + @echo "Waiting for services to be ready..." @echo "Checking S3 API server..." - @timeout 30 bash -c 'until curl -s http://localhost:$(S3_PORT) > /dev/null 2>&1; do sleep 1; done' || (echo "❌ S3 API failed to start" && exit 1) + @for i in $$(seq 1 30); do \ + if curl -s http://localhost:$(S3_PORT) > /dev/null 2>&1; then \ + echo "OK S3 API server is ready"; \ + exit 0; \ + fi; \ + sleep 1; \ + done; \ + echo "ERROR S3 API failed to start"; \ + exit 1 @echo "Pre-allocating volumes for concurrent operations..." - @curl -s "http://localhost:$(MASTER_PORT)/vol/grow?collection=default&count=10&replication=000" > /dev/null || echo "⚠️ Volume pre-allocation failed, but continuing..." + @curl -s "http://localhost:$(MASTER_PORT)/vol/grow?collection=default&count=10&replication=000" > /dev/null || echo "WARNING Volume pre-allocation failed, but continuing..." @sleep 3 - @echo "✅ All services are ready" + @echo "OK All services are ready" stop-services: ## Stop all SeaweedFS services - @echo "🛑 Stopping SeaweedFS services..." + @echo "Stopping SeaweedFS services..." + @if [ -f $(MINI_PID_FILE) ]; then \ + echo "Stopping weed mini..."; \ + kill $$(cat $(MINI_PID_FILE)) 2>/dev/null || true; \ + rm -f $(MINI_PID_FILE); \ + fi @if [ -f $(S3_PID_FILE) ]; then \ echo "Stopping S3 API server..."; \ kill $$(cat $(S3_PID_FILE)) 2>/dev/null || true; \ @@ -143,17 +118,17 @@ stop-services: ## Stop all SeaweedFS services kill $$(cat $(MASTER_PID_FILE)) 2>/dev/null || true; \ rm -f $(MASTER_PID_FILE); \ fi - @echo "✅ All services stopped" + @echo "OK All services stopped" clean: stop-services ## Clean up test environment - @echo "🧹 Cleaning up test environment..." + @echo "Cleaning up test environment..." 
@rm -rf test-volume-data @rm -f weed-*.log @rm -f *.test - @echo "✅ Cleanup complete" + @echo "Cleanup complete" logs: ## Show service logs - @echo "📋 Service Logs:" + @echo "Service Logs:" @echo "=== Master Log ===" @tail -20 weed-master.log 2>/dev/null || echo "No master log" @echo "" @@ -167,7 +142,7 @@ logs: ## Show service logs @tail -20 weed-s3.log 2>/dev/null || echo "No S3 log" status: ## Check service status - @echo "📊 Service Status:" + @echo "Service Status:" @echo -n "Master: "; curl -s http://localhost:$(MASTER_PORT)/cluster/status > /dev/null 2>&1 && echo "✅ Running" || echo "❌ Not running" @echo -n "Filer: "; curl -s http://localhost:$(FILER_PORT)/status > /dev/null 2>&1 && echo "✅ Running" || echo "❌ Not running" @echo -n "S3 API: "; curl -s http://localhost:$(S3_PORT) > /dev/null 2>&1 && echo "✅ Running" || echo "❌ Not running" diff --git a/test/s3/parquet/CROSS_FILESYSTEM_COMPATIBILITY.md b/test/s3/parquet/CROSS_FILESYSTEM_COMPATIBILITY.md deleted file mode 100644 index 62ef9563d..000000000 --- a/test/s3/parquet/CROSS_FILESYSTEM_COMPATIBILITY.md +++ /dev/null @@ -1,172 +0,0 @@ -# Cross-Filesystem Compatibility Test Results - -## Overview - -This document summarizes the cross-filesystem compatibility testing between **s3fs** and **PyArrow native S3 filesystem** implementations when working with SeaweedFS. - -## Test Purpose - -Verify that Parquet files written using one filesystem implementation (s3fs or PyArrow native S3) can be correctly read using the other implementation, confirming true file format compatibility. - -## Test Methodology - -### Test Matrix - -The test performs the following combinations: - -1. **Write with s3fs → Read with PyArrow native S3** -2. **Write with PyArrow native S3 → Read with s3fs** - -For each direction, the test: -- Creates a sample PyArrow table with multiple data types (int64, string, float64, bool) -- Writes the Parquet file using one filesystem implementation -- Reads the Parquet file using the other filesystem implementation -- Verifies data integrity by comparing: - - Row counts - - Schema equality - - Data contents (after sorting by ID to handle row order differences) - -### File Sizes Tested - -- **Small files**: 5 rows (quick validation) -- **Large files**: 200,000 rows (multi-row-group validation) - -## Test Results - -### ✅ Small Files (5 rows) - -| Write Method | Read Method | Result | Read Function Used | -|--------------|-------------|--------|--------------------| -| s3fs | PyArrow native S3 | ✅ PASS | pq.read_table | -| PyArrow native S3 | s3fs | ✅ PASS | pq.read_table | - -**Status**: **ALL TESTS PASSED** - -### Large Files (200,000 rows) - -Large file testing requires adequate volume capacity in SeaweedFS. When run with default volume settings (50MB max size), tests may encounter capacity issues with the number of large test files created simultaneously. - -**Recommendation**: For large file testing, increase `VOLUME_MAX_SIZE_MB` in the Makefile or run tests with `TEST_QUICK=1` for development/validation purposes. - -## Key Findings - -### ✅ Full Compatibility Confirmed - -**Files written with s3fs and PyArrow native S3 filesystem are fully compatible and can be read by either implementation.** - -This confirms that: - -1. **Identical Parquet Format**: Both s3fs and PyArrow native S3 use the same underlying PyArrow library to generate Parquet files, resulting in identical file formats at the binary level. - -2. 
**S3 API Compatibility**: SeaweedFS's S3 implementation handles both filesystem backends correctly, with proper: - - Object creation (PutObject) - - Object reading (GetObject) - - Directory handling (implicit directories) - - Multipart uploads (for larger files) - -3. **Metadata Consistency**: File metadata, schemas, and data integrity are preserved across both write and read operations regardless of which filesystem implementation is used. - -## Implementation Details - -### Common Write Path - -Both implementations use PyArrow's `pads.write_dataset()` function: - -```python -# s3fs approach -fs = s3fs.S3FileSystem(...) -pads.write_dataset(table, path, format="parquet", filesystem=fs) - -# PyArrow native approach -s3 = pafs.S3FileSystem(...) -pads.write_dataset(table, path, format="parquet", filesystem=s3) -``` - -### Multiple Read Methods Tested - -The test attempts reads using multiple PyArrow methods: -- `pq.read_table()` - Direct table reading -- `pq.ParquetDataset()` - Dataset-based reading -- `pads.dataset()` - PyArrow dataset API - -All methods successfully read files written by either filesystem implementation. - -## Practical Implications - -### For Users - -1. **Flexibility**: Users can choose either s3fs or PyArrow native S3 based on their preferences: - - **s3fs**: More mature, widely used, familiar API - - **PyArrow native**: Pure PyArrow solution, fewer dependencies - -2. **Interoperability**: Teams using different tools can seamlessly share Parquet datasets stored in SeaweedFS - -3. **Migration**: Easy to migrate between filesystem implementations without data conversion - -### For SeaweedFS - -1. **S3 Compatibility**: Confirms SeaweedFS's S3 implementation is compatible with major Python data science tools - -2. **Implicit Directory Handling**: The implicit directory fix works correctly for both filesystem implementations - -3. **Standard Compliance**: SeaweedFS handles S3 operations in a way that's compatible with AWS S3 behavior - -## Running the Tests - -### Quick Test (Recommended for Development) - -```bash -cd test/s3/parquet -TEST_QUICK=1 make test-cross-fs-with-server -``` - -### Full Test (All File Sizes) - -```bash -cd test/s3/parquet -make test-cross-fs-with-server -``` - -### Manual Test (Assuming Server is Running) - -```bash -cd test/s3/parquet -make setup-python -make start-seaweedfs-ci - -# In another terminal -TEST_QUICK=1 make test-cross-fs - -# Cleanup -make stop-seaweedfs-safe -``` - -## Environment Variables - -The test supports customization through environment variables: - -- `S3_ENDPOINT_URL`: S3 endpoint (default: `http://localhost:8333`) -- `S3_ACCESS_KEY`: Access key (default: `some_access_key1`) -- `S3_SECRET_KEY`: Secret key (default: `some_secret_key1`) -- `BUCKET_NAME`: Bucket name (default: `test-parquet-bucket`) -- `TEST_QUICK`: Run only small tests (default: `0`, set to `1` for quick mode) - -## Conclusion - -The cross-filesystem compatibility tests demonstrate that **Parquet files written via s3fs and PyArrow native S3 filesystem are completely interchangeable**. This validates that: - -1. The Parquet file format is implementation-agnostic -2. SeaweedFS's S3 API correctly handles both filesystem backends -3. 
Users have full flexibility in choosing their preferred filesystem implementation - -This compatibility is a testament to: -- PyArrow's consistent file format generation -- SeaweedFS's robust S3 API implementation -- Proper handling of S3 semantics (especially implicit directories) - ---- - -**Test Implementation**: `test_cross_filesystem_compatibility.py` -**Last Updated**: November 21, 2024 -**Status**: ✅ All critical tests passing - diff --git a/test/s3/parquet/FINAL_ROOT_CAUSE_ANALYSIS.md b/test/s3/parquet/FINAL_ROOT_CAUSE_ANALYSIS.md deleted file mode 100644 index 3dff9cb03..000000000 --- a/test/s3/parquet/FINAL_ROOT_CAUSE_ANALYSIS.md +++ /dev/null @@ -1,58 +0,0 @@ -# Final Root Cause Analysis - -## Overview - -This document provides a deep technical analysis of the s3fs compatibility issue with PyArrow Parquet datasets on SeaweedFS, and the solution implemented to resolve it. - -## Root Cause - -When PyArrow writes datasets using `write_dataset()`, it creates implicit directory structures by writing files without explicit directory markers. However, some S3 workflows may create 0-byte directory markers. - -### The Problem - -1. **PyArrow writes dataset files** without creating explicit directory objects -2. **s3fs calls HEAD** on the directory path to check if it exists -3. **If HEAD returns 200** with `Content-Length: 0`, s3fs interprets it as a file (not a directory) -4. **PyArrow fails** when trying to read, reporting "Parquet file size is 0 bytes" - -### AWS S3 Behavior - -AWS S3 returns **404 Not Found** for implicit directories (directories that only exist because they have children but no explicit marker object). This allows s3fs to fall back to LIST operations to detect the directory. - -## The Solution - -### Implementation - -Modified the S3 API HEAD handler in `weed/s3api/s3api_object_handlers.go` to: - -1. **Check if object ends with `/`**: Explicit directory markers return 200 as before -2. **Check if object has children**: If a 0-byte object has children in the filer, treat it as an implicit directory -3. 
**Return 404 for implicit directories**: This matches AWS S3 behavior and triggers s3fs's LIST fallback - -### Code Changes - -The fix is implemented in the `HeadObjectHandler` function with logic to: -- Detect implicit directories by checking for child entries -- Return 404 (NoSuchKey) for implicit directories -- Preserve existing behavior for explicit directory markers and regular files - -## Performance Considerations - -### Optimization: Child Check Cache -- Child existence checks are performed via filer LIST operations -- Results could be cached for frequently accessed paths -- Trade-off between consistency and performance - -### Impact -- Minimal performance impact for normal file operations -- Slight overhead for HEAD requests on implicit directories (one additional LIST call) -- Overall improvement in PyArrow compatibility outweighs minor performance cost - -## TODO - -- [ ] Add detailed benchmarking results comparing before/after fix -- [ ] Document edge cases discovered during implementation -- [ ] Add architectural diagrams showing the request flow -- [ ] Document alternative solutions considered and why they were rejected -- [ ] Add performance profiling data for child existence checks - diff --git a/test/s3/parquet/MINIO_DIRECTORY_HANDLING.md b/test/s3/parquet/MINIO_DIRECTORY_HANDLING.md deleted file mode 100644 index 04d80cfcb..000000000 --- a/test/s3/parquet/MINIO_DIRECTORY_HANDLING.md +++ /dev/null @@ -1,70 +0,0 @@ -# MinIO Directory Handling Comparison - -## Overview - -This document compares how MinIO handles directory markers versus SeaweedFS's implementation, and explains the different approaches to S3 directory semantics. - -## MinIO's Approach - -MinIO handles implicit directories similarly to AWS S3: - -1. **No explicit directory objects**: Directories are implicit, defined only by object key prefixes -2. **HEAD on directory returns 404**: Consistent with AWS S3 behavior -3. **LIST operations reveal directories**: Directories are discovered through delimiter-based LIST operations -4. **Automatic prefix handling**: MinIO automatically recognizes prefixes as directories - -### MinIO Implementation Details - -- Uses in-memory metadata for fast prefix lookups -- Optimized for LIST operations with common delimiter (`/`) -- No persistent directory objects in storage layer -- Directories "exist" as long as they contain objects - -## SeaweedFS Approach - -SeaweedFS uses a filer-based approach with real directory entries: - -### Before the Fix - -1. **Explicit directory objects**: Could create 0-byte objects as directory markers -2. **HEAD returns 200**: Even for implicit directories -3. **Caused s3fs issues**: s3fs interpreted 0-byte HEAD responses as empty files - -### After the Fix - -1. **Hybrid approach**: Supports both explicit markers (with `/` suffix) and implicit directories -2. **HEAD returns 404 for implicit directories**: Matches AWS S3 and MinIO behavior -3. **Filer integration**: Uses filer's directory metadata to detect implicit directories -4. 
**s3fs compatibility**: Triggers proper LIST fallback behavior - -## Key Differences - -| Aspect | MinIO | SeaweedFS (After Fix) | -|--------|-------|----------------------| -| Directory Storage | No persistent objects | Filer directory entries | -| Implicit Directory HEAD | 404 Not Found | 404 Not Found | -| Explicit Marker HEAD | Not applicable | 200 OK (with `/` suffix) | -| Child Detection | Prefix scan | Filer LIST operation | -| Performance | In-memory lookups | Filer gRPC calls | - -## Implementation Considerations - -### Advantages of SeaweedFS Approach -- Integrates with existing filer metadata -- Supports both implicit and explicit directories -- Preserves directory metadata and attributes -- Compatible with POSIX filer semantics - -### Trade-offs -- Additional filer communication overhead for HEAD requests -- Complexity of supporting both directory paradigms -- Performance depends on filer efficiency - -## TODO - -- [ ] Add performance benchmark comparison: MinIO vs SeaweedFS -- [ ] Document edge cases where behaviors differ -- [ ] Add example request/response traces for both systems -- [ ] Document migration path for users moving from MinIO to SeaweedFS -- [ ] Add compatibility matrix for different S3 clients - diff --git a/test/s3/parquet/Makefile b/test/s3/parquet/Makefile index 0aa6c8117..708f4aa5c 100644 --- a/test/s3/parquet/Makefile +++ b/test/s3/parquet/Makefile @@ -4,14 +4,9 @@ # Default values SEAWEEDFS_BINARY ?= weed S3_PORT ?= 8333 -FILER_PORT ?= 8888 -VOLUME_PORT ?= 8080 -MASTER_PORT ?= 9333 TEST_TIMEOUT ?= 15m ACCESS_KEY ?= some_access_key1 SECRET_KEY ?= some_secret_key1 -VOLUME_MAX_SIZE_MB ?= 50 -VOLUME_MAX_COUNT ?= 100 BUCKET_NAME ?= test-parquet-bucket ENABLE_SSE_S3 ?= false @@ -68,11 +63,7 @@ help: @echo "Configuration:" @echo " SEAWEEDFS_BINARY=$(SEAWEEDFS_BINARY)" @echo " S3_PORT=$(S3_PORT)" - @echo " FILER_PORT=$(FILER_PORT)" - @echo " VOLUME_PORT=$(VOLUME_PORT)" - @echo " MASTER_PORT=$(MASTER_PORT)" @echo " BUCKET_NAME=$(BUCKET_NAME)" - @echo " VOLUME_MAX_SIZE_MB=$(VOLUME_MAX_SIZE_MB)" @echo " ENABLE_SSE_S3=$(ENABLE_SSE_S3)" @echo " PYTHON=$(PYTHON)" @@ -106,39 +97,25 @@ setup-python: check-python start-seaweedfs-ci: check-binary @echo "$(YELLOW)Starting SeaweedFS server for Parquet testing...$(NC)" - # Clean up any existing processes first (CI-safe) - @echo "Cleaning up any existing processes..." + # Clean up any existing processes first (CI-safe) - aggressive cleanup + @echo "Aggressively cleaning up any existing processes on S3 port $(S3_PORT) and master port 9333..." 
@if command -v lsof >/dev/null 2>&1; then \ - lsof -ti :$(MASTER_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ - lsof -ti :$(VOLUME_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ - lsof -ti :$(FILER_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ - lsof -ti :$(S3_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ - lsof -ti :$$(( $(MASTER_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ - lsof -ti :$$(( $(VOLUME_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ - lsof -ti :$$(( $(FILER_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ + for attempt in 1 2 3; do \ + lsof -ti :$(S3_PORT) 2>/dev/null | head -5 | while read pid; do kill -9 $$pid 2>/dev/null || true; done; \ + lsof -ti :9333 2>/dev/null | head -5 | while read pid; do kill -9 $$pid 2>/dev/null || true; done; \ + sleep 1; \ + done; \ fi - @sleep 2 + @sleep 3 # Create necessary directories - @mkdir -p /tmp/seaweedfs-test-parquet-master - @mkdir -p /tmp/seaweedfs-test-parquet-volume - @mkdir -p /tmp/seaweedfs-test-parquet-filer + @mkdir -p /tmp/seaweedfs-test-parquet # Clean up any old server logs @rm -f /tmp/seaweedfs-parquet-*.log || true - # Start master server with volume size limit and explicit gRPC port - @echo "Starting master server..." - @nohup $(SEAWEEDFS_BINARY) master -port=$(MASTER_PORT) -port.grpc=$$(( $(MASTER_PORT) + 10000 )) -mdir=/tmp/seaweedfs-test-parquet-master -volumeSizeLimitMB=$(VOLUME_MAX_SIZE_MB) -ip=127.0.0.1 -peers=none > /tmp/seaweedfs-parquet-master.log 2>&1 & - @sleep 3 - - # Start volume server with master HTTP port and increased capacity - @echo "Starting volume server..." - @nohup $(SEAWEEDFS_BINARY) volume -port=$(VOLUME_PORT) -master=127.0.0.1:$(MASTER_PORT) -dir=/tmp/seaweedfs-test-parquet-volume -max=$(VOLUME_MAX_COUNT) -ip=127.0.0.1 -preStopSeconds=1 > /tmp/seaweedfs-parquet-volume.log 2>&1 & - @sleep 5 - - # Start filer server with embedded S3 - @echo "Starting filer server with embedded S3..." + # Start weed mini with embedded S3 + @echo "Starting weed mini with embedded S3..." 
@if [ "$(ENABLE_SSE_S3)" = "true" ]; then \ echo " SSE-S3 encryption: ENABLED"; \ printf '{"identities":[{"name":"%s","credentials":[{"accessKey":"%s","secretKey":"%s"}],"actions":["Admin","Read","Write"]}],"buckets":[{"name":"$(BUCKET_NAME)","encryption":{"sseS3":{"enabled":true}}}]}' "$(ACCESS_KEY)" "$(ACCESS_KEY)" "$(SECRET_KEY)" > /tmp/seaweedfs-parquet-s3.json; \ @@ -146,96 +123,43 @@ start-seaweedfs-ci: check-binary echo " SSE-S3 encryption: DISABLED"; \ printf '{"identities":[{"name":"%s","credentials":[{"accessKey":"%s","secretKey":"%s"}],"actions":["Admin","Read","Write"]}]}' "$(ACCESS_KEY)" "$(ACCESS_KEY)" "$(SECRET_KEY)" > /tmp/seaweedfs-parquet-s3.json; \ fi - @AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) nohup $(SEAWEEDFS_BINARY) filer -port=$(FILER_PORT) -port.grpc=$$(( $(FILER_PORT) + 10000 )) -master=127.0.0.1:$(MASTER_PORT) -dataCenter=defaultDataCenter -ip=127.0.0.1 -s3 -s3.port=$(S3_PORT) -s3.config=/tmp/seaweedfs-parquet-s3.json > /tmp/seaweedfs-parquet-filer.log 2>&1 & - @sleep 5 - - # Wait for S3 service to be ready - use port-based checking for reliability - @echo "$(YELLOW)Waiting for S3 service to be ready...$(NC)" - @for i in $$(seq 1 20); do \ - if netstat -an 2>/dev/null | grep -q ":$(S3_PORT).*LISTEN" || \ - ss -an 2>/dev/null | grep -q ":$(S3_PORT).*LISTEN" || \ - lsof -i :$(S3_PORT) >/dev/null 2>&1; then \ - echo "$(GREEN)S3 service is listening on port $(S3_PORT)$(NC)"; \ - sleep 1; \ - break; \ + @$(SEAWEEDFS_BINARY) mini \ + -dir=/tmp/seaweedfs-test-parquet \ + -ip.bind=0.0.0.0 \ + -s3.port=$(S3_PORT) \ + -s3.config=/tmp/seaweedfs-parquet-s3.json \ + > /tmp/seaweedfs-parquet-mini.log 2>&1 & echo $$! > /tmp/weed-mini.pid + @echo "Waiting for S3 service to be fully ready (max 90 seconds)..." + @bash -c 'for i in $$(seq 1 90); do \ + if curl -s -H "Authorization: AWS4-HMAC-SHA256 Credential=$(ACCESS_KEY)" http://localhost:$(S3_PORT)/ > /dev/null 2>&1; then \ + echo "✅ S3 service is ready"; \ + sleep 2; \ + exit 0; \ fi; \ - if [ $$i -eq 20 ]; then \ - echo "$(RED)S3 service failed to start within 20 seconds$(NC)"; \ - echo "=== Detailed Logs ==="; \ - echo "Master log:"; tail -30 /tmp/seaweedfs-parquet-master.log || true; \ - echo "Volume log:"; tail -30 /tmp/seaweedfs-parquet-volume.log || true; \ - echo "Filer log:"; tail -30 /tmp/seaweedfs-parquet-filer.log || true; \ - echo "=== Port Status ==="; \ - netstat -an 2>/dev/null | grep ":$(S3_PORT)" || \ - ss -an 2>/dev/null | grep ":$(S3_PORT)" || \ - echo "No port listening on $(S3_PORT)"; \ - exit 1; \ - fi; \ - echo "Waiting for S3 service... 
($$i/20)"; \ sleep 1; \ - done - - # Additional wait for filer gRPC to be ready - @echo "$(YELLOW)Waiting for filer gRPC to be ready...$(NC)" - @sleep 2 - - # Wait for volume server to register with master and ensure volume assignment works - @echo "$(YELLOW)Waiting for volume assignment to be ready...$(NC)" - @for i in $$(seq 1 30); do \ - ASSIGN_RESULT=$$(curl -s "http://localhost:$(MASTER_PORT)/dir/assign?count=1" 2>/dev/null); \ - if echo "$$ASSIGN_RESULT" | grep -q '"fid"'; then \ - echo "$(GREEN)Volume assignment is ready$(NC)"; \ - break; \ - fi; \ - if [ $$i -eq 30 ]; then \ - echo "$(RED)Volume assignment not ready after 30 seconds$(NC)"; \ - echo "=== Last assign attempt ==="; \ - echo "$$ASSIGN_RESULT"; \ - echo "=== Master Status ==="; \ - curl -s "http://localhost:$(MASTER_PORT)/dir/status" 2>/dev/null || echo "Failed to get master status"; \ - echo "=== Master Logs ==="; \ - tail -50 /tmp/seaweedfs-parquet-master.log 2>/dev/null || echo "No master log"; \ - echo "=== Volume Logs ==="; \ - tail -50 /tmp/seaweedfs-parquet-volume.log 2>/dev/null || echo "No volume log"; \ - exit 1; \ - fi; \ - echo "Waiting for volume assignment... ($$i/30)"; \ - sleep 1; \ - done - - @echo "$(GREEN)SeaweedFS server started successfully for Parquet testing$(NC)" - @echo "Master: http://localhost:$(MASTER_PORT)" - @echo "Volume: http://localhost:$(VOLUME_PORT)" - @echo "Filer: http://localhost:$(FILER_PORT)" - @echo "S3: http://localhost:$(S3_PORT)" - @echo "Volume Max Size: $(VOLUME_MAX_SIZE_MB)MB" + done; \ + echo "ERROR S3 service failed to start within 90 seconds"; \ + echo "=== Server log output ==="; \ + cat /tmp/seaweedfs-parquet-mini.log 2>/dev/null || echo "No startup log available"; \ + exit 1' start-seaweedfs: check-binary @echo "$(YELLOW)Starting SeaweedFS server for Parquet testing...$(NC)" @# Use port-based cleanup for consistency and safety @echo "Cleaning up any existing processes..." 
- @lsof -ti :$(MASTER_PORT) 2>/dev/null | xargs -r kill -TERM || true - @lsof -ti :$(VOLUME_PORT) 2>/dev/null | xargs -r kill -TERM || true - @lsof -ti :$(FILER_PORT) 2>/dev/null | xargs -r kill -TERM || true @lsof -ti :$(S3_PORT) 2>/dev/null | xargs -r kill -TERM || true - @# Clean up gRPC ports (HTTP port + 10000) - @lsof -ti :$$(( $(MASTER_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true - @lsof -ti :$$(( $(VOLUME_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true - @lsof -ti :$$(( $(FILER_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true @sleep 2 @$(MAKE) start-seaweedfs-ci stop-seaweedfs: @echo "$(YELLOW)Stopping SeaweedFS server...$(NC)" @# Use port-based cleanup for consistency and safety - @lsof -ti :$(MASTER_PORT) 2>/dev/null | xargs -r kill -TERM || true - @lsof -ti :$(VOLUME_PORT) 2>/dev/null | xargs -r kill -TERM || true - @lsof -ti :$(FILER_PORT) 2>/dev/null | xargs -r kill -TERM || true + @if [ -f /tmp/weed-mini.pid ]; then \ + echo "Stopping weed mini..."; \ + kill $$(cat /tmp/weed-mini.pid) || true; \ + rm -f /tmp/weed-mini.pid; \ + fi @lsof -ti :$(S3_PORT) 2>/dev/null | xargs -r kill -TERM || true - @# Clean up gRPC ports (HTTP port + 10000) - @lsof -ti :$$(( $(MASTER_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true - @lsof -ti :$$(( $(VOLUME_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true - @lsof -ti :$$(( $(FILER_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true @sleep 2 @echo "$(GREEN)SeaweedFS server stopped$(NC)" @@ -245,22 +169,10 @@ stop-seaweedfs-safe: @# Use port-based cleanup which is safer in CI @if command -v lsof >/dev/null 2>&1; then \ echo "Using lsof for port-based cleanup..."; \ - lsof -ti :$(MASTER_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ - lsof -ti :$(VOLUME_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ - lsof -ti :$(FILER_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ lsof -ti :$(S3_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ - lsof -ti :$$(( $(MASTER_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ - lsof -ti :$$(( $(VOLUME_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ - lsof -ti :$$(( $(FILER_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \ else \ echo "lsof not available, using netstat approach..."; \ - netstat -tlnp 2>/dev/null | grep :$(MASTER_PORT) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \ - netstat -tlnp 2>/dev/null | grep :$(VOLUME_PORT) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \ - netstat -tlnp 2>/dev/null | grep :$(FILER_PORT) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \ netstat -tlnp 2>/dev/null | grep :$(S3_PORT) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \ - netstat -tlnp 2>/dev/null | grep :$$(( $(MASTER_PORT) + 10000 )) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \ - netstat 
-tlnp 2>/dev/null | grep :$$(( $(VOLUME_PORT) + 10000 )) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \ - netstat -tlnp 2>/dev/null | grep :$$(( $(FILER_PORT) + 10000 )) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \ fi @sleep 2 @echo "$(GREEN)SeaweedFS server safely stopped$(NC)" @@ -351,18 +263,14 @@ test-implicit-dir-with-server: build-weed setup-python # Debug targets debug-logs: - @echo "$(YELLOW)=== Master Log ===$(NC)" - @tail -n 50 /tmp/seaweedfs-parquet-master.log || echo "No master log found" - @echo "$(YELLOW)=== Volume Log ===$(NC)" - @tail -n 50 /tmp/seaweedfs-parquet-volume.log || echo "No volume log found" - @echo "$(YELLOW)=== Filer Log ===$(NC)" - @tail -n 50 /tmp/seaweedfs-parquet-filer.log || echo "No filer log found" + @echo "$(YELLOW)=== Mini Log ===$(NC)" + @tail -n 50 /tmp/seaweedfs-parquet-mini.log || echo "No mini log found" debug-status: @echo "$(YELLOW)=== Process Status ===$(NC)" @ps aux | grep -E "(weed|seaweedfs)" | grep -v grep || echo "No SeaweedFS processes found" @echo "$(YELLOW)=== Port Status ===$(NC)" - @netstat -an | grep -E "($(MASTER_PORT)|$(VOLUME_PORT)|$(FILER_PORT)|$(S3_PORT))" || echo "No ports in use" + @netstat -an | grep -E "($(S3_PORT))" || echo "No ports in use" # Manual test targets for development manual-start: start-seaweedfs diff --git a/test/s3/parquet/TEST_COVERAGE.md b/test/s3/parquet/TEST_COVERAGE.md deleted file mode 100644 index f08a93ab9..000000000 --- a/test/s3/parquet/TEST_COVERAGE.md +++ /dev/null @@ -1,46 +0,0 @@ -# Test Coverage Documentation - -## Overview - -This document provides comprehensive test coverage documentation for the SeaweedFS S3 Parquet integration tests. 
- -## Test Categories - -### Unit Tests (Go) -- 17 test cases covering S3 API handlers -- Tests for implicit directory handling -- HEAD request behavior validation -- Located in: `weed/s3api/s3api_implicit_directory_test.go` - -### Integration Tests (Python) -- 6 test cases for implicit directory fix -- Tests HEAD request behavior on directory markers -- s3fs directory detection validation -- PyArrow dataset read compatibility -- Located in: `test_implicit_directory_fix.py` - -### End-to-End Tests (Python) -- 20 test cases combining write and read methods -- Small file tests (5 rows): 10 test combinations -- Large file tests (200,000 rows): 10 test combinations -- Tests multiple write methods: `pads.write_dataset`, `pq.write_table+s3fs` -- Tests multiple read methods: `pads.dataset`, `pq.ParquetDataset`, `pq.read_table`, `s3fs+direct`, `s3fs+buffered` -- Located in: `s3_parquet_test.py` - -## Coverage Summary - -| Test Type | Count | Status | -|-----------|-------|--------| -| Unit Tests (Go) | 17 | ✅ Pass | -| Integration Tests (Python) | 6 | ✅ Pass | -| End-to-End Tests (Python) | 20 | ✅ Pass | -| **Total** | **43** | **✅ All Pass** | - -## TODO - -- [ ] Add detailed test execution time metrics -- [ ] Document test data generation strategies -- [ ] Add code coverage percentages for Go tests -- [ ] Document edge cases and corner cases tested -- [ ] Add performance benchmarking results - diff --git a/test/s3/parquet/test_implicit_directory_fix.py b/test/s3/parquet/test_implicit_directory_fix.py index 2ed52e5d7..58f3f2170 100755 --- a/test/s3/parquet/test_implicit_directory_fix.py +++ b/test/s3/parquet/test_implicit_directory_fix.py @@ -60,6 +60,7 @@ def setup_s3(): endpoint_url=S3_ENDPOINT_URL, aws_access_key_id=S3_ACCESS_KEY, aws_secret_access_key=S3_SECRET_KEY, + region_name='us-east-1', use_ssl=False ) diff --git a/test/s3/remote_cache/Makefile b/test/s3/remote_cache/Makefile index 1b7a64539..0292c0d35 100644 --- a/test/s3/remote_cache/Makefile +++ b/test/s3/remote_cache/Makefile @@ -10,19 +10,17 @@ all: test-with-server # Configuration WEED_BINARY := ../../../weed/weed_binary +ACCESS_KEY ?= some_access_key1 +SECRET_KEY ?= some_secret_key1 + # Primary SeaweedFS (the one being tested - has remote caching) PRIMARY_S3_PORT := 8333 -PRIMARY_FILER_PORT := 8888 PRIMARY_MASTER_PORT := 9333 -PRIMARY_VOLUME_PORT := 8080 PRIMARY_METRICS_PORT := 9324 PRIMARY_DIR := ./test-primary-data # Secondary SeaweedFS (acts as "remote" S3 storage) REMOTE_S3_PORT := 8334 -REMOTE_FILER_PORT := 8889 -REMOTE_MASTER_PORT := 9334 -REMOTE_VOLUME_PORT := 8081 REMOTE_METRICS_PORT := 9325 REMOTE_DIR := ./test-remote-data @@ -73,18 +71,11 @@ start-remote: check-deps @echo "Starting remote SeaweedFS (secondary instance)..." @rm -f remote-server.pid @mkdir -p $(REMOTE_DIR) - @$(WEED_BINARY) server \ - -s3 \ + @AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) $(WEED_BINARY) mini \ -s3.port=$(REMOTE_S3_PORT) \ -s3.allowDeleteBucketNotEmpty=true \ - -filer \ - -filer.port=$(REMOTE_FILER_PORT) \ - -master.port=$(REMOTE_MASTER_PORT) \ - -volume.port=$(REMOTE_VOLUME_PORT) \ - -master.volumeSizeLimitMB=50 \ - -volume.max=100 \ -dir=$(REMOTE_DIR) \ - -volume.preStopSeconds=1 \ + -ip.bind=0.0.0.0 \ -metricsPort=$(REMOTE_METRICS_PORT) \ > remote-weed.log 2>&1 & echo $$! > remote-server.pid @echo "Waiting for remote SeaweedFS to start..." 
@@ -93,7 +84,7 @@ start-remote: check-deps echo "Remote SeaweedFS started on port $(REMOTE_S3_PORT)"; \ exit 0; \ fi; \ - sleep 1; \ + sleep 3; \ done; \ echo "ERROR: Remote SeaweedFS failed to start"; \ cat remote-weed.log; \ @@ -114,18 +105,11 @@ start-primary: check-deps @echo "Starting primary SeaweedFS..." @rm -f primary-server.pid @mkdir -p $(PRIMARY_DIR) - @$(WEED_BINARY) server \ - -s3 \ + @AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) $(WEED_BINARY) mini \ -s3.port=$(PRIMARY_S3_PORT) \ -s3.allowDeleteBucketNotEmpty=true \ - -filer \ - -filer.port=$(PRIMARY_FILER_PORT) \ - -master.port=$(PRIMARY_MASTER_PORT) \ - -volume.port=$(PRIMARY_VOLUME_PORT) \ - -master.volumeSizeLimitMB=50 \ - -volume.max=100 \ -dir=$(PRIMARY_DIR) \ - -volume.preStopSeconds=1 \ + -ip.bind=0.0.0.0 \ -metricsPort=$(PRIMARY_METRICS_PORT) \ > primary-weed.log 2>&1 & echo $$! > primary-server.pid @echo "Waiting for primary SeaweedFS to start..." @@ -134,7 +118,7 @@ start-primary: check-deps echo "Primary SeaweedFS started on port $(PRIMARY_S3_PORT)"; \ exit 0; \ fi; \ - sleep 1; \ + sleep 3; \ done; \ echo "ERROR: Primary SeaweedFS failed to start"; \ cat primary-weed.log; \ @@ -156,7 +140,7 @@ setup-remote: @curl -s -X PUT "http://localhost:$(REMOTE_S3_PORT)/$(REMOTE_BUCKET)" || echo "Bucket may already exist" @sleep 1 @echo "Configuring remote storage on primary..." - @printf 'remote.configure -name=seaweedremote -type=s3 -s3.access_key=any -s3.secret_key=any -s3.endpoint=http://localhost:$(REMOTE_S3_PORT) -s3.region=us-east-1\nexit\n' | $(WEED_BINARY) shell -master=localhost:$(PRIMARY_MASTER_PORT) 2>&1 || echo "remote.configure done" + @printf 'remote.configure -name=seaweedremote -type=s3 -s3.access_key=$(ACCESS_KEY) -s3.secret_key=$(SECRET_KEY) -s3.endpoint=http://localhost:$(REMOTE_S3_PORT) -s3.region=us-east-1\nexit\n' | $(WEED_BINARY) shell -master=localhost:$(PRIMARY_MASTER_PORT) 2>&1 || echo "remote.configure done" @sleep 2 @echo "Mounting remote bucket on primary..." 
@printf 'remote.mount -dir=/buckets/remotemounted -remote=seaweedremote/$(REMOTE_BUCKET) -nonempty\nexit\n' | $(WEED_BINARY) shell -master=localhost:$(PRIMARY_MASTER_PORT) 2>&1 || echo "remote.mount done"
diff --git a/test/s3/remote_cache/remote_cache_test.go b/test/s3/remote_cache/remote_cache_test.go
index 08eca1802..290151ba8 100644
--- a/test/s3/remote_cache/remote_cache_test.go
+++ b/test/s3/remote_cache/remote_cache_test.go
@@ -34,8 +34,8 @@ const (
 	remoteEndpoint = "http://localhost:8334"
 
 	// Credentials (anonymous access for testing)
-	accessKey = "any"
-	secretKey = "any"
+	accessKey = "some_access_key1"
+	secretKey = "some_secret_key1"
 
 	// Bucket name - mounted on primary as remote storage
 	testBucket = "remotemounted"
@@ -121,17 +121,6 @@ func getFromPrimary(t *testing.T, key string) []byte {
 	return data
 }
 
-// syncToRemote syncs local data to remote storage
-func syncToRemote(t *testing.T) {
-	t.Log("Syncing to remote storage...")
-	output, err := runWeedShell(t, "remote.cache.uncache -dir=/buckets/"+testBucket+" -include=*")
-	if err != nil {
-		t.Logf("syncToRemote warning: %v", err)
-	}
-	t.Log(output)
-	time.Sleep(1 * time.Second)
-}
-
 // uncacheLocal purges the local cache, forcing data to be fetched from remote
 func uncacheLocal(t *testing.T, pattern string) {
 	t.Logf("Purging local cache for pattern: %s", pattern)
diff --git a/test/s3/retention/Makefile b/test/s3/retention/Makefile
index 3277e1db0..9854e0aef 100644
--- a/test/s3/retention/Makefile
+++ b/test/s3/retention/Makefile
@@ -6,11 +6,14 @@
 # Configuration
 WEED_BINARY := ../../../weed/weed_binary
 S3_PORT := 8333
+ACCESS_KEY ?= some_access_key1
+SECRET_KEY ?= some_secret_key1
 MASTER_PORT := 9333
 VOLUME_PORT := 8080
 FILER_PORT := 8888
 TEST_TIMEOUT := 15m
 TEST_PATTERN := TestRetention
+SERVER_DIR := ./test-volume-data/server-data
 
 # Default target
 help:
@@ -80,23 +83,16 @@ start-server: check-deps
 	@ls -la ../../../docker/compose/s3.json || echo "⚠️ Config file not found, continuing without it"
 	@echo "🔍 DEBUG: Creating volume directory..."
 	@mkdir -p ./test-volume-data
-	@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
-	@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
-	@$(WEED_BINARY) server \
-		-debug \
-		-s3 \
+	@echo "🔍 DEBUG: Creating server data directory..."
+	@mkdir -p $(SERVER_DIR)
+	@echo "🔍 DEBUG: Launching SeaweedFS S3 server in background..."
+	@echo "🔍 DEBUG: Command: $(WEED_BINARY) mini -dir=$(SERVER_DIR) -s3.port=$(S3_PORT)"
+	@AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) $(WEED_BINARY) mini \
+		-dir=$(SERVER_DIR) \
 		-s3.port=$(S3_PORT) \
-		-s3.allowDeleteBucketNotEmpty=true \
-		-s3.config=../../../docker/compose/s3.json \
-		-filer \
-		-filer.maxMB=64 \
-		-master.volumeSizeLimitMB=50 \
-		-volume.max=100 \
-		-dir=./test-volume-data \
-		-volume.preStopSeconds=1 \
-		-metricsPort=9324 \
-		> weed-test.log 2>&1 & echo $$! > weed-server.pid
+		> weed-test.log 2>&1 & \
+		echo $$! > weed-server.pid
 	@echo "🔍 DEBUG: Server PID: $$(cat weed-server.pid 2>/dev/null || echo 'PID file not found')"
 	@echo "🔍 DEBUG: Checking if PID is still running..."
@sleep 2 @if [ -f weed-server.pid ]; then \ @@ -105,7 +101,6 @@ start-server: check-deps else \ echo "⚠️ PID file not found"; \ fi - @echo "🔍 DEBUG: Waiting for server to start (up to 90 seconds)..." @for i in $$(seq 1 90); do \ echo "🔍 DEBUG: Attempt $$i/90 - checking port $(S3_PORT)"; \ if curl -s http://localhost:$(S3_PORT) >/dev/null 2>&1; then \ @@ -123,8 +118,6 @@ start-server: check-deps if [ $$i -eq 15 ]; then \ echo "🔍 DEBUG: After 15 seconds, checking port bindings..."; \ netstat -tlnp 2>/dev/null | grep $(S3_PORT) || echo "Port $(S3_PORT) not bound"; \ - netstat -tlnp 2>/dev/null | grep 9333 || echo "Port 9333 not bound"; \ - netstat -tlnp 2>/dev/null | grep 8080 || echo "Port 8080 not bound"; \ fi; \ if [ $$i -eq 30 ]; then \ echo "⚠️ Server taking longer than expected (30s), checking logs..."; \ diff --git a/test/s3/sse/Makefile b/test/s3/sse/Makefile index 8d0869a82..e646ef901 100644 --- a/test/s3/sse/Makefile +++ b/test/s3/sse/Makefile @@ -93,54 +93,35 @@ start-seaweedfs: check-binary @sleep 2 # Create necessary directories - @mkdir -p /tmp/seaweedfs-test-sse-master - @mkdir -p /tmp/seaweedfs-test-sse-volume - @mkdir -p /tmp/seaweedfs-test-sse-filer - - # Start master server with volume size limit and explicit gRPC port - @nohup $(SEAWEEDFS_BINARY) master -port=$(MASTER_PORT) -port.grpc=$$(( $(MASTER_PORT) + 10000 )) -mdir=/tmp/seaweedfs-test-sse-master -volumeSizeLimitMB=$(VOLUME_MAX_SIZE_MB) -ip=127.0.0.1 -peers=none > /tmp/seaweedfs-sse-master.log 2>&1 & - @sleep 3 - - # Start volume server with master HTTP port and increased capacity - @nohup $(SEAWEEDFS_BINARY) volume -port=$(VOLUME_PORT) -master=127.0.0.1:$(MASTER_PORT) -dir=/tmp/seaweedfs-test-sse-volume -max=$(VOLUME_MAX_COUNT) -ip=127.0.0.1 > /tmp/seaweedfs-sse-volume.log 2>&1 & - @sleep 5 - - # Start filer server (using standard SeaweedFS gRPC port convention: HTTP port + 10000) - @nohup $(SEAWEEDFS_BINARY) filer -port=$(FILER_PORT) -port.grpc=$$(( $(FILER_PORT) + 10000 )) -master=127.0.0.1:$(MASTER_PORT) -dataCenter=defaultDataCenter -ip=127.0.0.1 > /tmp/seaweedfs-sse-filer.log 2>&1 & - @sleep 3 + @mkdir -p /tmp/seaweedfs-test-sse # Create S3 configuration with SSE-KMS support @printf '{"identities":[{"name":"%s","credentials":[{"accessKey":"%s","secretKey":"%s"}],"actions":["Admin","Read","Write"]}],"kms":{"type":"%s","configs":{"keyId":"%s","encryptionContext":{},"bucketKey":false}}}' "$(ACCESS_KEY)" "$(ACCESS_KEY)" "$(SECRET_KEY)" "$(KMS_TYPE)" "$(KMS_KEY_ID)" > /tmp/seaweedfs-sse-s3.json - # Start S3 server with KMS configuration - @nohup $(SEAWEEDFS_BINARY) s3 -port=$(S3_PORT) -filer=127.0.0.1:$(FILER_PORT) -config=/tmp/seaweedfs-sse-s3.json -ip.bind=127.0.0.1 > /tmp/seaweedfs-sse-s3.log 2>&1 & - @sleep 5 - - # Wait for S3 service to be ready - @echo "$(YELLOW)Waiting for S3 service to be ready...$(NC)" + # Start weed mini + @AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) $(SEAWEEDFS_BINARY) mini \ + -dir=/tmp/seaweedfs-test-sse \ + -s3.port=$(S3_PORT) \ + -s3.config=/tmp/seaweedfs-sse-s3.json \ + > /tmp/seaweedfs-sse-mini.log 2>&1 & echo $$! > /tmp/weed-mini.pid + + @echo "Checking S3 service is ready..." @for i in $$(seq 1 30); do \ - if curl -s -f http://127.0.0.1:$(S3_PORT) > /dev/null 2>&1; then \ - echo "$(GREEN)S3 service is ready$(NC)"; \ + if curl -s http://127.0.0.1:$(S3_PORT) > /dev/null 2>&1; then \ + echo "✅ S3 service is ready"; \ break; \ fi; \ - echo "Waiting for S3 service... 
($$i/30)"; \ sleep 1; \ done - - # Additional wait for filer gRPC to be ready - @echo "$(YELLOW)Waiting for filer gRPC to be ready...$(NC)" - @sleep 2 - @echo "$(GREEN)SeaweedFS server started successfully for SSE testing$(NC)" - @echo "Master: http://localhost:$(MASTER_PORT)" - @echo "Volume: http://localhost:$(VOLUME_PORT)" - @echo "Filer: http://localhost:$(FILER_PORT)" - @echo "S3: http://localhost:$(S3_PORT)" - @echo "Volume Max Size: $(VOLUME_MAX_SIZE_MB)MB" - @echo "SSE-KMS Support: Enabled" stop-seaweedfs: @echo "$(YELLOW)Stopping SeaweedFS server...$(NC)" @# Use port-based cleanup for consistency and safety + @if [ -f /tmp/weed-mini.pid ]; then \ + echo "Stopping weed mini..."; \ + kill $$(cat /tmp/weed-mini.pid) || true; \ + rm -f /tmp/weed-mini.pid; \ + fi @lsof -ti :$(MASTER_PORT) | xargs -r kill -TERM || true @lsof -ti :$(VOLUME_PORT) | xargs -r kill -TERM || true @lsof -ti :$(FILER_PORT) | xargs -r kill -TERM || true @@ -345,71 +326,33 @@ start-seaweedfs-ci: check-binary @echo "$(YELLOW)Starting SeaweedFS server for CI testing...$(NC)" # Create necessary directories - @mkdir -p /tmp/seaweedfs-test-sse-master - @mkdir -p /tmp/seaweedfs-test-sse-volume - @mkdir -p /tmp/seaweedfs-test-sse-filer + @mkdir -p /tmp/seaweedfs-test-sse # Clean up any old server logs @rm -f /tmp/seaweedfs-sse-*.log || true - # Start master server with volume size limit and explicit gRPC port - @echo "Starting master server..." - @nohup $(SEAWEEDFS_BINARY) master -port=$(MASTER_PORT) -port.grpc=$$(( $(MASTER_PORT) + 10000 )) -mdir=/tmp/seaweedfs-test-sse-master -volumeSizeLimitMB=$(VOLUME_MAX_SIZE_MB) -ip=127.0.0.1 -peers=none > /tmp/seaweedfs-sse-master.log 2>&1 & - @sleep 3 - - # Start volume server with master HTTP port and increased capacity - @echo "Starting volume server..." - @nohup $(SEAWEEDFS_BINARY) volume -port=$(VOLUME_PORT) -master=127.0.0.1:$(MASTER_PORT) -dir=/tmp/seaweedfs-test-sse-volume -max=$(VOLUME_MAX_COUNT) -ip=127.0.0.1 > /tmp/seaweedfs-sse-volume.log 2>&1 & - @sleep 5 - # Create S3 JSON configuration with KMS (Local provider) and basic identity for embedded S3 @sed -e 's/ACCESS_KEY_PLACEHOLDER/$(ACCESS_KEY)/g' \ -e 's/SECRET_KEY_PLACEHOLDER/$(SECRET_KEY)/g' \ s3-config-template.json > /tmp/seaweedfs-s3.json - # Start filer server with embedded S3 using the JSON config (with verbose logging) - @echo "Starting filer server with embedded S3..." - @AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) GLOG_v=4 nohup $(SEAWEEDFS_BINARY) filer -port=$(FILER_PORT) -port.grpc=$$(( $(FILER_PORT) + 10000 )) -master=127.0.0.1:$(MASTER_PORT) -dataCenter=defaultDataCenter -ip=127.0.0.1 -s3 -s3.port=$(S3_PORT) -s3.config=/tmp/seaweedfs-s3.json > /tmp/seaweedfs-sse-filer.log 2>&1 & - @sleep 5 + # Start weed mini with embedded S3 using the JSON config (with verbose logging) + @echo "Starting weed mini with embedded S3..." + @AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) GLOG_v=4 $(SEAWEEDFS_BINARY) mini \ + -dir=/tmp/seaweedfs-test-sse \ + -s3.port=$(S3_PORT) \ + -s3.config=/tmp/seaweedfs-s3.json \ + -ip=127.0.0.1 \ + > /tmp/seaweedfs-sse-mini.log 2>&1 & echo $$! 
> /tmp/weed-mini.pid - # Wait for S3 service to be ready - use port-based checking for reliability - @echo "$(YELLOW)Waiting for S3 service to be ready...$(NC)" - @for i in $$(seq 1 20); do \ - if netstat -an 2>/dev/null | grep -q ":$(S3_PORT).*LISTEN" || \ - ss -an 2>/dev/null | grep -q ":$(S3_PORT).*LISTEN" || \ - lsof -i :$(S3_PORT) >/dev/null 2>&1; then \ - echo "$(GREEN)S3 service is listening on port $(S3_PORT)$(NC)"; \ - sleep 1; \ + @echo "Checking S3 service is ready..." + @for i in $$(seq 1 30); do \ + if curl -s http://127.0.0.1:$(S3_PORT) > /dev/null 2>&1; then \ + echo "✅ S3 service is ready"; \ break; \ fi; \ - if [ $$i -eq 20 ]; then \ - echo "$(RED)S3 service failed to start within 20 seconds$(NC)"; \ - echo "=== Detailed Logs ==="; \ - echo "Master log:"; tail -30 /tmp/seaweedfs-sse-master.log || true; \ - echo "Volume log:"; tail -30 /tmp/seaweedfs-sse-volume.log || true; \ - echo "Filer log:"; tail -30 /tmp/seaweedfs-sse-filer.log || true; \ - echo "=== Port Status ==="; \ - netstat -an 2>/dev/null | grep ":$(S3_PORT)" || \ - ss -an 2>/dev/null | grep ":$(S3_PORT)" || \ - echo "No port listening on $(S3_PORT)"; \ - echo "=== Process Status ==="; \ - ps aux | grep -E "weed.*(filer|s3).*$(S3_PORT)" | grep -v grep || echo "No S3 process found"; \ - exit 1; \ - fi; \ - echo "Waiting for S3 service... ($$i/20)"; \ sleep 1; \ done - - # Additional wait for filer gRPC to be ready - @echo "$(YELLOW)Waiting for filer gRPC to be ready...$(NC)" - @sleep 2 - @echo "$(GREEN)SeaweedFS server started successfully for SSE testing$(NC)" - @echo "Master: http://localhost:$(MASTER_PORT)" - @echo "Volume: http://localhost:$(VOLUME_PORT)" - @echo "Filer: http://localhost:$(FILER_PORT)" - @echo "S3: http://localhost:$(S3_PORT)" - @echo "Volume Max Size: $(VOLUME_MAX_SIZE_MB)MB" - @echo "SSE-KMS Support: Enabled" # GitHub Actions compatible quick test subset test-quick-with-server: build-weed diff --git a/test/s3/tagging/Makefile b/test/s3/tagging/Makefile index c495d1a40..0ae7b1518 100644 --- a/test/s3/tagging/Makefile +++ b/test/s3/tagging/Makefile @@ -72,41 +72,17 @@ start-server: check-deps fi @echo "🔍 DEBUG: Checking binary at $(WEED_BINARY)" @ls -la $(WEED_BINARY) || (echo "❌ Binary not found!" && exit 1) - @echo "🔍 DEBUG: Checking config file at ../../../docker/compose/s3.json" - @ls -la ../../../docker/compose/s3.json || echo "⚠️ Config file not found, continuing without it" @echo "🔍 DEBUG: Creating volume directory..." @mkdir -p ./test-volume-data @echo "🔍 DEBUG: Launching SeaweedFS server in background..." 
- @echo "🔍 DEBUG: Command: $(WEED_BINARY) server -filer -filer.maxMB=64 -s3 -ip.bind 0.0.0.0 -dir=./test-volume-data -master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 -volume.max=100 -volume.preStopSeconds=1 -master.port=$(MASTER_PORT) -volume.port=$(VOLUME_PORT) -filer.port=$(FILER_PORT) -s3.port=$(S3_PORT) -metricsPort=9329 -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -master.peers=none"
- @$(WEED_BINARY) server \
- -filer \
+ @echo "🔍 DEBUG: Command: $(WEED_BINARY) mini -filer.maxMB=64 -dir=./test-volume-data -master.raftHashicorp -s3.port=$(S3_PORT)"
+ @$(WEED_BINARY) mini \
 -filer.maxMB=64 \
- -s3 \
- -ip.bind 0.0.0.0 \
 -dir=./test-volume-data \
 -master.raftHashicorp \
- -master.electionTimeout 1s \
- -master.volumeSizeLimitMB=100 \
- -volume.max=100 \
- -volume.preStopSeconds=1 \
- -master.port=$(MASTER_PORT) \
- -volume.port=$(VOLUME_PORT) \
- -filer.port=$(FILER_PORT) \
 -s3.port=$(S3_PORT) \
- -metricsPort=9329 \
- -s3.allowDeleteBucketNotEmpty=true \
- -s3.config=../../../docker/compose/s3.json \
- -master.peers=none \
- > weed-test.log 2>&1 & echo $$! > weed-server.pid
- @echo "🔍 DEBUG: Server PID: $$(cat weed-server.pid 2>/dev/null || echo 'PID file not found')"
- @echo "🔍 DEBUG: Checking if PID is still running..."
- @sleep 2
- @if [ -f weed-server.pid ]; then \
- SERVER_PID=$$(cat weed-server.pid); \
- ps -p $$SERVER_PID || echo "⚠️ Server PID $$SERVER_PID not found after 2 seconds"; \
- else \
- echo "⚠️ PID file not found"; \
- fi
+ > weed-test.log 2>&1 & \
+ echo $$! > weed-server.pid
 @echo "🔍 DEBUG: Waiting for server to start (up to 90 seconds)..."
 @for i in $$(seq 1 90); do \
 echo "🔍 DEBUG: Attempt $$i/90 - checking port $(S3_PORT)"; \
@@ -125,8 +101,6 @@ start-server: check-deps
 if [ $$i -eq 15 ]; then \
 echo "🔍 DEBUG: After 15 seconds, checking port bindings..."; \
 netstat -tlnp 2>/dev/null | grep $(S3_PORT) || echo "Port $(S3_PORT) not bound"; \
- netstat -tlnp 2>/dev/null | grep $(MASTER_PORT) || echo "Port $(MASTER_PORT) not bound"; \
- netstat -tlnp 2>/dev/null | grep $(VOLUME_PORT) || echo "Port $(VOLUME_PORT) not bound"; \
 fi; \
 if [ $$i -eq 30 ]; then \
 echo "⚠️ Server taking longer than expected (30s), checking logs..."; \
@@ -141,7 +115,7 @@ start-server: check-deps
 echo "🔍 DEBUG: Final process check:"; \
 ps aux | grep weed | grep -v grep || echo "No weed processes found"; \
 echo "🔍 DEBUG: Final port check:"; \
- netstat -tlnp 2>/dev/null | grep -E "($(S3_PORT)|$(MASTER_PORT)|$(VOLUME_PORT))" || echo "No ports bound"; \
+ netstat -tlnp 2>/dev/null | grep $(S3_PORT) || echo "No ports bound"; \
 echo "=== Full server logs ==="; \
 if [ -f weed-test.log ]; then \
 cat weed-test.log; \
diff --git a/test/s3/versioning/Makefile b/test/s3/versioning/Makefile
index 08a18fd96..7e939f90e 100644
--- a/test/s3/versioning/Makefile
+++ b/test/s3/versioning/Makefile
@@ -43,13 +43,13 @@ build-weed:
 @echo "Building SeaweedFS binary..."
 @cd ../../../weed && go build -o weed_binary .
 @chmod +x $(WEED_BINARY)
- @echo "✅ SeaweedFS binary built at $(WEED_BINARY)"
+ @echo "OK SeaweedFS binary built at $(WEED_BINARY)"
check-deps: build-weed
 @echo "Checking dependencies..."
- @echo "🔍 DEBUG: Checking Go installation..."
+ @echo "DEBUG: Checking Go installation..."
 @command -v go >/dev/null 2>&1 || (echo "Go is required but not installed" && exit 1)
- @echo "🔍 DEBUG: Go version: $$(go version)"
+ @echo "DEBUG: Go version: $$(go version)"
 @echo "🔍 DEBUG: Checking binary at $(WEED_BINARY)..."
@test -f $(WEED_BINARY) || (echo "SeaweedFS binary not found at $(WEED_BINARY)" && exit 1) @echo "🔍 DEBUG: Binary size: $$(ls -lh $(WEED_BINARY) | awk '{print $$5}')" @@ -57,7 +57,7 @@ check-deps: build-weed @echo "🔍 DEBUG: Checking Go module dependencies..." @go list -m github.com/aws/aws-sdk-go-v2 >/dev/null 2>&1 || (echo "AWS SDK Go v2 not found. Run 'go mod tidy'." && exit 1) @go list -m github.com/stretchr/testify >/dev/null 2>&1 || (echo "Testify not found. Run 'go mod tidy'." && exit 1) - @echo "✅ All dependencies are available" + @echo "OK All dependencies are available" # Start SeaweedFS server for testing start-server: check-deps @@ -81,21 +81,11 @@ start-server: check-deps @echo "🔍 DEBUG: Creating volume directory..." @mkdir -p ./test-volume-data @echo "🔍 DEBUG: Launching SeaweedFS server in background..." - @echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -master.peers=none -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324" - @$(WEED_BINARY) server \ - -debug \ - -s3 \ + @echo "🔍 DEBUG: Command: $(WEED_BINARY) mini -dir=./test-volume-data -s3.port=$(S3_PORT) -s3.config=../../../docker/compose/s3.json" + @$(WEED_BINARY) mini \ + -dir=./test-volume-data \ -s3.port=$(S3_PORT) \ - -s3.allowDeleteBucketNotEmpty=true \ -s3.config=../../../docker/compose/s3.json \ - -filer \ - -filer.maxMB=64 \ - -master.volumeSizeLimitMB=50 \ - -master.peers=none \ - -volume.max=100 \ - -dir=./test-volume-data \ - -volume.preStopSeconds=1 \ - -metricsPort=9324 \ > weed-test.log 2>&1 & echo $$! > weed-server.pid @echo "🔍 DEBUG: Server PID: $$(cat weed-server.pid 2>/dev/null || echo 'PID file not found')" @echo "🔍 DEBUG: Checking if PID is still running..." @@ -222,13 +212,13 @@ test-with-server: start-server test-versioning-with-configs: check-deps @echo "Testing with different S3 configurations..." @echo "Testing with empty folder allowed..." - @$(WEED_BINARY) server -s3 -s3.port=$(S3_PORT) -filer -master.volumeSizeLimitMB=100 -master.peers=none -volume.max=100 > weed-test-config1.log 2>&1 & echo $$! > weed-config1.pid + @$(WEED_BINARY) mini -s3.port=$(S3_PORT) > weed-test-config1.log 2>&1 & echo $$! > weed-config1.pid @sleep 5 @go test -v -timeout=5m -run "TestVersioningBasicWorkflow" . || true @if [ -f weed-config1.pid ]; then kill -TERM $$(cat weed-config1.pid) 2>/dev/null || true; rm -f weed-config1.pid; fi @sleep 2 @echo "Testing with delete bucket not empty disabled..." - @$(WEED_BINARY) server -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=false -filer -master.volumeSizeLimitMB=100 -master.peers=none -volume.max=100 > weed-test-config2.log 2>&1 & echo $$! > weed-config2.pid + @$(WEED_BINARY) mini -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=false > weed-test-config2.log 2>&1 & echo $$! > weed-config2.pid @sleep 5 @go test -v -timeout=5m -run "TestVersioningBasicWorkflow" . || true @if [ -f weed-config2.pid ]; then kill -TERM $$(cat weed-config2.pid) 2>/dev/null || true; rm -f weed-config2.pid; fi @@ -271,19 +261,14 @@ debug-server: @echo "Starting SeaweedFS server in debug mode..." 
@$(MAKE) stop-server
 @mkdir -p ./test-volume-data
- @$(WEED_BINARY) server \
+ @$(WEED_BINARY) mini \
 -debug \
- -s3 \
 -s3.port=$(S3_PORT) \
 -s3.allowDeleteBucketNotEmpty=true \
 -s3.config=../../../docker/compose/s3.json \
- -filer \
 -filer.maxMB=16 \
- -master.volumeSizeLimitMB=50 \
 -master.peers=none \
- -volume.max=100 \
 -dir=./test-volume-data \
- -volume.preStopSeconds=1 \
 -metricsPort=9324
# Run a single test for debugging
@@ -320,24 +305,24 @@ health-check:
# Simple server start without process cleanup (for CI troubleshooting)
start-server-simple: check-deps
 @echo "Starting SeaweedFS server (simple mode)..."
- @$(WEED_BINARY) server \
- -debug \
- -s3 \
+ @$(WEED_BINARY) mini \
+ -dir=$(SERVER_DIR) \
 -s3.port=$(S3_PORT) \
- -s3.allowDeleteBucketNotEmpty=true \
- -s3.config=../../../docker/compose/s3.json \
- -filer \
- -filer.maxMB=64 \
- -master.volumeSizeLimitMB=50 \
- -master.peers=none \
- -volume.max=100 \
- -volume.preStopSeconds=1 \
- -metricsPort=9324 \
- > weed-test.log 2>&1 & echo $$! > weed-server.pid
- @echo "Server PID: $$(cat weed-server.pid)"
- @echo "Waiting for server to start..."
- @sleep 10
- @curl -s http://localhost:$(S3_PORT) >/dev/null 2>&1 && echo "✅ Server started successfully" || echo "❌ Server failed to start"
+ -s3.config=$(S3_CONFIG) \
+ > weed-server.log 2>&1 & \
+ echo $$! > weed-server.pid
+
+ @echo "Waiting for S3 server to be ready..."
+ @for i in $$(seq 1 30); do \
+ if nc -z localhost $(S3_PORT) >/dev/null 2>&1; then \
+ echo "S3 server is ready!"; \
+ exit 0; \
+ fi; \
+ sleep 1; \
+ done; \
+ echo "S3 server failed to start"; \
+ exit 1
# Simple test run without server management
test-versioning-simple: check-deps
diff --git a/weed/command/mini.go b/weed/command/mini.go
index 3fcfbc6d4..17430e916 100644
--- a/weed/command/mini.go
+++ b/weed/command/mini.go
@@ -1062,9 +1062,7 @@ func startMiniWorker() {
 // Set admin client
 workerInstance.SetAdminClient(adminClient)
- // Start metrics server for health checks and monitoring (uses shared metrics port like other services)
- // This allows Kubernetes probes to check worker health via /health endpoint
- go stats_collect.StartMetricsServer(*miniMetricsHttpIp, *miniMetricsHttpPort)
+ // Metrics server is already started in the main init function above, so no need to start it again here
 // Start the worker
 err = workerInstance.Start()
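
Note: the start-and-wait pattern that these Makefiles now converge on can be reduced to the sketch below. This is illustrative only and not part of the patch: the target name, data directory, log/PID file names, and credential values are placeholders, and the only `weed mini` flags assumed are ones already exercised in the hunks above (`-dir`, `-s3.port`).

# Illustrative sketch of the standardized weed-mini start/readiness pattern (not part of the patch):
start-mini-example:
	@mkdir -p ./test-mini-data
	@AWS_ACCESS_KEY_ID=some_access_key1 AWS_SECRET_ACCESS_KEY=some_secret_key1 \
		$(WEED_BINARY) mini \
		-dir=./test-mini-data \
		-s3.port=$(S3_PORT) \
		> weed-test.log 2>&1 & echo $$! > weed-server.pid
	@for i in $$(seq 1 30); do \
		if curl -s http://localhost:$(S3_PORT) >/dev/null 2>&1; then \
			echo "S3 service is ready"; exit 0; \
		fi; \
		sleep 1; \
	done; \
	echo "S3 service failed to start"; cat weed-test.log; exit 1

The loop uses only `seq`, `sleep`, and `curl` rather than the GNU-specific `timeout` command, which is why the same shape appears in every converted Makefile in this patch.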