
Refactor S3 integration tests to use weed mini (#7877)

* Refactor S3 integration tests to use weed mini

* Fix weed mini flags for sse and parquet tests

* Fix IAM test startup: remove -iam.config flag from weed mini

* Enhance logging in IAM Makefile to debug startup failure

* Simplify weed mini flags and checks in S3 tests (IAM, Parquet, SSE, Copying)

* Simplify weed mini flags and checks in all S3 tests

* Fix IAM tests: use -s3.iam.config for weed mini

* Replace timeout command with portable loop in IAM Makefile

* Standardize portable loop-based readiness checks in all S3 Makefiles

* Define SERVER_DIR in retention Makefile

* Fix versioning and retention Makefiles: remove unsupported weed mini flags

* fix filer_group test

* fix cors

* emojis

* fix sse

* fix retention

* fixes

* fix

* fixes

* fix parquet

* fixes

* fix

* clean up

* avoid duplicated debug server

* Update .gitignore

* simplify

* clean up

* add credentials

* bind

* delay

* Update Makefile

* Update Makefile

* check ready

* delay

* update remote credentials

* Update Makefile

* clean up

* kill

* Update Makefile

* update credentials
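
Taken together, these commits converge every S3 test suite on a single startup pattern: one `weed mini` process replaces the separate master, volume, filer, and S3 servers; credentials are passed through the environment; and readiness is polled with a portable shell loop instead of the GNU `timeout` command. A minimal sketch of that shared pattern, with illustrative paths and the default test ports used in the Makefiles below:

```bash
#!/bin/sh
# Credentials come from the environment, as in the refactored Makefiles.
export AWS_ACCESS_KEY_ID=some_access_key1
export AWS_SECRET_ACCESS_KEY=some_secret_key1

# One all-in-one process instead of separate weed master/volume/filer/s3.
weed mini \
  -dir=/tmp/seaweedfs-test-data \
  -s3.port=8333 \
  -s3.config=/tmp/seaweedfs-s3.json \
  > weed-mini.log 2>&1 &
echo $! > weed-mini.pid

# Portable readiness check: a plain POSIX loop, no GNU `timeout` needed.
for i in $(seq 1 30); do
  if curl -s http://localhost:8333 >/dev/null 2>&1; then
    echo "S3 server is ready"
    exit 0
  fi
  sleep 1
done
echo "S3 server failed to start" >&2
cat weed-mini.log >&2
exit 1
```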
Chris Lu, committed by GitHub
commit 7064ad420d
1. .gitignore (2 lines changed)
2. test/fuse_integration/Makefile (18 lines changed)
3. test/s3/compatibility/run.sh (5 lines changed)
4. test/s3/copying/Makefile (48 lines changed)
5. test/s3/cors/Makefile (46 lines changed)
6. test/s3/filer_group/Makefile (59 lines changed)
7. test/s3/filer_group/test_config.json (1 line changed)
8. test/s3/iam/Makefile (99 lines changed)
9. test/s3/parquet/CROSS_FILESYSTEM_COMPATIBILITY.md (172 lines changed)
10. test/s3/parquet/FINAL_ROOT_CAUSE_ANALYSIS.md (58 lines changed)
11. test/s3/parquet/MINIO_DIRECTORY_HANDLING.md (70 lines changed)
12. test/s3/parquet/Makefile (164 lines changed)
13. test/s3/parquet/TEST_COVERAGE.md (46 lines changed)
14. test/s3/parquet/test_implicit_directory_fix.py (1 line changed)
15. test/s3/remote_cache/Makefile (36 lines changed)
16. test/s3/remote_cache/remote_cache_test.go (15 lines changed)
17. test/s3/retention/Makefile (31 lines changed)
18. test/s3/sse/Makefile (115 lines changed)
19. test/s3/tagging/Makefile (36 lines changed)
20. test/s3/versioning/Makefile (67 lines changed)
21. weed/command/mini.go (4 lines changed)

.gitignore (2 lines changed)

@@ -131,3 +131,5 @@ coverage.out
test/s3/remote_cache/remote-server.pid
test/s3/remote_cache/primary-server.pid
/test/erasure_coding/filerldb2
/test/s3/cors/test-mini-data
/test/s3/filer_group/test-volume-data

test/fuse_integration/Makefile (18 lines changed)

@@ -12,20 +12,20 @@ COVERAGE_FILE := coverage.out
# Check if weed binary exists
check-binary:
@if [ ! -f "$(WEED_BINARY)" ]; then \
echo " SeaweedFS binary not found at $(WEED_BINARY)"; \
echo "ERROR SeaweedFS binary not found at $(WEED_BINARY)"; \
echo " Please run 'make' in the root directory first"; \
exit 1; \
fi
@echo " SeaweedFS binary found"
@echo "OK SeaweedFS binary found"
# Check FUSE installation
check-fuse:
@if command -v fusermount >/dev/null 2>&1; then \
echo " FUSE is installed (Linux)"; \
echo "OK FUSE is installed (Linux)"; \
elif command -v umount >/dev/null 2>&1 && [ "$$(uname)" = "Darwin" ]; then \
echo " FUSE is available (macOS)"; \
echo "OK FUSE is available (macOS)"; \
else \
echo " FUSE not found. Please install:"; \
echo "ERROR FUSE not found. Please install:"; \
echo " Ubuntu/Debian: sudo apt-get install fuse"; \
echo " CentOS/RHEL: sudo yum install fuse"; \
echo " macOS: brew install macfuse"; \
@@ -36,8 +36,8 @@ check-fuse:
check-go:
@go version | grep -q "go1\.[2-9][0-9]" || \
go version | grep -q "go1\.2[1-9]" || \
(echo " Go $(GO_VERSION)+ required. Current: $$(go version)" && exit 1)
@echo " Go version check passed"
(echo "ERROR Go $(GO_VERSION)+ required. Current: $$(go version)" && exit 1)
@echo "OK Go version check passed"
# Verify all prerequisites
check-prereqs: check-go check-fuse
@@ -45,9 +45,9 @@ check-prereqs: check-go check-fuse
# Build the SeaweedFS binary (if needed)
build:
@echo "🔨 Building SeaweedFS..."
@echo "Building SeaweedFS..."
cd ../.. && make
@echo " Build complete"
@echo "OK Build complete"
# Initialize go module (if needed)
init-module:

test/s3/compatibility/run.sh (5 lines changed)

@@ -22,10 +22,7 @@ docker stop $CONTAINER_NAME || echo "already stopped"
ulimit -n 10000
# Start weed w/ filer + s3 in the background
$WEED_BIN server \
-filer \
-s3 \
-volume.max 0 \
$WEED_BIN mini \
-master.volumeSizeLimitMB 5 \
-dir "$(pwd)/tmp" \
1>&2>weed.log &

test/s3/copying/Makefile (48 lines changed)

@@ -63,30 +63,24 @@ start-seaweedfs: check-binary
@pkill -f "weed volume" || true
@pkill -f "weed filer" || true
@pkill -f "weed s3" || true
@pkill -f "weed mini" || true
@sleep 2
# Create necessary directories
@mkdir -p /tmp/seaweedfs-test-copying-master
@mkdir -p /tmp/seaweedfs-test-copying-volume
@mkdir -p /tmp/seaweedfs-test-copying
# Start master server with volume size limit
@nohup $(SEAWEEDFS_BINARY) master -port=$(MASTER_PORT) -mdir=/tmp/seaweedfs-test-copying-master -volumeSizeLimitMB=$(VOLUME_MAX_SIZE_MB) -ip=127.0.0.1 -peers=none > /tmp/seaweedfs-master.log 2>&1 &
@sleep 3
# Start volume server
@nohup $(SEAWEEDFS_BINARY) volume -port=$(VOLUME_PORT) -master=127.0.0.1:$(MASTER_PORT) -dir=/tmp/seaweedfs-test-copying-volume -ip=127.0.0.1 > /tmp/seaweedfs-volume.log 2>&1 &
@sleep 3
# Start filer server (using standard SeaweedFS gRPC port convention: HTTP port + 10000)
@nohup $(SEAWEEDFS_BINARY) filer -port=$(FILER_PORT) -port.grpc=$$(( $(FILER_PORT) + 10000 )) -master=127.0.0.1:$(MASTER_PORT) -ip=127.0.0.1 > /tmp/seaweedfs-filer.log 2>&1 &
@sleep 3
# Create S3 configuration
@echo '{"identities":[{"name":"$(ACCESS_KEY)","credentials":[{"accessKey":"$(ACCESS_KEY)","secretKey":"$(SECRET_KEY)"}],"actions":["Admin","Read","Write"]}]}' > /tmp/seaweedfs-s3.json
# Start S3 server
@nohup $(SEAWEEDFS_BINARY) s3 -port=$(S3_PORT) -filer=127.0.0.1:$(FILER_PORT) -config=/tmp/seaweedfs-s3.json -ip.bind=127.0.0.1 > /tmp/seaweedfs-s3.log 2>&1 &
@sleep 5
# Start weed mini
@echo "Starting weed mini with dir=/tmp/seaweedfs-test-copying"
@export AWS_ACCESS_KEY_ID=$(ACCESS_KEY) && \
export AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) && \
# Start weed mini with S3 configuration
@echo "Starting weed mini..."
@nohup $(SEAWEEDFS_BINARY) mini \
-dir=/tmp/seaweedfs-test-copying \
-s3.port=$(S3_PORT) \
-s3.config=/tmp/seaweedfs-s3.json \
-ip=127.0.0.1 \
> /tmp/seaweedfs-mini.log 2>&1 & echo $$! > /tmp/weed-mini.pid
# Wait for S3 service to be ready
@echo "$(YELLOW)Waiting for S3 service to be ready...$(NC)"
@@ -98,16 +92,12 @@ start-seaweedfs: check-binary
echo "Waiting for S3 service... ($$i/30)"; \
sleep 1; \
done
# Additional wait for filer gRPC to be ready
@echo "$(YELLOW)Waiting for filer gRPC to be ready...$(NC)"
@sleep 2
@echo "$(GREEN)SeaweedFS server started successfully$(NC)"
@echo "Master: http://localhost:$(MASTER_PORT)"
@echo "Volume: http://localhost:$(VOLUME_PORT)"
@echo "Filer: http://localhost:$(FILER_PORT)"
# Additional wait for filer gRPC to be ready
@echo "$(YELLOW)Waiting for filer gRPC to be ready...$(NC)"
@sleep 2
@echo "$(GREEN)SeaweedFS server started successfully$(NC)"
@echo "Mini Log: /tmp/seaweedfs-mini.log"
@echo "S3: http://localhost:$(S3_PORT)"
@echo "Volume Max Size: $(VOLUME_MAX_SIZE_MB)MB"
stop-seaweedfs:
@echo "$(YELLOW)Stopping SeaweedFS server...$(NC)"

test/s3/cors/Makefile (46 lines changed)

@@ -11,6 +11,7 @@ VOLUME_PORT := 8080
FILER_PORT := 8888
TEST_TIMEOUT := 10m
TEST_PATTERN := TestCORS
SERVER_DIR := test-mini-data
# Default target
help:
@@ -41,21 +42,21 @@ build-weed:
@echo "Building SeaweedFS binary..."
@cd ../../../weed && go build -o weed_binary .
@chmod +x $(WEED_BINARY)
@echo " SeaweedFS binary built at $(WEED_BINARY)"
@echo "OK SeaweedFS binary built at $(WEED_BINARY)"
check-deps: build-weed
@echo "Checking dependencies..."
@echo "🔍 DEBUG: Checking Go installation..."
@echo "DEBUG: Checking Go installation..."
@command -v go >/dev/null 2>&1 || (echo "Go is required but not installed" && exit 1)
@echo "🔍 DEBUG: Go version: $$(go version)"
@echo "🔍 DEBUG: Checking binary at $(WEED_BINARY)..."
@echo "DEBUG: Go version: $$(go version)"
@echo "DEBUG: Checking binary at $(WEED_BINARY)..."
@test -f $(WEED_BINARY) || (echo "SeaweedFS binary not found at $(WEED_BINARY)" && exit 1)
@echo "🔍 DEBUG: Binary size: $$(ls -lh $(WEED_BINARY) | awk '{print $$5}')"
@echo "🔍 DEBUG: Binary permissions: $$(ls -la $(WEED_BINARY) | awk '{print $$1}')"
@echo "🔍 DEBUG: Checking Go module dependencies..."
@go list -m github.com/aws/aws-sdk-go-v2 >/dev/null 2>&1 || (echo "AWS SDK Go v2 not found. Run 'go mod tidy'." && exit 1)
@go list -m github.com/stretchr/testify >/dev/null 2>&1 || (echo "Testify not found. Run 'go mod tidy'." && exit 1)
@echo " All dependencies are available"
@echo "OK All dependencies are available"
# Start SeaweedFS server for testing
start-server: check-deps
@@ -77,23 +78,26 @@ start-server: check-deps
@echo "🔍 DEBUG: Checking config file at ../../../docker/compose/s3.json"
@ls -la ../../../docker/compose/s3.json || echo "⚠️ Config file not found, continuing without it"
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@$(WEED_BINARY) server \
-debug \
-s3 \
@mkdir -p $(SERVER_DIR)
@echo "🔍 DEBUG: Launching SeaweedFS S3 server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) mini -dir=$(SERVER_DIR) -s3.port=$(S3_PORT) -s3.config=$(S3_CONFIG)"
@$(WEED_BINARY) mini \
-dir=$(SERVER_DIR) \
-s3.port=$(S3_PORT) \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \
-filer.maxMB=64 \
-master.volumeSizeLimitMB=50 \
-volume.max=100 \
-dir=./test-volume-data \
-volume.preStopSeconds=1 \
-metricsPort=9324 \
> weed-test.log 2>&1 & echo $$! > weed-server.pid
-s3.config=$(S3_CONFIG) \
> weed-test.log 2>&1 & \
echo $$! > weed-server.pid
@echo "Waiting for S3 server to be ready..."
@for i in $$(seq 1 30); do \
if echo | nc -z localhost $(S3_PORT); then \
echo "S3 server is ready!"; \
exit 0; \
fi; \
sleep 1; \
done; \
echo "S3 server failed to start"; \
exit 1
@echo "🔍 DEBUG: Server PID: $$(cat weed-server.pid 2>/dev/null || echo 'PID file not found')"
@echo "🔍 DEBUG: Checking if PID is still running..."
@sleep 2

test/s3/filer_group/Makefile (59 lines changed)

@@ -16,6 +16,8 @@ TEST_PATTERN := Test
# Filer group configuration
FILER_GROUP := testgroup
SERVER_DIR := ./test-volume-data
S3_CONFIG := ../../../docker/compose/s3.json
# Default target
help:
@@ -44,7 +46,7 @@ build-weed:
@echo "Building SeaweedFS binary..."
@cd ../../../weed && go build -o weed_binary .
@chmod +x $(WEED_BINARY)
@echo " SeaweedFS binary built at $(WEED_BINARY)"
@echo "OK SeaweedFS binary built at $(WEED_BINARY)"
check-deps: build-weed
@echo "Checking dependencies..."
@@ -53,51 +55,40 @@ check-deps: build-weed
@test -f $(WEED_BINARY) || (echo "SeaweedFS binary not found at $(WEED_BINARY)" && exit 1)
@go list -m github.com/aws/aws-sdk-go-v2 >/dev/null 2>&1 || (echo "AWS SDK Go v2 not found. Run 'go mod tidy'." && exit 1)
@go list -m github.com/stretchr/testify >/dev/null 2>&1 || (echo "Testify not found. Run 'go mod tidy'." && exit 1)
@echo " All dependencies are available"
@echo "OK All dependencies are available"
# Start SeaweedFS server with filer group configured
start-server: check-deps
@echo "Starting SeaweedFS server with filer group: $(FILER_GROUP)..."
@rm -f weed-server.pid
@mkdir -p ./test-volume-data
@mkdir -p $(SERVER_DIR)
@if netstat -tlnp 2>/dev/null | grep $(S3_PORT) >/dev/null; then \
echo "⚠️ Port $(S3_PORT) is already in use"; \
echo "WARNING: Port $(S3_PORT) is already in use"; \
exit 1; \
fi
@echo "Launching SeaweedFS server with filer group $(FILER_GROUP)..."
@$(WEED_BINARY) server \
@export AWS_ACCESS_KEY_ID=some_access_key1 && \
export AWS_SECRET_ACCESS_KEY=some_secret_key1 && \
$(WEED_BINARY) mini \
-debug \
-s3 \
-dir=$(SERVER_DIR) \
-s3.port=$(S3_PORT) \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \
-filer.maxMB=64 \
-s3.config=$(S3_CONFIG) \
-filer.filerGroup=$(FILER_GROUP) \
-master.volumeSizeLimitMB=50 \
-volume.max=100 \
-dir=./test-volume-data \
-volume.preStopSeconds=1 \
-metricsPort=9325 \
> weed-test.log 2>&1 & echo $$! > weed-server.pid
@echo "Server PID: $$(cat weed-server.pid 2>/dev/null || echo 'PID file not found')"
@echo "Waiting for server to start (up to 90 seconds)..."
@for i in $$(seq 1 90); do \
if curl -s http://localhost:$(S3_PORT) >/dev/null 2>&1; then \
echo "✅ SeaweedFS server started successfully on port $(S3_PORT) with filer group $(FILER_GROUP)"; \
> weed-server.log 2>&1 & \
echo $$! > weed-server.pid
@echo "Waiting for S3 server to be ready..."
@for i in $$(seq 1 30); do \
if echo | nc -z localhost $(S3_PORT); then \
echo "S3 server is ready!"; \
exit 0; \
fi; \
if [ $$i -eq 30 ]; then \
echo "⚠️ Server taking longer than expected (30s), checking logs..."; \
if [ -f weed-test.log ]; then \
tail -20 weed-test.log; \
fi; \
fi; \
sleep 1; \
done; \
echo "❌ Server failed to start within 90 seconds"; \
if [ -f weed-test.log ]; then \
cat weed-test.log; \
echo "❌ Server failed to start within 30 seconds"; \
if [ -f weed-server.log ]; then \
cat weed-server.log; \
fi; \
exit 1
@@ -126,9 +117,9 @@ stop-server:
# Show server logs
logs:
@if test -f weed-test.log; then \
@if test -f weed-server.log; then \
echo "=== SeaweedFS Server Logs ==="; \
tail -f weed-test.log; \
tail -f weed-server.log; \
else \
echo "No log file found. Server may not be running."; \
fi
@@ -146,7 +137,7 @@ test-with-server: start-server
@echo "Test pattern: $(TEST_PATTERN)"
@echo "Test timeout: $(TEST_TIMEOUT)"
@trap "$(MAKE) stop-server" EXIT; \
$(MAKE) test || (echo "❌ Tests failed, showing server logs:" && echo "=== Last 50 lines of server logs ===" && tail -50 weed-test.log && echo "=== End of server logs ===" && exit 1)
$(MAKE) test || (echo "❌ Tests failed, showing server logs:" && echo "=== Last 50 lines of server logs ===" && tail -50 weed-server.log && echo "=== End of server logs ===" && exit 1)
@$(MAKE) stop-server
@echo "✅ Tests completed and server stopped"
@@ -154,7 +145,7 @@ test-with-server: start-server
clean:
@echo "Cleaning up test artifacts..."
@$(MAKE) stop-server
@rm -f weed-test*.log weed-server.pid
@rm -f weed-server.log weed-test*.log weed-server.pid
@rm -rf test-volume-data/
@go clean -testcache
@echo "✅ Cleanup completed"

test/s3/filer_group/test_config.json (1 line changed)

@@ -1,6 +1,5 @@
{
"s3_endpoint": "http://localhost:8333",
"master_address": "localhost:9333",
"access_key": "some_access_key1",
"secret_key": "some_secret_key1",
"region": "us-east-1",

test/s3/iam/Makefile (99 lines changed)

@@ -19,6 +19,7 @@ MASTER_PID_FILE = /tmp/weed-master.pid
VOLUME_PID_FILE = /tmp/weed-volume.pid
FILER_PID_FILE = /tmp/weed-filer.pid
S3_PID_FILE = /tmp/weed-s3.pid
MINI_PID_FILE = /tmp/weed-mini.pid
help: ## Show this help message
@echo "SeaweedFS S3 IAM Integration Tests"
@@ -49,80 +50,54 @@ test: clean setup start-services run-tests stop-services ## Run complete IAM int
test-quick: run-tests ## Run tests assuming services are already running
run-tests: ## Execute the Go tests
@echo "🧪 Running S3 IAM Integration Tests..."
@echo "Running S3 IAM Integration Tests..."
go test -v -timeout $(TEST_TIMEOUT) ./...
setup: ## Setup test environment
@echo "🔧 Setting up test environment..."
@echo "Setting up test environment..."
@mkdir -p test-volume-data/filerldb2
@mkdir -p test-volume-data/m9333
start-services: ## Start SeaweedFS services for testing
@echo "🚀 Starting SeaweedFS services..."
@echo "Starting master server..."
@$(WEED_BINARY) master -port=$(MASTER_PORT) \
-mdir=test-volume-data/m9333 \
-peers=none > weed-master.log 2>&1 & \
echo $$! > $(MASTER_PID_FILE)
@echo "Waiting for master server to be ready..."
@timeout 60 bash -c 'until curl -s http://localhost:$(MASTER_PORT)/cluster/status > /dev/null 2>&1; do echo "Waiting for master server..."; sleep 2; done' || (echo "❌ Master failed to start, checking logs..." && tail -20 weed-master.log && exit 1)
@echo "✅ Master server is ready"
@echo "Starting volume server..."
@$(WEED_BINARY) volume -port=$(VOLUME_PORT) \
-ip=localhost \
-dataCenter=dc1 -rack=rack1 \
@echo "Starting SeaweedFS services using weed mini..."
@mkdir -p test-volume-data
@$(WEED_BINARY) mini \
-dir=test-volume-data \
-max=100 \
-master=localhost:$(MASTER_PORT) > weed-volume.log 2>&1 & \
echo $$! > $(VOLUME_PID_FILE)
@echo "Waiting for volume server to be ready..."
@timeout 60 bash -c 'until curl -s http://localhost:$(VOLUME_PORT)/status > /dev/null 2>&1; do echo "Waiting for volume server..."; sleep 2; done' || (echo "❌ Volume server failed to start, checking logs..." && tail -20 weed-volume.log && exit 1)
@echo "✅ Volume server is ready"
@echo "Starting filer server..."
@$(WEED_BINARY) filer -port=$(FILER_PORT) \
-defaultStoreDir=test-volume-data/filerldb2 \
-master=localhost:$(MASTER_PORT) > weed-filer.log 2>&1 & \
echo $$! > $(FILER_PID_FILE)
@echo "Waiting for filer server to be ready..."
@timeout 60 bash -c 'until curl -s http://localhost:$(FILER_PORT)/status > /dev/null 2>&1; do echo "Waiting for filer server..."; sleep 2; done' || (echo "❌ Filer failed to start, checking logs..." && tail -20 weed-filer.log && exit 1)
@echo "✅ Filer server is ready"
@echo "Starting S3 API server with IAM..."
@$(WEED_BINARY) -v=3 s3 -port=$(S3_PORT) \
-filer=localhost:$(FILER_PORT) \
-config=test_config.json \
-iam.config=$(CURDIR)/iam_config.json > weed-s3.log 2>&1 & \
echo $$! > $(S3_PID_FILE)
-s3.port=$(S3_PORT) \
-s3.config=test_config.json \
-s3.iam.config=$(CURDIR)/iam_config.json \
> weed-mini.log 2>&1 & \
echo $$! > $(MINI_PID_FILE)
@echo "Waiting for S3 API server to be ready..."
@timeout 60 bash -c 'until curl -s http://localhost:$(S3_PORT) > /dev/null 2>&1; do echo "Waiting for S3 API server..."; sleep 2; done' || (echo "❌ S3 API failed to start, checking logs..." && tail -20 weed-s3.log && exit 1)
@echo "✅ S3 API server is ready"
@echo "✅ All services started and ready"
@echo "Waiting for services to be ready..."
@$(MAKE) wait-for-services
@echo "OK All services started and ready"
wait-for-services: ## Wait for all services to be ready
@echo "⏳ Waiting for services to be ready..."
@echo "Checking master server..."
@timeout 30 bash -c 'until curl -s http://localhost:$(MASTER_PORT)/cluster/status > /dev/null; do sleep 1; done' || (echo "❌ Master failed to start" && exit 1)
@echo "Checking filer server..."
@timeout 30 bash -c 'until curl -s http://localhost:$(FILER_PORT)/status > /dev/null; do sleep 1; done' || (echo "❌ Filer failed to start" && exit 1)
@echo "Waiting for services to be ready..."
@echo "Checking S3 API server..."
@timeout 30 bash -c 'until curl -s http://localhost:$(S3_PORT) > /dev/null 2>&1; do sleep 1; done' || (echo "❌ S3 API failed to start" && exit 1)
@for i in $$(seq 1 30); do \
if curl -s http://localhost:$(S3_PORT) > /dev/null 2>&1; then \
echo "OK S3 API server is ready"; \
exit 0; \
fi; \
sleep 1; \
done; \
echo "ERROR S3 API failed to start"; \
exit 1
@echo "Pre-allocating volumes for concurrent operations..."
@curl -s "http://localhost:$(MASTER_PORT)/vol/grow?collection=default&count=10&replication=000" > /dev/null || echo "⚠️ Volume pre-allocation failed, but continuing..."
@curl -s "http://localhost:$(MASTER_PORT)/vol/grow?collection=default&count=10&replication=000" > /dev/null || echo "WARNING Volume pre-allocation failed, but continuing..."
@sleep 3
@echo "✅ All services are ready"
@echo "OK All services are ready"
stop-services: ## Stop all SeaweedFS services
@echo "🛑 Stopping SeaweedFS services..."
@echo "Stopping SeaweedFS services..."
@if [ -f $(MINI_PID_FILE) ]; then \
echo "Stopping weed mini..."; \
kill $$(cat $(MINI_PID_FILE)) 2>/dev/null || true; \
rm -f $(MINI_PID_FILE); \
fi
@if [ -f $(S3_PID_FILE) ]; then \
echo "Stopping S3 API server..."; \
kill $$(cat $(S3_PID_FILE)) 2>/dev/null || true; \
@@ -143,17 +118,17 @@ stop-services: ## Stop all SeaweedFS services
kill $$(cat $(MASTER_PID_FILE)) 2>/dev/null || true; \
rm -f $(MASTER_PID_FILE); \
fi
@echo " All services stopped"
@echo "OK All services stopped"
clean: stop-services ## Clean up test environment
@echo "🧹 Cleaning up test environment..."
@echo "Cleaning up test environment..."
@rm -rf test-volume-data
@rm -f weed-*.log
@rm -f *.test
@echo "Cleanup complete"
@echo "Cleanup complete"
logs: ## Show service logs
@echo "📋 Service Logs:"
@echo "Service Logs:"
@echo "=== Master Log ==="
@tail -20 weed-master.log 2>/dev/null || echo "No master log"
@echo ""
@@ -167,7 +142,7 @@ logs: ## Show service logs
@tail -20 weed-s3.log 2>/dev/null || echo "No S3 log"
status: ## Check service status
@echo "📊 Service Status:"
@echo "Service Status:"
@echo -n "Master: "; curl -s http://localhost:$(MASTER_PORT)/cluster/status > /dev/null 2>&1 && echo "✅ Running" || echo "❌ Not running"
@echo -n "Filer: "; curl -s http://localhost:$(FILER_PORT)/status > /dev/null 2>&1 && echo "✅ Running" || echo "❌ Not running"
@echo -n "S3 API: "; curl -s http://localhost:$(S3_PORT) > /dev/null 2>&1 && echo "✅ Running" || echo "❌ Not running"

test/s3/parquet/CROSS_FILESYSTEM_COMPATIBILITY.md (172 lines changed)

@@ -1,172 +0,0 @@
# Cross-Filesystem Compatibility Test Results
## Overview
This document summarizes the cross-filesystem compatibility testing between **s3fs** and **PyArrow native S3 filesystem** implementations when working with SeaweedFS.
## Test Purpose
Verify that Parquet files written using one filesystem implementation (s3fs or PyArrow native S3) can be correctly read using the other implementation, confirming true file format compatibility.
## Test Methodology
### Test Matrix
The test performs the following combinations:
1. **Write with s3fs → Read with PyArrow native S3**
2. **Write with PyArrow native S3 → Read with s3fs**
For each direction, the test:
- Creates a sample PyArrow table with multiple data types (int64, string, float64, bool)
- Writes the Parquet file using one filesystem implementation
- Reads the Parquet file using the other filesystem implementation
- Verifies data integrity by comparing:
- Row counts
- Schema equality
- Data contents (after sorting by ID to handle row order differences)
### File Sizes Tested
- **Small files**: 5 rows (quick validation)
- **Large files**: 200,000 rows (multi-row-group validation)
## Test Results
### ✅ Small Files (5 rows)
| Write Method | Read Method | Result | Read Function Used |
|--------------|-------------|--------|--------------------|
| s3fs | PyArrow native S3 | ✅ PASS | pq.read_table |
| PyArrow native S3 | s3fs | ✅ PASS | pq.read_table |
**Status**: **ALL TESTS PASSED**
### Large Files (200,000 rows)
Large file testing requires adequate volume capacity in SeaweedFS. When run with the default volume settings (50MB max size), tests may hit capacity limits because of the number of large test files created simultaneously.
**Recommendation**: For large file testing, increase `VOLUME_MAX_SIZE_MB` in the Makefile or run tests with `TEST_QUICK=1` for development/validation purposes.
## Key Findings
### ✅ Full Compatibility Confirmed
**Files written with s3fs and PyArrow native S3 filesystem are fully compatible and can be read by either implementation.**
This confirms that:
1. **Identical Parquet Format**: Both s3fs and PyArrow native S3 use the same underlying PyArrow library to generate Parquet files, resulting in identical file formats at the binary level.
2. **S3 API Compatibility**: SeaweedFS's S3 implementation handles both filesystem backends correctly, with proper:
- Object creation (PutObject)
- Object reading (GetObject)
- Directory handling (implicit directories)
- Multipart uploads (for larger files)
3. **Metadata Consistency**: File metadata, schemas, and data integrity are preserved across both write and read operations regardless of which filesystem implementation is used.
## Implementation Details
### Common Write Path
Both implementations use PyArrow's `pads.write_dataset()` function:
```python
# s3fs approach
fs = s3fs.S3FileSystem(...)
pads.write_dataset(table, path, format="parquet", filesystem=fs)
# PyArrow native approach
s3 = pafs.S3FileSystem(...)
pads.write_dataset(table, path, format="parquet", filesystem=s3)
```
### Multiple Read Methods Tested
The test attempts reads using multiple PyArrow methods:
- `pq.read_table()` - Direct table reading
- `pq.ParquetDataset()` - Dataset-based reading
- `pads.dataset()` - PyArrow dataset API
All methods successfully read files written by either filesystem implementation.
## Practical Implications
### For Users
1. **Flexibility**: Users can choose either s3fs or PyArrow native S3 based on their preferences:
- **s3fs**: More mature, widely used, familiar API
- **PyArrow native**: Pure PyArrow solution, fewer dependencies
2. **Interoperability**: Teams using different tools can seamlessly share Parquet datasets stored in SeaweedFS
3. **Migration**: Easy to migrate between filesystem implementations without data conversion
### For SeaweedFS
1. **S3 Compatibility**: Confirms SeaweedFS's S3 implementation is compatible with major Python data science tools
2. **Implicit Directory Handling**: The implicit directory fix works correctly for both filesystem implementations
3. **Standard Compliance**: SeaweedFS handles S3 operations in a way that's compatible with AWS S3 behavior
## Running the Tests
### Quick Test (Recommended for Development)
```bash
cd test/s3/parquet
TEST_QUICK=1 make test-cross-fs-with-server
```
### Full Test (All File Sizes)
```bash
cd test/s3/parquet
make test-cross-fs-with-server
```
### Manual Test (Assuming Server is Running)
```bash
cd test/s3/parquet
make setup-python
make start-seaweedfs-ci
# In another terminal
TEST_QUICK=1 make test-cross-fs
# Cleanup
make stop-seaweedfs-safe
```
## Environment Variables
The test supports customization through environment variables:
- `S3_ENDPOINT_URL`: S3 endpoint (default: `http://localhost:8333`)
- `S3_ACCESS_KEY`: Access key (default: `some_access_key1`)
- `S3_SECRET_KEY`: Secret key (default: `some_secret_key1`)
- `BUCKET_NAME`: Bucket name (default: `test-parquet-bucket`)
- `TEST_QUICK`: Run only small tests (default: `0`, set to `1` for quick mode)
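For example, a quick run with explicit overrides only needs the variables on the command line (the values shown are the defaults and purely illustrative):

```bash
S3_ENDPOINT_URL=http://localhost:8333 \
S3_ACCESS_KEY=some_access_key1 \
S3_SECRET_KEY=some_secret_key1 \
TEST_QUICK=1 make test-cross-fs
```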
## Conclusion
The cross-filesystem compatibility tests demonstrate that **Parquet files written via s3fs and PyArrow native S3 filesystem are completely interchangeable**. This validates that:
1. The Parquet file format is implementation-agnostic
2. SeaweedFS's S3 API correctly handles both filesystem backends
3. Users have full flexibility in choosing their preferred filesystem implementation
This compatibility is a testament to:
- PyArrow's consistent file format generation
- SeaweedFS's robust S3 API implementation
- Proper handling of S3 semantics (especially implicit directories)
---
**Test Implementation**: `test_cross_filesystem_compatibility.py`
**Last Updated**: November 21, 2024
**Status**: ✅ All critical tests passing

test/s3/parquet/FINAL_ROOT_CAUSE_ANALYSIS.md (58 lines changed)

@@ -1,58 +0,0 @@
# Final Root Cause Analysis
## Overview
This document provides a deep technical analysis of the s3fs compatibility issue with PyArrow Parquet datasets on SeaweedFS, and the solution implemented to resolve it.
## Root Cause
When PyArrow writes datasets using `write_dataset()`, it creates implicit directory structures by writing files without explicit directory markers. However, some S3 workflows may create 0-byte directory markers.
### The Problem
1. **PyArrow writes dataset files** without creating explicit directory objects
2. **s3fs calls HEAD** on the directory path to check if it exists
3. **If HEAD returns 200** with `Content-Length: 0`, s3fs interprets it as a file (not a directory)
4. **PyArrow fails** when trying to read, reporting "Parquet file size is 0 bytes"
### AWS S3 Behavior
AWS S3 returns **404 Not Found** for implicit directories (directories that only exist because they have children but no explicit marker object). This allows s3fs to fall back to LIST operations to detect the directory.
## The Solution
### Implementation
Modified the S3 API HEAD handler in `weed/s3api/s3api_object_handlers.go` to:
1. **Check if object ends with `/`**: Explicit directory markers return 200 as before
2. **Check if object has children**: If a 0-byte object has children in the filer, treat it as an implicit directory
3. **Return 404 for implicit directories**: This matches AWS S3 behavior and triggers s3fs's LIST fallback
### Code Changes
The fix is implemented in the `HeadObjectHandler` function with logic to:
- Detect implicit directories by checking for child entries
- Return 404 (NoSuchKey) for implicit directories
- Preserve existing behavior for explicit directory markers and regular files
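The effect is easy to observe from the client side with any signed S3 client. A hedged sketch using the AWS CLI (the bucket and key names are hypothetical):

```bash
# Suppose PyArrow wrote dataset/part-0.parquet but never created an
# explicit "dataset/" marker object.

# HEAD on the implicit directory prefix now returns 404, matching AWS S3,
# which pushes s3fs into its LIST-based directory detection:
aws s3api head-object --endpoint-url http://localhost:8333 \
    --bucket test-parquet-bucket --key dataset
# -> An error occurred (404) when calling the HeadObject operation: Not Found

# HEAD on the actual file is unchanged and reports its real size:
aws s3api head-object --endpoint-url http://localhost:8333 \
    --bucket test-parquet-bucket --key dataset/part-0.parquet
```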
## Performance Considerations
### Optimization: Child Check Cache
- Child existence checks are performed via filer LIST operations
- Results could be cached for frequently accessed paths
- Trade-off between consistency and performance
### Impact
- Minimal performance impact for normal file operations
- Slight overhead for HEAD requests on implicit directories (one additional LIST call)
- Overall improvement in PyArrow compatibility outweighs minor performance cost
## TODO
- [ ] Add detailed benchmarking results comparing before/after fix
- [ ] Document edge cases discovered during implementation
- [ ] Add architectural diagrams showing the request flow
- [ ] Document alternative solutions considered and why they were rejected
- [ ] Add performance profiling data for child existence checks

test/s3/parquet/MINIO_DIRECTORY_HANDLING.md (70 lines changed)

@@ -1,70 +0,0 @@
# MinIO Directory Handling Comparison
## Overview
This document compares how MinIO handles directory markers versus SeaweedFS's implementation, and explains the different approaches to S3 directory semantics.
## MinIO's Approach
MinIO handles implicit directories similarly to AWS S3:
1. **No explicit directory objects**: Directories are implicit, defined only by object key prefixes
2. **HEAD on directory returns 404**: Consistent with AWS S3 behavior
3. **LIST operations reveal directories**: Directories are discovered through delimiter-based LIST operations
4. **Automatic prefix handling**: MinIO automatically recognizes prefixes as directories
### MinIO Implementation Details
- Uses in-memory metadata for fast prefix lookups
- Optimized for LIST operations with common delimiter (`/`)
- No persistent directory objects in storage layer
- Directories "exist" as long as they contain objects
## SeaweedFS Approach
SeaweedFS uses a filer-based approach with real directory entries:
### Before the Fix
1. **Explicit directory objects**: Could create 0-byte objects as directory markers
2. **HEAD returns 200**: Even for implicit directories
3. **Caused s3fs issues**: s3fs interpreted 0-byte HEAD responses as empty files
### After the Fix
1. **Hybrid approach**: Supports both explicit markers (with `/` suffix) and implicit directories
2. **HEAD returns 404 for implicit directories**: Matches AWS S3 and MinIO behavior
3. **Filer integration**: Uses filer's directory metadata to detect implicit directories
4. **s3fs compatibility**: Triggers proper LIST fallback behavior
## Key Differences
| Aspect | MinIO | SeaweedFS (After Fix) |
|--------|-------|----------------------|
| Directory Storage | No persistent objects | Filer directory entries |
| Implicit Directory HEAD | 404 Not Found | 404 Not Found |
| Explicit Marker HEAD | Not applicable | 200 OK (with `/` suffix) |
| Child Detection | Prefix scan | Filer LIST operation |
| Performance | In-memory lookups | Filer gRPC calls |
## Implementation Considerations
### Advantages of SeaweedFS Approach
- Integrates with existing filer metadata
- Supports both implicit and explicit directories
- Preserves directory metadata and attributes
- Compatible with POSIX filer semantics
### Trade-offs
- Additional filer communication overhead for HEAD requests
- Complexity of supporting both directory paradigms
- Performance depends on filer efficiency
## TODO
- [ ] Add performance benchmark comparison: MinIO vs SeaweedFS
- [ ] Document edge cases where behaviors differ
- [ ] Add example request/response traces for both systems
- [ ] Document migration path for users moving from MinIO to SeaweedFS
- [ ] Add compatibility matrix for different S3 clients

test/s3/parquet/Makefile (164 lines changed)

@@ -4,14 +4,9 @@
# Default values
SEAWEEDFS_BINARY ?= weed
S3_PORT ?= 8333
FILER_PORT ?= 8888
VOLUME_PORT ?= 8080
MASTER_PORT ?= 9333
TEST_TIMEOUT ?= 15m
ACCESS_KEY ?= some_access_key1
SECRET_KEY ?= some_secret_key1
VOLUME_MAX_SIZE_MB ?= 50
VOLUME_MAX_COUNT ?= 100
BUCKET_NAME ?= test-parquet-bucket
ENABLE_SSE_S3 ?= false
@@ -68,11 +63,7 @@ help:
@echo "Configuration:"
@echo " SEAWEEDFS_BINARY=$(SEAWEEDFS_BINARY)"
@echo " S3_PORT=$(S3_PORT)"
@echo " FILER_PORT=$(FILER_PORT)"
@echo " VOLUME_PORT=$(VOLUME_PORT)"
@echo " MASTER_PORT=$(MASTER_PORT)"
@echo " BUCKET_NAME=$(BUCKET_NAME)"
@echo " VOLUME_MAX_SIZE_MB=$(VOLUME_MAX_SIZE_MB)"
@echo " ENABLE_SSE_S3=$(ENABLE_SSE_S3)"
@echo " PYTHON=$(PYTHON)"
@@ -106,39 +97,25 @@ setup-python: check-python
start-seaweedfs-ci: check-binary
@echo "$(YELLOW)Starting SeaweedFS server for Parquet testing...$(NC)"
# Clean up any existing processes first (CI-safe)
@echo "Cleaning up any existing processes..."
# Clean up any existing processes first (CI-safe) - aggressive cleanup
@echo "Aggressively cleaning up any existing processes on S3 port $(S3_PORT) and master port 9333..."
@if command -v lsof >/dev/null 2>&1; then \
lsof -ti :$(MASTER_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$(VOLUME_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$(FILER_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$(S3_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$$(( $(MASTER_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$$(( $(VOLUME_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$$(( $(FILER_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
for attempt in 1 2 3; do \
lsof -ti :$(S3_PORT) 2>/dev/null | head -5 | while read pid; do kill -9 $$pid 2>/dev/null || true; done; \
lsof -ti :9333 2>/dev/null | head -5 | while read pid; do kill -9 $$pid 2>/dev/null || true; done; \
sleep 1; \
done; \
fi
@sleep 2
@sleep 3
# Create necessary directories
@mkdir -p /tmp/seaweedfs-test-parquet-master
@mkdir -p /tmp/seaweedfs-test-parquet-volume
@mkdir -p /tmp/seaweedfs-test-parquet-filer
@mkdir -p /tmp/seaweedfs-test-parquet
# Clean up any old server logs
@rm -f /tmp/seaweedfs-parquet-*.log || true
# Start master server with volume size limit and explicit gRPC port
@echo "Starting master server..."
@nohup $(SEAWEEDFS_BINARY) master -port=$(MASTER_PORT) -port.grpc=$$(( $(MASTER_PORT) + 10000 )) -mdir=/tmp/seaweedfs-test-parquet-master -volumeSizeLimitMB=$(VOLUME_MAX_SIZE_MB) -ip=127.0.0.1 -peers=none > /tmp/seaweedfs-parquet-master.log 2>&1 &
@sleep 3
# Start volume server with master HTTP port and increased capacity
@echo "Starting volume server..."
@nohup $(SEAWEEDFS_BINARY) volume -port=$(VOLUME_PORT) -master=127.0.0.1:$(MASTER_PORT) -dir=/tmp/seaweedfs-test-parquet-volume -max=$(VOLUME_MAX_COUNT) -ip=127.0.0.1 -preStopSeconds=1 > /tmp/seaweedfs-parquet-volume.log 2>&1 &
@sleep 5
# Start filer server with embedded S3
@echo "Starting filer server with embedded S3..."
# Start weed mini with embedded S3
@echo "Starting weed mini with embedded S3..."
@if [ "$(ENABLE_SSE_S3)" = "true" ]; then \
echo " SSE-S3 encryption: ENABLED"; \
printf '{"identities":[{"name":"%s","credentials":[{"accessKey":"%s","secretKey":"%s"}],"actions":["Admin","Read","Write"]}],"buckets":[{"name":"$(BUCKET_NAME)","encryption":{"sseS3":{"enabled":true}}}]}' "$(ACCESS_KEY)" "$(ACCESS_KEY)" "$(SECRET_KEY)" > /tmp/seaweedfs-parquet-s3.json; \
@@ -146,96 +123,43 @@ start-seaweedfs-ci: check-binary
echo " SSE-S3 encryption: DISABLED"; \
printf '{"identities":[{"name":"%s","credentials":[{"accessKey":"%s","secretKey":"%s"}],"actions":["Admin","Read","Write"]}]}' "$(ACCESS_KEY)" "$(ACCESS_KEY)" "$(SECRET_KEY)" > /tmp/seaweedfs-parquet-s3.json; \
fi
@AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) nohup $(SEAWEEDFS_BINARY) filer -port=$(FILER_PORT) -port.grpc=$$(( $(FILER_PORT) + 10000 )) -master=127.0.0.1:$(MASTER_PORT) -dataCenter=defaultDataCenter -ip=127.0.0.1 -s3 -s3.port=$(S3_PORT) -s3.config=/tmp/seaweedfs-parquet-s3.json > /tmp/seaweedfs-parquet-filer.log 2>&1 &
@sleep 5
# Wait for S3 service to be ready - use port-based checking for reliability
@echo "$(YELLOW)Waiting for S3 service to be ready...$(NC)"
@for i in $$(seq 1 20); do \
if netstat -an 2>/dev/null | grep -q ":$(S3_PORT).*LISTEN" || \
ss -an 2>/dev/null | grep -q ":$(S3_PORT).*LISTEN" || \
lsof -i :$(S3_PORT) >/dev/null 2>&1; then \
echo "$(GREEN)S3 service is listening on port $(S3_PORT)$(NC)"; \
sleep 1; \
break; \
@$(SEAWEEDFS_BINARY) mini \
-dir=/tmp/seaweedfs-test-parquet \
-ip.bind=0.0.0.0 \
-s3.port=$(S3_PORT) \
-s3.config=/tmp/seaweedfs-parquet-s3.json \
> /tmp/seaweedfs-parquet-mini.log 2>&1 & echo $$! > /tmp/weed-mini.pid
@echo "Waiting for S3 service to be fully ready (max 90 seconds)..."
@bash -c 'for i in $$(seq 1 90); do \
if curl -s -H "Authorization: AWS4-HMAC-SHA256 Credential=$(ACCESS_KEY)" http://localhost:$(S3_PORT)/ > /dev/null 2>&1; then \
echo "✅ S3 service is ready"; \
sleep 2; \
exit 0; \
fi; \
if [ $$i -eq 20 ]; then \
echo "$(RED)S3 service failed to start within 20 seconds$(NC)"; \
echo "=== Detailed Logs ==="; \
echo "Master log:"; tail -30 /tmp/seaweedfs-parquet-master.log || true; \
echo "Volume log:"; tail -30 /tmp/seaweedfs-parquet-volume.log || true; \
echo "Filer log:"; tail -30 /tmp/seaweedfs-parquet-filer.log || true; \
echo "=== Port Status ==="; \
netstat -an 2>/dev/null | grep ":$(S3_PORT)" || \
ss -an 2>/dev/null | grep ":$(S3_PORT)" || \
echo "No port listening on $(S3_PORT)"; \
exit 1; \
fi; \
echo "Waiting for S3 service... ($$i/20)"; \
sleep 1; \
done
# Additional wait for filer gRPC to be ready
@echo "$(YELLOW)Waiting for filer gRPC to be ready...$(NC)"
@sleep 2
# Wait for volume server to register with master and ensure volume assignment works
@echo "$(YELLOW)Waiting for volume assignment to be ready...$(NC)"
@for i in $$(seq 1 30); do \
ASSIGN_RESULT=$$(curl -s "http://localhost:$(MASTER_PORT)/dir/assign?count=1" 2>/dev/null); \
if echo "$$ASSIGN_RESULT" | grep -q '"fid"'; then \
echo "$(GREEN)Volume assignment is ready$(NC)"; \
break; \
fi; \
if [ $$i -eq 30 ]; then \
echo "$(RED)Volume assignment not ready after 30 seconds$(NC)"; \
echo "=== Last assign attempt ==="; \
echo "$$ASSIGN_RESULT"; \
echo "=== Master Status ==="; \
curl -s "http://localhost:$(MASTER_PORT)/dir/status" 2>/dev/null || echo "Failed to get master status"; \
echo "=== Master Logs ==="; \
tail -50 /tmp/seaweedfs-parquet-master.log 2>/dev/null || echo "No master log"; \
echo "=== Volume Logs ==="; \
tail -50 /tmp/seaweedfs-parquet-volume.log 2>/dev/null || echo "No volume log"; \
exit 1; \
fi; \
echo "Waiting for volume assignment... ($$i/30)"; \
sleep 1; \
done
@echo "$(GREEN)SeaweedFS server started successfully for Parquet testing$(NC)"
@echo "Master: http://localhost:$(MASTER_PORT)"
@echo "Volume: http://localhost:$(VOLUME_PORT)"
@echo "Filer: http://localhost:$(FILER_PORT)"
@echo "S3: http://localhost:$(S3_PORT)"
@echo "Volume Max Size: $(VOLUME_MAX_SIZE_MB)MB"
done; \
echo "ERROR S3 service failed to start within 90 seconds"; \
echo "=== Server log output ==="; \
cat /tmp/seaweedfs-parquet-mini.log 2>/dev/null || echo "No startup log available"; \
exit 1'
start-seaweedfs: check-binary
@echo "$(YELLOW)Starting SeaweedFS server for Parquet testing...$(NC)"
@# Use port-based cleanup for consistency and safety
@echo "Cleaning up any existing processes..."
@lsof -ti :$(MASTER_PORT) 2>/dev/null | xargs -r kill -TERM || true
@lsof -ti :$(VOLUME_PORT) 2>/dev/null | xargs -r kill -TERM || true
@lsof -ti :$(FILER_PORT) 2>/dev/null | xargs -r kill -TERM || true
@lsof -ti :$(S3_PORT) 2>/dev/null | xargs -r kill -TERM || true
@# Clean up gRPC ports (HTTP port + 10000)
@lsof -ti :$$(( $(MASTER_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true
@lsof -ti :$$(( $(VOLUME_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true
@lsof -ti :$$(( $(FILER_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true
@sleep 2
@$(MAKE) start-seaweedfs-ci
stop-seaweedfs:
@echo "$(YELLOW)Stopping SeaweedFS server...$(NC)"
@# Use port-based cleanup for consistency and safety
@lsof -ti :$(MASTER_PORT) 2>/dev/null | xargs -r kill -TERM || true
@lsof -ti :$(VOLUME_PORT) 2>/dev/null | xargs -r kill -TERM || true
@lsof -ti :$(FILER_PORT) 2>/dev/null | xargs -r kill -TERM || true
@if [ -f /tmp/weed-mini.pid ]; then \
echo "Stopping weed mini..."; \
kill $$(cat /tmp/weed-mini.pid) || true; \
rm -f /tmp/weed-mini.pid; \
fi
@lsof -ti :$(S3_PORT) 2>/dev/null | xargs -r kill -TERM || true
@# Clean up gRPC ports (HTTP port + 10000)
@lsof -ti :$$(( $(MASTER_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true
@lsof -ti :$$(( $(VOLUME_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true
@lsof -ti :$$(( $(FILER_PORT) + 10000 )) 2>/dev/null | xargs -r kill -TERM || true
@sleep 2
@echo "$(GREEN)SeaweedFS server stopped$(NC)"
@@ -245,22 +169,10 @@ stop-seaweedfs-safe:
@# Use port-based cleanup which is safer in CI
@if command -v lsof >/dev/null 2>&1; then \
echo "Using lsof for port-based cleanup..."; \
lsof -ti :$(MASTER_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$(VOLUME_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$(FILER_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$(S3_PORT) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$$(( $(MASTER_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$$(( $(VOLUME_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
lsof -ti :$$(( $(FILER_PORT) + 10000 )) 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
else \
echo "lsof not available, using netstat approach..."; \
netstat -tlnp 2>/dev/null | grep :$(MASTER_PORT) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \
netstat -tlnp 2>/dev/null | grep :$(VOLUME_PORT) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \
netstat -tlnp 2>/dev/null | grep :$(FILER_PORT) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \
netstat -tlnp 2>/dev/null | grep :$(S3_PORT) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \
netstat -tlnp 2>/dev/null | grep :$$(( $(MASTER_PORT) + 10000 )) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \
netstat -tlnp 2>/dev/null | grep :$$(( $(VOLUME_PORT) + 10000 )) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \
netstat -tlnp 2>/dev/null | grep :$$(( $(FILER_PORT) + 10000 )) | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \
fi
@sleep 2
@echo "$(GREEN)SeaweedFS server safely stopped$(NC)"
@@ -351,18 +263,14 @@ test-implicit-dir-with-server: build-weed setup-python
# Debug targets
debug-logs:
@echo "$(YELLOW)=== Master Log ===$(NC)"
@tail -n 50 /tmp/seaweedfs-parquet-master.log || echo "No master log found"
@echo "$(YELLOW)=== Volume Log ===$(NC)"
@tail -n 50 /tmp/seaweedfs-parquet-volume.log || echo "No volume log found"
@echo "$(YELLOW)=== Filer Log ===$(NC)"
@tail -n 50 /tmp/seaweedfs-parquet-filer.log || echo "No filer log found"
@echo "$(YELLOW)=== Mini Log ===$(NC)"
@tail -n 50 /tmp/seaweedfs-parquet-mini.log || echo "No mini log found"
debug-status:
@echo "$(YELLOW)=== Process Status ===$(NC)"
@ps aux | grep -E "(weed|seaweedfs)" | grep -v grep || echo "No SeaweedFS processes found"
@echo "$(YELLOW)=== Port Status ===$(NC)"
@netstat -an | grep -E "($(MASTER_PORT)|$(VOLUME_PORT)|$(FILER_PORT)|$(S3_PORT))" || echo "No ports in use"
@netstat -an | grep -E "($(S3_PORT))" || echo "No ports in use"
# Manual test targets for development
manual-start: start-seaweedfs

test/s3/parquet/TEST_COVERAGE.md (46 lines changed)

@@ -1,46 +0,0 @@
# Test Coverage Documentation
## Overview
This document provides comprehensive test coverage documentation for the SeaweedFS S3 Parquet integration tests.
## Test Categories
### Unit Tests (Go)
- 17 test cases covering S3 API handlers
- Tests for implicit directory handling
- HEAD request behavior validation
- Located in: `weed/s3api/s3api_implicit_directory_test.go`
### Integration Tests (Python)
- 6 test cases for implicit directory fix
- Tests HEAD request behavior on directory markers
- s3fs directory detection validation
- PyArrow dataset read compatibility
- Located in: `test_implicit_directory_fix.py`
### End-to-End Tests (Python)
- 20 test cases combining write and read methods
- Small file tests (5 rows): 10 test combinations
- Large file tests (200,000 rows): 10 test combinations
- Tests multiple write methods: `pads.write_dataset`, `pq.write_table+s3fs`
- Tests multiple read methods: `pads.dataset`, `pq.ParquetDataset`, `pq.read_table`, `s3fs+direct`, `s3fs+buffered`
- Located in: `s3_parquet_test.py`
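Both layers can be run directly from the repository root; a hedged sketch (the Go `-run` filter is an assumption based on the test file name, while the make target comes from this directory's Makefile):

```bash
# Go unit tests covering the implicit-directory HEAD handling:
(cd weed && go test ./s3api/ -run ImplicitDirectory -v)

# Python integration tests, with the server lifecycle managed by make:
(cd test/s3/parquet && make test-implicit-dir-with-server)
```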
## Coverage Summary
| Test Type | Count | Status |
|-----------|-------|--------|
| Unit Tests (Go) | 17 | ✅ Pass |
| Integration Tests (Python) | 6 | ✅ Pass |
| End-to-End Tests (Python) | 20 | ✅ Pass |
| **Total** | **43** | **✅ All Pass** |
## TODO
- [ ] Add detailed test execution time metrics
- [ ] Document test data generation strategies
- [ ] Add code coverage percentages for Go tests
- [ ] Document edge cases and corner cases tested
- [ ] Add performance benchmarking results

test/s3/parquet/test_implicit_directory_fix.py (1 line changed)

@@ -60,6 +60,7 @@ def setup_s3():
endpoint_url=S3_ENDPOINT_URL,
aws_access_key_id=S3_ACCESS_KEY,
aws_secret_access_key=S3_SECRET_KEY,
region_name='us-east-1',
use_ssl=False
)

test/s3/remote_cache/Makefile (36 lines changed)

@@ -10,19 +10,17 @@ all: test-with-server
# Configuration
WEED_BINARY := ../../../weed/weed_binary
ACCESS_KEY ?= some_access_key1
SECRET_KEY ?= some_secret_key1
# Primary SeaweedFS (the one being tested - has remote caching)
PRIMARY_S3_PORT := 8333
PRIMARY_FILER_PORT := 8888
PRIMARY_MASTER_PORT := 9333
PRIMARY_VOLUME_PORT := 8080
PRIMARY_METRICS_PORT := 9324
PRIMARY_DIR := ./test-primary-data
# Secondary SeaweedFS (acts as "remote" S3 storage)
REMOTE_S3_PORT := 8334
REMOTE_FILER_PORT := 8889
REMOTE_MASTER_PORT := 9334
REMOTE_VOLUME_PORT := 8081
REMOTE_METRICS_PORT := 9325
REMOTE_DIR := ./test-remote-data
@@ -73,18 +71,11 @@ start-remote: check-deps
@echo "Starting remote SeaweedFS (secondary instance)..."
@rm -f remote-server.pid
@mkdir -p $(REMOTE_DIR)
@$(WEED_BINARY) server \
-s3 \
@AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) $(WEED_BINARY) mini \
-s3.port=$(REMOTE_S3_PORT) \
-s3.allowDeleteBucketNotEmpty=true \
-filer \
-filer.port=$(REMOTE_FILER_PORT) \
-master.port=$(REMOTE_MASTER_PORT) \
-volume.port=$(REMOTE_VOLUME_PORT) \
-master.volumeSizeLimitMB=50 \
-volume.max=100 \
-dir=$(REMOTE_DIR) \
-volume.preStopSeconds=1 \
-ip.bind=0.0.0.0 \
-metricsPort=$(REMOTE_METRICS_PORT) \
> remote-weed.log 2>&1 & echo $$! > remote-server.pid
@echo "Waiting for remote SeaweedFS to start..."
@@ -93,7 +84,7 @@ start-remote: check-deps
echo "Remote SeaweedFS started on port $(REMOTE_S3_PORT)"; \
exit 0; \
fi; \
sleep 1; \
sleep 3; \
done; \
echo "ERROR: Remote SeaweedFS failed to start"; \
cat remote-weed.log; \
@@ -114,18 +105,11 @@ start-primary: check-deps
@echo "Starting primary SeaweedFS..."
@rm -f primary-server.pid
@mkdir -p $(PRIMARY_DIR)
@$(WEED_BINARY) server \
-s3 \
@AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) $(WEED_BINARY) mini \
-s3.port=$(PRIMARY_S3_PORT) \
-s3.allowDeleteBucketNotEmpty=true \
-filer \
-filer.port=$(PRIMARY_FILER_PORT) \
-master.port=$(PRIMARY_MASTER_PORT) \
-volume.port=$(PRIMARY_VOLUME_PORT) \
-master.volumeSizeLimitMB=50 \
-volume.max=100 \
-dir=$(PRIMARY_DIR) \
-volume.preStopSeconds=1 \
-ip.bind=0.0.0.0 \
-metricsPort=$(PRIMARY_METRICS_PORT) \
> primary-weed.log 2>&1 & echo $$! > primary-server.pid
@echo "Waiting for primary SeaweedFS to start..."
@@ -134,7 +118,7 @@ start-primary: check-deps
echo "Primary SeaweedFS started on port $(PRIMARY_S3_PORT)"; \
exit 0; \
fi; \
sleep 1; \
sleep 3; \
done; \
echo "ERROR: Primary SeaweedFS failed to start"; \
cat primary-weed.log; \
@@ -156,7 +140,7 @@ setup-remote:
@curl -s -X PUT "http://localhost:$(REMOTE_S3_PORT)/$(REMOTE_BUCKET)" || echo "Bucket may already exist"
@sleep 1
@echo "Configuring remote storage on primary..."
@printf 'remote.configure -name=seaweedremote -type=s3 -s3.access_key=any -s3.secret_key=any -s3.endpoint=http://localhost:$(REMOTE_S3_PORT) -s3.region=us-east-1\nexit\n' | $(WEED_BINARY) shell -master=localhost:$(PRIMARY_MASTER_PORT) 2>&1 || echo "remote.configure done"
@printf 'remote.configure -name=seaweedremote -type=s3 -s3.access_key=$(ACCESS_KEY) -s3.secret_key=$(SECRET_KEY) -s3.endpoint=http://localhost:$(REMOTE_S3_PORT) -s3.region=us-east-1\nexit\n' | $(WEED_BINARY) shell -master=localhost:$(PRIMARY_MASTER_PORT) 2>&1 || echo "remote.configure done"
@sleep 2
@echo "Mounting remote bucket on primary..."
@printf 'remote.mount -dir=/buckets/remotemounted -remote=seaweedremote/$(REMOTE_BUCKET) -nonempty\nexit\n' | $(WEED_BINARY) shell -master=localhost:$(PRIMARY_MASTER_PORT) 2>&1 || echo "remote.mount done"

test/s3/remote_cache/remote_cache_test.go (15 lines changed)

@@ -34,8 +34,8 @@ const (
remoteEndpoint = "http://localhost:8334"
// Credentials (anonymous access for testing)
accessKey = "any"
secretKey = "any"
accessKey = "some_access_key1"
secretKey = "some_secret_key1"
// Bucket name - mounted on primary as remote storage
testBucket = "remotemounted"
@@ -121,17 +121,6 @@ func getFromPrimary(t *testing.T, key string) []byte {
return data
}
// syncToRemote syncs local data to remote storage
func syncToRemote(t *testing.T) {
t.Log("Syncing to remote storage...")
output, err := runWeedShell(t, "remote.cache.uncache -dir=/buckets/"+testBucket+" -include=*")
if err != nil {
t.Logf("syncToRemote warning: %v", err)
}
t.Log(output)
time.Sleep(1 * time.Second)
}
// uncacheLocal purges the local cache, forcing data to be fetched from remote
func uncacheLocal(t *testing.T, pattern string) {
t.Logf("Purging local cache for pattern: %s", pattern)

test/s3/retention/Makefile (31 lines changed)

@@ -6,11 +6,14 @@
# Configuration
WEED_BINARY := ../../../weed/weed_binary
S3_PORT := 8333
ACCESS_KEY ?= some_access_key1
SECRET_KEY ?= some_secret_key1
MASTER_PORT := 9333
VOLUME_PORT := 8080
FILER_PORT := 8888
TEST_TIMEOUT := 15m
TEST_PATTERN := TestRetention
SERVER_DIR := ./test-volume-data/server-data
# Default target
help:
@@ -80,23 +83,16 @@ start-server: check-deps
@ls -la ../../../docker/compose/s3.json || echo "⚠️ Config file not found, continuing without it"
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@$(WEED_BINARY) server \
-debug \
-s3 \
@echo "🔍 DEBUG: Creating server data directory..."
@mkdir -p $(SERVER_DIR)
@echo "🔍 DEBUG: Launching SeaweedFS S3 server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) mini -dir=$(SERVER_DIR) -s3.port=$(S3_PORT)"
@AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) $(WEED_BINARY) mini \
-dir=$(SERVER_DIR) \
-s3.port=$(S3_PORT) \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \
-filer.maxMB=64 \
-master.volumeSizeLimitMB=50 \
-volume.max=100 \
-dir=./test-volume-data \
-volume.preStopSeconds=1 \
-metricsPort=9324 \
> weed-test.log 2>&1 & echo $$! > weed-server.pid
@echo "🔍 DEBUG: Server PID: $$(cat weed-server.pid 2>/dev/null || echo 'PID file not found')"
> weed-test.log 2>&1 & \
echo $$! > weed-server.pid
@echo "🔍 DEBUG: Server PID: $$(cat weed-test.pid 2>/dev/null || echo 'PID file not found')"
@echo "🔍 DEBUG: Checking if PID is still running..."
@sleep 2
@if [ -f weed-server.pid ]; then \
@@ -105,7 +101,6 @@ start-server: check-deps
else \
echo "⚠️ PID file not found"; \
fi
@echo "🔍 DEBUG: Waiting for server to start (up to 90 seconds)..."
@for i in $$(seq 1 90); do \
echo "🔍 DEBUG: Attempt $$i/90 - checking port $(S3_PORT)"; \
if curl -s http://localhost:$(S3_PORT) >/dev/null 2>&1; then \
@@ -123,8 +118,6 @@ start-server: check-deps
if [ $$i -eq 15 ]; then \
echo "🔍 DEBUG: After 15 seconds, checking port bindings..."; \
netstat -tlnp 2>/dev/null | grep $(S3_PORT) || echo "Port $(S3_PORT) not bound"; \
netstat -tlnp 2>/dev/null | grep 9333 || echo "Port 9333 not bound"; \
netstat -tlnp 2>/dev/null | grep 8080 || echo "Port 8080 not bound"; \
fi; \
if [ $$i -eq 30 ]; then \
echo "⚠️ Server taking longer than expected (30s), checking logs..."; \

test/s3/sse/Makefile (115 lines changed)

@@ -93,54 +93,35 @@ start-seaweedfs: check-binary
@sleep 2
# Create necessary directories
@mkdir -p /tmp/seaweedfs-test-sse-master
@mkdir -p /tmp/seaweedfs-test-sse-volume
@mkdir -p /tmp/seaweedfs-test-sse-filer
# Start master server with volume size limit and explicit gRPC port
@nohup $(SEAWEEDFS_BINARY) master -port=$(MASTER_PORT) -port.grpc=$$(( $(MASTER_PORT) + 10000 )) -mdir=/tmp/seaweedfs-test-sse-master -volumeSizeLimitMB=$(VOLUME_MAX_SIZE_MB) -ip=127.0.0.1 -peers=none > /tmp/seaweedfs-sse-master.log 2>&1 &
@sleep 3
# Start volume server with master HTTP port and increased capacity
@nohup $(SEAWEEDFS_BINARY) volume -port=$(VOLUME_PORT) -master=127.0.0.1:$(MASTER_PORT) -dir=/tmp/seaweedfs-test-sse-volume -max=$(VOLUME_MAX_COUNT) -ip=127.0.0.1 > /tmp/seaweedfs-sse-volume.log 2>&1 &
@sleep 5
# Start filer server (using standard SeaweedFS gRPC port convention: HTTP port + 10000)
@nohup $(SEAWEEDFS_BINARY) filer -port=$(FILER_PORT) -port.grpc=$$(( $(FILER_PORT) + 10000 )) -master=127.0.0.1:$(MASTER_PORT) -dataCenter=defaultDataCenter -ip=127.0.0.1 > /tmp/seaweedfs-sse-filer.log 2>&1 &
@sleep 3
@mkdir -p /tmp/seaweedfs-test-sse
# Create S3 configuration with SSE-KMS support
@printf '{"identities":[{"name":"%s","credentials":[{"accessKey":"%s","secretKey":"%s"}],"actions":["Admin","Read","Write"]}],"kms":{"type":"%s","configs":{"keyId":"%s","encryptionContext":{},"bucketKey":false}}}' "$(ACCESS_KEY)" "$(ACCESS_KEY)" "$(SECRET_KEY)" "$(KMS_TYPE)" "$(KMS_KEY_ID)" > /tmp/seaweedfs-sse-s3.json
# Start S3 server with KMS configuration
@nohup $(SEAWEEDFS_BINARY) s3 -port=$(S3_PORT) -filer=127.0.0.1:$(FILER_PORT) -config=/tmp/seaweedfs-sse-s3.json -ip.bind=127.0.0.1 > /tmp/seaweedfs-sse-s3.log 2>&1 &
@sleep 5
# Wait for S3 service to be ready
@echo "$(YELLOW)Waiting for S3 service to be ready...$(NC)"
# Start weed mini
@AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) $(SEAWEEDFS_BINARY) mini \
-dir=/tmp/seaweedfs-test-sse \
-s3.port=$(S3_PORT) \
-s3.config=/tmp/seaweedfs-sse-s3.json \
> /tmp/seaweedfs-sse-mini.log 2>&1 & echo $$! > /tmp/weed-mini.pid
@echo "Checking S3 service is ready..."
@for i in $$(seq 1 30); do \
if curl -s -f http://127.0.0.1:$(S3_PORT) > /dev/null 2>&1; then \
echo "$(GREEN)S3 service is ready$(NC)"; \
if curl -s http://127.0.0.1:$(S3_PORT) > /dev/null 2>&1; then \
echo "✅ S3 service is ready"; \
break; \
fi; \
echo "Waiting for S3 service... ($$i/30)"; \
sleep 1; \
done
# Additional wait for filer gRPC to be ready
@echo "$(YELLOW)Waiting for filer gRPC to be ready...$(NC)"
@sleep 2
@echo "$(GREEN)SeaweedFS server started successfully for SSE testing$(NC)"
@echo "Master: http://localhost:$(MASTER_PORT)"
@echo "Volume: http://localhost:$(VOLUME_PORT)"
@echo "Filer: http://localhost:$(FILER_PORT)"
@echo "S3: http://localhost:$(S3_PORT)"
@echo "Volume Max Size: $(VOLUME_MAX_SIZE_MB)MB"
@echo "SSE-KMS Support: Enabled"
stop-seaweedfs:
@echo "$(YELLOW)Stopping SeaweedFS server...$(NC)"
@# Use port-based cleanup for consistency and safety
@if [ -f /tmp/weed-mini.pid ]; then \
echo "Stopping weed mini..."; \
kill $$(cat /tmp/weed-mini.pid) || true; \
rm -f /tmp/weed-mini.pid; \
fi
@lsof -ti :$(MASTER_PORT) | xargs -r kill -TERM || true
@lsof -ti :$(VOLUME_PORT) | xargs -r kill -TERM || true
@lsof -ti :$(FILER_PORT) | xargs -r kill -TERM || true
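The `stop-seaweedfs` target layers two teardown strategies: SIGTERM the PID recorded at launch, then sweep the ports via `lsof | xargs kill` in case the PID file is stale. A hedged Go sketch of the PID-file half, assuming the `/tmp/weed-mini.pid` path the recipe writes; the port sweep is left to the existing `lsof` fallback:

```go
package main

import (
	"fmt"
	"os"
	"strconv"
	"strings"
	"syscall"
)

// stopByPidFile mirrors the first half of the stop-seaweedfs recipe:
// read /tmp/weed-mini.pid, SIGTERM the process, remove the file.
// Errors are tolerated ("|| true" in the Makefile) so a stale or
// missing PID file never fails the teardown.
func stopByPidFile(path string) {
	data, err := os.ReadFile(path)
	if err != nil {
		return // no PID file: nothing to stop
	}
	fmt.Println("Stopping weed mini...")
	pid, err := strconv.Atoi(strings.TrimSpace(string(data)))
	if err == nil {
		_ = syscall.Kill(pid, syscall.SIGTERM) // best-effort, like `kill ... || true`
	}
	_ = os.Remove(path)
}

func main() {
	stopByPidFile("/tmp/weed-mini.pid")
}
```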
@@ -345,71 +326,33 @@ start-seaweedfs-ci: check-binary
@echo "$(YELLOW)Starting SeaweedFS server for CI testing...$(NC)"
# Create necessary directories
@mkdir -p /tmp/seaweedfs-test-sse-master
@mkdir -p /tmp/seaweedfs-test-sse-volume
@mkdir -p /tmp/seaweedfs-test-sse-filer
@mkdir -p /tmp/seaweedfs-test-sse
# Clean up any old server logs
@rm -f /tmp/seaweedfs-sse-*.log || true
# Start master server with volume size limit and explicit gRPC port
@echo "Starting master server..."
@nohup $(SEAWEEDFS_BINARY) master -port=$(MASTER_PORT) -port.grpc=$$(( $(MASTER_PORT) + 10000 )) -mdir=/tmp/seaweedfs-test-sse-master -volumeSizeLimitMB=$(VOLUME_MAX_SIZE_MB) -ip=127.0.0.1 -peers=none > /tmp/seaweedfs-sse-master.log 2>&1 &
@sleep 3
# Start volume server with master HTTP port and increased capacity
@echo "Starting volume server..."
@nohup $(SEAWEEDFS_BINARY) volume -port=$(VOLUME_PORT) -master=127.0.0.1:$(MASTER_PORT) -dir=/tmp/seaweedfs-test-sse-volume -max=$(VOLUME_MAX_COUNT) -ip=127.0.0.1 > /tmp/seaweedfs-sse-volume.log 2>&1 &
@sleep 5
# Create S3 JSON configuration with KMS (Local provider) and basic identity for embedded S3
@sed -e 's/ACCESS_KEY_PLACEHOLDER/$(ACCESS_KEY)/g' \
-e 's/SECRET_KEY_PLACEHOLDER/$(SECRET_KEY)/g' \
s3-config-template.json > /tmp/seaweedfs-s3.json
# Start filer server with embedded S3 using the JSON config (with verbose logging)
@echo "Starting filer server with embedded S3..."
@AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) GLOG_v=4 nohup $(SEAWEEDFS_BINARY) filer -port=$(FILER_PORT) -port.grpc=$$(( $(FILER_PORT) + 10000 )) -master=127.0.0.1:$(MASTER_PORT) -dataCenter=defaultDataCenter -ip=127.0.0.1 -s3 -s3.port=$(S3_PORT) -s3.config=/tmp/seaweedfs-s3.json > /tmp/seaweedfs-sse-filer.log 2>&1 &
@sleep 5
# Start weed mini with embedded S3 using the JSON config (with verbose logging)
@echo "Starting weed mini with embedded S3..."
@AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) GLOG_v=4 $(SEAWEEDFS_BINARY) mini \
-dir=/tmp/seaweedfs-test-sse \
-s3.port=$(S3_PORT) \
-s3.config=/tmp/seaweedfs-s3.json \
-ip=127.0.0.1 \
> /tmp/seaweedfs-sse-mini.log 2>&1 & echo $$! > /tmp/weed-mini.pid
# Wait for S3 service to be ready - use port-based checking for reliability
@echo "$(YELLOW)Waiting for S3 service to be ready...$(NC)"
@for i in $$(seq 1 20); do \
if netstat -an 2>/dev/null | grep -q ":$(S3_PORT).*LISTEN" || \
ss -an 2>/dev/null | grep -q ":$(S3_PORT).*LISTEN" || \
lsof -i :$(S3_PORT) >/dev/null 2>&1; then \
echo "$(GREEN)S3 service is listening on port $(S3_PORT)$(NC)"; \
sleep 1; \
@echo "Checking S3 service is ready..."
@for i in $$(seq 1 30); do \
if curl -s http://127.0.0.1:$(S3_PORT) > /dev/null 2>&1; then \
echo "✅ S3 service is ready"; \
break; \
fi; \
if [ $$i -eq 20 ]; then \
echo "$(RED)S3 service failed to start within 20 seconds$(NC)"; \
echo "=== Detailed Logs ==="; \
echo "Master log:"; tail -30 /tmp/seaweedfs-sse-master.log || true; \
echo "Volume log:"; tail -30 /tmp/seaweedfs-sse-volume.log || true; \
echo "Filer log:"; tail -30 /tmp/seaweedfs-sse-filer.log || true; \
echo "=== Port Status ==="; \
netstat -an 2>/dev/null | grep ":$(S3_PORT)" || \
ss -an 2>/dev/null | grep ":$(S3_PORT)" || \
echo "No port listening on $(S3_PORT)"; \
echo "=== Process Status ==="; \
ps aux | grep -E "weed.*(filer|s3).*$(S3_PORT)" | grep -v grep || echo "No S3 process found"; \
exit 1; \
fi; \
echo "Waiting for S3 service... ($$i/20)"; \
sleep 1; \
done
# Additional wait for filer gRPC to be ready
@echo "$(YELLOW)Waiting for filer gRPC to be ready...$(NC)"
@sleep 2
@echo "$(GREEN)SeaweedFS server started successfully for SSE testing$(NC)"
@echo "Master: http://localhost:$(MASTER_PORT)"
@echo "Volume: http://localhost:$(VOLUME_PORT)"
@echo "Filer: http://localhost:$(FILER_PORT)"
@echo "S3: http://localhost:$(S3_PORT)"
@echo "Volume Max Size: $(VOLUME_MAX_SIZE_MB)MB"
@echo "SSE-KMS Support: Enabled"
# GitHub Actions compatible quick test subset
test-quick-with-server: build-weed

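Both SSE recipes materialize the same JSON credentials-plus-KMS document, one via a `printf` one-liner and one via `sed` over `s3-config-template.json`. A sketch of equivalent generation in Go, with the struct shape inferred from the `printf` template rather than from a published SeaweedFS schema; the key ID and credentials are placeholders:

```go
package main

import (
	"encoding/json"
	"os"
)

// Shapes follow the JSON emitted by the printf one-liner in the SSE
// Makefile; field names come from that template, not a formal schema.
type s3Config struct {
	Identities []identity `json:"identities"`
	KMS        kmsConfig  `json:"kms"`
}

type identity struct {
	Name        string       `json:"name"`
	Credentials []credential `json:"credentials"`
	Actions     []string     `json:"actions"`
}

type credential struct {
	AccessKey string `json:"accessKey"`
	SecretKey string `json:"secretKey"`
}

type kmsConfig struct {
	Type    string                 `json:"type"`
	Configs map[string]interface{} `json:"configs"`
}

func main() {
	cfg := s3Config{
		Identities: []identity{{
			Name:        "some_access_key1", // placeholder credentials
			Credentials: []credential{{AccessKey: "some_access_key1", SecretKey: "some_secret_key1"}},
			Actions:     []string{"Admin", "Read", "Write"},
		}},
		KMS: kmsConfig{
			Type: "local", // assumed Local KMS provider, per the CI recipe's comment
			Configs: map[string]interface{}{
				"keyId":             "test-key", // hypothetical key ID
				"encryptionContext": map[string]string{},
				"bucketKey":         false,
			},
		},
	}
	out, _ := json.MarshalIndent(cfg, "", "  ")
	_ = os.WriteFile("/tmp/seaweedfs-sse-s3.json", out, 0o644)
}
```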
36
test/s3/tagging/Makefile

@@ -72,41 +72,17 @@ start-server: check-deps
fi
@echo "🔍 DEBUG: Checking binary at $(WEED_BINARY)"
@ls -la $(WEED_BINARY) || (echo "❌ Binary not found!" && exit 1)
@echo "🔍 DEBUG: Checking config file at ../../../docker/compose/s3.json"
@ls -la ../../../docker/compose/s3.json || echo "⚠️ Config file not found, continuing without it"
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -filer -filer.maxMB=64 -s3 -ip.bind 0.0.0.0 -dir=./test-volume-data -master.raftHashicorp -master.electionTimeout 1s -master.volumeSizeLimitMB=100 -volume.max=100 -volume.preStopSeconds=1 -master.port=$(MASTER_PORT) -volume.port=$(VOLUME_PORT) -filer.port=$(FILER_PORT) -s3.port=$(S3_PORT) -metricsPort=9329 -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -master.peers=none"
@$(WEED_BINARY) server \
-filer \
@echo "🔍 DEBUG: Command: $(WEED_BINARY) mini -dir=./test-volume-data -s3.port=$(S3_PORT)"
@$(WEED_BINARY) mini \
-filer.maxMB=64 \
-s3 \
-ip.bind 0.0.0.0 \
-dir=./test-volume-data \
-master.raftHashicorp \
-master.electionTimeout 1s \
-master.volumeSizeLimitMB=100 \
-volume.max=100 \
-volume.preStopSeconds=1 \
-master.port=$(MASTER_PORT) \
-volume.port=$(VOLUME_PORT) \
-filer.port=$(FILER_PORT) \
-s3.port=$(S3_PORT) \
-metricsPort=9329 \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-master.peers=none \
> weed-test.log 2>&1 & echo $$! > weed-server.pid
@echo "🔍 DEBUG: Server PID: $$(cat weed-server.pid 2>/dev/null || echo 'PID file not found')"
@echo "🔍 DEBUG: Checking if PID is still running..."
@sleep 2
@if [ -f weed-server.pid ]; then \
SERVER_PID=$$(cat weed-server.pid); \
ps -p $$SERVER_PID || echo "⚠️ Server PID $$SERVER_PID not found after 2 seconds"; \
else \
echo "⚠️ PID file not found"; \
fi
> weed-test.log 2>&1 & \
echo $$! > weed-server.pid
@echo "🔍 DEBUG: Waiting for server to start (up to 90 seconds)..."
@for i in $$(seq 1 90); do \
echo "🔍 DEBUG: Attempt $$i/90 - checking port $(S3_PORT)"; \
@@ -125,8 +101,6 @@ start-server: check-deps
if [ $$i -eq 15 ]; then \
echo "🔍 DEBUG: After 15 seconds, checking port bindings..."; \
netstat -tlnp 2>/dev/null | grep $(S3_PORT) || echo "Port $(S3_PORT) not bound"; \
netstat -tlnp 2>/dev/null | grep $(MASTER_PORT) || echo "Port $(MASTER_PORT) not bound"; \
netstat -tlnp 2>/dev/null | grep $(VOLUME_PORT) || echo "Port $(VOLUME_PORT) not bound"; \
fi; \
if [ $$i -eq 30 ]; then \
echo "⚠️ Server taking longer than expected (30s), checking logs..."; \
@@ -141,7 +115,7 @@ start-server: check-deps
echo "🔍 DEBUG: Final process check:"; \
ps aux | grep weed | grep -v grep || echo "No weed processes found"; \
echo "🔍 DEBUG: Final port check:"; \
netstat -tlnp 2>/dev/null | grep -E "($(S3_PORT)|$(MASTER_PORT)|$(VOLUME_PORT))" || echo "No ports bound"; \
netstat -tlnp 2>/dev/null | grep -E "($(S3_PORT))" || echo "No ports bound"; \
echo "=== Full server logs ==="; \
if [ -f weed-test.log ]; then \
cat weed-test.log; \

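After this commit, every `start-server` target reduces to the same three steps: create the data directory, launch `weed mini` with a handful of `-s3.*` flags, and record the PID for teardown. A minimal Go sketch of that launch sequence using only the flags visible in these diffs; the relative binary path is an assumption:

```go
package main

import (
	"fmt"
	"os"
	"os/exec"
)

func main() {
	// Directory and flags mirror the tagging/versioning recipes:
	// `weed mini -dir=./test-volume-data -s3.port=8333 -s3.config=...`.
	if err := os.MkdirAll("./test-volume-data", 0o755); err != nil {
		panic(err)
	}
	logFile, err := os.Create("weed-test.log")
	if err != nil {
		panic(err)
	}
	cmd := exec.Command("../../../weed/weed_binary", // assumed binary path
		"mini",
		"-dir=./test-volume-data",
		"-s3.port=8333",
		"-s3.config=../../../docker/compose/s3.json",
	)
	cmd.Stdout = logFile
	cmd.Stderr = logFile
	if err := cmd.Start(); err != nil {
		panic(err)
	}
	// Equivalent of `... & echo $$! > weed-server.pid`.
	pid := fmt.Sprintf("%d\n", cmd.Process.Pid)
	if err := os.WriteFile("weed-server.pid", []byte(pid), 0o644); err != nil {
		panic(err)
	}
	fmt.Printf("weed mini started, PID %d\n", cmd.Process.Pid)
}
```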
67
test/s3/versioning/Makefile

@@ -43,13 +43,13 @@ build-weed:
@echo "Building SeaweedFS binary..."
@cd ../../../weed && go build -o weed_binary .
@chmod +x $(WEED_BINARY)
@echo " SeaweedFS binary built at $(WEED_BINARY)"
@echo "OK SeaweedFS binary built at $(WEED_BINARY)"
check-deps: build-weed
@echo "Checking dependencies..."
@echo "🔍 DEBUG: Checking Go installation..."
@echo "DEBUG: Checking Go installation..."
@command -v go >/dev/null 2>&1 || (echo "Go is required but not installed" && exit 1)
@echo "🔍 DEBUG: Go version: $$(go version)"
@echo "DEBUG: Go version: $$(go version)"
@echo "🔍 DEBUG: Checking binary at $(WEED_BINARY)..."
@test -f $(WEED_BINARY) || (echo "SeaweedFS binary not found at $(WEED_BINARY)" && exit 1)
@echo "🔍 DEBUG: Binary size: $$(ls -lh $(WEED_BINARY) | awk '{print $$5}')"
@@ -57,7 +57,7 @@ check-deps: build-weed
@echo "🔍 DEBUG: Checking Go module dependencies..."
@go list -m github.com/aws/aws-sdk-go-v2 >/dev/null 2>&1 || (echo "AWS SDK Go v2 not found. Run 'go mod tidy'." && exit 1)
@go list -m github.com/stretchr/testify >/dev/null 2>&1 || (echo "Testify not found. Run 'go mod tidy'." && exit 1)
@echo " All dependencies are available"
@echo "OK All dependencies are available"
# Start SeaweedFS server for testing
start-server: check-deps
@@ -81,21 +81,11 @@ start-server: check-deps
@echo "🔍 DEBUG: Creating volume directory..."
@mkdir -p ./test-volume-data
@echo "🔍 DEBUG: Launching SeaweedFS server in background..."
@echo "🔍 DEBUG: Command: $(WEED_BINARY) server -debug -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=true -s3.config=../../../docker/compose/s3.json -filer -filer.maxMB=64 -master.volumeSizeLimitMB=50 -master.peers=none -volume.max=100 -dir=./test-volume-data -volume.preStopSeconds=1 -metricsPort=9324"
@$(WEED_BINARY) server \
-debug \
-s3 \
@echo "🔍 DEBUG: Command: $(WEED_BINARY) mini -dir=./test-volume-data -s3.port=$(S3_PORT) -s3.config=../../../docker/compose/s3.json"
@$(WEED_BINARY) mini \
-dir=./test-volume-data \
-s3.port=$(S3_PORT) \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \
-filer.maxMB=64 \
-master.volumeSizeLimitMB=50 \
-master.peers=none \
-volume.max=100 \
-dir=./test-volume-data \
-volume.preStopSeconds=1 \
-metricsPort=9324 \
> weed-test.log 2>&1 & echo $$! > weed-server.pid
@echo "🔍 DEBUG: Server PID: $$(cat weed-server.pid 2>/dev/null || echo 'PID file not found')"
@echo "🔍 DEBUG: Checking if PID is still running..."
@@ -222,13 +212,13 @@ test-with-server: start-server
test-versioning-with-configs: check-deps
@echo "Testing with different S3 configurations..."
@echo "Testing with empty folder allowed..."
@$(WEED_BINARY) server -s3 -s3.port=$(S3_PORT) -filer -master.volumeSizeLimitMB=100 -master.peers=none -volume.max=100 > weed-test-config1.log 2>&1 & echo $$! > weed-config1.pid
@$(WEED_BINARY) mini -s3.port=$(S3_PORT) > weed-test-config1.log 2>&1 & echo $$! > weed-config1.pid
@sleep 5
@go test -v -timeout=5m -run "TestVersioningBasicWorkflow" . || true
@if [ -f weed-config1.pid ]; then kill -TERM $$(cat weed-config1.pid) 2>/dev/null || true; rm -f weed-config1.pid; fi
@sleep 2
@echo "Testing with delete bucket not empty disabled..."
@$(WEED_BINARY) server -s3 -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=false -filer -master.volumeSizeLimitMB=100 -master.peers=none -volume.max=100 > weed-test-config2.log 2>&1 & echo $$! > weed-config2.pid
@$(WEED_BINARY) mini -s3.port=$(S3_PORT) -s3.allowDeleteBucketNotEmpty=false > weed-test-config2.log 2>&1 & echo $$! > weed-config2.pid
@sleep 5
@go test -v -timeout=5m -run "TestVersioningBasicWorkflow" . || true
@if [ -f weed-config2.pid ]; then kill -TERM $$(cat weed-config2.pid) 2>/dev/null || true; rm -f weed-config2.pid; fi
@@ -271,19 +261,14 @@ debug-server:
@echo "Starting SeaweedFS server in debug mode..."
@$(MAKE) stop-server
@mkdir -p ./test-volume-data
@$(WEED_BINARY) server \
@$(WEED_BINARY) mini \
-debug \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \
-filer.maxMB=16 \
-master.volumeSizeLimitMB=50 \
-master.peers=none \
-volume.max=100 \
-dir=./test-volume-data \
-volume.preStopSeconds=1 \
-metricsPort=9324
# Run a single test for debugging
@@ -320,24 +305,24 @@ health-check:
# Simple server start without process cleanup (for CI troubleshooting)
start-server-simple: check-deps
@echo "Starting SeaweedFS server (simple mode)..."
@$(WEED_BINARY) server \
-debug \
@$(WEED_BINARY) mini \
-dir=$(SERVER_DIR) \
-s3 \
-s3.port=$(S3_PORT) \
-s3.allowDeleteBucketNotEmpty=true \
-s3.config=../../../docker/compose/s3.json \
-filer \
-filer.maxMB=64 \
-master.volumeSizeLimitMB=50 \
-master.peers=none \
-volume.max=100 \
-volume.preStopSeconds=1 \
-metricsPort=9324 \
> weed-test.log 2>&1 & echo $$! > weed-server.pid
@echo "Server PID: $$(cat weed-server.pid)"
@echo "Waiting for server to start..."
@sleep 10
@curl -s http://localhost:$(S3_PORT) >/dev/null 2>&1 && echo "✅ Server started successfully" || echo "❌ Server failed to start"
-s3.config=$(S3_CONFIG) \
> weed-server.log 2>&1 & \
echo $$! > weed-server.pid
@echo "Waiting for S3 server to be ready..."
@for i in $$(seq 1 30); do \
if echo | nc -z localhost $(S3_PORT); then \
echo "S3 server is ready!"; \
exit 0; \
fi; \
sleep 1; \
done; \
echo "S3 server failed to start"; \
exit 1
# Simple test run without server management
test-versioning-simple: check-deps

4
weed/command/mini.go

@@ -1062,9 +1062,7 @@ func startMiniWorker() {
// Set admin client
workerInstance.SetAdminClient(adminClient)
// Start metrics server for health checks and monitoring (uses shared metrics port like other services)
// This allows Kubernetes probes to check worker health via /health endpoint
go stats_collect.StartMetricsServer(*miniMetricsHttpIp, *miniMetricsHttpPort)
// Metrics server is already started in the main init function above, so no need to start it again here
// Start the worker
err = workerInstance.Start()

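The `mini.go` change drops a second `StartMetricsServer` goroutine because the shared metrics server is already started during initialization, and binding the same port twice would fail or race. One defensive pattern for this class of bug is to guard the start behind `sync.Once`; this is a generic sketch, not the code SeaweedFS actually uses:

```go
package main

import (
	"fmt"
	"sync"
)

var metricsOnce sync.Once

// startMetricsServerOnce makes repeated calls harmless: only the first
// caller actually binds the metrics port, so a worker init path and a
// main init path can both call it without double-starting the listener.
func startMetricsServerOnce(ip string, port int) {
	metricsOnce.Do(func() {
		fmt.Printf("metrics server listening on %s:%d\n", ip, port)
		// go stats_collect.StartMetricsServer(ip, port) would go here.
	})
}

func main() {
	startMetricsServerOnce("127.0.0.1", 9324)
	startMetricsServerOnce("127.0.0.1", 9324) // no-op on second call
}
```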