From 3586f6786ea5ae222a7f19da65720825366ffaab Mon Sep 17 00:00:00 2001 From: chrislu Date: Sat, 22 Nov 2025 23:05:36 -0800 Subject: [PATCH] fix: force volume creation before tests to prevent 'No writable volumes' error Root cause: With -max=0 (volume limit auto-configured from free disk space), volumes are created on-demand, but no volumes existed when tests started, causing the first write to fail. Solution: - Explicitly trigger volume growth via /vol/grow API - Create 3 volumes with replication=000 before running tests - Verify volumes exist before proceeding - Fail early with a clear message if volumes can't be created Changes: - POST to http://localhost:9333/vol/grow?replication=000&count=3 - Wait up to 10 seconds for volumes to appear - Show volume count and layout status - Exit with error if no volumes after 10 attempts - Applied to both spark-tests and spark-example jobs This ensures writable volumes exist before Spark tries to write data. --- .github/workflows/spark-integration-tests.yml | 54 ++++++++++++------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/.github/workflows/spark-integration-tests.yml b/.github/workflows/spark-integration-tests.yml index 5f0571f0e..ea1225110 100644 --- a/.github/workflows/spark-integration-tests.yml +++ b/.github/workflows/spark-integration-tests.yml @@ -180,24 +180,31 @@ jobs: curl -f http://localhost:9333/cluster/status || exit 1 curl -f http://localhost:8888/ || exit 1 - # Check volume server registration and volume availability + # Check volume server registration and force volume creation echo "Checking volume server status..." curl -s http://localhost:9333/dir/status | jq '.' || echo "jq not available" - echo "Waiting for volume server to register and create volumes..." 
- for i in {1..15}; do - VOLUME_COUNT=$(curl -s http://localhost:9333/dir/status | jq -r '.Topology.DataCenters[0].Racks[0].DataNodes[0].Volumes // [] | length' 2>/dev/null || echo "0") - echo "Attempt $i/15: Volume count = $VOLUME_COUNT" - if [ "$VOLUME_COUNT" != "0" ] && [ "$VOLUME_COUNT" != "null" ]; then + + echo "Triggering initial volume growth..." + curl -X POST "http://localhost:9333/vol/grow?replication=000&count=3" || echo "⚠️ Volume growth request failed" + sleep 2 + + echo "Verifying volumes were created..." + for i in {1..10}; do + VOLUME_COUNT=$(curl -s http://localhost:9333/dir/status | jq -r '.Topology.DataCenters[0].Racks[0].DataNodes[0].Volumes // 0' 2>/dev/null || echo "0") + echo "Attempt $i/10: Volume count = $VOLUME_COUNT" + if [ "$VOLUME_COUNT" -gt 0 ] 2>/dev/null; then echo "✓ Volume server has $VOLUME_COUNT volumes registered" + curl -s http://localhost:9333/dir/status | jq -r '.Topology.Layouts[]' || true break fi - if [ $i -eq 15 ]; then - echo "⚠️ No volumes created yet, but continuing (volumes may be created on-demand)" + if [ $i -eq 10 ]; then + echo "✗ Error: No volumes created after 10 attempts; aborting to avoid 'No writable volumes' test failures" + exit 1 fi - sleep 2 + sleep 1 done - echo "✓ All SeaweedFS services are healthy" + echo "✓ All SeaweedFS services are healthy and volumes are ready" - name: Build Spark integration tests working-directory: test/java/spark @@ -371,24 +378,31 @@ jobs: curl -f http://localhost:9333/cluster/status || exit 1 curl -f http://localhost:8888/ || exit 1 - # Check volume server registration and volume availability + # Check volume server registration and force volume creation echo "Checking volume server status..." curl -s http://localhost:9333/dir/status | jq '.' || echo "jq not available" - echo "Waiting for volume server to register and create volumes..." 
- for i in {1..15}; do - VOLUME_COUNT=$(curl -s http://localhost:9333/dir/status | jq -r '.Topology.DataCenters[0].Racks[0].DataNodes[0].Volumes // [] | length' 2>/dev/null || echo "0") - echo "Attempt $i/15: Volume count = $VOLUME_COUNT" - if [ "$VOLUME_COUNT" != "0" ] && [ "$VOLUME_COUNT" != "null" ]; then + + echo "Triggering initial volume growth..." + curl -X POST "http://localhost:9333/vol/grow?replication=000&count=3" || echo "⚠️ Volume growth request failed" + sleep 2 + + echo "Verifying volumes were created..." + for i in {1..10}; do + VOLUME_COUNT=$(curl -s http://localhost:9333/dir/status | jq -r '.Topology.DataCenters[0].Racks[0].DataNodes[0].Volumes // 0' 2>/dev/null || echo "0") + echo "Attempt $i/10: Volume count = $VOLUME_COUNT" + if [ "$VOLUME_COUNT" -gt 0 ] 2>/dev/null; then echo "✓ Volume server has $VOLUME_COUNT volumes registered" + curl -s http://localhost:9333/dir/status | jq -r '.Topology.Layouts[]' || true break fi - if [ $i -eq 15 ]; then - echo "⚠️ No volumes created yet, but continuing (volumes may be created on-demand)" + if [ $i -eq 10 ]; then + echo "✗ Error: No volumes created after 10 attempts; aborting to avoid 'No writable volumes' test failures" + exit 1 fi - sleep 2 + sleep 1 done - echo "✓ All SeaweedFS services are healthy" + echo "✓ All SeaweedFS services are healthy and volumes are ready" - name: Build project working-directory: test/java/spark