From 72b4bf9098bd1be1e2fde4f0e8c72f53edd60774 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 23 Nov 2025 19:37:28 -0800 Subject: [PATCH] fix: extract correct chunk ID (not source_file_id) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old grep pattern 'file_id:' also matched 'source_file_id:' lines, so the wrong ID was extracted. Fixed the pattern to look for ' file_id: "' (with a leading space), which excludes the 'source_file_id:' line. It will now correctly extract: file_id: "7,d0cdf5711" ← THIS ONE Instead of: source_file_id: "0,000000000" ← NOT THIS The correct chunk ID should download successfully from the volume server! --- .github/workflows/spark-integration-tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/spark-integration-tests.yml b/.github/workflows/spark-integration-tests.yml index b7f83bbad..55a7b1355 100644 --- a/.github/workflows/spark-integration-tests.yml +++ b/.github/workflows/spark-integration-tests.yml @@ -140,7 +140,8 @@ jobs: FULL_LOG=$(docker compose logs spark-tests 2>&1) # Extract chunk file_id (format: "7,d0364fd01") - CHUNK_ID=$(echo "$FULL_LOG" | grep -B 50 "EOFException" | grep 'file_id:' | tail -1 | grep -oP '"\K[^"]+') + # Look for the line "file_id: " but NOT "source_file_id: " + CHUNK_ID=$(echo "$FULL_LOG" | grep -B 50 "EOFException" | grep ' file_id: "' | tail -1 | grep -oP '"\K[^"]+') echo "Found chunk ID: $CHUNK_ID" if [ -n "$CHUNK_ID" ]; then