From 72b4bf9098bd1be1e2fde4f0e8c72f53edd60774 Mon Sep 17 00:00:00 2001
From: chrislu <chris.lu@gmail.com>
Date: Sun, 23 Nov 2025 19:37:28 -0800
Subject: [PATCH] fix: extract correct chunk ID (not source_file_id)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The old grep pattern 'file_id:' also matched the 'source_file_id:' line,
so the wrong value could be extracted as the chunk ID.

Fixed the pattern to look for '  file_id: "' (with leading spaces),
which excludes the 'source_file_id:' line.

The step will now correctly extract:
  file_id: "7,d0cdf5711"  ← THIS ONE
Instead of:
  source_file_id: "0,000000000"  ← NOT THIS

With the correct chunk ID, the download from the volume server should succeed.
---
 .github/workflows/spark-integration-tests.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/spark-integration-tests.yml b/.github/workflows/spark-integration-tests.yml
index b7f83bbad..55a7b1355 100644
--- a/.github/workflows/spark-integration-tests.yml
+++ b/.github/workflows/spark-integration-tests.yml
@@ -140,7 +140,8 @@ jobs:
               FULL_LOG=$(docker compose logs spark-tests 2>&1)
               
               # Extract chunk file_id (format: "7,d0364fd01")
-              CHUNK_ID=$(echo "$FULL_LOG" | grep -B 50 "EOFException" | grep 'file_id:' | tail -1 | grep -oP '"\K[^"]+')
+              # Look for the line "file_id: " but NOT "source_file_id: "
+              CHUNK_ID=$(echo "$FULL_LOG" | grep -B 50 "EOFException" | grep '  file_id: "' | tail -1 | grep -oP '"\K[^"]+')
               echo "Found chunk ID: $CHUNK_ID"
               
               if [ -n "$CHUNK_ID" ]; then