# EOFException Analysis: "Still have: 78 bytes left"

## Problem Summary

Spark Parquet writes succeed, but subsequent reads fail with:

```
java.io.EOFException: Reached the end of stream. Still have: 78 bytes left
```

## What the Logs Tell Us

### Write Phase ✅ (Everything looks correct)

**year=2020 file:**

```
🔧 Created stream: position=0 bufferSize=1048576
🔒 close START: position=0 buffer.position()=696 totalBytesWritten=696
→ Submitted 696 bytes, new position=696
✅ close END: finalPosition=696 totalBytesWritten=696
Calculated file size: 696 (chunks: 696, attr: 696, #chunks: 1)
```
**year=2021 file:**

```
🔧 Created stream: position=0 bufferSize=1048576
🔒 close START: position=0 buffer.position()=684 totalBytesWritten=684
→ Submitted 684 bytes, new position=684
✅ close END: finalPosition=684 totalBytesWritten=684
Calculated file size: 684 (chunks: 684, attr: 684, #chunks: 1)
```

**Key observations:**
- ✅ `totalBytesWritten == position == buffer == chunks == attr`
- ✅ All bytes received through `write()` are flushed and stored
- ✅ File metadata is consistent
- ✅ No bytes lost in the SeaweedFS layer

### Read Phase ❌ (Parquet expects more bytes)

**Consistent pattern:**
- year=2020: wrote 696 bytes, **expects 774 bytes** → missing 78
- year=2021: wrote 684 bytes, **expects 762 bytes** → missing 78

The **78-byte discrepancy is constant across both files**, suggesting it's not random data loss.
## Hypotheses

### H1: Parquet Footer Not Fully Written

Parquet file structure:

```
[Magic "PAR1" 4B] [Data pages] [Footer] [Footer length 4B] [Magic "PAR1" 4B]
```

**Possible scenario:**
1. Parquet writes 684 bytes of data pages
2. Parquet **intends** to write 78 bytes of footer metadata
3. Our `SeaweedOutputStream.close()` is called
4. Only the data pages (684 bytes) make it to the file
5. The footer (78 bytes) is lost or never written

**Evidence for:**
- 78 bytes is a reasonable size for a Parquet footer with minimal metadata
- File names end in `.snappy.parquet` → compressed, so the footer would be small
- Consistent 78-byte loss across files

**Evidence against:**
- Our `close()` logs show all bytes received via `write()` were processed
- If Parquet wrote the footer to the stream, we'd see `totalBytesWritten=762`
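If H1 is right, the stored bytes should end in data pages rather than the 4-byte footer length plus the trailing "PAR1" magic. A small standalone check can confirm this against a downloaded copy of the file — this is a sketch, and `FooterCheck` is our own illustrative helper, not existing code:

```java
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.file.Path;

class FooterCheck {
    /**
     * Reads the last 8 bytes of a Parquet file: a 4-byte little-endian
     * footer length followed by the magic "PAR1". Returns the footer
     * length if the tail is well-formed, or -1 if the magic is missing
     * (i.e. the footer never made it into the stored bytes).
     */
    static int readFooterLength(Path file) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(file.toFile(), "r")) {
            long len = raf.length();
            if (len < 8) return -1;  // too short to hold length + magic
            raf.seek(len - 8);
            int b0 = raf.read(), b1 = raf.read(), b2 = raf.read(), b3 = raf.read();
            int footerLen = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
            byte[] magic = new byte[4];
            raf.readFully(magic);
            boolean magicOk = magic[0] == 'P' && magic[1] == 'A'
                    && magic[2] == 'R' && magic[3] == '1';
            return magicOk ? footerLen : -1;
        }
    }
}
```

Run against the 696-byte year=2020 file, this would distinguish H1 (magic missing, returns -1) from a metadata-side problem (tail intact, returns the footer length).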
### H2: FSDataOutputStream Position Tracking Mismatch

Hadoop wraps our stream:

```java
new FSDataOutputStream(seaweedOutputStream, statistics)
```

**Possible scenario:**
1. Parquet writes 684 bytes → `FSDataOutputStream` increments position to 684
2. Parquet writes the 78-byte footer → `FSDataOutputStream` increments position to 762
3. **BUT** only 684 bytes reach our `SeaweedOutputStream.write()`
4. Parquet queries `FSDataOutputStream.getPos()` → returns 762
5. Parquet writes "file size: 762" in its footer
6. The actual file only has 684 bytes

**Evidence for:**
- Would explain why our logs show 684 but Parquet expects 762
- `FSDataOutputStream` might have its own buffering

**Evidence against:**
- `FSDataOutputStream` is a well-tested Hadoop core component
- Unlikely to lose bytes
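H2 is testable without guessing: interpose a byte counter between `FSDataOutputStream` and the SeaweedFS stream, then compare the counter with `getPos()` at close time. A minimal sketch — the `CountingOutputStream` class and its wiring are our own, not existing code:

```java
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;

class CountingOutputStream extends FilterOutputStream {
    private long bytesDelivered = 0;

    CountingOutputStream(OutputStream inner) {
        super(inner);
    }

    @Override
    public void write(int b) throws IOException {
        out.write(b);
        bytesDelivered++;
    }

    @Override
    public void write(byte[] b, int off, int len) throws IOException {
        out.write(b, off, len);  // forward in bulk, not byte-at-a-time
        bytesDelivered += len;
    }

    /** How many bytes actually reached the wrapped stream. */
    long getBytesDelivered() {
        return bytesDelivered;
    }
}
```

Wired as `new FSDataOutputStream(new CountingOutputStream(seaweedOutputStream), statistics)`: if `getPos()` reports 762 while the counter reports 684, the 78 bytes vanished above our layer; if both report 684, Parquet never wrote the footer at all.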
### H3: Race Condition During File Rename

Files are written to `_temporary/` and then renamed to the final location.

**Possible scenario:**
1. The write completes successfully (684 bytes)
2. `close()` flushes and updates metadata
3. The file is renamed while metadata is propagating
4. The read happens before the metadata sync completes
5. The reader gets a stale file size or an incomplete footer

**Evidence for:**
- Distributed systems often have eventual-consistency issues
- Rename might not sync metadata immediately

**Evidence against:**
- We added `fs.seaweed.write.flush.sync=true` to force sync
- The error is consistent, not intermittent
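If H3 is in play, the length reported for the renamed file should change as metadata propagates. A small probe can poll it until two consecutive reads agree. This is a sketch: in a real test the supplier would wrap `FileSystem.getFileStatus(path).getLen()`, abstracted here so the idea stands alone:

```java
import java.util.function.LongSupplier;

class LengthProbe {
    /**
     * Polls the reported length until the same value is seen twice in a
     * row, returning that stable value, or -1 if it never stabilizes
     * within maxPolls reads. A value that changes between polls would be
     * direct evidence of metadata still propagating after the rename.
     */
    static long waitForStableLength(LongSupplier reportedLength,
                                    int maxPolls, long sleepMillis)
            throws InterruptedException {
        long previous = reportedLength.getAsLong();
        for (int i = 1; i < maxPolls; i++) {
            Thread.sleep(sleepMillis);
            long current = reportedLength.getAsLong();
            if (current == previous) return current;
            previous = current;
        }
        return -1;
    }
}
```

That said, the error here is consistent rather than intermittent, so a probe that stabilizes immediately would be further evidence against this hypothesis.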
### H4: Compression-Related Size Confusion

The files use Snappy compression (`*.snappy.parquet`).

**Possible scenario:**
1. Parquet tracks the uncompressed size internally
2. It writes compressed data to the stream
3. Size mismatch between the compressed file and the uncompressed metadata

**Evidence against:**
- Parquet handles compression internally and consistently
- This would affect all Parquet users, not just SeaweedFS
## Next Debugging Steps

### Added: getPos() Logging

```java
public synchronized long getPos() {
    long currentPos = position + buffer.position();
    LOG.info("[DEBUG-2024] 📍 getPos() called: flushedPosition={} bufferPosition={} returning={}",
            position, buffer.position(), currentPos);
    return currentPos;
}
```

**Will reveal:**
- If/when Parquet queries the position
- What value is returned vs. what was actually written
- Whether `FSDataOutputStream` bypasses our position tracking

### Next steps if getPos() is NOT called:
→ Parquet is not using position tracking
→ Focus on footer write completion

### Next steps if getPos() returns 762 but we only wrote 684:
→ `FSDataOutputStream` has a buffering issue or byte loss
→ Need to investigate the Hadoop wrapper's behavior

### Next steps if getPos() returns 684 (correct):
→ The issue is in the footer metadata or the read path
→ Need to examine the Parquet footer contents
## Parquet File Format Context

A typical small Parquet file (~737 bytes):

```
Offset    Content
0-3       Magic "PAR1"
4-650     Row group data (compressed)
651-728   Footer metadata (schema, row group pointers)
729-732   Footer length (4 bytes, value: 78)
733-736   Magic "PAR1"
Total: 737 bytes
```

If the footer length field says "78" but only the data pages exist:
- The file ends at byte 650
- The footer should start at byte 651 (but doesn't exist)
- The reader tries to read 78 bytes and gets an EOFException

This matches our error pattern perfectly.
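The mismatch can also be pinned down from the read side by draining the stream and comparing the count with the length the filesystem metadata reports. A sketch — `countReadable` is illustrative, not existing code:

```java
import java.io.IOException;
import java.io.InputStream;

class ReadableBytes {
    /** Drains the stream and returns how many bytes it actually yields. */
    static long countReadable(InputStream in) throws IOException {
        byte[] buf = new byte[8192];
        long total = 0;
        int n;
        while ((n = in.read(buf)) != -1) {
            total += n;
        }
        return total;
    }
}
```

For year=2020, comparing this against `fs.getFileStatus(path).getLen()`: 696 readable bytes vs. 774 reported would mean the metadata (or the footer's own size claim) is exactly 78 bytes ahead of the stored data.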
## Recommended Fix Directions

1. **Ensure the footer is fully written before close() returns**
2. **Add an explicit fsync/hsync before the metadata write**
3. **Verify FSDataOutputStream doesn't buffer separately**
4. **Check if Parquet needs a special OutputStreamAdapter**