Browse Source

update

pull/7508/head
chrislu 1 month ago
parent
commit
aeba6513ad
  1. 5
      test/s3/parquet/Makefile
  2. 37
      test/s3/parquet/test_pyarrow_native_s3.py

5
test/s3/parquet/Makefile

@ -285,7 +285,6 @@ test-with-server: build-weed setup-python
BUCKET_NAME=$(BUCKET_NAME) \ BUCKET_NAME=$(BUCKET_NAME) \
$(VENV_DIR)/bin/$(PYTHON) $(PYTHON_TEST_SCRIPT) || exit 1; \ $(VENV_DIR)/bin/$(PYTHON) $(PYTHON_TEST_SCRIPT) || exit 1; \
echo "✅ All tests completed successfully"; \ echo "✅ All tests completed successfully"; \
$(MAKE) -C $(TEST_DIR) stop-seaweedfs-safe || true; \
else \ else \
echo "❌ Failed to start SeaweedFS cluster"; \ echo "❌ Failed to start SeaweedFS cluster"; \
echo "=== Server startup logs ==="; \ echo "=== Server startup logs ==="; \
@ -340,7 +339,6 @@ test-implicit-dir-with-server: build-weed setup-python
BUCKET_NAME=test-implicit-dir \ BUCKET_NAME=test-implicit-dir \
$(VENV_DIR)/bin/$(PYTHON) test_implicit_directory_fix.py || exit 1; \ $(VENV_DIR)/bin/$(PYTHON) test_implicit_directory_fix.py || exit 1; \
echo "✅ All tests completed successfully"; \ echo "✅ All tests completed successfully"; \
$(MAKE) -C $(TEST_DIR) stop-seaweedfs-safe || true; \
else \ else \
echo "❌ Failed to start SeaweedFS cluster"; \ echo "❌ Failed to start SeaweedFS cluster"; \
echo "=== Server startup logs ==="; \ echo "=== Server startup logs ==="; \
@ -395,7 +393,6 @@ test-native-s3-with-server: build-weed setup-python
BUCKET_NAME=$(BUCKET_NAME) \ BUCKET_NAME=$(BUCKET_NAME) \
$(VENV_DIR)/bin/$(PYTHON) test_pyarrow_native_s3.py || exit 1; \ $(VENV_DIR)/bin/$(PYTHON) test_pyarrow_native_s3.py || exit 1; \
echo "✅ All tests completed successfully"; \ echo "✅ All tests completed successfully"; \
$(MAKE) -C $(TEST_DIR) stop-seaweedfs-safe || true; \
else \ else \
echo "❌ Failed to start SeaweedFS cluster"; \ echo "❌ Failed to start SeaweedFS cluster"; \
echo "=== Server startup logs ==="; \ echo "=== Server startup logs ==="; \
@ -417,7 +414,6 @@ test-native-s3-with-sse: build-weed setup-python
BUCKET_NAME=$(BUCKET_NAME) \ BUCKET_NAME=$(BUCKET_NAME) \
$(VENV_DIR)/bin/$(PYTHON) test_pyarrow_native_s3.py || exit 1; \ $(VENV_DIR)/bin/$(PYTHON) test_pyarrow_native_s3.py || exit 1; \
echo "✅ All SSE-S3 tests completed successfully"; \ echo "✅ All SSE-S3 tests completed successfully"; \
$(MAKE) -C $(TEST_DIR) stop-seaweedfs-safe || true; \
else \ else \
echo "❌ Failed to start SeaweedFS cluster with SSE-S3"; \ echo "❌ Failed to start SeaweedFS cluster with SSE-S3"; \
echo "=== Server startup logs ==="; \ echo "=== Server startup logs ==="; \
@ -439,7 +435,6 @@ test-sse-s3-compat: build-weed setup-python
BUCKET_NAME=$(BUCKET_NAME) \ BUCKET_NAME=$(BUCKET_NAME) \
$(VENV_DIR)/bin/$(PYTHON) test_sse_s3_compatibility.py || exit 1; \ $(VENV_DIR)/bin/$(PYTHON) test_sse_s3_compatibility.py || exit 1; \
echo "✅ All SSE-S3 compatibility tests completed successfully"; \ echo "✅ All SSE-S3 compatibility tests completed successfully"; \
$(MAKE) -C $(TEST_DIR) stop-seaweedfs-safe || true; \
else \ else \
echo "❌ Failed to start SeaweedFS cluster with SSE-S3"; \ echo "❌ Failed to start SeaweedFS cluster with SSE-S3"; \
echo "=== Server startup logs ==="; \ echo "=== Server startup logs ==="; \

37
test/s3/parquet/test_pyarrow_native_s3.py

@ -32,7 +32,6 @@ import os
import secrets import secrets
import sys import sys
import logging import logging
from datetime import datetime
from typing import Optional from typing import Optional
import pyarrow as pa import pyarrow as pa
@ -171,6 +170,7 @@ def ensure_bucket_exists(s3: pafs.S3FileSystem) -> bool:
logging.info(f"✓ Bucket exists: {BUCKET_NAME}") logging.info(f"✓ Bucket exists: {BUCKET_NAME}")
return True return True
except Exception: except Exception:
# Bucket likely does not exist or is not accessible; fall back to creation.
pass pass
# Try to create the bucket # Try to create the bucket
@ -178,8 +178,8 @@ def ensure_bucket_exists(s3: pafs.S3FileSystem) -> bool:
s3.create_dir(BUCKET_NAME) s3.create_dir(BUCKET_NAME)
logging.info(f"✓ Bucket created: {BUCKET_NAME}") logging.info(f"✓ Bucket created: {BUCKET_NAME}")
return True return True
except Exception as e:
logging.error(f"✗ Failed to create/check bucket with PyArrow: {e}")
except Exception:
logging.exception("✗ Failed to create/check bucket with PyArrow")
return False return False
@ -198,36 +198,42 @@ def test_write_and_read(s3: pafs.S3FileSystem, test_name: str, num_rows: int) ->
filesystem=s3, filesystem=s3,
format="parquet", format="parquet",
) )
logging.info(f" ✓ Write completed")
logging.info(" ✓ Write completed")
# Test Method 1: Read with pq.read_table # Test Method 1: Read with pq.read_table
logging.info(f" Reading with pq.read_table...")
logging.info(" Reading with pq.read_table...")
table_read = pq.read_table(filename, filesystem=s3) table_read = pq.read_table(filename, filesystem=s3)
if table_read.num_rows != num_rows: if table_read.num_rows != num_rows:
return False, f"pq.read_table: Row count mismatch (expected {num_rows}, got {table_read.num_rows})" return False, f"pq.read_table: Row count mismatch (expected {num_rows}, got {table_read.num_rows})"
if not table_read.equals(table):
return False, "pq.read_table: Table contents mismatch"
logging.info(f" ✓ pq.read_table: {table_read.num_rows:,} rows") logging.info(f" ✓ pq.read_table: {table_read.num_rows:,} rows")
# Test Method 2: Read with pq.ParquetDataset # Test Method 2: Read with pq.ParquetDataset
logging.info(f" Reading with pq.ParquetDataset...")
logging.info(" Reading with pq.ParquetDataset...")
dataset = pq.ParquetDataset(filename, filesystem=s3) dataset = pq.ParquetDataset(filename, filesystem=s3)
table_dataset = dataset.read() table_dataset = dataset.read()
if table_dataset.num_rows != num_rows: if table_dataset.num_rows != num_rows:
return False, f"pq.ParquetDataset: Row count mismatch (expected {num_rows}, got {table_dataset.num_rows})" return False, f"pq.ParquetDataset: Row count mismatch (expected {num_rows}, got {table_dataset.num_rows})"
if not table_dataset.equals(table):
return False, "pq.ParquetDataset: Table contents mismatch"
logging.info(f" ✓ pq.ParquetDataset: {table_dataset.num_rows:,} rows") logging.info(f" ✓ pq.ParquetDataset: {table_dataset.num_rows:,} rows")
# Test Method 3: Read with pads.dataset # Test Method 3: Read with pads.dataset
logging.info(f" Reading with pads.dataset...")
logging.info(" Reading with pads.dataset...")
dataset_pads = pads.dataset(filename, filesystem=s3) dataset_pads = pads.dataset(filename, filesystem=s3)
table_pads = dataset_pads.to_table() table_pads = dataset_pads.to_table()
if table_pads.num_rows != num_rows: if table_pads.num_rows != num_rows:
return False, f"pads.dataset: Row count mismatch (expected {num_rows}, got {table_pads.num_rows})" return False, f"pads.dataset: Row count mismatch (expected {num_rows}, got {table_pads.num_rows})"
if not table_pads.equals(table):
return False, "pads.dataset: Table contents mismatch"
logging.info(f" ✓ pads.dataset: {table_pads.num_rows:,} rows") logging.info(f" ✓ pads.dataset: {table_pads.num_rows:,} rows")
return True, "All read methods passed" return True, "All read methods passed"
except Exception as e:
logging.exception(f" ✗ Test failed: {e}")
return False, f"{type(e).__name__}: {str(e)}"
except Exception as exc:
logging.exception(" ✗ Test failed")
return False, f"{type(exc).__name__}: {exc}"
def cleanup_test_files(s3: pafs.S3FileSystem) -> None: def cleanup_test_files(s3: pafs.S3FileSystem) -> None:
@ -236,15 +242,12 @@ def cleanup_test_files(s3: pafs.S3FileSystem) -> None:
test_path = f"{BUCKET_NAME}/{TEST_DIR}" test_path = f"{BUCKET_NAME}/{TEST_DIR}"
logging.info(f"Cleaning up test directory: {test_path}") logging.info(f"Cleaning up test directory: {test_path}")
# Delete all files in the test directory
file_info = s3.get_file_info(pafs.FileSelector(test_path, recursive=True))
for info in file_info:
if info.type == pafs.FileType.File:
s3.delete_file(info.path)
# Delete the test directory and all its contents
s3.delete_dir(test_path)
logging.info("✓ Test directory cleaned up") logging.info("✓ Test directory cleaned up")
except Exception as e:
logging.warning(f"Failed to cleanup test directory: {e}")
except Exception:
logging.exception("Failed to cleanup test directory")
def main(): def main():

Loading…
Cancel
Save