You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

253 lines
7.3 KiB

#!/bin/bash
#
# RDMA simulation environment smoke test.
# Validates that RDMA devices and libraries are working.

# Stop at the first command failure that is not explicitly handled.
set -e

printf '%s\n' "🧪 Testing RDMA simulation environment..."

# Color codes for output. They are single-quoted, so each variable holds the
# literal backslash sequence; whoever prints them must interpret the escapes.
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Print a message on stdout, colorized according to its status.
#
# Arguments:
#   $1 - status: "success", "warning", "error" or "info"
#   $2 - message text (printed verbatim; backslashes in it are not interpreted)
# Globals (read):
#   GREEN, YELLOW, RED, BLUE, NC - color escape sequences; empty if unset
# Outputs:
#   One line on stdout. "warning" and "info" messages get an emoji prefix.
#   An unrecognized status prints the bare message instead of dropping it.
print_status() {
  local status="$1"
  local message="$2"
  local color=""
  local prefix=""

  case "$status" in
    "success")
      color="${GREEN:-}"
      ;;
    "warning")
      color="${YELLOW:-}"
      prefix="⚠️ "
      ;;
    "error")
      color="${RED:-}"
      ;;
    "info")
      color="${BLUE:-}"
      prefix="📋 "
      ;;
    *)
      # Unknown status: still show the message, just without decoration.
      printf '%s\n' "$message"
      return 0
      ;;
  esac

  # %b expands the backslash escapes stored in the color variables, while %s
  # keeps the prefix and message exactly as given.
  printf '%b%s%s%b\n' "$color" "$prefix" "$message" "${NC:-}"
}
# Check that at least one RDMA device is registered in sysfs and list them.
#
# Arguments:
#   $1 - (optional) directory to inspect; defaults to /sys/class/infiniband
# Outputs:
#   Status lines via print_status: the number of entries found, then one
#   "Device: <name>" line per entry that is a directory.
# Returns:
#   0 if the directory exists and contains at least one entry, 1 otherwise.
test_rdma_devices() {
  local sysfs_dir="${1:-/sys/class/infiniband}"
  local entry
  local device_count=0

  print_status "info" "Testing RDMA devices..."

  if [ ! -d "$sysfs_dir" ]; then
    print_status "error" "$sysfs_dir directory not found"
    return 1
  fi

  # Count entries with a glob instead of parsing `ls` output. When nothing
  # matches, the pattern stays literal, so each candidate is checked for
  # existence (-L keeps dangling symlinks in the count).
  for entry in "$sysfs_dir"/*; do
    if [ -e "$entry" ] || [ -L "$entry" ]; then
      device_count=$((device_count + 1))
    fi
  done

  if [ "$device_count" -eq 0 ]; then
    print_status "error" "No RDMA devices found"
    return 1
  fi

  print_status "success" "Found $device_count RDMA device(s)"

  # List devices
  for entry in "$sysfs_dir"/*; do
    if [ -d "$entry" ]; then
      print_status "info" "Device: ${entry##*/}"
    fi
  done
  return 0
}
# Check libibverbs by running ibv_devinfo and inspecting what it prints.
#
# Outputs:
#   Status lines via print_status, plus the first five lines of the
#   ibv_devinfo output when there is any.
# Returns:
#   0 if ibv_devinfo is available and printed something, 1 otherwise.
test_libibverbs() {
  local verbs_report

  print_status "info" "Testing libibverbs..."

  if ! command -v ibv_devinfo >/dev/null 2>&1; then
    print_status "error" "ibv_devinfo command not found"
    return 1
  fi

  # Only the captured stdout is used; the tool's exit status is ignored on
  # purpose, so an empty report is what signals "no devices".
  verbs_report=$(ibv_devinfo 2>/dev/null) || true

  if [ -z "$verbs_report" ]; then
    print_status "error" "ibv_devinfo found no devices"
    return 1
  fi

  print_status "success" "libibverbs working - devices detected"

  # Show basic info
  echo "$verbs_report" | head -5

  # A transport line naming InfiniBand or Ethernet counts as a detected
  # transport layer; anything else only produces a warning.
  if echo "$verbs_report" | grep -q -e "transport.*InfiniBand" -e "transport.*Ethernet"; then
    print_status "success" "RDMA transport layer detected"
  else
    print_status "warning" "Transport layer information unclear"
  fi

  return 0
}
# Check that UCX is installed, reports devices, and offers RDMA transports.
#
# Outputs:
#   Status lines via print_status, plus the first ten lines of the
#   `ucx_info -d` output when there is any.
# Returns:
#   0 if ucx_info is available and printed something, 1 otherwise. Missing
#   RDMA transports only produce a warning and do not change the result.
test_ucx() {
  local ucx_output

  print_status "info" "Testing UCX..."

  if ! command -v ucx_info >/dev/null 2>&1; then
    print_status "warning" "UCX tools not available"
    return 1
  fi

  # Only the captured stdout is used; the tool's exit status is ignored on
  # purpose, so an empty report is what signals "no devices".
  ucx_output=$(ucx_info -d 2>/dev/null) || true

  if [ -z "$ucx_output" ]; then
    print_status "warning" "UCX not detecting devices"
    return 1
  fi

  print_status "success" "UCX detecting devices"

  # Show UCX device info
  printf '%s\n' "$ucx_output" | head -10

  # Look for rc/ud/dc only as the transport name on a "Transport:" line
  # (bare or with a suffix such as rc_verbs). A plain substring search for
  # "rc", "ud" or "dc" also matches unrelated names, e.g. "cuda_copy".
  # NOTE(review): assumes `ucx_info -d` reports each transport on its own
  # "Transport: <name>" line - confirm against the UCX version in use.
  if printf '%s\n' "$ucx_output" \
    | grep -Eq 'Transport:[[:space:]]*(rc|ud|dc)(_[[:alnum:]_]+)?[[:space:]]*$'; then
    print_status "success" "UCX RDMA transports available"
  else
    print_status "warning" "UCX RDMA transports not detected"
  fi

  return 0
}
# Check for the RDMA CM (Connection Manager) device node.
#
# Outputs:
#   Status lines via print_status.
# Returns:
#   0 if /dev/infiniband/rdma_cm exists, 1 otherwise.
test_rdma_cm() {
  local cm_node=/dev/infiniband/rdma_cm

  print_status "info" "Testing RDMA Connection Manager..."

  if [ ! -e "$cm_node" ]; then
    print_status "warning" "RDMA CM device not found"
    return 1
  fi

  print_status "success" "RDMA CM device found"
  return 0
}
# Report which RDMA test utilities are installed.
#
# Nothing is actually run here: ibv_rc_pingpong would need a client/server
# pair, so only the presence of the binaries is checked.
#
# Outputs:
#   Status lines via print_status: one for ibv_rc_pingpong and one with the
#   number of additional utilities found.
test_rdma_operations() {
  local -a extra_tools=(ibv_asyncwatch ibv_read_lat ibv_write_lat)
  local candidate
  local extra_count=0

  print_status "info" "Testing basic RDMA operations..."

  if ! command -v ibv_rc_pingpong >/dev/null 2>&1; then
    print_status "warning" "RDMA test tools not available"
  else
    print_status "success" "RDMA test tools available (ibv_rc_pingpong)"
  fi

  # Count the other useful RDMA utilities that can be found.
  for candidate in "${extra_tools[@]}"; do
    command -v "$candidate" >/dev/null 2>&1 || continue
    extra_count=$((extra_count + 1))
  done

  if [ "$extra_count" -eq 0 ]; then
    print_status "warning" "No additional RDMA test tools found"
  else
    print_status "success" "Found $extra_count additional RDMA test tools"
  fi
}
# Print the final PASS/FAIL summary and the overall verdict.
#
# The key checks are re-run here instead of reusing results from the
# individual test functions: sysfs device entries, `ibv_devinfo` exit status,
# and `ucx_info -d` exit status.
#
# Outputs:
#   Summary lines on stdout (plain echo and print_status), followed by
#   next-step hints or troubleshooting hints.
# Returns:
#   0 if both the device check and the libibverbs check pass, 1 otherwise.
#   The UCX result is reported but does not affect the return value.
generate_summary() {
  local devices_ok=0
  local libibverbs_ok=0
  local ucx_ok=0
  local entry

  echo ""
  print_status "info" "RDMA Simulation Test Summary"
  echo "======================================"

  # Detect at least one sysfs entry with a glob instead of parsing `ls`.
  # When nothing matches, the pattern stays literal, so existence is checked.
  if [ -d /sys/class/infiniband ]; then
    for entry in /sys/class/infiniband/*; do
      if [ -e "$entry" ] || [ -L "$entry" ]; then
        devices_ok=1
        break
      fi
    done
  fi

  if command -v ibv_devinfo >/dev/null 2>&1 && ibv_devinfo >/dev/null 2>&1; then
    libibverbs_ok=1
  fi

  if command -v ucx_info >/dev/null 2>&1 && ucx_info -d >/dev/null 2>&1; then
    ucx_ok=1
  fi

  echo "📊 Test Results:"

  # Explicit if/else: with `a && b || c`, a failing PASS print would also
  # print the FAIL line.
  if [ "$devices_ok" -eq 1 ]; then
    print_status "success" "RDMA Devices: PASS"
  else
    print_status "error" "RDMA Devices: FAIL"
  fi

  if [ "$libibverbs_ok" -eq 1 ]; then
    print_status "success" "libibverbs: PASS"
  else
    print_status "error" "libibverbs: FAIL"
  fi

  if [ "$ucx_ok" -eq 1 ]; then
    print_status "success" "UCX: PASS"
  else
    print_status "warning" "UCX: FAIL/WARNING"
  fi

  echo ""

  if [ "$devices_ok" -eq 1 ] && [ "$libibverbs_ok" -eq 1 ]; then
    print_status "success" "RDMA simulation environment is ready! 🎉"
    echo ""
    print_status "info" "You can now:"
    echo " - Run RDMA applications"
    echo " - Test SeaweedFS RDMA engine with real RDMA"
    echo " - Use UCX for high-performance transfers"
    return 0
  fi

  print_status "error" "RDMA simulation setup needs attention"
  echo ""
  print_status "info" "Troubleshooting:"
  echo " - Run setup script: sudo /opt/rdma-sim/setup-soft-roce.sh"
  echo " - Check container privileges (--privileged flag)"
  echo " - Verify kernel RDMA support"
  return 1
}
# Run the whole test suite, then the summary.
#
# Each test's own failure is tolerated (`|| true`), so one failing check
# never stops the rest from running.
#
# Returns:
#   The exit status of generate_summary.
main() {
  local step

  echo "🚀 RDMA Simulation Test Suite"
  echo "======================================"

  # Run every test in order, with a blank line after each one.
  for step in test_rdma_devices test_libibverbs test_ucx test_rdma_cm test_rdma_operations; do
    "$step" || true
    echo ""
  done

  generate_summary
}
# Health check mode (for Docker healthcheck): when the first argument is
# "healthcheck", only verify that /sys/class/infiniband exists and has at
# least one entry, then exit 0 (healthy) or 1 (unhealthy) without running
# the test suite.
case "$1" in
  healthcheck)
    [ -d /sys/class/infiniband ] || exit 1
    [ "$(ls /sys/class/infiniband/ 2>/dev/null | wc -l)" -gt 0 ] || exit 1
    exit 0
    ;;
esac

# Any other invocation runs the full test suite.
main "$@"