#!/bin/bash
#
# Setup Soft-RoCE (RXE) for RDMA simulation.
#
# This script enables RDMA over Ethernet using the RXE kernel module.
# It has to run as root, e.g. via sudo or inside a privileged container.

# Abort on the first unhandled command failure. `pipefail` is deliberately
# not enabled: the functions below pipe into `head` and `grep -q`, which may
# close the pipe early, and the resulting SIGPIPE in the producer must not be
# treated as a script failure.
set -e

echo "🔧 Setting up Soft-RoCE (RXE) RDMA simulation..."

# Abort unless the script is running with root privileges.
#
# Globals:   EUID (read)
# Outputs:   on failure, an explanation and a usage hint on stderr
# Returns:   0 when the effective UID is 0; otherwise the script exits
#            with status 1 (the function does not return).
check_privileges() {
  if (( EUID != 0 )); then
    echo "❌ This script requires root privileges" >&2
    echo "Run with: sudo $0 or inside a privileged container" >&2
    exit 1
  fi
}

# Load the Soft-RoCE kernel module and confirm that it is present.
#
# Tries `rdma_rxe` first and falls back to `rxe_net` (older kernels).
#
# Outputs:   progress on stdout; warnings and failure diagnostics on stderr
# Returns:   0 once a module has been loaded and verified; otherwise the
#            script exits with status 1.
load_rxe_module() {
  echo "📦 Loading RXE kernel module..."

  # modprobe's own error text is suppressed on purpose: a failure of the
  # first attempt is expected on kernels that only ship the older module.
  if modprobe rdma_rxe 2>/dev/null; then
    echo "✅ rdma_rxe module loaded successfully"
  else
    echo "⚠️ Failed to load rdma_rxe module, trying alternative approach..." >&2

    # Alternative: try loading rxe_net (older kernels).
    if modprobe rxe_net 2>/dev/null; then
      echo "✅ rxe_net module loaded successfully"
    else
      {
        echo "❌ Failed to load RXE modules. Possible causes:"
        echo " - Kernel doesn't support RXE (needs CONFIG_RDMA_RXE=m)"
        echo " - Running in unprivileged container"
        echo " - Missing kernel modules"
        echo ""
        echo "🔧 Workaround: Run container with --privileged flag"
      } >&2
      exit 1
    fi
  fi

  # Verify the module is really there. /sys/module is checked first because
  # lsmod only lists loadable modules, so a driver compiled into the kernel
  # would otherwise be reported as missing. The lsmod pattern is anchored to
  # the module-name column and uses ERE instead of the GNU-only `\|`.
  if [[ -d /sys/module/rdma_rxe || -d /sys/module/rxe_net ]] \
    || lsmod | grep -Eq '^(rdma_rxe|rxe_net)[[:space:]]'; then
    echo "✅ RXE module verification successful"
  else
    echo "❌ RXE module verification failed" >&2
    exit 1
  fi
}

# Create an RXE (Soft-RoCE) device on top of an Ethernet interface.
#
# The first interface that exists out of eth0, enp0s3, enp0s8 and lo is
# used. Creation is attempted with, in order:
#   1. `rdma link add rxe_<iface> type rxe netdev <iface>` (iproute2),
#   2. `rxe_cfg add <iface>` (legacy helper),
#   3. setup_rxe_manual <iface> (sysfs module parameter).
#
# Outputs:   progress on stdout; warnings and errors on stderr
# Returns:   0 when one of the methods succeeds; the script exits with
#            status 1 when no interface is found (setup_rxe_manual exits
#            on its own failures).
setup_rxe_device() {
  local candidate
  local interface=""

  echo "🌐 Setting up RXE device over Ethernet interface..."

  # Find available network interface (prefer eth0, fallback to others).
  for candidate in eth0 enp0s3 enp0s8 lo; do
    if ip link show "$candidate" >/dev/null 2>&1; then
      interface="$candidate"
      break
    fi
  done

  if [[ -z "$interface" ]]; then
    {
      echo "❌ No suitable network interface found"
      echo "Available interfaces:"
      ip link show | grep "^[0-9]" | cut -d':' -f2 | tr -d ' '
    } >&2
    exit 1
  fi

  echo "📡 Using network interface: $interface"
  echo "🔨 Creating RXE device on $interface..."

  # Preferred path: the iproute2 `rdma` tool. Its error output is left
  # visible so the reason for a fallback can be diagnosed.
  if command -v rdma >/dev/null 2>&1; then
    if rdma link add "rxe_${interface}" type rxe netdev "$interface"; then
      echo "✅ Created RXE device rxe_${interface} via rdma link"
      return 0
    fi
    echo "⚠️ rdma link add failed, trying legacy tools..." >&2
  fi

  # Legacy path: rxe_cfg, then the sysfs interface.
  if command -v rxe_cfg >/dev/null 2>&1; then
    if ! rxe_cfg add "$interface"; then
      echo "⚠️ rxe_cfg failed, trying manual approach..." >&2
      setup_rxe_manual "$interface"
    fi
  else
    echo "⚠️ rxe_cfg not available, using manual setup..." >&2
    setup_rxe_manual "$interface"
  fi
}

# Create an RXE device by writing the interface name to the rdma_rxe
# `add` module parameter in sysfs.
#
# Arguments: $1 - name of the network interface to attach RXE to
# Outputs:   error messages on stderr
# Returns:   0 on success; otherwise the script exits with status 1
#            (empty argument, rdma_rxe not present in sysfs, or the write
#            to the parameter file failed).
setup_rxe_manual() {
  local interface="$1"
  local add_param="/sys/module/rdma_rxe/parameters/add"

  if [[ -z "$interface" ]]; then
    echo "❌ setup_rxe_manual: no network interface given" >&2
    exit 1
  fi

  if [[ ! -d /sys/module/rdma_rxe ]]; then
    echo "❌ RXE sysfs interface not found" >&2
    exit 1
  fi

  # The braces make 2>/dev/null cover the redirection itself as well: a
  # missing or read-only parameter file is then reported only through the
  # message below instead of an additional raw shell error.
  if ! { printf '%s\n' "$interface" >"$add_param"; } 2>/dev/null; then
    echo "❌ Failed to add RXE device via sysfs" >&2
    exit 1
  fi
}

# Check that at least one RDMA device is registered and list what is found.
#
# Arguments: $1 - directory to inspect (optional; defaults to
#                 /sys/class/infiniband)
# Outputs:   the device count and names on stdout, plus the first lines of
#            `ibv_devinfo -d <device>` when that tool is installed;
#            errors on stderr
# Returns:   0 when at least one entry exists; otherwise the script exits
#            with status 1 (directory missing or empty).
verify_rdma_devices() {
  local sysfs_dir="${1:-/sys/class/infiniband}"
  local entry
  local dev_name
  local -a devices=()

  echo "🔍 Verifying RDMA devices..."

  if [[ ! -d "$sysfs_dir" ]]; then
    echo "❌ ${sysfs_dir} directory not found" >&2
    exit 1
  fi

  # Collect entries with a glob instead of parsing `ls`; the -e test skips
  # the literal pattern that is left behind when the directory is empty.
  for entry in "$sysfs_dir"/*; do
    [[ -e "$entry" ]] || continue
    devices+=("${entry##*/}")
  done

  if (( ${#devices[@]} == 0 )); then
    echo "❌ No RDMA devices found in ${sysfs_dir}/" >&2
    exit 1
  fi

  echo "✅ Found ${#devices[@]} RDMA device(s):"
  printf '%s\n' "${devices[@]}"

  # Show device details.
  for dev_name in "${devices[@]}"; do
    [[ -d "${sysfs_dir}/${dev_name}" ]] || continue
    echo " 📋 Device: $dev_name"

    # Best effort: the pipeline's status is that of `head`, so a failing
    # ibv_devinfo does not abort the script.
    if command -v ibv_devinfo >/dev/null 2>&1; then
      ibv_devinfo -d "$dev_name" | head -10
    fi
  done
}

# Print a short summary of the RDMA stack as a basic smoke test.
#
# Shows the first 20 lines of `ibv_devinfo` and the first 10 lines of
# `ucx_info -d` when the respective tool is installed. A missing tool only
# produces a warning; it is not treated as an error.
#
# Outputs:   tool output on stdout; warnings on stderr
# Returns:   0
test_basic_rdma() {
  echo "🧪 Testing basic RDMA functionality..."

  # Test libibverbs.
  if command -v ibv_devinfo >/dev/null 2>&1; then
    echo "📋 RDMA device information:"
    ibv_devinfo | head -20
  else
    echo "⚠️ ibv_devinfo not available" >&2
  fi

  # Test UCX if available.
  if command -v ucx_info >/dev/null 2>&1; then
    echo "📋 UCX information:"
    ucx_info -d | head -10
  else
    echo "⚠️ UCX tools not available" >&2
  fi
}

# Main execution: run every setup stage in order and print a summary.
#
# Stages: check_privileges, load_rxe_module, setup_rxe_device,
# verify_rdma_devices, test_basic_rdma. Positional arguments are accepted
# but not used.
#
# Outputs:   banner, the stages' output and the closing "next steps" text
# Returns:   0 when every stage succeeds.
main() {
  echo "🚀 Starting Soft-RoCE RDMA simulation setup..."
  echo "======================================"

  check_privileges
  load_rxe_module
  setup_rxe_device
  verify_rdma_devices
  test_basic_rdma

  printf '%s\n' \
    "" \
    "🎉 Soft-RoCE setup completed successfully!" \
    "======================================" \
    "✅ RDMA simulation is ready for testing" \
    "📡 You can now run RDMA applications" \
    "" \
    "Next steps:" \
    " - Test with: /opt/rdma-sim/test-rdma.sh" \
    " - Check UCX: /opt/rdma-sim/ucx-info.sh" \
    " - Run your RDMA applications"
}

# Execute main function, forwarding the script's command-line arguments.
main "$@"