You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							183 lines
						
					
					
						
							5.3 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							183 lines
						
					
					
						
							5.3 KiB
						
					
					
				
								#!/bin/bash
								#
								# Setup Soft-RoCE (RXE) for RDMA simulation
								# This script enables RDMA over Ethernet using the RXE kernel module.
								#
								# Requires root privileges (run with sudo or inside a privileged
								# container).

								# Stop at the first command that fails outside of a condition.
								set -e

								echo "🔧 Setting up Soft-RoCE (RXE) RDMA simulation..."
								# Function to check if running with required privileges
								# Globals:  EUID (read)
								# Outputs:  an error and a usage hint on stderr when not root
								# Exits:    1 when the effective UID is not 0; otherwise returns normally
								check_privileges() {
								    if [ "$EUID" -ne 0 ]; then
								        # Fatal diagnostics belong on stderr so stdout stays clean.
								        {
								            echo "❌ This script requires root privileges"
								            echo "Run with: sudo $0 or inside a privileged container"
								        } >&2
								        exit 1
								    fi
								}
								# Function to load RXE kernel module
								# Tries `modprobe rdma_rxe` first and falls back to `modprobe rxe_net`,
								# then confirms via lsmod that one of the two is listed.
								# Outputs:  progress on stdout; fatal errors on stderr
								# Exits:    1 if neither module loads or the lsmod check finds neither
								load_rxe_module() {
								    echo "📦 Loading RXE kernel module..."

								    # Try to load the rdma_rxe module
								    if modprobe rdma_rxe 2>/dev/null; then
								        echo "✅ rdma_rxe module loaded successfully"
								    else
								        echo "⚠️  Failed to load rdma_rxe module, trying alternative approach..."

								        # Alternative: Try loading rxe_net (older kernels)
								        if modprobe rxe_net 2>/dev/null; then
								            echo "✅ rxe_net module loaded successfully"
								        else
								            {
								                echo "❌ Failed to load RXE modules. Possible causes:"
								                echo "  - Kernel doesn't support RXE (needs CONFIG_RDMA_RXE=m)"
								                echo "  - Running in unprivileged container"
								                echo "  - Missing kernel modules"
								                echo ""
								                echo "🔧 Workaround: Run container with --privileged flag"
								            } >&2
								            exit 1
								        fi
								    fi

								    # Verify module is loaded. -E alternation is used instead of the
								    # BRE "\|" form, which is a GNU extension.
								    if lsmod | grep -Eq 'rdma_rxe|rxe_net'; then
								        echo "✅ RXE module verification successful"
								    else
								        echo "❌ RXE module verification failed" >&2
								        exit 1
								    fi
								}
								# Function to setup virtual RDMA device
								# Picks the first existing interface among eth0, enp0s3, enp0s8, lo and
								# creates an RXE device on it. Creation is attempted in this order:
								#   1. rxe_cfg add <iface>                      (when rxe_cfg is installed)
								#   2. rdma link add rxe0 type rxe netdev <iface> (when iproute2's rdma is installed)
								#   3. setup_rxe_manual <iface>                 (sysfs write)
								# Outputs:  progress on stdout; fatal errors on stderr
								# Exits:    1 when no candidate interface exists (setup_rxe_manual may
								#           also exit on its own failure)
								setup_rxe_device() {
								    echo "🌐 Setting up RXE device over Ethernet interface..."

								    # Find available network interface (prefer eth0, fallback to others).
								    # The loop variable is local so it does not leak into the caller.
								    local interface="" iface
								    for iface in eth0 enp0s3 enp0s8 lo; do
								        if ip link show "$iface" >/dev/null 2>&1; then
								            interface="$iface"
								            break
								        fi
								    done

								    if [ -z "$interface" ]; then
								        {
								            echo "❌ No suitable network interface found"
								            echo "Available interfaces:"
								            ip link show | grep "^[0-9]" | cut -d':' -f2 | tr -d ' '
								        } >&2
								        exit 1
								    fi

								    echo "📡 Using network interface: $interface"

								    # Create RXE device
								    echo "🔨 Creating RXE device on $interface..."

								    # Try rxe_cfg first when it is installed
								    if command -v rxe_cfg >/dev/null 2>&1; then
								        if rxe_cfg add "$interface"; then
								            return 0
								        fi
								        echo "⚠️  rxe_cfg failed, trying manual approach..."
								    else
								        echo "⚠️  rxe_cfg not available, using manual setup..."
								    fi

								    # iproute2's rdma tool can create the link directly; try it before
								    # the sysfs write. Its stderr is dropped because a failure here just
								    # falls through to the next method.
								    if command -v rdma >/dev/null 2>&1 &&
								        rdma link add rxe0 type rxe netdev "$interface" 2>/dev/null; then
								        echo "✅ RXE device rxe0 created with 'rdma link add'"
								        return 0
								    fi

								    setup_rxe_manual "$interface"
								}
								# Function to manually setup RXE device
								# Writes the interface name to the rdma_rxe module's sysfs "add"
								# parameter.
								# Arguments:
								#   $1 - network interface name (required, non-empty)
								#   $2 - module sysfs directory (optional; default /sys/module/rdma_rxe)
								# Outputs:  fatal errors on stderr
								# Exits:    1 on empty interface, missing directory, or failed write
								setup_rxe_manual() {
								    local interface="$1"
								    local module_dir="${2:-/sys/module/rdma_rxe}"
								    local add_file="$module_dir/parameters/add"

								    if [ -z "$interface" ]; then
								        echo "❌ No network interface given for RXE setup" >&2
								        exit 1
								    fi

								    # Use sysfs interface to create RXE device
								    if [ ! -d "$module_dir" ]; then
								        echo "❌ RXE sysfs interface not found" >&2
								        exit 1
								    fi

								    # The braces make 2>/dev/null take effect before the > redirection
								    # is opened, so a missing or unwritable file does not leak the
								    # shell's own error message.
								    if ! { echo "$interface" > "$add_file"; } 2>/dev/null; then
								        echo "❌ Failed to add RXE device via sysfs" >&2
								        exit 1
								    fi
								}
								# Function to verify RDMA devices
								# Lists the entries of the InfiniBand class directory and, when
								# ibv_devinfo is installed, prints the first 10 lines of its output for
								# each device directory.
								# Arguments:
								#   $1 - class directory (optional; default /sys/class/infiniband)
								# Outputs:  report on stdout; fatal errors on stderr
								# Exits:    1 when the directory is missing or has no entries
								verify_rdma_devices() {
								    local class_dir="${1:-/sys/class/infiniband}"
								    local entry
								    local -a names=()

								    echo "🔍 Verifying RDMA devices..."

								    if [ ! -d "$class_dir" ]; then
								        echo "❌ $class_dir directory not found" >&2
								        exit 1
								    fi

								    # Collect entries with a glob instead of parsing ls output. An
								    # unmatched glob stays literal, hence the existence test.
								    for entry in "$class_dir"/*; do
								        if [ -e "$entry" ] || [ -L "$entry" ]; then
								            names+=("${entry##*/}")
								        fi
								    done

								    if [ "${#names[@]}" -eq 0 ]; then
								        echo "❌ No RDMA devices found in $class_dir/" >&2
								        exit 1
								    fi

								    echo "✅ Found ${#names[@]} RDMA device(s):"
								    printf '%s\n' "${names[@]}"

								    # Show device details
								    for entry in "${names[@]}"; do
								        if [ -d "$class_dir/$entry" ]; then
								            echo "  📋 Device: $entry"

								            # Try to get device info
								            if command -v ibv_devinfo >/dev/null 2>&1; then
								                ibv_devinfo -d "$entry" | head -10
								            fi
								        fi
								    done
								}
								# Run a tool, print at most N lines of its stdout, and warn when it
								# exits non-zero. Always returns 0 so a failing probe is non-fatal.
								# Arguments: $1 - line limit; $2... - command and its arguments
								_print_tool_head() {
								    local limit="$1"
								    local output
								    local status=0
								    shift

								    output=$("$@") || status=$?
								    if [ -n "$output" ]; then
								        printf '%s\n' "$output" | head -n "$limit"
								    fi
								    if [ "$status" -ne 0 ]; then
								        echo "⚠️  $1 exited with status $status"
								    fi
								    return 0
								}

								# Function to test basic RDMA functionality
								# Prints the first 20 lines of `ibv_devinfo` and the first 10 lines of
								# `ucx_info -d` when those tools are installed. A missing or failing
								# tool is reported as a warning; this function never aborts the script.
								test_basic_rdma() {
								    echo "🧪 Testing basic RDMA functionality..."

								    # Test libibverbs
								    if command -v ibv_devinfo >/dev/null 2>&1; then
								        echo "📋 RDMA device information:"
								        _print_tool_head 20 ibv_devinfo
								    else
								        echo "⚠️  ibv_devinfo not available"
								    fi

								    # Test UCX if available
								    if command -v ucx_info >/dev/null 2>&1; then
								        echo "📋 UCX information:"
								        _print_tool_head 10 ucx_info -d
								    else
								        echo "⚠️  UCX tools not available"
								    fi
								}
								# Main execution
								# Runs the setup steps in order: privilege check, module load, device
								# creation, device verification, basic functionality report. Then prints
								# a completion summary with suggested next steps.
								main() {
								    echo "🚀 Starting Soft-RoCE RDMA simulation setup..."
								    echo "======================================"

								    check_privileges
								    load_rxe_module
								    setup_rxe_device
								    verify_rdma_devices
								    test_basic_rdma

								    printf '%s\n' \
								        "" \
								        "🎉 Soft-RoCE setup completed successfully!" \
								        "======================================" \
								        "✅ RDMA simulation is ready for testing" \
								        "📡 You can now run RDMA applications" \
								        "" \
								        "Next steps:" \
								        "  - Test with: /opt/rdma-sim/test-rdma.sh" \
								        "  - Check UCX: /opt/rdma-sim/ucx-info.sh" \
								        "  - Run your RDMA applications"
								}
								# Execute main function, forwarding the script's arguments
								main "$@"