Mark,
Here's the init script I use on our 3-card CUDA box. In particular, note the mknod stuff, which might be at issue in your situation. (Sorry about line breaks; you may have to guess in a couple spots.)
----- %< -----
#!/bin/bash
#
# Startup/shutdown script for nVidia CUDA
#
# chkconfig: 345 80 20
# description: Startup/shutdown script for nVidia CUDA
#
# =====================================================
# Pull in the distribution's init-script function library.
source /etc/init.d/functions

# Name of the kernel module that start/stop load and unload.
DRIVER=nvidia

# Status the script finally exits with; updated after each step.
RETVAL=0
# Make one NVIDIA character device (major 195, mode 666) unless it is
# already present.
#   $1 - path of the node, $2 - minor number
# Returns 0 when the node exists afterwards, else mknod's exit status.
function _cuda_mknod() {
  # An existing character device is left alone so that running "start"
  # twice does not fail with "File exists".
  [ -c "$1" ] && return 0
  mknod -m 666 "$1" c 195 "$2"
}

# Create /dev nodes for nvidia devices.
#
# Counts the NVIDIA "3D controller" and "VGA compatible controller" entries
# printed by /sbin/lspci and creates /dev/nvidia0 .. /dev/nvidia<count-1>
# with matching minor numbers, followed by the control node /dev/nvidiactl
# (minor 255).
#
# Globals:   RETVAL (written) - status of the last node creation attempt
# Arguments: none
# Returns:   0 on success, otherwise the status of the failing mknod. The
#            status is returned (not exit-ed) so the caller can still print
#            its failure marker before bailing out.
function createnodes() {
  local total minor

  # Count the number of NVIDIA controllers found. grep -c always prints a
  # number (0 when nothing matches), so its exit status is ignored here.
  total=$(/sbin/lspci | grep -i NVIDIA \
    | grep -c -e "3D controller" -e "VGA compatible controller")

  for ((minor = 0; minor < total; minor++)); do
    _cuda_mknod "/dev/nvidia${minor}" "$minor"
    RETVAL=$?
    [ "$RETVAL" = 0 ] || return "$RETVAL"
  done

  _cuda_mknod /dev/nvidiactl 255
  RETVAL=$?
  return "$RETVAL"
}
# Remove /dev nodes for nvidia devices.
#
# Deletes everything matching /dev/nvidia*. With -f, rm does not complain
# when the glob matches nothing and the literal pattern is passed through.
#
# Arguments: none
# Returns:   exit status of rm
function removenodes() {
  rm -f /dev/nvidia*
}
# Start daemon.
#
# Loads the $DRIVER kernel module, creates the CUDA /dev entries with
# createnodes, then asks nvidia-smi to switch on persistence mode. The first
# two steps are mandatory: if either fails the script exits with that step's
# status. The persistence step is best-effort.
#
# Globals:   DRIVER (read), RETVAL (written)
# Arguments: none
# Returns:   0 when the mandatory steps succeeded; otherwise exits the script
function start() {
  echo -n $"Loading $DRIVER kernel module: "
  # Capture the command's own status before calling success/failure so that
  # RETVAL never depends on what those reporting helpers return.
  modprobe "$DRIVER"
  RETVAL=$?
  if [ "$RETVAL" = 0 ]; then success; else failure; fi
  echo
  [ "$RETVAL" = 0 ] || exit "$RETVAL"

  echo -n $"Initializing CUDA /dev entries: "
  createnodes
  RETVAL=$?
  if [ "$RETVAL" = 0 ]; then success; else failure; fi
  echo
  [ "$RETVAL" = 0 ] || exit "$RETVAL"

  # this can fail without stopping the entire script, so RETVAL is left
  # untouched and the function still reports success.
  echo -n $"Setting persistence mode: "
  if /usr/bin/nvidia-smi -pm 1; then success; else failure; fi
  echo
  return 0
}
# Stop daemon.
#
# Force-unloads the $DRIVER kernel module and then removes the CUDA /dev
# entries with removenodes. If either step fails the script exits with that
# step's status, so the /dev entries are kept when the module could not be
# unloaded.
#
# Globals:   DRIVER (read), RETVAL (written)
# Arguments: none
# Returns:   0 when both steps succeeded; otherwise exits the script
function stop() {
  echo -n $"Unloading $DRIVER kernel module: "
  # Capture the command's own status before calling success/failure so that
  # RETVAL never depends on what those reporting helpers return.
  rmmod -f "$DRIVER"
  RETVAL=$?
  if [ "$RETVAL" = 0 ]; then success; else failure; fi
  echo
  [ "$RETVAL" = 0 ] || exit "$RETVAL"

  echo -n $"Removing CUDA /dev entries: "
  removenodes
  RETVAL=$?
  if [ "$RETVAL" = 0 ]; then success; else failure; fi
  echo
  [ "$RETVAL" = 0 ] || exit "$RETVAL"
}
# See how we were called: dispatch on the first command-line argument.
# "restart" is a stop followed by a start; anything else prints the usage
# line and makes the script exit with status 1.
case "$1" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  restart)
    stop
    start
    ;;
  *)
    echo $"Usage: $0 {start|stop|restart}"
    RETVAL=1
    ;;
esac
exit "$RETVAL"
----- %< -----