# Building NGSolve with CUDA
```bash
git clone --recurse-submodules https://github.com/NGSolve/ngsolve.git src/ngsolve
mkdir -p build/ngsolve
cd build/ngsolve
cmake ../../src/ngsolve -DUSE_SUPERBUILD=ON -DUSE_CCACHE=ON -DCMAKE_INSTALL_PREFIX=~/install -DUSE_CUDA=ON
```
## Building on the musica cluster with EESSI/2023.06
```bash
module --force purge
module load EESSI/2023.06 ASC/2023.06
module load GCC/12 CMake/3.26.3-GCCcore-12.3.0 OpenBLAS/0.3.23-GCC-12.3.0 ccache CUDA

python3.12 -m venv ngs
source ngs/bin/activate
pip install --upgrade netgen-occt-devel numpy pybind11 pybind11_stubgen pip

git clone --recurse-submodules https://github.com/NGSolve/ngsolve.git src/ngsolve
# rm -rf build/ngsolve
mkdir -p build/ngsolve
cd build/ngsolve

cmake ~/src/ngsolve \
    -DCMAKE_BUILD_TYPE=Release \
    -DUSE_SUPERBUILD=ON \
    -DUSE_CCACHE=ON \
    -DCMAKE_INSTALL_PREFIX=~/install \
    -DUSE_CUDA=ON \
    -DUSE_GUI=OFF \
    -DCMAKE_CUDA_ARCHITECTURES="90" \
    -DUSE_UMFPACK=OFF \
    -DBUILD_STUB_FILES=OFF

make -j 8 install
```
### The slurm script submit_slurm.sh
```bash
#!/bin/bash
#SBATCH --job-name "myjob"
#SBATCH --gres=gpu:1
#SBATCH -p zen4_0768_h100x4
#SBATCH --qos zen4_0768_h100x4
#SBATCH --threads-per-core=1
#SBATCH --time=01:00:00

# Optional: load modules (adjust to your environment)
module --force purge
module load EESSI/2023.06 ASC/2023.06
module load GCC/12 OpenBLAS/0.3.23-GCC-12.3.0 CUDA

source /home/js65943/ngs/bin/activate

export LD_LIBRARY_PATH="/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen4/software/GCCcore/12.3.0/lib64:$LD_LIBRARY_PATH"
export LD_LIBRARY_PATH="/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen4/software/OpenBLAS/0.3.23-GCC-12.3.0/lib:$LD_LIBRARY_PATH"
export LD_LIBRARY_PATH="/cvmfs/software.asc.ac.at/versions/2023.06/software/linux/x86_64/amd/zen4/software/CUDA/12.9.0/lib:$LD_LIBRARY_PATH"
export PYTHONPATH=$HOME/install/lib/python3.12/site-packages:$PYTHONPATH
export PATH=$HOME/install/bin:$PATH

which python
nvidia-smi

# Run from the directory you submitted from
# cd "${SLURM_SUBMIT_DIR:-$PWD}"
cd ~/submit

# Example commands: replace with your job's commands
echo "Job started at $(date)"
echo "Running on host $(hostname)"
# python test.py
python3 -c "import ngsolve; print ('NGSolve version', ngsolve.__version__)"
python3 -c "import ngsolve.ngscuda"
echo "Job finished at $(date)"
```
which you submit as

```bash
sbatch submit_slurm.sh
```
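By default, Slurm writes the job's stdout to `slurm-<jobid>.out` in the submit directory, so you can watch the run with the usual commands (`<jobid>` is the placeholder for the id printed by sbatch):

```bash
squeue -u $USER             # job state
tail -f slurm-<jobid>.out   # follow the job's output file
```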
**Open problems:** calling the compiler wrapper (e.g. from ConvBenchmark.py) needs `LD_LIBRARY_PATH` at run-time, but it must not be set at compile-time, since setting it there seems to cause the compiler-wrapper problem. Shall we hardcode the paths into the shared libraries?
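One possible answer, sketched here: bake the run-time library directories into the binaries as an RPATH, so `LD_LIBRARY_PATH` is not needed at all. `CMAKE_INSTALL_RPATH`, `CMAKE_BUILD_WITH_INSTALL_RPATH` and patchelf are standard tools, and the paths are the ones from the job script above; whether the NGSolve superbuild forwards these cache variables to the inner build is an assumption that needs checking.

```bash
# Option A (sketch): bake an install RPATH in at configure time
# (remaining options as in the cmake call above)
cmake ~/src/ngsolve \
    -DUSE_SUPERBUILD=ON -DUSE_CUDA=ON -DCMAKE_INSTALL_PREFIX=~/install \
    -DCMAKE_INSTALL_RPATH="/cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen4/software/OpenBLAS/0.3.23-GCC-12.3.0/lib;/cvmfs/software.asc.ac.at/versions/2023.06/software/linux/x86_64/amd/zen4/software/CUDA/12.9.0/lib" \
    -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON

# Option B (sketch): patch the already-installed libraries afterwards
# (adjust the glob if the libraries live elsewhere)
for lib in ~/install/lib/*.so; do
    patchelf --set-rpath '$ORIGIN:/cvmfs/software.asc.ac.at/versions/2023.06/software/linux/x86_64/amd/zen4/software/CUDA/12.9.0/lib' "$lib"
done
```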
## Building on the musica cluster with EESSI/2025.06
Status: cannot connect to the CUDA driver. The build now compiles consistently with GCC 13 and Python 3.14, but importing ngsolve.ngscuda still fails with the same error: it cannot connect to the CUDA driver (see the diagnostic sketch under "Remaining problem" below).
```bash
module --force purge
module load EESSI/2025.06 ASC/2025.06
module load GCC/13 OpenBLAS/0.3.27-GCC-13.3.0 CUDA

python3.14 -m venv ngs
source ngs/bin/activate
pip install --upgrade netgen-occt-devel cmake numpy pybind11 pip

git clone --recurse-submodules https://github.com/NGSolve/ngsolve.git src/ngsolve
# rm -rf build/ngsolve
mkdir -p build/ngsolve
cd build/ngsolve

cmake ~/src/ngsolve \
    -DCMAKE_BUILD_TYPE=Release \
    -DUSE_SUPERBUILD=ON \
    -DUSE_CCACHE=ON \
    -DCMAKE_INSTALL_PREFIX=~/install \
    -DUSE_CUDA=ON \
    -DUSE_GUI=OFF \
    -DCMAKE_CUDA_ARCHITECTURES="90" \
    -DUSE_UMFPACK=OFF \
    -DBUILD_STUB_FILES=OFF

make -j 8 install
```
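A quick sanity check after the install, before going through Slurm: inspect the CUDA linkage of the installed ngscuda extension. The module file name is an assumption here; adjust the glob to whatever the install actually produced.

```bash
# Any "not found" entry points at a missing run-time library
ldd ~/install/lib/python3.14/site-packages/ngsolve/*cuda*.so | grep -iE 'cuda|not found'
```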
### The slurm script submit_slurm.sh
```bash
#!/bin/bash
#SBATCH --job-name "myjob"
#SBATCH --gres=gpu:1
#SBATCH -p zen4_0768_h100x4
#SBATCH --qos zen4_0768_h100x4
#SBATCH --threads-per-core=1
#SBATCH --time=01:00:00

# Optional: load modules (adjust to your environment)
module --force purge
module load EESSI/2025.06 ASC/2025.06
module load GCC/13 OpenBLAS/0.3.27-GCC-13.3.0 CUDA

source /home/js65943/ngs/bin/activate

export LD_LIBRARY_PATH="/cvmfs/software.eessi.io/versions/2025.06/software/linux/x86_64/amd/zen4/software/OpenBLAS/0.3.27-GCC-13.3.0/lib:$LD_LIBRARY_PATH"
export LD_LIBRARY_PATH="/cvmfs/software.asc.ac.at/versions/2025.06/software/linux/x86_64/amd/zen4/software/CUDA/12.9.0/lib:$LD_LIBRARY_PATH"
export PYTHONPATH="/home/js65943/install/lib/python3.14/site-packages:$PYTHONPATH"

which python
nvidia-smi

# Run from the directory you submitted from
# cd "${SLURM_SUBMIT_DIR:-$PWD}"
cd ~/submit

# Example commands: replace with your job's commands
echo "Job started at $(date)"
echo "Running on host $(hostname)"
# python test.py
python3 -c "import ngsolve; print ('NGSolve version', ngsolve.__version__)"
python3 -c "import ngsolve.ngscuda"
echo "Job finished at $(date)"
```
and a Python file test.py in the submit directory:
```python
import ngsolve
print ("have ngsolve")
import ngsolve.ngscuda
```
which you submit as

```bash
sbatch --partition=zen4_0768_h100x4 submit_slurm.sh
```
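For debugging the driver problem it is often quicker to work interactively on a GPU node instead of iterating through sbatch; a sketch using the same partition and QoS:

```bash
srun -p zen4_0768_h100x4 --qos zen4_0768_h100x4 --gres=gpu:1 --time=00:30:00 --pty bash
```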
### Remaining problem
The output gives:
```
/home/js65943/ngs/bin/python
Sat Dec 13 15:07:26 2025
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.95.05 Driver Version: 580.95.05 CUDA Version: 13.0 |
+-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA H100 On | 00000000:C6:00.0 Off | 0 |
| N/A 41C P0 68W / 700W | 0MiB / 95830MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| No running processes found |
+-----------------------------------------------------------------------------------------+
Job started at Sat Dec 13 15:07:26 CET 2025
Running on host n3015-020
cudaGetDeviceCount() failed: CUDA driver version is insufficient for CUDA runtime version
CUDA Device Query...
Initializing cublas and cusparse.
** On entry to cusparseCreate(): CUDA context cannot be initialized
have ngsolve
```
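Note that nvidia-smi reports driver 580.95.05 with CUDA 13.0, which is newer than the 12.9 toolkit used for the build, so a genuinely outdated driver seems unlikely; a more plausible suspect is that the process resolves a different libcuda than the driver's own (for example a stub or compat library picked up via LD_LIBRARY_PATH). A diagnostic sketch, assuming libcuda.so.1 and the CUDA 12 runtime SONAME libcudart.so.12 are loadable:

```bash
# Which libcuda does the dynamic loader actually resolve when importing ngscuda?
LD_DEBUG=libs python3 -c "import ngsolve.ngscuda" 2>&1 | grep libcuda

# Compare the driver API version against the runtime version directly
python3 - <<'EOF'
import ctypes
v = ctypes.c_int()
ctypes.CDLL("libcuda.so.1").cuDriverGetVersion(ctypes.byref(v))
print("driver API:", v.value)   # e.g. 13000 for CUDA 13.0
ctypes.CDLL("libcudart.so.12").cudaRuntimeGetVersion(ctypes.byref(v))
print("runtime:", v.value)      # e.g. 12090 for CUDA 12.9
EOF

# Is there a libcuda on the LD_LIBRARY_PATH entry added for the CUDA toolkit?
ls -l /cvmfs/software.asc.ac.at/versions/2025.06/software/linux/x86_64/amd/zen4/software/CUDA/12.9.0/lib | grep -i libcuda
```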