简单记录一下编译安装和使用 GPGPU-Sim,以备查询。
安装CUDA
GPGPU-Sim 需要 CUDA Toolkit。通常安装 CUDA Toolkit 需要有 GPU 并正确地安装驱动(driver),但是 GPGPU-Sim 只需要 CUDA 里面的一些库(library),所以这里其实并非真正的安装,而是把 CUDA Toolkit 中的库从安装包中抽取出来。
1 2 3 4 |
# Different from a normal CUDA installation: only the toolkit is extracted
# into path_to_cuda (no driver is installed).
sh ../cuda_9.1.85_387.26_linux.run --silent --toolkit --toolkitpath=path_to_cuda

# Point CUDA_INSTALL_PATH at the extracted toolkit.
export CUDA_INSTALL_PATH=path_to_cuda
安装依赖
-
Simulator:
- CUDA Toolkit (4.2 to 10.2). 9.1 is currently the most rigorously validated version.
- gcc, g++, make, makedepend
- xutils, bison, flex, zlib
-
AerialVision:
- python-pmw, python-ply, python-numpy
- python-matplotlib, libpng12-dev
-
Documentation:
- doxygen, graphviz
一键安装:
1 2 3 4 |
# Install the simulator, AerialVision and documentation dependencies in one command.
# Each trailing backslash must be the last character on its line.
sudo apt-get install -y \
    xutils-dev bison zlib1g-dev flex libglu1-mesa-dev doxygen graphviz \
    python-pmw python-ply python-numpy python-matplotlib python-pip libpng-dev
编译GPGPU-Sim
1 2 3 4 5 6 7 8 9 10 11 |
# Get GPGPU-Sim (dev branch)
git clone -b dev https://github.com/purdue-aalp/gpgpu-sim_distribution.git
cd gpgpu-sim_distribution

# Configure the environment.
# CUDA_INSTALL_PATH must already point at the CUDA toolkit,
# e.g. export CUDA_INSTALL_PATH=path_to_cuda
source setup_environment release

# Compile; cap the number of parallel jobs at the number of CPU cores
# instead of letting make spawn an unlimited number of jobs.
make -j"$(nproc)"
使用GPGPU-Sim
在 GPGPU-Sim 上运行下面的 vectorAdd.cu 程序。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <cuda_runtime.h>

#define N 16384

// Evaluate a CUDA runtime call and terminate with file/line and the CUDA
// error string if it did not return cudaSuccess.
#define CUDA_CHECK(call)                                                        \
    do {                                                                        \
        cudaError_t err_ = (call);                                              \
        if (err_ != cudaSuccess) {                                              \
            std::fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                         cudaGetErrorString(err_));                             \
            std::exit(EXIT_FAILURE);                                            \
        }                                                                       \
    } while (0)

// Vector addition kernel: c[i] = a[i] + b[i] for every i in [0, n).
// Uses a 1D index built from threadIdx.x, blockDim.x and blockIdx.x; threads
// whose index is >= n do nothing, so the launch may contain more threads
// than elements.
__global__ void vecAdd(float *a, float *b, float *c, int n)
{
    int i = threadIdx.x + blockDim.x * blockIdx.x;
    if (i < n)
        c[i] = a[i] + b[i];
}

// Adds two N-element vectors on the device and verifies the result on the host.
// Returns EXIT_SUCCESS when every element matches, EXIT_FAILURE otherwise.
int main()
{
    float *a, *b, *c;
    float *d_a, *d_b, *d_c;
    const size_t size = N * sizeof(float);

    // allocate space for device copies of a, b, c
    CUDA_CHECK(cudaMalloc((void **)&d_a, size));
    CUDA_CHECK(cudaMalloc((void **)&d_b, size));
    CUDA_CHECK(cudaMalloc((void **)&d_c, size));

    // allocate space for host copies of a, b, c and setup input values
    a = (float *)malloc(size);
    b = (float *)malloc(size);
    c = (float *)malloc(size);
    if (a == NULL || b == NULL || c == NULL)
    {
        std::cerr << "Error: host allocation failed" << std::endl;
        return EXIT_FAILURE;
    }

    for (int i = 0; i < N; i++)
    {
        a[i] = static_cast<float>(i);
        b[i] = static_cast<float>(i * i);
    }

    // copy inputs to device
    CUDA_CHECK(cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice));

    // launch vecAdd() kernel on GPU; the block count is rounded up so that
    // all N elements are covered
    const int threadsPerBlock = 256;
    const int blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
    vecAdd<<<blocks, threadsPerBlock>>>(d_a, d_b, d_c, N);
    CUDA_CHECK(cudaGetLastError());      // launch/configuration errors
    CUDA_CHECK(cudaDeviceSynchronize()); // errors raised while the kernel ran

    // copy result back to host
    CUDA_CHECK(cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost));

    // verify result; report only the first mismatch
    bool ok = true;
    for (int i = 0; i < N; i++)
    {
        if (a[i] + b[i] != c[i])
        {
            std::cout << "Error: " << a[i] << " + " << b[i] << " != " << c[i] << std::endl;
            ok = false;
            break;
        }
    }

    std::cout << "Done!" << std::endl;

    // clean up
    free(a);
    free(b);
    free(c);
    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_c));

    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
编译程序
1 2 |
# Link the CUDA runtime as a shared library so that libcudart.so can be
# resolved to GPGPU-Sim's library when the binary is loaded.
nvcc vectorAdd.cu --cudart shared -o vectorAdd
检查可执行文件
1 2 3 4 5 6 7 8 9 10 11 |
$ ldd vectorAdd linux-vdso.so.1 => (0x00007fff54724000) libcudart.so.9.1 => /root/gpgpu-sim_distribution/lib/gcc-5.4.0/cuda-9010/release/libcudart.so.9.1 (0x00007fefaebf5000) libstdc++.so.6 => /usr/lib/x86_64-linux-gnu/libstdc++.so.6 (0x00007fefae873000) libgcc_s.so.1 => /lib/x86_64-linux-gnu/libgcc_s.so.1 (0x00007fefae65d000) libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fefae293000) libz.so.1 => /lib/x86_64-linux-gnu/libz.so.1 (0x00007fefae079000) libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007fefadd70000) libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007fefadb53000) /lib64/ld-linux-x86-64.so.2 (0x00007fefaf215000) |
可以看到 libcudart.so.9.1 已经指向 GPGPU-Sim 中的库,说明该可执行文件(binary)使用的是 GPGPU-Sim 的链接库。
执行程序
1 2 3 4 5 6 7 8 9 10 |
# Copy the config files to the directory that contains the binary
cp configs/tested-cfgs/SM6_TITANX/config_pascal_islip.icnt path_to_binary
cp configs/tested-cfgs/SM6_TITANX/gpgpusim.config path_to_binary

# Set up the environment (only needed in a new terminal);
# use the same configuration that was built
source setup_environment release

# Run the application
./vectorAdd
Simulator输出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
... Long output ... ----------------------------END-of-Interconnect-DETAILS------------------------- gpgpu_simulation_time = 0 days, 0 hrs, 0 min, 1 sec (1 sec) gpgpu_simulation_rate = 344064 (inst/sec) gpgpu_simulation_rate = 1570 (cycle/sec) gpgpu_silicon_slowdown = 902547x GPGPU-Sim: synchronize waiting for inactive GPU simulation GPGPU-Sim API: Stream Manager State GPGPU-Sim: detected inactive GPU simulation thread Done! GPGPU-Sim: *** exit detected *** |
使用docker环境
使用docker环境很多时候更方便。
Dockerfile
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# Copyright © 2024 Mao Lin
FROM ubuntu:16.04

LABEL maintainer="Mao Lin"
LABEL email=""
LABEL version="1.0"
LABEL description="This is a docker image for gpgpu-sim."

# Set the timezone up front; required for installing the dependencies.
ENV TZ=America/Los_Angeles
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# Install the basic tools and the GPGPU-Sim dependencies in ONE layer:
# `apt-get update` and `apt-get install` must share a RUN step, otherwise a
# cached `update` layer can be reused with a stale package index.
# The package lists are removed afterwards to keep the image small.
RUN apt-get update && apt-get install -y \
        build-essential git wget vim cmake \
        xutils-dev bison zlib1g-dev flex libglu1-mesa-dev doxygen graphviz \
        python-pmw python-ply python-numpy python-matplotlib python-pip libpng-dev \
    && rm -rf /var/lib/apt/lists/*

# Install CUDA 9.1 (toolkit only). Download, install and delete the installer
# in a single layer so the installer file is not kept in the image.
RUN wget https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_387.26_linux -P /tmp \
    && sh /tmp/cuda_9.1.85_387.26_linux --silent --toolkit --toolkitpath=/usr/local/cuda-9.1 \
    && rm -f /tmp/cuda_9.1.85_387.26_linux
ENV CUDA_INSTALL_PATH=/usr/local/cuda-9.1

# Customize bashrc (colored prompt and common aliases)
RUN echo "PS1='\[\033[01;33m\]\u\[\033[01;32m\]@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '" >> /tmp/bashrc \
    && echo "alias ls='ls --color=auto'" >> /tmp/bashrc \
    && echo "alias grep='grep --color=auto'" >> /tmp/bashrc \
    && echo "alias fgrep='fgrep --color=auto'" >> /tmp/bashrc \
    && echo "alias egrep='egrep --color=auto'" >> /tmp/bashrc \
    && echo "alias ll='ls -alF'" >> /tmp/bashrc \
    && echo "alias la='ls -A'" >> /tmp/bashrc \
    && echo "alias l='ls -CF'" >> /tmp/bashrc

# Entrypoint
WORKDIR /root
CMD ["bash", "--rcfile", "/tmp/bashrc"]
编译docker
1 2 |
# Build the image from the Dockerfile in the current directory
docker build -t gpgpu-sim-dist .
Run docker
1 2 3 4 |
# Interactive container, removed on exit; the current directory is mounted
# at /root (quoted so that paths containing spaces work)
docker run --rm -it -v "$(pwd)":/root gpgpu-sim-dist

# or: start the container detached
docker run -idt -v "$(pwd)":/root gpgpu-sim-dist
最新评论
感谢博主,让我PyTorch入了门!
博主你好,今晚我们下馆子不?
博主,你的博客用的哪家的服务器。
您好,请问您对QNN-MO-PYNQ这个项目有研究吗?想请问如何去训练自己的数据集从而实现新的目标检测呢?
where is the source code ? bomb1 188 2 8 0 0 hello world 0 0 0 0 0 0 1 1 9?5
在安装qemu的过程中,一定在make install 前加入 sudo赋予权限。
所以作者你是训练的tiny-yolov3还是yolov3...
很有用