Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add UVM Support to current GPGPU-Sim #219

Open
wants to merge 6 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"name": "CUDA 12.8",
"image": "ghcr.io/accel-sim/accel-sim-framework:Ubuntu-24.04-cuda-12.8"
}
4 changes: 4 additions & 0 deletions .devcontainer/sst_integration/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"name": "SST CUDA 11.7",
"image": "ghcr.io/accel-sim/accel-sim-framework:SST-Integration-Ubuntu-22.04-cuda-11.7-llvm-18.1.8-riscv-gnu-2024.08.06-nightly"
}
2 changes: 1 addition & 1 deletion .github/workflows/accelsim.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
build-QV100:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
image: ghcr.io/accel-sim/accel-sim-framework:ubuntu-24.04-cuda-12.8

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
build-TITANV:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
image: ghcr.io/accel-sim/accel-sim-framework:ubuntu-24.04-cuda-12.8
env:
CONFIG: TITANV

Expand All @@ -32,7 +32,7 @@ jobs:
build-TITANV-LOCALXBAR:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
image: ghcr.io/accel-sim/accel-sim-framework:ubuntu-24.04-cuda-12.8
env:
CONFIG: TITANV-LOCALXBAR

Expand All @@ -46,7 +46,7 @@ jobs:
build-QV100:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
image: ghcr.io/accel-sim/accel-sim-framework:ubuntu-24.04-cuda-12.8
env:
CONFIG: QV100

Expand All @@ -60,7 +60,7 @@ jobs:
build-2060:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
image: ghcr.io/accel-sim/accel-sim-framework:ubuntu-24.04-cuda-12.8
env:
CONFIG: RTX2060

Expand All @@ -74,7 +74,7 @@ jobs:
build-3070:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
image: ghcr.io/accel-sim/accel-sim-framework:ubuntu-24.04-cuda-12.8
env:
CONFIG: RTX3070

Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
build-TITANV:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
image: ghcr.io/accel-sim/accel-sim-framework:ubuntu-24.04-cuda-12.8
env:
CONFIG: TITANV

Expand All @@ -32,7 +32,7 @@ jobs:
build-TITANV-LOCALXBAR:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
image: ghcr.io/accel-sim/accel-sim-framework:ubuntu-24.04-cuda-12.8
env:
CONFIG: TITANV-LOCALXBAR

Expand All @@ -46,7 +46,7 @@ jobs:
build-QV100:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
image: ghcr.io/accel-sim/accel-sim-framework:ubuntu-24.04-cuda-12.8
env:
CONFIG: QV100

Expand All @@ -60,7 +60,7 @@ jobs:
build-2060:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
image: ghcr.io/accel-sim/accel-sim-framework:ubuntu-24.04-cuda-12.8
env:
CONFIG: RTX2060

Expand All @@ -74,7 +74,7 @@ jobs:
build-3070:
runs-on: ubuntu-latest
container:
image: tgrogers/accel-sim_regress:Ubuntu-22.04-cuda-11.7
image: ghcr.io/accel-sim/accel-sim-framework:ubuntu-24.04-cuda-12.8
env:
CONFIG: RTX3070

Expand Down
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,17 @@ debug_tools/WatchYourStep/ptxjitplus/gpgpu*
debug_tools/WatchYourStep/ptxjitplus/*.old
debug_tools/WatchYourStep/ptxjitplus/ptxjitplus
debug_tools/WatchYourStep/ptxjitplus/*.ptx
*.tmp

# Accel-sim packages used for regressions
accel-sim-framework/
gpu-app-collection/

setup

# OS/IDE specific files
.idea/
.vscode/
.DS_Store
.DS_store
__pycache__/
6 changes: 5 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,8 @@ install(CODE "execute_process\(\
install(CODE "execute_process\(\
COMMAND ${CMAKE_COMMAND} -E create_symlink \
${GPGPUSIM_INSTALL_PATH}/$<TARGET_FILE_NAME:cudart> \
${GPGPUSIM_INSTALL_PATH}/$<TARGET_FILE_NAME:cudart>.11.0\)")
${GPGPUSIM_INSTALL_PATH}/$<TARGET_FILE_NAME:cudart>.11.0\)")
install(CODE "execute_process\(\
COMMAND ${CMAKE_COMMAND} -E create_symlink \
${GPGPUSIM_INSTALL_PATH}/$<TARGET_FILE_NAME:cudart> \
${GPGPUSIM_INSTALL_PATH}/$<TARGET_FILE_NAME:cudart>.12\)")
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ $(SIM_LIB_DIR)/libcudart.so: makedirs $(LIBS) cudalib
if [ ! -f $(SIM_LIB_DIR)/libcudart.so.10.0 ]; then ln -s libcudart.so $(SIM_LIB_DIR)/libcudart.so.10.0; fi
if [ ! -f $(SIM_LIB_DIR)/libcudart.so.10.1 ]; then ln -s libcudart.so $(SIM_LIB_DIR)/libcudart.so.10.1; fi
if [ ! -f $(SIM_LIB_DIR)/libcudart.so.11.0 ]; then ln -s libcudart.so $(SIM_LIB_DIR)/libcudart.so.11.0; fi
if [ ! -f $(SIM_LIB_DIR)/libcudart.so.12 ]; then ln -s libcudart.so $(SIM_LIB_DIR)/libcudart.so.12; fi
if [ ! -f $(SIM_LIB_DIR)/libcudart_mod.so ]; then ln -s libcudart.so $(SIM_LIB_DIR)/libcudart_mod.so; fi

$(SIM_LIB_DIR)/libcudart.dylib: makedirs $(LIBS) cudalib
Expand Down
15 changes: 0 additions & 15 deletions bitbucket-pipelines.yml

This file was deleted.

4 changes: 2 additions & 2 deletions configs/tested-cfgs/SM2_GTX480/gpgpusim.config
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@


# In Fermi, the cache and shared memory can be configured to 16kb:48kb(default) or 48kb:16kb
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>
# ** Optional parameter - Required when mshr_type==Texture Fifo
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>,<data_port_width>
# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
# Note: Hashing set index function (H) only applies to a set size of 32 or 64.
-gpgpu_cache:dl1 N:32:128:4,L:L:m:N:H,S:64:8,8
-gpgpu_shmem_size 49152
Expand Down
4 changes: 2 additions & 2 deletions configs/tested-cfgs/SM3_KEPLER_TITAN/gpgpusim.config
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@
# Greedy then oldest scheduler
-gpgpu_scheduler gto

# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>
# ** Optional parameter - Required when mshr_type==Texture Fifo
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>,<data_port_width>
# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
# Note: Hashing set index function (H) only applies to a set size of 32 or 64.
# The default is to disable the L1 cache, unless cache modifiers are used
-gpgpu_cache:dl1 S:4:128:32,L:L:s:N:L,A:256:8,16:0,32
Expand Down
4 changes: 2 additions & 2 deletions configs/tested-cfgs/SM6_TITANX/gpgpusim.config
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@
-gpgpu_dual_issue_diff_exec_units 1

## L1/shared memory configuration
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>
# ** Optional parameter - Required when mshr_type==Texture Fifo
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>,<data_port_width>
# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
# Note: Hashing set index function (H) only applies to a set size of 32 or 64.
# The default is to disable the L1 cache, unless cache modifiers are used
-gpgpu_l1_banks 2
Expand Down
4 changes: 2 additions & 2 deletions configs/tested-cfgs/SM75_RTX2060/gpgpusim.config
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@
-gpgpu_dual_issue_diff_exec_units 1

## L1/shared memory configuration
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>
# ** Optional parameter - Required when mshr_type==Texture Fifo
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>,<data_port_width>
# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
# In adaptive cache, we adaptively assign the remaining shared memory to L1 cache
# For more info, see https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared-memory-7-x
-gpgpu_adaptive_cache_config 1
Expand Down
4 changes: 2 additions & 2 deletions configs/tested-cfgs/SM75_RTX2060_S/gpgpusim.config
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@
-gpgpu_num_reg_banks 16
-gpgpu_reg_file_port_throughput 2

# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>
# ** Optional parameter - Required when mshr_type==Texture Fifo
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>,<data_port_width>
# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
-gpgpu_adaptive_cache_config 0
-gpgpu_l1_banks 4
-gpgpu_cache:dl1 S:1:128:512,L:L:s:N:L,A:256:8,16:0,32
Expand Down
4 changes: 2 additions & 2 deletions configs/tested-cfgs/SM7_GV100/gpgpusim.config
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@
-gpgpu_dual_issue_diff_exec_units 1

## L1/shared memory configuration
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>
# ** Optional parameter - Required when mshr_type==Texture Fifo
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>,<data_port_width>
# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
# Default config is 32KB DL1 and 96KB shared memory
# In Volta, we assign the remaining shared memory to L1 cache
# if the assigned shd mem = 0, then L1 cache = 128KB
Expand Down
4 changes: 2 additions & 2 deletions configs/tested-cfgs/SM7_QV100/gpgpusim.config
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@
-gpgpu_dual_issue_diff_exec_units 1

## L1/shared memory configuration
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>
# ** Optional parameter - Required when mshr_type==Texture Fifo
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>,<data_port_width>
# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
# Default config is 32KB DL1 and 96KB shared memory
# In Volta, we assign the remaining shared memory to L1 cache
# if the assigned shd mem = 0, then L1 cache = 128KB
Expand Down
4 changes: 2 additions & 2 deletions configs/tested-cfgs/SM7_TITANV/gpgpusim.config
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@
-gpgpu_dual_issue_diff_exec_units 1

## L1/shared memory configuration
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>
# ** Optional parameter - Required when mshr_type==Texture Fifo
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>,<data_port_width>
# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
# Default config is 32KB DL1 and 96KB shared memory
# In Volta, we assign the remaining shared memory to L1 cache
# if the assigned shd mem = 0, then L1 cache = 128KB
Expand Down
4 changes: 2 additions & 2 deletions configs/tested-cfgs/SM86_RTX3070/gpgpusim.config
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@
-gpgpu_dual_issue_diff_exec_units 1

## L1/shared memory configuration
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>
# ** Optional parameter - Required when mshr_type==Texture Fifo
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>,<data_port_width>
# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
# In adaptive cache, we adaptively assign the remaining shared memory to L1 cache
# For more info, see https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared-memory-7-x
-gpgpu_adaptive_cache_config 1
Expand Down
20 changes: 10 additions & 10 deletions format-code.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# This bash script formats GPGPU-Sim using clang-format
THIS_DIR="$( cd "$( dirname "$BASH_SOURCE" )" && pwd )"
echo "Running clang-format on $THIS_DIR"
clang-format -i ${THIS_DIR}/libcuda/*.h
clang-format -i ${THIS_DIR}/libcuda/*.cc
clang-format -i ${THIS_DIR}/src/*.h
clang-format -i ${THIS_DIR}/src/*.cc
clang-format -i ${THIS_DIR}/src/gpgpu-sim/*.h
clang-format -i ${THIS_DIR}/src/gpgpu-sim/*.cc
clang-format -i ${THIS_DIR}/src/cuda-sim/*.h
clang-format -i ${THIS_DIR}/src/cuda-sim/*.cc
clang-format -i ${THIS_DIR}/src/accelwattch/*.h
clang-format -i ${THIS_DIR}/src/accelwattch/*.cc
clang-format -i ${THIS_DIR}/libcuda/*.h --style=file:${THIS_DIR}/.clang-format
clang-format -i ${THIS_DIR}/libcuda/*.cc --style=file:${THIS_DIR}/.clang-format
clang-format -i ${THIS_DIR}/src/*.h --style=file:${THIS_DIR}/.clang-format
clang-format -i ${THIS_DIR}/src/*.cc --style=file:${THIS_DIR}/.clang-format
clang-format -i ${THIS_DIR}/src/gpgpu-sim/*.h --style=file:${THIS_DIR}/.clang-format
clang-format -i ${THIS_DIR}/src/gpgpu-sim/*.cc --style=file:${THIS_DIR}/.clang-format
clang-format -i ${THIS_DIR}/src/cuda-sim/*.h --style=file:${THIS_DIR}/.clang-format
clang-format -i ${THIS_DIR}/src/cuda-sim/*.cc --style=file:${THIS_DIR}/.clang-format
clang-format -i ${THIS_DIR}/src/accelwattch/*.h --style=file:${THIS_DIR}/.clang-format
clang-format -i ${THIS_DIR}/src/accelwattch/*.cc --style=file:${THIS_DIR}/.clang-format
6 changes: 3 additions & 3 deletions gpgpusim_check.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ else()
message(CHECK_PASS "${CUDAToolkit_NVCC_EXECUTABLE}")
message(CHECK_START "Checking CUDA compiler version")
message(CHECK_PASS "${CUDAToolkit_VERSION}")
if((CUDAToolkit_VERSION VERSION_LESS 2.0.3) OR (CUDAToolkit_VERSION VERSION_GREATER 11.10.0))
message(FATAL_ERROR "GPGPU-Sim ${CMAKE_PROJECT_VERSION} not tested with CUDA version ${CUDAToolkit_VERSION} (please see README)")
if((CUDAToolkit_VERSION VERSION_LESS 2.0.3) OR (CUDAToolkit_VERSION VERSION_GREATER 13.0.0))
message(WARNING "GPGPU-Sim not tested with CUDA version ${CUDAToolkit_VERSION} (please see README)")
endif()
endif()

Expand Down Expand Up @@ -132,4 +132,4 @@ list(POP_BACK CMAKE_MESSAGE_INDENT)
message(CHECK_PASS "done")
message(STATUS "Be sure to run 'source setup' "
"before you run CUDA program with GPGPU-Sim or building with external "
"simulator like SST")
"simulator like SST")
4 changes: 2 additions & 2 deletions libcuda/cuda_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -2607,12 +2607,12 @@ typedef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st {
/**
* Device that represents the CPU
*/
#define CU_DEVICE_CPU ((CUdevice)-1)
#define CU_DEVICE_CPU ((CUdevice) - 1)

/**
* Device that represents an invalid device
*/
#define CU_DEVICE_INVALID ((CUdevice)-2)
#define CU_DEVICE_INVALID ((CUdevice) - 2)

/** @} */ /* END CUDA_TYPES */

Expand Down
8 changes: 2 additions & 6 deletions libcuda/cuda_api_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ struct _cuda_device_id {
m_next = NULL;
m_gpgpu = gpu;
}
struct _cuda_device_id *next() {
return m_next;
}
struct _cuda_device_id *next() { return m_next; }
unsigned num_shader() const { return m_gpgpu->get_config().num_shader(); }
int num_devices() const {
if (m_next == NULL)
Expand Down Expand Up @@ -158,9 +156,7 @@ class kernel_config {
void set_grid_dim(dim3 *d) { m_GridDim = *d; }
void set_block_dim(dim3 *d) { m_BlockDim = *d; }
gpgpu_ptx_sim_arg_list_t get_args() { return m_args; }
struct CUstream_st *get_stream() {
return m_stream;
}
struct CUstream_st *get_stream() { return m_stream; }

private:
dim3 m_GridDim;
Expand Down
Loading