Skip to content

Commit 4b91fcf

Browse files
committed
compiler: added PTX compilers using LLVM and Open64(nvopencc)
1 parent e639547 commit 4b91fcf

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+599
-0
lines changed

CMakeLists.txt

+10
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ OPTION(user "user mode(default off)" OFF)
1414
OPTION(runtime "enable CUDA runtime API(default on)" ON)
1515
OPTION(usched "enable user mode scheduler(default off)" OFF)
1616
OPTION(use_as "use assembler(default off)" OFF)
17+
OPTION(use_llvm "use LLVM compiler(default off)" OFF)
18+
OPTION(use_open64 "use Open64(nvopencc) compiler(default off)" OFF)
1719

1820
MACRO(INCLUDE_DIRECTORY_IF_EXISTS DIR)
1921
IF(EXISTS "${DIR}/")
@@ -53,3 +55,11 @@ ENDIF(NOT user)
5355
IF(use_as)
5456
ADD_SUBDIRECTORY(compiler/as)
5557
ENDIF(use_as)
58+
59+
IF(use_llvm)
60+
ADD_SUBDIRECTORY(compiler/llvm)
61+
ENDIF(use_llvm)
62+
63+
IF(use_open64)
64+
ADD_SUBDIRECTORY(compiler/open64)
65+
ENDIF(use_open64)

compiler/llvm/CMakeLists.txt

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
EXECUTE_PROCESS(
2+
COMMAND ${CMAKE_C_COMPILER} -dumpversion
3+
OUTPUT_VARIABLE GCC_VERSION
4+
)
5+
if (GCC_VERSION VERSION_GREATER 4.7 OR GCC_VERSION VERSION_EQUAL 4.7)
6+
EXECUTE_PROCESS(
7+
COMMAND svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm
8+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
9+
)
10+
EXECUTE_PROCESS(
11+
COMMAND svn co http://llvm.org/svn/llvm-project/cfe/trunk clang
12+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/llvm/tools
13+
)
14+
EXECUTE_PROCESS(
15+
COMMAND svn co http://llvm.org/svn/llvm-project/compiler-rt/trunk compiler-rt
16+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/llvm/projects
17+
)
18+
EXECUTE_PROCESS(
19+
COMMAND mkdir build
20+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
21+
)
22+
EXECUTE_PROCESS(
23+
COMMAND cmake ${CMAKE_CURRENT_BINARY_DIR}/llvm
24+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/build
25+
)
26+
ADD_CUSTOM_TARGET(llvm ALL make
27+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/build
28+
)
29+
ADD_CUSTOM_TARGET(clean make clean
30+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/build
31+
)
32+
INSTALL(
33+
CODE "
34+
EXECUTE_PROCESS(
35+
COMMAND make install
36+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/build
37+
)
38+
"
39+
)
40+
else()
41+
MESSAGE("skip: LLVM is required GCC version 4.7 or later.")
42+
endif()

compiler/open64/CMakeLists.txt

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
FIND_PACKAGE(FLEX)
2+
FIND_PACKAGE(BISON)
3+
FIND_FILE(PERL4_CORELIBS getopts.pl PATHS /usr/lib/perl5 /usr/share/perl5)
4+
FIND_PROGRAM(TCSH tcsh PATHS /bin)
5+
EXECUTE_PROCESS(
6+
COMMAND ${CMAKE_C_COMPILER} -dumpversion
7+
OUTPUT_VARIABLE GCC_VERSION
8+
)
9+
if (GCC_VERSION VERSION_LESS 4.7)
10+
if(EXISTS ${PERL4_CORELIBS})
11+
if(EXISTS ${TCSH})
12+
EXECUTE_PROCESS(
13+
COMMAND tar xzf ${CMAKE_CURRENT_SOURCE_DIR}/nvopencc_5.0_src_13604779.tar.gz
14+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
15+
)
16+
EXECUTE_PROCESS(
17+
COMMAND cat ${CMAKE_CURRENT_SOURCE_DIR}/nvopencc_5.0.patch
18+
COMMAND patch -p0 --read-only=ignore
19+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
20+
)
21+
ADD_CUSTOM_TARGET(open64 ALL make
22+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/src/targia3264_nvisa
23+
)
24+
ADD_CUSTOM_TARGET(clean make clean
25+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/src/targia3264_nvisa
26+
)
27+
INSTALL(
28+
PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/src/targia3264_nvisa/bin/nvopencc
29+
DESTINATION gdev/open64/bin
30+
)
31+
FILE(GLOB libs "${CMAKE_CURRENT_BINARY_DIR}/src/targia3264_nvisa/lib/*")
32+
INSTALL(
33+
PROGRAMS ${libs}
34+
DESTINATION gdev/open64/lib
35+
)
36+
else()
37+
MESSAGE("skip: Open64 is required tcsh.")
38+
endif()
39+
else()
40+
MESSAGE("skip: Open64 is required perl4-corelibs package.")
41+
endif()
42+
else()
43+
MESSAGE("skip: Open64 is required GCC version 4.6 or older.")
44+
endif()

compiler/open64/nvopencc_5.0.patch

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
--- ./src/Makefile.gsetup.orig 2012-04-24 02:04:57.000000000 +0900
2+
+++ ./src/Makefile.gsetup 2014-10-27 04:16:31.717636564 +0900
3+
@@ -531,7 +531,7 @@ ERROR_ON_WARNINGS = #-Werror
4+
ifeq ($(BUILD_TARGET), NVISA)
5+
# This is due to unsupported aliases in Mach-O
6+
ifneq ($(BUILD_OS), DARWIN)
7+
-ERROR_ON_WARNINGS = -Werror
8+
+ERROR_ON_WARNINGS = #-Werror
9+
endif
10+
endif
11+
12+
--- ./src/gccfe/gnu/c-parse.y.orig 2012-03-16 04:52:30.000000000 +0900
13+
+++ ./src/gccfe/gnu/c-parse.y 2014-10-27 04:50:16.125664019 +0900
14+
@@ -66,6 +66,7 @@ Software Foundation, 59 Temple Place - S
15+
#include <locale.h>
16+
#endif
17+
18+
+#include "y.tab.c"
19+
20+
/* Like YYERROR but do call yyerror. */
21+
#define YYERROR1 { yyerror ("syntax error"); YYERROR; }
22+
--- ./src/linux/make/gcommondefs.orig 2012-03-16 04:53:45.000000000 +0900
23+
+++ ./src/linux/make/gcommondefs 2014-10-27 04:06:48.885628660 +0900
24+
@@ -135,7 +135,8 @@ ifeq ($(BUILD_COMPILER), GNU)
25+
F90 +=
26+
endif
27+
ifeq ($(BUILD_TARGET), NVISA)
28+
- ROOT_DIR := $(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))/../../../..
29+
+# ROOT_DIR := $(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))/../../../..
30+
+ ROOT_DIR := $(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))/../../..
31+
ifneq ($(BUILD_OS), LINUX)
32+
ifneq ($(BUILD_OS), DARWIN)
33+
ifndef USE_NATIVE
Binary file not shown.

test/cuda/common/clang/cuda.h

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/* Minimal declarations for CUDA support. Testing purposes only. */
2+
3+
#include <stddef.h>
4+
5+
#define __constant__ __attribute__((constant))
6+
#define __device__ __attribute__((device))
7+
#define __global__ __attribute__((global))
8+
#define __host__ __attribute__((host))
9+
#define __shared__ __attribute__((shared))
10+
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
11+
12+
struct dim3 {
13+
unsigned x, y, z;
14+
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
15+
};
16+
17+
typedef struct cudaStream *cudaStream_t;
18+
19+
int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
20+
cudaStream_t stream = 0);

test/cuda/common/clang/float_gpu.cu

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#include <stdint.h>
2+
#include "clang/cuda.h"
3+
extern "C"
4+
__global__
5+
void add(float a, float b, float *c)
6+
{
7+
*c = a + b;
8+
}

test/cuda/common/clang/fmadd_gpu.cu

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#include <stdint.h>
2+
#include "clang/cuda.h"
3+
__global__
4+
void add(float *a, float *b, float *c, int n)
5+
{
6+
int i = __builtin_ptx_read_ctaid_x() * __builtin_ptx_read_ntid_x()
7+
+ __builtin_ptx_read_tid_x();
8+
int j = __builtin_ptx_read_ctaid_y() * __builtin_ptx_read_ntid_y()
9+
+ __builtin_ptx_read_tid_y();
10+
if (i < n && j < n) {
11+
int idx = i * n + j;
12+
c[idx] = a[idx] + b[idx];
13+
}
14+
}

test/cuda/common/clang/fmmul_gpu.cu

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#include <stdint.h>
2+
#include "clang/cuda.h"
3+
__global__
4+
void mul(float *a, float *b, float *c, int n)
5+
{
6+
int i = __builtin_ptx_read_ctaid_x() * __builtin_ptx_read_ntid_x()
7+
+ __builtin_ptx_read_tid_x();
8+
int j = __builtin_ptx_read_ctaid_y() * __builtin_ptx_read_ntid_y()
9+
+ __builtin_ptx_read_tid_y();
10+
if (i < n && j < n) {
11+
int idx = i * n + j;
12+
c[idx] = a[idx] * b[idx];
13+
}
14+
}

test/cuda/common/clang/idle_gpu.cu

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#include <stdint.h>
2+
#include "clang/cuda.h"
3+
4+
extern "C"
5+
__global__
6+
void idle(unsigned int *p, unsigned int n)
7+
{
8+
int x = __builtin_ptx_read_ctaid_x() * __builtin_ptx_read_ntid_x()
9+
+ __builtin_ptx_read_tid_x();
10+
int y = __builtin_ptx_read_ctaid_y() * __builtin_ptx_read_ntid_y()
11+
+ __builtin_ptx_read_tid_y();
12+
unsigned int i = 0, j = 0, k = 0;
13+
__shared__ int s;
14+
15+
s = *p;
16+
if (x == 0 && y == 0) {
17+
for (i = 0; i < n; i++) {
18+
if (x + y > n) {
19+
s = s + x;
20+
if (s > x + y)
21+
s = x;
22+
}
23+
}
24+
}
25+
*p = s;
26+
}

test/cuda/common/clang/loop_gpu.cu

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include <stdint.h>
2+
#include "clang/cuda.h"
3+
4+
__global__
5+
void loop(uint32_t *data, uint32_t size, uint32_t n)
6+
{
7+
int i;
8+
// for (i = 0; i < n/40; i++) {
9+
for (i = 0; i < n/5; i++) {
10+
if (i * 4 < size)
11+
data[i] = i + n;
12+
}
13+
}

test/cuda/common/clang/madd_gpu.cu

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#include <stdint.h>
2+
#include "clang/cuda.h"
3+
__global__
4+
void add(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t n)
5+
{
6+
int i = __builtin_ptx_read_ctaid_x() * __builtin_ptx_read_ntid_x()
7+
+ __builtin_ptx_read_tid_x();
8+
int j = __builtin_ptx_read_ctaid_y() * __builtin_ptx_read_ntid_y()
9+
+ __builtin_ptx_read_tid_y();
10+
if (i < n && j < n) {
11+
int idx = i * n + j;
12+
c[idx] = a[idx] + b[idx];
13+
}
14+
}

test/cuda/common/clang/mmul_gpu.cu

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#include "clang/cuda.h"
2+
3+
extern "C" __global__ void multiply(unsigned int *a, unsigned int *b, unsigned int *c,
4+
int n)
5+
{
6+
unsigned int i;
7+
unsigned int product = 0;
8+
9+
int row = __builtin_ptx_read_ctaid_y() * __builtin_ptx_read_ntid_y()
10+
+ __builtin_ptx_read_tid_y();
11+
int col = __builtin_ptx_read_ctaid_x() * __builtin_ptx_read_ntid_x()
12+
+ __builtin_ptx_read_tid_x();
13+
14+
if(row < n && col < n){
15+
for (i = 0; i < n; i++)
16+
product += a[row * n + i] * b[i * n + col];
17+
18+
c[row*n + col] = product;
19+
}
20+
}
21+

test/cuda/user/float/Makefile.llvm

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Makefile
2+
TARGET = user_test
3+
ARCH = sm_20
4+
CC = gcc
5+
CLANG = clang -cc1 -nostdsysteminc -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm
6+
LLC = llc -march=nvptx64 -mcpu=$(ARCH)
7+
PTXAS = ptxas -arch $(ARCH)
8+
LIBS = -lucuda -lgdev
9+
CFLAGS = -L /usr/local/gdev/lib64 -I /usr/local/gdev/include
10+
11+
all:
12+
$(CLANG) -I . -o float_gpu.ll clang/float_gpu.cu
13+
$(LLC) -o float_gpu.ptx float_gpu.ll
14+
$(PTXAS) -o float_gpu.cubin float_gpu.ptx
15+
$(CC) -o $(TARGET) $(CFLAGS) main.c float.c $(LIBS)
16+
17+
clean:
18+
rm -f $(TARGET) *.cubin *.ptx *.ll ./*~

test/cuda/user/float/clang/cuda.h

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../common/clang/cuda.h
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../common/clang/float_gpu.cu

test/cuda/user/fmadd/Makefile.llvm

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Makefile
2+
TARGET = user_test
3+
ARCH = sm_20
4+
CC = gcc
5+
CLANG = clang -cc1 -nostdsysteminc -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm
6+
LLC = llc -march=nvptx64 -mcpu=$(ARCH)
7+
PTXAS = ptxas -arch $(ARCH)
8+
LIBS = -lucuda -lgdev
9+
CFLAGS = -L /usr/local/gdev/lib64 -I /usr/local/gdev/include
10+
11+
all:
12+
$(CLANG) -I . -o fmadd_gpu.ll clang/fmadd_gpu.cu
13+
$(LLC) -o fmadd_gpu.ptx fmadd_gpu.ll
14+
$(PTXAS) -o fmadd_gpu.cubin fmadd_gpu.ptx
15+
$(CC) -o $(TARGET) $(CFLAGS) main.c fmadd.c $(LIBS)
16+
17+
clean:
18+
rm -f $(TARGET) *.cubin *.ptx *.ll ./*~

test/cuda/user/fmadd/clang/cuda.h

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../common/clang/cuda.h
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../common/clang/fmadd_gpu.cu

test/cuda/user/fmmul/Makefile.llvm

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Makefile
2+
TARGET = user_test
3+
ARCH = sm_20
4+
CC = gcc
5+
CLANG = clang -cc1 -nostdsysteminc -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm
6+
LLC = llc -march=nvptx64 -mcpu=$(ARCH)
7+
PTXAS = ptxas -arch $(ARCH)
8+
LIBS = -lucuda -lgdev
9+
CFLAGS = -L /usr/local/gdev/lib64 -I /usr/local/gdev/include
10+
11+
all:
12+
$(CLANG) -I . -o fmmul_gpu.ll clang/fmmul_gpu.cu
13+
$(LLC) -o fmmul_gpu.ptx fmmul_gpu.ll
14+
$(PTXAS) -o fmmul_gpu.cubin fmmul_gpu.ptx
15+
$(CC) -o $(TARGET) $(CFLAGS) main.c fmmul.c $(LIBS)
16+
17+
clean:
18+
rm -f $(TARGET) *.cubin *.ptx *.ll ./*~

test/cuda/user/fmmul/clang/cuda.h

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../common/clang/cuda.h
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../common/clang/fmmul_gpu.cu

test/cuda/user/idle/Makefile.llvm

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Makefile
2+
TARGET = user_test
3+
ARCH = sm_20
4+
CC = gcc
5+
CLANG = clang -cc1 -nostdsysteminc -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm
6+
LLC = llc -march=nvptx64 -mcpu=$(ARCH)
7+
PTXAS = ptxas -arch $(ARCH)
8+
LIBS = -lucuda -lgdev
9+
CFLAGS = -L /usr/local/gdev/lib64 -I /usr/local/gdev/include
10+
11+
all:
12+
$(CLANG) -I . -o idle_gpu.ll clang/idle_gpu.cu
13+
$(LLC) -o idle_gpu.ptx idle_gpu.ll
14+
$(PTXAS) -o idle_gpu.cubin idle_gpu.ptx
15+
$(CC) -o $(TARGET) $(CFLAGS) main.c idle.c $(LIBS)
16+
17+
clean:
18+
rm -f $(TARGET) *.cubin *.ptx *.ll ./*~

test/cuda/user/idle/clang/cuda.h

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../common/clang/cuda.h

test/cuda/user/idle/clang/idle_gpu.cu

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../common/clang/idle_gpu.cu

test/cuda/user/loop/Makefile.llvm

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Makefile
2+
TARGET = user_test
3+
ARCH = sm_20
4+
CC = gcc
5+
CLANG = clang -cc1 -nostdsysteminc -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm
6+
LLC = llc -march=nvptx64 -mcpu=$(ARCH)
7+
PTXAS = ptxas -arch $(ARCH)
8+
LIBS = -lucuda -lgdev
9+
CFLAGS = -L /usr/local/gdev/lib64 -I /usr/local/gdev/include
10+
11+
all:
12+
$(CLANG) -I . -o loop_gpu.ll clang/loop_gpu.cu
13+
$(LLC) -o loop_gpu.ptx loop_gpu.ll
14+
$(PTXAS) -o loop_gpu.cubin loop_gpu.ptx
15+
$(CC) -o $(TARGET) $(CFLAGS) main.c loop.c $(LIBS)
16+
17+
clean:
18+
rm -f $(TARGET) *.cubin *.ptx *.ll ./*~

0 commit comments

Comments
 (0)