Skip to content

riscv64 initial import #131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Source/GmmLib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -167,6 +167,8 @@ endif()

# Choose the value later passed to the compiler as -march (GMMLIB_MARCH).
#   * processor name starting with "aarch" -> armv8-a+fp+simd
#   * processor name containing "riscv"    -> rv64g
#   * anything else                        -> corei7, but only when the caller
#                                             has not already set GMMLIB_MARCH
# CMAKE_SYSTEM_PROCESSOR is expanded inside quotes so that an empty value
# yields a well-formed if() (falling through to the default) and so that the
# processor name is never re-interpreted as the name of another variable.
if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^aarch")
    set(GMMLIB_MARCH "armv8-a+fp+simd")
elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "riscv")
    # NOTE(review): the unanchored "riscv" pattern also matches riscv32, for
    # which rv64g is not a valid -march value -- confirm the intended targets.
    set(GMMLIB_MARCH "rv64g")
elseif("${GMMLIB_MARCH}" STREQUAL "")
    set(GMMLIB_MARCH "corei7")
endif()
@@ -302,6 +304,7 @@ set(SOURCES_
${BS_DIR_GMMLIB}/Texture/GmmTextureSpecialCases.cpp
${BS_DIR_GMMLIB}/Texture/GmmTextureOffset.cpp
${BS_DIR_GMMLIB}/GlobalInfo/GmmInfo.cpp
${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/riscv_sse2_support.h
${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
${BS_DIR_GMMLIB}/Utility/GmmLog/GmmLog.cpp
${BS_DIR_GMMLIB}/Utility/GmmUtility.cpp
@@ -579,6 +582,9 @@ if(UNIX)
FILES_MATCHING PATTERN "*.h"
PATTERN "*.hpp")

# Ship the CpuSwizzleBlt implementation file and the RISC-V SSE2 shim header
# it includes, both into the same devel include directory.
install(
    FILES
        ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/riscv_sse2_support.h
        ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/igdgmm/GmmLib/Utility/CpuSwizzleBlt/
    COMPONENT gmmlib-devel
)

34 changes: 34 additions & 0 deletions Source/GmmLib/Linux.cmake
Original file line number Diff line number Diff line change
@@ -55,6 +55,40 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")
-fPIC
-g
)
# RISC-V branch: taken when CMAKE_SYSTEM_PROCESSOR contains "riscv".
# Defines GMMLIB_COMPILER_FLAGS_COMMON, the list of compiler flags for this
# architecture.
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv")
SET (GMMLIB_COMPILER_FLAGS_COMMON
#general warnings
# (-Wall is listed below but commented out, i.e. not enabled)
#-Wall
-Winit-self
-Winvalid-pch
-Wpointer-arith
-Wno-unused
-Wno-unknown-pragmas
-Wno-comments
-Wno-narrowing
-Wno-overflow
-Wno-parentheses
-Wno-missing-braces
-Wno-sign-compare
# The following three diagnostics are promoted to hard errors.
-Werror=address
-Werror=format-security
-Werror=return-type

# General optimization options
# -march takes its value from GMMLIB_MARCH, which this file does not set.
# NOTE(review): presumably assigned by the including CMakeLists.txt -- confirm.
-march=${GMMLIB_MARCH}
-finline-functions
-fno-short-enums
-Wa,--noexecstack
-fno-strict-aliasing
# Other common flags
-fstack-protector
-fdata-sections
-ffunction-sections
-fmessage-length=0
-fvisibility=hidden
-fPIC
-g
)
else()
SET (GMMLIB_COMPILER_FLAGS_COMMON
#general warnings
25 changes: 23 additions & 2 deletions Source/GmmLib/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
Original file line number Diff line number Diff line change
@@ -375,6 +375,8 @@ extern void CpuSwizzleBlt(CPU_SWIZZLE_BLT_SURFACE *pDest, CPU_SWIZZLE_BLT_SURFAC
#include <intrin.h>
#elif defined(__ARM_ARCH)
#include <sse2neon.h>
#elif defined(__riscv)
#include "riscv_sse2_support.h"
#elif((defined __clang__) ||(__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
#include <cpuid.h>
#include <x86intrin.h>
@@ -427,7 +429,10 @@ int SwizzleOffset( // ##########################################################

if(PDepSupported == -1)
{
#if(_MSC_VER >= 1700)
#if defined(__riscv)
#define PDEP(Src, Mask) 0
PDepSupported = 0;
#elif(_MSC_VER >= 1700)
#define PDEP(Src, Mask) _pdep_u32((Src), (Mask))
int CpuInfo[4];
__cpuidex(CpuInfo, 7, 0);
@@ -692,21 +697,34 @@ void CpuSwizzleBlt( // #########################################################
} __m24; // 24-bit/3-byte memory element.

// Macros intended to compile to various types of "load register from memory" instructions...
// MOVB/MOVW/MOV3/MOVD move exactly 1/2/3/4 bytes from Src into the lowest-addressed bytes
// of Reg and leave the remaining bytes of Reg untouched. MOVQ/MOVDQ/MOVDQU defer to the
// SSE2-style load intrinsics.
#if defined(__riscv)
// RISC-V variant: copy through memcpy instead of dereferencing a casted pointer. The byte
// count must be the element width (not sizeof(__m128i)); a larger count would read past
// the end of a narrow source and overwrite the rest of Reg.
#define MOVB_R( Reg, Src) memcpy((uint8_t *)&(Reg), (const uint8_t *)(Src), sizeof(uint8_t))
#define MOVW_R( Reg, Src) memcpy((uint16_t *)&(Reg), (const uint16_t *)(Src), sizeof(uint16_t))
#define MOV3_R( Reg, Src) memcpy((__m24 *)&(Reg), (const __m24 *)(Src), sizeof(__m24))
#define MOVD_R( Reg, Src) memcpy((uint32_t *)&(Reg), (const uint32_t *)(Src), sizeof(uint32_t))
#else
#define MOVB_R( Reg, Src) (*(uint8_t *)&(Reg) = *(uint8_t *)(Src))
#define MOVW_R( Reg, Src) (*(uint16_t *)&(Reg) = *(uint16_t *)(Src))
#define MOV3_R( Reg, Src) (*(__m24 *)&(Reg) = *(__m24 *)(Src))
#define MOVD_R( Reg, Src) (*(uint32_t *)&(Reg) = *(uint32_t *)(Src))
#endif

#define MOVQ_R( Reg, Src) ((Reg) = _mm_loadl_epi64((__m128i *)(Src)))
#define MOVDQ_R( Reg, Src) ((Reg) = _mm_load_si128( (__m128i *)(Src)))
#define MOVDQU_R(Reg, Src) ((Reg) = _mm_loadu_si128((__m128i *)(Src)))

// As above, but the other half: "store to memory from register"...
// MOVB/MOVW/MOV3/MOVD write exactly 1/2/3/4 bytes, taken from the lowest-addressed bytes
// of Reg, to Dest. MOVQ/MOVDQ/MOVDQU defer to the SSE2-style store intrinsics.
#if defined(__riscv)
// RISC-V variant: copy through memcpy instead of dereferencing a casted pointer. The byte
// count must be the element width (not sizeof(__m128i)); a larger count would write past
// the end of the destination element.
#define MOVB_M( Dest, Reg) memcpy((uint8_t *)(Dest), (const uint8_t *)&(Reg), sizeof(uint8_t))
#define MOVW_M( Dest, Reg) memcpy((uint16_t *)(Dest), (const uint16_t *)&(Reg), sizeof(uint16_t))
#define MOV3_M( Dest, Reg) memcpy((__m24 *)(Dest), (const __m24 *)&(Reg), sizeof(__m24))
#define MOVD_M( Dest, Reg) memcpy((uint32_t *)(Dest), (const uint32_t *)&(Reg), sizeof(uint32_t))
#else
#define MOVB_M( Dest, Reg)(*(uint8_t *)(Dest) = *(uint8_t *)&(Reg))
#define MOVW_M( Dest, Reg)(*(uint16_t *)(Dest) = *(uint16_t *)&(Reg))
#define MOV3_M( Dest, Reg)(*(__m24 *)(Dest) = *(__m24 *)&(Reg))
#define MOVD_M( Dest, Reg)(*(uint32_t *)(Dest) = *(uint32_t *)&(Reg))

#endif
#define MOVQ_M( Dest, Reg)(_mm_storel_epi64((__m128i *)(Dest), (Reg)))
#define MOVDQ_M( Dest, Reg)(_mm_store_si128( (__m128i *)(Dest), (Reg)))
#define MOVDQU_M( Dest, Reg)(_mm_storeu_si128((__m128i *)(Dest), (Reg)))
@@ -749,6 +767,9 @@ void CpuSwizzleBlt( // #########################################################
#elif(defined(__ARM_ARCH))
#define MOVNTDQA_R(Reg, Src) ((Reg) = (Reg))
StreamingLoadSupported = 0;
#elif(defined(__riscv))
#define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))
StreamingLoadSupported = 0;
#elif((defined __clang__) || (__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 5))
#define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))
unsigned int eax, ebx, ecx, edx;
57 changes: 57 additions & 0 deletions Source/GmmLib/Utility/CpuSwizzleBlt/riscv_sse2_support.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#pragma once
#ifndef __RISCV_SSE_SUPPORT_HPP__
#define __RISCV_SSE_SUPPORT_HPP__
/*
 * Minimal stand-ins for the handful of SSE2-style intrinsics used by
 * CpuSwizzleBlt.c, for builds targeting RISC-V. Every load/store is a plain
 * memcpy between the caller's memory and a 128-bit GCC/Clang generic vector.
 *
 * All helpers are `static inline` so the header can be included from more
 * than one translation unit without multiple-definition link errors.
 */
#if defined(__riscv)

#include <stdint.h> /* uint16_t, uint64_t */
#include <string.h> /* memcpy, memset */

/* 128-bit integer vector: 8 lanes of uint16_t = 16 bytes. */
typedef uint16_t __attribute__((vector_size(16))) __m128i;

/* Load the 64 bits at mem_addr into the low half of the result; the upper half is zero. */
static inline __m128i _mm_loadl_epi64(__m128i const* mem_addr) {
    __m128i ret;
    memset(&ret, 0, sizeof(ret));
    memcpy(&ret, mem_addr, sizeof(uint64_t));
    return ret;
}

/* Load 128 bits from mem_addr. */
static inline __m128i _mm_load_si128(__m128i const* mem_addr) {
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(__m128i));
    return ret;
}

/* Load 128 bits from mem_addr; memcpy places no alignment requirement on mem_addr. */
static inline __m128i _mm_loadu_si128(__m128i const* mem_addr) {
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(__m128i));
    return ret;
}

/* Store the low 64 bits of a to mem_addr. */
static inline void _mm_storel_epi64(__m128i* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(uint64_t));
}

/* Store all 128 bits of a to mem_addr. */
static inline void _mm_store_si128(__m128i* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(__m128i));
}

/* Store all 128 bits of a to mem_addr; memcpy places no alignment requirement on mem_addr. */
static inline void _mm_storeu_si128(__m128i* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(__m128i));
}

/* "Streaming" store: implemented here as an ordinary 128-bit store. */
static inline void _mm_stream_si128(void* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(__m128i));
}

/* "Streaming" load: implemented here as an ordinary 128-bit load. */
static inline __m128i _mm_stream_load_si128(void* mem_addr) {
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(__m128i));
    return ret;
}

/* Emit a RISC-V `fence p,s` instruction that also acts as a compiler memory barrier. */
#define RISCV_FENCE(p, s) \
    __asm__ __volatile__ ("fence " #p "," #s : : : "memory")

/* Store-fence stand-in: issues a full read/write fence (`fence rw,rw`). */
static inline void _mm_sfence(void) {
    RISCV_FENCE(rw,rw);
}
#else
#error "compiling for rv64g (riscv64) but compiler architecture macro undefined"
#endif
#endif