Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions 3rd_party/musa_compat/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# MUSA Compatibility Layer CMake Configuration
#
# This module provides MUSA API compatibility when the actual SDK is not available.
#
# Options:
# MNN_MUSA_COMPAT_STUB - Use stub implementation (compile only, no GPU)
# MNN_MUSA_COMPAT_CUDA - Map MUSA to CUDA (requires CUDA SDK)
# MNN_MUSA_NATIVE - Use native MUSA SDK (requires MUSA SDK)
#
# Priority: NATIVE > CUDA > STUB

cmake_minimum_required(VERSION 3.6)

# Backend selection, in priority order NATIVE > CUDA > STUB.
#
# Inputs  : MNN_MUSA_NATIVE, MNN_MUSA_COMPAT_CUDA (requests for a real backend).
# Outputs : MNN_USE_NATIVE_MUSA / MNN_USE_CUDA_AS_MUSA / MNN_USE_MUSA_STUB
#           (exactly one is ON afterwards), MUSA_COMPAT_INCLUDE_DIRS,
#           MUSA_COMPAT_LIBRARIES, MUSA_FOUND, MUSA_INCLUDE_DIRS, MUSA_LIBRARIES.
#
# The stub is the unconditional last resort: a backend that was requested and
# found is never overridden by the stub, and the stub include/library values
# never get mixed with a native or CUDA selection.

# Start from a clean slate so stale values from an enclosing scope cannot
# influence the selection below.
set(MNN_USE_NATIVE_MUSA OFF)
set(MNN_USE_CUDA_AS_MUSA OFF)
set(MNN_USE_MUSA_STUB OFF)
set(musa_compat_selected FALSE)

# 1. Native MUSA SDK (highest priority)
if(MNN_MUSA_NATIVE)
  find_package(MUSA QUIET)
  if(MUSA_FOUND)
    message(STATUS "MUSA Compat: Using native MUSA SDK")
    set(MNN_USE_NATIVE_MUSA ON)
    set(MUSA_COMPAT_INCLUDE_DIRS ${MUSA_INCLUDE_DIRS})
    set(MUSA_COMPAT_LIBRARIES ${MUSA_LIBRARIES})
    set(musa_compat_selected TRUE)
  else()
    message(WARNING "MUSA SDK not found, falling back to compatibility layer")
  endif()
endif()

# 2. Fallback to CUDA mapping
if(NOT musa_compat_selected AND MNN_MUSA_COMPAT_CUDA)
  find_package(CUDA QUIET)
  if(CUDA_FOUND)
    message(STATUS "MUSA Compat: Mapping MUSA to CUDA")
    set(MNN_USE_CUDA_AS_MUSA ON)
    set(MUSA_COMPAT_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
    set(MUSA_COMPAT_LIBRARIES ${CUDA_LIBRARIES})
    set(musa_compat_selected TRUE)
  else()
    message(WARNING "CUDA not found for MUSA compatibility")
  endif()
endif()

# 3. Final fallback: stub implementation
if(NOT musa_compat_selected)
  message(STATUS "MUSA Compat: Using stub implementation (compile only, no GPU)")
  set(MNN_USE_MUSA_STUB ON)
  # CMAKE_CURRENT_LIST_DIR is the directory of *this* file even when it is
  # pulled in through include(); CMAKE_CURRENT_SOURCE_DIR would then be the
  # directory of the including CMakeLists.txt.
  set(MUSA_COMPAT_INCLUDE_DIRS "${CMAKE_CURRENT_LIST_DIR}/include")
  set(MUSA_COMPAT_LIBRARIES "")
  set(musa_compat_selected TRUE)
endif()

# Export variables.
# This script may be consumed through include() (it then runs in the caller's
# scope) or through add_subdirectory() (child scope). Publish the result in the
# current scope and, when a parent directory scope exists, there as well.
set(MUSA_FOUND TRUE)
set(MUSA_INCLUDE_DIRS ${MUSA_COMPAT_INCLUDE_DIRS})
set(MUSA_LIBRARIES ${MUSA_COMPAT_LIBRARIES})
get_directory_property(musa_compat_parent_dir PARENT_DIRECTORY)
if(musa_compat_parent_dir)
  set(MUSA_INCLUDE_DIRS ${MUSA_COMPAT_INCLUDE_DIRS} PARENT_SCOPE)
  set(MUSA_LIBRARIES ${MUSA_COMPAT_LIBRARIES} PARENT_SCOPE)
  set(MUSA_FOUND TRUE PARENT_SCOPE)
endif()

# Add compile definitions (exactly one of the three modes is active)
if(MNN_USE_NATIVE_MUSA)
  add_definitions(-DMNN_USE_NATIVE_MUSA)
elseif(MNN_USE_CUDA_AS_MUSA)
  add_definitions(-DMNN_USE_CUDA_AS_MUSA)
elseif(MNN_USE_MUSA_STUB)
  add_definitions(-DMNN_USE_MUSA_STUB)
endif()

message(STATUS "MUSA Compat: Include dirs = ${MUSA_COMPAT_INCLUDE_DIRS}")
message(STATUS "MUSA Compat: Libraries = ${MUSA_COMPAT_LIBRARIES}")
140 changes: 140 additions & 0 deletions 3rd_party/musa_compat/include/musa_runtime.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/**
* MUSA Runtime API Compatibility Layer (Fixed)
*/

#ifndef MUSA_RUNTIME_COMPAT_H
#define MUSA_RUNTIME_COMPAT_H

/* Standard headers are included before the linkage block is opened so that
 * their declarations are never wrapped in extern "C". */
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Stub implementation for compilation only.
 * musaError_t is the status type used by the stub entry points below. */
typedef int musaError_t;

/* Status values a musaError_t can carry in this stub. */
enum {
    musaSuccess               = 0,
    musaErrorMemoryAllocation = 1,
    musaErrorInvalidDevice    = 2,
    musaErrorInvalidValue     = 3,
    musaErrorNotInitialized   = 4
};

/* Opaque handle types: the structs they point to are never defined in this
 * header, so the handles can only be passed around, not dereferenced. */
typedef struct _musaStream *musaStream_t;
typedef struct _musaEvent  *musaEvent_t;

/* Direction selector accepted by musaMemcpy / musaMemcpyAsync. */
typedef enum {
    musaMemcpyHostToDevice   = 0,
    musaMemcpyDeviceToHost   = 1,
    musaMemcpyDeviceToDevice = 2,
    musaMemcpyDefault        = 3
} musaMemcpyKind;

/* Device property record taken by musaGetDeviceProperties().
 * NOTE(review): the field set presumably mirrors the vendor SDK's device
 * property struct; layout compatibility with the real SDK is not established
 * by this header - confirm before relying on it. */
typedef struct {
    /* Identification and memory size */
    char   name[256];
    size_t totalGlobalMem;

    /* Version numbers */
    int major;
    int minor;

    /* Execution limits */
    int    multiProcessorCount;
    int    maxThreadsPerBlock;
    int    maxThreadsDim[3];
    int    maxGridSize[3];
    int    clockRate;
    size_t sharedMemPerBlock;
    int    regsPerBlock;
    int    warpSize;
    size_t memPitch;
    int    maxThreadsPerMultiProcessor;

    /* Feature and configuration flags */
    int computeMode;
    int deviceOverlap;
    int kernelExecTimeoutEnabled;
    int integrated;
    int canMapHostMemory;
    int concurrentKernels;
    int ECCEnabled;
    int pciBusID;
    int pciDeviceID;
    int tccDriver;
    int asyncEngineCount;
    int unifiedAddressing;

    /* Memory subsystem */
    int    memoryClockRate;
    int    memoryBusWidth;
    int    l2CacheSize;
    size_t sharedMemPerMultiprocessor;
    int    regsPerMultiprocessor;

    /* Managed memory and launch capabilities */
    int managedMemory;
    int computePreemption;
    int canUseHostPointerForRegisteredMem;
    int cooperativeLaunch;
    int pageableMemoryAccess;
    int concurrentManagedAccess;
    int directManagedMemAccessFromHost;
} musaDeviceProp;

/* Stub functions */
/* Stub allocator: never allocates and always reports musaErrorNotInitialized.
 *
 * ptr  - receives NULL when non-NULL, so a caller that forgets to check the
 *        status does not continue with an indeterminate pointer.
 * size - ignored.
 */
static inline musaError_t musaMalloc(void **ptr, size_t size) {
    (void)size;
    if (ptr != NULL) {
        *ptr = NULL;
    }
    return musaErrorNotInitialized;
}
/* Stub: ignores the pointer and reports musaErrorNotInitialized. */
static inline musaError_t musaFree(void *ptr) {
    (void)ptr; /* unused in the stub */
    return musaErrorNotInitialized;
}
/* Stub: copies nothing; every argument is ignored and
 * musaErrorNotInitialized is returned. */
static inline musaError_t musaMemcpy(void *dst, const void *src, size_t count, musaMemcpyKind kind) {
    (void)dst;
    (void)src;
    (void)count;
    (void)kind;
    return musaErrorNotInitialized;
}
/* Stub: writes nothing; every argument is ignored and
 * musaErrorNotInitialized is returned. */
static inline musaError_t musaMemset(void *ptr, int value, size_t count) {
    (void)ptr;
    (void)value;
    (void)count;
    return musaErrorNotInitialized;
}
/* Stub: reports zero devices through *count (when count is non-NULL) and
 * returns musaSuccess. */
static inline musaError_t musaGetDeviceCount(int *count) {
    if (count != NULL) {
        *count = 0;
    }
    return musaSuccess;
}
/* Stub: leaves *prop untouched and reports musaErrorInvalidDevice for any
 * device index. */
static inline musaError_t musaGetDeviceProperties(musaDeviceProp *prop, int device) {
    (void)prop;
    (void)device;
    return musaErrorInvalidDevice;
}
/* Stub: does nothing and returns musaSuccess. */
static inline musaError_t musaDeviceSynchronize(void) {
    const musaError_t status = musaSuccess;
    return status;
}
/* Stub: no error state is tracked; always returns musaSuccess. */
static inline musaError_t musaGetLastError(void) {
    const musaError_t status = musaSuccess;
    return status;
}
/* Returns a static, human-readable description of a status code.
 *
 * error - status value to describe.
 *
 * musaSuccess is reported as "no error" so that logging a successful status
 * does not read like a failure; every other value maps to the generic stub
 * message. The returned string must not be freed or modified.
 */
static inline const char* musaGetErrorString(musaError_t error) {
    if (error == musaSuccess) {
        return "no error";
    }
    return "MUSA not available (stub)";
}
/* Stub: no device can be selected; returns musaErrorInvalidDevice for any
 * index. */
static inline musaError_t musaSetDevice(int device) {
    (void)device; /* unused in the stub */
    return musaErrorInvalidDevice;
}
/* Stub: writes 0 to *device (when device is non-NULL) and returns
 * musaSuccess. */
static inline musaError_t musaGetDevice(int *device) {
    if (device != NULL) {
        *device = 0;
    }
    return musaSuccess;
}
/* Stub stream creation.
 *
 * stream - receives a NULL handle when non-NULL. The function reports
 *          musaSuccess, so the output must be initialised: otherwise the
 *          caller would go on to use an indeterminate handle value.
 *
 * Returns musaSuccess.
 */
static inline musaError_t musaStreamCreate(musaStream_t *stream) {
    if (stream != NULL) {
        *stream = NULL;
    }
    return musaSuccess;
}
/* Stub: ignores the handle and returns musaSuccess. */
static inline musaError_t musaStreamDestroy(musaStream_t stream) {
    (void)stream; /* unused in the stub */
    return musaSuccess;
}
/* Stub: reports zero free and zero total bytes through whichever output
 * pointers are non-NULL, then returns musaSuccess. */
static inline musaError_t musaMemGetInfo(size_t *free, size_t *total) {
    if (free != NULL) {
        *free = 0;
    }
    if (total != NULL) {
        *total = 0;
    }
    return musaSuccess;
}
/* Stub: copies nothing; every argument (including the stream) is ignored and
 * musaErrorNotInitialized is returned. */
static inline musaError_t musaMemcpyAsync(void *dst, const void *src, size_t count, musaMemcpyKind kind, musaStream_t stream) {
    (void)dst;
    (void)src;
    (void)count;
    (void)kind;
    (void)stream;
    return musaErrorNotInitialized;
}

#ifdef __cplusplus
}
#endif

#endif /* MUSA_RUNTIME_COMPAT_H */
15 changes: 15 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ option(MNN_ONEDNN "Enable oneDNN" OFF)
option(MNN_AVX2 "Open AVX2 Compile for x86 if possible" ON)
option(MNN_AVX512 "Enable AVX512" OFF)
option(MNN_CUDA "Enable CUDA" OFF)
# MUSA backend switches. MNN_MUSA enables the backend as a whole; the three
# MNN_MUSA_* mode switches are read by 3rd_party/musa_compat/CMakeLists.txt to
# choose between the native SDK, a CUDA mapping and the compile-only stub.
option(MNN_MUSA "Enable MUSA (Moore Threads GPU)" OFF)
option(MNN_MUSA_COMPAT_STUB "MUSA stub mode (compile only, no GPU)" ON)
option(MNN_MUSA_COMPAT_CUDA "Map MUSA to CUDA for testing" OFF)
option(MNN_MUSA_NATIVE "Use native MUSA SDK" OFF)
option(MNN_TENSORRT "Enable TensorRT" OFF)
option(MNN_COREML "Enable CoreML" OFF)
option(MNN_NNAPI "Enable NNAPI" OFF)
Expand Down Expand Up @@ -265,6 +269,7 @@ message(STATUS "\tTensorRT: ${MNN_TENSORRT}")
message(STATUS "\tCoreML: ${MNN_COREML}")
message(STATUS "\tNNAPI: ${MNN_NNAPI}")
message(STATUS "\tCUDA: ${MNN_CUDA}")
message(STATUS "\tMUSA: ${MNN_MUSA}")
message(STATUS "\tOpenMP: ${MNN_OPENMP}")
message(STATUS "\tBF16: ${MNN_SUPPORT_BF16}")
message(STATUS "\tThreadPool: ${MNN_USE_THREAD_POOL}")
Expand Down Expand Up @@ -640,6 +645,16 @@ IF(MNN_CUDA)
list(APPEND MNN_EXTRA_DEPENDS ${MNN_CUDA_LIBS})
ENDIF()

# MUSA (Moore Threads GPU) backend.
# The subdirectory defines the MNN_MUSA target and hands MNN_MUSA_LIBS back to
# this scope; both are folded into the aggregate lists used to build MNN.
IF(MNN_MUSA)
  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/musa/)
  list(APPEND MNN_EXTRA_DEPENDS ${MNN_MUSA_LIBS})
  list(APPEND MNN_TARGETS MNN_MUSA)
  # Object files are only merged on non-MSVC toolchains.
  IF(NOT MSVC)
    list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_MUSA>)
  ENDIF()
ENDIF()

# Express
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/express/)
IF(MNN_SEP_BUILD)
Expand Down
3 changes: 3 additions & 0 deletions include/MNN/MNNForwardType.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ typedef enum {
/*NVIDIA GPU API*/
MNN_FORWARD_CUDA = 2,

/*Moore Threads GPU API*/
MNN_FORWARD_MUSA = 15,

/*Android / Common Device GPU API*/
MNN_FORWARD_OPENCL = 3,
MNN_FORWARD_OPENGL = 6,
Expand Down
106 changes: 106 additions & 0 deletions source/backend/musa/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# MUSA Backend CMakeLists.txt
#
# MUSA (Moore Threads GPU) Backend for MNN
#
# This build script supports three modes:
# 1. Native MUSA SDK - Full MUSA support
# 2. CUDA compatibility - Map MUSA to CUDA (for testing/development)
# 3. Stub mode - Compile only, no GPU execution

# Include MUSA compatibility layer.
# The script is located relative to this file rather than CMAKE_SOURCE_DIR so
# the backend still configures when MNN is embedded in another project (where
# CMAKE_SOURCE_DIR points at the outer project's root).
get_filename_component(MNN_MUSA_REPO_ROOT "${CMAKE_CURRENT_LIST_DIR}/../../.." ABSOLUTE)
include("${MNN_MUSA_REPO_ROOT}/3rd_party/musa_compat/CMakeLists.txt")

if(NOT MUSA_FOUND)
  # The parent CMakeLists.txt references the MNN_MUSA target unconditionally
  # once MNN_MUSA is ON, so returning quietly would only postpone the failure
  # to an obscure "target not found" error. Fail here with a clear message.
  message(FATAL_ERROR "MUSA backend cannot be built: No MUSA/CUDA SDK found and stub mode not enabled")
endif()

# -fexceptions is a GCC/Clang-style flag; do not hand it to MSVC, which this
# file otherwise supports through its WIN32 branches.
if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
endif()

# Backend feature switches
option(MNN_MUSA_QUANT "Enable MNN MUSA Quant File" OFF)
option(MNN_MUSA_BF16 "Enable MNN MUSA Bfloat16 File" OFF)
option(MNN_MUSA_COMPAT_STUB "Use stub implementation (compile only)" ON)

# Translate the switches into preprocessor definitions for this directory
if(MNN_MUSA_QUANT)
  add_definitions(-DENABLE_MUSA_QUANT)
endif()

if(MNN_MUSA_BF16)
  add_definitions(-DENABLE_MUSA_BF16)
endif()

if(MNN_LOW_MEMORY)
  add_definitions(-DMNN_LOW_MEMORY)
endif()

# Source files: everything below core/ and execution/ next to this file.
file(GLOB_RECURSE MNN_MUSA_SRC
  ${CMAKE_CURRENT_LIST_DIR}/core/*
  ${CMAKE_CURRENT_LIST_DIR}/execution/*
)

# Without transformer fusion, drop the sources under execution/plugin/.
if(NOT MNN_SUPPORT_TRANSFORMER_FUSE)
  file(GLOB_RECURSE MNN_MUSA_TRANSFORMER_FUSE_SRC ${CMAKE_CURRENT_LIST_DIR}/execution/plugin/*)
  # Only call REMOVE_ITEM when the glob matched something: some CMake releases
  # reject list(REMOVE_ITEM) when it is given no values to remove.
  if(MNN_MUSA_TRANSFORMER_FUSE_SRC)
    list(REMOVE_ITEM MNN_MUSA_SRC ${MNN_MUSA_TRANSFORMER_FUSE_SRC})
  endif()
endif()

# Include directories.
# The compat directory ships a stub musa_runtime.h. When a native SDK is in use
# its own headers must be searched first, otherwise the stub would shadow the
# SDK header of the same name (assuming the SDK provides musa_runtime.h) and
# every runtime call would resolve to a no-op stub. In the other modes the
# compat layer stays first, as before.
if(MNN_USE_NATIVE_MUSA)
  include_directories(
    ${MUSA_INCLUDE_DIRS}
    ${CMAKE_SOURCE_DIR}/3rd_party/musa_compat/include
  )
else()
  include_directories(
    ${CMAKE_SOURCE_DIR}/3rd_party/musa_compat/include
    ${MUSA_INCLUDE_DIRS}
  )
endif()

include_directories(
  ${CMAKE_CURRENT_LIST_DIR}/
  ${CMAKE_SOURCE_DIR}/include/
)

# Build library based on available SDK.
# Exactly one of the three branches below runs. Each one defines the MNN_MUSA
# target and publishes MNN_MUSA_LIBS to the parent scope.
if(MNN_USE_NATIVE_MUSA)
# Native MUSA build
# NOTE(review): musa_add_library is not defined in this file; presumably it is
# provided by the package located via find_package(MUSA) - confirm.
message(STATUS "Building MUSA backend with native MUSA SDK")
if(WIN32)
# Windows: one static library holding Register.cpp and all backend sources.
musa_add_library(MNN_MUSA STATIC Register.cpp ${MNN_MUSA_SRC})
set(MNN_MUSA_LIBS MNN_MUSA ${MUSA_LIBRARIES} PARENT_SCOPE)
else()
# Elsewhere: backend sources go into the shared library MNN_Musa_Main, while
# Register.cpp is built separately as the MNN_MUSA object library.
musa_add_library(MNN_Musa_Main SHARED ${MNN_MUSA_SRC})
set(MNN_MUSA_LIBS MNN_Musa_Main PARENT_SCOPE)
add_library(MNN_MUSA OBJECT Register.cpp)
endif()

elseif(MNN_USE_CUDA_AS_MUSA)
# CUDA compatibility mode
message(STATUS "Building MUSA backend with CUDA compatibility")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -D_FORCE_INLINES -w")
# NOTE(review): CMAKE_BUILD_TYPE is empty under multi-config generators, so
# those always take the -O3 branch.
if(CMAKE_BUILD_TYPE MATCHES Debug)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O0")
else()
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -O3")
endif()

# Keep only the .cu sources for the CUDA build (nothing is renamed).
# NOTE(review): non-.cu files from MNN_MUSA_SRC are not compiled in this mode,
# unlike the native branch which passes the full list - confirm this is intended.
foreach(SRC_FILE ${MNN_MUSA_SRC})
if(SRC_FILE MATCHES "\\.cu$")
list(APPEND MNN_MUSA_CU_SRC ${SRC_FILE})
endif()
endforeach()

if(WIN32)
cuda_add_library(MNN_MUSA STATIC Register.cpp ${MNN_MUSA_CU_SRC})
set(MNN_MUSA_LIBS MNN_MUSA ${CUDA_LIBRARIES} PARENT_SCOPE)
else()
cuda_add_library(MNN_Musa_Main SHARED ${MNN_MUSA_CU_SRC})
set(MNN_MUSA_LIBS MNN_Musa_Main PARENT_SCOPE)
add_library(MNN_MUSA OBJECT Register.cpp)
endif()

else()
# Stub mode - compile C++ files only (skip .cu files)
message(STATUS "Building MUSA backend in STUB mode (no GPU execution)")

# Filter out .cu files; every other globbed file is kept.
foreach(SRC_FILE ${MNN_MUSA_SRC})
if(NOT SRC_FILE MATCHES "\\.cu$")
list(APPEND MNN_MUSA_CPP_SRC ${SRC_FILE})
endif()
endforeach()

# NOTE(review): here MNN_MUSA_LIBS names the OBJECT library itself, whereas the
# non-Windows branches above export a shared library - confirm the parent can
# link an object library target.
add_library(MNN_MUSA OBJECT Register.cpp ${MNN_MUSA_CPP_SRC})
set(MNN_MUSA_LIBS MNN_MUSA PARENT_SCOPE)
endif()

message(STATUS "MUSA Backend: Configured successfully")
Loading