Skip to content

Commit

Permalink
build fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
slaren committed Nov 12, 2024
1 parent bf79cb3 commit 28b3b76
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 221 deletions.
47 changes: 4 additions & 43 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,37 +29,6 @@ endif()
unset(GGML_EXTRA_LIBS_PRIVATE)
unset(GGML_EXTRA_LIBS_PUBLIC)

# musa, hip: add a directory with a CMakeLists.txt file but no source files (refer to the ggml-cuda files as ../ggml-cuda)
# Look up the GPU toolkit: CUDAToolkit by default, MUSAToolkit when GGML_MUSA
# is enabled. Neither lookup is REQUIRED; the outcome is left in
# CUDAToolkit_FOUND in both cases.
if (NOT GGML_MUSA)
    find_package(CUDAToolkit)
else()
    # Extend the module search path before looking for the MUSA package.
    list(APPEND CMAKE_MODULE_PATH "/usr/local/musa/cmake/")
    find_package(MUSAToolkit)
    # Mirror the MUSA result into the CUDA result variable.
    set(CUDAToolkit_FOUND ${MUSAToolkit_FOUND})
endif()

# if (GGML_MUSA)
# set(CMAKE_CUDA_COMPILER ${MUSAToolkit_MCC_EXECUTABLE})
# else()
# if (GGML_MUSA)
# set_source_files_properties(${GGML_SOURCES_CUDA} PROPERTIES LANGUAGE CXX)
# foreach(SOURCE ${GGML_SOURCES_CUDA})
# set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
# endforeach()
# endif()


# GGML_MUSA builds: switch GGML_CUDA on, export the GGML_USE_MUSA definition,
# and compile C/C++ with clang/clang++ with compiler extensions disabled.
if (GGML_MUSA)
    set(GGML_CUDA ON)
    list(APPEND GGML_CDEF_PUBLIC GGML_USE_MUSA)

    # C toolchain
    set(CMAKE_C_COMPILER   clang)
    set(CMAKE_C_EXTENSIONS OFF)

    # C++ toolchain
    set(CMAKE_CXX_COMPILER   clang++)
    set(CMAKE_CXX_EXTENSIONS OFF)
endif()

if (GGML_AMX)
if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.0)
else()
Expand Down Expand Up @@ -597,12 +566,6 @@ function(get_flags CCID CCVER)
elseif (CCID STREQUAL "GNU")
set(C_FLAGS -Wdouble-promotion)
set(CXX_FLAGS -Wno-array-bounds)

if (NOT GGML_MUSA)
if (CCVER VERSION_GREATER_EQUAL 7.1.0)
list(APPEND CXX_FLAGS -Wno-format-truncation)
endif()
endif()
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
list(APPEND CXX_FLAGS -Wextra-semi)
endif()
Expand Down Expand Up @@ -779,18 +742,16 @@ add_library(ggml-base STATIC
ggml-backend.cpp
ggml-threading.cpp
ggml-threading.h
ggml-quants.c # for quantize functions TODO: move dot fns to a separate file
ggml-quants.c
ggml-quants.h
ggml-aarch64.c
ggml-aarch64.h

)

add_subdirectory(ggml-cpu)
)

add_library(ggml
ggml-backend-reg.cpp
)
)
add_subdirectory(ggml-cpu)

target_link_libraries(ggml PUBLIC ggml-base ggml-cpu)

Expand Down
6 changes: 3 additions & 3 deletions ggml/src/ggml-backend-reg.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "ggml-backend.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-cpu.h"
#include "ggml-impl.h"
#include <cstring>
#include <vector>

Expand Down Expand Up @@ -45,8 +47,6 @@
#include "ggml-kompute.h"
#endif

#include "ggml-cpu.h"

struct ggml_backend_registry {
std::vector<ggml_backend_reg_t> backends;
std::vector<ggml_backend_dev_t> devices;
Expand Down
23 changes: 0 additions & 23 deletions ggml/src/ggml-cpu/ggml-cpu-quants.c
Original file line number Diff line number Diff line change
Expand Up @@ -10783,22 +10783,6 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * restrict s, size_t bs, const void *
#endif
}

//
// ============================================= 3-bit using D4 lattice
//

void quantize_row_iq3_xxs(const float * restrict x, void * restrict vy, int64_t k) {
assert(k % QK_K == 0);
block_iq3_xxs * restrict y = vy;
quantize_row_iq3_xxs_ref(x, y, k);
}

void quantize_row_iq3_s(const float * restrict x, void * restrict vy, int64_t k) {
assert(k % QK_K == 0);
block_iq3_s * restrict y = vy;
quantize_row_iq3_s_ref(x, y, k);
}

// ============================ 4-bit non-linear quants

void quantize_row_iq4_nl(const float * restrict x, void * restrict y, int64_t k) {
Expand All @@ -10810,10 +10794,3 @@ void quantize_row_iq4_xs(const float * restrict x, void * restrict y, int64_t k)
assert(k % QK_K == 0);
quantize_iq4_xs(x, y, 1, k, NULL);
}

// =============================== 2.5625 bpw

// Row entry point for IQ2_S: asserts that k is a multiple of QK_K, then
// forwards x, y and k to quantize_iq2_s.
void quantize_row_iq2_s(const float * restrict x, void * restrict y, int64_t k) {
assert(k % QK_K == 0);
// NOTE(review): the literal 1 and NULL are presumably a row count and an absent importance matrix — confirm against quantize_iq2_s.
quantize_iq2_s(x, y, 1, k, NULL);
}
3 changes: 0 additions & 3 deletions ggml/src/ggml-cpu/ggml-cpu-quants.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,8 @@ void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
void quantize_row_tq1_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);

void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_iq3_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
void quantize_row_iq2_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);

// Dot product
void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
Expand Down
7 changes: 4 additions & 3 deletions ggml/src/ggml-cpu/ggml-cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -356,19 +356,20 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
.nrows = 1,
},
[GGML_TYPE_IQ3_XXS] = {
.from_float = quantize_row_iq3_xxs,
// NOTE: from_float for iq3 and iq2_s was removed because these quants require initialization in ggml_quantize_init
//.from_float = quantize_row_iq3_xxs,
.vec_dot = ggml_vec_dot_iq3_xxs_q8_K,
.vec_dot_type = GGML_TYPE_Q8_K,
.nrows = 1,
},
[GGML_TYPE_IQ3_S] = {
.from_float = quantize_row_iq3_s,
//.from_float = quantize_row_iq3_s,
.vec_dot = ggml_vec_dot_iq3_s_q8_K,
.vec_dot_type = GGML_TYPE_Q8_K,
.nrows = 1,
},
[GGML_TYPE_IQ2_S] = {
.from_float = quantize_row_iq2_s,
//.from_float = quantize_row_iq2_s,
.vec_dot = ggml_vec_dot_iq2_s_q8_K,
.vec_dot_type = GGML_TYPE_Q8_K,
.nrows = 1,
Expand Down
8 changes: 8 additions & 0 deletions ggml/src/ggml-cpu/ggml-cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@
#include <sys/sysctl.h>
#endif

#if defined(_WIN32)
// Configure <windows.h> before including it: WIN32_LEAN_AND_MEAN leaves out
// rarely used APIs and NOMINMAX suppresses the min/max macros. Both are
// guarded so that a definition supplied earlier (for example on the compiler
// command line) is not redefined.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif

// ggml-backend interface

#ifdef GGML_USE_CPU_HBM
Expand Down
Loading

0 comments on commit 28b3b76

Please sign in to comment.