From 73c8cb363e21b3482b942c2aa31ae7ce012e9691 Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Sat, 6 Dec 2025 21:27:00 +0800 Subject: [PATCH 1/3] support gpt-oss GPU by OP add-id, mul_mat for mxfp4, swiglu_oai, fix warning --- ggml/src/ggml-sycl/add-id.cpp | 77 +++++++++++++++++++++++ ggml/src/ggml-sycl/add-id.hpp | 8 +++ ggml/src/ggml-sycl/common.hpp | 17 +++++ ggml/src/ggml-sycl/convert.cpp | 15 +++++ ggml/src/ggml-sycl/dequantize.hpp | 18 ++++++ ggml/src/ggml-sycl/dpct/helper.hpp | 59 +++++++++++++++++- ggml/src/ggml-sycl/element_wise.cpp | 97 +++++++++++++++++++++++++++++ ggml/src/ggml-sycl/element_wise.hpp | 4 ++ ggml/src/ggml-sycl/ggml-sycl.cpp | 28 ++++++--- ggml/src/ggml-sycl/mmvq.cpp | 22 +++++++ ggml/src/ggml-sycl/pad.cpp | 10 +-- ggml/src/ggml-sycl/ssm_conv.cpp | 2 +- ggml/src/ggml-sycl/vecdotq.hpp | 58 +++++++++++++++++ 13 files changed, 399 insertions(+), 16 deletions(-) create mode 100644 ggml/src/ggml-sycl/add-id.cpp create mode 100644 ggml/src/ggml-sycl/add-id.hpp diff --git a/ggml/src/ggml-sycl/add-id.cpp b/ggml/src/ggml-sycl/add-id.cpp new file mode 100644 index 00000000000..00c073cf937 --- /dev/null +++ b/ggml/src/ggml-sycl/add-id.cpp @@ -0,0 +1,77 @@ +#include +#include "common.hpp" +#include "add-id.hpp" + +static void add_id_kernel( + const float* src0, + const float* src1, + const int32_t* src2, + float* dst, + int64_t ne0, + int64_t ne1, + size_t nb01, + size_t nb02, + size_t nb11, + size_t nb21, + sycl::nd_item<3> item_ct1) { + const int64_t i1 = item_ct1.get_group(2); + const int64_t i2 = item_ct1.get_group(1); + + const int i11 = + *(const int32_t*)((const char*)src2 + i1 * sizeof(int32_t) + i2 * nb21); + + const size_t nb1 = ne0 * sizeof(float); + const size_t nb2 = ne1 * nb1; + + float* dst_row = (float*)((char*)dst + i1 * nb1 + i2 * nb2); + const float* src0_row = + (const float*)((const char*)src0 + i1 * nb01 + i2 * nb02); + const float* src1_row = (const float*)((const char*)src1 + i11 * nb11); + + for (int64_t i0 = item_ct1.get_local_id(2); i0 < ne0; + i0 += item_ct1.get_local_range(2)) { + dst_row[i0] = src0_row[i0] + src1_row[i0]; + } +} + +void ggml_sycl_add_id(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { + const ggml_tensor* src0 = dst->src[0]; + const ggml_tensor* src1 = dst->src[1]; + const ggml_tensor* src2 = dst->src[2]; + + GGML_TENSOR_TERNARY_OP_LOCALS + + GGML_ASSERT(dst->type == GGML_TYPE_F32); + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + GGML_ASSERT(src2->type == GGML_TYPE_I32); + + GGML_ASSERT(nb00 == sizeof(float)); + GGML_ASSERT(nb10 == sizeof(float)); + GGML_ASSERT(nb20 == sizeof(int32_t)); + + const float* src0_d = (const float*)src0->data; + const float* src1_d = (const float*)src1->data; + const int32_t* src2_d = (const int32_t*)src2->data; + float* dst_d = (float*)dst->data; + + int threads = std::min((int)ne00, 768); // cols + ctx.stream()->parallel_for( + sycl::nd_range<3>( + sycl::range<3>(1, ne02, ne01) * sycl::range<3>(1, 1, threads), + sycl::range<3>(1, 1, threads)), + [=](sycl::nd_item<3> item_ct1) { + add_id_kernel( + src0_d, + src1_d, + src2_d, + dst_d, + ne0, + ne1, + nb01, + nb02, + nb11, + nb21, + item_ct1); + }); +} diff --git a/ggml/src/ggml-sycl/add-id.hpp b/ggml/src/ggml-sycl/add-id.hpp new file mode 100644 index 00000000000..7df65b049c3 --- /dev/null +++ b/ggml/src/ggml-sycl/add-id.hpp @@ -0,0 +1,8 @@ +#ifndef GGML_SYCL_ADD_ID_HPP +#define GGML_SYCL_ADD_ID_HPP + +#include "common.hpp" + +void ggml_sycl_add_id(ggml_backend_sycl_context & ctx, ggml_tensor * dst); + +#endif // GGML_SYCL_ADD_ID_HPP \ No newline at end of file diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index 637630c1d23..519638fd416 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -642,5 +642,22 @@ static __dpct_inline__ sycl::uint2 fast_div_modulo(uint32_t n, const sycl::uint3 return sycl::uint2(div_val, mod_val); } +static __dpct_inline__ int ggml_sycl_dp4a(const int a, const int b, int c) { + return dpct::dp4a(a, b, c); +} + +static __dpct_inline__ float ggml_sycl_e8m0_to_fp32(uint8_t x) { + uint32_t bits; + if (x == 0) { + bits = 0x00400000; + } else { + bits = (uint32_t) x << 23; + } + + float result; + memcpy(&result, &bits, sizeof(float)); + return result; +} + #endif // GGML_SYCL_COMMON_HPP diff --git a/ggml/src/ggml-sycl/convert.cpp b/ggml/src/ggml-sycl/convert.cpp index 7c6ea8a57a2..8bdae36458c 100644 --- a/ggml/src/ggml-sycl/convert.cpp +++ b/ggml/src/ggml-sycl/convert.cpp @@ -472,6 +472,16 @@ static void dequantize_row_iq4_nl_sycl(const void *vx, dst_t *y, const int64_t k } } +template +static void dequantize_row_mxfp4_sycl(const void * vx, dst_t * y, const int64_t k, dpct::queue_ptr stream) { + const int nb = (k + QK_K - 1) / QK_K; + stream->parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { + dequantize_block_mxfp4(vx, y, item_ct1); + }); +} + template static void convert_unary_nc(const void * __restrict__ vx, dst_t * __restrict__ y, const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t s01, const int64_t s02, const int64_t s03, @@ -518,6 +528,7 @@ static void convert_unary_sycl(const void * vx, dst_t * y, const int64_t k, dpct convert_unary_nc_sycl(vx, y, k, 1, 1, 1, k, k, k, queue); } + to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type, ggml_tensor * dst) { switch (type) { case GGML_TYPE_Q4_0: @@ -571,6 +582,8 @@ to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type, ggml_tensor * dst) { return dequantize_row_iq4_xs_sycl; case GGML_TYPE_IQ4_NL: return dequantize_row_iq4_nl_sycl; + case GGML_TYPE_MXFP4: + return dequantize_row_mxfp4_sycl; case GGML_TYPE_F32: return convert_unary_sycl; #ifdef GGML_SYCL_HAS_BF16 @@ -636,6 +649,8 @@ to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type, ggml_tensor *dst) { return dequantize_row_iq4_xs_sycl; case GGML_TYPE_IQ4_NL: return dequantize_row_iq4_nl_sycl; + case GGML_TYPE_MXFP4: + return dequantize_row_mxfp4_sycl; case GGML_TYPE_F16: return convert_unary_sycl; #ifdef GGML_SYCL_HAS_BF16 diff --git a/ggml/src/ggml-sycl/dequantize.hpp b/ggml/src/ggml-sycl/dequantize.hpp index 540539bb223..da2a605daa8 100644 --- a/ggml/src/ggml-sycl/dequantize.hpp +++ b/ggml/src/ggml-sycl/dequantize.hpp @@ -819,5 +819,23 @@ dequantize_block_iq4_xs(const void *__restrict__ vx, dst_t *__restrict__ yy, } } +template +static void dequantize_block_mxfp4(const void * __restrict__ vx, dst_t * __restrict__ yy, + const sycl::nd_item<3> &item_ct1) { + // auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); + const int64_t i = item_ct1.get_group(2); + const block_mxfp4 * x = (const block_mxfp4 *) vx + i*(QK_K/QK_MXFP4); + + const int64_t tid = item_ct1.get_local_id(2); + const int64_t il = tid/8; // 0...3 + const int64_t ib = tid%8; // 0...7 + dst_t * y = yy + i*QK_K + 32*ib + 4*il; + const uint8_t * q4 = x[ib].qs + 4*il; + const float d = ggml_sycl_e8m0_to_fp32(x[ib].e); + for (int j = 0; j < 4; ++j) { + y[j+ 0] = d * kvalues_mxfp4[q4[j] & 0xf]*0.5f; + y[j+16] = d * kvalues_mxfp4[q4[j] >> 4]*0.5f; + } +} #endif // GGML_SYCL_DEQUANTIZE_HPP diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index f93cfa701f5..30ec1e8dafc 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -1860,10 +1860,31 @@ namespace dpct : id); } + template + using dot_product_acc_t = std::conditional_t< + std::is_unsigned_v && std::is_unsigned_v, + uint32_t, + int32_t>; + + template + sycl::vec extract_and_sign_or_zero_extend4(T val) { + return sycl::vec(val) + .template as, int8_t, uint8_t>, + 4>>() + .template convert(); + } + template - inline auto dp4a(T1 a, T2 b, T3 c) - { - return syclcompat::dp4a(a, b, c); + inline auto dp4a(T1 a, T2 b, T3 c) { + dot_product_acc_t res = c; + auto va = extract_and_sign_or_zero_extend4(a); + auto vb = extract_and_sign_or_zero_extend4(b); + res += va[0] * vb[0]; + res += va[1] * vb[1]; + res += va[2] * vb[2]; + res += va[3] * vb[3]; + return res; } struct sub_sat @@ -2972,6 +2993,38 @@ namespace dpct atomic_fetch_add(addr, operand, memoryOrder); } + inline unsigned int byte_level_permute( + unsigned int a, unsigned int b, unsigned int s) { + unsigned int ret; + ret = ((((std::uint64_t)b << 32 | a) >> (s & 0x7) * 8) & 0xff) | + (((((std::uint64_t)b << 32 | a) >> ((s >> 4) & 0x7) * 8) & 0xff) + << 8) | + (((((std::uint64_t)b << 32 | a) >> ((s >> 8) & 0x7) * 8) & 0xff) + << 16) | + (((((std::uint64_t)b << 32 | a) >> ((s >> 12) & 0x7) * 8) & 0xff) + << 24); + return ret; + } + + inline uint32_t byte_level_permute_custom( + uint32_t low32, uint32_t high32, uint32_t sel, int mode = 0) { + constexpr uint16_t lookup[6][4] = { + {0x3210, 0x4321, 0x5432, 0x6543}, // Forward 4-byte extract + {0x5670, 0x6701, 0x7012, 0x0123}, // Backward 4-byte extract + {0x0000, 0x1111, 0x2222, 0x3333}, // Replicate 8-bit values + {0x3210, 0x3211, 0x3222, 0x3333}, // Edge clamp left + {0x0000, 0x1110, 0x2210, 0x3210}, // Edge clamp right + {0x1010, 0x3232, 0x1010, 0x3232} // Replicate 16-bit values + }; + + if (mode >= 1 && mode <= 6) { + return byte_level_permute(low32, high32, lookup[mode - 1][sel & 0x3]); + } else if (!mode) { + return byte_level_permute(low32, high32, sel); + } + return 0; + } + } // COPY from DPCT head files #endif // GGML_SYCL_DPCT_HELPER_HPP diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index 7d54ce600ee..8d83b2446bd 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -911,6 +911,98 @@ static inline void ggml_sycl_op_swiglu(ggml_backend_sycl_context & ctx, ggml_ten }); } +__dpct_inline__ float ggml_sycl_op_swiglu_oai_single(float x, float g, float alpha = 1.702f, float limit = 7.0f) { + x = sycl::fmin(x, limit); + g = sycl::fmax(sycl::fmin(g, limit), -limit); + + float out_glu = x / (1.0f + sycl::native::exp(-x * alpha)); + out_glu = out_glu * (1.0f + g); + return out_glu; +} + + +template +static void swiglu_oai_kernel(const T * x, const T * g, T * dst, const int64_t k, + const int64_t n, const int64_t o0, const int64_t o1, + float alpha, float limit, sycl::nd_item<3> item_ct1) { + const int64_t i = int64_t(item_ct1.get_local_range(2)) * item_ct1.get_group(2) + item_ct1.get_local_id(2); + + if (i >= k) { + return; + } + + const int64_t j0 = (i / n) * o0 + (i % n); + const int64_t j1 = o0 == o1 ? j0 : (i / n) * o1 + (i % n); + + float xi = x[j0]; + float gi = g[j1]; + + dst[i] = ggml_sycl_op_swiglu_oai_single(xi, gi, alpha, limit); +} + +template +static void swiglu_oai_sycl(const T * x, + const T * g, + T * dst, + const int64_t k, + const int64_t n, + const int64_t o0, + const int64_t o1, + const float alpha, + const float limit, + dpct::queue_ptr stream) { + const int64_t num_blocks = (k + SYCL_GLU_BLOCK_SIZE - 1) / SYCL_GLU_BLOCK_SIZE; + stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_GLU_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_GLU_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { + swiglu_oai_kernel(x, g, dst, k, n, o0, o1, alpha, limit, item_ct1); + }); +} + +void ggml_sycl_op_swiglu_oai(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + void * src0_d = src0->data; + void * src1_d = src1 ? src1->data : src0->data; + const int64_t src0_o = src0->nb[1]; + const int64_t src1_o = src1 ? src1->nb[1] : src0->nb[1]; + void * dst_d = dst->data; + const int64_t nc = src1 ? src0->ne[0] : src0->ne[0] / 2; + dpct::queue_ptr stream = ctx.stream(); + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(src0->nb[0] == ggml_element_size(src0)); + GGML_ASSERT(ggml_is_contiguous(dst)); + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + GGML_ASSERT(src0->type == dst->type); + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == ggml_nrows(src0)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src1->nb[0] == ggml_element_size(src1)); + GGML_ASSERT(src1->ne[0] == nc); + GGML_ASSERT(src0->type == src1->type); + } + + //const int32_t swapped = ((const int32_t *) dst->op_params)[1]; + const int32_t swapped = ggml_get_op_params_i32(dst, 1); + const float alpha = ggml_get_op_params_f32(dst, 2); + const float limit = ggml_get_op_params_f32(dst, 3); + + float * src0_p = (float *) src0_d; + float * src1_p = (float *) src1_d; + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + swiglu_oai_sycl(src0_p, src1_p, (float *)dst_d, ggml_nelements(dst), nc, src0_o / sizeof(float), src1_o / sizeof(float), alpha, limit, stream); +} + static inline void ggml_sycl_op_geglu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu(ctx, dst, [](const auto* x_ptr, const auto* g_ptr, auto* dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) { @@ -1070,6 +1162,11 @@ void ggml_sycl_swiglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { ggml_sycl_op_swiglu(ctx, dst); } +void ggml_sycl_swiglu_oai(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); + ggml_sycl_op_swiglu_oai(ctx, dst); +} + void ggml_sycl_geglu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); ggml_sycl_op_geglu_erf(ctx, dst); diff --git a/ggml/src/ggml-sycl/element_wise.hpp b/ggml/src/ggml-sycl/element_wise.hpp index fcf93295cb2..0913a2e529b 100644 --- a/ggml/src/ggml-sycl/element_wise.hpp +++ b/ggml/src/ggml-sycl/element_wise.hpp @@ -5,6 +5,8 @@ #include "ggml.h" #include // For std::numeric_limits +#define SYCL_GLU_BLOCK_SIZE 256 + template T neg_infinity() { return -std::numeric_limits::infinity(); @@ -41,6 +43,8 @@ void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst); +void ggml_sycl_swiglu_oai(ggml_backend_sycl_context & ctx, ggml_tensor * dst); + void ggml_sycl_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 7449a916093..6b243374bdc 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -39,6 +39,7 @@ #include "ggml-impl.h" #include "ggml-backend-impl.h" +#include "ggml-sycl/add-id.hpp" #include "ggml-sycl/backend.hpp" #include "ggml-sycl/common.hpp" #include "ggml-sycl/element_wise.hpp" @@ -3313,6 +3314,7 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor bool use_mul_mat_q = ggml_sycl_supports_mmq(src0->type) && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32; + // mmvq and mmq need the __dp4a instruction which is available for gen12+ // Workaround in https://github.com/ggerganov/llama.cpp/commit/95f84d5ce8b449a9b16009434aca800df504a02e use_mul_mat_q = use_mul_mat_q && (src0->type != GGML_TYPE_IQ2_XXS); @@ -3320,7 +3322,6 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor use_mul_mat_q = use_mul_mat_q && (src1->ne[1] <= MMQ_MAX_BATCH_SIZE); #endif // SYCL_USE_XMX - // mmvq path is faster in the CUDA backend. if (!g_ggml_sycl_prioritize_dmmv && (ctx.stream()->get_backend() == sycl::backend::ext_oneapi_cuda // Dispatch becomes obscure with the reorder, MMVQ when the reorder optimization @@ -3711,6 +3712,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg case GGML_OP_ADD1: // TODO: more efficient implementation ggml_sycl_add(ctx, dst); break; + case GGML_OP_ADD_ID: + ggml_sycl_add_id(ctx, dst); + break; case GGML_OP_SUB: ggml_sycl_sub(ctx, dst); break; @@ -3803,6 +3807,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg case GGML_GLU_OP_SWIGLU: ggml_sycl_swiglu(ctx, dst); break; + case GGML_GLU_OP_SWIGLU_OAI: + ggml_sycl_swiglu_oai(ctx, dst); + break; case GGML_GLU_OP_GEGLU_ERF: ggml_sycl_geglu_erf(ctx, dst); break; @@ -4397,6 +4404,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g case GGML_GLU_OP_REGLU: case GGML_GLU_OP_GEGLU: case GGML_GLU_OP_SWIGLU: + case GGML_GLU_OP_SWIGLU_OAI: case GGML_GLU_OP_GEGLU_ERF: case GGML_GLU_OP_GEGLU_QUICK: return ggml_is_contiguous_1(op->src[0]); @@ -4424,19 +4432,25 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g } } ggml_type src0_type = op->src[0]->type; - if (src0_type == GGML_TYPE_BF16 || src0_type == GGML_TYPE_MXFP4) { - // TODO: support MXFP4 + if (src0_type == GGML_TYPE_BF16 ) { + // TODO: support GGML_TYPE_BF16 // FIXME: keep a list of supported types to avoid breaking the backend when a new type is added return false; } + // TODO: The configuration below needs more work to be supported with oneDNN - if (ggml_is_permuted(a) && !ggml_is_contiguous(a) && a->ne[2] > 1 && a->ne[3] > 1) { - return false; + if (ggml_is_permuted(a) && !ggml_is_contiguous(a) && + a->ne[2] > 1 && a->ne[3] > 1 && + src0_type == GGML_TYPE_F16) { + return false; } + + // With oneMKL: return true (as default) + // With oneDNN: return false // TODO: This specific configuration can fail with oneDNN and needs more debugging if (!ggml_is_permuted(a) && ggml_is_permuted(b) && b->ne[2] > 1 && b->ne[3] > 1 && a->ne[0] > 128 && a->ne[2] == 1 && src0_type == GGML_TYPE_F16) { - return false; + return true; } return true; } @@ -4553,9 +4567,9 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g case GGML_OP_VIEW: case GGML_OP_PERMUTE: case GGML_OP_TRANSPOSE: - return true; case GGML_OP_ADD: case GGML_OP_ADD1: + case GGML_OP_ADD_ID: case GGML_OP_SUB: case GGML_OP_COUNT_EQUAL: case GGML_OP_MUL: diff --git a/ggml/src/ggml-sycl/mmvq.cpp b/ggml/src/ggml-sycl/mmvq.cpp index 5b7f0640749..316aa0d0fb5 100644 --- a/ggml/src/ggml-sycl/mmvq.cpp +++ b/ggml/src/ggml-sycl/mmvq.cpp @@ -595,6 +595,25 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy, } } +static void mul_mat_vec_mxfp4_q8_1_sycl(const void * vx, const void * vy, float * dst, const int ncols, const int nrows, + dpct::queue_ptr stream) { + GGML_ASSERT(ncols % QK_MXFP4 == 0); + const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; + const sycl::range<3> block_nums(1, 1, block_num_y); + const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); + + { + stream->submit([&](sycl::handler & cgh) { + cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); + }); + } +} + + static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy, float *dst, const int ncols, const int nrows, @@ -1123,6 +1142,9 @@ void ggml_sycl_op_mul_mat_vec_q(ggml_backend_sycl_context & ctx, const ggml_tens case GGML_TYPE_IQ4_XS: mul_mat_vec_iq4_xs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream); break; + case GGML_TYPE_MXFP4: + mul_mat_vec_mxfp4_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream); + break; default: GGML_ABORT("fatal error"); } diff --git a/ggml/src/ggml-sycl/pad.cpp b/ggml/src/ggml-sycl/pad.cpp index 413712c5847..f989c5e4b8b 100644 --- a/ggml/src/ggml-sycl/pad.cpp +++ b/ggml/src/ggml-sycl/pad.cpp @@ -14,10 +14,10 @@ #include "pad.hpp" static void pad_f32(const float * src, float * dst, - const int lp0, const int rp0, const int lp1, const int rp1, - const int lp2, const int rp2, const int lp3, const int rp3, - const int ne0, const int ne1, const int ne2, const int ne3) { - auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); + const int lp0, const int rp0, const int lp1, const int rp1, + const int lp2, const int rp2, const int lp3, const int rp3, + const int ne0, const int ne1, const int ne2, const int ne3, + sycl::nd_item<3> item_ct1) { int i0 = item_ct1.get_local_id(2) + item_ct1.get_group(2) * item_ct1.get_local_range(2); int i1 = item_ct1.get_group(1); @@ -63,7 +63,7 @@ static void pad_f32_sycl(const float *src, float *dst, const int lp0, sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { pad_f32(src, dst, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3, ne0, ne1, - ne2, ne3); + ne2, ne3, item_ct1); }); } diff --git a/ggml/src/ggml-sycl/ssm_conv.cpp b/ggml/src/ggml-sycl/ssm_conv.cpp index 0dc0f71c9a1..eea9a73d67e 100644 --- a/ggml/src/ggml-sycl/ssm_conv.cpp +++ b/ggml/src/ggml-sycl/ssm_conv.cpp @@ -88,7 +88,7 @@ void ggml_sycl_ssm_conv(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_ASSERT(src0->nb[0] == sizeof(float)); GGML_ASSERT(src1->nb[0] == sizeof(float)); - GGML_ASSERT(src0->nb[1] == src0->ne[0] * static_cast(sizeof(float))); + GGML_ASSERT(src0->nb[1] == src0->ne[0] * sizeof(float)); const int src_stride_inner = ncs; const int src_stride_seq = ncs * d_inner; diff --git a/ggml/src/ggml-sycl/vecdotq.hpp b/ggml/src/ggml-sycl/vecdotq.hpp index 4088ddb54f0..43482b3672c 100644 --- a/ggml/src/ggml-sycl/vecdotq.hpp +++ b/ggml/src/ggml-sycl/vecdotq.hpp @@ -20,6 +20,18 @@ typedef float (*vec_dot_q_sycl_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs); +static __dpct_inline__ int get_int_b1(const void * x, const int & i32) { + const uint8_t * x8 = (const uint8_t *) x; + + int x32 = x8[4*i32 + 0] << 0; + x32 |= x8[4*i32 + 1] << 8; + x32 |= x8[4*i32 + 2] << 16; + x32 |= x8[4*i32 + 3] << 24; + + return x32; +} + + static __dpct_inline__ int get_int_from_int8(const int8_t* x8, const int& i32) { const uint16_t* x16 = (const uint16_t*)(x8 + sizeof(int) * i32); // assume at least 2 byte @@ -75,6 +87,28 @@ static __dpct_inline__ void get_int_from_table_16(const uint32_t &q4, val2 = v1 | (v2 << 16); } +static __dpct_inline__ sycl::int2 get_int_from_table_16( + const int& q4, const int8_t* table) { + const uint32_t* table32 = (const uint32_t*)table; + uint32_t tmp[2]; + const uint32_t low_high_selection_indices = + (0x32103210 | ((q4 & 0x88888888) >> 1)); +#pragma unroll + for (uint32_t i = 0; i < 2; ++i) { + const uint32_t shift = 16 * i; + + const uint32_t low = + dpct::byte_level_permute(table32[0], table32[1], q4 >> shift); + const uint32_t high = + dpct::byte_level_permute(table32[2], table32[3], q4 >> shift); + tmp[i] = dpct::byte_level_permute( + low, high, low_high_selection_indices >> shift); + } + return sycl::int2( + dpct::byte_level_permute(tmp[0], tmp[1], 0x6420), + dpct::byte_level_permute(tmp[0], tmp[1], 0x7531)); +} + #define VDR_Q2_K_Q8_1_MMVQ 1 // contiguous v/x values @@ -685,6 +719,30 @@ vec_dot_q4_1_q8_1(const void *__restrict__ vbq, return vec_dot_q4_1_q8_1_impl(v, u, bq4_1->dm, bq8_1->ds); } +#define VDR_MXFP4_Q8_1_MMVQ 2 +#define VDR_MXFP4_Q8_1_MMQ 4 + +static __dpct_inline__ float vec_dot_mxfp4_q8_1(const void * __restrict__ vbq, + const block_q8_1 * __restrict__ bq8_1, + const int & iqs) { + const block_mxfp4 * bq4 = (const block_mxfp4 *) vbq; + + const int * q8 = (const int *) bq8_1->qs + iqs; + + int sumi = 0; +#pragma unroll + for (int l = 0; l < VDR_MXFP4_Q8_1_MMVQ; ++l) { + const int aux_q4 = get_int_b1(bq4->qs, iqs + l); + const sycl::int2 v = get_int_from_table_16(aux_q4, kvalues_mxfp4); + sumi = ggml_sycl_dp4a(v.x(), q8[l + 0], sumi); + sumi = ggml_sycl_dp4a(v.y(), q8[l + 4], sumi); + } + + const float d = ggml_sycl_e8m0_to_fp32(bq4->e) * 0.5f * (bq8_1->ds)[0]; + return d * sumi; +} + + static __dpct_inline__ float vec_dot_q5_0_q8_1(const void *__restrict__ vbq, const block_q8_1 *__restrict__ bq8_1, const int &iqs) { From b70857d36ec682b7d2b970e75853d834cdd783b0 Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Sat, 6 Dec 2025 22:55:19 +0800 Subject: [PATCH 2/3] fix fault ut case, update ops.md --- docs/ops/SYCL.csv | 1158 ++++++++++++++++++++---------- ggml/src/ggml-sycl/ggml-sycl.cpp | 7 +- 2 files changed, 799 insertions(+), 366 deletions(-) diff --git a/docs/ops/SYCL.csv b/docs/ops/SYCL.csv index 85a45d6ae0d..91b442bde8e 100644 --- a/docs/ops/SYCL.csv +++ b/docs/ops/SYCL.csv @@ -33,14 +33,14 @@ "SYCL0","SOFTPLUS","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","SYCL" "SYCL0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" -"SYCL0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","SYCL" -"SYCL0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","SYCL" -"SYCL0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","SYCL" -"SYCL0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","SYCL" -"SYCL0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","SYCL" -"SYCL0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","SYCL" -"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","SYCL" -"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","SYCL" +"SYCL0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL" "SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL" "SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL" @@ -287,14 +287,14 @@ "SYCL0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","yes","SYCL" "SYCL0","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","yes","SYCL" "SYCL0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","yes","SYCL" -"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=2.000000","support","0","no","SYCL" -"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=7.000000","support","0","no","SYCL" -"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=2.000000","support","0","no","SYCL" -"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=7.000000","support","0","no","SYCL" -"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=2.000000","support","0","no","SYCL" -"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=7.000000","support","0","no","SYCL" -"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=2.000000","support","0","no","SYCL" -"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=7.000000","support","0","no","SYCL" +"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=2.000000","support","1","yes","SYCL" +"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=7.000000","support","1","yes","SYCL" +"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=2.000000","support","1","yes","SYCL" +"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=7.000000","support","1","yes","SYCL" +"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=2.000000","support","1","yes","SYCL" +"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=7.000000","support","1","yes","SYCL" +"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=2.000000","support","1","yes","SYCL" +"SYCL0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=7.000000","support","1","yes","SYCL" "SYCL0","GET_ROWS","type=f32,n=76800,m=5,r=4,be1=1,be2=2,v=0","support","1","yes","SYCL" "SYCL0","GET_ROWS","type=f32,n=256,m=80000,r=70000,be1=2,be2=1,v=0","support","1","yes","SYCL" "SYCL0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=700,be2=100,v=0","support","1","yes","SYCL" @@ -4964,6 +4964,7 @@ "SYCL0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","SYCL" "SYCL0","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","0","no","SYCL" "SYCL0","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","0","no","SYCL" +"SYCL0","CONV_TRANSPOSE_2D","ne_input=[129,63,35,1],ne_kernel=[3,3,48,35],stride=1","support","0","no","SYCL" "SYCL0","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","SYCL" "SYCL0","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","SYCL" "SYCL0","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","SYCL" @@ -5419,17 +5420,45 @@ "SYCL0","CPY","type_src=f16,type_dst=f16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","SYCL" "SYCL0","CPY","type_src=f32,type_dst=f32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","SYCL" "SYCL0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","0","no","SYCL" +"SYCL0","CPY","type_src=i32,type_dst=i32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","0","no","SYCL" +"SYCL0","CPY","type_src=i32,type_dst=i32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","SYCL" "SYCL0","CPY","type_src=f32,type_dst=f32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","SYCL" -"SYCL0","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","SYCL" -"SYCL0","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","SYCL" -"SYCL0","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","SYCL" -"SYCL0","CONT","type=f32,ne=[2,3,5,7]","support","1","yes","SYCL" -"SYCL0","CONT","type=f16,ne=[2,1,1,1]","support","1","yes","SYCL" -"SYCL0","CONT","type=f16,ne=[2,1,3,5]","support","1","yes","SYCL" -"SYCL0","CONT","type=f16,ne=[2,3,5,7]","support","1","yes","SYCL" -"SYCL0","CONT","type=bf16,ne=[2,1,1,1]","support","0","no","SYCL" -"SYCL0","CONT","type=bf16,ne=[2,1,3,5]","support","0","no","SYCL" -"SYCL0","CONT","type=bf16,ne=[2,3,5,7]","support","0","no","SYCL" +"SYCL0","CONT","type=f32,ne=[2,1,1,1],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=f32,ne=[2,1,3,5],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=f32,ne=[2,3,5,7],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=f32,ne=[1,4,4,1],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=f32,ne=[1,8,17,1],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=f32,ne=[10,10,10,1],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=f32,ne=[2,1,1,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=f32,ne=[2,1,3,5],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=f32,ne=[2,3,5,7],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=f32,ne=[1,4,4,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=f32,ne=[1,8,17,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=f32,ne=[10,10,10,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[2,1,1,1],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[2,1,3,5],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[2,3,5,7],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[1,4,4,1],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[1,8,17,1],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[10,10,10,1],use_view_slice=1","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[2,1,1,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[2,1,3,5],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[2,3,5,7],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[1,4,4,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[1,8,17,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=i32,ne=[10,10,10,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=f16,ne=[2,1,1,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=f16,ne=[2,1,3,5],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=f16,ne=[2,3,5,7],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=f16,ne=[1,4,4,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=f16,ne=[1,8,17,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=f16,ne=[10,10,10,1],use_view_slice=0","support","1","yes","SYCL" +"SYCL0","CONT","type=bf16,ne=[2,1,1,1],use_view_slice=0","support","0","no","SYCL" +"SYCL0","CONT","type=bf16,ne=[2,1,3,5],use_view_slice=0","support","0","no","SYCL" +"SYCL0","CONT","type=bf16,ne=[2,3,5,7],use_view_slice=0","support","0","no","SYCL" +"SYCL0","CONT","type=bf16,ne=[1,4,4,1],use_view_slice=0","support","0","no","SYCL" +"SYCL0","CONT","type=bf16,ne=[1,8,17,1],use_view_slice=0","support","0","no","SYCL" +"SYCL0","CONT","type=bf16,ne=[10,10,10,1],use_view_slice=0","support","0","no","SYCL" "SYCL0","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","SYCL" "SYCL0","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","SYCL" "SYCL0","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","SYCL" @@ -5655,6 +5684,7 @@ "SYCL0","MUL","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","SYCL" "SYCL0","DIV","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","SYCL" "SYCL0","ADD1","type=f32,ne=[10,5,4,3]","support","1","yes","SYCL" +"SYCL0","ADD1","type=f32,ne=[1024,1024,1,1]","support","1","yes","SYCL" "SYCL0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000,inplace=0","support","1","yes","SYCL" "SYCL0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","SYCL" "SYCL0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=1","support","1","yes","SYCL" @@ -5791,15 +5821,15 @@ "SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -5944,15 +5974,15 @@ "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" @@ -5971,15 +6001,15 @@ "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -6002,15 +6032,15 @@ "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" @@ -6029,15 +6059,15 @@ "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -6176,15 +6206,15 @@ "SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -6207,15 +6237,15 @@ "SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -6238,15 +6268,15 @@ "SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -6269,15 +6299,15 @@ "SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -6300,15 +6330,15 @@ "SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -6331,15 +6361,15 @@ "SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -6362,15 +6392,15 @@ "SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -6393,81 +6423,81 @@ "SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" @@ -6489,12 +6519,12 @@ "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -6520,12 +6550,12 @@ "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","SYCL" +"SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL" @@ -6807,145 +6837,145 @@ "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" "SYCL0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","0","no","SYCL" -"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","SYCL" +"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","SYCL" "SYCL0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16","support","1","yes","SYCL" "SYCL0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=0,m=32,n=8192,k=64","support","1","yes","SYCL" @@ -6954,7 +6984,7 @@ "SYCL0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=1,m=32,n=8192,k=64","support","1","yes","SYCL" "SYCL0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=50,n=200,k=64","support","1","yes","SYCL" "SYCL0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=1,n_used=1,b=0,m=8,n=16,k=1","support","1","yes","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=32,n_used=2,b=0,m=2880,n=32,k=2880","support","0","no","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=32,n_used=2,b=0,m=2880,n=32,k=2880","support","1","yes","SYCL" "SYCL0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","SYCL" "SYCL0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","SYCL" "SYCL0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","SYCL" @@ -7387,78 +7417,78 @@ "SYCL0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","SYCL" "SYCL0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","SYCL" "SYCL0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","0","no","SYCL" -"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","0","no","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","SYCL" +"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","SYCL" "SYCL0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","SYCL" "SYCL0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","0","no","SYCL" "SYCL0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","0","no","SYCL" @@ -8589,64 +8619,68 @@ "SYCL0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","SYCL" "SYCL0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","SYCL" "SYCL0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=129","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=129","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=129","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=129","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=129","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=129","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=129","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=129","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=129","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=129","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=129","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=1","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=32","support","0","no","SYCL" -"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=129","support","0","no","SYCL" -"SYCL0","SQR","type=f16,ne=[10,5,4,3]","support","0","no","SYCL" -"SYCL0","SQRT","type=f16,ne=[10,3,3,2]","support","0","no","SYCL" -"SYCL0","LOG","type=f16,ne=[10,5,4,3]","support","0","no","SYCL" -"SYCL0","SIN","type=f16,ne=[10,2,2,2]","support","0","no","SYCL" -"SYCL0","COS","type=f16,ne=[10,2,2,2]","support","0","no","SYCL" -"SYCL0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","0","no","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","SYCL" +"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","SYCL" +"SYCL0","SQR","type=f16,ne=[10,5,4,3]","support","1","yes","SYCL" +"SYCL0","SQRT","type=f16,ne=[10,3,3,2]","support","1","yes","SYCL" +"SYCL0","LOG","type=f16,ne=[10,5,4,3]","support","1","yes","SYCL" +"SYCL0","SIN","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" +"SYCL0","COS","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" +"SYCL0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","SYCL" "SYCL0","LEAKY_RELU","type=f16,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","SYCL" -"SYCL0","FLOOR","type=f16,ne=[10,2,2,2]","support","0","no","SYCL" -"SYCL0","CEIL","type=f16,ne=[10,2,2,2]","support","0","no","SYCL" -"SYCL0","ROUND","type=f16,ne=[10,2,2,2]","support","0","no","SYCL" -"SYCL0","TRUNC","type=f16,ne=[10,2,2,2]","support","0","no","SYCL" -"SYCL0","SQR","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","SQRT","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","LOG","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","SIN","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","COS","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","0","no","SYCL" +"SYCL0","FLOOR","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" +"SYCL0","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" +"SYCL0","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" +"SYCL0","SQR","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","SQRT","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","LOG","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","SIN","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","COS","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","SYCL" "SYCL0","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","SYCL" -"SYCL0","FLOOR","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","CEIL","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","ROUND","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","TRUNC","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" +"SYCL0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","FLOOR","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL" +"SYCL0","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","CEIL","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL" +"SYCL0","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","ROUND","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL" "SYCL0","SQR","type=f32,ne=[10,5,4,3]","support","1","yes","SYCL" "SYCL0","SQRT","type=f32,ne=[10,3,3,2]","support","1","yes","SYCL" "SYCL0","LOG","type=f32,ne=[10,5,4,3]","support","1","yes","SYCL" @@ -8666,9 +8700,13 @@ "SYCL0","CLAMP","type=f32,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","SYCL" "SYCL0","LEAKY_RELU","type=f32,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","SYCL" "SYCL0","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","FLOOR","type=f32,ne=[1024,1024,1,1]","support","1","yes","SYCL" "SYCL0","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","CEIL","type=f32,ne=[1024,1024,1,1]","support","1","yes","SYCL" "SYCL0","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","ROUND","type=f32,ne=[1024,1024,1,1]","support","1","yes","SYCL" "SYCL0","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f32,ne=[1024,1024,1,1]","support","1","yes","SYCL" "SYCL0","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","SYCL" "SYCL0","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","SYCL" "SYCL0","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","SYCL" @@ -9411,28 +9449,405 @@ "SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","SYCL" "SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","SYCL" "SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[3,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[4,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[7,1,1,1],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[15,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[16,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[31,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[32,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[63,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[64,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[127,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[128,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[255,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[256,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[511,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[512,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[1023,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[2047,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[2048,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[4095,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[4096,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[8191,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[131072,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[262143,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[262144,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[524287,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[524288,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[1048575,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[1048576,1,1,1],order=0","support","0","no","SYCL" "SYCL0","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[1023,2,1,3],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[1024,2,1,3],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[1025,2,1,3],order=0","support","1","yes","SYCL" -"SYCL0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[2047,2,1,3],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[2048,2,1,3],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[2049,2,1,3],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[2,8,8192,1],order=0","support","1","yes","SYCL" -"SYCL0","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[3,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[4,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[7,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[15,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[16,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[31,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[32,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[63,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[64,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[127,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[128,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[255,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[256,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[511,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[512,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[1023,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[2047,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[2048,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[4095,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[4096,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[8191,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[131072,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[262143,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[262144,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[524287,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[524288,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[1048575,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[1048576,1,1,1],order=0","support","0","no","SYCL" "SYCL0","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[1023,2,1,3],order=1","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[1024,2,1,3],order=1","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[1025,2,1,3],order=1","support","1","yes","SYCL" -"SYCL0","ARGSORT","type=f32,ne=[16384,1,1,1],order=1","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[2047,2,1,3],order=1","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[12,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[13,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[13,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[15,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[15,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[15,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[19,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[19,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[19,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[19,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[27,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[27,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[27,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[27,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[27,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[43,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[43,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[43,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[43,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[43,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[64,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[75,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[64,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[75,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[64,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[75,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[64,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[75,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[64,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[75,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[128,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[139,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[128,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[139,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[128,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[139,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[128,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[139,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[128,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[139,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[128,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[139,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[256,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[267,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[256,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[267,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[256,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[267,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[256,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[267,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[256,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[267,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[256,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[267,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[512,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[523,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[512,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[523,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[512,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[523,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[512,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[523,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[512,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[523,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[512,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[523,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[512,1,1,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[523,1,2,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1035,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1035,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1035,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1035,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1035,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1035,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,1,1,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1035,1,2,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,1,1,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1035,1,2,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2059,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2059,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2059,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2059,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2059,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2059,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,1,1,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2059,1,2,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,1,1,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2059,1,2,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4096,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4107,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4096,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4107,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4096,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4107,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4096,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4107,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4096,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4107,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4096,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4107,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4096,1,1,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4107,1,2,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4096,1,1,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[4107,1,2,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8192,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8203,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8192,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8203,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8192,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8203,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8192,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8203,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8192,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8203,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8192,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8203,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8192,1,1,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8203,1,2,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8192,1,1,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[8203,1,2,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16395,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16395,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16395,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16395,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16395,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16395,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16395,1,2,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16395,1,2,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16395,1,2,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32768,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32779,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32768,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32779,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32768,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32779,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32768,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32779,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32768,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32779,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32768,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32779,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32768,1,1,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32779,1,2,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32768,1,1,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32779,1,2,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32768,1,1,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[32779,1,2,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65536,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65547,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65536,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65547,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65536,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65547,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65536,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65547,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65536,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65547,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65536,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65547,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65536,1,1,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65547,1,2,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65536,1,1,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65547,1,2,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65536,1,1,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[65547,1,2,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131072,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131083,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131072,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131083,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131072,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131083,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131072,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131083,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131072,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131083,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131072,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131083,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131072,1,1,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131083,1,2,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131072,1,1,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131083,1,2,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131072,1,1,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[131083,1,2,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262144,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262155,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262144,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262155,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262144,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262155,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262144,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262155,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262144,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262155,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262144,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262155,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262144,1,1,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262155,1,2,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262144,1,1,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262155,1,2,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262144,1,1,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[262155,1,2,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524288,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524299,1,2,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524288,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524299,1,2,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524288,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524299,1,2,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524288,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524299,1,2,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524288,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524299,1,2,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524288,1,1,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524299,1,2,1],k=100,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524288,1,1,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524299,1,2,1],k=500,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524288,1,1,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524299,1,2,1],k=1023,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524288,1,1,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[524299,1,2,1],k=9999,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16,10,10,10],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[60,10,10,10],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1023,2,1,3],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,2,1,3],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1025,2,1,3],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2047,2,1,3],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,2,1,3],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2049,2,1,3],k=1,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16,10,10,10],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[60,10,10,10],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1023,2,1,3],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,2,1,3],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1025,2,1,3],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2047,2,1,3],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,2,1,3],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2049,2,1,3],k=2,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16,10,10,10],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[60,10,10,10],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1023,2,1,3],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,2,1,3],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1025,2,1,3],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2047,2,1,3],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,2,1,3],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2049,2,1,3],k=3,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16,10,10,10],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[60,10,10,10],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1023,2,1,3],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,2,1,3],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1025,2,1,3],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2047,2,1,3],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,2,1,3],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2049,2,1,3],k=7,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16,10,10,10],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[60,10,10,10],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1023,2,1,3],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1024,2,1,3],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[1025,2,1,3],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2047,2,1,3],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2048,2,1,3],k=15,ties=0","support","0","no","SYCL" +"SYCL0","TOP_K","type=f32,ne=[2049,2,1,3],k=15,ties=0","support","0","no","SYCL" "SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","SYCL" "SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","SYCL" "SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","SYCL" @@ -9445,6 +9860,10 @@ "SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=1","support","0","no","SYCL" "SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic,flags=none","support","0","no","SYCL" "SYCL0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bicubic,flags=none","support","0","no","SYCL" +"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=0","support","0","no","SYCL" +"SYCL0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=1","support","0","no","SYCL" +"SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=none","support","0","no","SYCL" +"SYCL0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear,flags=none","support","0","no","SYCL" "SYCL0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=align_corners","support","0","no","SYCL" "SYCL0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bilinear,flags=align_corners","support","0","no","SYCL" "SYCL0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bilinear,flags=align_corners","support","0","no","SYCL" @@ -9479,9 +9898,23 @@ "SYCL0","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","SYCL" "SYCL0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","1","yes","SYCL" "SYCL0","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","yes","SYCL" +"SYCL0","ARANGE","type=f32,start=0.000000,stop=1048576.000000,step=1.000000","support","1","yes","SYCL" "SYCL0","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","SYCL" "SYCL0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","SYCL" "SYCL0","CUMSUM","type=f32,ne=[10,5,4,3]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[127,5,4,3]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[128,5,4,3]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[128,128,4,4]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[255,5,4,3]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[256,5,4,3]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[511,5,4,3]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[512,5,4,3]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[1023,5,4,3]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[1024,5,4,3]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[2047,5,4,3]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[2048,5,4,3]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[242004,1,1,1]","support","0","no","SYCL" +"SYCL0","CUMSUM","type=f32,ne=[375960,1,1,1]","support","0","no","SYCL" "SYCL0","XIELU","type=f32,ne=[10,5,4,3]","support","0","no","SYCL" "SYCL0","TRI","type=f32,ne=[10,10,4,3],tri_type=3","support","0","no","SYCL" "SYCL0","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","0","no","SYCL" @@ -9490,6 +9923,7 @@ "SYCL0","FILL","type=f32,ne=[10,10,4,3],c=0.000000","support","0","no","SYCL" "SYCL0","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","0","no","SYCL" "SYCL0","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","0","no","SYCL" +"SYCL0","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","0","no","SYCL" "SYCL0","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","SYCL" "SYCL0","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","SYCL" "SYCL0","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","SYCL" @@ -9497,6 +9931,8 @@ "SYCL0","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","SYCL" "SYCL0","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","SYCL" "SYCL0","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","SYCL" +"SYCL0","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[31,128,4,4]","support","0","no","SYCL" +"SYCL0","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[300,64,4,4]","support","0","no","SYCL" "SYCL0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","SYCL" "SYCL0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","SYCL" "SYCL0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","0","no","SYCL" diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 6b243374bdc..e996d98be8c 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -4440,17 +4440,14 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g // TODO: The configuration below needs more work to be supported with oneDNN if (ggml_is_permuted(a) && !ggml_is_contiguous(a) && - a->ne[2] > 1 && a->ne[3] > 1 && - src0_type == GGML_TYPE_F16) { + a->ne[2] > 1 && a->ne[3] > 1 && src0_type == GGML_TYPE_F16) { return false; } - // With oneMKL: return true (as default) - // With oneDNN: return false // TODO: This specific configuration can fail with oneDNN and needs more debugging if (!ggml_is_permuted(a) && ggml_is_permuted(b) && b->ne[2] > 1 && b->ne[3] > 1 && a->ne[0] > 128 && a->ne[2] == 1 && src0_type == GGML_TYPE_F16) { - return true; + return false; } return true; } From e5d514d6313708ba1a6ef096e5b75877d14eb359 Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Sun, 7 Dec 2025 09:19:03 +0800 Subject: [PATCH 3/3] rebase, fix format issue --- docs/ops.md | 18 +++++++++--------- ggml/src/ggml-sycl/add-id.hpp | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/ops.md b/docs/ops.md index ef1febccaec..b2caec75ad6 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -18,12 +18,12 @@ Legend: | ACC | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | ADD | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ | | ADD1 | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | -| ADD_ID | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | +| ADD_ID | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | -| ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | +| ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ | ❌ | ❌ | | CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | -| CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ | ❌ | +| CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | ❌ | ❌ | | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ❌ | ❌ | ❌ | | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | ❌ | | CONV_2D | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | @@ -31,7 +31,7 @@ Legend: | CONV_3D | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | CONV_TRANSPOSE_2D | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | -| COS | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | +| COS | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | | COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ | | CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | @@ -63,7 +63,7 @@ Legend: | IM2COL_3D | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | | L2_NORM | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | LEAKY_RELU | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | -| LOG | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | ✅ | ❌ | ❌ | ❌ | +| LOG | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | MEAN | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | MUL | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ | | MUL_MAT | 🟡 | 🟡 | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | @@ -97,14 +97,14 @@ Legend: | SIGMOID | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | 🟡 | ✅ | ❌ | ❌ | | SILU | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | 🟡 | ✅ | ❌ | ❌ | | SILU_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | -| SIN | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | +| SIN | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | | SOFTCAP | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | SOFTPLUS | ❌ | ❌ | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ | | SOFT_MAX | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | | SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ | ❌ | ❌ | | SOLVE_TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ | -| SQR | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | -| SQRT | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | +| SQR | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | +| SQRT | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | | SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ | | STEP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | ✅ | 🟡 | ✅ | ❌ | ❌ | @@ -112,7 +112,7 @@ Legend: | SUM | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | | SUM_ROWS | ❌ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | ❌ | ❌ | | SWIGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | ❌ | -| SWIGLU_OAI | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | 🟡 | ✅ | ❌ | ❌ | +| SWIGLU_OAI | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | 🟡 | ✅ | ❌ | ❌ | | TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | ❌ | | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | | TOP_K | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ | diff --git a/ggml/src/ggml-sycl/add-id.hpp b/ggml/src/ggml-sycl/add-id.hpp index 7df65b049c3..e1b09ee8c7c 100644 --- a/ggml/src/ggml-sycl/add-id.hpp +++ b/ggml/src/ggml-sycl/add-id.hpp @@ -5,4 +5,4 @@ void ggml_sycl_add_id(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -#endif // GGML_SYCL_ADD_ID_HPP \ No newline at end of file +#endif // GGML_SYCL_ADD_ID_HPP