Fix compile for INT4_ASYM_ZERO_NO_DEGRAD: remove the 4-bit zero_pt static_assert and update the int4 gemv test

zhewang1-intc · zhewang1-intc · commit cb58590c1fce · 2024-06-20T18:39:36.000+08:00
diff --git a/include/experimental/group/gemm/impl/int4_dequantize_xe.hpp b/include/experimental/group/gemm/impl/int4_dequantize_xe.hpp
@@ -101,13 +101,6 @@ class gemm_t<
       std::is_same<remove_const_t<dtype_b>, remove_const_t<int4x2>>::value ||
           std::is_same<remove_const_t<dtype_b>, remove_const_t<int4x8>>::value,
       "this is for 4bit matB ");
-  static_assert(
-      quant_info_.quant_mode != quant_mode::INT4_ASYM_ZERO_NO_DEGRAD &&
-          (std::is_same<remove_const_t<dtype_zero_pt>, remove_const_t<int4x2>>::
-               value ||
-           std::is_same<remove_const_t<dtype_zero_pt>, remove_const_t<int4x8>>::
-               value),
-      "this is for 4bit zero_pt ");
 
   /******** set memory attribute **********/
   static constexpr mem_space mem_space_a = mem_desc_a_t::space;
diff --git a/tests/integration/gemv/int4/main.cpp b/tests/integration/gemv/int4/main.cpp
@@ -40,14 +40,15 @@ class test_col_major_1 {
   static constexpr size_t sg_k = 1024 / 1;
   static constexpr size_t dequant_s = 128;
   // static constexpr quant_mode quant_mode = quant_mode::S4_ASYM;
-  static constexpr quant_mode quant_mode = quant_mode::S4_FULLRANGE_NO_ZP;
+  // static constexpr quant_mode quant_mode = quant_mode::S4_FULLRANGE_NO_ZP;
+  static constexpr quant_mode quant_mode = quant_mode::INT4_ASYM_ZERO_NO_DEGRAD;
 
   static constexpr size_t local_kslicing = 1;
   static constexpr size_t global_kslicing = 1;
   static constexpr mem_layout layout_a = mem_layout::row_major;
   static constexpr mem_layout layout_b = mem_layout::col_major;
   static constexpr mma_engine mma_eng = mma_engine::fpu;
-  static constexpr gpu_arch arch = gpu_arch::XeHpc;
+  static constexpr gpu_arch arch = gpu_arch::XeHpg;
   using data_type_a = fp16;
   using data_type_b = int4x8;
   using data_type_c = fp16;
@@ -131,7 +132,9 @@ std::vector<fp16> convert_int4(
     data_type_zero_pt zero_pt) {
   std::vector<fp16> dequant_fp16(sizeof(data_type_b) * 2);
 
-  int8_t zero_pt_i8 = zero_pt & 0xf;
+  int8_t zero_pt_i8;
+  if constexpr (quant_mode != quant_mode::INT4_ASYM_ZERO_NO_DEGRAD)
+    zero_pt_i8 = zero_pt & 0xf;
   for (uint32_t i = 0; i < dequant_fp16.size(); i++) {
     int8_t dequant_8bit = data_b & 0xf;
     if constexpr (quant_mode == quant_mode::S4_FULLRANGE_NO_ZP) {
@@ -173,15 +176,17 @@ std::vector<data_type_acc_in> dequantize_weight(
     for (uint32_t j = 0; j < width; j += step) {
       int start_b_in = i * width + j;
       int start_scale_in = start_b_in / step;
-      int start_zero_pt_in =
-          (j / step) * (matrix_n / pack_radio) + i / pack_radio;
+      int start_zero_pt_in = quant_mode == quant_mode::INT4_ASYM_ZERO_NO_DEGRAD
+          ? (j / step) * matrix_n + i
+          : (j / step) * (matrix_n / pack_radio) + i / pack_radio;
       int start_out =
           layout_b == mem_layout::row_major ? 0 : i * matrix_k + j * pack_radio;
+      data_type_zero_pt zp_value = zero_pt[start_zero_pt_in];
+      if constexpr (quant_mode != quant_mode::INT4_ASYM_ZERO_NO_DEGRAD)
+        zp_value = zp_value >> (4 * (i % pack_radio));
       for (uint32_t jj = 0; jj < step; jj++) {
         std::vector<fp16> dequant_fp16 = convert_int4<quant_mode>(
-            b[start_b_in + jj],
-            scale[start_scale_in],
-            zero_pt[start_zero_pt_in] >> (4 * (i % pack_radio)));
+            b[start_b_in + jj], scale[start_scale_in], zp_value);
         for (uint32_t jjj = 0; jjj < dequant_fp16.size(); jjj++) {
           b_out[start_out + pack_radio * jj + jjj] = dequant_fp16[jjj];
         }
@@ -502,7 +507,9 @@ void dequantize_gemv_run(int iter) {
             Acc_d,
             Cnt_d,
             epilogue_args);
-  } else if constexpr (compute_policy::quant_mode == quant_mode::S4_ASYM) {
+  } else if constexpr (
+      compute_policy::quant_mode == quant_mode::S4_ASYM ||
+      compute_policy::quant_mode == quant_mode::INT4_ASYM_ZERO_NO_DEGRAD) {
     gemm_arg =
         typename gemm_op_t::template arguments_t<compute_policy::quant_mode>(
             matrix_m,