Skip to content

Commit 9946bac

Browse files
committed
xe: jit: fix shr usage
1 parent 5ab84bf commit 9946bac

File tree

1 file changed

+31
-7
lines changed

1 file changed

+31
-7
lines changed

src/gpu/intel/jit/codegen/kernel.hpp

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -827,8 +827,8 @@ class ir_to_ngen_generator_t : public BaseGeneratorT {
827827
mach(1, _qot, _x, m);
828828
add(1, p_tmp, p, -32);
829829
cmp(1 | ge | f0[0], p, 32);
830-
shr<uint32_t>(1 | f0[0], _qot, _qot, p_tmp);
831-
shr<uint32_t>(1 | ~f0[0], _qot, _x, p);
830+
eshr(1 | f0[0], _qot, _qot, p_tmp);
831+
eshr(1 | ~f0[0], _qot, _x, p);
832832
if (!qot.isInvalid()) mov(mod, qot, _qot);
833833

834834
if (!rem.isInvalid()) {
@@ -915,7 +915,7 @@ class ir_to_ngen_generator_t : public BaseGeneratorT {
915915
_x = ra_.alloc_sub(div_type);
916916
mov(1, _x, x);
917917
}
918-
if (!qot.isInvalid()) shr(mod, qot, _x, ngen::utils::log2(y));
918+
if (!qot.isInvalid()) eshr(mod, qot, _x, ngen::utils::log2(y));
919919
if (!rem.isInvalid()) and_(mod, rem, _x, y - 1);
920920
if (_x != x) ra_.safeRelease(_x);
921921
return;
@@ -930,10 +930,18 @@ class ir_to_ngen_generator_t : public BaseGeneratorT {
930930
auto _qot = qot_tmp[0];
931931
mov(1, _x, x);
932932

933-
auto acc = acc0.retype(div_type);
934-
mul(1, acc[0], _x, m & 0xFFFF);
935-
mach(1, _qot, _x, m);
936-
shr<uint32_t>(1, _qot, _qot, p - 32);
933+
// qot = (x * m) >> p
934+
bool use_mach = true;
935+
if (use_mach) {
936+
auto acc = acc0.retype(div_type);
937+
mul(1, acc[0], _x, m & 0xFFFF);
938+
mach(1, _qot, _x, m);
939+
eshr(1, _qot, _qot, p - 32);
940+
} else {
941+
auto q_tmp = qot_tmp.retype(ngen::DataType::q);
942+
emul(1, q_tmp[0], _x, m);
943+
eshr(1, q_tmp, q_tmp, p);
944+
}
937945

938946
if (!rem.isInvalid()) {
939947
// rem = x - qot * y
@@ -1014,6 +1022,22 @@ class ir_to_ngen_generator_t : public BaseGeneratorT {
10141022
*this, mod, dst, src0, src1, emu_strategy_, emu_state_);
10151023
}
10161024

1025+
// Emits a right shift of src0 by src1 into dst, selecting the instruction
// from the type of src0: asr when ngen::isSigned(src0.getType()) is true,
// shr otherwise. DT is forwarded unchanged as the template argument of
// asr/shr. When ngen_is_qw(src0.getType()) is true, neither shift is issued
// and gpu_error_not_expected() is invoked instead.
// NOTE(review): asr/shr are presumably the arithmetic/logical right-shift
// instructions of the base generator - confirm against nGEN.
template <typename DT = void>
1026+
void eshr(const ngen::InstructionModifier &mod, ngen::RegData dst,
1027+
ngen::RegData src1) {
1028+
// Only the type of src0 is inspected; dst and src1 types are not checked here.
// NOTE(review): ngen_is_qw presumably reports 64-bit integer types - confirm.
bool is_q = ngen_is_qw(src0.getType());
1029+
if (is_q) {
1030+
// This case has no emulation path in this helper; it is reported as an error.
gpu_error_not_expected()
1031+
<< "eshr(q, q, reg_data) case not implemented";
1032+
} else {
1033+
// Pick the shift instruction according to the signedness of src0.
if (ngen::isSigned(src0.getType())) {
1034+
asr<DT>(mod, dst, src0, src1);
1035+
} else {
1036+
shr<DT>(mod, dst, src0, src1);
1037+
}
1038+
}
1039+
}
1040+
10171041
void esel(const ngen::InstructionModifier &mod, const ngen_operand_t &dst,
10181042
const ngen_operand_t &src0, const ngen_operand_t &src1) {
10191043
if (ngen_is_qw(dst.type())) {

0 commit comments

Comments
 (0)