@@ -827,8 +827,8 @@ class ir_to_ngen_generator_t : public BaseGeneratorT {
827827        mach (1 , _qot, _x, m);
828828        add (1 , p_tmp, p, -32 );
829829        cmp (1  | ge | f0[0 ], p, 32 );
830-         shr< uint32_t > (1  | f0[0 ], _qot, _qot, p_tmp);
831-         shr< uint32_t > (1  | ~f0[0 ], _qot, _x, p);
830+         eshr (1  | f0[0 ], _qot, _qot, p_tmp);
831+         eshr (1  | ~f0[0 ], _qot, _x, p);
832832        if  (!qot.isInvalid ()) mov (mod, qot, _qot);
833833
834834        if  (!rem.isInvalid ()) {
@@ -915,7 +915,7 @@ class ir_to_ngen_generator_t : public BaseGeneratorT {
915915                _x = ra_.alloc_sub (div_type);
916916                mov (1 , _x, x);
917917            }
918-             if  (!qot.isInvalid ()) shr (mod, qot, _x, ngen::utils::log2 (y));
918+             if  (!qot.isInvalid ()) eshr (mod, qot, _x, ngen::utils::log2 (y));
919919            if  (!rem.isInvalid ()) and_ (mod, rem, _x, y - 1 );
920920            if  (_x != x) ra_.safeRelease (_x);
921921            return ;
@@ -930,10 +930,18 @@ class ir_to_ngen_generator_t : public BaseGeneratorT {
930930        auto  _qot = qot_tmp[0 ];
931931        mov (1 , _x, x);
932932
933-         auto  acc = acc0.retype (div_type);
934-         mul (1 , acc[0 ], _x, m & 0xFFFF );
935-         mach (1 , _qot, _x, m);
936-         shr<uint32_t >(1 , _qot, _qot, p - 32 );
933+         //  qot = (x * m) >> p
934+         bool  use_mach = true ;
935+         if  (use_mach) {
936+             auto  acc = acc0.retype (div_type);
937+             mul (1 , acc[0 ], _x, m & 0xFFFF );
938+             mach (1 , _qot, _x, m);
939+             eshr (1 , _qot, _qot, p - 32 );
940+         } else  {
941+             auto  q_tmp = qot_tmp.retype (ngen::DataType::q);
942+             emul (1 , q_tmp[0 ], _x, m);
943+             eshr (1 , q_tmp, q_tmp, p);
944+         }
937945
938946        if  (!rem.isInvalid ()) {
939947            //  rem = x - qot * y
@@ -1014,6 +1022,22 @@ class ir_to_ngen_generator_t : public BaseGeneratorT {
10141022                *this , mod, dst, src0, src1, emu_strategy_, emu_state_);
10151023    }
10161024
1025+     template  <typename  DT = void >
1026+     void  eshr (const  ngen::InstructionModifier &mod, ngen::RegData dst,
1027+             ngen::RegData src0, ngen::RegData src1) {
1028+         bool  is_q = ngen_is_qw (src0.getType ());
1029+         if  (is_q) {
1030+             gpu_error_not_expected ()
1031+                     << " eshr(q, q, reg_data) case not implemented" 
1032+         } else  {
1033+             if  (ngen::isSigned (src0.getType ())) {
1034+                 asr<DT>(mod, dst, src0, src1);
1035+             } else  {
1036+                 shr<DT>(mod, dst, src0, src1);
1037+             }
1038+         }
1039+     }
1040+ 
10171041    void  esel (const  ngen::InstructionModifier &mod, const  ngen_operand_t  &dst,
10181042            const  ngen_operand_t  &src0, const  ngen_operand_t  &src1) {
10191043        if  (ngen_is_qw (dst.type ())) {
0 commit comments