@@ -924,29 +924,17 @@ int IR_Builder::translateVISAArithmeticDoubleInst(
924
924
tmpCR0ForRoundRestore, tmpCR0ForRoundDenormRestore);
925
925
}; // for loop
926
926
927
- // make final copy to dst
928
- if (!noDstMove || !hasDefaultRoundDenorm) {
929
- G4_SrcRegRegion tsrc8_final (
930
- * this , Mod_src_undef, Direct,
931
- noDstMove ? dstOpnd-> getBase () : t8->getRegVar (),
932
- noDstMove ? dstOpnd-> getRegOff () : 0 , 0 , getRegionStride1 (), Type_DF);
927
+ if (!noDstMove) {
928
+ // make final copy to dst
929
+ // dst = r8:df mov (instExecSize) dstOpnd, t8_src_opnd_final {Q1/N1}
930
+ // final result is at r8.noacc
931
+ G4_SrcRegRegion tsrc8_final (* this , Mod_src_undef, Direct, t8->getRegVar (),
932
+ 0 , 0 , getRegionStride1 (), Type_DF);
933
933
G4_SrcRegRegion *t8_src_opnd_final = createSrcRegRegion (tsrc8_final);
934
934
t8_src_opnd_final->setAccRegSel (ACC_UNDEFINED);
935
- if (hasDefaultRoundDenorm) {
936
- // mov(instExecSize) dstOpnd, t8_src_opnd_final
937
- inst = createInst (predOpnd, G4_mov, nullptr , saturate, instExecSize,
938
- dstOpnd, t8_src_opnd_final, nullptr ,
939
- Get_Gen4_Emask (emask, instExecSize), true );
940
- } else {
941
- // If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
942
- // When denorm flush-to-zero is set, mov instructions with the same source
943
- // and destination data type may retain denorm as output. So, we need to
944
- // use add instruction instead.
945
- // add(instExecSize) dstOpnd, t8_src_opnd_final 0.0:df
946
- inst = createInst (predOpnd, G4_add, nullptr , saturate, instExecSize,
947
- dstOpnd, t8_src_opnd_final, createImm (0 , Type_DF),
948
- Get_Gen4_Emask (emask, instExecSize), true );
949
- }
935
+ inst = createInst (predOpnd, G4_mov, nullptr , saturate, instExecSize,
936
+ dstOpnd, t8_src_opnd_final, NULL ,
937
+ Get_Gen4_Emask (emask, instExecSize), true );
950
938
}
951
939
952
940
return VISA_SUCCESS;
@@ -1292,22 +1280,11 @@ int IR_Builder::translateVISAArithmeticSingleDivideIEEEInst(
1292
1280
};
1293
1281
1294
1282
// make final copy to dst
1283
+ // src = r8:f; mov (instExecSize) r20.0<1>:f r8.0<8;8,1>:f {Q1/H1}
1295
1284
t8_src_opnd_final->setAccRegSel (ACC_UNDEFINED);
1296
- if (hasDefaultRoundDenorm) {
1297
- // mov (instExecSize) r86.0<1>:f r8.0<8;8,1>:f
1298
- inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize,
1299
- dstOpnd, t8_src_opnd_final, nullptr ,
1300
- Get_Gen4_Emask (emask, instExecSize), true );
1301
- } else {
1302
- // If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
1303
- // When denorm flush-to-zero is set, mov instructions with the same source
1304
- // and destination data type may retain denorm as output. So, we need to
1305
- // use add instruction instead.
1306
- // add (instExecSize) r86.0<1>:f r8.0<8;8,1>:f 0.0:f
1307
- inst = createInst (predOpnd, G4_add, condMod, saturate, instExecSize,
1308
- dstOpnd, t8_src_opnd_final, createImm (0 , Type_F),
1309
- Get_Gen4_Emask (emask, instExecSize), true );
1310
- }
1285
+ inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize, dstOpnd,
1286
+ t8_src_opnd_final, NULL ,
1287
+ Get_Gen4_Emask (emask, instExecSize), true );
1311
1288
1312
1289
return VISA_SUCCESS;
1313
1290
}
@@ -1618,22 +1595,11 @@ int IR_Builder::translateVISAArithmeticSingleSQRTIEEEInst(
1618
1595
};
1619
1596
1620
1597
// make final copy to dst
1598
+ // src = r7:f; mov (instExecSize) r86.0<1>:f r7.0<8;8,1>:f {Q1/H1}
1621
1599
t7_src_opnd_final->setAccRegSel (ACC_UNDEFINED);
1622
- if (hasDefaultRoundDenorm) {
1623
- // mov (instExecSize) r86.0<1>:f r7.0<8;8,1>:f
1624
- inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize,
1625
- dstOpnd, t7_src_opnd_final, nullptr ,
1626
- Get_Gen4_Emask (emask, instExecSize), true );
1627
- } else {
1628
- // If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
1629
- // When denorm flush-to-zero is set, mov instructions with the same source
1630
- // and destination data type may retain denorm as output. So, we need to
1631
- // use add instruction instead.
1632
- // add (instExecSize) r86.0<1>:f r7.0<8;8,1>:f 0.0:f
1633
- inst = createInst (predOpnd, G4_add, condMod, saturate, instExecSize,
1634
- dstOpnd, t7_src_opnd_final, createImm (0 , Type_F),
1635
- Get_Gen4_Emask (emask, instExecSize), true );
1636
- }
1600
+ inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize, dstOpnd,
1601
+ t7_src_opnd_final, NULL ,
1602
+ Get_Gen4_Emask (emask, instExecSize), true );
1637
1603
1638
1604
return VISA_SUCCESS;
1639
1605
}
@@ -2190,30 +2156,18 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
2190
2156
tmpCR0ForRoundRestore, tmpCR0ForRoundDenormRestore);
2191
2157
};
2192
2158
2193
- // make final copy to dst
2194
- if (!noDstMove || !hasDefaultRoundDenorm) {
2195
- G4_SrcRegRegion tsrc7_final (*this , Mod_src_undef, Direct,
2196
- noDstMove ? dstOpnd->getBase ()
2197
- : t7->getRegVar (),
2198
- noDstMove ? dstOpnd->getRegOff () : 0 , 0 ,
2199
- getRegionStride1 (), t7->getElemType ());
2159
+ if (!noDstMove) {
2160
+ // make final copy to dst
2161
+ // src = r7:df
2162
+ // final result is at r7.noacc
2163
+ G4_SrcRegRegion tsrc7_final (*this , Mod_src_undef, Direct, t7->getRegVar (),
2164
+ 0 , 0 , getRegionStride1 (), t7->getElemType ());
2200
2165
G4_SrcRegRegion *t7_src_opnd_final = createSrcRegRegion (tsrc7_final);
2201
2166
t7_src_opnd_final->setAccRegSel (ACC_UNDEFINED);
2202
- if (hasDefaultRoundDenorm) {
2203
- // mov (instExecSize) r20.0<1>:df r7.0<8;8,1>:df
2204
- inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize,
2205
- dstOpnd, t7_src_opnd_final, nullptr ,
2206
- Get_Gen4_Emask (emask, instExecSize), true );
2207
- } else {
2208
- // If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
2209
- // When denorm flush-to-zero is set, mov instructions with the same source
2210
- // and destination data type may retain denorm as output. So, we need to
2211
- // use add instruction instead.
2212
- // add (instExecSize) r20.0<1>:df r7.0<8;8,1>:df 0.0:df
2213
- inst = createInst (predOpnd, G4_add, condMod, saturate, instExecSize,
2214
- dstOpnd, t7_src_opnd_final, createImm (0 , Type_DF),
2215
- Get_Gen4_Emask (emask, instExecSize), true );
2216
- }
2167
+ // mov (instExecSize) r20.0<1>:df r7.0<8;8,1>:df {Q1/H1}
2168
+ inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize,
2169
+ dstOpnd, t7_src_opnd_final, nullptr ,
2170
+ Get_Gen4_Emask (emask, instExecSize), true );
2217
2171
}
2218
2172
2219
2173
return VISA_SUCCESS;
0 commit comments