@@ -386,6 +386,22 @@ class GenXKernelBuilder {
386
386
std::map<Function *, VISA_GenVar *> FPMap;
387
387
SmallVector<InsertValueInst *, 10 > RetvInserts;
388
388
389
+ // The default float control from kernel attribute. Each subroutine may
390
+ // override this control mask, but it should revert back to the default float
391
+ // control mask before exiting from the subroutine.
392
+ uint32_t DefaultFloatControl = 0 ;
393
+
394
+ enum CRBits { // Bit masks that run() combines into CRMask and that are passed to buildControlRegUpdate.
395
+ SinglePrecisionMode = 1 , // bit 0
396
+ RoundingMode = 3 << 4 , // bits 4-5 (two-bit field)
397
+ DoublePrecisionDenorm = 1 << 6 , // bit 6
398
+ SinglePrecisionDenorm = 1 << 7 , // bit 7
399
+ HalfPrecisionDenorm = 1 << 10 , // bit 10
400
+ SystolicDenorm = 1 << 30 , // bit 30; run() adds it to CRMask only if Subtarget->hasSystolicDenormControl()
401
+ };
402
+
403
+ uint32_t CRMask = 0 ; // Set of CRBits treated as float control; filled in by run() before instructions are built.
404
+
389
405
// normally false, set to true if there is any SIMD CF in the func or this is
390
406
// (indirectly) called inside any SIMD CF.
391
407
bool NoMask = false ;
@@ -418,6 +434,7 @@ class GenXKernelBuilder {
418
434
bool buildInstruction (Instruction *Inst);
419
435
bool buildMainInst (Instruction *Inst, genx::BaleInfo BI, unsigned Mod,
420
436
const DstOpndDesc &DstDesc);
437
+ void buildControlRegUpdate (unsigned Mask, bool Clear);
421
438
void buildJoin (CallInst *Join, BranchInst *Branch);
422
439
bool buildBranch (BranchInst *Branch);
423
440
void buildIndirectBr (IndirectBrInst *Br);
@@ -1109,6 +1126,12 @@ bool GenXKernelBuilder::run() {
1109
1126
GrfByteSize = Subtarget->getGRFByteSize ();
1110
1127
StackSurf = Subtarget->stackSurface ();
1111
1128
1129
+ CRMask = CRBits::RoundingMode | CRBits::DoublePrecisionDenorm |
1130
+ CRBits::SinglePrecisionDenorm | CRBits::HalfPrecisionDenorm;
1131
+
1132
+ if (Subtarget->hasSystolicDenormControl ())
1133
+ CRMask |= CRBits::SystolicDenorm;
1134
+
1112
1135
StackCallExecSize =
1113
1136
getExecSizeFromValue (BackendConfig->getInteropSubgroupSize ());
1114
1137
@@ -1301,6 +1324,28 @@ void GenXKernelBuilder::buildInstructions() {
1301
1324
beginFunctionLight (Func);
1302
1325
CurrentPadding = 0 ;
1303
1326
1327
+ // If a float control is specified, emit code to make that happen.
1328
+ // Float control contains rounding mode, denorm behaviour and single
1329
+ // precision float mode (ALT or IEEE) Relevant bits are already set as
1330
+ // defined for VISA control reg in header definition on enums
1331
+ if (Func->hasFnAttribute (genx::FunctionMD::CMFloatControl)) {
1332
+ uint32_t FloatControl = 0 ;
1333
+ Func->getFnAttribute (genx::FunctionMD::CMFloatControl)
1334
+ .getValueAsString ()
1335
+ .getAsInteger (0 , FloatControl);
1336
+
1337
+ // Clear current float control bits to known zero state
1338
+ buildControlRegUpdate (CRMask, true );
1339
+
1340
+ // Set rounding mode to required state if that isn't zero
1341
+ FloatControl &= CRMask;
1342
+ if (FloatControl) {
1343
+ if (FG->getHead () == Func)
1344
+ DefaultFloatControl = FloatControl;
1345
+ buildControlRegUpdate (FloatControl, false );
1346
+ }
1347
+ }
1348
+
1304
1349
// Only output a label for the initial basic block if it is used from
1305
1350
// somewhere else.
1306
1351
bool NeedsLabel = !Func->front ().use_empty ();
@@ -3970,6 +4015,39 @@ void GenXKernelBuilder::buildIntrinsic(CallInst *CI, unsigned IntrinID,
3970
4015
#include " GenXIntrinsicsBuildMap.inc"
3971
4016
}
3972
4017
4018
+ /* *************************************************************************************************
4019
+ * buildControlRegUpdate : emit one logic instruction that updates the
4020
+ * control register (V14, fetched below as PREDEFINED_CR0) in place.
4021
+ *
4022
+ * Enter: Mask = the bits of the control register to modify
4023
+ * Clear = false: OR Mask into V14, so the bits set in Mask become set,
4024
+ * true: AND V14 with ~Mask, so the bits set in Mask become cleared.
4025
+ */
4026
+ void GenXKernelBuilder::buildControlRegUpdate (unsigned Mask, bool Clear) {
4027
+ ISA_Opcode Opcode;
4028
+ // Choose the opcode; when clearing, invert Mask so the AND leaves all other bits untouched.
4029
+ if (Clear) {
4030
+ Opcode = ISA_AND;
4031
+ Mask = ~Mask;
4032
+ } else
4033
+ Opcode = ISA_OR;
4034
+
4035
+ Region Single = Region (1 , 4 ); // NOTE(review): presumably a single 4-byte element - confirm against the Region constructor
4036
+
4037
+ VISA_GenVar *Decl = nullptr ;
4038
+ CISA_CALL (Kernel->GetPredefinedVar (Decl, PREDEFINED_CR0)); // Decl is filled in with the predefined CR0 variable
4039
+ VISA_VectorOpnd *dst =
4040
+ createRegionOperand (&Single, Decl, DONTCARESIGNED, 0 , true ); // NOTE(review): last argument presumably marks a destination operand - confirm
4041
+ VISA_VectorOpnd *src0 =
4042
+ createRegionOperand (&Single, Decl, DONTCARESIGNED, 0 , false ); // same variable and region as dst, built with the flag false
4043
+
4044
+ VISA_VectorOpnd *src1 = nullptr ;
4045
+ CISA_CALL (Kernel->CreateVISAImmediate (src1, &Mask, ISA_TYPE_UD)); // Mask (already inverted when clearing) as an ISA_TYPE_UD immediate
4046
+
4047
+ appendVISALogicOrShiftInst (Opcode, nullptr , false , vISA_EMASK_M1, EXEC_SIZE_1, // one instruction at EXEC_SIZE_1 with vISA_EMASK_M1
4048
+ dst, src0, src1);
4049
+ }
4050
+
3973
4051
/* **********************************************************************
3974
4052
* buildBranch : build a conditional or unconditional branch
3975
4053
*
@@ -5369,6 +5447,17 @@ void GenXKernelBuilder::buildCall(CallInst *CI, const DstOpndDesc &DstDesc) {
5369
5447
}
5370
5448
5371
5449
void GenXKernelBuilder::buildRet (ReturnInst *RI) {
5450
+ uint32_t FloatControl = 0 ;
5451
+ auto F = RI->getFunction ();
5452
+ F->getFnAttribute (genx::FunctionMD::CMFloatControl)
5453
+ .getValueAsString ()
5454
+ .getAsInteger (0 , FloatControl);
5455
+ FloatControl &= CRMask;
5456
+ if (FloatControl != DefaultFloatControl) {
5457
+ buildControlRegUpdate (CRMask, true );
5458
+ if (DefaultFloatControl)
5459
+ buildControlRegUpdate (DefaultFloatControl, false );
5460
+ }
5372
5461
if (vc::requiresStackCall (Func)) {
5373
5462
appendVISACFFunctionRetInst (nullptr , vISA_EMASK_M1, StackCallExecSize);
5374
5463
} else {
0 commit comments