Skip to content

Commit 5373d9b

Browse files
davidjwoo authored and igcbot committed
Add option to split evaluate messages
1 parent bdc7ed0 commit 5373d9b

File tree

6 files changed

+55
-11
lines changed

6 files changed

+55
-11
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3803,6 +3803,11 @@ namespace IGC
38033803
SaveOption(vISA_cloneSampleInst, true);
38043804
}
38053805

3806+
if (m_program->m_Platform->getWATable().Wa_22011142311 && IGC_IS_FLAG_ENABLED(EnableEvaluateSamplerSplit))
3807+
{
3808+
SaveOption(vISA_cloneEvaluateSampleInst, true);
3809+
}
3810+
38063811
if (IGC_IS_FLAG_ENABLED(ForceFFIDOverwrite)/*|| m_program->m_Platform->WaOverwriteFFID()*/)
38073812
{
38083813
unsigned int ffid[unsigned(ShaderType::END)] = {

IGC/common/igc_flags.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,8 @@ DECLARE_IGC_REGKEY(bool, UseNewRegEncoding, true, "Use new location encoding for
425425
DECLARE_IGC_REGKEY(bool, EmitOffsetInDbgLoc, false, "Emit offset of private memory in DW_AT_location when available", false)
426426
DECLARE_IGC_REGKEY(bool, EnableA64WA, true, "Guarantee A64 load/store addres-hi is uniform", false)
427427
DECLARE_IGC_REGKEY(bool, EnableSamplerSplit, true, "Split SIMD8 Sampler message to 2 subspans and SIMD16 to odd and even", false)
428+
DECLARE_IGC_REGKEY(bool, EnableEvaluateSamplerSplit, false, "Split evaluate messages to sampler into either SIMD8 or SIMD1 messages", false)
429+
428430
DECLARE_IGC_REGKEY(bool, EnableZEBinary, false, "Enable output in ZE binary format", true)
429431
DECLARE_IGC_REGKEY(DWORD, OverrideOCLMaxParamSize, 0, "Override the value imposed on the kernel by CL_DEVICE_MAX_PARAMETER_SIZE. Value in bytes, if value==0 no override happens.", true)
430432

visa/Gen4_IR.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,12 @@ void G4_SendMsgDescriptor::setBindingTableIdx(unsigned idx)
468468
desc.value |= idx;
469469
}
470470

471+
// Returns the sampler message type encoded in this descriptor: the 5-bit
// field at bits [16:12] of the value returned by getFuncCtrl().
// Only valid for sampler descriptors; isSampler() is checked first.
uint32_t G4_SendMsgDescriptor::getSamplerMessageType() const
472+
{
473+
// Guard: the bit extraction below is only performed for sampler descriptors.
MUST_BE_TRUE(isSampler(), "wrong descriptor type for method");
474+
// Shift out the low 12 bits, then keep 5 bits (mask 0x1f).
return (getFuncCtrl() >> 12) & 0x1f;
475+
}
476+
471477
bool G4_SendMsgDescriptor::is16BitInput() const
472478
{
473479
return desc.layout.simdMode2 == 1;

visa/Gen4_IR.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,7 @@ class G4_SendMsgDescriptor
469469
// for sampler messages only
470470
bool isSampler() const { return getFuncId() == SFID::SAMPLER; }
471471
bool isCPSEnabled() const { return extDesc.layout.cps != 0; }
472+
uint32_t getSamplerMessageType() const;
472473
bool is16BitInput() const;
473474
bool is16BitReturn() const;
474475

visa/Optimizer.cpp

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -426,8 +426,9 @@ void Optimizer::insertHashMovs()
426426
//
427427
void Optimizer::cloneSampleInst()
428428
{
429-
430-
if (!builder.getOption(vISA_cloneSampleInst))
429+
bool cloneSample = builder.getOption(vISA_cloneSampleInst);
430+
bool cloneEvaluateSample = builder.getOption(vISA_cloneEvaluateSampleInst);
431+
if (!cloneSample && !cloneEvaluateSample)
431432
{
432433
return;
433434
}
@@ -443,18 +444,46 @@ void Optimizer::cloneSampleInst()
443444
auto inst = *I;
444445
if (inst->isSend() && inst->asSendInst()->getMsgDesc()->isSampler() && inst->getExecSize() >= builder.getNativeExecSize())
445446
{
446-
if (!hasSample)
447+
G4_InstSend* sendInst = inst->asSendInst();
448+
bool isEval = sendInst->getMsgDesc()->ResponseLength() == 0;
449+
uint32_t messageType = sendInst->getMsgDesc()->getSamplerMessageType();
450+
assert(!inst->getPredicate() && "do not handle predicated sampler inst for now");
451+
if (!isEval && cloneSample)
447452
{
448-
hasSample = true;
449-
auto flagInit = builder.createMov(g4::SIMD1, builder.createDst(tmpFlag->getRegVar(), isSIMD32 ? Type_UD : Type_UW),
450-
builder.createImm(isSIMD32 ? 0x0F0F0F0F : 0x0F0F, isSIMD32 ? Type_UD : Type_UW), InstOpt_WriteEnable, false);
453+
if (!hasSample)
454+
{
455+
hasSample = true;
456+
auto flagInit = builder.createMov(g4::SIMD1, builder.createDst(tmpFlag->getRegVar(), isSIMD32 ? Type_UD : Type_UW),
457+
builder.createImm(isSIMD32 ? 0x0F0F0F0F : 0x0F0F, isSIMD32 ? Type_UD : Type_UW), InstOpt_WriteEnable, false);
458+
bb->insertBefore(I, flagInit);
459+
}
460+
auto newInst = inst->cloneInst();
461+
inst->setPredicate(builder.createPredicate(PredState_Plus, tmpFlag->getRegVar(), 0));
462+
newInst->setPredicate(builder.createPredicate(PredState_Minus, tmpFlag->getRegVar(), 0));
463+
bb->insertAfter(I, newInst);
464+
}
465+
else if(isEval && cloneEvaluateSample && messageType != 0x1F)
466+
{
467+
// 0x1F is the opcode for sampler cache flush
468+
uint32_t newExecSize = (messageType == VISA_3D_SAMPLE_L || messageType == VISA_3D_LD) ? 8 : 1;
469+
uint32_t mask = (1 << newExecSize) - 1;
470+
auto evalTmpFlag = builder.createTempFlag(isSIMD32 ? 2 : 1);
471+
auto flagInit = builder.createMov(g4::SIMD1, builder.createDst(evalTmpFlag->getRegVar(), isSIMD32 ? Type_UD : Type_UW),
472+
builder.createImm(mask, isSIMD32 ? Type_UD : Type_UW), InstOpt_WriteEnable, false);
451473
bb->insertBefore(I, flagInit);
474+
inst->setPredicate(builder.createPredicate(PredState_Plus, evalTmpFlag->getRegVar(), 0));
475+
unsigned numInsts = kernel.getSimdSize() / newExecSize;
476+
for (unsigned int i = 1; i < numInsts; i++)
477+
{
478+
auto newInst = inst->cloneInst();
479+
bb->insertAfter(I, newInst);
480+
evalTmpFlag = builder.createTempFlag(isSIMD32 ? 2 : 1);
481+
flagInit = builder.createMov(g4::SIMD1, builder.createDst(evalTmpFlag->getRegVar(), isSIMD32 ? Type_UD : Type_UW),
482+
builder.createImm(mask << (i * newExecSize), isSIMD32 ? Type_UD : Type_UW), InstOpt_WriteEnable, false);
483+
newInst->setPredicate(builder.createPredicate(PredState_Plus, evalTmpFlag->getRegVar(), 0));
484+
bb->insertAfter(I, flagInit);
485+
}
452486
}
453-
assert(!inst->getPredicate() && "do not handle predicated sampler inst for now");
454-
auto newInst = inst->cloneInst();
455-
inst->setPredicate(builder.createPredicate(PredState_Plus, tmpFlag->getRegVar(), 0));
456-
newInst->setPredicate(builder.createPredicate(PredState_Minus, tmpFlag->getRegVar(), 0));
457-
bb->insertAfter(I, newInst);
458487
}
459488
I = Next;
460489
}

visa/include/VISAOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ DEF_VISA_OPTION(vISA_forceNoMaskWA, ET_BOOL, "-forceNoMaskWA",
234234
DEF_VISA_OPTION(vISA_DstSrcOverlapWA, ET_BOOL, "-dstSrcOverlapWA", UNUSED, true)
235235
DEF_VISA_OPTION(vISA_noSendSrcDstOverlap, ET_BOOL, "-noSendSrcDstOverlap", UNUSED, false)
236236
DEF_VISA_OPTION(vISA_cloneSampleInst, ET_BOOL, "-cloneSampleInst", UNUSED, false)
237+
DEF_VISA_OPTION(vISA_cloneEvaluateSampleInst, ET_BOOL, "-cloneEvaluateSampleInst", UNUSED, false)
237238

238239
//=== HW debugging options ===
239240
DEF_VISA_OPTION(vISA_GenerateDebugInfo, ET_BOOL, "-generateDebugInfo", UNUSED, false)

0 commit comments

Comments
 (0)