Skip to content

Commit 0b45589

Browse files
matborzyszkowski authored and igcbot committed
Add LNL functionality
Add LNL functionality
1 parent 48afa19 commit 0b45589

28 files changed

+3019
-26
lines changed

IGC/AdaptorCommon/RayTracing/SplitAsyncUtils.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ bool RematChecker::materializable(const Instruction& I) const
238238
case GenISAIntrinsic::GenISA_ldrawvector_indexed:
239239
return isReadOnly(cast<LdRawIntrinsic>(GII)->getResourceValue());
240240
case GenISAIntrinsic::GenISA_ldptr:
241+
case GenISAIntrinsic::GenISA_ldlptr:
241242
return true;
242243
default:
243244
return false;

IGC/Compiler/CISACodeGen/CISABuilder.cpp

+247-1
Original file line numberDiff line numberDiff line change
@@ -1766,6 +1766,69 @@ namespace IGC
17661766

17671767
void CEncoder::Arithmetic(ISA_Opcode opcode, CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2)
17681768
{
1769+
// Single Precision or Double precision denorm mode in
1770+
// control register must be set to retain denorm mode
1771+
// when executing Math Macro instruction sequence.
1772+
// It applies to the platforms which has correctly implemented
1773+
// macros and INV and SQRT instructions.
1774+
// 1. Set appropriate bit in control register.
1775+
// 2. Execute inv or sqrt instruction
1776+
// 3. Flush denorm in the result if flushing was enabled.
1777+
// 4. Restore original denorm mode in control register.
1778+
bool forceRetainDenorms =
1779+
m_program->m_Platform->hasCorrectlyRoundedMacros() &&
1780+
((opcode == ISA_Opcode::ISA_INV) || (opcode == ISA_Opcode::ISA_SQRT)) &&
1781+
IsFloat(src0->GetType()) &&
1782+
(src1 == nullptr);
1783+
1784+
// Save original denorm mode. This value is a mask of bits
1785+
// corresponding to the denorm bits in Control register.
1786+
uint32_t oldDenormMode = m_fpDenormMode;
1787+
1788+
if (forceRetainDenorms)
1789+
{
1790+
IGC_ASSERT_MESSAGE(src1 == nullptr, "Unsupported opcode for Forcing Retain Denorm Mode.");
1791+
1792+
DenormModeEncoding denormMode = DenormModeEncoding::DenormFlushToZero;
1793+
1794+
// Check the type of src0, currently the restrictions applied only
1795+
// to unary instructions.
1796+
// Get the denorm mode mask for that data type.
1797+
switch (src0->GetType())
1798+
{
1799+
case ISA_TYPE_DF:
1800+
denormMode = DenormModeEncoding::Float64DenormRetain;
1801+
break;
1802+
1803+
case ISA_TYPE_F:
1804+
denormMode = DenormModeEncoding::Float32DenormRetain;
1805+
break;
1806+
1807+
case ISA_TYPE_HF:
1808+
denormMode = DenormModeEncoding::Float16DenormRetain;
1809+
break;
1810+
1811+
case ISA_TYPE_BF:
1812+
denormMode = DenormModeEncoding::FloatBFTFDenormRetain;
1813+
break;
1814+
1815+
default:
1816+
IGC_ASSERT_MESSAGE(0, "Incorrect Float type.");
1817+
}
1818+
1819+
// Check if the original denorm mode for the src0 data type
1820+
// was 0 (flush to zero).
1821+
// denormMode will always have a bit set for the given data
1822+
// type. If currently set mode is 0 (flush to zero), the
1823+
// ANDing these values will give 0, thus cr must be temporarily
1824+
// updated and flushing the destination must be added,
1825+
forceRetainDenorms &= (m_fpDenormMode && denormMode) == 0;
1826+
1827+
if (forceRetainDenorms)
1828+
{
1829+
SetDenormMode(denormMode);
1830+
}
1831+
}
17691832

17701833
VISA_VectorOpnd* srcOpnd0 = GetSourceOperand(src0, m_encoderState.m_srcOperand[0]);
17711834
VISA_VectorOpnd* srcOpnd1 = GetSourceOperand(src1, m_encoderState.m_srcOperand[1]);
@@ -1783,6 +1846,15 @@ namespace IGC
17831846
srcOpnd1,
17841847
srcOpnd2));
17851848

1849+
if (forceRetainDenorms)
{
    // Restore the original denorm mode.
    SetDenormMode(oldDenormMode);

    // Force flushing the destination by adding -0 to it.
    // Casting the double value -0.0 to an integer yields 0, i.e. the bit
    // pattern of +0.0, and adding +0.0 would turn a -0 result into +0.
    // Build the sign-bit-only pattern for the destination type instead.
    // NOTE(review): assumes ImmToVariable takes the raw bit pattern of the
    // immediate for floating-point types - confirm against its definition.
    uint64_t negativeZeroBits = 0;
    switch (dst->GetType())
    {
    case ISA_TYPE_DF:
        negativeZeroBits = 0x8000000000000000ull;
        break;

    case ISA_TYPE_F:
        negativeZeroBits = 0x80000000ull;
        break;

    case ISA_TYPE_HF:
    case ISA_TYPE_BF:
        negativeZeroBits = 0x8000ull;
        break;

    default:
        IGC_ASSERT_MESSAGE(0, "Incorrect Float type.");
        break;
    }

    CVariable* negativeZero = m_program->ImmToVariable(negativeZeroBits, dst->GetType());
    Add(dst, dst, negativeZero);
}
17861858

17871859
}
17881860

@@ -3774,6 +3846,52 @@ namespace IGC
37743846
return VISA_3D_LOD;
37753847
case llvm_sample_killpix:
37763848
return VISA_3D_SAMPLE_KILLPIX;
3849+
case llvm_sample_mlodptr:
3850+
return VISA_3D_SAMPLE_MLOD;
3851+
case llvm_sample_c_mlodptr:
3852+
return VISA_3D_SAMPLE_C_MLOD;
3853+
case llvm_sample_bc_mlodptr:
3854+
return VISA_3D_SAMPLE_B_C;
3855+
case llvm_sample_dc_mlodptr:
3856+
return VISA_3D_SAMPLE_D_C_MLOD;
3857+
case llvm_gather4Iptr:
3858+
return VISA_3D_GATHER4_I;
3859+
case llvm_gather4Bptr:
3860+
return VISA_3D_GATHER4_B;
3861+
case llvm_gather4Lptr:
3862+
return VISA_3D_GATHER4_L;
3863+
case llvm_gather4ICptr:
3864+
return VISA_3D_GATHER4_I_C;
3865+
case llvm_gather4LCptr:
3866+
return VISA_3D_GATHER4_L_C;
3867+
case llvm_ldlptr:
3868+
return VISA_3D_LD_L;
3869+
case llvm_sample_poptr:
3870+
return VISA_3D_SAMPLE_PO;
3871+
case llvm_sample_pobptr:
3872+
return VISA_3D_SAMPLE_PO_B;
3873+
case llvm_sample_polptr:
3874+
return VISA_3D_SAMPLE_PO_L;
3875+
case llvm_sample_pocptr:
3876+
return VISA_3D_SAMPLE_PO_C;
3877+
case llvm_sample_podptr:
3878+
return VISA_3D_SAMPLE_PO_D;
3879+
case llvm_sample_polcptr:
3880+
return VISA_3D_SAMPLE_PO_L_C;
3881+
case llvm_gather4POPackedptr:
3882+
return VISA_3D_GATHER4_PO_PACKED;
3883+
case llvm_gather4POPackedLptr:
3884+
return VISA_3D_GATHER4_PO_PACKED_L;
3885+
case llvm_gather4POPackedBptr:
3886+
return VISA_3D_GATHER4_PO_PACKED_B;
3887+
case llvm_gather4POPackedIptr:
3888+
return VISA_3D_GATHER4_PO_PACKED_I;
3889+
case llvm_gather4POPackedCptr:
3890+
return VISA_3D_GATHER4_PO_PACKED_C;
3891+
case llvm_gather4POPackedICptr:
3892+
return VISA_3D_GATHER4_PO_PACKED_I_C;
3893+
case llvm_gather4POPackedLCptr:
3894+
return VISA_3D_GATHER4_PO_PACKED_L_C;
37773895
default:
37783896
IGC_ASSERT_MESSAGE(0, "wrong sampler subopcode");
37793897
return VISA_3D_SAMPLE;
@@ -4393,7 +4511,8 @@ namespace IGC
43934511
SaveOption(vISA_ActiveThreadsOnlyBarrier, true);
43944512
}
43954513

4396-
if ((context->type == ShaderType::OPENCL_SHADER || context->type == ShaderType::COMPUTE_SHADER) &&
4514+
if ((context->type == ShaderType::OPENCL_SHADER || context->type == ShaderType::COMPUTE_SHADER
4515+
|| context->type == ShaderType::RAYTRACING_SHADER || context->type == ShaderType::BINDLESS_SHADER) &&
43974516
(m_program->m_Platform->preemptionSupported() || IGC_IS_FLAG_ENABLED(ForcePreemptionWA)) &&
43984517
IGC_IS_FLAG_ENABLED(EnablePreemption))
43994518
{
@@ -9115,6 +9234,133 @@ namespace IGC
91159234
nullptr));
91169235
}
91179236

9237+
// Emits an LSC typed atomic through vKernel->AppendVISALscTypedAtomic.
//   atomic_op        - IGC atomic operation, translated with getLSCAtomicOpCode.
//   resource         - supplies the surface operand and the LSC address type.
//   pU, pV, pR       - coordinate payloads; all three use source-operand-0's sub-variable offset.
//   pSrc0, pSrc1     - atomic data payloads; both use source-operand-1's sub-variable offset.
//   pDst             - destination payload (raw operand at offset 0).
//   elemSize         - element size, converted with LSC_GetElementSize.
//   addr_size        - LSC address size forwarded unchanged.
void CEncoder::LSC_TypedAtomic(
    AtomicOp atomic_op, ResourceDescriptor* resource,
    CVariable* pU, CVariable* pV, CVariable* pR,
    CVariable* pSrc0, CVariable* pSrc1, CVariable* pDst,
    unsigned elemSize, LSC_ADDR_SIZE addr_size)
{
    // DG2: SIMD8, PVC: SIMD16, Xe2: SIMD32
    VISA_Exec_Size simdWidth = visaExecSize(m_encoderState.m_simdSize);

    // Translate the IGC atomic op into its LSC opcode.
    LSC_OP lscOpcode = getLSCAtomicOpCode(atomic_op);

    VISA_RawOpnd* rawDst = GetRawSource(pDst, 0);

    // TODO unify the way we calculate offset for raw sources, maybe we shouldn't use offset at all
    VISA_RawOpnd* rawU = GetRawSource(pU, m_encoderState.m_srcOperand[0].subVar * getGRFSize());
    VISA_RawOpnd* rawV = GetRawSource(pV, m_encoderState.m_srcOperand[0].subVar * getGRFSize());
    VISA_RawOpnd* rawR = GetRawSource(pR, m_encoderState.m_srcOperand[0].subVar * getGRFSize());
    VISA_RawOpnd* rawData0 = GetRawSource(pSrc0, m_encoderState.m_srcOperand[1].subVar * getGRFSize());
    VISA_RawOpnd* rawData1 = GetRawSource(pSrc1, m_encoderState.m_srcOperand[1].subVar * getGRFSize());

    VISA_PredOpnd* predicate = GetFlagOperand(m_encoderState.m_flag);
    IGC_ASSERT(m_encoderState.m_dstOperand.subVar == 0);

    VISA_EMask_Ctrl execMask = ConvertMaskToVisaType(m_encoderState.m_mask, m_encoderState.m_noMask);
    VISA_VectorOpnd* surfaceOpnd = GetVISALSCSurfaceOpnd(resource->m_surfaceType, resource->m_resource);

    LSC_CACHE_OPTS cacheOpts{ LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT };

    // One non-transposed element of the requested size.
    LSC_DATA_SHAPE shape{};
    shape.size = LSC_GetElementSize(elemSize);
    shape.order = LSC_DATA_ORDER_NONTRANSPOSE;
    shape.elems = LSC_GetElementNum(1);

    V(vKernel->AppendVISALscTypedAtomic(
        lscOpcode,
        predicate,
        simdWidth,
        execMask,
        cacheOpts,
        getLSCAddrType(resource),
        addr_size,
        shape,
        surfaceOpnd, 0,
        rawDst,
        rawU, 0,
        rawV, 0,
        rawR, 0,
        nullptr,
        rawData0,
        rawData1));
}
9286+
9287+
// Emits an LSC typed 2D block message through
// vKernel->AppendVISALscTypedBlock2DInst.
//   subOpcode    - LSC_LOAD_BLOCK2D binds srcDst as the raw destination,
//                  LSC_STORE_BLOCK2D binds it as the raw source; for any other
//                  value both data operands are passed as nullptr.
//   srcDst       - data payload (destination for loads, source for stores).
//   surfaceType  - predefined surface kind; also selects the LSC address type.
//   bufId        - surface identifier handed to GetVISALSCSurfaceOpnd.
//   xOffset, yOffset       - block offsets, read through GetUniformSource.
//   blockWidth, blockHeight - block dimensions stored in the 2D data shape.
void CEncoder::LSC_Typed2dBlock(
    LSC_OP subOpcode,
    CVariable* srcDst,
    e_predefSurface surfaceType,
    CVariable* bufId,
    CVariable* xOffset,
    CVariable* yOffset,
    int blockWidth,
    int blockHeight)
{
    LSC_CACHE_OPTS cacheOpts{ LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT };

    LSC_DATA_SHAPE_TYPED_BLOCK2D blockShape{};
    blockShape.height = blockHeight;
    blockShape.width = blockWidth;

    VISA_VectorOpnd* surfaceOpnd = GetVISALSCSurfaceOpnd(surfaceType, bufId);
    VISA_VectorOpnd* offsetX = GetUniformSource(xOffset);
    VISA_VectorOpnd* offsetY = GetUniformSource(yOffset);

    // Only one of the two data operands is populated, depending on direction.
    VISA_RawOpnd* loadDst = nullptr;
    VISA_RawOpnd* storeSrc = nullptr;
    switch (subOpcode)
    {
    case LSC_LOAD_BLOCK2D:
        loadDst = GetRawDestination(srcDst);
        break;

    case LSC_STORE_BLOCK2D:
        storeSrc = GetRawSource(srcDst);
        break;

    default:
        break;
    }

    V(vKernel->AppendVISALscTypedBlock2DInst(
        subOpcode,
        cacheOpts,
        getLSCAddrType(surfaceType),
        blockShape,
        surfaceOpnd,
        0,
        loadDst,
        offsetX,
        offsetY,
        0,
        0,
        storeSrc));
}
9330+
9331+
// Emits an LSC untyped append-counter atomic through
// vKernel->AppendVISALscUntypedAppendCounterAtomicInst.
//   lscOp     - LSC opcode forwarded unchanged.
//   resource  - supplies the surface operand and the LSC address type.
//   dst       - raw destination payload.
//   src0      - raw source payload.
// The data shape is fixed: one non-transposed element of size
// LSC_GetElementSize(32).
void CEncoder::LSC_UntypedAppendCounterAtomic(
    LSC_OP lscOp,
    ResourceDescriptor* resource,
    CVariable* dst,
    CVariable* src0)
{
    LSC_ADDR_TYPE addressType = getLSCAddrType(resource);
    LSC_CACHE_OPTS cacheOpts{ LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT };
    VISA_VectorOpnd* surfaceOpnd = GetVISALSCSurfaceOpnd(resource->m_surfaceType, resource->m_resource);

    LSC_DATA_SHAPE shape{};
    shape.size = LSC_GetElementSize(32);
    shape.order = LSC_DATA_ORDER_NONTRANSPOSE;
    shape.elems = LSC_GetElementNum(1);

    VISA_RawOpnd* rawDst = GetRawDestination(dst);
    VISA_RawOpnd* rawSrc = GetRawSource(src0);

    V(vKernel->AppendVISALscUntypedAppendCounterAtomicInst(
        lscOp,
        GetFlagOperand(m_encoderState.m_flag),
        visaExecSize(m_encoderState.m_simdSize),
        ConvertMaskToVisaType(m_encoderState.m_mask, m_encoderState.m_noMask),
        cacheOpts,
        addressType,
        shape,
        surfaceOpnd,
        0x0,
        rawDst,
        rawSrc));
}
9363+
91189364
void CEncoder::AppendBreakpoint() {
91199365
V(vKernel->AppendVISABreakpointInst());
91209366
}

IGC/Compiler/CISACodeGen/CISABuilder.hpp

+22
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,28 @@ namespace IGC
316316
LSC_ADDR_SIZE addr_size, int chMask,
317317
LSC_CACHE_OPTS cacheOpts = { LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT });
318318

319+
320+
void LSC_TypedAtomic(
321+
AtomicOp subOp, ResourceDescriptor* resource,
322+
CVariable* pU, CVariable* pV, CVariable* pR,
323+
CVariable* pSrc0, CVariable* pSrc1, CVariable* pSrcDst,
324+
unsigned elemSize, LSC_ADDR_SIZE addr_size);
325+
326+
327+
void LSC_Typed2dBlock(
328+
LSC_OP subOpcode,
329+
CVariable* dst,
330+
e_predefSurface surfaceType,
331+
CVariable* bufId,
332+
CVariable* xOffset,
333+
CVariable* yOffset,
334+
int blockWidth,
335+
int blockHeight);
336+
void LSC_UntypedAppendCounterAtomic(
337+
LSC_OP lscOp,
338+
ResourceDescriptor* resource,
339+
CVariable* dst,
340+
CVariable* src0);
319341
void AppendBreakpoint();
320342
void ScatterA64(CVariable* val, CVariable* offset, unsigned elementSize, unsigned numElems);
321343
void ByteGather(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems);

IGC/Compiler/CISACodeGen/CISACodeGen.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,8 @@ namespace IGC
171171
// to access thread arguments. we need to lower such messages
172172
// using special addressing mode.
173173
bool m_isThreadArg = false;
174-
ResourceDescriptor() : m_resource(nullptr), m_surfaceType(ESURFACE_NORMAL),
174+
bool m_isConstant;
175+
ResourceDescriptor() : m_resource(nullptr), m_surfaceType(ESURFACE_NORMAL), m_isConstant(false),
175176
m_isThreadArg(false) {}
176177
};
177178

IGC/Compiler/CISACodeGen/CShader.cpp

+19
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,25 @@ CVariable* CShader::GetHWTID()
916916
{
917917
if (m_Platform->getHWTIDFromSR0())
918918
{
919+
if (m_Platform->getPlatformInfo().eProductFamily == IGFX_LUNARLAKE)
920+
{
921+
922+
uint32_t bitmask = BITMASK(16);
923+
m_HW_TID = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, "HWTID");
924+
encoder.SetNoMask();
925+
encoder.SetSrcSubReg(0, 0);
926+
encoder.And(m_HW_TID, GetSR0(), ImmToVariable(bitmask, ISA_TYPE_D));
927+
encoder.Push();
928+
929+
// Remove bit [10]
930+
RemoveBitRange(m_HW_TID, 10, 1);
931+
// Remove bit [7]
932+
RemoveBitRange(m_HW_TID, 7, 1);
933+
// Remove bit [3]
934+
RemoveBitRange(m_HW_TID, 3, 1);
935+
936+
return m_HW_TID;
937+
}
919938
if (m_Platform->getPlatformInfo().eProductFamily == IGFX_PVC)
920939
{
921940
// [14:12] Slice ID.

IGC/Compiler/CISACodeGen/CoalescingEngine.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,9 @@ namespace IGC
179179

180180
if (GenIntrinsicInst * intrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(DefMI))
181181
{
182-
if ((isURBWriteIntrinsic(intrinsic) && !(IGC_IS_FLAG_ENABLED(DisablePayloadCoalescing_URB))) ||
183-
(llvm::isa<llvm::RTWriteIntrinsic>(intrinsic) && !(IGC_IS_FLAG_ENABLED(DisablePayloadCoalescing_RT))))
182+
if ((isURBWriteIntrinsic(intrinsic) && !IGC_IS_FLAG_ENABLED(DisablePayloadCoalescing_URB)) ||
183+
(llvm::isa<llvm::RTWriteIntrinsic>(intrinsic) && !IGC_IS_FLAG_ENABLED(DisablePayloadCoalescing_RT)) ||
184+
(llvm::isa<llvm::RTDualBlendSourceIntrinsic>(intrinsic) && !IGC_IS_FLAG_ENABLED(DisablePayloadCoalescing_RT) && m_Platform.hasDualKSPPS()))
184185
{
185186
ProcessTuple(DefMI);
186187
}
@@ -1457,7 +1458,8 @@ namespace IGC
14571458
if (isSampleInstruction(inst) ||
14581459
isLdInstruction(inst) ||
14591460
isURBWriteIntrinsic(inst) ||
1460-
llvm::isa<llvm::RTWriteIntrinsic>(inst))
1461+
llvm::isa<llvm::RTWriteIntrinsic>(inst) ||
1462+
(llvm::isa<llvm::RTDualBlendSourceIntrinsic>(inst) && m_Platform.hasDualKSPPS()))
14611463
{
14621464
uint numOperands = inst->getNumOperands();
14631465
for (uint i = 0; i < numOperands; i++)

IGC/Compiler/CISACodeGen/ConstantCoalescing.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -2458,7 +2458,7 @@ Instruction* ConstantCoalescing::CreateSamplerLoad(
24582458
cast<PointerType>(resourcePtr->getType()) :
24592459
PointerType::get(irBuilder->getFloatTy(), addrSpace);
24602460

2461-
Type* types[] = { IGCLLVM::FixedVectorType::get(irBuilder->getFloatTy(), 4), resourceType };
2461+
Type* types[] = { IGCLLVM::FixedVectorType::get(irBuilder->getFloatTy(), 4), resourceType, resourceType };
24622462
Function* l = GenISAIntrinsic::getDeclaration(curFunc->getParent(),
24632463
llvm::GenISAIntrinsic::GenISA_ldptr,
24642464
types);
@@ -2468,6 +2468,7 @@ Instruction* ConstantCoalescing::CreateSamplerLoad(
24682468
irBuilder->getInt32(0),
24692469
irBuilder->getInt32(0),
24702470
irBuilder->getInt32(0),
2471+
UndefValue::get(resourceType),
24712472
resourcePtr ? resourcePtr : ConstantPointerNull::get(resourceType),
24722473
irBuilder->getInt32(0),
24732474
irBuilder->getInt32(0),

0 commit comments

Comments
 (0)