@@ -1766,6 +1766,69 @@ namespace IGC
1766
1766
1767
1767
void CEncoder::Arithmetic (ISA_Opcode opcode, CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2)
1768
1768
{
1769
+ // Single Precision or Double precision denorm mode in
1770
+ // control register must be set to retain denorm mode
1771
+ // when executing Math Macro instruction sequence.
1772
+ // It applies to the platforms which has correctly implemented
1773
+ // macros and INV and SQRT instructions.
1774
+ // 1. Set appropriate bit in control register.
1775
+ // 2. Execute inv or sqrt instruction
1776
+ // 3. Flush denorm in the result if flushing was enabled.
1777
+ // 4. Restore original denorm mode in control register.
1778
+ bool forceRetainDenorms =
1779
+ m_program->m_Platform ->hasCorrectlyRoundedMacros () &&
1780
+ ((opcode == ISA_Opcode::ISA_INV) || (opcode == ISA_Opcode::ISA_SQRT)) &&
1781
+ IsFloat (src0->GetType ()) &&
1782
+ (src1 == nullptr );
1783
+
1784
+ // Save original denorm mode. This value is a mask of bits
1785
+ // corresponding to the denorm bits in Control register.
1786
+ uint32_t oldDenormMode = m_fpDenormMode;
1787
+
1788
+ if (forceRetainDenorms)
1789
+ {
1790
+ IGC_ASSERT_MESSAGE (src1 == nullptr , " Unsupported opcode for Forcing Retain Denorm Mode." );
1791
+
1792
+ DenormModeEncoding denormMode = DenormModeEncoding::DenormFlushToZero;
1793
+
1794
+ // Check the type of src0, currently the restrictions applied only
1795
+ // to unary instructions.
1796
+ // Get the denorm mode mask for that data type.
1797
+ switch (src0->GetType ())
1798
+ {
1799
+ case ISA_TYPE_DF:
1800
+ denormMode = DenormModeEncoding::Float64DenormRetain;
1801
+ break ;
1802
+
1803
+ case ISA_TYPE_F:
1804
+ denormMode = DenormModeEncoding::Float32DenormRetain;
1805
+ break ;
1806
+
1807
+ case ISA_TYPE_HF:
1808
+ denormMode = DenormModeEncoding::Float16DenormRetain;
1809
+ break ;
1810
+
1811
+ case ISA_TYPE_BF:
1812
+ denormMode = DenormModeEncoding::FloatBFTFDenormRetain;
1813
+ break ;
1814
+
1815
+ default :
1816
+ IGC_ASSERT_MESSAGE (0 , " Incorrect Float type." );
1817
+ }
1818
+
1819
+ // Check if the original denorm mode for the src0 data type
1820
+ // was 0 (flush to zero).
1821
+ // denormMode will always have a bit set for the given data
1822
+ // type. If currently set mode is 0 (flush to zero), the
1823
+ // ANDing these values will give 0, thus cr must be temporarily
1824
+ // updated and flushing the destination must be added,
1825
+ forceRetainDenorms &= (m_fpDenormMode && denormMode) == 0 ;
1826
+
1827
+ if (forceRetainDenorms)
1828
+ {
1829
+ SetDenormMode (denormMode);
1830
+ }
1831
+ }
1769
1832
1770
1833
VISA_VectorOpnd* srcOpnd0 = GetSourceOperand (src0, m_encoderState.m_srcOperand [0 ]);
1771
1834
VISA_VectorOpnd* srcOpnd1 = GetSourceOperand (src1, m_encoderState.m_srcOperand [1 ]);
@@ -1783,6 +1846,15 @@ namespace IGC
1783
1846
srcOpnd1,
1784
1847
srcOpnd2));
1785
1848
1849
+ if (forceRetainDenorms)
1850
+ {
1851
+ // Restore the original denorm mode.
1852
+ SetDenormMode (oldDenormMode);
1853
+
1854
+ // Force flushing the destination to zero by adding -0 to it.
1855
+ CVariable* negativeZero = m_program->ImmToVariable ((uint64_t )-0.0 , dst->GetType ());
1856
+ Add (dst, dst, negativeZero);
1857
+ }
1786
1858
1787
1859
}
1788
1860
@@ -3774,6 +3846,52 @@ namespace IGC
3774
3846
return VISA_3D_LOD;
3775
3847
case llvm_sample_killpix:
3776
3848
return VISA_3D_SAMPLE_KILLPIX;
3849
+ case llvm_sample_mlodptr:
3850
+ return VISA_3D_SAMPLE_MLOD;
3851
+ case llvm_sample_c_mlodptr:
3852
+ return VISA_3D_SAMPLE_C_MLOD;
3853
+ case llvm_sample_bc_mlodptr:
3854
+ return VISA_3D_SAMPLE_B_C;
3855
+ case llvm_sample_dc_mlodptr:
3856
+ return VISA_3D_SAMPLE_D_C_MLOD;
3857
+ case llvm_gather4Iptr:
3858
+ return VISA_3D_GATHER4_I;
3859
+ case llvm_gather4Bptr:
3860
+ return VISA_3D_GATHER4_B;
3861
+ case llvm_gather4Lptr:
3862
+ return VISA_3D_GATHER4_L;
3863
+ case llvm_gather4ICptr:
3864
+ return VISA_3D_GATHER4_I_C;
3865
+ case llvm_gather4LCptr:
3866
+ return VISA_3D_GATHER4_L_C;
3867
+ case llvm_ldlptr:
3868
+ return VISA_3D_LD_L;
3869
+ case llvm_sample_poptr:
3870
+ return VISA_3D_SAMPLE_PO;
3871
+ case llvm_sample_pobptr:
3872
+ return VISA_3D_SAMPLE_PO_B;
3873
+ case llvm_sample_polptr:
3874
+ return VISA_3D_SAMPLE_PO_L;
3875
+ case llvm_sample_pocptr:
3876
+ return VISA_3D_SAMPLE_PO_C;
3877
+ case llvm_sample_podptr:
3878
+ return VISA_3D_SAMPLE_PO_D;
3879
+ case llvm_sample_polcptr:
3880
+ return VISA_3D_SAMPLE_PO_L_C;
3881
+ case llvm_gather4POPackedptr:
3882
+ return VISA_3D_GATHER4_PO_PACKED;
3883
+ case llvm_gather4POPackedLptr:
3884
+ return VISA_3D_GATHER4_PO_PACKED_L;
3885
+ case llvm_gather4POPackedBptr:
3886
+ return VISA_3D_GATHER4_PO_PACKED_B;
3887
+ case llvm_gather4POPackedIptr:
3888
+ return VISA_3D_GATHER4_PO_PACKED_I;
3889
+ case llvm_gather4POPackedCptr:
3890
+ return VISA_3D_GATHER4_PO_PACKED_C;
3891
+ case llvm_gather4POPackedICptr:
3892
+ return VISA_3D_GATHER4_PO_PACKED_I_C;
3893
+ case llvm_gather4POPackedLCptr:
3894
+ return VISA_3D_GATHER4_PO_PACKED_L_C;
3777
3895
default :
3778
3896
IGC_ASSERT_MESSAGE (0 , " wrong sampler subopcode" );
3779
3897
return VISA_3D_SAMPLE;
@@ -4393,7 +4511,8 @@ namespace IGC
4393
4511
SaveOption (vISA_ActiveThreadsOnlyBarrier, true );
4394
4512
}
4395
4513
4396
- if ((context->type == ShaderType::OPENCL_SHADER || context->type == ShaderType::COMPUTE_SHADER) &&
4514
+ if ((context->type == ShaderType::OPENCL_SHADER || context->type == ShaderType::COMPUTE_SHADER
4515
+ || context->type == ShaderType::RAYTRACING_SHADER || context->type == ShaderType::BINDLESS_SHADER) &&
4397
4516
(m_program->m_Platform ->preemptionSupported () || IGC_IS_FLAG_ENABLED (ForcePreemptionWA)) &&
4398
4517
IGC_IS_FLAG_ENABLED (EnablePreemption))
4399
4518
{
@@ -9115,6 +9234,133 @@ namespace IGC
9115
9234
nullptr ));
9116
9235
}
9117
9236
9237
+ void CEncoder::LSC_TypedAtomic (
9238
+ AtomicOp atomic_op, ResourceDescriptor* resource,
9239
+ CVariable* pU, CVariable* pV, CVariable* pR,
9240
+ CVariable* pSrc0, CVariable* pSrc1, CVariable* pDst,
9241
+ unsigned elemSize, LSC_ADDR_SIZE addr_size)
9242
+ {
9243
+ // DG2: SIMD8, PVC: SIMD16, Xe2: SIMD32
9244
+ VISA_Exec_Size execSize = visaExecSize (m_encoderState.m_simdSize );
9245
+
9246
+ // convert to LSC_OP
9247
+ LSC_OP subOp = getLSCAtomicOpCode (atomic_op);
9248
+
9249
+ VISA_RawOpnd* dstOpnd = GetRawSource (pDst, 0 );
9250
+ // TODO unify the way we calculate offset for raw sources, maybe we shouldn't use offset at all
9251
+ VISA_RawOpnd* pUOpnd = GetRawSource (pU, m_encoderState.m_srcOperand [0 ].subVar * getGRFSize ());
9252
+ VISA_RawOpnd* pVOpnd = GetRawSource (pV, m_encoderState.m_srcOperand [0 ].subVar * getGRFSize ());
9253
+ VISA_RawOpnd* pROpnd = GetRawSource (pR, m_encoderState.m_srcOperand [0 ].subVar * getGRFSize ());
9254
+ VISA_RawOpnd* pSrc0Opnd = GetRawSource (pSrc0, m_encoderState.m_srcOperand [1 ].subVar * getGRFSize ());
9255
+ VISA_RawOpnd* pSrc1Opnd = GetRawSource (pSrc1, m_encoderState.m_srcOperand [1 ].subVar * getGRFSize ());
9256
+
9257
+ VISA_PredOpnd* predOpnd = GetFlagOperand (m_encoderState.m_flag );
9258
+ IGC_ASSERT (m_encoderState.m_dstOperand .subVar == 0 );
9259
+
9260
+ VISA_EMask_Ctrl mask = ConvertMaskToVisaType (m_encoderState.m_mask , m_encoderState.m_noMask );
9261
+ VISA_VectorOpnd* globalOffsetOpnd = GetVISALSCSurfaceOpnd (resource->m_surfaceType , resource->m_resource );
9262
+ LSC_CACHE_OPTS cache{ LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT };
9263
+ LSC_DATA_SHAPE dataShape{};
9264
+ dataShape.size = LSC_GetElementSize (elemSize);
9265
+ dataShape.order = LSC_DATA_ORDER_NONTRANSPOSE;
9266
+ dataShape.elems = LSC_GetElementNum (1 );
9267
+
9268
+ V (vKernel->AppendVISALscTypedAtomic (
9269
+ subOp,
9270
+ predOpnd,
9271
+ execSize,
9272
+ mask,
9273
+ cache,
9274
+ getLSCAddrType (resource),
9275
+ addr_size,
9276
+ dataShape,
9277
+ globalOffsetOpnd, 0 ,
9278
+ dstOpnd,
9279
+ pUOpnd, 0 ,
9280
+ pVOpnd, 0 ,
9281
+ pROpnd, 0 ,
9282
+ nullptr ,
9283
+ pSrc0Opnd,
9284
+ pSrc1Opnd));
9285
+ }
9286
+
9287
+ void CEncoder::LSC_Typed2dBlock (
9288
+ LSC_OP subOpcode,
9289
+ CVariable* srcDst,
9290
+ e_predefSurface surfaceType,
9291
+ CVariable* bufId,
9292
+ CVariable* xOffset,
9293
+ CVariable* yOffset,
9294
+ int blockWidth,
9295
+ int blockHeight)
9296
+ {
9297
+ LSC_CACHE_OPTS cache{ LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT };
9298
+ LSC_DATA_SHAPE_TYPED_BLOCK2D dataShape2D{};
9299
+ dataShape2D.height = blockHeight;
9300
+ dataShape2D.width = blockWidth;
9301
+
9302
+ VISA_VectorOpnd* surfOpnd = GetVISALSCSurfaceOpnd (surfaceType, bufId);
9303
+ VISA_VectorOpnd* xOffsetOpnd = GetUniformSource (xOffset);
9304
+ VISA_VectorOpnd* yOffsetOpnd = GetUniformSource (yOffset);
9305
+ VISA_RawOpnd* dstOpnd = nullptr ;
9306
+ VISA_RawOpnd* srcOpnd = nullptr ;
9307
+ if (subOpcode == LSC_LOAD_BLOCK2D)
9308
+ {
9309
+ dstOpnd = GetRawDestination (srcDst);
9310
+ }
9311
+ else if (subOpcode == LSC_STORE_BLOCK2D)
9312
+ {
9313
+ srcOpnd = GetRawSource (srcDst);
9314
+ }
9315
+
9316
+ V (vKernel->AppendVISALscTypedBlock2DInst (
9317
+ subOpcode,
9318
+ cache,
9319
+ getLSCAddrType (surfaceType),
9320
+ dataShape2D,
9321
+ surfOpnd,
9322
+ 0 ,
9323
+ dstOpnd,
9324
+ xOffsetOpnd,
9325
+ yOffsetOpnd,
9326
+ 0 ,
9327
+ 0 ,
9328
+ srcOpnd));
9329
+ }
9330
+
9331
+ void CEncoder::LSC_UntypedAppendCounterAtomic (
9332
+ LSC_OP lscOp,
9333
+ ResourceDescriptor* resource,
9334
+ CVariable* dst,
9335
+ CVariable* src0)
9336
+ {
9337
+
9338
+ LSC_ADDR_TYPE AddrType = getLSCAddrType (resource);
9339
+ LSC_CACHE_OPTS cache{ LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT };
9340
+ VISA_VectorOpnd* surface = GetVISALSCSurfaceOpnd (resource->m_surfaceType , resource->m_resource );
9341
+
9342
+ LSC_DATA_SHAPE dataShape{};
9343
+ dataShape.size = LSC_GetElementSize (32 );
9344
+ dataShape.order = LSC_DATA_ORDER_NONTRANSPOSE;
9345
+ dataShape.elems = LSC_GetElementNum (1 );
9346
+
9347
+ VISA_RawOpnd* dstOpnd = GetRawDestination (dst);
9348
+ VISA_RawOpnd* srcOpnd = GetRawSource (src0);
9349
+ V (vKernel->AppendVISALscUntypedAppendCounterAtomicInst (
9350
+ lscOp,
9351
+ GetFlagOperand (m_encoderState.m_flag ),
9352
+ visaExecSize (m_encoderState.m_simdSize ),
9353
+ ConvertMaskToVisaType (m_encoderState.m_mask , m_encoderState.m_noMask ),
9354
+ cache,
9355
+ AddrType,
9356
+ dataShape,
9357
+ surface,
9358
+ 0x0 ,
9359
+ dstOpnd,
9360
+ srcOpnd));
9361
+
9362
+ }
9363
+
9118
9364
void CEncoder::AppendBreakpoint () {
9119
9365
V (vKernel->AppendVISABreakpointInst ());
9120
9366
}
0 commit comments