Skip to content

Commit f48fbe9

Browse files
SwapnilGaikwadjacob-crawleytannergooding
authored
Implement SVE2 ConvertToSingleOdd and ConvertToSingleRoundToOdd (#118957)
Contributes to #94018 @dotnet/arm64-contrib @amanasifkhalid @a74nh Co-authored-by: @jacob-crawley --------- Co-authored-by: Jacob Crawley <[email protected]> Co-authored-by: Tanner Gooding <[email protected]>
1 parent 118ff8e commit f48fbe9

File tree

7 files changed

+130
-31
lines changed

7 files changed

+130
-31
lines changed

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
722722
assert(intrin.op3->IsVectorZero());
723723
break;
724724

725+
case NI_Sve2_ConvertToSingleOdd:
726+
case NI_Sve2_ConvertToSingleOddRoundToOdd:
727+
embOpt = INS_OPTS_D_TO_S;
728+
break;
729+
725730
default:
726731
break;
727732
}
@@ -798,6 +803,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
798803
falseReg, opt);
799804
break;
800805

806+
case NI_Sve2_ConvertToSingleOdd:
807+
case NI_Sve2_ConvertToSingleOddRoundToOdd:
808+
// TODO-SVE: Optimise away the explicit copying of `embMaskOp1Reg` to `targetReg`.
809+
// For these intrinsics we cannot use movprfx instruction to populate `targetReg` with
810+
// `embMaskOp1Reg`. Thus, we need to perform move before the operation.
811+
GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg,
812+
/* canSkip */ true, INS_OPTS_SCALABLE_S);
813+
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
814+
break;
815+
801816
default:
802817
assert(targetReg != embMaskOp2Reg);
803818

@@ -825,12 +840,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
825840
break;
826841
}
827842
}
843+
// If `targetReg` and `falseReg` are not same, then we need to move it to `targetReg` first
844+
// so the `insEmbMask` operation can be merged on top of it.
828845
else if (targetReg != falseReg)
829846
{
830-
// If `targetReg` and `falseReg` are not same, then we need to move it to `targetReg` first
831-
// so the `insEmbMask` operation can be merged on top of it.
832847

833-
if (falseReg != embMaskOp1Reg)
848+
if ((intrinEmbMask.id == NI_Sve2_ConvertToSingleOdd) ||
849+
(intrinEmbMask.id == NI_Sve2_ConvertToSingleOddRoundToOdd))
850+
{
851+
// TODO-SVE: Optimise away the explicit copying of `embMaskOp1Reg` to `targetReg`.
852+
// For these intrinsics we cannot use movprfx instruction to populate `targetReg` with
853+
// `embMaskOp1Reg`. Thus, we need to perform move before the operation, and then "sel" to
854+
// select the active lanes.
855+
assert((targetReg != embMaskOp2Reg) || (embMaskOp1Reg == targetReg));
856+
GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg,
857+
/* canSkip */ true, INS_OPTS_SCALABLE_S);
858+
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
859+
GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
860+
falseReg, opt);
861+
}
862+
else if (falseReg != embMaskOp1Reg)
834863
{
835864
// At the point, targetReg != embMaskOp1Reg != falseReg
836865
if (HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id))

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,8 @@ HARDWARE_INTRINSIC(Sve2, BitwiseSelectLeftInverted,
342342
HARDWARE_INTRINSIC(Sve2, BitwiseSelectRightInverted, -1, 3, {INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
343343
HARDWARE_INTRINSIC(Sve2, ConvertToDoubleOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtlt, INS_sve_fcvtlt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
344344
HARDWARE_INTRINSIC(Sve2, ConvertToSingleEvenRoundToOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtx, INS_sve_fcvtx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
345+
HARDWARE_INTRINSIC(Sve2, ConvertToSingleOdd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtnt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
346+
HARDWARE_INTRINSIC(Sve2, ConvertToSingleOddRoundToOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtxnt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
345347
HARDWARE_INTRINSIC(Sve2, DotProductRotateComplex, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cdot, INS_invalid, INS_sve_cdot, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand)
346348
HARDWARE_INTRINSIC(Sve2, DotProductRotateComplexBySelectedIndex, -1, 5, {INS_sve_cdot, INS_invalid, INS_sve_cdot, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg)
347349
HARDWARE_INTRINSIC(Sve2, FusedAddHalving, -1, -1, {INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.PlatformNotSupported.cs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,6 +1196,26 @@ internal Arm64() { }
11961196
/// </summary>
11971197
public static Vector<ulong> BitwiseSelectRightInverted(Vector<ulong> select, Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }
11981198

1199+
// Down convert and narrow (top)
1200+
1201+
/// <summary>
1202+
/// svfloat32_t svcvtnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op)
1203+
/// svfloat32_t svcvtnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op)
1204+
/// FCVTNT Ztied.S, Pg/M, Zop.D
1205+
/// FCVTNT Ztied.S, Pg/M, Zop.D
1206+
/// </summary>
1207+
public static Vector<float> ConvertToSingleOdd(Vector<float> even, Vector<double> value) { throw new PlatformNotSupportedException(); }
1208+
1209+
// Down convert, rounding to odd (top)
1210+
1211+
/// <summary>
1212+
/// svfloat32_t svcvtxnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op)
1213+
/// svfloat32_t svcvtxnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op)
1214+
/// FCVTXNT Ztied.S, Pg/M, Zop.D
1215+
/// FCVTXNT Ztied.S, Pg/M, Zop.D
1216+
/// </summary>
1217+
public static Vector<float> ConvertToSingleOddRoundToOdd(Vector<float> even, Vector<double> value) { throw new PlatformNotSupportedException(); }
1218+
11991219
// Complex dot product
12001220

12011221
/// <summary>

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.cs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,6 +1196,26 @@ internal Arm64() { }
11961196
/// </summary>
11971197
public static Vector<ulong> BitwiseSelectRightInverted(Vector<ulong> select, Vector<ulong> left, Vector<ulong> right) => BitwiseSelectRightInverted(select, left, right);
11981198

1199+
// Down convert and narrow (top)
1200+
1201+
/// <summary>
1202+
/// svfloat32_t svcvtnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op)
1203+
/// svfloat32_t svcvtnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op)
1204+
/// FCVTNT Ztied.S, Pg/M, Zop.D
1205+
/// FCVTNT Ztied.S, Pg/M, Zop.D
1206+
/// </summary>
1207+
public static Vector<float> ConvertToSingleOdd(Vector<float> even, Vector<double> value) => ConvertToSingleOdd(even, value);
1208+
1209+
// Down convert, rounding to odd (top)
1210+
1211+
/// <summary>
1212+
/// svfloat32_t svcvtxnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op)
1213+
/// svfloat32_t svcvtxnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op)
1214+
/// FCVTXNT Ztied.S, Pg/M, Zop.D
1215+
/// FCVTXNT Ztied.S, Pg/M, Zop.D
1216+
/// </summary>
1217+
public static Vector<float> ConvertToSingleOddRoundToOdd(Vector<float> even, Vector<double> value) => ConvertToSingleOddRoundToOdd(even, value);
1218+
11991219
// Complex dot product
12001220

12011221
/// <summary>

src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6354,6 +6354,8 @@ internal Arm64() { }
63546354
public static System.Numerics.Vector<ulong> BitwiseSelectRightInverted(System.Numerics.Vector<ulong> select, System.Numerics.Vector<ulong> left, System.Numerics.Vector<ulong> right) { throw null; }
63556355
public static System.Numerics.Vector<double> ConvertToDoubleOdd(System.Numerics.Vector<float> value) { throw null; }
63566356
public static System.Numerics.Vector<float> ConvertToSingleEvenRoundToOdd(System.Numerics.Vector<double> value) { throw null; }
6357+
public static System.Numerics.Vector<float> ConvertToSingleOdd(System.Numerics.Vector<float> even, System.Numerics.Vector<double> value) { throw null; }
6358+
public static System.Numerics.Vector<float> ConvertToSingleOddRoundToOdd(System.Numerics.Vector<float> even, System.Numerics.Vector<double> value) { throw null; }
63576359
public static System.Numerics.Vector<int> DotProductRotateComplex(System.Numerics.Vector<int> op1, System.Numerics.Vector<sbyte> op2, System.Numerics.Vector<sbyte> op3, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; }
63586360
public static System.Numerics.Vector<long> DotProductRotateComplex(System.Numerics.Vector<long> op1, System.Numerics.Vector<short> op2, System.Numerics.Vector<short> op3, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; }
63596361
public static System.Numerics.Vector<int> DotProductRotateComplexBySelectedIndex(System.Numerics.Vector<int> op1, System.Numerics.Vector<sbyte> op2, System.Numerics.Vector<sbyte> op3, [ConstantExpected(Min = 0, Max = (byte)(3))] byte imm_index, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; }

0 commit comments

Comments
 (0)