Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
assert(intrin.op3->IsVectorZero());
break;

case NI_Sve2_ConvertToSingleOdd:
case NI_Sve2_ConvertToSingleOddRoundToOdd:
embOpt = INS_OPTS_D_TO_S;
break;

default:
break;
}
Expand Down Expand Up @@ -798,6 +803,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
falseReg, opt);
break;

case NI_Sve2_ConvertToSingleOdd:
case NI_Sve2_ConvertToSingleOddRoundToOdd:
// TODO-SVE: Optimise away the explicit copying of `embMaskOp1Reg` to `targetReg`.
// For these intrinsics we cannot use movprfx instruction to populate `targetReg` with
// `embMaskOp1Reg`. Thus, we need to perform move before the operation.
GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg,
/* canSkip */ true, INS_OPTS_SCALABLE_S);
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
break;

default:
assert(targetReg != embMaskOp2Reg);

Expand Down Expand Up @@ -825,12 +840,25 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}
}
// If `targetReg` and `falseReg` are not same, then we need to move it to `targetReg` first
// so the `insEmbMask` operation can be merged on top of it.
else if (targetReg != falseReg)
{
// If `targetReg` and `falseReg` are not same, then we need to move it to `targetReg` first
// so the `insEmbMask` operation can be merged on top of it.

if (falseReg != embMaskOp1Reg)
if ((intrinEmbMask.id == NI_Sve2_ConvertToSingleOdd) ||
(intrinEmbMask.id == NI_Sve2_ConvertToSingleOddRoundToOdd))
{
// TODO-SVE: Optimise away the explicit copying of `embMaskOp1Reg` to `targetReg`.
// For these intrinsics we cannot use movprfx instruction to populate `targetReg` with
// `embMaskOp1Reg`. Thus, we need to perform move before the operation, and then "sel" to
// select the active lanes.
GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we have some asserts in other places that asserts we aren't overwriting one of the input registers. Do we need such asserts here as well (i.e. tgt = embMaskOp1Reg won't overwrite maskReg or falseReg)? -- This is basically done to ensure that things were correctly marked as delayFree in LSRA.

Copy link
Contributor Author

@SwapnilGaikwad SwapnilGaikwad Oct 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll take a look. I didn't understand fully how an assert would help here. For Sve2_ConvertToSingleOdd* we will emit FCVTNT/FCVTXNT which has destination as Ztied so overwriting the input anyway.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's RMW and so overwriting one of the inputs is expected. The consideration is ensuring we're not overwriting the other input.

That is, we have a requirement that Ztied.S (embMaskOp1Reg) be targetReg and so we do the mov targetReg, embMaskOp1Reg to ensure the first operand is in this register.

However, if we didn't mark falseReg (Zop.D) as "delay free" then we can end up in a scenario where (targetReg == falseReg) && (targetReg != embMaskOp1Reg), which would cause this move to "trash" the data and end up with incorrect results.

The assert is meant to catch this scenario and ensure we correctly marked it as delay free in LSRA. We'd want similar assertions for other correctness considerations (not that these are needed here, just generally speaking), like the typical 3 cases of:

  • The MOVPRFX instruction must be unpredicated, or be predicated using the same governing predicate register and source element size as this instruction.
  • The MOVPRFX instruction must specify the same destination register as this instruction.
  • The destination register must not refer to architectural register state referenced by any other source operand register of this instruction.

/* canSkip */ true, INS_OPTS_SCALABLE_S);
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
falseReg, opt);
}
else if (falseReg != embMaskOp1Reg)
{
// At the point, targetReg != embMaskOp1Reg != falseReg
if (HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id))
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,8 @@ HARDWARE_INTRINSIC(Sve2, BitwiseSelectLeftInverted,
HARDWARE_INTRINSIC(Sve2, BitwiseSelectRightInverted, -1, 3, {INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, ConvertToDoubleOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtlt, INS_sve_fcvtlt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, ConvertToSingleEvenRoundToOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtx, INS_sve_fcvtx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, ConvertToSingleOdd, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtnt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, ConvertToSingleOddRoundToOdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtxnt, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, DotProductRotateComplex, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cdot, INS_invalid, INS_sve_cdot, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand)
HARDWARE_INTRINSIC(Sve2, DotProductRotateComplexBySelectedIndex, -1, 5, {INS_sve_cdot, INS_invalid, INS_sve_cdot, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(Sve2, FusedAddHalving, -1, -1, {INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1196,6 +1196,26 @@ internal Arm64() { }
/// </summary>
public static Vector<ulong> BitwiseSelectRightInverted(Vector<ulong> select, Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }

// Down convert and narrow (top)

/// <summary>
/// svfloat32_t svcvtnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op)
/// svfloat32_t svcvtnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op)
/// FCVTNT Ztied.S, Pg/M, Zop.D
/// FCVTNT Ztied.S, Pg/M, Zop.D
/// </summary>
public static Vector<float> ConvertToSingleOdd(Vector<float> even, Vector<double> value) { throw new PlatformNotSupportedException(); }

// Down convert, rounding to odd (top)

/// <summary>
/// svfloat32_t svcvtxnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op)
/// svfloat32_t svcvtxnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op)
/// FCVTXNT Ztied.S, Pg/M, Zop.D
/// FCVTXNT Ztied.S, Pg/M, Zop.D
/// </summary>
public static Vector<float> ConvertToSingleOddRoundToOdd(Vector<float> even, Vector<double> value) { throw new PlatformNotSupportedException(); }

// Complex dot product

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1196,6 +1196,26 @@ internal Arm64() { }
/// </summary>
public static Vector<ulong> BitwiseSelectRightInverted(Vector<ulong> select, Vector<ulong> left, Vector<ulong> right) => BitwiseSelectRightInverted(select, left, right);

// Down convert and narrow (top)

/// <summary>
/// svfloat32_t svcvtnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op)
/// svfloat32_t svcvtnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op)
/// FCVTNT Ztied.S, Pg/M, Zop.D
/// FCVTNT Ztied.S, Pg/M, Zop.D
/// </summary>
public static Vector<float> ConvertToSingleOdd(Vector<float> even, Vector<double> value) => ConvertToSingleOdd(even, value);

// Down convert, rounding to odd (top)

/// <summary>
/// svfloat32_t svcvtxnt_f32[_f64]_m(svfloat32_t even, svbool_t pg, svfloat64_t op)
/// svfloat32_t svcvtxnt_f32[_f64]_x(svfloat32_t even, svbool_t pg, svfloat64_t op)
/// FCVTXNT Ztied.S, Pg/M, Zop.D
/// FCVTXNT Ztied.S, Pg/M, Zop.D
/// </summary>
public static Vector<float> ConvertToSingleOddRoundToOdd(Vector<float> even, Vector<double> value) => ConvertToSingleOddRoundToOdd(even, value);

// Complex dot product

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6354,6 +6354,8 @@ internal Arm64() { }
public static System.Numerics.Vector<ulong> BitwiseSelectRightInverted(System.Numerics.Vector<ulong> select, System.Numerics.Vector<ulong> left, System.Numerics.Vector<ulong> right) { throw null; }
public static System.Numerics.Vector<double> ConvertToDoubleOdd(System.Numerics.Vector<float> value) { throw null; }
public static System.Numerics.Vector<float> ConvertToSingleEvenRoundToOdd(System.Numerics.Vector<double> value) { throw null; }
public static System.Numerics.Vector<float> ConvertToSingleOdd(System.Numerics.Vector<float> even, System.Numerics.Vector<double> value) { throw null; }
public static System.Numerics.Vector<float> ConvertToSingleOddRoundToOdd(System.Numerics.Vector<float> even, System.Numerics.Vector<double> value) { throw null; }
public static System.Numerics.Vector<int> DotProductRotateComplex(System.Numerics.Vector<int> op1, System.Numerics.Vector<sbyte> op2, System.Numerics.Vector<sbyte> op3, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; }
public static System.Numerics.Vector<long> DotProductRotateComplex(System.Numerics.Vector<long> op1, System.Numerics.Vector<short> op2, System.Numerics.Vector<short> op3, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; }
public static System.Numerics.Vector<int> DotProductRotateComplexBySelectedIndex(System.Numerics.Vector<int> op1, System.Numerics.Vector<sbyte> op2, System.Numerics.Vector<sbyte> op3, [ConstantExpected(Min = 0, Max = (byte)(3))] byte imm_index, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; }
Expand Down
Loading
Loading