Skip to content

Commit 9801fdf

Browse files
authored
[X86][FP16] Customize MLOAD/MSTORE(vXf16) if VLX is not enabled (#142331)
Fixes: https://godbolt.org/z/fa4z97xsY
1 parent e9fad0e commit 9801fdf

File tree

2 files changed

+545
-204
lines changed

2 files changed

+545
-204
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2238,7 +2238,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
22382238
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
22392239
setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
22402240

2241-
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
2241+
for (auto VT : {MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16,
2242+
MVT::v16f16, MVT::v8f16}) {
22422243
setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
22432244
setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
22442245
}
@@ -33192,8 +33193,8 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
3319233193
"Cannot lower masked load op.");
3319333194

3319433195
assert((ScalarVT.getSizeInBits() >= 32 ||
33195-
(Subtarget.hasBWI() &&
33196-
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
33196+
(Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16 ||
33197+
ScalarVT == MVT::f16))) &&
3319733198
"Unsupported masked load op.");
3319833199

3319933200
// This operation is legal for targets with VLX, but without
@@ -33240,9 +33241,9 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
3324033241
"Cannot lower masked store op.");
3324133242

3324233243
assert((ScalarVT.getSizeInBits() >= 32 ||
33243-
(Subtarget.hasBWI() &&
33244-
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
33245-
"Unsupported masked store op.");
33244+
(Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16 ||
33245+
ScalarVT == MVT::f16))) &&
33246+
"Unsupported masked store op.");
3324633247

3324733248
// This operation is legal for targets with VLX, but without
3324833249
// VLX the vector should be widened to 512 bits.

0 commit comments

Comments
 (0)