Commit c6c359e

Author: liuzhenya

[CIR] X86 vector fcmp-sse vector builtins

1 parent: de4aa9c

File tree

6 files changed: +231 −11 lines


clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

Lines changed: 18 additions & 0 deletions

@@ -131,6 +131,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return cir::IntType::get(getContext(), n, false);
   }
 
+  static unsigned getCIRIntOrFloatBitWidth(mlir::Type eltTy) {
+    if (auto intType = mlir::dyn_cast<cir::IntTypeInterface>(eltTy))
+      return intType.getWidth();
+    if (auto floatType = mlir::dyn_cast<cir::FPTypeInterface>(eltTy))
+      return floatType.getWidth();
+
+    llvm_unreachable("Wrong type passed in or Non-CIR type passed in");
+  }
   cir::IntType getSIntNTy(int n) {
     return cir::IntType::get(getContext(), n, true);
   }

@@ -575,6 +583,16 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return cir::CmpOp::create(*this, loc, getBoolTy(), kind, lhs, rhs);
   }
 
+  cir::VecCmpOp createVecCompare(mlir::Location loc, cir::CmpOpKind kind,
+                                 mlir::Value lhs, mlir::Value rhs) {
+    VectorType vecCast = mlir::cast<VectorType>(lhs.getType());
+    auto integralTy =
+        getSIntNTy(getCIRIntOrFloatBitWidth(vecCast.getElementType()));
+    VectorType integralVecTy =
+        VectorType::get(context, integralTy, vecCast.getSize());
+    return cir::VecCmpOp::create(*this, loc, integralVecTy, kind, lhs, rhs);
+  }
+
   mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand) {
     return createCompare(loc, cir::CmpOpKind::ne, operand, operand);
   }
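The two helpers above fix the result type of a vector comparison: the lane count is preserved and each lane becomes a signed integer as wide as the operand's element type. A minimal standalone model of that mapping (plain C++; VecTy and vecCompareResultTy are illustrative stand-ins, not the actual CIR/MLIR types):

#include <cstdio>

// Illustrative stand-in for a vector type: element width, element kind, lanes.
struct VecTy {
  unsigned eltBits;
  bool isFloatElt;
  unsigned lanes;
};

// Mirrors createVecCompare's type logic: getCIRIntOrFloatBitWidth reads the
// element width, getSIntNTy rebuilds it as a signed integer, lanes unchanged.
static VecTy vecCompareResultTy(VecTy operand) {
  return {operand.eltBits, /*isFloatElt=*/false, operand.lanes};
}

int main() {
  VecTy m128 = {32, true, 4};  // like !cir.vector<!cir.float x 4>
  VecTy m128d = {64, true, 2}; // like !cir.vector<!cir.double x 2>
  VecTy r1 = vecCompareResultTy(m128);
  VecTy r2 = vecCompareResultTy(m128d);
  std::printf("<4 x float>  compares to <%u x s%u>\n", r1.lanes, r1.eltBits);
  std::printf("<2 x double> compares to <%u x s%u>\n", r2.lanes, r2.eltBits);
}

This is why the tests at the end of this commit expect !cir.vector<!s32i x 4> from a __m128 compare and !cir.vector<!s64i x 2> from a __m128d compare.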

clang/include/clang/CIR/MissingFeatures.h

Lines changed: 1 addition & 0 deletions

@@ -253,6 +253,7 @@ struct MissingFeatures {
   static bool emitBranchThroughCleanup() { return false; }
   static bool emitCheckedInBoundsGEP() { return false; }
   static bool emitCondLikelihoodViaExpectIntrinsic() { return false; }
+  static bool emitConstrainedFPCall() { return false; }
   static bool emitLifetimeMarkers() { return false; }
   static bool emitLValueAlignmentAssumption() { return false; }
   static bool emitNullCheckForDeleteCalls() { return false; }
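The new emitConstrainedFPCall() hook follows the MissingFeatures convention used throughout ClangIR: each hook returns false, so call sites can assert on it today while marking exactly where codegen must change once the feature lands. A simplified sketch of the idiom (the struct here is a stand-in for this header):

#include <cassert>

// Stand-in for clang/include/clang/CIR/MissingFeatures.h.
struct MissingFeatures {
  static bool emitConstrainedFPCall() { return false; }
};

void emitSomeBuiltin(bool fpConstrained) {
  // Common form: always passes today, and a grep for the hook name finds
  // every code path to revisit when the feature is implemented.
  assert(!MissingFeatures::emitConstrainedFPCall());

  // The non-negated form (as in getVectorFCmpIR below) guards a branch that
  // must not be reachable yet: the assert fires if it is ever taken.
  if (fpConstrained)
    assert(MissingFeatures::emitConstrainedFPCall());
}

int main() { emitSomeBuiltin(false); }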

clang/lib/CIR/CodeGen/CIRGenBuilder.h

Lines changed: 22 additions & 0 deletions

@@ -27,13 +27,35 @@ namespace clang::CIRGen {
 
 class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   const CIRGenTypeCache &typeCache;
+  bool IsFPConstrained = false;
   llvm::StringMap<unsigned> recordNames;
   llvm::StringMap<unsigned> globalsVersioning;
 
 public:
   CIRGenBuilderTy(mlir::MLIRContext &mlirContext, const CIRGenTypeCache &tc)
       : CIRBaseBuilderTy(mlirContext), typeCache(tc) {}
 
+  //
+  // Floating point specific helpers
+  // -------------------------------
+  //
+
+  /// Enable/disable use of constrained floating-point math. When enabled,
+  /// the CreateF<op>() calls instead create constrained floating-point
+  /// intrinsic calls. Fast-math flags are unaffected by this setting.
+  void setIsFPConstrained(bool IsCon) {
+    if (IsCon)
+      llvm_unreachable("Constrained FP NYI");
+    IsFPConstrained = IsCon;
+  }
+
+  /// Query for the use of constrained floating-point math.
+  bool getIsFPConstrained() {
+    if (IsFPConstrained)
+      llvm_unreachable("Constrained FP NYI");
+    return IsFPConstrained;
+  }
+
   /// Get a cir::ConstArrayAttr for a string literal.
   /// Note: This is different from what is returned by
   /// mlir::Builder::getStringAttr() which is an mlir::StringAttr.

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 85 additions & 11 deletions

@@ -21,18 +21,18 @@
 using namespace clang;
 using namespace clang::CIRGen;
 
-mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
-                                               const CallExpr *e) {
-  if (builtinID == Builtin::BI__builtin_cpu_is) {
-    cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_is");
+mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
+                                               const CallExpr *E) {
+  if (BuiltinID == Builtin::BI__builtin_cpu_is) {
+    cgm.errorNYI(E->getSourceRange(), "__builtin_cpu_is");
     return {};
   }
-  if (builtinID == Builtin::BI__builtin_cpu_supports) {
-    cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_supports");
+  if (BuiltinID == Builtin::BI__builtin_cpu_supports) {
+    cgm.errorNYI(E->getSourceRange(), "__builtin_cpu_supports");
     return {};
   }
-  if (builtinID == Builtin::BI__builtin_cpu_init) {
-    cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_init");
+  if (BuiltinID == Builtin::BI__builtin_cpu_init) {
+    cgm.errorNYI(E->getSourceRange(), "__builtin_cpu_init");
     return {};
   }
 
@@ -43,7 +43,56 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   // Find out if any arguments are required to be integer constant expressions.
   assert(!cir::MissingFeatures::handleBuiltinICEArguments());
 
-  switch (builtinID) {
+  llvm::SmallVector<mlir::Value, 4> Ops;
+
+  // Find out if any arguments are required to be integer constant expressions.
+  unsigned ICEArguments = 0;
+  ASTContext::GetBuiltinTypeError Error;
+  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+  assert(Error == ASTContext::GE_None && "Should not codegen an error");
+  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+    Ops.push_back(emitScalarOrConstFoldImmArg(ICEArguments, i, E));
+  }
+
+  // OG (the original LLVM codegen) supports unordered comparisons in
+  // addition to ordered ones, while CIR does not.
+  //
+  // This means we cannot encode a comparison code such as UGT (unordered
+  // greater than), at least not at the CIR level.
+  //
+  // The boolean shouldInvert compensates for this. For example, to get the
+  // comparison code UGT we pass getVectorFCmpIR(OLE, shouldInvert = true),
+  // since OLE is the inverse of UGT.
+  //
+  // There are several other ways this could be supported:
+  // - Register extra CmpOpKinds for the unordered comparison types and build
+  //   the translation code to go from CIR to the LLVM dialect. Note that we
+  //   get this naturally with shouldInvert, benefiting from existing
+  //   infrastructure, at the cost of an extra `not` at the CIR level.
+  // - Add the extra comparison codes to a new VecCmpOpKind instead of
+  //   cluttering CmpOpKind.
+  // - Add a boolean to VecCmpOp indicating whether the comparison is ordered
+  //   or unordered.
+  // - Emit the intrinsic call directly instead of using this helper; see how
+  //   the LLVM lowering handles this.
+  auto getVectorFCmpIR = [this, &Ops, &E](cir::CmpOpKind pred,
+                                          bool shouldInvert, bool isSignaling) {
+    assert(!cir::MissingFeatures::cgFPOptionsRAII());
+    auto loc = getLoc(E->getExprLoc());
+    mlir::Value cmp;
+    if (builder.getIsFPConstrained())
+      // TODO: Add the isSignaling boolean once emitConstrainedFPCall is
+      // implemented.
+      assert(cir::MissingFeatures::emitConstrainedFPCall());
+    else
+      cmp = builder.createVecCompare(loc, pred, Ops[0], Ops[1]);
+
+    mlir::Value bitCast = builder.createBitcast(
+        shouldInvert ? builder.createNot(cmp) : cmp, Ops[0].getType());
+    return bitCast;
+  };
+
+  switch (BuiltinID) {
   default:
     return {};
   case X86::BI_mm_prefetch:

@@ -710,10 +759,18 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cmpunordpd:
   case X86::BI__builtin_ia32_cmpneqps:
   case X86::BI__builtin_ia32_cmpneqpd:
+    cgm.errorNYI(E->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(BuiltinID));
+    return {};
   case X86::BI__builtin_ia32_cmpnltps:
   case X86::BI__builtin_ia32_cmpnltpd:
+    return getVectorFCmpIR(cir::CmpOpKind::lt, /*shouldInvert=*/true,
+                           /*isSignaling=*/true);
   case X86::BI__builtin_ia32_cmpnleps:
   case X86::BI__builtin_ia32_cmpnlepd:
+    return getVectorFCmpIR(cir::CmpOpKind::le, /*shouldInvert=*/true,
+                           /*isSignaling=*/true);
   case X86::BI__builtin_ia32_cmpordps:
   case X86::BI__builtin_ia32_cmpordpd:
   case X86::BI__builtin_ia32_cmpph128_mask:

@@ -798,9 +855,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
   case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
   case X86::BI__builtin_ia32_prefetchi:
-    cgm.errorNYI(e->getSourceRange(),
+    cgm.errorNYI(E->getSourceRange(),
                  std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
+                     getContext().BuiltinInfo.getName(BuiltinID));
     return {};
   }
 }
+
+mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg(unsigned ICEArguments,
+                                                        unsigned Idx,
+                                                        const CallExpr *E) {
+  mlir::Value Arg = {};
+  if ((ICEArguments & (1 << Idx)) == 0) {
+    Arg = emitScalarExpr(E->getArg(Idx));
+  } else {
+    // If this is required to be a constant, constant fold it so that we
+    // know that the generated intrinsic gets a ConstantInt.
+    std::optional<llvm::APSInt> Result =
+        E->getArg(Idx)->getIntegerConstantExpr(getContext());
+    assert(Result && "Expected argument to be a constant");
+    Arg = builder.getConstInt(getLoc(E->getSourceRange()), *Result);
+  }
+  return Arg;
+}
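The shouldInvert trick in getVectorFCmpIR rests on an IEEE-754 identity: an unordered comparison is the exact negation of the complementary ordered one, NaNs included. A quick standalone check (plain C++, independent of this commit) that UGT(a, b) == !OLE(a, b):

#include <cassert>
#include <cmath>

// Unordered-or-greater-than: true if either operand is NaN, or a > b.
static bool ugt(float a, float b) {
  return std::isnan(a) || std::isnan(b) || a > b;
}

int main() {
  const float vals[] = {-1.0f, 0.0f, 2.5f, std::nanf("")};
  for (float a : vals)
    for (float b : vals)
      // C++ `<=` is the ordered OLE: false whenever a NaN is involved,
      // so its negation is exactly UGT.
      assert(ugt(a, b) == !(a <= b));
}

The same reasoning gives UGE == !OLT, which is why cmpnltps/cmpnltpd go through CmpOpKind::lt with shouldInvert = true and surface as fcmp uge in the tests below.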

clang/lib/CIR/CodeGen/CIRGenFunction.h

Lines changed: 3 additions & 0 deletions

@@ -1699,6 +1699,9 @@ class CIRGenFunction : public CIRGenTypeCache {
   void emitScalarInit(const clang::Expr *init, mlir::Location loc,
                       LValue lvalue, bool capturedByInit = false);
 
+  mlir::Value emitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
+                                          const CallExpr *E);
+
   void emitStaticVarDecl(const VarDecl &d, cir::GlobalLinkageKind linkage);
 
   void emitStoreOfComplex(mlir::Location loc, mlir::Value v, LValue dest,
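The ICEArguments parameter of emitScalarOrConstFoldImmArg is the bitmask filled in by ASTContext::GetBuiltinType: bit Idx is set when argument Idx of the builtin must be an integer constant expression. A small illustration of the bit test (the mask value is invented for the demo):

#include <cstdio>

int main() {
  // Hypothetical mask: only argument 2 must be an integer constant expression.
  unsigned ICEArguments = 0b0100;
  for (unsigned Idx = 0; Idx < 4; ++Idx) {
    bool mustBeICE = (ICEArguments & (1u << Idx)) != 0;
    std::printf("arg %u: %s\n", Idx,
                mustBeICE ? "constant-fold to a ConstantInt"
                          : "emit as a scalar expression");
  }
}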
Lines changed: 102 additions & 0 deletions

@@ -0,0 +1,102 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm %s -o - | FileCheck %s -check-prefix=OGCG
+
+typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
+
+__m128 test_cmpnleps(__m128 A, __m128 B) {
+  // CIR-LABEL: @test_cmpnleps
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+  // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s32i x 4> -> !cir.vector<!cir.float x 4>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_cmpnleps
+  // LLVM: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnleps
+  // OGCG: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // OGCG-NEXT: ret <4 x float> [[CAST]]
+  return __builtin_ia32_cmpnleps(A, B);
+}
+
+
+__m128d test_cmpnlepd(__m128d A, __m128d B) {
+  // CIR-LABEL: @test_cmpnlepd
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s64i x 2> -> !cir.vector<!cir.double x 2>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: test_cmpnlepd
+  // LLVM: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // LLVM-NEXT: ret <2 x double> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnlepd
+  // OGCG: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // OGCG-NEXT: ret <2 x double> [[CAST]]
+  return __builtin_ia32_cmpnlepd(A, B);
+}
+
+
+__m128 test_cmpnltps(__m128 A, __m128 B) {
+  // CIR-LABEL: @test_cmpnltps
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+  // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s32i x 4> -> !cir.vector<!cir.float x 4>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_cmpnltps
+  // LLVM: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnltps
+  // OGCG: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // OGCG-NEXT: ret <4 x float> [[CAST]]
+  return __builtin_ia32_cmpnltps(A, B);
+}
+
+
+__m128d test_cmpnltpd(__m128d A, __m128d B) {
+  // CIR-LABEL: @test_cmpnltpd
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s64i x 2> -> !cir.vector<!cir.double x 2>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: test_cmpnltpd
+  // LLVM: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // LLVM-NEXT: ret <2 x double> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnltpd
+  // OGCG: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // OGCG-NEXT: ret <2 x double> [[CAST]]
+  return __builtin_ia32_cmpnltpd(A, B);
+}