
Commit d3aa05b

Committed by liuzhenya

[CIR] X86 vector fcmp-sse vector builtins

1 parent 36c1273

File tree

7 files changed: +220 -5 lines changed

clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

Lines changed: 18 additions & 0 deletions
@@ -131,6 +131,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return cir::IntType::get(getContext(), n, false);
   }

+  static unsigned getCIRIntOrFloatBitWidth(mlir::Type eltTy) {
+    if (auto intType = mlir::dyn_cast<cir::IntTypeInterface>(eltTy))
+      return intType.getWidth();
+    if (auto floatType = mlir::dyn_cast<cir::FPTypeInterface>(eltTy))
+      return floatType.getWidth();
+
+    llvm_unreachable("Wrong type passed in or Non-CIR type passed in");
+  }
   cir::IntType getSIntNTy(int n) {
     return cir::IntType::get(getContext(), n, true);
   }
@@ -575,6 +583,16 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return cir::CmpOp::create(*this, loc, getBoolTy(), kind, lhs, rhs);
   }

+  cir::VecCmpOp createVecCompare(mlir::Location loc, cir::CmpOpKind kind,
+                                 mlir::Value lhs, mlir::Value rhs) {
+    VectorType vecCast = mlir::cast<VectorType>(lhs.getType());
+    IntType integralTy =
+        getSIntNTy(getCIRIntOrFloatBitWidth(vecCast.getElementType()));
+    VectorType integralVecTy =
+        VectorType::get(context, integralTy, vecCast.getSize());
+    return cir::VecCmpOp::create(*this, loc, integralVecTy, kind, lhs, rhs);
+  }
+
   mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand) {
     return createCompare(loc, cir::CmpOpKind::ne, operand, operand);
   }
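
For orientation (not part of the diff): createVecCompare derives its result type from the left operand, producing a vector of signed integers whose element width matches the compared element type, so comparing two !cir.vector<4 x !cir.float> values yields a !cir.vector<4 x !s32i> mask. A minimal sketch of a hypothetical caller, assuming a builder, a location, and two float-vector values are already in scope:

// Hypothetical helper, for illustration only; not part of the commit.
// Emits an element-wise ordered less-than and returns the integer mask
// vector that createVecCompare derives (!cir.vector<4 x !s32i> here).
mlir::Value emitLtMask(cir::CIRBaseBuilderTy &builder, mlir::Location loc,
                       mlir::Value lhs, mlir::Value rhs) {
  mlir::Value mask = builder.createVecCompare(loc, cir::CmpOpKind::lt, lhs, rhs);
  return mask;
}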

clang/include/clang/CIR/MissingFeatures.h

Lines changed: 1 addition & 0 deletions
@@ -256,6 +256,7 @@ struct MissingFeatures {
   static bool emitBranchThroughCleanup() { return false; }
   static bool emitCheckedInBoundsGEP() { return false; }
   static bool emitCondLikelihoodViaExpectIntrinsic() { return false; }
+  static bool emitConstrainedFPCall() { return false; }
   static bool emitLifetimeMarkers() { return false; }
   static bool emitLValueAlignmentAssumption() { return false; }
   static bool emitNullCheckForDeleteCalls() { return false; }

clang/lib/CIR/CodeGen/CIRGenBuilder.h

Lines changed: 13 additions & 0 deletions
@@ -27,13 +27,26 @@ namespace clang::CIRGen {

 class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   const CIRGenTypeCache &typeCache;
+  bool isFPConstrained = false;
   llvm::StringMap<unsigned> recordNames;
   llvm::StringMap<unsigned> globalsVersioning;

 public:
   CIRGenBuilderTy(mlir::MLIRContext &mlirContext, const CIRGenTypeCache &tc)
       : CIRBaseBuilderTy(mlirContext), typeCache(tc) {}

+  //
+  // Floating point specific helpers
+  // -------------------------------
+  //
+
+  /// Query for the use of constrained floating point math
+  bool getisFPConstrained() {
+    if (isFPConstrained)
+      llvm_unreachable("Constrained FP NYI");
+    return isFPConstrained;
+  }
+
   /// Get a cir::ConstArrayAttr for a string literal.
   /// Note: This is different from what is returned by
   /// mlir::Builder::getStringAttr() which is an mlir::StringAttr.

clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp

Lines changed: 21 additions & 0 deletions
@@ -625,6 +625,27 @@ CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID, const CallExpr *e,
                                       getTarget().getTriple().getArch());
 }

+// Handle immediate-constant (ICE) requirements for builtin args.
+// `iceArguments` is a bitmask: if bit `idx` is set, arg `idx` must be an
+// integer constant expression; we constant-fold it so the intrinsic sees
+// a ConstantInt. Otherwise we emit it as a normal scalar value.
+mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg(unsigned iceArguments,
+                                                        unsigned idx,
+                                                        const CallExpr *expr) {
+  mlir::Value arg = {};
+  if ((iceArguments & (1 << idx)) == 0) {
+    arg = emitScalarExpr(expr->getArg(idx));
+  } else {
+    // If this is required to be a constant, constant fold it so that we
+    // know that the generated intrinsic gets a ConstantInt.
+    std::optional<llvm::APSInt> result =
+        expr->getArg(idx)->getIntegerConstantExpr(getContext());
+    assert(result && "Expected argument to be a constant");
+    arg = builder.getConstInt(getLoc(expr->getSourceRange()), *result);
+  }
+  return arg;
+}
+
 /// Given a builtin id for a function like "__builtin_fabsf", return a Function*
 /// for "fabsf".
 cir::FuncOp CIRGenModule::getBuiltinLibFunction(const FunctionDecl *fd,
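
To make the bitmask convention concrete, here is an illustration with made-up values (not taken from the commit): bit `idx` of `iceArguments` decides which of the two branches above a given argument takes.

#include <cassert>

int main() {
  // Hypothetical mask: bit 2 set means argument 2 must be an integer
  // constant expression and is constant-folded; bits 0 and 1 clear mean
  // those arguments go through the ordinary emitScalarExpr path.
  unsigned iceArguments = 0b100;
  assert((iceArguments & (1u << 2)) != 0); // arg 2: constant-fold branch
  assert((iceArguments & (1u << 0)) == 0); // arg 0: scalar-emit branch
  assert((iceArguments & (1u << 1)) == 0); // arg 1: scalar-emit branch
  return 0;
}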

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 60 additions & 5 deletions
@@ -22,17 +22,17 @@ using namespace clang;
 using namespace clang::CIRGen;

 mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
-                                               const CallExpr *e) {
+                                               const CallExpr *expr) {
   if (builtinID == Builtin::BI__builtin_cpu_is) {
-    cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_is");
+    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
     return {};
   }
   if (builtinID == Builtin::BI__builtin_cpu_supports) {
-    cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_supports");
+    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
     return {};
   }
   if (builtinID == Builtin::BI__builtin_cpu_init) {
-    cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_init");
+    cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
     return {};
   }

@@ -43,6 +43,53 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   // Find out if any arguments are required to be integer constant expressions.
   assert(!cir::MissingFeatures::handleBuiltinICEArguments());

+  llvm::SmallVector<mlir::Value> ops;
+
+  // Find out if any arguments are required to be integer constant expressions.
+  unsigned iceArguments = 0;
+  ASTContext::GetBuiltinTypeError error;
+  getContext().GetBuiltinType(builtinID, error, &iceArguments);
+  assert(error == ASTContext::GE_None &&
+         "builtinID should be checked before calling emitX86BuiltinExpr");
+  for (auto [idx, arg] : llvm::enumerate(expr->arguments())) {
+    ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, expr));
+  }
+
+  // OG has unordered comparison as a form of optimization in addition to
+  // ordered comparison, while CIR doesn't.
+  //
+  // This means that we can't encode the comparison code of UGT (unordered
+  // greater than), at least not at the CIR level.
+  //
+  // The boolean shouldInvert compensates for this.
+  // For example: to get the comparison code UGT, we pass in
+  // emitVectorFCmp(OLE, shouldInvert = true), since OLE is the inverse of UGT.
+
+  // There are several other ways this could be supported:
+  // - Register extra CmpOpKinds for the unordered comparison types and add the
+  //   translation code to go from CIR -> LLVM dialect. Note that we get this
+  //   naturally with shouldInvert, benefiting from existing infrastructure,
+  //   albeit at the cost of an extra `not` at the CIR level.
+  // - Add the extra comparison codes to a new VecCmpOpKind instead of
+  //   cluttering CmpOpKind.
+  // - Add a boolean to VecCmpOp to indicate whether the comparison is
+  //   unordered or ordered.
+  // - Emit the intrinsic call directly instead of calling this helper; see how
+  //   the LLVM lowering handles this.
+  auto emitVectorFCmp = [this, &ops, &expr](cir::CmpOpKind pred,
+                                            bool shouldInvert,
+                                            bool isSignaling) {
+    assert(!cir::MissingFeatures::cgFPOptionsRAII());
+    auto loc = getLoc(expr->getExprLoc());
+    mlir::Value cmp;
+    assert(cir::MissingFeatures::emitConstrainedFPCall());
+    cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
+    mlir::Value bitCast = builder.createBitcast(
+        shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
+    return bitCast;
+  };
+
   switch (builtinID) {
   default:
     return {};
@@ -710,10 +757,18 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cmpunordpd:
   case X86::BI__builtin_ia32_cmpneqps:
   case X86::BI__builtin_ia32_cmpneqpd:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_cmpnltps:
   case X86::BI__builtin_ia32_cmpnltpd:
+    return emitVectorFCmp(cir::CmpOpKind::lt, /*shouldInvert=*/true,
+                          /*isSignaling=*/true);
   case X86::BI__builtin_ia32_cmpnleps:
   case X86::BI__builtin_ia32_cmpnlepd:
+    return emitVectorFCmp(cir::CmpOpKind::le, /*shouldInvert=*/true,
+                          /*isSignaling=*/true);
   case X86::BI__builtin_ia32_cmpordps:
   case X86::BI__builtin_ia32_cmpordpd:
   case X86::BI__builtin_ia32_cmpph128_mask:
@@ -798,7 +853,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
   case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
   case X86::BI__builtin_ia32_prefetchi:
-    cgm.errorNYI(e->getSourceRange(),
+    cgm.errorNYI(expr->getSourceRange(),
                  std::string("unimplemented X86 builtin call: ") +
                      getContext().BuiltinInfo.getName(builtinID));
     return {};
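
The shouldInvert trick relies on IEEE-754 semantics: for floating point, !(a <= b) is true exactly when a > b or when either operand is NaN, which is the UGT predicate that __builtin_ia32_cmpnleps needs (likewise !(a < b) gives UGE for cmpnltps). A small standalone check of that equivalence, for illustration only and not part of the commit:

#include <cmath>
#include <cstdio>

int main() {
  // !(a <= b) matches LLVM's `fcmp ugt`: unordered (NaN involved) or greater.
  float pairs[][2] = {{NAN, 1.0f}, {2.0f, 1.0f}, {0.5f, 1.0f}};
  for (auto &p : pairs) {
    bool invertedOle = !(p[0] <= p[1]); // what the shouldInvert path computes
    bool ugt = std::isnan(p[0]) || std::isnan(p[1]) || p[0] > p[1]; // fcmp ugt
    std::printf("a=%g b=%g  !(a<=b)=%d  ugt=%d\n", p[0], p[1], invertedOle, ugt);
  }
  return 0;
}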

clang/lib/CIR/CodeGen/CIRGenFunction.h

Lines changed: 3 additions & 0 deletions
@@ -1699,6 +1699,9 @@ class CIRGenFunction : public CIRGenTypeCache {
   void emitScalarInit(const clang::Expr *init, mlir::Location loc,
                       LValue lvalue, bool capturedByInit = false);

+  mlir::Value emitScalarOrConstFoldImmArg(unsigned iceArguments, unsigned idx,
+                                          const CallExpr *expr);
+
   void emitStaticVarDecl(const VarDecl &d, cir::GlobalLinkageKind linkage);

   void emitStoreOfComplex(mlir::Location loc, mlir::Value v, LValue dest,
Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
+
+__m128 test_cmpnleps(__m128 A, __m128 B) {
+  // CIR-LABEL: @test_cmpnleps
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<4 x !cir.float>, !cir.vector<4 x !s32i>
+  // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<4 x !s32i> -> !cir.vector<4 x !cir.float>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<4 x !cir.float>
+
+  // LLVM-LABEL: test_cmpnleps
+  // LLVM: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnleps
+  // OGCG: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // OGCG-NEXT: ret <4 x float> [[CAST]]
+  return __builtin_ia32_cmpnleps(A, B);
+}
+
+__m128d test_cmpnlepd(__m128d A, __m128d B) {
+  // CIR-LABEL: @test_cmpnlepd
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<2 x !cir.double>, !cir.vector<2 x !s64i>
+  // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<2 x !s64i> -> !cir.vector<2 x !cir.double>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<2 x !cir.double>, !cir.ptr<!cir.vector<2 x !cir.double>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<2 x !cir.double>
+
+  // LLVM-LABEL: test_cmpnlepd
+  // LLVM: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // LLVM-NEXT: ret <2 x double> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnlepd
+  // OGCG: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // OGCG-NEXT: ret <2 x double> [[CAST]]
+  return __builtin_ia32_cmpnlepd(A, B);
+}
+
+__m128 test_cmpnltps(__m128 A, __m128 B) {
+  // CIR-LABEL: @test_cmpnltps
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) : !cir.vector<4 x !cir.float>, !cir.vector<4 x !s32i>
+  // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<4 x !s32i> -> !cir.vector<4 x !cir.float>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<4 x !cir.float>
+
+  // LLVM-LABEL: test_cmpnltps
+  // LLVM: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnltps
+  // OGCG: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // OGCG-NEXT: ret <4 x float> [[CAST]]
+  return __builtin_ia32_cmpnltps(A, B);
+}
+
+__m128d test_cmpnltpd(__m128d A, __m128d B) {
+  // CIR-LABEL: @test_cmpnltpd
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) : !cir.vector<2 x !cir.double>, !cir.vector<2 x !s64i>
+  // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<2 x !s64i> -> !cir.vector<2 x !cir.double>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<2 x !cir.double>, !cir.ptr<!cir.vector<2 x !cir.double>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<2 x !cir.double>
+
+  // LLVM-LABEL: test_cmpnltpd
+  // LLVM: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // LLVM-NEXT: ret <2 x double> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnltpd
+  // OGCG: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // OGCG-NEXT: ret <2 x double> [[CAST]]
+  return __builtin_ia32_cmpnltpd(A, B);
+}
