[clang] [CIR] X86 fcmp-sse vector builtins (PR #167125)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Nov 8 03:28:38 PST 2025
https://github.com/woruyu created https://github.com/llvm/llvm-project/pull/167125
### Summary
This PR resolves https://github.com/llvm/llvm-project/issues/163895 by adding the fcmp-sse portion of the X86 vector builtins to CIR.
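For reference, the lowering relies on the identity `UGT == not OLE` (and `UGE == not OLT`). Below is a minimal standalone C++ sketch of the per-lane semantics, not part of the patch (`cmpnle_lane` is an illustrative name):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Illustrative model of one lane of cmpnleps: "not (a <= b)" is exactly
    // UGT (unordered greater-than). When either input is NaN, `a <= b` is
    // false, so the negation yields all-ones, which is the unordered result
    // SSE specifies for the NLE predicate.
    static uint32_t cmpnle_lane(float a, float b) {
      return !(a <= b) ? 0xFFFFFFFFu : 0u;
    }

    int main() {
      std::printf("%08x\n", (unsigned)cmpnle_lane(1.0f, 2.0f)); // 00000000
      std::printf("%08x\n", (unsigned)cmpnle_lane(2.0f, 1.0f)); // ffffffff
      std::printf("%08x\n", (unsigned)cmpnle_lane(NAN, 1.0f));  // ffffffff
      return 0;
    }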
From c6c359e544eaa9137e8397ab95111b54dfc5dbd5 Mon Sep 17 00:00:00 2001
From: liuzhenya <zyliu at siorigin.com>
Date: Sat, 8 Nov 2025 01:26:12 -1000
Subject: [PATCH] [CIR] X86 fcmp-sse vector builtins
---
.../CIR/Dialect/Builder/CIRBaseBuilder.h | 18 ++++
clang/include/clang/CIR/MissingFeatures.h | 1 +
clang/lib/CIR/CodeGen/CIRGenBuilder.h | 22 ++++
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 96 +++++++++++++++--
clang/lib/CIR/CodeGen/CIRGenFunction.h | 3 +
clang/test/CIR/CodeGen/builtin-fcmp-sse.c | 102 ++++++++++++++++++
6 files changed, 231 insertions(+), 11 deletions(-)
create mode 100644 clang/test/CIR/CodeGen/builtin-fcmp-sse.c
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 3288f5b12c77e..d05c5181fce70 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -131,6 +131,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
return cir::IntType::get(getContext(), n, false);
}
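+ /// Return the bit width of a CIR integer or floating-point type.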
+ static unsigned getCIRIntOrFloatBitWidth(mlir::Type eltTy) {
+ if (auto intType = mlir::dyn_cast<cir::IntTypeInterface>(eltTy))
+ return intType.getWidth();
+ if (auto floatType = mlir::dyn_cast<cir::FPTypeInterface>(eltTy))
+ return floatType.getWidth();
+
+ llvm_unreachable("Wrong type passed in or Non-CIR type passed in");
+ }
cir::IntType getSIntNTy(int n) {
return cir::IntType::get(getContext(), n, true);
}
@@ -575,6 +583,16 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
return cir::CmpOp::create(*this, loc, getBoolTy(), kind, lhs, rhs);
}
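+ /// Create an elementwise vector comparison. Following VecCmpOp, the result
+ /// is a vector of signed integers with the same element bit width as the
+ /// operands.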
+ cir::VecCmpOp createVecCompare(mlir::Location loc, cir::CmpOpKind kind,
+ mlir::Value lhs, mlir::Value rhs) {
+ VectorType vecTy = mlir::cast<VectorType>(lhs.getType());
+ auto integralTy =
+ getSIntNTy(getCIRIntOrFloatBitWidth(vecTy.getElementType()));
+ VectorType integralVecTy =
+ VectorType::get(context, integralTy, vecTy.getSize());
+ return cir::VecCmpOp::create(*this, loc, integralVecTy, kind, lhs, rhs);
+ }
+
mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand) {
return createCompare(loc, cir::CmpOpKind::ne, operand, operand);
}
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index af1ffffcf54c0..70b9a1f9163d7 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -253,6 +253,7 @@ struct MissingFeatures {
static bool emitBranchThroughCleanup() { return false; }
static bool emitCheckedInBoundsGEP() { return false; }
static bool emitCondLikelihoodViaExpectIntrinsic() { return false; }
+ static bool emitConstrainedFPCall() { return false; }
static bool emitLifetimeMarkers() { return false; }
static bool emitLValueAlignmentAssumption() { return false; }
static bool emitNullCheckForDeleteCalls() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index e5066fac19185..4686cfa99b963 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -27,6 +27,7 @@ namespace clang::CIRGen {
class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
const CIRGenTypeCache &typeCache;
+ bool IsFPConstrained = false;
llvm::StringMap<unsigned> recordNames;
llvm::StringMap<unsigned> globalsVersioning;
@@ -34,6 +35,27 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
CIRGenBuilderTy(mlir::MLIRContext &mlirContext, const CIRGenTypeCache &tc)
: CIRBaseBuilderTy(mlirContext), typeCache(tc) {}
+ //
+ // Floating point specific helpers
+ // -------------------------------
+ //
+
+ /// Enable/Disable use of constrained floating point math. When enabled the
+ /// CreateF<op>() calls instead create constrained floating point intrinsic
+ /// calls. Fast math flags are unaffected by this setting.
+ void setIsFPConstrained(bool IsCon) {
+ if (IsCon)
+ llvm_unreachable("Constrained FP NYI");
+ IsFPConstrained = IsCon;
+ }
+
+ /// Query for the use of constrained floating point math
+ bool getIsFPConstrained() {
+ if (IsFPConstrained)
+ llvm_unreachable("Constrained FP NYI");
+ return IsFPConstrained;
+ }
+
/// Get a cir::ConstArrayAttr for a string literal.
/// Note: This is different from what is returned by
/// mlir::Builder::getStringAttr() which is an mlir::StringAttr.
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 0198a9d4eb192..9448f06e4e4aa 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -21,18 +21,18 @@
using namespace clang;
using namespace clang::CIRGen;
-mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
- const CallExpr *e) {
- if (builtinID == Builtin::BI__builtin_cpu_is) {
- cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_is");
+mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ if (BuiltinID == Builtin::BI__builtin_cpu_is) {
+ cgm.errorNYI(E->getSourceRange(), "__builtin_cpu_is");
return {};
}
- if (builtinID == Builtin::BI__builtin_cpu_supports) {
- cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_supports");
+ if (BuiltinID == Builtin::BI__builtin_cpu_supports) {
+ cgm.errorNYI(E->getSourceRange(), "__builtin_cpu_supports");
return {};
}
- if (builtinID == Builtin::BI__builtin_cpu_init) {
- cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_init");
+ if (BuiltinID == Builtin::BI__builtin_cpu_init) {
+ cgm.errorNYI(E->getSourceRange(), "__builtin_cpu_init");
return {};
}
@@ -43,7 +43,56 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
// Find out if any arguments are required to be integer constant expressions.
assert(!cir::MissingFeatures::handleBuiltinICEArguments());
- switch (builtinID) {
+ llvm::SmallVector<mlir::Value, 4> Ops;
+
+ // Find out if any arguments are required to be integer constant expressions.
+ unsigned ICEArguments = 0;
+ ASTContext::GetBuiltinTypeError Error;
+ getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+ assert(Error == ASTContext::GE_None && "Should not codegen an error");
+ for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+ Ops.push_back(emitScalarOrConstFoldImmArg(ICEArguments, i, E));
+ }
+
+ // OG has unordered comparisons as a form of optimization in addition to
+ // ordered comparisons, while CIR doesn't.
+ //
+ // This means that we can't encode a comparison code like UGT (unordered
+ // greater than), at least not at the CIR level.
+ //
+ // The boolean shouldInvert compensates for this.
+ // For example: to get the comparison code UGT, we pass in
+ // getVectorFCmpIR(OLE, shouldInvert = true), since OLE is the inverse of
+ // UGT.
+
+ // There are several other ways this could be supported:
+ // - Register extra CmpOpKinds for the unordered comparison types and add
+ //   the translation code to go from CIR to the LLVM dialect. (Note that we
+ //   get this naturally with shouldInvert, benefiting from existing
+ //   infrastructure, albeit at the cost of an extra `not` at the CIR level.)
+ // - Add the extra comparison codes to a new VecCmpOpKind instead of
+ //   cluttering CmpOpKind.
+ // - Add a boolean to VecCmpOp to indicate unordered vs. ordered comparison.
+ // - Emit the intrinsic call directly instead of using this helper; see how
+ //   the LLVM lowering handles this.
+ auto getVectorFCmpIR = [this, &Ops, &E](cir::CmpOpKind pred,
+ bool shouldInvert, bool isSignaling) {
+ assert(!cir::MissingFeatures::cgFPOptionsRAII());
+ auto loc = getLoc(E->getExprLoc());
+ mlir::Value cmp;
+ if (builder.getIsFPConstrained())
+ // TODO: Add the isSignaling boolean once emitConstrainedFPCall is implemented
+ assert(cir::MissingFeatures::emitConstrainedFPCall());
+ else
+ cmp = builder.createVecCompare(loc, pred, Ops[0], Ops[1]);
+
+ mlir::Value bitCast = builder.createBitcast(
+ shouldInvert ? builder.createNot(cmp) : cmp, Ops[0].getType());
+ return bitCast;
+ };
+
+ switch (BuiltinID) {
default:
return {};
case X86::BI_mm_prefetch:
@@ -710,10 +759,18 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_cmpunordpd:
case X86::BI__builtin_ia32_cmpneqps:
case X86::BI__builtin_ia32_cmpneqpd:
+ cgm.errorNYI(E->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(BuiltinID));
+ return {};
case X86::BI__builtin_ia32_cmpnltps:
case X86::BI__builtin_ia32_cmpnltpd:
+ return getVectorFCmpIR(cir::CmpOpKind::lt, /*shouldInvert=*/true,
+ /*isSignaling=*/true);
case X86::BI__builtin_ia32_cmpnleps:
case X86::BI__builtin_ia32_cmpnlepd:
+ return getVectorFCmpIR(cir::CmpOpKind::le, /*shouldInvert=*/true,
+ /*isSignaling=*/true);
case X86::BI__builtin_ia32_cmpordps:
case X86::BI__builtin_ia32_cmpordpd:
case X86::BI__builtin_ia32_cmpph128_mask:
@@ -798,9 +855,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
case X86::BI__builtin_ia32_prefetchi:
- cgm.errorNYI(e->getSourceRange(),
+ cgm.errorNYI(E->getSourceRange(),
std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
+ getContext().BuiltinInfo.getName(BuiltinID));
return {};
}
}
+
+mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg(unsigned ICEArguments,
+ unsigned Idx,
+ const CallExpr *E) {
+ mlir::Value Arg = {};
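+ // ICEArguments is a bitmask: bit Idx is set iff argument Idx is required
+ // to be an integer constant expression.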
+ if ((ICEArguments & (1 << Idx)) == 0) {
+ Arg = emitScalarExpr(E->getArg(Idx));
+ } else {
+ // If this is required to be a constant, constant fold it so that we
+ // know that the generated intrinsic gets a ConstantInt.
+ std::optional<llvm::APSInt> Result =
+ E->getArg(Idx)->getIntegerConstantExpr(getContext());
+ assert(Result && "Expected argument to be a constant");
+ Arg = builder.getConstInt(getLoc(E->getSourceRange()), *Result);
+ }
+ return Arg;
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index f879e580989f7..5a88b37ceb352 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1699,6 +1699,9 @@ class CIRGenFunction : public CIRGenTypeCache {
void emitScalarInit(const clang::Expr *init, mlir::Location loc,
LValue lvalue, bool capturedByInit = false);
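+ /// Emit the Idx-th argument of E as a scalar value, constant-folding it
+ /// when the builtin requires that argument to be an integer constant
+ /// expression.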
+ mlir::Value emitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
+ const CallExpr *E);
+
void emitStaticVarDecl(const VarDecl &d, cir::GlobalLinkageKind linkage);
void emitStoreOfComplex(mlir::Location loc, mlir::Value v, LValue dest,
diff --git a/clang/test/CIR/CodeGen/builtin-fcmp-sse.c b/clang/test/CIR/CodeGen/builtin-fcmp-sse.c
new file mode 100644
index 0000000000000..c90ff08c19542
--- /dev/null
+++ b/clang/test/CIR/CodeGen/builtin-fcmp-sse.c
@@ -0,0 +1,102 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm %s -o - | FileCheck %s -check-prefix=OGCG
+
+typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
+
+__m128 test_cmpnleps(__m128 A, __m128 B) {
+ // CIR-LABEL: @test_cmpnleps
+ // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+ // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+ // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s32i x 4> -> !cir.vector<!cir.float x 4>
+ // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+ // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+ // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>
+
+ // LLVM-LABEL: test_cmpnleps
+ // LLVM: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+ // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+ // OGCG-LABEL: test_cmpnleps
+ // OGCG: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+ // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // OGCG-NEXT: ret <4 x float> [[CAST]]
+ return __builtin_ia32_cmpnleps(A, B);
+}
+
+
+__m128d test_cmpnlepd(__m128d A, __m128d B) {
+ // CIR-LABEL: @test_cmpnlepd
+ // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+ // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
+ // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s64i x 2> -> !cir.vector<!cir.double x 2>
+ // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
+ // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+ // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.double x 2>
+
+ // LLVM-LABEL: test_cmpnlepd
+ // LLVM: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+ // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // LLVM-NEXT: ret <2 x double> [[CAST]]
+
+ // OGCG-LABEL: test_cmpnlepd
+ // OGCG: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+ // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // OGCG-NEXT: ret <2 x double> [[CAST]]
+ return __builtin_ia32_cmpnlepd(A, B);
+}
+
+
+__m128 test_cmpnltps(__m128 A, __m128 B) {
+ // CIR-LABEL: @test_cmpnltps
+ // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+ // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+ // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s32i x 4> -> !cir.vector<!cir.float x 4>
+ // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+ // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+ // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>
+
+ // LLVM-LABEL: test_cmpnltps
+ // LLVM: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+ // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+ // OGCG-LABEL: test_cmpnltps
+ // OGCG: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+ // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // OGCG-NEXT: ret <4 x float> [[CAST]]
+ return __builtin_ia32_cmpnltps(A, B);
+}
+
+
+__m128d test_cmpnltpd(__m128d A, __m128d B) {
+ // CIR-LABEL: @test_cmpnltpd
+ // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+ // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
+ // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s64i x 2> -> !cir.vector<!cir.double x 2>
+ // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
+ // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+ // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.double x 2>
+
+ // LLVM-LABEL: test_cmpnltpd
+ // LLVM: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+ // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // LLVM-NEXT: ret <2 x double> [[CAST]]
+
+ // OGCG-LABEL: test_cmpnltpd
+ // OGCG: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+ // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // OGCG-NEXT: ret <2 x double> [[CAST]]
+ return __builtin_ia32_cmpnltpd(A, B);
+}
+