[clang] [CIR]Upstream generic intrinsic emission path (PR #179098)
Priyanshu Kumar via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 11 09:57:45 PST 2026
https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/179098
>From 5f1482a60cb7a179ca6a119505bb37b43d15f454 Mon Sep 17 00:00:00 2001
From: Priyanshu <10b.priyanshu at gmail.com>
Date: Sun, 1 Feb 2026 15:38:02 +0530
Subject: [PATCH 01/13] Upstream generic intrinsic emission path
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 200 ++++++++++++++++++
.../CIR/CodeGenBuiltins/X86/rd-builtins.c | 25 +++
2 files changed, 225 insertions(+)
create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 5e6c9e8e2490e..5fd70df43a5f6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -26,6 +26,7 @@
#include "clang/Basic/OperatorKinds.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/ErrorHandling.h"
using namespace clang;
@@ -726,6 +727,108 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
return RValue::getIgnored();
}
+static mlir::Type
+decodeFixedType(ArrayRef<llvm::Intrinsic::IITDescriptor> &infos,
+ mlir::MLIRContext *context) {
+ using namespace llvm::Intrinsic;
+
+ IITDescriptor descriptor = infos.front();
+ infos = infos.slice(1);
+
+ switch (descriptor.Kind) {
+ case IITDescriptor::Void:
+ return cir::VoidType::get(context);
+ case IITDescriptor::Integer:
+ return cir::IntType::get(context, descriptor.Integer_Width,
+ /*isSigned=*/true);
+ case IITDescriptor::Float:
+ return cir::SingleType::get(context);
+ case IITDescriptor::Double:
+ return cir::DoubleType::get(context);
+ default:
+ llvm_unreachable("NYI");
+ }
+}
+
+/// Helper function to correct integer signedness for intrinsic arguments.
+/// IIT always returns signed integers, but the actual intrinsic may expect
+/// unsigned integers based on the AST FunctionDecl parameter types.
+static mlir::Type getIntrinsicArgumentTypeFromAST(mlir::Type iitType,
+ const CallExpr *E,
+ unsigned argIndex,
+ mlir::MLIRContext *context) {
+ // If it's not an integer type, return as-is
+ auto intTy = dyn_cast<cir::IntType>(iitType);
+ if (!intTy)
+ return iitType;
+
+ // Get the FunctionDecl from the CallExpr
+ const FunctionDecl *FD = nullptr;
+ if (const auto *DRE =
+ dyn_cast<DeclRefExpr>(E->getCallee()->IgnoreImpCasts())) {
+ FD = dyn_cast<FunctionDecl>(DRE->getDecl());
+ }
+
+ // If we have FunctionDecl and this argument exists, check its signedness
+ if (FD && argIndex < FD->getNumParams()) {
+ QualType paramType = FD->getParamDecl(argIndex)->getType();
+ if (paramType->isUnsignedIntegerType()) {
+ // Create unsigned version of the type
+ return cir::IntType::get(context, intTy.getWidth(), /*isSigned=*/false);
+ }
+ }
+
+ // Default: keep IIT type (signed)
+ return iitType;
+}
+
+static mlir::Value getCorrectedPtr(mlir::Value argValue, mlir::Type expectedTy,
+ CIRGenBuilderTy &builder) {
+ auto ptrType = mlir::dyn_cast<cir::PointerType>(argValue.getType());
+ assert(ptrType && "expected pointer type");
+
+ auto expectedPtrType = mlir::cast<cir::PointerType>(expectedTy);
+ assert(ptrType.getPointee() != expectedPtrType.getPointee() &&
+ "types should not match");
+
+ if (ptrType.getAddrSpace() != expectedPtrType.getAddrSpace()) {
+ auto newPtrType = cir::PointerType::get(ptrType.getPointee(),
+ expectedPtrType.getAddrSpace());
+ return builder.createAddrSpaceCast(argValue, newPtrType);
+ }
+
+ return argValue;
+}
+
+static cir::FuncType getIntrinsicType(mlir::MLIRContext *context,
+ llvm::Intrinsic::ID id) {
+ using namespace llvm::Intrinsic;
+
+ SmallVector<IITDescriptor, 8> table;
+ getIntrinsicInfoTableEntries(id, table);
+
+ ArrayRef<IITDescriptor> tableRef = table;
+ mlir::Type resultTy = decodeFixedType(tableRef, context);
+
+ SmallVector<mlir::Type, 8> argTypes;
+ bool isVarArg = false;
+ while (!tableRef.empty()) {
+ auto kind = tableRef.front().Kind;
+ if (kind == IITDescriptor::VarArg) {
+ isVarArg = true;
+ break; // VarArg is last
+ }
+ argTypes.push_back(decodeFixedType(tableRef, context));
+ }
+
+ // CIR convention: no explicit void return type
+ if (isa<cir::VoidType>(resultTy))
+ return cir::FuncType::get(context, argTypes, /*optionalReturnType=*/nullptr,
+ isVarArg);
+
+ return cir::FuncType::get(context, argTypes, resultTy, isVarArg);
+}
+
RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
const CallExpr *e,
ReturnValueSlot returnValue) {
@@ -1816,6 +1919,103 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
return emitLibraryCall(*this, fd, e,
cgm.getBuiltinLibFunction(fd, builtinID));
+ // See if we have a target specific intrinsic.
+ std::string name = getContext().BuiltinInfo.getName(builtinID);
+ Intrinsic::ID intrinsicID = Intrinsic::not_intrinsic;
+ StringRef prefix =
+ llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
+ if (!prefix.empty()) {
+ intrinsicID = Intrinsic::getIntrinsicForClangBuiltin(prefix.data(), name);
+ // NOTE we don't need to perform a compatibility flag check here since the
+ // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
+ // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
+ if (intrinsicID == Intrinsic::not_intrinsic)
+ intrinsicID = Intrinsic::getIntrinsicForMSBuiltin(prefix.data(), name);
+ }
+
+ if (intrinsicID != Intrinsic::not_intrinsic) {
+ unsigned iceArguments = 0;
+ ASTContext::GetBuiltinTypeError error;
+ getContext().GetBuiltinType(builtinID, error, &iceArguments);
+ assert(error == ASTContext::GE_None && "Should not codegen an error");
+
+ llvm::StringRef name = llvm::Intrinsic::getName(intrinsicID);
+ // cir::LLVMIntrinsicCallOp expects intrinsic name to not have prefix
+ // "llvm." For example, `llvm.nvvm.barrier0` should be passed as
+ // `nvvm.barrier0`.
+ if (!name.consume_front("llvm."))
+ assert(false && "bad intrinsic name!");
+
+ cir::FuncType intrinsicType =
+ getIntrinsicType(&getMLIRContext(), intrinsicID);
+
+ SmallVector<mlir::Value> args;
+ for (unsigned i = 0; i < e->getNumArgs(); i++) {
+ mlir::Value argValue =
+ emitScalarOrConstFoldImmArg(iceArguments, i, e->getArg(i));
+ // If the intrinsic arg type is different from the builtin arg type
+ // we need to do a bit cast.
+ mlir::Type argType = argValue.getType();
+ mlir::Type expectedTy = intrinsicType.getInput(i);
+
+ // Use helper to get the correct integer type based on AST signedness
+ mlir::Type correctedExpectedTy =
+ getIntrinsicArgumentTypeFromAST(expectedTy, e, i, &getMLIRContext());
+
+ if (argType != correctedExpectedTy)
+ argValue = getCorrectedPtr(argValue, expectedTy, builder);
+
+ args.push_back(argValue);
+ }
+
+ cir::LLVMIntrinsicCallOp intrinsicCall = cir::LLVMIntrinsicCallOp::create(
+ builder, getLoc(e->getExprLoc()), builder.getStringAttr(name),
+ intrinsicType.getReturnType(), args);
+
+ // Convert the intrinsic result to the CallExpr/AST expected return type if
+ // they differ. This can happen when an intrinsic's IIT uses a signed
+ // integer type while the AST declares an unsigned type, or when an
+ // intrinsic returns an integer but the AST expects a pointer (or vice
+ // versa). Coerce conservatively so subsequent stores/verifications succeed.
+ mlir::Value intrinsicRes = intrinsicCall.getResult();
+ mlir::Type builtinReturnType = intrinsicRes.getType();
+ mlir::Type expectedRetTy = convertType(e->getType());
+
+ if (builtinReturnType != expectedRetTy) {
+ // Integer -> Integer or width/signage differences.
+ if (cir::IntType fromInt =
+ mlir::dyn_cast<cir::IntType>(builtinReturnType)) {
+ if (cir::IntType toInt = mlir::dyn_cast<cir::IntType>(expectedRetTy))
+ intrinsicRes = builder.createIntCast(intrinsicRes, expectedRetTy);
+ else if (mlir::dyn_cast<cir::PointerType>(expectedRetTy))
+ intrinsicRes = builder.createIntToPtr(intrinsicRes, expectedRetTy);
+ else
+ intrinsicRes = builder.createBitcast(intrinsicRes, expectedRetTy);
+
+ } else if (cir::PointerType fromPtr =
+ mlir::dyn_cast<cir::PointerType>(builtinReturnType)) {
+ if (mlir::dyn_cast<cir::IntType>(expectedRetTy))
+ intrinsicRes = builder.createPtrToInt(intrinsicRes, expectedRetTy);
+ else if (cir::PointerType toPtr =
+ mlir::dyn_cast<cir::PointerType>(expectedRetTy)) {
+ if (fromPtr.getAddrSpace() != toPtr.getAddrSpace())
+ intrinsicRes =
+ builder.createAddrSpaceCast(intrinsicRes, expectedRetTy);
+ else if (fromPtr.getPointee() != toPtr.getPointee())
+ intrinsicRes = builder.createBitcast(intrinsicRes, expectedRetTy);
+ } else
+ intrinsicRes = builder.createBitcast(intrinsicRes, expectedRetTy);
+
+ } else
+ intrinsicRes = builder.createBitcast(intrinsicRes, expectedRetTy);
+ }
+
+ if (isa<cir::VoidType>(expectedRetTy))
+ return RValue::get(nullptr);
+
+ return RValue::get(intrinsicRes);
+ }
+
// Some target-specific builtins can have aggregate return values, e.g.
// __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
// returnValue to be non-null, so that the target-specific emission code can
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c
new file mode 100644
index 0000000000000..a1ac394110e39
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c
@@ -0,0 +1,25 @@
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -S -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+#include <x86intrin.h>
+
+unsigned long long test_rdpmc(int a) {
+ // CIR-LABEL: test_rdpmc
+ // CIR: cir.call @__rdpmc
+ // CIR: cir.store %{{.*}}, %{{.*}} : !u64i, !cir.ptr<!u64i>
+ // CIR: cir.return %{{.*}} : !u64i
+
+ // LLVM-LABEL: @test_rdpmc
+ // LLVM: call i64 @llvm.x86.rdpmc
+ // LLVM: store i64 %{{.*}}, ptr %{{.*}}, align 8
+ // LLVM: ret i64 %{{.*}}
+
+ // OGCG-LABEL: @test_rdpmc
+ // OGCG: call i64 @llvm.x86.rdpmc
+ // OGCG: ret i64 %{{.*}}
+ return _rdpmc(a);
+}
>From 440ac55b0d95f24c7fbaa0df3085ca43c225876f Mon Sep 17 00:00:00 2001
From: Priyanshu <10b.priyanshu at gmail.com>
Date: Sun, 1 Feb 2026 22:33:03 +0530
Subject: [PATCH 02/13] Update test
---
clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c
index a1ac394110e39..a66302c50cec5 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c
@@ -1,12 +1,16 @@
-// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -S -emit-llvm %s -o %t-cir.ll
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
-// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm %s -o %t.ll
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
#include <x86intrin.h>
+// CIR-LABEL: @__rdpmc
+// CIR: cir.call_llvm_intrinsic "x86.rdpmc"
+// CIR: cir.cast integral %{{.*}} : !s64i -> !u64i
+
unsigned long long test_rdpmc(int a) {
// CIR-LABEL: test_rdpmc
// CIR: cir.call @__rdpmc
>From e8cd250d9f5cf4545a4324b498e253137a67fc82 Mon Sep 17 00:00:00 2001
From: Priyanshu <10b.priyanshu at gmail.com>
Date: Tue, 3 Feb 2026 19:26:22 +0530
Subject: [PATCH 03/13] Address reviews and update test
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 122 +++++++-----------
.../CIR/CodeGenBuiltins/X86/rd-builtins.c | 3 +-
2 files changed, 47 insertions(+), 78 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 5fd70df43a5f6..2453489b67668 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -727,8 +727,10 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
return RValue::getIgnored();
}
+// FIXME: Remove cgf parameter when all descriptor kinds are implemented
static mlir::Type
-decodeFixedType(ArrayRef<llvm::Intrinsic::IITDescriptor> &infos,
+decodeFixedType(CIRGenFunction &cgf,
+ ArrayRef<llvm::Intrinsic::IITDescriptor> &infos,
mlir::MLIRContext *context) {
using namespace llvm::Intrinsic;
@@ -738,6 +740,8 @@ decodeFixedType(ArrayRef<llvm::Intrinsic::IITDescriptor> &infos,
switch (descriptor.Kind) {
case IITDescriptor::Void:
return cir::VoidType::get(context);
+ // If the intrinsic expects unsigned integers, the signedness is corrected in
+ // correctIntegerSignedness()
case IITDescriptor::Integer:
return cir::IntType::get(context, descriptor.Integer_Width,
/*isSigned=*/true);
@@ -746,39 +750,23 @@ decodeFixedType(ArrayRef<llvm::Intrinsic::IITDescriptor> &infos,
case IITDescriptor::Double:
return cir::DoubleType::get(context);
default:
- llvm_unreachable("NYI");
+ cgf.cgm.errorNYI("Unimplemented intrinsic type descriptor");
+ return cir::VoidType::get(context);
}
}
-/// Helper function to correct integer signedness for intrinsic arguments.
-/// IIT always returns signed integers, but the actual intrinsic may expect
-/// unsigned integers based on the AST FunctionDecl parameter types.
-static mlir::Type getIntrinsicArgumentTypeFromAST(mlir::Type iitType,
- const CallExpr *E,
- unsigned argIndex,
- mlir::MLIRContext *context) {
- // If it's not an integer type, return as-is
+/// Helper function to correct integer signedness for intrinsic arguments and
+/// return type. IIT always returns signed integers, but the actual intrinsic
+/// may expect unsigned integers based on the AST FunctionDecl parameter types.
+static mlir::Type correctIntegerSignedness(mlir::Type iitType, QualType astType,
+ mlir::MLIRContext *context) {
auto intTy = dyn_cast<cir::IntType>(iitType);
if (!intTy)
return iitType;
- // Get the FunctionDecl from the CallExpr
- const FunctionDecl *FD = nullptr;
- if (const auto *DRE =
- dyn_cast<DeclRefExpr>(E->getCallee()->IgnoreImpCasts())) {
- FD = dyn_cast<FunctionDecl>(DRE->getDecl());
- }
-
- // If we have FunctionDecl and this argument exists, check its signedness
- if (FD && argIndex < FD->getNumParams()) {
- QualType paramType = FD->getParamDecl(argIndex)->getType();
- if (paramType->isUnsignedIntegerType()) {
- // Create unsigned version of the type
- return cir::IntType::get(context, intTy.getWidth(), /*isSigned=*/false);
- }
+ if (astType->isUnsignedIntegerType()) {
+ return cir::IntType::get(context, intTy.getWidth(), /*isSigned=*/false);
}
-
- // Default: keep IIT type (signed)
return iitType;
}
@@ -788,8 +776,7 @@ static mlir::Value getCorrectedPtr(mlir::Value argValue, mlir::Type expectedTy,
assert(ptrType && "expected pointer type");
auto expectedPtrType = mlir::cast<cir::PointerType>(expectedTy);
- assert(ptrType.getPointee() != expectedPtrType.getPointee() &&
- "types should not match");
+ assert(ptrType != expectedPtrType && "types should not match");
if (ptrType.getAddrSpace() != expectedPtrType.getAddrSpace()) {
auto newPtrType = cir::PointerType::get(ptrType.getPointee(),
@@ -797,10 +784,11 @@ static mlir::Value getCorrectedPtr(mlir::Value argValue, mlir::Type expectedTy,
return builder.createAddrSpaceCast(argValue, newPtrType);
}
- return argValue;
+ return builder.createBitcast(argValue, expectedTy);
}
-static cir::FuncType getIntrinsicType(mlir::MLIRContext *context,
+static cir::FuncType getIntrinsicType(CIRGenFunction &cgf,
+ mlir::MLIRContext *context,
llvm::Intrinsic::ID id) {
using namespace llvm::Intrinsic;
@@ -808,17 +796,18 @@ static cir::FuncType getIntrinsicType(mlir::MLIRContext *context,
getIntrinsicInfoTableEntries(id, table);
ArrayRef<IITDescriptor> tableRef = table;
- mlir::Type resultTy = decodeFixedType(tableRef, context);
+ mlir::Type resultTy = decodeFixedType(cgf, tableRef, context);
SmallVector<mlir::Type, 8> argTypes;
bool isVarArg = false;
while (!tableRef.empty()) {
- auto kind = tableRef.front().Kind;
+ llvm::Intrinsic::IITDescriptor::IITDescriptorKind kind =
+ tableRef.front().Kind;
if (kind == IITDescriptor::VarArg) {
isVarArg = true;
break; // VarArg is last
}
- argTypes.push_back(decodeFixedType(tableRef, context));
+ argTypes.push_back(decodeFixedType(cgf, tableRef, context));
}
// CIR convention: no explicit void return type
@@ -836,8 +825,12 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
// See if we can constant fold this builtin. If so, don't emit it at all.
// TODO: Extend this handling to all builtin calls that we can constant-fold.
+ // Do not constant-fold immediate (target-specific) builtins; their ASTs can
+ // trigger the constant evaluator in cases it cannot safely handle.
+ // Skip EvaluateAsRValue for those.
Expr::EvalResult result;
- if (e->isPRValue() && e->EvaluateAsRValue(result, cgm.getASTContext()) &&
+ if (e->isPRValue() && !getContext().BuiltinInfo.isImmediate(builtinID) &&
+ e->EvaluateAsRValue(result, cgm.getASTContext()) &&
!result.hasSideEffects()) {
if (result.Val.isInt())
return RValue::get(builder.getConstInt(loc, result.Val.getInt()));
@@ -1947,9 +1940,10 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
assert(false && "bad intrinsic name!");
cir::FuncType intrinsicType =
- getIntrinsicType(&getMLIRContext(), intrinsicID);
+ getIntrinsicType(*this, &getMLIRContext(), intrinsicID);
SmallVector<mlir::Value> args;
+ const FunctionDecl *fd = e->getDirectCallee();
for (unsigned i = 0; i < e->getNumArgs(); i++) {
mlir::Value argValue =
emitScalarOrConstFoldImmArg(iceArguments, i, e->getArg(i));
@@ -1958,9 +1952,12 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
mlir::Type argType = argValue.getType();
mlir::Type expectedTy = intrinsicType.getInput(i);
- // Use helper to get the correct integer type based on AST signedness
- mlir::Type correctedExpectedTy =
- getIntrinsicArgumentTypeFromAST(expectedTy, e, i, &getMLIRContext());
+ // Correct integer signedness based on AST parameter type
+ mlir::Type correctedExpectedTy = expectedTy;
+ if (fd && i < fd->getNumParams()) {
+ correctedExpectedTy = correctIntegerSignedness(
+ expectedTy, fd->getParamDecl(i)->getType(), &getMLIRContext());
+ }
if (argType != correctedExpectedTy)
argValue = getCorrectedPtr(argValue, expectedTy, builder);
@@ -1968,49 +1965,22 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
args.push_back(argValue);
}
+ // Correct return type signedness based on AST return type before creating
+ // the call, avoiding unnecessary casts in the IR.
+ mlir::Type correctedReturnType = intrinsicType.getReturnType();
+ if (fd) {
+ correctedReturnType =
+ correctIntegerSignedness(intrinsicType.getReturnType(),
+ fd->getReturnType(), &getMLIRContext());
+ }
+
cir::LLVMIntrinsicCallOp intrinsicCall = cir::LLVMIntrinsicCallOp::create(
builder, getLoc(e->getExprLoc()), builder.getStringAttr(name),
- intrinsicType.getReturnType(), args);
+ correctedReturnType, args);
- // Convert the intrinsic result to the CallExpr/AST expected return type if
- // they differ. This can happen when an intrinsic's IIT uses a signed
- // integer type while the AST declares an unsigned type, or when an
- // intrinsic returns an integer but the AST expects a pointer (or vice
- // versa). Coerce conservatively so subsequent stores/verifications succeed.
mlir::Value intrinsicRes = intrinsicCall.getResult();
- mlir::Type builtinReturnType = intrinsicRes.getType();
- mlir::Type expectedRetTy = convertType(e->getType());
-
- if (builtinReturnType != expectedRetTy) {
- // Integer -> Integer or width/signage differences.
- if (cir::IntType fromInt =
- mlir::dyn_cast<cir::IntType>(builtinReturnType)) {
- if (cir::IntType toInt = mlir::dyn_cast<cir::IntType>(expectedRetTy))
- intrinsicRes = builder.createIntCast(intrinsicRes, expectedRetTy);
- else if (mlir::dyn_cast<cir::PointerType>(expectedRetTy))
- intrinsicRes = builder.createIntToPtr(intrinsicRes, expectedRetTy);
- else
- intrinsicRes = builder.createBitcast(intrinsicRes, expectedRetTy);
-
- } else if (cir::PointerType fromPtr =
- mlir::dyn_cast<cir::PointerType>(builtinReturnType)) {
- if (mlir::dyn_cast<cir::IntType>(expectedRetTy))
- intrinsicRes = builder.createPtrToInt(intrinsicRes, expectedRetTy);
- else if (cir::PointerType toPtr =
- mlir::dyn_cast<cir::PointerType>(expectedRetTy)) {
- if (fromPtr.getAddrSpace() != toPtr.getAddrSpace())
- intrinsicRes =
- builder.createAddrSpaceCast(intrinsicRes, expectedRetTy);
- else if (fromPtr.getPointee() != toPtr.getPointee())
- intrinsicRes = builder.createBitcast(intrinsicRes, expectedRetTy);
- } else
- intrinsicRes = builder.createBitcast(intrinsicRes, expectedRetTy);
-
- } else
- intrinsicRes = builder.createBitcast(intrinsicRes, expectedRetTy);
- }
- if (isa<cir::VoidType>(expectedRetTy))
+ if (isa<cir::VoidType>(correctedReturnType))
return RValue::get(nullptr);
return RValue::get(intrinsicRes);
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c
index a66302c50cec5..28d4d6f06ddd1 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/rd-builtins.c
@@ -8,8 +8,7 @@
#include <x86intrin.h>
// CIR-LABEL: @__rdpmc
-// CIR: cir.call_llvm_intrinsic "x86.rdpmc"
-// CIR: cir.cast integral %{{.*}} : !s64i -> !u64i
+// CIR: cir.call_llvm_intrinsic "x86.rdpmc" %{{.*}} : (!s32i) -> !u64i
unsigned long long test_rdpmc(int a) {
// CIR-LABEL: test_rdpmc
>From 016ea30078db8bdcb3818d4abde0fc9f9ea2d6fe Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 5 Feb 2026 00:19:57 +0530
Subject: [PATCH 04/13] Apply suggestion from @andykaylor
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 2453489b67668..a32bbeb749caf 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1936,8 +1936,8 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
// cir::LLVMIntrinsicCallOp expects intrinsic name to not have prefix
// "llvm." For example, `llvm.nvvm.barrier0` should be passed as
// `nvvm.barrier0`.
- if (!name.consume_front("llvm."))
- assert(false && "bad intrinsic name!");
+ assert(name.starts_with("llvm.");
+ name = name.drop_front(/*strlen("llvm.")=*/5);
cir::FuncType intrinsicType =
getIntrinsicType(*this, &getMLIRContext(), intrinsicID);
>From 50afc771fd753edc07a13fa9d21216664b412953 Mon Sep 17 00:00:00 2001
From: Priyanshu <10b.priyanshu at gmail.com>
Date: Thu, 5 Feb 2026 01:07:49 +0530
Subject: [PATCH 05/13] Fix syntax error
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index a32bbeb749caf..4c0bb82848d4d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1936,7 +1936,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
// cir::LLVMIntrinsicCallOp expects intrinsic name to not have prefix
// "llvm." For example, `llvm.nvvm.barrier0` should be passed as
// `nvvm.barrier0`.
- assert(name.starts_with("llvm.");
+ assert(name.starts_with("llvm.") && "expected llvm. prefix");
name = name.drop_front(/*strlen("llvm.")=*/5);
cir::FuncType intrinsicType =
>From 80431425ac91b035e4095eb4d067d0b72278c73f Mon Sep 17 00:00:00 2001
From: Priyanshu <10b.priyanshu at gmail.com>
Date: Thu, 5 Feb 2026 11:26:14 +0530
Subject: [PATCH 06/13] Add test
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 2 ++
clang/test/CIR/CodeGen/builtins-x86.c | 36 +++++++++++++++++++++++++
2 files changed, 38 insertions(+)
create mode 100644 clang/test/CIR/CodeGen/builtins-x86.c
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 4c0bb82848d4d..0da9e3189f785 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -779,6 +779,8 @@ static mlir::Value getCorrectedPtr(mlir::Value argValue, mlir::Type expectedTy,
assert(ptrType != expectedPtrType && "types should not match");
if (ptrType.getAddrSpace() != expectedPtrType.getAddrSpace()) {
+ assert(!cir::MissingFeatures::addressSpace() &&
+ "address space handling not yet implemented");
auto newPtrType = cir::PointerType::get(ptrType.getPointee(),
expectedPtrType.getAddrSpace());
return builder.createAddrSpaceCast(argValue, newPtrType);
diff --git a/clang/test/CIR/CodeGen/builtins-x86.c b/clang/test/CIR/CodeGen/builtins-x86.c
new file mode 100644
index 0000000000000..0748147bd5b57
--- /dev/null
+++ b/clang/test/CIR/CodeGen/builtins-x86.c
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-ogcg.ll
+// RUN: FileCheck --input-file=%t-ogcg.ll %s -check-prefix=OGCG
+
+void test_sfence(void) {
+ // CIR-LABEL: @test_sfence
+ // CIR: cir.call_llvm_intrinsic "x86.sse.sfence" : () -> !void
+ // LLVM-LABEL: @test_sfence
+ // LLVM: call void @llvm.x86.sse.sfence
+ // OGCG-LABEL: @test_sfence
+ // OGCG: call void @llvm.x86.sse.sfence
+ __builtin_ia32_sfence();
+}
+
+// CIR-LABEL: @test_lfence
+void test_lfence(void) {
+ // CIR: cir.call_llvm_intrinsic "x86.sse2.lfence" : () -> !void
+ // LLVM-LABEL: @test_lfence
+ // LLVM: call void @llvm.x86.sse2.lfence()
+ // OGCG-LABEL: @test_lfence
+ // OGCG: call void @llvm.x86.sse2.lfence()
+ __builtin_ia32_lfence();
+}
+
+void test_pause(void) {
+ // CIR-LABEL: @test_pause
+ // CIR: cir.call_llvm_intrinsic "x86.sse2.pause" : () -> !void
+ // LLVM-LABEL: @test_pause
+ // LLVM: call void @llvm.x86.sse2.pause()
+ // OGCG-LABEL: @test_pause
+ // OGCG: call void @llvm.x86.sse2.pause()
+ __builtin_ia32_pause();
+}
>From 591473f146bff182d403ce2baab8e3df0c80d619 Mon Sep 17 00:00:00 2001
From: Priyanshu <10b.priyanshu at gmail.com>
Date: Thu, 5 Feb 2026 11:34:56 +0530
Subject: [PATCH 07/13] Update test
---
clang/test/CIR/CodeGen/builtins-x86.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/clang/test/CIR/CodeGen/builtins-x86.c b/clang/test/CIR/CodeGen/builtins-x86.c
index 0748147bd5b57..e03cd64e063dc 100644
--- a/clang/test/CIR/CodeGen/builtins-x86.c
+++ b/clang/test/CIR/CodeGen/builtins-x86.c
@@ -8,18 +8,22 @@
void test_sfence(void) {
// CIR-LABEL: @test_sfence
// CIR: cir.call_llvm_intrinsic "x86.sse.sfence" : () -> !void
+
// LLVM-LABEL: @test_sfence
// LLVM: call void @llvm.x86.sse.sfence
+
// OGCG-LABEL: @test_sfence
// OGCG: call void @llvm.x86.sse.sfence
__builtin_ia32_sfence();
}
-// CIR-LABEL: @test_lfence
void test_lfence(void) {
+ // CIR-LABEL: @test_lfence
// CIR: cir.call_llvm_intrinsic "x86.sse2.lfence" : () -> !void
+
// LLVM-LABEL: @test_lfence
// LLVM: call void @llvm.x86.sse2.lfence()
+
// OGCG-LABEL: @test_lfence
// OGCG: call void @llvm.x86.sse2.lfence()
__builtin_ia32_lfence();
@@ -28,8 +32,10 @@ void test_lfence(void) {
void test_pause(void) {
// CIR-LABEL: @test_pause
// CIR: cir.call_llvm_intrinsic "x86.sse2.pause" : () -> !void
+
// LLVM-LABEL: @test_pause
// LLVM: call void @llvm.x86.sse2.pause()
+
// OGCG-LABEL: @test_pause
// OGCG: call void @llvm.x86.sse2.pause()
__builtin_ia32_pause();
>From bcb49de67658a61b1fd3b2062e65a62a481e96fb Mon Sep 17 00:00:00 2001
From: Priyanshu <10b.priyanshu at gmail.com>
Date: Sun, 8 Feb 2026 10:11:26 +0530
Subject: [PATCH 08/13] Update CIRGenBuiltin.cpp
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 0da9e3189f785..6fc48f59e9d02 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -764,9 +764,9 @@ static mlir::Type correctIntegerSignedness(mlir::Type iitType, QualType astType,
if (!intTy)
return iitType;
- if (astType->isUnsignedIntegerType()) {
+ if (astType->isUnsignedIntegerType())
return cir::IntType::get(context, intTy.getWidth(), /*isSigned=*/false);
- }
+
return iitType;
}
@@ -1953,6 +1953,11 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
// we need to do a bit cast.
mlir::Type argType = argValue.getType();
mlir::Type expectedTy = intrinsicType.getInput(i);
+ if (!mlir::isa<cir::PointerType>(expectedTy)) {
+ cgm.errorNYI(e->getSourceRange(),
+ "intrinsic expects a pointer type (NYI for non-pointer)");
+ return getUndefRValue(e->getType());
+ }
// Correct integer signedness based on AST parameter type
mlir::Type correctedExpectedTy = expectedTy;
>From 1a92a0d180cbec053b14c02aaff7209abb7d604d Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Sun, 8 Feb 2026 10:30:22 +0530
Subject: [PATCH 09/13] Remove support for float/double arguments
Removed handling for Float and Double types in intrinsic type descriptor.
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 4 ----
1 file changed, 4 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 6fc48f59e9d02..27cb87ef01498 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -745,10 +745,6 @@ decodeFixedType(CIRGenFunction &cgf,
case IITDescriptor::Integer:
return cir::IntType::get(context, descriptor.Integer_Width,
/*isSigned=*/true);
- case IITDescriptor::Float:
- return cir::SingleType::get(context);
- case IITDescriptor::Double:
- return cir::DoubleType::get(context);
default:
cgf.cgm.errorNYI("Unimplemented intrinsic type descriptor");
return cir::VoidType::get(context);
>From b1370f8a8c8fb165d5281a6e75b94549a8e2fab5 Mon Sep 17 00:00:00 2001
From: Priyanshu <10b.priyanshu at gmail.com>
Date: Sun, 8 Feb 2026 13:38:01 +0530
Subject: [PATCH 10/13] Update CIRGenBuiltin.cpp
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 30 +++++++++++++++++++------
1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 27cb87ef01498..c1c45d40f8296 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1949,11 +1949,6 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
// we need to do a bit cast.
mlir::Type argType = argValue.getType();
mlir::Type expectedTy = intrinsicType.getInput(i);
- if (!mlir::isa<cir::PointerType>(expectedTy)) {
- cgm.errorNYI(e->getSourceRange(),
- "intrinsic expects a pointer type (NYI for non-pointer)");
- return getUndefRValue(e->getType());
- }
// Correct integer signedness based on AST parameter type
mlir::Type correctedExpectedTy = expectedTy;
@@ -1962,8 +1957,29 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
expectedTy, fd->getParamDecl(i)->getType(), &getMLIRContext());
}
- if (argType != correctedExpectedTy)
- argValue = getCorrectedPtr(argValue, expectedTy, builder);
+ if (mlir::isa<cir::PointerType>(expectedTy)) {
+ bool argIsPointer = mlir::isa<cir::PointerType>(argType);
+ bool argIsVectorOfPointer = false;
+ if (auto vecTy = dyn_cast<mlir::VectorType>(argType))
+ argIsVectorOfPointer =
+ mlir::isa<cir::PointerType>(vecTy.getElementType());
+
+ if (!argIsPointer && !argIsVectorOfPointer) {
+ cgm.errorNYI(
+ e->getSourceRange(),
+ "intrinsic expects a pointer type (NYI for non-pointer)");
+ return getUndefRValue(e->getType());
+ }
+
+ // Pointer handling (address-space cast / bitcast fallback).
+ if (argType != expectedTy)
+ argValue = getCorrectedPtr(argValue, expectedTy, builder);
+ } else {
+ // Non-pointer expected type: if needed, bitcast to the corrected
+ // expected type to match signedness/representation.
+ if (argType != correctedExpectedTy)
+ argValue = builder.createBitcast(argValue, correctedExpectedTy);
+ }
args.push_back(argValue);
}
>From 4a8a5bfd85e151d0ed401a230ec9a3c40f7b5934 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 10 Feb 2026 23:25:10 +0530
Subject: [PATCH 11/13] Update clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index c1c45d40f8296..b6ce5207039b7 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -768,8 +768,7 @@ static mlir::Type correctIntegerSignedness(mlir::Type iitType, QualType astType,
static mlir::Value getCorrectedPtr(mlir::Value argValue, mlir::Type expectedTy,
CIRGenBuilderTy &builder) {
- auto ptrType = mlir::dyn_cast<cir::PointerType>(argValue.getType());
- assert(ptrType && "expected pointer type");
+ auto ptrType = mlir::cast<cir::PointerType>(argValue.getType());
auto expectedPtrType = mlir::cast<cir::PointerType>(expectedTy);
assert(ptrType != expectedPtrType && "types should not match");
>From 660efb74af20fd20f04836128c6eeca0c4a655d1 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 11 Feb 2026 05:59:23 +0530
Subject: [PATCH 12/13] [CIR][X86] Add support for vpshl/vpshr builtins
(#179538)
This patch also adds support for fshl/fshr operations so that
vpshl/vpshr intrinsics can lower to them.
Part of: #167765
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 19 +-
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 13 +-
.../X86/avx512vbmi2-builtins.c | 401 ++++++++++++++++++
.../CodeGenBuiltins/builtins-elementwise.c | 87 ++++
4 files changed, 514 insertions(+), 6 deletions(-)
create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vbmi2-builtins.c
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index b6ce5207039b7..1b479dde8b718 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1339,8 +1339,23 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
case Builtin::BI__builtin_elementwise_canonicalize:
case Builtin::BI__builtin_elementwise_copysign:
case Builtin::BI__builtin_elementwise_fma:
- case Builtin::BI__builtin_elementwise_fshl:
- case Builtin::BI__builtin_elementwise_fshr:
+ return errorBuiltinNYI(*this, e, builtinID);
+ case Builtin::BI__builtin_elementwise_fshl: {
+ mlir::Location loc = getLoc(e->getExprLoc());
+ mlir::Value a = emitScalarExpr(e->getArg(0));
+ mlir::Value b = emitScalarExpr(e->getArg(1));
+ mlir::Value c = emitScalarExpr(e->getArg(2));
+ return RValue::get(builder.emitIntrinsicCallOp(loc, "fshl", a.getType(),
+ mlir::ValueRange{a, b, c}));
+ }
+ case Builtin::BI__builtin_elementwise_fshr: {
+ mlir::Location loc = getLoc(e->getExprLoc());
+ mlir::Value a = emitScalarExpr(e->getArg(0));
+ mlir::Value b = emitScalarExpr(e->getArg(1));
+ mlir::Value c = emitScalarExpr(e->getArg(2));
+ return RValue::get(builder.emitIntrinsicCallOp(loc, "fshr", a.getType(),
+ mlir::ValueRange{a, b, c}));
+ }
case Builtin::BI__builtin_elementwise_add_sat:
case Builtin::BI__builtin_elementwise_sub_sat:
case Builtin::BI__builtin_elementwise_max:
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index cad80317cb870..7800e90d130b5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -2058,6 +2058,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_pternlogd256_maskz:
case X86::BI__builtin_ia32_pternlogq128_maskz:
case X86::BI__builtin_ia32_pternlogq256_maskz:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
case X86::BI__builtin_ia32_vpshldd128:
case X86::BI__builtin_ia32_vpshldd256:
case X86::BI__builtin_ia32_vpshldd512:
@@ -2067,6 +2071,8 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_vpshldw128:
case X86::BI__builtin_ia32_vpshldw256:
case X86::BI__builtin_ia32_vpshldw512:
+ return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
+ ops[1], ops[2], false);
case X86::BI__builtin_ia32_vpshrdd128:
case X86::BI__builtin_ia32_vpshrdd256:
case X86::BI__builtin_ia32_vpshrdd512:
@@ -2076,10 +2082,9 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_vpshrdw128:
case X86::BI__builtin_ia32_vpshrdw256:
case X86::BI__builtin_ia32_vpshrdw512:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return mlir::Value{};
+ // Ops 0 and 1 are swapped.
+ return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[1],
+ ops[0], ops[2], true);
case X86::BI__builtin_ia32_reduce_fadd_pd512:
case X86::BI__builtin_ia32_reduce_fadd_ps512:
case X86::BI__builtin_ia32_reduce_fadd_ph512:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vbmi2-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vbmi2-builtins.c
new file mode 100644
index 0000000000000..170c6fd48ac81
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vbmi2-builtins.c
@@ -0,0 +1,401 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding -triple x86_64-unknown-linux-gnu -fclangir -target-feature +avx512vbmi2 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding -triple x86_64-unknown-linux-gnu -fclangir -target-feature +avx512vbmi2 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vbmi2 -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+
+#include <immintrin.h>
+
+__m512i test_mm512_shldv_epi64(__m512i s, __m512i a, __m512i b) {
+ // CIR-LABEL: @_mm512_shldv_epi64
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !u64i>
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !u64i>, !cir.vector<8 x !u64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !u64i>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !u64i> -> !cir.vector<8 x !s64i>
+ // CIR-LABEL: @test_mm512_shldv_epi64
+ // CIR: %{{.*}} = cir.call @_mm512_shldv_epi64
+ // LLVM-LABEL: @test_mm512_shldv_epi64
+ // LLVM: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+ // OGCG-LABEL: @test_mm512_shldv_epi64
+ // OGCG: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64>
+ return _mm512_shldv_epi64(s, a, b);
+}
+
+__m512i test_mm512_mask_shldi_epi64(__m512i s, __mmask8 u, __m512i a, __m512i b) {
+ // CIR-LABEL: test_mm512_mask_shldi_epi64
+ // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}}
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}})
+ // LLVM-LABEL: @test_mm512_mask_shldi_epi64
+ // LLVM: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 47))
+ // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+ // OGCG-LABEL: @test_mm512_mask_shldi_epi64
+ // OGCG: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 47))
+ // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ return _mm512_mask_shldi_epi64(s, u, a, b, 47);
+}
+
+__m512i test_mm512_maskz_shldi_epi64(__mmask8 u, __m512i a, __m512i b) {
+ // CIR-LABEL: test_mm512_maskz_shldi_epi64
+ // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_maskz_shldi_epi64
+ // LLVM: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 63))
+ // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+ // OGCG-LABEL: @test_mm512_maskz_shldi_epi64
+ // OGCG: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 63))
+ // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ return _mm512_maskz_shldi_epi64(u, a, b, 63);
+}
+
+__m512i test_mm512_shldi_epi64(__m512i a, __m512i b) {
+ // CIR-LABEL: @test_mm512_shldi_epi64
+ // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_shldi_epi64
+ // LLVM: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 31))
+ // OGCG-LABEL: @test_mm512_shldi_epi64
+ // OGCG: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31))
+ return _mm512_shldi_epi64(a, b, 31);
+}
+
+__m512i test_mm512_mask_shldi_epi32(__m512i s, __mmask16 u, __m512i a, __m512i b) {
+ // CIR-LABEL: test_mm512_mask_shldi_epi32
+ // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_mask_shldi_epi32
+ // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 7))
+ // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // OGCG-LABEL: @test_mm512_mask_shldi_epi32
+ // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 7))
+ // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_mask_shldi_epi32(s, u, a, b, 7);
+}
+
+__m512i test_mm512_maskz_shldi_epi32(__mmask16 u, __m512i a, __m512i b) {
+ // CIR-LABEL: test_mm512_maskz_shldi_epi32
+ // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+ // LLVM-LABEL: @test_mm512_maskz_shldi_epi32
+ // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 15))
+ // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // OGCG-LABEL: @test_mm512_maskz_shldi_epi32
+ // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 15))
+ // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_maskz_shldi_epi32(u, a, b, 15);
+}
+
+__m512i test_mm512_shldi_epi32(__m512i a, __m512i b) {
+ // CIR-LABEL: test_mm512_shldi_epi32
+ // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_shldi_epi32
+ // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 31))
+ // OGCG-LABEL: @test_mm512_shldi_epi32
+ // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31))
+ return _mm512_shldi_epi32(a, b, 31);
+}
+
+__m512i test_mm512_mask_shldi_epi16(__m512i s, __mmask32 u, __m512i a, __m512i b) {
+ // CIR-LABEL: test_mm512_mask_shldi_epi16
+ // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_mask_shldi_epi16
+ // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 3))
+ // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+ // OGCG-LABEL: @test_mm512_mask_shldi_epi16
+ // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 3))
+ // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ return _mm512_mask_shldi_epi16(s, u, a, b, 3);
+}
+
+__m512i test_mm512_maskz_shldi_epi16(__mmask32 u, __m512i a, __m512i b) {
+ // CIR-LABEL: test_mm512_maskz_shldi_epi16
+ // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_maskz_shldi_epi16
+ // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 15))
+ // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+ // OGCG-LABEL: @test_mm512_maskz_shldi_epi16
+ // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15))
+ // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ return _mm512_maskz_shldi_epi16(u, a, b, 15);
+}
+
+__m512i test_mm512_shldi_epi16(__m512i a, __m512i b) {
+ // CIR-LABEL: test_mm512_shldi_epi16
+ // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_shldi_epi16
+ // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 31))
+ // OGCG-LABEL: @test_mm512_shldi_epi16
+ // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 31))
+ return _mm512_shldi_epi16(a, b, 31);
+}
+
+__m512i test_mm512_mask_shldv_epi64(__m512i s, __mmask8 u, __m512i a, __m512i b) {
+ // CIR-LABEL: _mm512_mask_shldv_epi64
+ // CIR: cir.call @_mm512_shldv_epi64(%{{.*}}, %{{.*}}, %{{.*}}){{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !cir.vector<8 x !s64i>
+ // CIR: cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+ // CIR-LABEL: test_mm512_mask_shldv_epi64
+ // CIR: cir.call @_mm512_mask_shldv_epi64
+ // LLVM-LABEL: @test_mm512_mask_shldv_epi64
+ // LLVM: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+ // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+ // OGCG-LABEL: @test_mm512_mask_shldv_epi64
+ // OGCG: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64>
+ // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ return _mm512_mask_shldv_epi64(s, u, a, b);
+}
+
+__m512i test_mm512_shldv_epi32(__m512i s, __m512i a, __m512i b) {
+ // CIR-LABEL: _mm512_shldv_epi32
+ // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !u32i>, !cir.vector<16 x !u32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !u32i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<16 x !u32i> -> !cir.vector<8 x !s64i>
+ // CIR-LABEL: test_mm512_shldv_epi32
+ // CIR: cir.call @_mm512_shldv_epi32
+ // LLVM-LABEL: @test_mm512_shldv_epi32
+ // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // OGCG-LABEL: @test_mm512_shldv_epi32
+ // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32>
+ return _mm512_shldv_epi32(s, a, b);
+}
+
+__m512i test_mm512_mask_shldv_epi16(__m512i s, __mmask32 u, __m512i a, __m512i b) {
+ // CIR-LABEL: @_mm512_mask_shldv_epi16
+ // CIR: cir.call @_mm512_shldv_epi16(%{{.*}}, %{{.*}}, %{{.*}}){{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !cir.vector<8 x !s64i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<32 x !s16i>
+ // CIR-LABEL: @test_mm512_mask_shldv_epi16
+ // CIR: cir.call @_mm512_mask_shldv_epi16
+ // LLVM-LABEL: @test_mm512_mask_shldv_epi16
+ // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+ // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+ // OGCG-LABEL: @test_mm512_mask_shldv_epi16
+ // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
+ // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ return _mm512_mask_shldv_epi16(s, u, a, b);
+}
+
+__m512i test_mm512_maskz_shldv_epi16(__mmask32 u, __m512i s, __m512i a, __m512i b) {
+ // CIR-LABEL: _mm512_maskz_shldv_epi16
+ // CIR: cir.call @_mm512_shldv_epi16(%{{.*}}, %{{.*}}, %{{.*}}){{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !cir.vector<8 x !s64i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<32 x !s16i>
+ // CIR-LABEL: @test_mm512_maskz_shldv_epi16
+ // CIR: cir.call @_mm512_maskz_shldv_epi16
+ // LLVM-LABEL: @test_mm512_maskz_shldv_epi16
+ // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+ // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+ // OGCG-LABEL: @test_mm512_maskz_shldv_epi16
+ // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
+ // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ return _mm512_maskz_shldv_epi16(u, s, a, b);
+}
+
+__m512i test_mm512_shldv_epi16(__m512i s, __m512i a, __m512i b) {
+ // CIR-LABEL: _mm512_shldv_epi16
+ // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}}{{.*}} : (!cir.vector<32 x !u16i>, !cir.vector<32 x !u16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !u16i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<32 x !u16i> -> !cir.vector<8 x !s64i>
+ // CIR-LABEL: @test_mm512_shldv_epi16
+ // CIR: cir.call @_mm512_shldv_epi16
+ // LLVM-LABEL: @test_mm512_shldv_epi16
+ // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+ // OGCG-LABEL: @test_mm512_shldv_epi16
+ // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
+ return _mm512_shldv_epi16(s, a, b);
+}
+
+__m512i test_mm512_mask_shrdi_epi64(__m512i s, __mmask8 u, __m512i a, __m512i b) {
+ // CIR-LABEL: @test_mm512_mask_shrdi_epi64
+ // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_mask_shrdi_epi64
+ // LLVM: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 47))
+ // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+ // OGCG-LABEL: @test_mm512_mask_shrdi_epi64
+ // OGCG: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 47))
+ // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ return _mm512_mask_shrdi_epi64(s, u, a, b, 47);
+}
+
+__m512i test_mm512_maskz_shrdi_epi64(__mmask8 u, __m512i a, __m512i b) {
+ // CIR-LABEL: @test_mm512_maskz_shrdi_epi64
+ // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+ // CIR: cir.call @_mm512_setzero_si512() {{.*}} : () -> !cir.vector<8 x !s64i>
+ // CIR: cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_maskz_shrdi_epi64
+ // LLVM: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 63))
+ // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+ // OGCG-LABEL: @test_mm512_maskz_shrdi_epi64
+ // OGCG: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 63))
+ // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ return _mm512_maskz_shrdi_epi64(u, a, b, 63);
+}
+
+__m512i test_mm512_shrdi_epi64(__m512i a, __m512i b) {
+ // CIR-LABEL: @test_mm512_shrdi_epi64
+ // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_shrdi_epi64
+ // LLVM: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 31))
+ // OGCG-LABEL: @test_mm512_shrdi_epi64
+ // OGCG: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31))
+ return _mm512_shrdi_epi64(a, b, 31);
+}
+
+__m512i test_mm512_mask_shrdi_epi32(__m512i s, __mmask16 u, __m512i a, __m512i b) {
+ // CIR-LABEL: @test_mm512_mask_shrdi_epi32
+ // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> !cir.vector<8 x !s64i>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+ // LLVM-LABEL: @test_mm512_mask_shrdi_epi32
+ // LLVM: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 7))
+ // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // OGCG-LABEL: @test_mm512_mask_shrdi_epi32
+ // OGCG: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 7))
+ // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_mask_shrdi_epi32(s, u, a, b, 7);
+}
+
+__m512i test_mm512_maskz_shrdi_epi32(__mmask16 u, __m512i a, __m512i b) {
+ // CIR-LABEL: @test_mm512_maskz_shrdi_epi32
+ // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> !cir.vector<8 x !s64i>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+ // LLVM-LABEL: @test_mm512_maskz_shrdi_epi32
+ // LLVM: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 15))
+ // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // OGCG-LABEL: @test_mm512_maskz_shrdi_epi32
+ // OGCG: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 15))
+ // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_maskz_shrdi_epi32(u, a, b, 15);
+}
+
+__m512i test_mm512_shrdi_epi32(__m512i a, __m512i b) {
+ // CIR-LABEL: @test_mm512_shrdi_epi32
+ // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_shrdi_epi32
+ // LLVM: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 31))
+ // OGCG-LABEL: @test_mm512_shrdi_epi32
+ // OGCG: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31))
+ return _mm512_shrdi_epi32(a, b, 31);
+}
+
+__m512i test_mm512_mask_shrdi_epi16(__m512i s, __mmask32 u, __m512i a, __m512i b) {
+ // CIR-LABEL: @test_mm512_mask_shrdi_epi16
+ // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<8 x !s64i>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+ // LLVM-LABEL: @test_mm512_mask_shrdi_epi16
+ // LLVM: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 3))
+ // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+ // OGCG-LABEL: @test_mm512_mask_shrdi_epi16
+ // OGCG: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 3))
+ // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ return _mm512_mask_shrdi_epi16(s, u, a, b, 3);
+}
+
+__m512i test_mm512_maskz_shrdi_epi16(__mmask32 u, __m512i a, __m512i b) {
+ // CIR-LABEL: @test_mm512_maskz_shrdi_epi16
+ // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<8 x !s64i>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+ // LLVM-LABEL: @test_mm512_maskz_shrdi_epi16
+ // LLVM: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 15))
+ // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+ // OGCG-LABEL: @test_mm512_maskz_shrdi_epi16
+ // OGCG: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15))
+ // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ return _mm512_maskz_shrdi_epi16(u, a, b, 15);
+}
+
+__m512i test_mm512_shrdi_epi16(__m512i a, __m512i b) {
+ // CIR-LABEL: @test_mm512_shrdi_epi16
+ // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<8 x !s64i>
+ // LLVM-LABEL: @test_mm512_shrdi_epi16
+ // LLVM: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 31))
+ // OGCG-LABEL: @test_mm512_shrdi_epi16
+ // OGCG: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 31))
+ return _mm512_shrdi_epi16(a, b, 31);
+}
+
+__m512i test_mm512_mask_shldv_epi32(__m512i s, __mmask16 u, __m512i a, __m512i b) {
+ // CIR-LABEL: _mm512_mask_shldv_epi32
+ // CIR: cir.call @_mm512_shldv_epi32(%{{.*}}, %{{.*}}, %{{.*}}){{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !cir.vector<8 x !s64i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<16 x !s32i>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+ // CIR-LABEL: test_mm512_mask_shldv_epi32
+ // CIR: cir.call @_mm512_mask_shldv_epi32
+ // LLVM-LABEL: @test_mm512_mask_shldv_epi32
+ // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // OGCG-LABEL: @test_mm512_mask_shldv_epi32
+ // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32>
+ // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_mask_shldv_epi32(s, u, a, b);
+}
+
+__m512i test_mm512_maskz_shldv_epi32(__mmask16 u, __m512i s, __m512i a, __m512i b) {
+ // CIR-LABEL: _mm512_maskz_shldv_epi32
+ // CIR: cir.call @_mm512_shldv_epi32(%{{.*}}, %{{.*}}, %{{.*}}){{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !cir.vector<8 x !s64i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<16 x !s32i>
+ // CIR: cir.call @_mm512_setzero_si512() {{.*}} : () -> !cir.vector<8 x !s64i>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+ // CIR-LABEL: test_mm512_maskz_shldv_epi32
+ // CIR: cir.call @_mm512_maskz_shldv_epi32
+ // LLVM-LABEL: @test_mm512_maskz_shldv_epi32
+ // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // OGCG-LABEL: @test_mm512_maskz_shldv_epi32
+ // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32>
+ // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_maskz_shldv_epi32(u, s, a, b);
+}
+
+__m512i test_mm512_mask_shrdv_epi32(__m512i s, __mmask16 u, __m512i a, __m512i b) {
+ // CIR-LABEL: @_mm512_shrdv_epi32
+ // CIR: cir.call @_mm512_shrdv_epi32(%{{.*}}, %{{.*}}, %{{.*}}){{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !cir.vector<8 x !s64i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<16 x !s32i>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+ // CIR-LABEL: @test_mm512_mask_shrdv_epi32
+ // CIR: cir.call @_mm512_mask_shrdv_epi32
+ // LLVM-LABEL: @test_mm512_mask_shrdv_epi32
+ // LLVM: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // OGCG-LABEL: @test_mm512_mask_shrdv_epi32
+ // OGCG: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32>
+ // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_mask_shrdv_epi32(s, u, a, b);
+}
+
+__m512i test_mm512_maskz_shrdv_epi32(__mmask16 u, __m512i s, __m512i a, __m512i b) {
+ // CIR-LABEL: _mm512_maskz_shrdv_epi32
+ // CIR: cir.call @_mm512_shrdv_epi32(%{{.*}}, %{{.*}}, %{{.*}}){{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !cir.vector<8 x !s64i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<16 x !s32i>
+ // CIR: cir.call @_mm512_setzero_si512() {{.*}} : () -> !cir.vector<8 x !s64i>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+ // CIR-LABEL: test_mm512_maskz_shrdv_epi32
+ // CIR: cir.call @_mm512_maskz_shrdv_epi32
+ // LLVM-LABEL: @test_mm512_maskz_shrdv_epi32
+ // LLVM: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+ // OGCG-LABEL: @test_mm512_maskz_shrdv_epi32
+ // OGCG: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32>
+ // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_maskz_shrdv_epi32(u, s, a, b);
+}
+
+__m512i test_mm512_mask_shrdv_epi16(__m512i s, __mmask32 u, __m512i a, __m512i b) {
+ // CIR-LABEL: _mm512_mask_shrdv_epi16
+ // CIR: cir.call @_mm512_shrdv_epi16(%{{.*}}, %{{.*}}, %{{.*}}){{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !cir.vector<8 x !s64i>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<32 x !s16i>
+ // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+ // CIR-LABEL: test_mm512_mask_shrdv_epi16
+ // CIR: cir.call @_mm512_mask_shrdv_epi16
+ // LLVM: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+ // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+ // OGCG-LABEL: @test_mm512_mask_shrdv_epi16
+ // OGCG: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
+ // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ return _mm512_mask_shrdv_epi16(s, u, a, b);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/builtins-elementwise.c b/clang/test/CIR/CodeGenBuiltins/builtins-elementwise.c
index f64080b829bdf..80fc0682f8126 100644
--- a/clang/test/CIR/CodeGenBuiltins/builtins-elementwise.c
+++ b/clang/test/CIR/CodeGenBuiltins/builtins-elementwise.c
@@ -6,6 +6,7 @@
// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
typedef int vint4 __attribute__((ext_vector_type(4)));
+typedef short vshort8 __attribute__((ext_vector_type(8)));
typedef float vfloat4 __attribute__((ext_vector_type(4)));
typedef double vdouble4 __attribute__((ext_vector_type(4)));
@@ -116,3 +117,89 @@ void test_builtin_elementwise_cos(float f, double d, vfloat4 vf4,
// OGCG: {{%.*}} = call <4 x double> @llvm.cos.v4f64(<4 x double> {{%.*}})
vd4 = __builtin_elementwise_cos(vd4);
}
+
+void test_builtin_elementwise_fshl(long long int i1, long long int i2,
+ long long int i3, unsigned short us1,
+ unsigned short us2, unsigned short us3,
+ char c1, char c2, char c3,
+ unsigned char uc1, unsigned char uc2,
+ unsigned char uc3, vshort8 vi1,
+ vshort8 vi2, vshort8 vi3, vint4 vu1,
+ vint4 vu2, vint4 vu3) {
+ // CIR-LABEL: test_builtin_elementwise_fshl
+ // LLVM-LABEL: test_builtin_elementwise_fshl
+ // OGCG-LABEL: test_builtin_elementwise_fshl
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!s64i, !s64i, !s64i) -> !s64i
+ // LLVM: %{{.*}} = call i64 @llvm.fshl.i64(i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
+ // OGCG: %{{.*}} = call i64 @llvm.fshl.i64(i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
+ i1 = __builtin_elementwise_fshl(i1, i2, i3);
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!u16i, !u16i, !u16i) -> !u16i
+ // LLVM: %{{.*}} = call i16 @llvm.fshl.i16(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
+ // OGCG: %{{.*}} = call i16 @llvm.fshl.i16(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
+ us1 = __builtin_elementwise_fshl(us1, us2, us3);
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!s8i, !s8i, !s8i) -> !s8i
+ // LLVM: %{{.*}} = call i8 @llvm.fshl.i8(i8 %{{.*}}, i8 %{{.*}}, i8 %{{.*}})
+ // OGCG: %{{.*}} = call i8 @llvm.fshl.i8(i8 %{{.*}}, i8 %{{.*}}, i8 %{{.*}})
+ c1 = __builtin_elementwise_fshl(c1, c2, c3);
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!u8i, !u8i, !u8i) -> !u8i
+ // LLVM: %{{.*}} = call i8 @llvm.fshl.i8(i8 %{{.*}}, i8 %{{.*}}, i8 %{{.*}})
+ // OGCG: %{{.*}} = call i8 @llvm.fshl.i8(i8 %{{.*}}, i8 %{{.*}}, i8 %{{.*}})
+ uc1 = __builtin_elementwise_fshl(uc1, uc2, uc3);
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>) -> !cir.vector<8 x !s16i>
+ // LLVM: %{{.*}} = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ // OGCG: %{{.*}} = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ vi1 = __builtin_elementwise_fshl(vi1, vi2, vi3);
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i>
+ // LLVM: %{{.*}} = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // OGCG: %{{.*}} = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ vu1 = __builtin_elementwise_fshl(vu1, vu2, vu3);
+}
+
+void test_builtin_elementwise_fshr(long long int i1, long long int i2,
+ long long int i3, unsigned short us1,
+ unsigned short us2, unsigned short us3,
+ char c1, char c2, char c3,
+ unsigned char uc1, unsigned char uc2,
+ unsigned char uc3, vshort8 vi1,
+ vshort8 vi2, vshort8 vi3, vint4 vu1,
+ vint4 vu2, vint4 vu3) {
+ // CIR-LABEL: test_builtin_elementwise_fshr
+ // LLVM-LABEL: test_builtin_elementwise_fshr
+ // OGCG-LABEL: test_builtin_elementwise_fshr
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!s64i, !s64i, !s64i) -> !s64i
+ // LLVM: %{{.*}} = call i64 @llvm.fshr.i64(i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
+ // OGCG: %{{.*}} = call i64 @llvm.fshr.i64(i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
+ i1 = __builtin_elementwise_fshr(i1, i2, i3);
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!u16i, !u16i, !u16i) -> !u16i
+ // LLVM: %{{.*}} = call i16 @llvm.fshr.i16(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
+ // OGCG: %{{.*}} = call i16 @llvm.fshr.i16(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
+ us1 = __builtin_elementwise_fshr(us1, us2, us3);
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!s8i, !s8i, !s8i) -> !s8i
+ // LLVM: %{{.*}} = call i8 @llvm.fshr.i8(i8 %{{.*}}, i8 %{{.*}}, i8 %{{.*}})
+ // OGCG: %{{.*}} = call i8 @llvm.fshr.i8(i8 %{{.*}}, i8 %{{.*}}, i8 %{{.*}})
+ c1 = __builtin_elementwise_fshr(c1, c2, c3);
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!u8i, !u8i, !u8i) -> !u8i
+ // LLVM: %{{.*}} = call i8 @llvm.fshr.i8(i8 %{{.*}}, i8 %{{.*}}, i8 %{{.*}})
+ // OGCG: %{{.*}} = call i8 @llvm.fshr.i8(i8 %{{.*}}, i8 %{{.*}}, i8 %{{.*}})
+ uc1 = __builtin_elementwise_fshr(uc1, uc2, uc3);
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>) -> !cir.vector<8 x !s16i>
+ // LLVM: %{{.*}} = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ // OGCG: %{{.*}} = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ vi1 = __builtin_elementwise_fshr(vi1, vi2, vi3);
+
+ // CIR: %{{.*}} = cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i>
+ // LLVM: %{{.*}} = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // OGCG: %{{.*}} = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ vu1 = __builtin_elementwise_fshr(vu1, vu2, vu3);
+}
>From 1de953cd5929f5a8c8249738e671ae862fbc083e Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 10 Feb 2026 23:25:10 +0530
Subject: [PATCH 13/13] Update clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 1b479dde8b718..9af00b805c37e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1924,6 +1924,12 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
return emitLibraryCall(*this, fd, e,
cgm.getBuiltinLibFunction(fd, builtinID));
+ // If this is a predefined lib function (e.g. malloc), emit the call
+ // using exactly the normal call path.
+ if (getContext().BuiltinInfo.isPredefinedLibFunction(builtinID))
+ return emitLibraryCall(*this, fd, e,
+ emitScalarExpr(e->getCallee()).getDefiningOp());
+
// See if we have a target specific intrinsic.
std::string name = getContext().BuiltinInfo.getName(builtinID);
Intrinsic::ID intrinsicID = Intrinsic::not_intrinsic;
More information about the cfe-commits
mailing list