[llvm-branch-commits] [clang] users/banach space/cir/svdup z (PR #175976)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 14 08:07:19 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Andrzej Warzyński (banach-space)
<details>
<summary>Changes</summary>
- **[mlir] Fix alignment for predicate (i1) vectors**
- **[CIR][AArch64] Add lowering for predicated SVE svdup builtins (zeroing)** (usage sketch below)
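
A minimal usage sketch of the zeroing form covered by the second change, modelled on the `_s16_z` test added in this patch. The function name and build flags are illustrative only; compiling it requires an AArch64 target with the `sve` feature enabled.

```cpp
// Sketch mirroring the new test in acle_sve_dup.c; not part of the patch.
// Assumed build command: clang++ -target aarch64-linux-gnu -march=armv8-a+sve -c dup_sketch.cpp
#include <arm_sve.h>

// Zeroing form: lanes active in pg receive op, inactive lanes become 0.
// The patch lowers this to aarch64.sve.dup with a zeroinitializer first
// operand, after narrowing the 16 x i1 svbool predicate to 8 x i1.
svint16_t dup_s16_zeroing(svbool_t pg, int16_t op) {
  return svdup_n_s16_z(pg, op);
}
```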
---
Patch is 35.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/175976.diff
4 Files Affected:
- (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp (+86-9)
- (modified) clang/lib/CIR/CodeGen/CIRGenFunction.h (+2)
- (modified) clang/lib/CIR/CodeGen/CIRGenTypes.cpp (+4)
- (modified) clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c (+472-5)
``````````diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 93089eb585aa7..d59d3bebe0bb0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -126,6 +126,81 @@ bool CIRGenFunction::getAArch64SVEProcessedOperands(
return true;
}
+// Reinterpret the input predicate so that it can be used to correctly isolate
+// the elements of the specified datatype.
+mlir::Value CIRGenFunction::emitSVEpredicateCast(mlir::Value *pred,
+ unsigned minNumElts,
+ mlir::Location loc) {
+
+ // TODO: Handle "aarch64.svcount" once we get round to supporting SME.
+
+ auto retTy = cir::VectorType::get(builder.getUIntNTy(1), minNumElts,
+ /*is_scalable=*/true);
+ if (pred->getType() == retTy)
+ return *pred;
+
+ unsigned intID;
+ mlir::Type intrinsicTy;
+ switch (minNumElts) {
+ default:
+ llvm_unreachable("unsupported element count!");
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ intID = Intrinsic::aarch64_sve_convert_from_svbool;
+ intrinsicTy = retTy;
+ break;
+ case 16:
+ intID = Intrinsic::aarch64_sve_convert_to_svbool;
+ intrinsicTy = pred->getType();
+ break;
+ }
+
+ std::string llvmIntrName(Intrinsic::getBaseName(intID));
+ llvmIntrName.erase(0, /*std::strlen("llvm.")=*/5);
+ auto call = emitIntrinsicCallOp(builder, loc, llvmIntrName, retTy,
+ mlir::ValueRange{*pred});
+ assert(call.getType() == retTy && "Unexpected return type!");
+ return call;
+}
+
+// Return the minimum element count for the given SVE element type.
+static unsigned getSVEMinEltCount(const clang::SVETypeFlags::EltType &sveType) {
+ switch (sveType) {
+ default:
+ llvm_unreachable("Invalid SVETypeFlag!");
+
+ case SVETypeFlags::EltTyInt8:
+ return 16;
+ case SVETypeFlags::EltTyInt16:
+ return 8;
+ case SVETypeFlags::EltTyInt32:
+ return 4;
+ case SVETypeFlags::EltTyInt64:
+ return 2;
+
+ case SVETypeFlags::EltTyMFloat8:
+ return 16;
+ case SVETypeFlags::EltTyFloat16:
+ case SVETypeFlags::EltTyBFloat16:
+ return 8;
+ case SVETypeFlags::EltTyFloat32:
+ return 4;
+ case SVETypeFlags::EltTyFloat64:
+ return 2;
+
+ case SVETypeFlags::EltTyBool8:
+ return 16;
+ case SVETypeFlags::EltTyBool16:
+ return 8;
+ case SVETypeFlags::EltTyBool32:
+ return 4;
+ case SVETypeFlags::EltTyBool64:
+ return 2;
+ }
+}
+
std::optional<mlir::Value>
CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
@@ -171,10 +246,12 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
std::string("unimplemented AArch64 builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
- if (typeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented AArch64 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
+ // Zeroing predication
+ if (typeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) {
+ auto null = builder.getNullValue(convertType(expr->getType()),
+ getLoc(expr->getExprLoc()));
+ ops.insert(ops.begin(), null);
+ }
if (typeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
cgm.errorNYI(expr->getSourceRange(),
@@ -194,11 +271,11 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
// Predicates must match the main datatype.
for (mlir::Value &op : ops)
- if (auto predTy = dyn_cast<mlir::VectorType>(op.getType()))
- if (predTy.getElementType().isInteger(1))
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented AArch64 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
+ if (auto predTy = dyn_cast<cir::VectorType>(op.getType()))
+ if (auto cirInt = dyn_cast<cir::IntType>(predTy.getElementType()))
+ if (cirInt.getWidth() == 1)
+ op = emitSVEpredicateCast(
+ &op, getSVEMinEltCount(typeFlags.getEltType()), loc);
// Splat scalar operand to vector (intrinsics with _n infix)
if (typeFlags.hasSplatOperand()) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 5fe1d9a4f2b76..86d2a8c4ac089 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1269,6 +1269,8 @@ class CIRGenFunction : public CIRGenTypeCache {
bool getAArch64SVEProcessedOperands(unsigned builtinID, const CallExpr *expr,
SmallVectorImpl<mlir::Value> &ops,
clang::SVETypeFlags typeFlags);
+ mlir::Value emitSVEpredicateCast(mlir::Value *pred, unsigned minNumElts,
+ mlir::Location loc);
std::optional<mlir::Value>
emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
ReturnValueSlot returnValue,
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index 985c2901a7b04..f6220c616ed60 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -373,6 +373,10 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
resultType = cir::VectorType::get(builder.getDoubleTy(), 2,
/*is_scalable=*/true);
break;
+ case BuiltinType::SveBool:
+ resultType = cir::VectorType::get(builder.getUIntNTy(1), 16,
+ /*is_scalable=*/true);
+ break;
// Unsigned integral types.
case BuiltinType::Char8:
diff --git a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
index 3e0a892d6b368..60a2992ab14ad 100644
--- a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
+++ b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
@@ -1,13 +1,13 @@
// REQUIRES: aarch64-registered-target
-
+//
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
#include <arm_sve.h>
#if defined __ARM_FEATURE_SME
@@ -209,3 +209,470 @@ svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR
// LLVM_OGCG_CIR: [[RES:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double [[OP_LOAD]])
return SVE_ACLE_FUNC(svdup,_n,_f64,)(op);
}
+
+// ALL-LABEL: @test_svdup_n_s8_z
+svint8_t test_svdup_n_s8_z(svbool_t pg, int8_t op) MODE_ATTR
+{
+// CIR-SAME: %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
+// CIR-SAME: %[[OP:.*]]: !s8i
+// CIR-SAME: -> !cir.vector<[16] x !s8i>
+// CIR: %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x !cir.int<u, 1>>
+// CIR: %[[ALLOCA_OP:.*]] = cir.alloca !s8i
+// CIR: %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[16] x !s8i>
+// CIR: cir.store %[[PG]], %[[ALLOCA_PG]]
+// CIR: cir.store %[[OP]], %[[ALLOCA_OP]]
+// CIR: %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]]
+// CIR: %[[LOAD_OP:.*]] = cir.load align(1) %[[ALLOCA_OP]]
+// CIR: %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[16] x !s8i>
+// CIR: %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" %[[CONST_0]], %[[LOAD_PG]], %[[LOAD_OP]]
+// CIR-SAME: -> !cir.vector<[16] x !s8i>
+// CIR: cir.store %[[CONVERT_PG]], %[[ALLOCA_RES]]
+// CIR: %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
+// CIR: cir.return %[[RES]]
+
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[PG_ADDR:%.*]] = alloca <vscale x 16 x i1>,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1
+//
+// LLVM_VIA_CIR: [[RES_ADDR:%.*]] = alloca <vscale x 16 x i8>,{{([[:space:]]?i64 1,)?}} align 16
+//
+// LLVM_OGCG_CIR: store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
+// LLVM_OGCG_CIR: store i8 [[OP]], ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], align 2
+// LLVM_OGCG_CIR: [[TMP1:%.*]] = load i8, ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[TMP0]], i8 [[TMP1]])
+//
+// LLVM_DIRECT: ret {{.*}} [[TMP2]]
+//
+// LLVM_VIA_CIR: store {{.*}} [[TMP2]], ptr [[RES_ADDR]]
+// LLVM_VIA_CIR: [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
+// LLVM_VIA_CIR: ret {{.*}} [[RES]]
+ return SVE_ACLE_FUNC(svdup,_n,_s8_z,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_s16_z(
+svint16_t test_svdup_n_s16_z(svbool_t pg, int16_t op) MODE_ATTR
+{
+// CIR-SAME: %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
+// CIR-SAME: %[[OP:.*]]: !s16i
+// CIR-SAME: -> !cir.vector<[8] x !s16i>
+// CIR: %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x !cir.int<u, 1>>
+// CIR: %[[ALLOCA_OP:.*]] = cir.alloca !s16i
+// CIR: %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[8] x !s16i>
+// CIR: cir.store %[[PG]], %[[ALLOCA_PG]]
+// CIR: cir.store %[[OP]], %[[ALLOCA_OP]]
+// CIR: %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]]
+// CIR: %[[LOAD_OP:.*]] = cir.load align(2) %[[ALLOCA_OP]]
+// CIR: %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[8] x !s16i>
+// CIR: %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic "aarch64.sve.convert.from.svbool" %[[LOAD_PG]]
+// CIR-SAME: -> !cir.vector<[8] x !cir.int<u, 1>>
+// CIR: %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" %[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]]
+// CIR-SAME: -> !cir.vector<[8] x !s16i>
+// CIR: cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
+// CIR: %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
+// CIR: cir.return %[[RES]]
+
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[PG_ADDR:%.*]] = alloca <vscale x 16 x i1>,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} align 2
+//
+// LLVM_VIA_CIR: [[RES_ADDR:%.*]] = alloca <vscale x 8 x i16>,{{([[:space:]]?i64 1,)?}} align 16
+//
+// LLVM_OGCG_CIR: store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
+// LLVM_OGCG_CIR: store i16 [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], align 2
+// LLVM_OGCG_CIR: [[TMP1:%.*]] = load i16, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR: [[TMP2:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
+// LLVM_OGCG_CIR: [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP2]], i16 [[TMP1]])
+//
+// LLVM_DIRECT: ret {{.*}} [[TMP3]]
+//
+// LLVM_VIA_CIR: store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
+// LLVM_VIA_CIR: [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
+// LLVM_VIA_CIR: ret {{.*}} [[RES]]
+ return SVE_ACLE_FUNC(svdup,_n,_s16_z,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_s32_z(
+svint32_t test_svdup_n_s32_z(svbool_t pg, int32_t op) MODE_ATTR
+{
+// CIR-SAME: %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
+// CIR-SAME: %[[OP:.*]]: !s32i
+// CIR-SAME: -> !cir.vector<[4] x !s32i>
+// CIR: %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x !cir.int<u, 1>>
+// CIR: %[[ALLOCA_OP:.*]] = cir.alloca !s32i
+// CIR: %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[4] x !s32i>
+// CIR: cir.store %[[PG]], %[[ALLOCA_PG]]
+// CIR: cir.store %[[OP]], %[[ALLOCA_OP]]
+// CIR: %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]]
+// CIR: %[[LOAD_OP:.*]] = cir.load align(4) %[[ALLOCA_OP]]
+// CIR: %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[4] x !s32i>
+// CIR: %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic "aarch64.sve.convert.from.svbool" %[[LOAD_PG]]
+// CIR-SAME: -> !cir.vector<[4] x !cir.int<u, 1>>
+// CIR: %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" %[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]]
+// CIR-SAME: -> !cir.vector<[4] x !s32i>
+// CIR: cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
+// CIR: %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
+// CIR: cir.return %[[RES]]
+
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[PG_ADDR:%.*]] = alloca <vscale x 16 x i1>,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} align 4
+//
+// LLVM_VIA_CIR: [[RES_ADDR:%.*]] = alloca <vscale x 4 x i32>,{{([[:space:]]?i64 1,)?}} align 16
+//
+// LLVM_OGCG_CIR: store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
+// LLVM_OGCG_CIR: store i32 [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], align 2
+// LLVM_OGCG_CIR: [[TMP1:%.*]] = load i32, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR: [[TMP2:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
+// LLVM_OGCG_CIR: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP2]], i32 [[TMP1]])
+//
+// LLVM_DIRECT: ret {{.*}} [[TMP3]]
+//
+// LLVM_VIA_CIR: store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
+// LLVM_VIA_CIR: [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
+// LLVM_VIA_CIR: ret {{.*}} [[RES]]
+ return SVE_ACLE_FUNC(svdup,_n,_s32_z,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_s64_z(
+svint64_t test_svdup_n_s64_z(svbool_t pg, int64_t op) MODE_ATTR
+{
+// CIR-SAME: %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
+// CIR-SAME: %[[OP:.*]]: !s64i
+// CIR-SAME: -> !cir.vector<[2] x !s64i>
+// CIR: %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x !cir.int<u, 1>>
+// CIR: %[[ALLOCA_OP:.*]] = cir.alloca !s64i
+// CIR: %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[2] x !s64i>
+// CIR: cir.store %[[PG]], %[[ALLOCA_PG]]
+// CIR: cir.store %[[OP]], %[[ALLOCA_OP]]
+// CIR: %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]]
+// CIR: %[[LOAD_OP:.*]] = cir.load align(8) %[[ALLOCA_OP]]
+// CIR: %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[2] x !s64i>
+// CIR: %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic "aarch64.sve.convert.from.svbool" %[[LOAD_PG]]
+// CIR-SAME: -> !cir.vector<[2] x !cir.int<u, 1>>
+// CIR: %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" %[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]]
+// CIR-SAME: -> !cir.vector<[2] x !s64i>
+// CIR: cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
+// CIR: %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
+// CIR: cir.return %[[RES]]
+
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[PG_ADDR:%.*]] = alloca <vscale x 16 x i1>,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} align 8
+//
+// LLVM_VIA_CIR: [[RES_ADDR:%.*]] = alloca <vscale x 2 x i64>,{{([[:space:]]?i64 1,)?}} align 16
+//
+// LLVM_OGCG_CIR: store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
+// LLVM_OGCG_CIR: store i64 [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], align 2
+// LLVM_OGCG_CIR: [[TMP1:%.*]] = load i64, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR: [[TMP2:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+// LLVM_OGCG_CIR: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP2]], i64 [[TMP1]])
+//
+// LLVM_DIRECT: ret {{.*}} [[TMP3]]
+//
+// LLVM_VIA_CIR: store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
+// LLVM_VIA_CIR: [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
+// LLVM_VIA_CIR: ret {{.*}} [[RES]]
+ return SVE_ACLE_FUNC(svdup,_n,_s64_z,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_u8_z(
+svuint8_t test_svdup_n_u8_z(svbool_t pg, uint8_t op) MODE_ATTR
+{
+// CIR-SAME: %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
+// CIR-SAME: %[[OP:.*]]: !u8i
+// CIR-SAME: -> !cir.vector<[16] x !u8i>
+// CIR: %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x !cir.int<u, 1>>
+// CIR: %[[ALLOCA_OP:.*]] = cir.alloca !u8i
+// CIR: %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[16] x !u8i>
+// CIR: cir.store %[[PG]], %[[ALLOCA_PG]]
+// CIR: cir.store %[[OP]], %[[ALLOCA_OP]]
+// CIR: %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]]
+// CIR: %[[LOAD_OP:.*]] = cir.load align(1) %[[ALLOCA_OP]]
+// CIR: %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[16] x !u8i>
+// CIR: %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" %[[CONST_0]], %[[LOAD_PG]], %[[LOAD_OP]]
+// CIR-SAME: -> !cir.vector<[16] x !u8i>
+// CIR: cir.store %[[CONVERT_PG]], %[[ALLOCA_RES]]
+// CIR: %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
+// CIR: cir.return %[[RES]]
+
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR: [[PG_ADDR:%.*]] = alloca <vscale x 16 x i1>,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR: [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1
+//
+// LLVM_VIA_CIR: [[RES_ADDR:%.*]] = alloca <vscale x 16 x i8>,{{([[:space:]]?i64 1,)?}} align 16
+//
+// LLVM_OGCG_CIR: store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
+// LLVM_OGCG_CIR: store i8 [[OP]], ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], align 2
+// LLVM_OGCG_CIR: [[TMP1:%.*]] = load i8, ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[TMP0]], i8 [[TMP1]])
+//
+// LLVM_DIRECT: ret {{.*}} [[TMP2]]
+//
+// LLVM_VIA_CIR: store {{.*}} [[TMP2]], ptr [[RES_ADDR]]
+// LLVM_VIA_CIR: ...
[truncated]
``````````
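
The predicate cast added in `emitSVEpredicateCast` above boils down to one intrinsic choice per element count. Below is a simplified standalone restatement; `pickPredicateCastIntrinsic` is a hypothetical helper, not the actual CIRGen entry point, and the early exit for predicates that already have the requested type is omitted.

```cpp
#include <cassert>
#include <string>

// Narrower predicates are carved out of the full 16 x i1 svbool value via
// aarch64.sve.convert.from.svbool; widening back to svbool would use
// aarch64.sve.convert.to.svbool.
std::string pickPredicateCastIntrinsic(unsigned minNumElts) {
  switch (minNumElts) {
  case 1:
  case 2:
  case 4:
  case 8:
    return "aarch64.sve.convert.from.svbool";
  case 16:
    return "aarch64.sve.convert.to.svbool";
  default:
    assert(false && "unsupported element count!");
    return {};
  }
}
```

For the svdup tests above, `minNumElts` comes from `getSVEMinEltCount`: 16 for 8-bit elements, 8 for 16-bit, 4 for 32-bit, and 2 for 64-bit.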
</details>
https://github.com/llvm/llvm-project/pull/175976
More information about the llvm-branch-commits mailing list