[clang] [CIR][AArch64] Add lowering for vaba_* and vabd_* builtins (PR #183595)
Andrzej Warzyński via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 3 05:25:44 PST 2026
https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/183595
>From b153fe01898695de005ea028134a840aa0dfd8c3 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 26 Feb 2026 18:44:15 +0000
Subject: [PATCH 1/4] [CIR][AArch64] Add lowering for vaba_* and vabd_*
builtins (1/N)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add CIR lowering for the following AdvSIMD (NEON) intrinsic families:
* vabd_* – Absolute difference
https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference
* vaba_* – Absolute difference and accumulate
https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference-and-accumulate
Tests for these intrinsics were split out from:
test/CodeGen/AArch64/neon-intrinsics.c
and moved to:
test/CodeGen/AArch64/neon/intrinsics.c
The following helper hooks were adapted from the ClangIR project:
* getNeonType, emitNeonCall, emitNeonCallToOp.
NOTE: Quad-word variants (e.g. vabaq_*) are not included in this change
and will be added in a follow-up patch.
Credit to the ClangIR contributors for the original implementation.
---
.../CIR/Dialect/Builder/CIRBaseBuilder.h | 3 +
.../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 137 +++++++++++
clang/test/CodeGen/AArch64/neon-intrinsics.c | 174 --------------
clang/test/CodeGen/AArch64/neon/intrinsics.c | 213 ++++++++++++++++++
4 files changed, 353 insertions(+), 174 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index efae3d9d894ed..fb96050964fcc 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -349,6 +349,9 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
mlir::IntegerAttr align = {},
cir::SyncScopeKindAttr scope = {},
cir::MemOrderAttr order = {}) {
+ if (mlir::cast<cir::PointerType>(dst.getType()).getPointee() !=
+ val.getType())
+ dst = createPtrBitcast(dst, val.getType());
return cir::StoreOp::create(*this, loc, val, dst, isVolatile, align, scope,
order);
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 5129aa75f8f8d..6c7a4fc3edd72 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -290,6 +290,119 @@ static bool hasExtraNeonArgument(unsigned builtinID) {
return mask != 0;
}
+// TODO: Remove `loc` from the list of arguments once all NYIs are gone.
+static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
+ mlir::Location loc,
+ bool hasLegalHalfType = true,
+ bool v1Ty = false,
+ bool allowBFloatArgsAndRet = true) {
+ int isQuad = typeFlags.isQuad();
+ switch (typeFlags.getEltType()) {
+ case NeonTypeFlags::Int8:
+ case NeonTypeFlags::Poly8:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty
+ : cgf->sInt8Ty,
+ v1Ty ? 1 : (8 << isQuad));
+ case NeonTypeFlags::MFloat8:
+ cgf->getCIRGenModule().errorNYI(
+ loc, std::string("unimplemented NEON type: MFloat8"));
+ [[fallthrough]];
+ case NeonTypeFlags::Int16:
+ case NeonTypeFlags::Poly16:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty
+ : cgf->sInt16Ty,
+ v1Ty ? 1 : (4 << isQuad));
+ case NeonTypeFlags::BFloat16:
+ if (allowBFloatArgsAndRet)
+ cgf->getCIRGenModule().errorNYI(
+ loc, std::string("unimplemented NEON type: BFloat16"));
+ else
+ cgf->getCIRGenModule().errorNYI(
+ loc, std::string("unimplemented NEON type: BFloat16"));
+ [[fallthrough]];
+ case NeonTypeFlags::Float16:
+ if (hasLegalHalfType)
+ cgf->getCIRGenModule().errorNYI(
+ loc, std::string("unimplemented NEON type: Float16"));
+ else
+ cgf->getCIRGenModule().errorNYI(
+ loc, std::string("unimplemented NEON type: Float16"));
+ [[fallthrough]];
+ case NeonTypeFlags::Int32:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty
+ : cgf->sInt32Ty,
+ v1Ty ? 1 : (2 << isQuad));
+ case NeonTypeFlags::Int64:
+ case NeonTypeFlags::Poly64:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty
+ : cgf->sInt64Ty,
+ v1Ty ? 1 : (1 << isQuad));
+ case NeonTypeFlags::Poly128:
+ // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
+ // There is a lot of i128 and f128 API missing.
+ // so we use v16i8 to represent poly128 and get pattern matched.
+ cgf->getCIRGenModule().errorNYI(
+ loc, std::string("unimplemented NEON type: Poly128"));
+ [[fallthrough]];
+ case NeonTypeFlags::Float32:
+ return cir::VectorType::get(cgf->getCIRGenModule().floatTy,
+ v1Ty ? 1 : (2 << isQuad));
+ case NeonTypeFlags::Float64:
+ return cir::VectorType::get(cgf->getCIRGenModule().doubleTy,
+ v1Ty ? 1 : (1 << isQuad));
+ }
+ llvm_unreachable("Unknown vector element type!");
+}
+
+template <typename Operation>
+static mlir::Value emitNeonCallToOp(
+ CIRGenBuilderTy &builder, llvm::SmallVector<mlir::Type> argTypes,
+ llvm::SmallVectorImpl<mlir::Value> &args,
+ std::optional<llvm::StringRef> intrinsicName, mlir::Type funcResTy,
+ mlir::Location loc, bool isConstrainedFPIntrinsic = false,
+ unsigned shift = 0, bool rightshift = false) {
+ // TODO: Consider removing the following unreachable when we have
+ // emitConstrainedFPCall feature implemented
+ assert(!cir::MissingFeatures::emitConstrainedFPCall());
+ if (isConstrainedFPIntrinsic)
+ llvm_unreachable("isConstrainedFPIntrinsic NYI");
+
+ for (unsigned j = 0; j < argTypes.size(); ++j) {
+ if (isConstrainedFPIntrinsic) {
+ assert(!cir::MissingFeatures::emitConstrainedFPCall());
+ }
+ if (shift > 0 && shift == j) {
+ llvm_unreachable("shift NYI");
+ } else {
+ args[j] = builder.createBitcast(args[j], argTypes[j]);
+ }
+ }
+ if (isConstrainedFPIntrinsic) {
+ assert(!cir::MissingFeatures::emitConstrainedFPCall());
+ return nullptr;
+ }
+ if constexpr (std::is_same_v<Operation, cir::LLVMIntrinsicCallOp>) {
+ return Operation::create(builder, loc,
+ builder.getStringAttr(intrinsicName.value()),
+ funcResTy, args)
+ .getResult();
+ } else {
+ return Operation::create(builder, loc, funcResTy, args).getResult();
+ }
+}
+
+static mlir::Value emitNeonCall(CIRGenBuilderTy &builder,
+ llvm::SmallVector<mlir::Type> argTypes,
+ llvm::SmallVectorImpl<mlir::Value> &args,
+ llvm::StringRef intrinsicName,
+ mlir::Type funcResTy, mlir::Location loc,
+ bool isConstrainedFPIntrinsic = false,
+ unsigned shift = 0, bool rightshift = false) {
+ return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
+ builder, std::move(argTypes), args, intrinsicName, funcResTy, loc,
+ isConstrainedFPIntrinsic, shift, rightshift);
+}
+
std::optional<mlir::Value>
CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
@@ -1454,6 +1567,16 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
assert(!cir::MissingFeatures::aarch64TblBuiltinExpr());
+ const Expr *arg = expr->getArg(expr->getNumArgs() - 1);
+ NeonTypeFlags type(0);
+ // A trailing constant integer is used for discriminating overloaded builtin
+ // calls. Use it to determine the type of this overloaded NEON intrinsic.
+ if (std::optional<llvm::APSInt> result =
+ arg->getIntegerConstantExpr(getContext()))
+ type = NeonTypeFlags(result->getZExtValue());
+
+ bool usgn = type.isUnsigned();
+
mlir::Location loc = getLoc(expr->getExprLoc());
// Handle non-overloaded intrinsics first.
@@ -1678,6 +1801,12 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
return mlir::Value{};
}
+ cir::VectorType ty = getNeonType(this, type, loc);
+ if (!ty)
+ return nullptr;
+
+ llvm::StringRef intrName;
+
switch (builtinID) {
default:
return std::nullopt;
@@ -1700,7 +1829,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
case NEON::BI__builtin_neon_vmin_v:
case NEON::BI__builtin_neon_vminq_v:
case NEON::BI__builtin_neon_vminh_f16:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented AArch64 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
case NEON::BI__builtin_neon_vabd_v:
+ intrName = usgn ? "aarch64.neon.uabd" : "aarch64.neon.sabd";
+ if (cir::isFPOrVectorOfFPType(ty))
+ intrName = "aarch64.neon.fabd";
+ return emitNeonCall(builder, {ty, ty}, ops, intrName, ty, loc);
case NEON::BI__builtin_neon_vabdq_v:
case NEON::BI__builtin_neon_vpadal_v:
case NEON::BI__builtin_neon_vpadalq_v:
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 42799d27bba89..909d00630b069 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -1038,88 +1038,6 @@ float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
return vdiv_f32(v1, v2);
}
-// CHECK-LABEL: define dso_local <8 x i8> @test_vaba_s8(
-// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
-// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
-//
-int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
- return vaba_s8(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vaba_s16(
-// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]]
-// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
-//
-int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
- return vaba_s16(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vaba_s32(
-// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]]
-// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
-//
-int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
- return vaba_s32(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vaba_u8(
-// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
-// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
-//
-uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
- return vaba_u8(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vaba_u16(
-// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]]
-// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
-//
-uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
- return vaba_u16(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vaba_u32(
-// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]]
-// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
-//
-uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
- return vaba_u32(v1, v2, v3);
-}
-
// CHECK-LABEL: define dso_local <16 x i8> @test_vabaq_s8(
// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -1202,98 +1120,6 @@ uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
return vabaq_u32(v1, v2, v3);
}
-// CHECK-LABEL: define dso_local <8 x i8> @test_vabd_s8(
-// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
-// CHECK-NEXT: ret <8 x i8> [[VABD_I]]
-//
-int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
- return vabd_s8(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vabd_s16(
-// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
-// CHECK-NEXT: ret <4 x i16> [[VABD2_I]]
-//
-int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
- return vabd_s16(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vabd_s32(
-// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
-// CHECK-NEXT: ret <2 x i32> [[VABD2_I]]
-//
-int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
- return vabd_s32(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vabd_u8(
-// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
-// CHECK-NEXT: ret <8 x i8> [[VABD_I]]
-//
-uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
- return vabd_u8(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vabd_u16(
-// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
-// CHECK-NEXT: ret <4 x i16> [[VABD2_I]]
-//
-uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
- return vabd_u16(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vabd_u32(
-// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
-// CHECK-NEXT: ret <2 x i32> [[VABD2_I]]
-//
-uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
- return vabd_u32(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <2 x float> @test_vabd_f32(
-// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]])
-// CHECK-NEXT: ret <2 x float> [[VABD2_I]]
-//
-float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
- return vabd_f32(v1, v2);
-}
-
// CHECK-LABEL: define dso_local <16 x i8> @test_vabdq_s8(
// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index 039a08c23852e..227b23f532fe1 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -42,3 +42,216 @@ int64_t test_vnegd_s64(int64_t a) {
// LLVM-NEXT: ret i64 [[VNEGD_I]]
return (int64_t)vnegd_s64(a);
}
+
+//===------------------------------------------------------===//
+// 2.1.1.6.1. Absolute difference
+//===------------------------------------------------------===//
+// LLVM-LABEL: @test_vabd_s8(
+// CIR-LABEL: @vabd_s8(
+int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s8i>, !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i>
+
+// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]])
+// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// LLVM-NEXT: ret <8 x i8> [[VABD_I]]
+ return vabd_s8(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_s16(
+// CIR-LABEL: @vabd_s16(
+int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// LLVM-NEXT: ret <4 x i16> [[VABD2_I]]
+ return vabd_s16(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_s32(
+// CIR-LABEL: @vabd_s32(
+int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// LLVM-NEXT: ret <2 x i32> [[VABD2_I]]
+ return vabd_s32(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_u8(
+// CIR-LABEL: @vabd_u8(
+uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" %{{.*}}, %{{.*}} : (!cir.vector<8 x !u8i>, !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i>
+
+// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]])
+// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// LLVM-NEXT: ret <8 x i8> [[VABD_I]]
+ return vabd_u8(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_u16(
+// CIR-LABEL: @vabd_u16(
+uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// LLVM-NEXT: ret <4 x i16> [[VABD2_I]]
+ return vabd_u16(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_u32(
+// CIR-LABEL: @vabd_u32(
+uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// LLVM-NEXT: ret <2 x i32> [[VABD2_I]]
+ return vabd_u32(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_f32(
+// CIR-LABEL: @vabd_f32(
+float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]])
+// LLVM-NEXT: ret <2 x float> [[VABD2_I]]
+ return vabd_f32(v1, v2);
+}
+
+//===------------------------------------------------------===//
+// 2.1.1.6.3. Absolute difference and accumulate
+//
+// The following builtins expand to a call to vabd_{} builtins,
+// which is reflected in the CIR output.
+//===------------------------------------------------------===//
+
+// LLVM-LABEL: @test_vaba_u8(
+// CIR-LABEL: @vaba_u8(
+uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_u8
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]])
+// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
+// LLVM-NEXT: ret <8 x i8> [[ADD_I]]
+ return vaba_u8(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vaba_u16(
+// CIR-LABEL: @vaba_u16(
+uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_u16
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]]
+// LLVM-NEXT: ret <4 x i16> [[ADD_I]]
+ return vaba_u16(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vaba_u32(
+// CIR-LABEL: @vaba_u32(
+uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_u32
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]]
+// LLVM-NEXT: ret <2 x i32> [[ADD_I]]
+ return vaba_u32(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vaba_s8(
+// CIR-LABEL: @vaba_s8(
+int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_s8
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]])
+// LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
+// LLVM-NEXT: ret <8 x i8> [[ADD_I]]
+ return vaba_s8(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vaba_s16(
+// CIR-LABEL: @vaba_s16(
+int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_s16
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]]
+// LLVM-NEXT: ret <4 x i16> [[ADD_I]]
+ return vaba_s16(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vaba_s32(
+// CIR-LABEL: @vaba_s32(
+int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_s32
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]]
+// LLVM-NEXT: ret <2 x i32> [[ADD_I]]
+ return vaba_s32(v1, v2, v3);
+}
>From 53dc48fc93b5acc0c6a28f45c751e44b6728daed Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Fri, 27 Feb 2026 17:38:14 +0000
Subject: [PATCH 2/4] Add quad-word variants
---
.../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 2 +-
clang/test/CodeGen/AArch64/neon-intrinsics.c | 190 --------------
clang/test/CodeGen/AArch64/neon/intrinsics.c | 247 ++++++++++++++++++
3 files changed, 248 insertions(+), 191 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 6c7a4fc3edd72..2f1c5f35d20d4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1834,11 +1834,11 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
getContext().BuiltinInfo.getName(builtinID));
return mlir::Value{};
case NEON::BI__builtin_neon_vabd_v:
+ case NEON::BI__builtin_neon_vabdq_v:
intrName = usgn ? "aarch64.neon.uabd" : "aarch64.neon.sabd";
if (cir::isFPOrVectorOfFPType(ty))
intrName = "aarch64.neon.fabd";
return emitNeonCall(builder, {ty, ty}, ops, intrName, ty, loc);
- case NEON::BI__builtin_neon_vabdq_v:
case NEON::BI__builtin_neon_vpadal_v:
case NEON::BI__builtin_neon_vpadalq_v:
case NEON::BI__builtin_neon_vpmin_v:
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 909d00630b069..33b0b6bc55426 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -1038,196 +1038,6 @@ float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
return vdiv_f32(v1, v2);
}
-// CHECK-LABEL: define dso_local <16 x i8> @test_vabaq_s8(
-// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]]
-// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
-//
-int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
- return vabaq_s8(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vabaq_s16(
-// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[V1]], [[VABD2_I]]
-// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
-//
-int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
- return vabaq_s16(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vabaq_s32(
-// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[V1]], [[VABD2_I]]
-// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
-//
-int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
- return vabaq_s32(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vabaq_u8(
-// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]]
-// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
-//
-uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
- return vabaq_u8(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vabaq_u16(
-// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[V1]], [[VABD2_I]]
-// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
-//
-uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
- return vabaq_u16(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vabaq_u32(
-// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
-// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[V1]], [[VABD2_I]]
-// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
-//
-uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
- return vabaq_u32(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vabdq_s8(
-// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
-// CHECK-NEXT: ret <16 x i8> [[VABD_I]]
-//
-int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
- return vabdq_s8(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vabdq_s16(
-// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
-// CHECK-NEXT: ret <8 x i16> [[VABD2_I]]
-//
-int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
- return vabdq_s16(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vabdq_s32(
-// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
-// CHECK-NEXT: ret <4 x i32> [[VABD2_I]]
-//
-int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
- return vabdq_s32(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vabdq_u8(
-// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
-// CHECK-NEXT: ret <16 x i8> [[VABD_I]]
-//
-uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
- return vabdq_u8(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vabdq_u16(
-// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
-// CHECK-NEXT: ret <8 x i16> [[VABD2_I]]
-//
-uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
- return vabdq_u16(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vabdq_u32(
-// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
-// CHECK-NEXT: ret <4 x i32> [[VABD2_I]]
-//
-uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
- return vabdq_u32(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_vabdq_f32(
-// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> [[VABD_I]], <4 x float> [[VABD1_I]])
-// CHECK-NEXT: ret <4 x float> [[VABD2_I]]
-//
-float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
- return vabdq_f32(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <2 x double> @test_vabdq_f64(
-// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
-// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> [[VABD_I]], <2 x double> [[VABD1_I]])
-// CHECK-NEXT: ret <2 x double> [[VABD2_I]]
-//
-float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
- return vabdq_f64(v1, v2);
-}
-
// CHECK-LABEL: define dso_local <8 x i8> @test_vbsl_s8(
// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index 227b23f532fe1..783322db33f55 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -155,6 +155,159 @@ float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
return vabd_f32(v1, v2);
}
+// LLVM-LABEL: @test_vabd_f64(
+// CIR-LABEL: @vabd_f64(
+float64x1_t test_vabd_f64(float64x1_t v1, float64x1_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !cir.double>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !cir.double>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <1 x double> noundef [[V1:%.*]], <1 x double> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[V1]] to i64
+// LLVM-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[V2]] to i64
+// LLVM-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// LLVM-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> [[VABD_I]], <1 x double> [[VABD1_I]])
+// LLVM-NEXT: ret <1 x double> [[VABD2_I]]
+ return vabd_f64(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabdq_s8(
+// CIR-LABEL: @vabdq_s8(
+int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" %{{.*}}, %{{.*}} : (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>) -> !cir.vector<16 x !s8i>
+
+// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]])
+// LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+// LLVM-NEXT: ret <16 x i8> [[VABD_I]]
+ return vabdq_s8(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabdq_s16(
+// CIR-LABEL: @vabdq_s16(
+int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
+// LLVM-NEXT: ret <8 x i16> [[VABD2_I]]
+ return vabdq_s16(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabdq_s32(
+// CIR-LABEL: @vabdq_s32(
+int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
+// LLVM-NEXT: ret <4 x i32> [[VABD2_I]]
+ return vabdq_s32(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabdq_u8(
+// CIR-LABEL: @vabdq_u8(
+uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" %{{.*}}, %{{.*}} : (!cir.vector<16 x !u8i>, !cir.vector<16 x !u8i>) -> !cir.vector<16 x !u8i>
+
+// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]])
+// LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+// LLVM-NEXT: ret <16 x i8> [[VABD_I]]
+ return vabdq_u8(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabdq_u16(
+// CIR-LABEL: @vabdq_u16(
+uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !u16i>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !u16i>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
+// LLVM-NEXT: ret <8 x i16> [[VABD2_I]]
+ return vabdq_u16(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabdq_u32(
+// CIR-LABEL: @vabdq_u32(
+uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !u32i>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !u32i>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
+// LLVM-NEXT: ret <4 x i32> [[VABD2_I]]
+ return vabdq_u32(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabdq_f32(
+// CIR-LABEL: @vabdq_f32(
+float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !cir.float>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !cir.float>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> [[VABD_I]], <4 x float> [[VABD1_I]])
+// LLVM-NEXT: ret <4 x float> [[VABD2_I]]
+ return vabdq_f32(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabdq_f64(
+// CIR-LABEL: @vabdq_f64(
+float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
+// CIR: [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !cir.double>
+// CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !cir.double>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> [[VABD_I]], <2 x double> [[VABD1_I]])
+// LLVM-NEXT: ret <2 x double> [[VABD2_I]]
+ return vabdq_f64(v1, v2);
+}
+
+// TODO SISD variants:
+// TODO @vabdd_f64(a, b);
+// TODO @test_vabds_f32(
+
//===------------------------------------------------------===//
// 2.1.1.6.3. Absolute difference and accumulate
//
@@ -255,3 +408,97 @@ int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
// LLVM-NEXT: ret <2 x i32> [[ADD_I]]
return vaba_s32(v1, v2, v3);
}
+
+// LLVM-LABEL: @test_vabaq_s8(
+// CIR-LABEL: @vabaq_s8(
+int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabdq_s8
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]])
+// LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]]
+// LLVM-NEXT: ret <16 x i8> [[ADD_I]]
+ return vabaq_s8(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vabaq_s16(
+// CIR-LABEL: @vabaq_s16(
+int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabdq_s16
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[V1]], [[VABD2_I]]
+// LLVM-NEXT: ret <8 x i16> [[ADD_I]]
+ return vabaq_s16(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vabaq_s32(
+// CIR-LABEL: @vabaq_s32(
+int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabdq_s32
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[V1]], [[VABD2_I]]
+// LLVM-NEXT: ret <4 x i32> [[ADD_I]]
+ return vabaq_s32(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vabaq_u8(
+// CIR-LABEL: @vabaq_u8(
+uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabdq_u8
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]])
+// LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]]
+// LLVM-NEXT: ret <16 x i8> [[ADD_I]]
+ return vabaq_u8(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vabaq_u16(
+// CIR-LABEL: @vabaq_u16(
+uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabdq_u16
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[V1]], [[VABD2_I]]
+// LLVM-NEXT: ret <8 x i16> [[ADD_I]]
+ return vabaq_u16(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vabaq_u32(
+// CIR-LABEL: @vabaq_u32(
+uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabdq_u32
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
+// LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// LLVM-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
+// LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[V1]], [[VABD2_I]]
+// LLVM-NEXT: ret <4 x i32> [[ADD_I]]
+ return vabaq_u32(v1, v2, v3);
+}
>From e8d6c405bdabba36338abbfe24644bb33b83107b Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Tue, 3 Mar 2026 09:30:12 +0000
Subject: [PATCH 3/4] Address PR comments
---
.../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 31 +++++++++++--------
1 file changed, 18 insertions(+), 13 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 2f1c5f35d20d4..f734752f7cff8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -290,7 +290,7 @@ static bool hasExtraNeonArgument(unsigned builtinID) {
return mask != 0;
}
-// TODO: Remove `loc` from the list of arguments once all NYIs are gone.
+// TODO(cir): Remove `loc` from the list of arguments once all NYIs are gone.
static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
mlir::Location loc,
bool hasLegalHalfType = true,
@@ -354,25 +354,29 @@ static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
llvm_unreachable("Unknown vector element type!");
}
+// TODO(cir): Remove `cgm` from the list of arguments once all NYI(s) are gone.
template <typename Operation>
-static mlir::Value emitNeonCallToOp(
- CIRGenBuilderTy &builder, llvm::SmallVector<mlir::Type> argTypes,
- llvm::SmallVectorImpl<mlir::Value> &args,
- std::optional<llvm::StringRef> intrinsicName, mlir::Type funcResTy,
- mlir::Location loc, bool isConstrainedFPIntrinsic = false,
- unsigned shift = 0, bool rightshift = false) {
- // TODO: Consider removing the following unreachable when we have
+static mlir::Value
+emitNeonCallToOp(CIRGenModule &cgm, CIRGenBuilderTy &builder,
+ llvm::SmallVector<mlir::Type> argTypes,
+ llvm::SmallVectorImpl<mlir::Value> &args,
+ std::optional<llvm::StringRef> intrinsicName,
+ mlir::Type funcResTy, mlir::Location loc,
+ bool isConstrainedFPIntrinsic = false, unsigned shift = 0,
+ bool rightshift = false) {
+ // TODO(cir): Consider removing the following unreachable when we have
// emitConstrainedFPCall feature implemented
assert(!cir::MissingFeatures::emitConstrainedFPCall());
if (isConstrainedFPIntrinsic)
- llvm_unreachable("isConstrainedFPIntrinsic NYI");
+ cgm.errorNYI(loc, std::string("unimplemented constrained FP intrinsic"));
for (unsigned j = 0; j < argTypes.size(); ++j) {
if (isConstrainedFPIntrinsic) {
assert(!cir::MissingFeatures::emitConstrainedFPCall());
}
if (shift > 0 && shift == j) {
- llvm_unreachable("shift NYI");
+ cgm.errorNYI(loc,
+ std::string("unimplemented intrinsic requiring a shift Op"));
} else {
args[j] = builder.createBitcast(args[j], argTypes[j]);
}
@@ -391,7 +395,8 @@ static mlir::Value emitNeonCallToOp(
}
}
-static mlir::Value emitNeonCall(CIRGenBuilderTy &builder,
+// TODO(cir): Remove `cgm` from the list of arguments once all NYI(s) are gone.
+static mlir::Value emitNeonCall(CIRGenModule &cgm, CIRGenBuilderTy &builder,
llvm::SmallVector<mlir::Type> argTypes,
llvm::SmallVectorImpl<mlir::Value> &args,
llvm::StringRef intrinsicName,
@@ -399,7 +404,7 @@ static mlir::Value emitNeonCall(CIRGenBuilderTy &builder,
bool isConstrainedFPIntrinsic = false,
unsigned shift = 0, bool rightshift = false) {
return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
- builder, std::move(argTypes), args, intrinsicName, funcResTy, loc,
+ cgm, builder, std::move(argTypes), args, intrinsicName, funcResTy, loc,
isConstrainedFPIntrinsic, shift, rightshift);
}
@@ -1838,7 +1843,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
intrName = usgn ? "aarch64.neon.uabd" : "aarch64.neon.sabd";
if (cir::isFPOrVectorOfFPType(ty))
intrName = "aarch64.neon.fabd";
- return emitNeonCall(builder, {ty, ty}, ops, intrName, ty, loc);
+ return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
case NEON::BI__builtin_neon_vpadal_v:
case NEON::BI__builtin_neon_vpadalq_v:
case NEON::BI__builtin_neon_vpmin_v:
>From 3b20f09cf73a89faad15401c13e1904dac153d7f Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Tue, 3 Mar 2026 13:25:10 +0000
Subject: [PATCH 4/4] Tweak NYI msgs
---
.../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 23 +++++++------------
1 file changed, 8 insertions(+), 15 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index f734752f7cff8..df85ba7186775 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -304,8 +304,7 @@ static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
: cgf->sInt8Ty,
v1Ty ? 1 : (8 << isQuad));
case NeonTypeFlags::MFloat8:
- cgf->getCIRGenModule().errorNYI(
- loc, std::string("unimplemented NEON type: MFloat8"));
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: MFloat8"));
[[fallthrough]];
case NeonTypeFlags::Int16:
case NeonTypeFlags::Poly16:
@@ -314,19 +313,15 @@ static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
v1Ty ? 1 : (4 << isQuad));
case NeonTypeFlags::BFloat16:
if (allowBFloatArgsAndRet)
- cgf->getCIRGenModule().errorNYI(
- loc, std::string("unimplemented NEON type: BFloat16"));
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16"));
else
- cgf->getCIRGenModule().errorNYI(
- loc, std::string("unimplemented NEON type: BFloat16"));
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16"));
[[fallthrough]];
case NeonTypeFlags::Float16:
if (hasLegalHalfType)
- cgf->getCIRGenModule().errorNYI(
- loc, std::string("unimplemented NEON type: Float16"));
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
else
- cgf->getCIRGenModule().errorNYI(
- loc, std::string("unimplemented NEON type: Float16"));
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
[[fallthrough]];
case NeonTypeFlags::Int32:
return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty
@@ -341,8 +336,7 @@ static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
// FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
// There is a lot of i128 and f128 API missing.
// so we use v16i8 to represent poly128 and get pattern matched.
- cgf->getCIRGenModule().errorNYI(
- loc, std::string("unimplemented NEON type: Poly128"));
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Poly128"));
[[fallthrough]];
case NeonTypeFlags::Float32:
return cir::VectorType::get(cgf->getCIRGenModule().floatTy,
@@ -368,15 +362,14 @@ emitNeonCallToOp(CIRGenModule &cgm, CIRGenBuilderTy &builder,
// emitConstrainedFPCall feature implemented
assert(!cir::MissingFeatures::emitConstrainedFPCall());
if (isConstrainedFPIntrinsic)
- cgm.errorNYI(loc, std::string("unimplemented constrained FP intrinsic"));
+ cgm.errorNYI(loc, std::string("constrained FP intrinsic"));
for (unsigned j = 0; j < argTypes.size(); ++j) {
if (isConstrainedFPIntrinsic) {
assert(!cir::MissingFeatures::emitConstrainedFPCall());
}
if (shift > 0 && shift == j) {
- cgm.errorNYI(loc,
- std::string("unimplemented intrinsic requiring a shift Op"));
+ cgm.errorNYI(loc, std::string("intrinsic requiring a shift Op"));
} else {
args[j] = builder.createBitcast(args[j], argTypes[j]);
}
More information about the cfe-commits
mailing list