[clang] [CIR][AArch64] Lower NEON vpmax intrinsics (PR #201495)
Jiahao Guo via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 9 00:01:43 PDT 2026
https://github.com/E00N777 updated https://github.com/llvm/llvm-project/pull/201495
>From 64a2cae1f061d89fd56dc7362f31cef9c89f988f Mon Sep 17 00:00:00 2001
From: E0N777 <E0N_gjh at 163.com>
Date: Thu, 4 Jun 2026 11:00:50 +0800
Subject: [PATCH] [CIR][AArch64] Lower NEON vpmax intrinsics
---
.../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 12 +-
clang/test/CodeGen/AArch64/neon-intrinsics.c | 240 ----------------
clang/test/CodeGen/AArch64/neon/intrinsics.c | 264 ++++++++++++++++++
3 files changed, 272 insertions(+), 244 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index be906d0671e3a..4111df26d241d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -441,6 +441,10 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr(
case NEON::BI__builtin_neon_vsrid_n_u64:
case NEON::BI__builtin_neon_vslid_n_s64:
case NEON::BI__builtin_neon_vslid_n_u64:
+ case NEON::BI__builtin_neon_vpmaxs_f32:
+ case NEON::BI__builtin_neon_vpmaxqd_f64:
+ case NEON::BI__builtin_neon_vpmaxnms_f32:
+ case NEON::BI__builtin_neon_vpmaxnmqd_f64:
break;
}
@@ -2807,10 +2811,10 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
case NEON::BI__builtin_neon_vpmax_v:
case NEON::BI__builtin_neon_vpmaxq_v:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented AArch64 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return mlir::Value{};
+ intrName = usgn ? "aarch64.neon.umaxp" : "aarch64.neon.smaxp";
+ if (cir::isFPOrVectorOfFPType(ty))
+ intrName = "aarch64.neon.fmaxp";
+ return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
case NEON::BI__builtin_neon_vminnm_v:
case NEON::BI__builtin_neon_vminnmq_v:
intrName = "aarch64.neon.fminnm";
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 1f6359bbe2c89..5865c4cf61b50 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -4458,206 +4458,6 @@ uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
return vqrshlq_u64(a, b);
}
-// CHECK-LABEL: define dso_local <8 x i8> @test_vpmax_s8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
-// CHECK-NEXT: ret <8 x i8> [[VPMAX_I]]
-//
-int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
- return vpmax_s8(a, b);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vpmax_s16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]])
-// CHECK-NEXT: ret <4 x i16> [[VPMAX2_I]]
-//
-int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
- return vpmax_s16(a, b);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vpmax_s32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]])
-// CHECK-NEXT: ret <2 x i32> [[VPMAX2_I]]
-//
-int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
- return vpmax_s32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vpmax_u8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
-// CHECK-NEXT: ret <8 x i8> [[VPMAX_I]]
-//
-uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
- return vpmax_u8(a, b);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vpmax_u16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]])
-// CHECK-NEXT: ret <4 x i16> [[VPMAX2_I]]
-//
-uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
- return vpmax_u16(a, b);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vpmax_u32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]])
-// CHECK-NEXT: ret <2 x i32> [[VPMAX2_I]]
-//
-uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
- return vpmax_u32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <2 x float> @test_vpmax_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
-// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]])
-// CHECK-NEXT: ret <2 x float> [[VPMAX2_I]]
-//
-float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
- return vpmax_f32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vpmaxq_s8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
-// CHECK-NEXT: ret <16 x i8> [[VPMAX_I]]
-//
-int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
- return vpmaxq_s8(a, b);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vpmaxq_s16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]])
-// CHECK-NEXT: ret <8 x i16> [[VPMAX2_I]]
-//
-int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
- return vpmaxq_s16(a, b);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vpmaxq_s32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]])
-// CHECK-NEXT: ret <4 x i32> [[VPMAX2_I]]
-//
-int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
- return vpmaxq_s32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vpmaxq_u8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
-// CHECK-NEXT: ret <16 x i8> [[VPMAX_I]]
-//
-uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
- return vpmaxq_u8(a, b);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vpmaxq_u16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]])
-// CHECK-NEXT: ret <8 x i16> [[VPMAX2_I]]
-//
-uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
- return vpmaxq_u16(a, b);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vpmaxq_u32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]])
-// CHECK-NEXT: ret <4 x i32> [[VPMAX2_I]]
-//
-uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
- return vpmaxq_u32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_vpmaxq_f32(
-// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
-// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[VPMAX_I]], <4 x float> [[VPMAX1_I]])
-// CHECK-NEXT: ret <4 x float> [[VPMAX2_I]]
-//
-float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
- return vpmaxq_f32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <2 x double> @test_vpmaxq_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
-// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[VPMAX_I]], <2 x double> [[VPMAX1_I]])
-// CHECK-NEXT: ret <2 x double> [[VPMAX2_I]]
-//
-float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
- return vpmaxq_f64(a, b);
-}
-
// CHECK-LABEL: define dso_local <2 x float> @test_vpmaxnm_f32(
// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -8902,46 +8702,6 @@ float64_t test_vpaddd_f64(float64x2_t a) {
return vpaddd_f64(a);
}
-// CHECK-LABEL: define dso_local float @test_vpmaxnms_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[A]])
-// CHECK-NEXT: ret float [[VPMAXNMS_F32_I]]
-//
-float32_t test_vpmaxnms_f32(float32x2_t a) {
- return vpmaxnms_f32(a);
-}
-
-// CHECK-LABEL: define dso_local double @test_vpmaxnmqd_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[A]])
-// CHECK-NEXT: ret double [[VPMAXNMQD_F64_I]]
-//
-float64_t test_vpmaxnmqd_f64(float64x2_t a) {
- return vpmaxnmqd_f64(a);
-}
-
-// CHECK-LABEL: define dso_local float @test_vpmaxs_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[A]])
-// CHECK-NEXT: ret float [[VPMAXS_F32_I]]
-//
-float32_t test_vpmaxs_f32(float32x2_t a) {
- return vpmaxs_f32(a);
-}
-
-// CHECK-LABEL: define dso_local double @test_vpmaxqd_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[A]])
-// CHECK-NEXT: ret double [[VPMAXQD_F64_I]]
-//
-float64_t test_vpmaxqd_f64(float64x2_t a) {
- return vpmaxqd_f64(a);
-}
-
// CHECK-LABEL: define dso_local i16 @test_vqdmulhh_s16(
// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index aa3213efd1735..e72d38cbdb5a8 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -6503,3 +6503,267 @@ uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
// LLVM: ret i64 [[RET]]
return (uint64_t)vslid_n_u64(a, b, 63);
}
+
+//===----------------------------------------------------------------------===//
+// 2.1.1.12.3. Pairwise maximum
+// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#pairwise-maximum
+//===----------------------------------------------------------------------===//
+
+// LLVM-LABEL: @test_vpmax_s8(
+// CIR-LABEL: @vpmax_s8(
+int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.smaxp" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s8i>, !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i>
+
+// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]])
+// LLVM: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// LLVM-NEXT: ret <8 x i8> [[VPMAX_I]]
+ return vpmax_s8(a, b);
+}
+
+// LLVM-LABEL: @test_vpmax_s16(
+// CIR-LABEL: @vpmax_s16(
+int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.smaxp" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s16i>, !cir.vector<4 x !s16i>) -> !cir.vector<4 x !s16i>
+
+// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]])
+// LLVM-NEXT: ret <4 x i16> [[VPMAX2_I]]
+ return vpmax_s16(a, b);
+}
+
+// LLVM-LABEL: @test_vpmax_s32(
+// CIR-LABEL: @vpmax_s32(
+int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.smaxp" %{{.*}}, %{{.*}} : (!cir.vector<2 x !s32i>, !cir.vector<2 x !s32i>) -> !cir.vector<2 x !s32i>
+
+// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]])
+// LLVM-NEXT: ret <2 x i32> [[VPMAX2_I]]
+ return vpmax_s32(a, b);
+}
+
+// LLVM-LABEL: @test_vpmax_u8(
+// CIR-LABEL: @vpmax_u8(
+uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.umaxp" %{{.*}}, %{{.*}} : (!cir.vector<8 x !u8i>, !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i>
+
+// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]])
+// LLVM: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// LLVM-NEXT: ret <8 x i8> [[VPMAX_I]]
+ return vpmax_u8(a, b);
+}
+
+// LLVM-LABEL: @test_vpmax_u16(
+// CIR-LABEL: @vpmax_u16(
+uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.umaxp" %{{.*}}, %{{.*}} : (!cir.vector<4 x !u16i>, !cir.vector<4 x !u16i>) -> !cir.vector<4 x !u16i>
+
+// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]])
+// LLVM-NEXT: ret <4 x i16> [[VPMAX2_I]]
+ return vpmax_u16(a, b);
+}
+
+// LLVM-LABEL: @test_vpmax_u32(
+// CIR-LABEL: @vpmax_u32(
+uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.umaxp" %{{.*}}, %{{.*}} : (!cir.vector<2 x !u32i>, !cir.vector<2 x !u32i>) -> !cir.vector<2 x !u32i>
+
+// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]])
+// LLVM-NEXT: ret <2 x i32> [[VPMAX2_I]]
+ return vpmax_u32(a, b);
+}
+
+// LLVM-LABEL: @test_vpmax_f32(
+// CIR-LABEL: @vpmax_f32(
+float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fmaxp" %{{.*}}, %{{.*}} : (!cir.vector<2 x !cir.float>, !cir.vector<2 x !cir.float>) -> !cir.vector<2 x !cir.float>
+
+// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]], <2 x float> {{.*}} [[B:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]])
+// LLVM-NEXT: ret <2 x float> [[VPMAX2_I]]
+ return vpmax_f32(a, b);
+}
+
+// LLVM-LABEL: @test_vpmaxq_s8(
+// CIR-LABEL: @vpmaxq_s8(
+int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.smaxp" %{{.*}}, %{{.*}} : (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>) -> !cir.vector<16 x !s8i>
+
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]])
+// LLVM: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// LLVM-NEXT: ret <16 x i8> [[VPMAX_I]]
+ return vpmaxq_s8(a, b);
+}
+
+// LLVM-LABEL: @test_vpmaxq_s16(
+// CIR-LABEL: @vpmaxq_s16(
+int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.smaxp" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>) -> !cir.vector<8 x !s16i>
+
+// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]])
+// LLVM-NEXT: ret <8 x i16> [[VPMAX2_I]]
+ return vpmaxq_s16(a, b);
+}
+
+// LLVM-LABEL: @test_vpmaxq_s32(
+// CIR-LABEL: @vpmaxq_s32(
+int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.smaxp" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i>
+
+// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]])
+// LLVM-NEXT: ret <4 x i32> [[VPMAX2_I]]
+ return vpmaxq_s32(a, b);
+}
+
+// LLVM-LABEL: @test_vpmaxq_u8(
+// CIR-LABEL: @vpmaxq_u8(
+uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.umaxp" %{{.*}}, %{{.*}} : (!cir.vector<16 x !u8i>, !cir.vector<16 x !u8i>) -> !cir.vector<16 x !u8i>
+
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]])
+// LLVM: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// LLVM-NEXT: ret <16 x i8> [[VPMAX_I]]
+ return vpmaxq_u8(a, b);
+}
+
+// LLVM-LABEL: @test_vpmaxq_u16(
+// CIR-LABEL: @vpmaxq_u16(
+uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.umaxp" %{{.*}}, %{{.*}} : (!cir.vector<8 x !u16i>, !cir.vector<8 x !u16i>) -> !cir.vector<8 x !u16i>
+
+// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]])
+// LLVM-NEXT: ret <8 x i16> [[VPMAX2_I]]
+ return vpmaxq_u16(a, b);
+}
+
+// LLVM-LABEL: @test_vpmaxq_u32(
+// CIR-LABEL: @vpmaxq_u32(
+uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.umaxp" %{{.*}}, %{{.*}} : (!cir.vector<4 x !u32i>, !cir.vector<4 x !u32i>) -> !cir.vector<4 x !u32i>
+
+// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]])
+// LLVM-NEXT: ret <4 x i32> [[VPMAX2_I]]
+ return vpmaxq_u32(a, b);
+}
+
+// LLVM-LABEL: @test_vpmaxq_f32(
+// CIR-LABEL: @vpmaxq_f32(
+float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fmaxp" %{{.*}}, %{{.*}} : (!cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float>
+
+// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]], <4 x float> {{.*}} [[B:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[VPMAX_I]], <4 x float> [[VPMAX1_I]])
+// LLVM-NEXT: ret <4 x float> [[VPMAX2_I]]
+ return vpmaxq_f32(a, b);
+}
+
+// LLVM-LABEL: @test_vpmaxq_f64(
+// CIR-LABEL: @vpmaxq_f64(
+float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fmaxp" %{{.*}}, %{{.*}} : (!cir.vector<2 x !cir.double>, !cir.vector<2 x !cir.double>) -> !cir.vector<2 x !cir.double>
+
+// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]], <2 x double> {{.*}} [[B:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[VPMAX_I]], <2 x double> [[VPMAX1_I]])
+// LLVM-NEXT: ret <2 x double> [[VPMAX2_I]]
+ return vpmaxq_f64(a, b);
+}
+
+// LLVM-LABEL: @test_vpmaxs_f32(
+// CIR-LABEL: @vpmaxs_f32(
+float32_t test_vpmaxs_f32(float32x2_t a) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fmaxv" %{{.*}} : (!cir.vector<2 x !cir.float>) -> !cir.float
+
+// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]])
+// LLVM: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[A]])
+// LLVM-NEXT: ret float [[VPMAXS_F32_I]]
+ return vpmaxs_f32(a);
+}
+
+// LLVM-LABEL: @test_vpmaxqd_f64(
+// CIR-LABEL: @vpmaxqd_f64(
+float64_t test_vpmaxqd_f64(float64x2_t a) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fmaxv" %{{.*}} : (!cir.vector<2 x !cir.double>) -> !cir.double
+
+// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]])
+// LLVM: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[A]])
+// LLVM-NEXT: ret double [[VPMAXQD_F64_I]]
+ return vpmaxqd_f64(a);
+}
+
+// LLVM-LABEL: @test_vpmaxnms_f32(
+// CIR-LABEL: @vpmaxnms_f32(
+float32_t test_vpmaxnms_f32(float32x2_t a) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fmaxnmv" %{{.*}} : (!cir.vector<2 x !cir.float>) -> !cir.float
+
+// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]])
+// LLVM: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[A]])
+// LLVM-NEXT: ret float [[VPMAXNMS_F32_I]]
+ return vpmaxnms_f32(a);
+}
+
+// LLVM-LABEL: @test_vpmaxnmqd_f64(
+// CIR-LABEL: @vpmaxnmqd_f64(
+float64_t test_vpmaxnmqd_f64(float64x2_t a) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fmaxnmv" %{{.*}} : (!cir.vector<2 x !cir.double>) -> !cir.double
+
+// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]])
+// LLVM: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[A]])
+// LLVM-NEXT: ret double [[VPMAXNMQD_F64_I]]
+ return vpmaxnmqd_f64(a);
+}
More information about the cfe-commits
mailing list