[clang] 510792a - [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.
Mark Murray via cfe-commits
cfe-commits at lists.llvm.org
Mon Dec 2 03:20:44 PST 2019
Author: Mark Murray
Date: 2019-12-02T11:18:53Z
New Revision: 510792a2e0e3792871baa00ed34e162bba7cd9a2
URL: https://github.com/llvm/llvm-project/commit/510792a2e0e3792871baa00ed34e162bba7cd9a2
DIFF: https://github.com/llvm/llvm-project/commit/510792a2e0e3792871baa00ed34e162bba7cd9a2.diff
LOG: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.
Summary: Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics and their predicated versions. Add unit tests.
Subscribers: kristof.beyls, hiraditya, dmgreen, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D70829
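
For context (not part of the commit itself), a minimal usage sketch of the new intrinsics from C, assuming an MVE-enabled target (e.g. -march=armv8.1-m.main+mve.fp) and <arm_mve.h>; the wrapper function names below are illustrative only:

#include <arm_mve.h>

/* Unpredicated forms: element-wise integer min/max, and float min/max with
   IEEE minNum/maxNum semantics (these lower to llvm.minnum/llvm.maxnum, as
   the CodeGen tests in this patch show). */
uint32x4_t min_u32(uint32x4_t a, uint32x4_t b) { return vminq_u32(a, b); }
float32x4_t max_f32(float32x4_t a, float32x4_t b) { return vmaxnmq_f32(a, b); }

/* Predicated (_m) forms: lanes whose predicate bit is set take the min/max
   result; the remaining lanes are filled from the 'inactive' operand. The
   polymorphic name vmaxq_m resolves to vmaxq_m_u8 for these argument types. */
uint8x16_t max_u8_masked(uint8x16_t inactive, uint8x16_t a, uint8x16_t b,
                         mve_pred16_t p) {
  return vmaxq_m(inactive, a, b, p);
}
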
Added:
clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
clang/test/CodeGen/arm-mve-intrinsics/vminq.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll
llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll
llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
Modified:
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMInstrMVE.td
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index dfd8097f0644..90cccb12472c 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -105,6 +105,26 @@ defm vornq_m: predicated_bit_op_fp<"orn_predicated">;
defm vorrq_m: predicated_bit_op_fp<"orr_predicated">;
}
+// Predicated intrinsics - Int types only
+let params = T.Int in {
+def vminq_m: Intrinsic<
+ Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
+ (IRInt<"min_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
+def vmaxq_m: Intrinsic<
+ Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
+ (IRInt<"max_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
+}
+
+// Predicated intrinsics - Float types only
+let params = T.Float in {
+def vminnmq_m: Intrinsic<
+ Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
+ (IRInt<"min_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
+def vmaxnmq_m: Intrinsic<
+ Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
+ (IRInt<"max_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
+}
+
let params = T.Int in {
def vminvq: Intrinsic<Scalar, (args Scalar:$prev, Vector:$vec),
(Scalar (IRInt<"minv", [Vector], 1> $prev, $vec))>;
@@ -173,6 +193,28 @@ let params = T.Float in {
defm: compare<"le", fcmp_le>;
}
+let params = T.Signed in {
+ def vminq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+ (select (icmp_sle $a, $b), $a, $b)>;
+ def vmaxq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+ (select (icmp_sge $a, $b), $a, $b)>;
+}
+let params = T.Unsigned in {
+ def vminqu: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+ (select (icmp_ule $a, $b), $a, $b)>,
+ NameOverride<"vminq">;
+ def vmaxqu: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+ (select (icmp_uge $a, $b), $a, $b)>,
+ NameOverride<"vmaxq">;
+}
+let params = T.Float in {
+ def vminnmq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+ (IRIntBase<"minnum", [Vector]> $a, $b)>;
+ def vmaxnmq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+ (IRIntBase<"maxnum", [Vector]> $a, $b)>;
+}
+
+
multiclass contiguous_load<string mnemonic, PrimitiveType memtype,
list<Type> same_size, list<Type> wider> {
// Intrinsics named with explicit memory and element sizes that match:
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index c0ed80d456a5..d837a1d33d00 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -107,6 +107,7 @@ def fcmp_ge: IRBuilder<"CreateFCmpOGE">;
def fcmp_lt: IRBuilder<"CreateFCmpOLT">;
def fcmp_le: IRBuilder<"CreateFCmpOLE">;
def splat: CGHelperFn<"ARMMVEVectorSplat">;
+def select: IRBuilder<"CreateSelect">;
// A node that makes an Address out of a pointer-typed Value, by
// providing an alignment as the second argument.
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
new file mode 100644
index 000000000000..63300466c819
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
@@ -0,0 +1,65 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vmaxnmq_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b)
+{
+#ifdef POLYMORPHIC
+ return vmaxnmq(a, b);
+#else /* POLYMORPHIC */
+ return vmaxnmq_f16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxnmq_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b)
+{
+#ifdef POLYMORPHIC
+ return vmaxnmq(a, b);
+#else /* POLYMORPHIC */
+ return vmaxnmq_f32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxnmq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmaxnmq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vmaxnmq_m_f16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxnmq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmaxnmq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vmaxnmq_m_f32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
new file mode 100644
index 000000000000..133e28d6cf04
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
@@ -0,0 +1,98 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vmaxq_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[B]], <16 x i8> [[A]]
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
+int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b)
+{
+#ifdef POLYMORPHIC
+ return vmaxq(a, b);
+#else /* POLYMORPHIC */
+ return vmaxq_s8(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxq_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = icmp ult <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]]
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
+uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+ return vmaxq(a, b);
+#else /* POLYMORPHIC */
+ return vmaxq_u16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxq_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]]
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+ return vmaxq(a, b);
+#else /* POLYMORPHIC */
+ return vmaxq_s32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxq_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmaxq_m_u8(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmaxq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vmaxq_m_u8(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmaxq_m_s16(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmaxq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vmaxq_m_s16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxq_m_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmaxq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmaxq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vmaxq_m_u32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
new file mode 100644
index 000000000000..9ed5bf0c859b
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
@@ -0,0 +1,65 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vminnmq_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b)
+{
+#ifdef POLYMORPHIC
+ return vminnmq(a, b);
+#else /* POLYMORPHIC */
+ return vminnmq_f16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminnmq_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b)
+{
+#ifdef POLYMORPHIC
+ return vminnmq(a, b);
+#else /* POLYMORPHIC */
+ return vminnmq_f32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminnmq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vminnmq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vminnmq_m_f16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminnmq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vminnmq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vminnmq_m_f32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c
new file mode 100644
index 000000000000..9e54eaeb5d83
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c
@@ -0,0 +1,98 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vminq_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[B]], <16 x i8> [[A]]
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
+uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b)
+{
+#ifdef POLYMORPHIC
+ return vminq(a, b);
+#else /* POLYMORPHIC */
+ return vminq_u8(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminq_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]]
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+ return vminq(a, b);
+#else /* POLYMORPHIC */
+ return vminq_s16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminq_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]]
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+ return vminq(a, b);
+#else /* POLYMORPHIC */
+ return vminq_u32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vminq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vminq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vminq_m_s8(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminq_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vminq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vminq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vminq_m_u16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vminq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vminq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vminq_m_s32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index bd61bf13c54d..4d4bc5e2d82d 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -796,6 +796,12 @@ multiclass IntrinsicSignSuffix<list<LLVMType> rets, list<LLVMType> params = [],
def _u: Intrinsic<rets, params, props, name, sdprops>;
}
+def int_arm_mve_min_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_max_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+ [IntrNoMem]>;
def int_arm_mve_abd_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index cc3a8ee77d74..e64ab9b73700 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1015,6 +1015,16 @@ let Predicates = [HasMVEFloat] in {
(v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
(v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+ def : Pat<(v4f32 (int_arm_mve_max_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+ (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+ (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+ (i32 1), (v4i1 VCCR:$mask),
+ (v4f32 MQPR:$inactive)))>;
+ def : Pat<(v8f16 (int_arm_mve_max_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+ (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+ (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+ (i32 1), (v8i1 VCCR:$mask),
+ (v8f16 MQPR:$inactive)))>;
}
def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>;
@@ -1025,6 +1035,16 @@ let Predicates = [HasMVEFloat] in {
(v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
(v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+ def : Pat<(v4f32 (int_arm_mve_min_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+ (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+ (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+ (i32 1), (v4i1 VCCR:$mask),
+ (v4f32 MQPR:$inactive)))>;
+ def : Pat<(v8f16 (int_arm_mve_min_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+ (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+ (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+ (i32 1), (v8i1 VCCR:$mask),
+ (v8f16 MQPR:$inactive)))>;
}
@@ -1042,48 +1062,45 @@ class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
let Inst{4} = bit_4;
}
-multiclass MVE_VMINMAX_all_sizes<string iname, bit bit_4> {
- def s8 : MVE_VMINMAX<iname, "s8", 0b0, 0b00, bit_4>;
- def s16 : MVE_VMINMAX<iname, "s16", 0b0, 0b01, bit_4>;
- def s32 : MVE_VMINMAX<iname, "s32", 0b0, 0b10, bit_4>;
- def u8 : MVE_VMINMAX<iname, "u8", 0b1, 0b00, bit_4>;
- def u16 : MVE_VMINMAX<iname, "u16", 0b1, 0b01, bit_4>;
- def u32 : MVE_VMINMAX<iname, "u32", 0b1, 0b10, bit_4>;
-}
+multiclass MVE_VMINMAX_m<string iname, bit bit_4, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VMINMAX<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, bit_4>;
-defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>;
-defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>;
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated min/max
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (!cast<Instruction>(NAME)
+ (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMINs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ // Predicated min/max
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (!cast<Instruction>(NAME)
+ (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 1), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
+multiclass MVE_VMAX<MVEVectorVTInfo VTI>
+ : MVE_VMINMAX_m<"vmax", 0b0, VTI, !if(VTI.Unsigned, umax, smax), int_arm_mve_max_predicated>;
+multiclass MVE_VMIN<MVEVectorVTInfo VTI>
+ : MVE_VMINMAX_m<"vmin", 0b1, VTI, !if(VTI.Unsigned, umin, smin), int_arm_mve_min_predicated>;
+
+defm MVE_VMINs8 : MVE_VMIN<MVE_v16s8>;
+defm MVE_VMINs16 : MVE_VMIN<MVE_v8s16>;
+defm MVE_VMINs32 : MVE_VMIN<MVE_v4s32>;
+defm MVE_VMINu8 : MVE_VMIN<MVE_v16u8>;
+defm MVE_VMINu16 : MVE_VMIN<MVE_v8u16>;
+defm MVE_VMINu32 : MVE_VMIN<MVE_v4u32>;
+
+defm MVE_VMAXs8 : MVE_VMAX<MVE_v16s8>;
+defm MVE_VMAXs16 : MVE_VMAX<MVE_v8s16>;
+defm MVE_VMAXs32 : MVE_VMAX<MVE_v4s32>;
+defm MVE_VMAXu8 : MVE_VMAX<MVE_v16u8>;
+defm MVE_VMAXu16 : MVE_VMAX<MVE_v8u16>;
+defm MVE_VMAXu32 : MVE_VMAX<MVE_v4u32>;
+
// end of mve_comp instructions
// start of mve_bit instructions
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll
new file mode 100644
index 000000000000..d89308bb5941
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxnmq_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> %a, <8 x half> %b)
+ ret <8 x half> %0
+}
+
+declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>) #1
+
+define dso_local arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxnmq_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmaxnm.f32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
+
+define dso_local arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxnmq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmaxnmt.f16 q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxnmq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmaxnmt.f32 q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %b, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #2
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll
new file mode 100644
index 000000000000..09a7d60cd165
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxq_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmax.u8 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp ugt <16 x i8> %a, %b
+ %1 = select <16 x i1> %0, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %1
+}
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxq_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmax.s16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sgt <8 x i16> %a, %b
+ %1 = select <8 x i1> %0, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %1
+}
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxq_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmax.u32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp ugt <4 x i32> %a, %b
+ %1 = select <4 x i1> %0, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %1
+}
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vmaxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vmaxq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmaxt.s8 q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2
+
+declare <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vmaxq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vmaxq_m_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmaxt.s16 q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vmaxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vmaxq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmaxt.s32 q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
new file mode 100644
index 000000000000..10cd674d39a8
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminnmq_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vminnm.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> %a, <8 x half> %b)
+ ret <8 x half> %0
+}
+
+declare <8 x half> @llvm.minnum.v8f16(<8 x half>, <8 x half>) #1
+
+define dso_local arm_aapcs_vfpcc <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminnmq_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vminnm.f32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
+
+define dso_local arm_aapcs_vfpcc <8 x half> @test_vminnmq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminnmq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vminnmt.f16 q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminnmq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vminnmt.f32 q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %b, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #2
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
new file mode 100644
index 000000000000..0cbef86c928f
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmin.u8 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp ugt <16 x i8> %a, %b
+ %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a
+ ret <16 x i8> %1
+}
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmin.s16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sgt <8 x i16> %a, %b
+ %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a
+ ret <8 x i16> %1
+}
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmin.u32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp ugt <4 x i32> %a, %b
+ %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a
+ ret <4 x i32> %1
+}
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmint.s8 q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2
+
+declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmint.s16 q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmint.s32 q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2