[clang] [llvm] [ARM] Introduce intrinsics for MVE vcmp under strict-fp. (PR #169798)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 30 03:25:33 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
Similar to #169156, this adds intrinsics for the strict-fp compare nodes to
make sure they still end up as the original instruction.
---
Patch is 370.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169798.diff
5 Files Affected:
- (modified) clang/include/clang/Basic/arm_mve_defs.td (+18-6)
- (modified) clang/test/CodeGen/arm-mve-intrinsics/compare.c (+3030-1430)
- (modified) llvm/include/llvm/IR/IntrinsicsARM.td (+9)
- (modified) llvm/lib/Target/ARM/ARMInstrMVE.td (+34-15)
- (added) llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-compare.ll (+820)
``````````diff
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 3714262898476..be79002bcbe64 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -114,12 +114,12 @@ def icmp_sgt: IRBuilder<"CreateICmpSGT">;
def icmp_sge: IRBuilder<"CreateICmpSGE">;
def icmp_slt: IRBuilder<"CreateICmpSLT">;
def icmp_sle: IRBuilder<"CreateICmpSLE">;
-def fcmp_eq: IRBuilder<"CreateFCmpOEQ">;
-def fcmp_ne: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
-def fcmp_gt: IRBuilder<"CreateFCmpOGT">;
-def fcmp_ge: IRBuilder<"CreateFCmpOGE">;
-def fcmp_ult: IRBuilder<"CreateFCmpULT">;
-def fcmp_ule: IRBuilder<"CreateFCmpULE">;
+def fcmp_eq_node: IRBuilder<"CreateFCmpOEQ">;
+def fcmp_ne_node: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
+def fcmp_gt_node: IRBuilder<"CreateFCmpOGT">;
+def fcmp_ge_node: IRBuilder<"CreateFCmpOGE">;
+def fcmp_ult_node: IRBuilder<"CreateFCmpULT">;
+def fcmp_ule_node: IRBuilder<"CreateFCmpULE">;
def splat: CGHelperFn<"ARMMVEVectorSplat">;
def select: IRBuilder<"CreateSelect">;
def fneg: IRBuilder<"CreateFNeg">;
@@ -589,6 +589,18 @@ def fsub: strictFPAlt<fsub_node,
IRInt<"vsub", [Vector]>>;
def fmul: strictFPAlt<fmul_node,
IRInt<"vmul", [Vector]>>;
+def fcmp_eq : strictFPAlt<fcmp_eq_node,
+ IRInt<"cmp_eq", [Predicate, Vector]>>;
+def fcmp_ne : strictFPAlt<fcmp_ne_node,
+ IRInt<"cmp_ne", [Predicate, Vector]>>;
+def fcmp_gt : strictFPAlt<fcmp_gt_node,
+ IRInt<"cmp_gt", [Predicate, Vector]>>;
+def fcmp_ge : strictFPAlt<fcmp_ge_node,
+ IRInt<"cmp_ge", [Predicate, Vector]>>;
+def fcmp_ult : strictFPAlt<fcmp_ult_node,
+ IRInt<"cmp_lt", [Predicate, Vector]>>;
+def fcmp_ule : strictFPAlt<fcmp_ule_node,
+ IRInt<"cmp_le", [Predicate, Vector]>>;
// -----------------------------------------------------------------------------
// Convenience lists of parameter types. 'T' is just a container record, so you
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/compare.c b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
index 8886cf5c10058..dd756a401e5cd 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/compare.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
@@ -1,17 +1,26 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_mve.h>
-// CHECK-LABEL: @test_vcmpeqq_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
{
@@ -22,12 +31,19 @@ mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
{
@@ -38,12 +54,19 @@ mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
{
@@ -54,12 +77,19 @@ mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
{
@@ -70,12 +100,19 @@ mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
{
@@ -86,12 +123,19 @@ mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_u8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
{
@@ -102,12 +146,19 @@ mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
{
@@ -118,12 +169,19 @@ mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_u32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
{
@@ -134,14 +192,23 @@ mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
{
@@ -152,14 +219,23 @@ mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
{
@@ -170,14 +246,23 @@ mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
{
@@ -188,14 +273,23 @@ mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/169798
More information about the llvm-commits mailing list