[clang] [llvm] [ARM] Introduce intrinsics for MVE add/sub/mul under strict-fp. (PR #169156)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 22 01:43:08 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
As far as I understand, the MVE fp vadd/vsub/vmul instructions will set exception flags in the same ways as scalar fadd/fsub/fmul, but will not honor flush-to-zero (for f32 they always flush, for f16 they follows the fpsrc flags) and will always use the default rounding mode.
This means that we cannot convert the vadd_f23/vsub_f32/vmul_f32 intrinsics to llvm.constrained.fadd/fsub/fmul, and the fadd/fsub/fmul without changing the expected behaviour under strict-fp. This patch introduces a set in intrinsics that we can use instead, going from vadd_f32 -> llvm.arm.mve.vadd -> MVE_VADD.
If this is acceptable, the other intrinsics I can add will be:
fma
fptoi, itofp
trunc/round/ceil/etc
fcmp
minnum/maxnum
The current implementations assumes that the standard variant of a strictfp alternative will be a IRBuilder, this can be changed to take a IRBuilder or IRInt. There are also Neon intrinsics that AFAIU behave the same way.
---
Patch is 93.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169156.diff
10 Files Affected:
- (modified) clang/include/clang/Basic/arm_mve_defs.td (+18-3)
- (modified) clang/test/CodeGen/arm-mve-intrinsics/vaddq.c (+146-68)
- (modified) clang/test/CodeGen/arm-mve-intrinsics/vmulq.c (+266-124)
- (modified) clang/test/CodeGen/arm-mve-intrinsics/vsubq.c (+146-68)
- (modified) clang/utils/TableGen/MveEmitter.cpp (+36-3)
- (modified) llvm/include/llvm/IR/IntrinsicsARM.td (+12)
- (modified) llvm/lib/Target/ARM/ARMInstrMVE.td (+22-11)
- (added) llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll (+143)
- (modified) llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll (+2-2)
- (modified) llvm/test/CodeGen/Thumb2/mve-pred-ext.ll (+6-6)
``````````diff
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index c1562a0c1f04c..eeca9153dd742 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -74,9 +74,9 @@ def immshr: CGHelperFn<"MVEImmediateShr"> {
let special_params = [IRBuilderIntParam<1, "unsigned">,
IRBuilderIntParam<2, "bool">];
}
-def fadd: IRBuilder<"CreateFAdd">;
-def fmul: IRBuilder<"CreateFMul">;
-def fsub: IRBuilder<"CreateFSub">;
+def fadd_node: IRBuilder<"CreateFAdd">;
+def fmul_node: IRBuilder<"CreateFMul">;
+def fsub_node: IRBuilder<"CreateFSub">;
def load: IRBuilder<"CreateLoad"> {
let special_params = [IRBuilderAddrParam<0>];
}
@@ -212,6 +212,13 @@ def unsignedflag;
// constant giving its size in bits.
def bitsize;
+// strictFPAlt allows a node to have different code generation under strict-fp.
+// TODO: The standard node can be IRBuilderBase or IRIntBase.
+class strictFPAlt<IRBuilderBase standard_, IRIntBase strictfp_> {
+ IRBuilderBase standard = standard_;
+ IRIntBase strictfp = strictfp_;
+}
+
// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
// indicates that the IR generation for that intrinsic is done by handwritten
// C++ and not autogenerated at all. The effect in the MVE builtin codegen
@@ -573,6 +580,14 @@ multiclass IntrinsicMXNameOverride<Type rettype, dag arguments, dag cg,
}
}
+// StrictFP nodes that choose between standard fadd and llvm.arm.mve.fadd nodes
+// depending on whether we are using strict-fp.
+def fadd: strictFPAlt<fadd_node,
+ IRInt<"vadd", [Vector]>>;
+def fsub: strictFPAlt<fsub_node,
+ IRInt<"vsub", [Vector]>>;
+def fmul: strictFPAlt<fmul_node,
+ IRInt<"vmul", [Vector]>>;
// -----------------------------------------------------------------------------
// Convenience lists of parameter types. 'T' is just a container record, so you
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
index 238cb4056d4f1..d24834951b385 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
@@ -1,6 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -20,10 +22,15 @@ uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b)
{
@@ -34,12 +41,19 @@ float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_m_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_m_s8(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_m_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
@@ -50,12 +64,19 @@ int8x16_t test_vaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_m_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_m_f32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_m_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vaddq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
{
@@ -66,12 +87,19 @@ float32x4_t test_vaddq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b,
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_x_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_x_u16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_x_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vaddq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
@@ -82,12 +110,19 @@ uint16x8_t test_vaddq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_x_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_x_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_x_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vaddq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
{
@@ -114,12 +149,19 @@ uint32x4_t test_vaddq_n_u32(uint32x4_t a, uint32_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_n_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_n_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vaddq_n_f16(float16x8_t a, float16_t b)
{
@@ -130,14 +172,23 @@ float16x8_t test_vaddq_n_f16(float16x8_t a, float16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_m_n_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_m_n_s8(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_m_n_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p)
{
@@ -148,14 +199,23 @@ int8x16_t test_vaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_m_n_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_m_n_f32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_m_n_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vaddq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p)
{
@@ -166,14 +226,23 @@ float32x4_t test_vaddq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b,
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_x_n_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_x_n_u16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_x_n_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vaddq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
{
@@ -184,14 +253,23 @@ uint16x8_t test_vaddq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_x_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/169156
More information about the llvm-commits
mailing list