[clang] [llvm] [ARM] Introduce intrinsics for MVE add/sub/mul under strict-fp. (PR #169156)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 22 01:42:38 PST 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/169156
As far as I understand, the MVE fp vadd/vsub/vmul instructions will set exception flags in the same ways as scalar fadd/fsub/fmul, but will not honor flush-to-zero (for f32 they always flush, for f16 they follows the fpsrc flags) and will always use the default rounding mode.
This means that we cannot convert the vadd_f23/vsub_f32/vmul_f32 intrinsics to llvm.constrained.fadd/fsub/fmul, and the fadd/fsub/fmul without changing the expected behaviour under strict-fp. This patch introduces a set in intrinsics that we can use instead, going from vadd_f32 -> llvm.arm.mve.vadd -> MVE_VADD.
If this is acceptable, the other intrinsics I can add will be:
fma
fptoi, itofp
trunc/round/ceil/etc
fcmp
minnum/maxnum
The current implementations assumes that the standard variant of a strictfp alternative will be a IRBuilder, this can be changed to take a IRBuilder or IRInt. There are also Neon intrinsics that AFAIU behave the same way.
>From 4d8d1a25b53cfaac8ab9c61fcd6aee54e40017cc Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sat, 22 Nov 2025 09:39:57 +0000
Subject: [PATCH] [ARM] Introduce intrinsics for MVE add/sub/mul under
strict-fp.
As far as I understand, the MVE fp vadd/vsub/vmul instructions will set
exception flags in the same ways as scalar fadd/fsub/fmul, but will not honor
flush-to-zero (for f32 they always flush, for f16 they follows the fpsrc flags)
and will always use the default rounding mode.
This means that we cannot convert the vadd_f23/vsub_f32/vmul_f32 intrinsics to
llvm.constrained.fadd/fsub/fmul, and the fadd/fsub/fmul without changing the
expected behaviour under strict-fp. This patch introduces a set in intrinsics
that we can use instead, going from vadd_f32 -> llvm.arm.mve.vadd -> MVE_VADD.
If this is acceptable, the other intrinsics I can add will be:
fma
fptoi, itofp
trunc/round/ceil/etc
fcmp
minnum/maxnum
The current implementations assumes that the standard variant of a strictfp
alternative will be a IRBuilder, this can be changed to take a IRBuilder or
IRInt. There are also Neon intrinsics that AFAIU behave the same way.
---
clang/include/clang/Basic/arm_mve_defs.td | 21 +-
clang/test/CodeGen/arm-mve-intrinsics/vaddq.c | 214 +++++++---
clang/test/CodeGen/arm-mve-intrinsics/vmulq.c | 390 ++++++++++++------
clang/test/CodeGen/arm-mve-intrinsics/vsubq.c | 214 +++++++---
clang/utils/TableGen/MveEmitter.cpp | 39 +-
llvm/include/llvm/IR/IntrinsicsARM.td | 12 +
llvm/lib/Target/ARM/ARMInstrMVE.td | 33 +-
.../mve-intrinsics/strict-intrinsics.ll | 143 +++++++
.../CodeGen/Thumb2/mve-intrinsics/vabdq.ll | 4 +-
llvm/test/CodeGen/Thumb2/mve-pred-ext.ll | 12 +-
10 files changed, 797 insertions(+), 285 deletions(-)
create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index c1562a0c1f04c..eeca9153dd742 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -74,9 +74,9 @@ def immshr: CGHelperFn<"MVEImmediateShr"> {
let special_params = [IRBuilderIntParam<1, "unsigned">,
IRBuilderIntParam<2, "bool">];
}
-def fadd: IRBuilder<"CreateFAdd">;
-def fmul: IRBuilder<"CreateFMul">;
-def fsub: IRBuilder<"CreateFSub">;
+def fadd_node: IRBuilder<"CreateFAdd">;
+def fmul_node: IRBuilder<"CreateFMul">;
+def fsub_node: IRBuilder<"CreateFSub">;
def load: IRBuilder<"CreateLoad"> {
let special_params = [IRBuilderAddrParam<0>];
}
@@ -212,6 +212,13 @@ def unsignedflag;
// constant giving its size in bits.
def bitsize;
+// strictFPAlt allows a node to have different code generation under strict-fp.
+// TODO: The standard node can be IRBuilderBase or IRIntBase.
+class strictFPAlt<IRBuilderBase standard_, IRIntBase strictfp_> {
+ IRBuilderBase standard = standard_;
+ IRIntBase strictfp = strictfp_;
+}
+
// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
// indicates that the IR generation for that intrinsic is done by handwritten
// C++ and not autogenerated at all. The effect in the MVE builtin codegen
@@ -573,6 +580,14 @@ multiclass IntrinsicMXNameOverride<Type rettype, dag arguments, dag cg,
}
}
+// StrictFP nodes that choose between standard fadd and llvm.arm.mve.fadd nodes
+// depending on whether we are using strict-fp.
+def fadd: strictFPAlt<fadd_node,
+ IRInt<"vadd", [Vector]>>;
+def fsub: strictFPAlt<fsub_node,
+ IRInt<"vsub", [Vector]>>;
+def fmul: strictFPAlt<fmul_node,
+ IRInt<"vmul", [Vector]>>;
// -----------------------------------------------------------------------------
// Convenience lists of parameter types. 'T' is just a container record, so you
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
index 238cb4056d4f1..d24834951b385 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
@@ -1,6 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -20,10 +22,15 @@ uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b)
{
@@ -34,12 +41,19 @@ float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_m_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_m_s8(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_m_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
@@ -50,12 +64,19 @@ int8x16_t test_vaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_m_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_m_f32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_m_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vaddq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
{
@@ -66,12 +87,19 @@ float32x4_t test_vaddq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b,
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_x_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_x_u16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_x_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vaddq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
@@ -82,12 +110,19 @@ uint16x8_t test_vaddq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_x_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_x_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_x_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vaddq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
{
@@ -114,12 +149,19 @@ uint32x4_t test_vaddq_n_u32(uint32x4_t a, uint32_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_n_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_n_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vaddq_n_f16(float16x8_t a, float16_t b)
{
@@ -130,14 +172,23 @@ float16x8_t test_vaddq_n_f16(float16x8_t a, float16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_m_n_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_m_n_s8(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_m_n_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p)
{
@@ -148,14 +199,23 @@ int8x16_t test_vaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_m_n_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_m_n_f32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_m_n_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vaddq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p)
{
@@ -166,14 +226,23 @@ float32x4_t test_vaddq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b,
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_x_n_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_x_n_u16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_x_n_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vaddq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
{
@@ -184,14 +253,23 @@ uint16x8_t test_vaddq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vaddq_x_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef)
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vaddq_x_n_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vaddq_x_n_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vaddq_x_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
{
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
index 9d36a2d334860..708502ddb52fc 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
@@ -1,6 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -48,10 +50,15 @@ uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_f32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b)
{
@@ -62,12 +69,19 @@ float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_m_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_m_s8(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_m_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vmulq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
@@ -78,12 +92,19 @@ int8x16_t test_vmulq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_m_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_m_u16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_m_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vmulq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
@@ -94,12 +115,19 @@ uint16x8_t test_vmulq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_m_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_m_s32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_m_s32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vmulq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
{
@@ -110,12 +138,19 @@ int32x4_t test_vmulq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pre
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_m_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_m_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_m_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vmulq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
{
@@ -126,12 +161,19 @@ float16x8_t test_vmulq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b,
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_x_u8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
-// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_x_u8(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_x_u8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]]
//
uint8x16_t test_vmulq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p)
{
@@ -142,12 +184,19 @@ uint8x16_t test_vmulq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_x_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_x_s16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_x_s16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vmulq_x_s16(int16x8_t a, int16x8_t b, mve_pred16_t p)
{
@@ -158,12 +207,19 @@ int16x8_t test_vmulq_x_s16(int16x8_t a, int16x8_t b, mve_pred16_t p)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_x_u32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_x_u32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NONSTRICT-NEXT: ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_x_u32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vmulq_x_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
{
@@ -174,12 +230,19 @@ uint32x4_t test_vmulq_x_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_x_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_x_f32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_x_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vmulq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
{
@@ -238,12 +301,19 @@ uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_n_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_n_f32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_n_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b)
{
@@ -254,14 +324,23 @@ float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_m_n_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_m_n_s8(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_m_n_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vmulq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p)
{
@@ -272,14 +351,23 @@ int8x16_t test_vmulq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_m_n_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_m_n_u16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_m_n_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vmulq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve_pred16_t p)
{
@@ -290,14 +378,23 @@ uint16x8_t test_vmulq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_m_n_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_m_n_s32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_m_n_s32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vmulq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p)
{
@@ -308,14 +405,23 @@ int32x4_t test_vmulq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pre
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_m_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_m_n_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_m_n_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vmulq_m_n_f16(float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p)
{
@@ -326,14 +432,23 @@ float16x8_t test_vmulq_m_n_f16(float16x8_t inactive, float16x8_t a, float16_t b,
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_x_n_u8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> undef)
-// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_x_n_u8(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_x_n_u8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]]
//
uint8x16_t test_vmulq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
{
@@ -344,14 +459,23 @@ uint8x16_t test_vmulq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_x_n_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_x_n_s16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_x_n_s16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vmulq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
{
@@ -361,14 +485,23 @@ int16x8_t test_vmulq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
return vmulq_x_n_s16(a, b, p);
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_x_n_u32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> undef)
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_x_n_u32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NONSTRICT-NEXT: ret <4 x i32> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_x_n_u32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vmulq_x_n_u32(uint32x4_t a, uint32_t b, mve_pred16_t p)
{
@@ -379,14 +512,23 @@ uint32x4_t test_vmulq_x_n_u32(uint32x4_t a, uint32_t b, mve_pred16_t p)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vmulq_x_n_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> undef)
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vmulq_x_n_f32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmulq_x_n_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vmulq_x_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
{
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c b/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
index 3d87770889707..6f7b5a4f8271f 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
@@ -1,6 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -20,10 +22,15 @@ uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vsubq_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NONSTRICT-LABEL: @test_vsubq_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vsubq_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vsub.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b)
{
@@ -34,12 +41,19 @@ float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vsubq_m_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vsubq_m_s8(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vsubq_m_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vsubq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
@@ -50,12 +64,19 @@ int8x16_t test_vsubq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vsubq_m_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vsubq_m_f32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vsubq_m_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vsubq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
{
@@ -66,12 +87,19 @@ float32x4_t test_vsubq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b,
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vsubq_x_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vsubq_x_u16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vsubq_x_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vsubq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
@@ -82,12 +110,19 @@ uint16x8_t test_vsubq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vsubq_x_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vsubq_x_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vsubq_x_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vsubq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
{
@@ -114,12 +149,19 @@ uint32x4_t test_vsubq_n_u32(uint32x4_t a, uint32_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vsubq_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NONSTRICT-LABEL: @test_vsubq_n_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vsubq_n_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vsub.v8f16(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vsubq_n_f16(float16x8_t a, float16_t b)
{
@@ -130,14 +172,23 @@ float16x8_t test_vsubq_n_f16(float16x8_t a, float16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vsubq_m_n_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vsubq_m_n_s8(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vsubq_m_n_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p)
{
@@ -148,14 +199,23 @@ int8x16_t test_vsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vsubq_m_n_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vsubq_m_n_f32(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vsubq_m_n_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vsubq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p)
{
@@ -166,14 +226,23 @@ float32x4_t test_vsubq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b,
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vsubq_x_n_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vsubq_x_n_u16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vsubq_x_n_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vsubq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
{
@@ -184,14 +253,23 @@ uint16x8_t test_vsubq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vsubq_x_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef)
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NONSTRICT-LABEL: @test_vsubq_x_n_f16(
+// CHECK-NONSTRICT-NEXT: entry:
+// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vsubq_x_n_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vsubq_x_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
{
diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp
index f55a5f54bd158..d2b366779ba16 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -757,6 +757,26 @@ class IRIntrinsicResult : public Result {
}
};
+// Result subclass that generates
+// Builder.getIsFPConstrained() ? <Standard> : <StrictFp>
+class StrictFpAltResult : public Result {
+public:
+ Ptr Standard;
+ Ptr StrictFp;
+ StrictFpAltResult(Ptr Standard,Ptr StrictFp)
+ : Standard(Standard), StrictFp(StrictFp) {}
+ void genCode(raw_ostream &OS,
+ CodeGenParamAllocator &ParamAlloc) const override {
+ OS << "!Builder.getIsFPConstrained() ? ";
+ Standard->genCode(OS, ParamAlloc);
+ OS << " : ";
+ StrictFp->genCode(OS, ParamAlloc);
+ }
+ void morePrerequisites(std::vector<Ptr> &output) const override {
+ Standard->morePrerequisites(output);
+ }
+};
+
// Result subclass that specifies a type, for use in IRBuilder operations such
// as CreateBitCast that take a type argument.
class TypeResult : public Result {
@@ -1239,7 +1259,8 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
std::vector<Result::Ptr> Args;
for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i)
Args.push_back(getCodeForDagArg(D, i, Scope, Param));
- if (Op->isSubClassOf("IRBuilderBase")) {
+
+ auto GenIRBuilderBase = [&](const Record *Op) {
std::set<unsigned> AddressArgs;
std::map<unsigned, std::string> IntegerArgs;
for (const Record *sp : Op->getValueAsListOfDefs("special_params")) {
@@ -1252,7 +1273,8 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
}
return std::make_shared<IRBuilderResult>(Op->getValueAsString("prefix"),
Args, AddressArgs, IntegerArgs);
- } else if (Op->isSubClassOf("IRIntBase")) {
+ };
+ auto GenIRIntBase = [&](const Record *Op) {
std::vector<const Type *> ParamTypes;
for (const Record *RParam : Op->getValueAsListOfDefs("params"))
ParamTypes.push_back(getType(RParam, Param));
@@ -1260,7 +1282,18 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
if (Op->getValueAsBit("appendKind"))
IntName += "_" + toLetter(cast<ScalarType>(Param)->kind());
return std::make_shared<IRIntrinsicResult>(IntName, ParamTypes, Args);
- } else {
+ };
+
+ if (Op->isSubClassOf("IRBuilderBase")) {
+ return GenIRBuilderBase(Op);
+ } else if (Op->isSubClassOf("IRIntBase")) {
+ return GenIRIntBase(Op);
+ } else if (Op->isSubClassOf("strictFPAlt")) {
+ auto Standard = GenIRBuilderBase(Op->getValueAsDef("standard"));
+ auto StrictFp = GenIRIntBase(Op->getValueAsDef("strictfp"));
+ return std::make_shared<StrictFpAltResult>(Standard, StrictFp);
+ }
+ else {
PrintFatalError("Unsupported dag node " + Op->getName());
}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index 1219ce2f86da8..ecadb235bec36 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -1138,6 +1138,18 @@ def int_arm_mve_vshlc_predicated: DefaultAttrsIntrinsic<
[llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */,
llvm_i32_ty /* shift count */, llvm_anyvector_ty], [IntrNoMem]>;
+def int_arm_mve_vadd: DefaultAttrsIntrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_vsub: DefaultAttrsIntrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_arm_mve_vmul: DefaultAttrsIntrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
def int_arm_mve_vmulh: DefaultAttrsIntrinsic<
[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 98591fa3f5bd7..6da04c4ac6f18 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -384,6 +384,16 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrin
(VTI.Vec MQPR:$inactive)))>;
}
+def vadd : PatFrags<(ops node:$lhs, node:$rhs),
+ [(fadd node:$lhs, node:$rhs),
+ (int_arm_mve_vadd node:$lhs, node:$rhs)]>;
+def vsub : PatFrags<(ops node:$lhs, node:$rhs),
+ [(fsub node:$lhs, node:$rhs),
+ (int_arm_mve_vsub node:$lhs, node:$rhs)]>;
+def vmul : PatFrags<(ops node:$lhs, node:$rhs),
+ [(fmul node:$lhs, node:$rhs),
+ (int_arm_mve_vmul node:$lhs, node:$rhs)]>;
+
// --------- Start of base classes for the instructions themselves
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
@@ -3605,7 +3615,7 @@ class MVE_VMUL_fp<string iname, string suffix, bits<2> size, list<dag> pattern=[
let validForTailPredication = 1;
}
-multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op,
+multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDPatternOperator Op,
Intrinsic PredInt, SDPatternOperator IdentityVec> {
def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
@@ -3616,7 +3626,7 @@ multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op,
}
multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
- : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated, IdentityVec>;
+ : MVE_VMULT_fp_m<"vmul", VTI, vmul, int_arm_mve_mul_predicated, IdentityVec>;
def ARMimmOneF: PatLeaf<(bitconvert (v4f32 (ARMvmovFPImm (i32 112))))>; // 1.0 float
def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 half
@@ -3740,7 +3750,7 @@ defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>;
defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
+ SDPatternOperator Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0, 1, bit_21> {
let validForTailPredication = 1;
}
@@ -3752,9 +3762,9 @@ multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
}
multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
- : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated, IdentityVec>;
+ : MVE_VADDSUB_fp_m<"vadd", 0, VTI, vadd, int_arm_mve_add_predicated, IdentityVec>;
multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
- : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated, IdentityVec>;
+ : MVE_VADDSUB_fp_m<"vsub", 1, VTI, vsub, int_arm_mve_sub_predicated, IdentityVec>;
def ARMimmMinusZeroF: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 1664))))>; // -0.0 float
def ARMimmMinusZeroH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2688))))>; // -0.0 half
@@ -5391,21 +5401,22 @@ defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16, subnuw, ARMvshruImm>;
defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32, subnuw, ARMvshruImm>;
multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
- SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
+ SDPatternOperator Op, Intrinsic PredInt,
+ SDPatternOperator IdentityVec> {
def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract, VTI.Size>;
defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ),
!cast<Instruction>(NAME), IdentityVec>;
}
let Predicates = [HasMVEFloat] in {
- defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd,
+ defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, vadd,
int_arm_mve_add_predicated, ARMimmMinusZeroF>;
- defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd,
+ defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, vadd,
int_arm_mve_add_predicated, ARMimmMinusZeroH>;
- defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub,
+ defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, vsub,
int_arm_mve_sub_predicated, ARMimmAllZerosV>;
- defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub,
+ defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, vsub,
int_arm_mve_sub_predicated, ARMimmAllZerosV>;
}
@@ -5588,7 +5599,7 @@ defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> {
let validForTailPredication = 1 in
def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>;
- defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ),
+ defm : MVE_TwoOpPatternDup<VTI, vmul, int_arm_mve_mul_predicated, (? ),
!cast<Instruction>(NAME), IdentityVec>;
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
new file mode 100644
index 0000000000000..fa1ae5cf306d8
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vaddq_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vadd.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> %a, <8 x half> %b)
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vaddq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vaddq_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vadd.f32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x float> @llvm.arm.mve.vadd.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vsubq_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsub.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x half> @llvm.arm.mve.vsub.v8f16(<8 x half> %a, <8 x half> %b)
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vsubq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vsubq_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vsub.f32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x float> @llvm.arm.mve.vsub.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vmulq_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmul.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x half> @llvm.arm.mve.vmul.v8f16(<8 x half> %a, <8 x half> %b)
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vmulq_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmul.f32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %0
+}
+
+
+
+
+define arm_aapcs_vfpcc <8 x half> @test_vaddq_f16_splat(<8 x half> %a, half %b) {
+; CHECK-LABEL: test_vaddq_f16_splat:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f16 r0, s4
+; CHECK-NEXT: vadd.f16 q0, q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <8 x half> undef, half %b, i32 0
+ %s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %0 = tail call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> %a, <8 x half> %s)
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vaddq_f32_splat(<4 x float> %a, float %b) {
+; CHECK-LABEL: test_vaddq_f32_splat:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vadd.f32 q0, q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %b, i32 0
+ %s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %0 = tail call <4 x float> @llvm.arm.mve.vadd.v4f32(<4 x float> %a, <4 x float> %s)
+ ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16_splat(<8 x half> %a, half %b) {
+; CHECK-LABEL: test_vsubq_f16_splat:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f16 r0, s4
+; CHECK-NEXT: vsub.f16 q0, q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <8 x half> undef, half %b, i32 0
+ %s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %0 = tail call <8 x half> @llvm.arm.mve.vsub.v8f16(<8 x half> %a, <8 x half> %s)
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vsubq_f32_splat(<4 x float> %a, float %b) {
+; CHECK-LABEL: test_vsubq_f32_splat:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vsub.f32 q0, q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %b, i32 0
+ %s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %0 = tail call <4 x float> @llvm.arm.mve.vsub.v4f32(<4 x float> %a, <4 x float> %s)
+ ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vmulq_f16_splat(<8 x half> %a, half %b) {
+; CHECK-LABEL: test_vmulq_f16_splat:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f16 r0, s4
+; CHECK-NEXT: vmul.f16 q0, q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <8 x half> undef, half %b, i32 0
+ %s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %0 = tail call <8 x half> @llvm.arm.mve.vmul.v8f16(<8 x half> %a, <8 x half> %s)
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32_splat(<4 x float> %a, float %b) {
+; CHECK-LABEL: test_vmulq_f32_splat:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmul.f32 q0, q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %b, i32 0
+ %s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %0 = tail call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> %a, <4 x float> %s)
+ ret <4 x float> %0
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
index 961489613b626..389ccd3491539 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
@@ -25,8 +25,8 @@ entry:
declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>, i32) #1
-define arm_aapcs_vfpcc <8 x half> @test_vabdq_f32(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 {
-; CHECK-LABEL: test_vabdq_f32:
+define arm_aapcs_vfpcc <8 x half> @test_vabdq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vabdq_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vabd.f16 q0, q0, q1
; CHECK-NEXT: bx lr
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
index 0bec2b100911c..1aea7f9849b89 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
@@ -63,8 +63,8 @@ entry:
ret <8 x i16> %0
}
-define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8f32(<8 x half> %src1, <8 x half> %src2) {
-; CHECK-MVE-LABEL: sext_v8i1_v8f32:
+define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8f16(<8 x half> %src1, <8 x half> %src2) {
+; CHECK-MVE-LABEL: sext_v8i1_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .save {r4, lr}
; CHECK-MVE-NEXT: push {r4, lr}
@@ -110,7 +110,7 @@ define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8f32(<8 x half> %src1, <8 x half> %
; CHECK-MVE-NEXT: vmov.16 q0[7], r0
; CHECK-MVE-NEXT: pop {r4, pc}
;
-; CHECK-MVEFP-LABEL: sext_v8i1_v8f32:
+; CHECK-MVEFP-LABEL: sext_v8i1_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov.i16 q2, #0x0
; CHECK-MVEFP-NEXT: vmov.i8 q3, #0xff
@@ -291,8 +291,8 @@ entry:
ret <8 x i16> %0
}
-define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f32(<8 x half> %src1, <8 x half> %src2) {
-; CHECK-MVE-LABEL: zext_v8i1_v8f32:
+define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f16(<8 x half> %src1, <8 x half> %src2) {
+; CHECK-MVE-LABEL: zext_v8i1_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .save {r4, lr}
; CHECK-MVE-NEXT: push {r4, lr}
@@ -340,7 +340,7 @@ define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f32(<8 x half> %src1, <8 x half> %
; CHECK-MVE-NEXT: vand q0, q1, q0
; CHECK-MVE-NEXT: pop {r4, pc}
;
-; CHECK-MVEFP-LABEL: zext_v8i1_v8f32:
+; CHECK-MVEFP-LABEL: zext_v8i1_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov.i16 q2, #0x0
; CHECK-MVEFP-NEXT: vmov.i16 q3, #0x1
More information about the llvm-commits
mailing list