[clang] [llvm] [ARM] Introduce intrinsics for MVE fp-converts under strict-fp. (PR #170686)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 4 08:30:18 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-ir
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
This handles the last of the generic instructions created from the MVE intrinsics. It was a little more awkward than the others because the conversion takes a Type as one of its arguments, so this adds a new helper function to create the intrinsic we need.
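Roughly, the IR-level difference this produces (drawn from the updated vcvt.c checks below) is that the strict-fp path emits the target intrinsic instead of a generic conversion instruction. A minimal sketch, with illustrative function names and attribute-group layout; the intrinsic name and the `i32 0` signed-conversion operand come from the patch:

```llvm
declare <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32>, i32)

; Default codegen: the builtin lowers to a generic sitofp instruction.
define <4 x float> @cvt_default(<4 x i32> %a) {
entry:
  %r = sitofp <4 x i32> %a to <4 x float>
  ret <4 x float> %r
}

; Under -frounding-math the builtin is emitted as the target intrinsic
; instead (the i32 0 operand selects the signed conversion), so no
; rounding-mode-sensitive generic instruction is created.
define <4 x float> @cvt_strict(<4 x i32> %a) #0 {
entry:
  %r = call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> %a, i32 0) #0
  ret <4 x float> %r
}

attributes #0 = { strictfp }
```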
---
Patch is 87.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170686.diff
8 Files Affected:
- (modified) clang/include/clang/Basic/arm_mve.td (+1-1)
- (modified) clang/include/clang/Basic/arm_mve_defs.td (+14-6)
- (modified) clang/lib/CodeGen/TargetBuiltins/ARM.cpp (+32)
- (modified) clang/test/CodeGen/arm-mve-intrinsics/vcvt.c (+658-300)
- (modified) clang/utils/TableGen/MveEmitter.cpp (+5-2)
- (modified) llvm/include/llvm/IR/IntrinsicsARM.td (+6)
- (modified) llvm/lib/Target/ARM/ARMInstrMVE.td (+11-9)
- (modified) llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll (+81)
``````````diff
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 77531c31538c1..eae3a9f9624ab 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -598,7 +598,7 @@ foreach half = [ "b", "t" ] in {
} // params = [f16], pnt = PNT_None
} // loop over half = "b", "t"
-multiclass float_int_conversions<Type FScalar, Type IScalar, IRBuilderBase ftoi, IRBuilderBase itof> {
+multiclass float_int_conversions<Type FScalar, Type IScalar, Builder ftoi, Builder itof> {
defvar FVector = VecOf<FScalar>;
defvar IVector = VecOf<IScalar>;
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 3210549d0cb56..d228b298a9aa6 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -123,10 +123,10 @@ def fcmp_ule: IRBuilder<"CreateFCmpULE">;
def splat: CGHelperFn<"ARMMVEVectorSplat">;
def select: IRBuilder<"CreateSelect">;
def fneg: IRBuilder<"CreateFNeg">;
-def sitofp: IRBuilder<"CreateSIToFP">;
-def uitofp: IRBuilder<"CreateUIToFP">;
-def fptosi: IRBuilder<"CreateFPToSI">;
-def fptoui: IRBuilder<"CreateFPToUI">;
+def sitofp_node: IRBuilder<"CreateSIToFP">;
+def uitofp_node: IRBuilder<"CreateUIToFP">;
+def fptosi_node: IRBuilder<"CreateFPToSI">;
+def fptoui_node: IRBuilder<"CreateFPToUI">;
def vrev: CGHelperFn<"ARMMVEVectorElementReverse"> {
let special_params = [IRBuilderIntParam<1, "unsigned">];
}
@@ -215,9 +215,9 @@ def bitsize;
// strictFPAlt allows a node to have different code generation under strict-fp.
// TODO: The standard node can be IRBuilderBase or IRIntBase.
-class strictFPAlt<Builder standard_, IRIntBase strictfp_> : Builder {
+class strictFPAlt<Builder standard_, Builder strictfp_> : Builder {
Builder standard = standard_;
- IRIntBase strictfp = strictfp_;
+ Builder strictfp = strictfp_;
}
// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
@@ -593,6 +593,14 @@ def fminnm : strictFPAlt<IRIntBase<"minnum", [Vector]>,
IRInt<"vminnm", [Vector]>>;
def fmaxnm : strictFPAlt<IRIntBase<"maxnum", [Vector]>,
IRInt<"vmaxnm", [Vector]>>;
+def sitofp: strictFPAlt<sitofp_node,
+ CGFHelperFn<"ARMMVECreateSIToFP">>;
+def uitofp: strictFPAlt<uitofp_node,
+ CGFHelperFn<"ARMMVECreateUIToFP">>;
+def fptosi: strictFPAlt<fptosi_node,
+ CGFHelperFn<"ARMMVECreateFPToSI">>;
+def fptoui: strictFPAlt<fptoui_node,
+ CGFHelperFn<"ARMMVECreateFPToUI">>;
// -----------------------------------------------------------------------------
// Convenience lists of parameter types. 'T' is just a container record, so you
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index d4b0b81d3d87f..744cd1b0a324a 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -3512,6 +3512,38 @@ static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
return Builder.CreateShuffleVector(V, Indices);
}
+static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
+ CodeGenFunction *CGF, llvm::Value *V,
+ llvm::Type *Ty) {
+ return Builder.CreateCall(
+ CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
+ {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
+}
+
+static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
+ CodeGenFunction *CGF, llvm::Value *V,
+ llvm::Type *Ty) {
+ return Builder.CreateCall(
+ CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
+ {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
+}
+
+static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
+ CodeGenFunction *CGF, llvm::Value *V,
+ llvm::Type *Ty) {
+ return Builder.CreateCall(
+ CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
+ {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
+}
+
+static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
+ CodeGenFunction *CGF, llvm::Value *V,
+ llvm::Type *Ty) {
+ return Builder.CreateCall(
+ CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
+ {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
+}
+
Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue,
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
index b2a6d0c1ea668..14a9116208b87 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -1,15 +1,22 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
-// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_mve.h>
-// CHECK-LABEL: @test_vcvtq_f16_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = sitofp <8 x i16> [[A:%.*]] to <8 x half>
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_f16_s16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = sitofp <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_f16_s16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> [[A:%.*]], i32 0) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vcvtq_f16_s16(int16x8_t a)
{
@@ -20,10 +27,15 @@ float16x8_t test_vcvtq_f16_s16(int16x8_t a)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_f16_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = uitofp <8 x i16> [[A:%.*]] to <8 x half>
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_f16_u16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = uitofp <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_f16_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> [[A:%.*]], i32 1) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vcvtq_f16_u16(uint16x8_t a)
{
@@ -34,10 +46,15 @@ float16x8_t test_vcvtq_f16_u16(uint16x8_t a)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_f32_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_f32_s32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_f32_s32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> [[A:%.*]], i32 0) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vcvtq_f32_s32(int32x4_t a)
{
@@ -48,10 +65,15 @@ float32x4_t test_vcvtq_f32_s32(int32x4_t a)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_f32_u32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_f32_u32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_f32_u32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> [[A:%.*]], i32 1) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vcvtq_f32_u32(uint32x4_t a)
{
@@ -62,52 +84,79 @@ float32x4_t test_vcvtq_f32_u32(uint32x4_t a)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_s16_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fptosi <8 x half> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_s16_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptosi <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_s16_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> [[A:%.*]], i32 0) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vcvtq_s16_f16(float16x8_t a)
{
return vcvtq_s16_f16(a);
}
-// CHECK-LABEL: @test_vcvtq_s32_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fptosi <4 x float> [[A:%.*]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_s32_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptosi <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_s32_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> [[A:%.*]], i32 0) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP0]]
//
int32x4_t test_vcvtq_s32_f32(float32x4_t a)
{
return vcvtq_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtq_u16_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fptoui <8 x half> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_u16_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptoui <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_u16_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> [[A:%.*]], i32 1) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP0]]
//
uint16x8_t test_vcvtq_u16_f16(float16x8_t a)
{
return vcvtq_u16_f16(a);
}
-// CHECK-LABEL: @test_vcvtq_u32_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fptoui <4 x float> [[A:%.*]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_u32_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptoui <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_u32_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> [[A:%.*]], i32 1) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vcvtq_u32_f32(float32x4_t a)
{
return vcvtq_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtq_m_f16_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f16_s16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_m_f16_s16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcvtq_m_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t p)
{
@@ -118,12 +167,19 @@ float16x8_t test_vcvtq_m_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_m_f16_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f16_u16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_m_f16_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcvtq_m_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_t p)
{
@@ -134,12 +190,19 @@ float16x8_t test_vcvtq_m_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_m_f32_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f32_s32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_m_f32_s32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcvtq_m_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t p)
{
@@ -150,12 +213,19 @@ float32x4_t test_vcvtq_m_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_m_f32_u32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f32_u32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_m_f32_u32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcvtq_m_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_t p)
{
@@ -166,12 +236,19 @@ float32x4_t test_vcvtq_m_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_m_s16_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_s16_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/170686
More information about the llvm-commits mailing list