[clang] [llvm] [AArch64] Remove aarch64_neon_vcvtfp2hf and aarch64_neon_vcvthf2fp (PR #203903)

Mon Jun 15 06:27:31 PDT 2026

llvmorg-github-actions[bot] wrote:




@llvm/pr-subscribers-clang-codegen

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

This removes aarch64_neon_vcvtfp2hf and aarch64_neon_vcvthf2fp intrinsics, relying on fp16 fpext and fptrunc instructions directly. Arm is left using its version of the intrinsics, as the types in the backend are more difficult without fullfp16.

---

Patch is 21.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/203903.diff


12 Files Affected:

- (modified) clang/include/clang/Basic/AArch64CodeGenUtils.h (-2) 
- (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp (+2) 
- (modified) clang/lib/CodeGen/TargetBuiltins/ARM.cpp (+17-4) 
- (modified) clang/test/CodeGen/AArch64/neon-misc.c (+8-20) 
- (modified) clang/test/CodeGen/arm_neon_intrinsics.c (+7-12) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (-6) 
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+19) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (-10) 
- (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (-3) 
- (modified) llvm/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll (+2-5) 
- (modified) llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll (+2-3) 
- (modified) llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vcvt_f32_su32.ll (+12-12) 


``````````diff

diff --git a/clang/include/clang/Basic/AArch64CodeGenUtils.h b/clang/include/clang/Basic/AArch64CodeGenUtils.h
index f64a41df63cf8..3b9145920bd5f 100644
--- a/clang/include/clang/Basic/AArch64CodeGenUtils.h
+++ b/clang/include/clang/Basic/AArch64CodeGenUtils.h
@@ -158,10 +158,8 @@ const inline ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap [] = {
   NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
-  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
   NEONMAP0(vcvt_f16_s16),
   NEONMAP0(vcvt_f16_u16),
-  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
   NEONMAP0(vcvt_f32_v),
   NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
   NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 84b9bb1007763..19b6cea40d50e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2959,6 +2959,8 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
     return builder.createCast(loc, cir::CastKind::int_to_float, ops[0], ty);
   case NEON::BI__builtin_neon_vcvt_f64_f32:
   case NEON::BI__builtin_neon_vcvt_f32_f64:
+  case NEON::BI__builtin_neon_vcvt_f16_f32:
+  case NEON::BI__builtin_neon_vcvt_f32_f16:
   case NEON::BI__builtin_neon_vcvt_s32_v:
   case NEON::BI__builtin_neon_vcvt_u32_v:
   case NEON::BI__builtin_neon_vcvt_s64_v:
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 4c668dabd53dc..edf4456b82e52 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -595,10 +595,8 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
-  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
   NEONMAP0(vcvt_f16_s16),
   NEONMAP0(vcvt_f16_u16),
-  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
   NEONMAP0(vcvt_f32_v),
   NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
   NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
@@ -2662,10 +2660,15 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                         "vsha1h");
 
-  case NEON::BI__builtin_neon_vcvth_bf16_f32: {
+  case NEON::BI__builtin_neon_vcvth_bf16_f32:
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
                         "vcvtbfp2bf");
-  }
+  case NEON::BI__builtin_neon_vcvt_f16_f32:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf), Ops,
+                        "vcvtfp2hf");
+  case NEON::BI__builtin_neon_vcvt_f32_f16:
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp), Ops,
+                        "vcvthf2fp");
 
   // The ARM _MoveToCoprocessor builtins put the input register value as
   // the first argument, but the LLVM intrinsic expects it as the third one.
@@ -6026,6 +6029,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
     return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
   }
+  case NEON::BI__builtin_neon_vcvt_f16_f32: {
+    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
+    llvm::Type *V4F16 = FixedVectorType::get(Builder.getHalfTy(), 4);
+    return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4F16);
+  }
+  case NEON::BI__builtin_neon_vcvt_f32_f16: {
+    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
+    llvm::Type *V4F16 = FixedVectorType::get(Builder.getHalfTy(), 4);
+    return Builder.CreateFPExt(Builder.CreateBitCast(Ops[0], V4F16), V4F32);
+  }
   case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
     SmallVector<int, 16> ConcatMask(8);
     std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
diff --git a/clang/test/CodeGen/AArch64/neon-misc.c b/clang/test/CodeGen/AArch64/neon-misc.c
index 964b0059662d2..720a1c22933d1 100644
--- a/clang/test/CodeGen/AArch64/neon-misc.c
+++ b/clang/test/CodeGen/AArch64/neon-misc.c
@@ -2606,11 +2606,8 @@ uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT:    [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]])
-// CHECK-NEXT:    [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT:    [[TMP3:%.*]] = fptrunc <4 x float> [[TMP2]] to <4 x half>
 // CHECK-NEXT:    ret <4 x half> [[TMP3]]
 //
 float16x4_t test_vcvt_f16_f32(float32x4_t a) {
@@ -2622,11 +2619,8 @@ float16x4_t test_vcvt_f16_f32(float32x4_t a) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[VCVT_F16_F32_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT:    [[VCVT_F16_F321_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I_I]])
-// CHECK-NEXT:    [[VCVT_F16_F322_I_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I_I]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I_I]] to <4 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT:    [[TMP3:%.*]] = fptrunc <4 x float> [[TMP2]] to <4 x half>
 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 // CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
 //
@@ -2693,11 +2687,8 @@ float32x4_t test_vcvtx_high_f32_f64(float32x2_t a, float64x2_t b) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
-// CHECK-NEXT:    [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = fpext <4 x half> [[TMP2]] to <4 x float>
 // CHECK-NEXT:    ret <4 x float> [[TMP3]]
 //
 float32x4_t test_vcvt_f32_f16(float16x4_t a) {
@@ -2710,11 +2701,8 @@ float32x4_t test_vcvt_f32_f16(float16x4_t a) {
 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[SHUFFLE_I]] to <4 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[VCVT_F32_F16_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VCVT_F32_F161_I_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I_I]])
-// CHECK-NEXT:    [[VCVT_F32_F162_I_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I_I]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = fpext <4 x half> [[TMP2]] to <4 x float>
 // CHECK-NEXT:    ret <4 x float> [[TMP3]]
 //
 float32x4_t test_vcvt_high_f32_f16(float16x8_t a) {
diff --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c
index eb9fe126ff2a0..a471ccf52e173 100644
--- a/clang/test/CodeGen/arm_neon_intrinsics.c
+++ b/clang/test/CodeGen/arm_neon_intrinsics.c
@@ -3330,12 +3330,10 @@ int64x1_t test_vcreate_s64(uint64_t a) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT:    [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]])
-// CHECK-NEXT:    [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
-// CHECK-NEXT:    ret <4 x half> [[TMP3]]
+// CHECK-NEXT:    [[VCVTFP2HF_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT:    [[VCVTFP2HF1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> [[VCVTFP2HF_I]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[VCVTFP2HF1_I]] to <4 x half>
+// CHECK-NEXT:    ret <4 x half> [[TMP2]]
 //
 float16x4_t test_vcvt_f16_f32(float32x4_t a) {
   return vcvt_f16_f32(a);
@@ -3394,12 +3392,9 @@ float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
-// CHECK-NEXT:    [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
-// CHECK-NEXT:    ret <4 x float> [[TMP3]]
+// CHECK-NEXT:    [[VCVTHF2FP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT:    [[VCVTHF2FP1_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVTHF2FP_I]])
+// CHECK-NEXT:    ret <4 x float> [[VCVTHF2FP1_I]]
 //
 float32x4_t test_vcvt_f32_f16(float16x4_t a) {
   return vcvt_f32_f16(a);
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index b1a2b348bc04b..2923595486712 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -465,12 +465,6 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic;
   def int_aarch64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic;
 
-  // Vector Conversions Between Half-Precision and Single-Precision.
-  def int_aarch64_neon_vcvtfp2hf
-    : DefaultAttrsIntrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_aarch64_neon_vcvthf2fp
-    : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
-
   // Vector Conversions Between Floating-point and Fixed-point.
   def int_aarch64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic;
   def int_aarch64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index f27713df7f146..814e985ebf7be 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1000,6 +1000,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
         return true;
       }
 
+      // vcvtfp2hf and vcvthf2fp -> fpext and fptrunc
+      if (Name == "vcvtfp2hf" || Name == "vcvthf2fp") {
+        NewFn = nullptr;
+        return true;
+      }
+
       return false; // No other 'aarch64.neon.*'.
     }
     if (Name.consume_front("sve.")) {
@@ -4670,6 +4676,19 @@ static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
                                    CI->getName());
   }
 
+  if (Name == "neon.vcvtfp2hf")
+    return Builder.CreateBitCast(
+        Builder.CreateFPTrunc(
+            CI->getOperand(0),
+            FixedVectorType::get(Type::getHalfTy(F->getContext()), 4)),
+        FixedVectorType::get(Type::getInt16Ty(F->getContext()), 4));
+  if (Name == "neon.vcvthf2fp")
+    return Builder.CreateFPExt(
+        Builder.CreateBitCast(
+            CI->getOperand(0),
+            FixedVectorType::get(Type::getHalfTy(F->getContext()), 4)),
+        FixedVectorType::get(Type::getFloatTy(F->getContext()), 4));
+
   llvm_unreachable("Unhandled Intrinsic!");
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 493f11ac13484..9f478ab6c3ca6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5833,11 +5833,6 @@ defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
 defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
 defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
 defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
-def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
-          (FCVTLv4i16 V64:$Rn)>;
-def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
-                                                                (i64 4)))),
-          (FCVTLv8i16 V128:$Rn)>;
 def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
           (FCVTLv2i32 V64:$Rn)>;
 def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
@@ -5851,11 +5846,6 @@ defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu
 defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
 defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
 defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
-def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
-          (FCVTNv4i16 V128:$Rn)>;
-def : Pat<(concat_vectors V64:$Rd,
-                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
-          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
 def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
           (FCVTNv2i32 V128:$Rn)>;
 def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index c45ec68f3cd07..383f503a2e87f 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -7186,9 +7186,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     case Intrinsic::aarch64_neon_fcvtzu:
     // Floating-point convert to lower precision narrow, rounding to odd
     case Intrinsic::aarch64_neon_fcvtxn:
-    // Vector Conversions Between Half-Precision and Single-Precision
-    case Intrinsic::aarch64_neon_vcvthf2fp:
-    case Intrinsic::aarch64_neon_vcvtfp2hf:
       handleGenericVectorConvertIntrinsic(I, /*FixedPoint=*/false);
       break;
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
index 310dc711fdc26..286390a8d80a0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
@@ -33,6 +33,8 @@ define <4 x float> @scvtq(<4 x i32> %a) nounwind readnone ssp {
   ret <4 x float> %vcvt.i
 }
 
+; Test that vcvtfp2hf and vcvthf2fp are AutoUpgraded to fp16 fptrunc + fpext.
+
 define <4 x float> @cvtf16(<4 x i16> %a) nounwind readnone ssp {
 ; CHECK-LABEL: cvtf16:
 ; CHECK: fcvtl  v0.4s, v0.4h
@@ -50,8 +52,6 @@ define <4 x float> @cvtf16_high(<8 x i16> %a) nounwind readnone ssp {
   ret <4 x float> %vcvt1.i
 }
 
-
-
 define <4 x i16> @cvtf16f32(<4 x float> %a) nounwind readnone ssp {
 ; CHECK-LABEL: cvtf16f32:
 ; CHECK: fcvtn  v0.4h, v0.4s
@@ -68,6 +68,3 @@ define <8 x i16> @cvtf16f32_high(<4 x i16> %low, <4 x float> %high_big) {
   %res = shufflevector <4 x i16> %low, <4 x i16> %high, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i16> %res
 }
-
-declare <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float>) nounwind readnone
diff --git a/llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll b/llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll
index c7e22b2f4301d..cef94f820d317 100644
--- a/llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll
+++ b/llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll
@@ -10,9 +10,8 @@ define <8 x half> @test1(<4 x float> noundef %a) {
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
-  %vcvt_f16_f321.i = tail call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a)
-  %0 = bitcast <4 x i16> %vcvt_f16_f321.i to <4 x half>
-  %shuffle.i = shufflevector <4 x half> %0, <4 x half> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %b = fptrunc <4 x float> %a to <4 x half>
+  %shuffle.i = shufflevector <4 x half> %b, <4 x half> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x half> %shuffle.i
 }
 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vcvt_f32_su32.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vcvt_f32_su32.ll
index 802c3aa4784ee..cab8139c70042 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vcvt_f32_su32.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vcvt_f32_su32.ll
@@ -75,9 +75,9 @@ define <4 x float> @cvtf16(<4 x i16> %a) nounwind readnone ssp sanitize_memory {
 ; CHECK-SAME: <4 x i16> [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i16> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
-; CHECK-NEXT:    [[VCVT1_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[A]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[A]] to <4 x half>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[VCVT1_I:%.*]] = fpext <4 x half> [[TMP2]] to <4 x float>
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x float> [[VCVT1_I]]
 ;
@@ -92,9 +92,9 @@ define <4 x float> @cvtf16_high(<8 x i16> %a) nounwind readnone ssp sanitize_mem
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[IN:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i16> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
-; CHECK-NEXT:    [[VCVT1_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[IN]]) #[[ATTR3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[IN]] to <4 x half>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32>
+; CHECK-NEXT:    [[VCVT1_I:%.*]] = fpext <4 x half> [[TMP2]] to <4 x float>
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x float> [[VCVT1_I]]
 ;
@@ -110,9 +110,9 @@ define <4 x i16> @cvtf16f32(<4 x float> %a) nounwind readnone ssp  sanitize_memo
 ; CHECK-SAME: <4 x float> [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[T...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/203903