[clang] clang: Stop using llvm.convert.to.fp16/llvm.convert.from.fp16 (PR #174494)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 6 11:04:24 PST 2026
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/174494
>From 091ebf1b40e93b291ee5a0b2d92ce18983e11051 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 5 Jan 2026 23:15:43 +0100
Subject: [PATCH] clang: Stop using llvm.convert.to.fp16/llvm.convert.from.fp16
There is no reason to use these over fpext/fptrunc and bitcast.
Split out from #174484. The test coverage is also shockingly bad,
so this adds a new wasm test showing the different contexts in which
the intrinsics are used.
I've also reverted this to a more conservative version that leaves the
useFP16ConversionIntrinsics configuration in place, and only replaces
the exact intrinsic usage. This should be removed, but it seems to have
turned into a buggy ABI option. Some contexts which probably meant to
check NativeHalfType or NativeHalfArgsAndReturns were relying on this
instead. Additionally, some of the SVE intrinsics appear to be using
__fp16 but really expect _Float16 treatment.
---
clang/include/clang/Basic/TargetInfo.h | 10 +-
clang/lib/CodeGen/CGExprScalar.cpp | 76 +++++----
clang/test/CodeGen/builtin_float_strictfp.c | 3 +-
clang/test/CodeGen/fp16-ops-strictfp.c | 4 +-
clang/test/CodeGen/wasm-fp16.c | 161 ++++++++++++++++++++
5 files changed, 206 insertions(+), 48 deletions(-)
create mode 100644 clang/test/CodeGen/wasm-fp16.c
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 4ff77bb64cf1c..bf688f726e134 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1013,10 +1013,12 @@ class TargetInfo : public TransferrableTargetInfo,
return ComplexLongDoubleUsesFP2Ret;
}
- /// Check whether llvm intrinsics such as llvm.convert.to.fp16 should be used
- /// to convert to and from __fp16.
- /// FIXME: This function should be removed once all targets stop using the
- /// conversion intrinsics.
+ /// Check whether conversions to and from __fp16 should go through an integer
+ /// bitcast with i16.
+ ///
+ /// FIXME: This function should be removed. The intrinsics no longer exist,
+ /// and are emulated with bitcast + fp cast. This only exists because of
+ /// misuse in ABI-determining contexts.
virtual bool useFP16ConversionIntrinsics() const {
return true;
}
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 84421fef9f524..30a13c065a729 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1597,22 +1597,21 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
// Cast to FP using the intrinsic if the half type itself isn't supported.
if (DstTy->isFloatingPointTy()) {
- if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics())
- return Builder.CreateCall(
- CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, DstTy),
- Src);
+ if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
+ Value *BitCast = Builder.CreateBitCast(Src, CGF.CGM.HalfTy);
+ return Builder.CreateFPExt(BitCast, DstTy, "conv");
+ }
} else {
// Cast to other types through float, using either the intrinsic or FPExt,
// depending on whether the half type itself is supported
// (as opposed to operations on half, available with NativeHalfType).
- if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
- Src = Builder.CreateCall(
- CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
- CGF.CGM.FloatTy),
- Src);
- } else {
- Src = Builder.CreateFPExt(Src, CGF.CGM.FloatTy, "conv");
+
+ if (Src->getType() != CGF.CGM.HalfTy) {
+ assert(CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics());
+ Src = Builder.CreateBitCast(Src, CGF.CGM.HalfTy);
}
+
+ Src = Builder.CreateFPExt(Src, CGF.CGM.FloatTy, "conv");
SrcType = CGF.getContext().FloatTy;
SrcTy = CGF.FloatTy;
}
@@ -1723,27 +1722,33 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
// Make sure we cast in a single step if from another FP type.
if (SrcTy->isFloatingPointTy()) {
- // Use the intrinsic if the half type itself isn't supported
- // (as opposed to operations on half, available with NativeHalfType).
- if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics())
- return Builder.CreateCall(
- CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, SrcTy), Src);
+ // Handle the case where the half type is represented as an integer (as
+ // opposed to operations on half, available with NativeHalfType).
+
// If the half type is supported, just use an fptrunc.
- return Builder.CreateFPTrunc(Src, DstTy);
+ Value *Res = Builder.CreateFPTrunc(Src, CGF.CGM.HalfTy, "conv");
+ if (DstTy == CGF.CGM.HalfTy)
+ return Res;
+
+ assert(DstTy->isIntegerTy(16) &&
+ CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics() &&
+ "Only half FP requires extra conversion");
+ return Builder.CreateBitCast(Res, DstTy);
}
+
DstTy = CGF.FloatTy;
}
Res = EmitScalarCast(Src, SrcType, DstType, SrcTy, DstTy, Opts);
if (DstTy != ResTy) {
- if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
- assert(ResTy->isIntegerTy(16) && "Only half FP requires extra conversion");
- Res = Builder.CreateCall(
- CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy),
- Res);
- } else {
- Res = Builder.CreateFPTrunc(Res, ResTy, "conv");
+ Res = Builder.CreateFPTrunc(Res, CGF.CGM.HalfTy, "conv");
+
+ if (ResTy != CGF.CGM.HalfTy) {
+ assert(ResTy->isIntegerTy(16) &&
+ CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics() &&
+ "Only half FP requires extra conversion");
+ Res = Builder.CreateBitCast(Res, ResTy);
}
}
@@ -3398,15 +3403,10 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
- // Another special case: half FP increment should be done via float
- if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
- value = Builder.CreateCall(
- CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
- CGF.CGM.FloatTy),
- input, "incdec.conv");
- } else {
- value = Builder.CreateFPExt(input, CGF.CGM.FloatTy, "incdec.conv");
- }
+ // Another special case: half FP increment should be done via float. If
+ // the input isn't already half, it may be i16.
+ Value *bitcast = Builder.CreateBitCast(input, CGF.CGM.HalfTy);
+ value = Builder.CreateFPExt(bitcast, CGF.CGM.FloatTy, "incdec.conv");
}
if (value->getType()->isFloatTy())
@@ -3439,14 +3439,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec");
if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
- if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
- value = Builder.CreateCall(
- CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16,
- CGF.CGM.FloatTy),
- value, "incdec.conv");
- } else {
- value = Builder.CreateFPTrunc(value, input->getType(), "incdec.conv");
- }
+ value = Builder.CreateFPTrunc(value, CGF.CGM.HalfTy, "incdec.conv");
+ value = Builder.CreateBitCast(value, input->getType());
}
// Fixed-point types.
diff --git a/clang/test/CodeGen/builtin_float_strictfp.c b/clang/test/CodeGen/builtin_float_strictfp.c
index b7cf567ccd66f..81bf89228f59c 100644
--- a/clang/test/CodeGen/builtin_float_strictfp.c
+++ b/clang/test/CodeGen/builtin_float_strictfp.c
@@ -18,7 +18,8 @@ void test_half(__fp16 *H, __fp16 *H2) {
(void)__builtin_isinf(*H);
// NOFP16: [[LDADDR:%.*]] = load ptr, ptr %{{.*}}, align 8
// NOFP16-NEXT: [[IHALF:%.*]] = load i16, ptr [[LDADDR]], align 2
- // NOFP16-NEXT: [[CONV:%.*]] = call float @llvm.convert.from.fp16.f32(i16 [[IHALF]])
+ // NOFP16-NEXT: [[BITCAST:%.*]] = bitcast i16 [[IHALF]] to half
+ // NOFP16-NEXT: [[CONV:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[BITCAST]], metadata !"fpexcept.strict")
// NOFP16-NEXT: [[RES1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[CONV]], i32 516)
// NOFP16-NEXT: zext i1 [[RES1]] to i32
// FP16: [[LDADDR:%.*]] = load ptr, ptr %{{.*}}, align 8
diff --git a/clang/test/CodeGen/fp16-ops-strictfp.c b/clang/test/CodeGen/fp16-ops-strictfp.c
index 25753e5b98beb..830be6256456e 100644
--- a/clang/test/CodeGen/fp16-ops-strictfp.c
+++ b/clang/test/CodeGen/fp16-ops-strictfp.c
@@ -334,7 +334,7 @@ void foo(void) {
// NOTNATIVE: call float @llvm.experimental.constrained.fpext.f32.f16(half %{{.*}}, metadata !"fpexcept.strict")
// NOTNATIVE: call half @llvm.experimental.constrained.fptrunc.f16.f64(double 4.200000e+01, metadata !"round.tonearest", metadata !"fpexcept.strict")
// NATIVE-HALF: call half @llvm.experimental.constrained.fptrunc.f16.f64(double 4.200000e+01, metadata !"round.tonearest", metadata !"fpexcept.strict")
- // NOTNATIVE: call float @llvm.experimental.constrained.fpext.f32.f16(half %98, metadata !"fpexcept.strict")
+ // NOTNATIVE: call float @llvm.experimental.constrained.fpext.f32.f16(half %{{.*}}, metadata !"fpexcept.strict")
// NOTNATIVE: call i1 @llvm.experimental.constrained.fcmps.f32(float %{{.*}}, float %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
// NATIVE-HALF: call i1 @llvm.experimental.constrained.fcmps.f16(half %{{.*}}, half %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
// CHECK: store {{.*}} i32 {{.*}}, ptr
@@ -418,7 +418,7 @@ void foo(void) {
// NOTNATIVE: call float @llvm.experimental.constrained.fpext.f32.f16(half %{{.*}}, metadata !"fpexcept.strict")
// NOTNATIVE: call half @llvm.experimental.constrained.fptrunc.f16.f64(double 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict")
// NATIVE-HALF: call half @llvm.experimental.constrained.fptrunc.f16.f64(double 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict")
- // NOTNATIVE: call float @llvm.experimental.constrained.fpext.f32.f16(half %122, metadata !"fpexcept.strict")
+ // NOTNATIVE: call float @llvm.experimental.constrained.fpext.f32.f16(half %{{.*}}, metadata !"fpexcept.strict")
// NOTNATIVE: call i1 @llvm.experimental.constrained.fcmp.f32(float %{{.*}}, float %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
// NATIVE-HALF: call i1 @llvm.experimental.constrained.fcmp.f16(half %{{.*}}, half %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
// CHECK: store {{.*}} i32 {{.*}}, ptr
diff --git a/clang/test/CodeGen/wasm-fp16.c b/clang/test/CodeGen/wasm-fp16.c
new file mode 100644
index 0000000000000..e974b9f09d324
--- /dev/null
+++ b/clang/test/CodeGen/wasm-fp16.c
@@ -0,0 +1,161 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
+// RUN: %clang_cc1 -triple wasm64-- -fnative-half-arguments-and-returns -emit-llvm -O1 -disable-llvm-passes -o - %s | FileCheck %s
+
+__fp16 g = 2.0f;
+
+//.
+// CHECK: @g = global i16 16384, align 2
+//.
+// CHECK-LABEL: define float @test_memory_fp16_to_float(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2, !tbaa [[__FP16_TBAA9:![0-9]+]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP2]] to float
+// CHECK-NEXT: ret float [[CONV]]
+//
+float test_memory_fp16_to_float(__fp16 *ptr) {
+ return *ptr;
+}
+
+// CHECK-LABEL: define void @test_memory_float_from_fp16(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], float noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6]]
+// CHECK-NEXT: store float [[VAL]], ptr [[VAL_ADDR]], align 4, !tbaa [[FLOAT_TBAA11:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[VAL_ADDR]], align 4, !tbaa [[FLOAT_TBAA11]]
+// CHECK-NEXT: [[CONV:%.*]] = fptrunc float [[TMP0]] to half
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[CONV]] to i16
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6]]
+// CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP2]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: ret void
+//
+void test_memory_float_from_fp16(__fp16* ptr, float val) {
+ *ptr = val;
+}
+
+// CHECK-LABEL: define float @test_memory_fp16_preinc(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half
+// CHECK-NEXT: [[INCDEC_CONV:%.*]] = fpext half [[TMP2]] to float
+// CHECK-NEXT: [[INC:%.*]] = fadd float [[INCDEC_CONV]], 1.000000e+00
+// CHECK-NEXT: [[INCDEC_CONV1:%.*]] = fptrunc float [[INC]] to half
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[INCDEC_CONV1]] to i16
+// CHECK-NEXT: store i16 [[TMP3]], ptr [[TMP0]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP3]] to half
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP4]] to float
+// CHECK-NEXT: ret float [[CONV]]
+//
+float test_memory_fp16_preinc(__fp16 *ptr) {
+ return ++(*ptr);
+}
+
+// CHECK-LABEL: define float @test_memory_fp16_postinc(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half
+// CHECK-NEXT: [[INCDEC_CONV:%.*]] = fpext half [[TMP2]] to float
+// CHECK-NEXT: [[INC:%.*]] = fadd float [[INCDEC_CONV]], 1.000000e+00
+// CHECK-NEXT: [[INCDEC_CONV1:%.*]] = fptrunc float [[INC]] to half
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[INCDEC_CONV1]] to i16
+// CHECK-NEXT: store i16 [[TMP3]], ptr [[TMP0]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to half
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP4]] to float
+// CHECK-NEXT: ret float [[CONV]]
+//
+float test_memory_fp16_postinc(__fp16 *ptr) {
+ return (*ptr)++;
+}
+
+// CHECK-LABEL: define float @test_memory_fp16_predec(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half
+// CHECK-NEXT: [[INCDEC_CONV:%.*]] = fpext half [[TMP2]] to float
+// CHECK-NEXT: [[DEC:%.*]] = fadd float [[INCDEC_CONV]], -1.000000e+00
+// CHECK-NEXT: [[INCDEC_CONV1:%.*]] = fptrunc float [[DEC]] to half
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[INCDEC_CONV1]] to i16
+// CHECK-NEXT: store i16 [[TMP3]], ptr [[TMP0]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP3]] to half
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP4]] to float
+// CHECK-NEXT: ret float [[CONV]]
+//
+float test_memory_fp16_predec(__fp16 *ptr) {
+ return --(*ptr);
+}
+
+// CHECK-LABEL: define float @test_memory_fp16_postdec(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[__FP16PTR_TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half
+// CHECK-NEXT: [[INCDEC_CONV:%.*]] = fpext half [[TMP2]] to float
+// CHECK-NEXT: [[DEC:%.*]] = fadd float [[INCDEC_CONV]], -1.000000e+00
+// CHECK-NEXT: [[INCDEC_CONV1:%.*]] = fptrunc float [[DEC]] to half
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[INCDEC_CONV1]] to i16
+// CHECK-NEXT: store i16 [[TMP3]], ptr [[TMP0]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to half
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP4]] to float
+// CHECK-NEXT: ret float [[CONV]]
+//
+float test_memory_fp16_postdec(__fp16 *ptr) {
+ return (*ptr)--;
+}
+
+// CHECK-LABEL: define i16 @test_arg_return(
+// CHECK-SAME: i16 noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NEXT: store i16 [[X]], ptr [[X_ADDR]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[X_ADDR]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[TMP0]] to half
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[X_ADDR]], align 2, !tbaa [[__FP16_TBAA9]]
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
+// CHECK-NEXT: [[CONV1:%.*]] = fpext half [[TMP3]] to float
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[CONV]], [[CONV1]]
+// CHECK-NEXT: [[CONV2:%.*]] = fptrunc float [[ADD]] to half
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[CONV2]] to i16
+// CHECK-NEXT: ret i16 [[TMP4]]
+//
+__fp16 test_arg_return(__fp16 x) {
+ return x + x;
+}
+//.
+// CHECK: attributes #[[ATTR0]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+// CHECK: [[META2:![0-9]+]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[__FP16PTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK: [[META7]] = !{!"p1 __fp16", [[META8:![0-9]+]], i64 0}
+// CHECK: [[META8]] = !{!"any pointer", [[META4]], i64 0}
+// CHECK: [[__FP16_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+// CHECK: [[META10]] = !{!"__fp16", [[META4]], i64 0}
+// CHECK: [[FLOAT_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"float", [[META4]], i64 0}
+//.
More information about the cfe-commits
mailing list