[clang] 4987ae8 - [ARM][AArch64] Dont use macros for half instrinsics in NeonEmitter

Mon Oct 3 07:27:32 PDT 2022

Author: David Green
Date: 2022-10-03T15:27:23+01:00
New Revision: 4987ae84622b804e72c405aacbe297f0e5e247c7

URL: https://github.com/llvm/llvm-project/commit/4987ae84622b804e72c405aacbe297f0e5e247c7
DIFF: https://github.com/llvm/llvm-project/commit/4987ae84622b804e72c405aacbe297f0e5e247c7.diff

LOG: [ARM][AArch64] Dont use macros for half instrinsics in NeonEmitter

We don't require arm_neon.h fp16 intrinsics to be treated as macros any
more.

Differential Revision: https://reviews.llvm.org/D131504

Added: 
    

Modified: 
    clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
    clang/utils/TableGen/NeonEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
index c9e4b6ea6090d..984324a9113b3 100644

--- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -1486,18 +1486,18 @@ float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
 // CHECK-LABEL: define {{[^@]+}}@test_vfmaq_n_f16
 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[C]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[C]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[C]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[C]], i32 3
-// CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[C]], i32 4
-// CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[C]], i32 5
-// CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[C]], i32 6
-// CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[C]], i32 7
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[C]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[C]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[C]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[C]], i32 3
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[C]], i32 4
+// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[C]], i32 5
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[C]], i32 6
+// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[C]], i32 7
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <16 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[VECINIT7]], <8 x half> [[A]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[VECINIT7_I]], <8 x half> [[A]])
 // CHECK-NEXT:    ret <8 x half> [[TMP3]]
 //
 float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
@@ -1601,15 +1601,15 @@ float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
 // CHECK-LABEL: define {{[^@]+}}@test_vfms_n_f16
 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[FNEG:%.*]] = fneg <4 x half> [[B]]
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[C]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[C]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[C]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[C]], i32 3
+// CHECK-NEXT:    [[FNEG_I:%.*]] = fneg <4 x half> [[B]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x half> undef, half [[C]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x half> [[VECINIT_I]], half [[C]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x half> [[VECINIT1_I]], half [[C]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x half> [[VECINIT2_I]], half [[C]], i32 3
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3]] to <8 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[FNEG]], <4 x half> [[VECINIT3]], <4 x half> [[A]])
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[FNEG_I]] to <8 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[FNEG_I]], <4 x half> [[VECINIT3_I]], <4 x half> [[A]])
 // CHECK-NEXT:    ret <4 x half> [[TMP3]]
 //
 float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
@@ -1619,19 +1619,19 @@ float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
 // CHECK-LABEL: define {{[^@]+}}@test_vfmsq_n_f16
 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[FNEG:%.*]] = fneg <8 x half> [[B]]
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[C]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[C]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[C]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[C]], i32 3
-// CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[C]], i32 4
-// CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[C]], i32 5
-// CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[C]], i32 6
-// CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[C]], i32 7
+// CHECK-NEXT:    [[FNEG_I:%.*]] = fneg <8 x half> [[B]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[C]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[C]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[C]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[C]], i32 3
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[C]], i32 4
+// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[C]], i32 5
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[C]], i32 6
+// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[C]], i32 7
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <16 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[FNEG]], <8 x half> [[VECINIT7]], <8 x half> [[A]])
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[FNEG_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[FNEG_I]], <8 x half> [[VECINIT7_I]], <8 x half> [[A]])
 // CHECK-NEXT:    ret <8 x half> [[TMP3]]
 //
 float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
@@ -1721,12 +1721,12 @@ float16x8_t test_vmulq_laneq_f16(float16x8_t a, float16x8_t b) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmul_n_f16
 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[B]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[B]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[B]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[B]], i32 3
-// CHECK-NEXT:    [[MUL:%.*]] = fmul <4 x half> [[A]], [[VECINIT3]]
-// CHECK-NEXT:    ret <4 x half> [[MUL]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x half> undef, half [[B]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x half> [[VECINIT_I]], half [[B]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x half> [[VECINIT1_I]], half [[B]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x half> [[VECINIT2_I]], half [[B]], i32 3
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x half> [[A]], [[VECINIT3_I]]
+// CHECK-NEXT:    ret <4 x half> [[MUL_I]]
 //
 float16x4_t test_vmul_n_f16(float16x4_t a, float16_t b) {
   return vmul_n_f16(a, b);
@@ -1735,16 +1735,16 @@ float16x4_t test_vmul_n_f16(float16x4_t a, float16_t b) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmulq_n_f16
 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[B]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[B]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[B]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[B]], i32 3
-// CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[B]], i32 4
-// CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[B]], i32 5
-// CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[B]], i32 6
-// CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[B]], i32 7
-// CHECK-NEXT:    [[MUL:%.*]] = fmul <8 x half> [[A]], [[VECINIT7]]
-// CHECK-NEXT:    ret <8 x half> [[MUL]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[B]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[B]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[B]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[B]], i32 3
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[B]], i32 4
+// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[B]], i32 5
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[B]], i32 6
+// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[B]], i32 7
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <8 x half> [[A]], [[VECINIT7_I]]
+// CHECK-NEXT:    ret <8 x half> [[MUL_I]]
 //
 float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) {
   return vmulq_n_f16(a, b);
@@ -1858,14 +1858,14 @@ float16x8_t test_vmulxq_laneq_f16(float16x8_t a, float16x8_t b) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmulx_n_f16
 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[B]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[B]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[B]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[B]], i32 3
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x half> undef, half [[B]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x half> [[VECINIT_I]], half [[B]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x half> [[VECINIT1_I]], half [[B]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x half> [[VECINIT2_I]], half [[B]], i32 3
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[VECINIT3]] to <8 x i8>
-// CHECK-NEXT:    [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[VECINIT3]])
-// CHECK-NEXT:    ret <4 x half> [[VMULX2_I]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT:    [[VMULX2_I_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[VECINIT3_I]])
+// CHECK-NEXT:    ret <4 x half> [[VMULX2_I_I]]
 //
 float16x4_t test_vmulx_n_f16(float16x4_t a, float16_t b) {
   return vmulx_n_f16(a, b);
@@ -1874,18 +1874,18 @@ float16x4_t test_vmulx_n_f16(float16x4_t a, float16_t b) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmulxq_n_f16
 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[B]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[B]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[B]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[B]], i32 3
-// CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[B]], i32 4
-// CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[B]], i32 5
-// CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[B]], i32 6
-// CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[B]], i32 7
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[B]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[B]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[B]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[B]], i32 3
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[B]], i32 4
+// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[B]], i32 5
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[B]], i32 6
+// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[B]], i32 7
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[VECINIT7]] to <16 x i8>
-// CHECK-NEXT:    [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[VECINIT7]])
-// CHECK-NEXT:    ret <8 x half> [[VMULX2_I]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[VECINIT7_I]] to <16 x i8>
+// CHECK-NEXT:    [[VMULX2_I_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[VECINIT7_I]])
+// CHECK-NEXT:    ret <8 x half> [[VMULX2_I_I]]
 //
 float16x8_t test_vmulxq_n_f16(float16x8_t a, float16_t b) {
   return vmulxq_n_f16(a, b);
@@ -1917,9 +1917,8 @@ float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) {
 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[VMAXV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT:    [[VMAXV1:%.*]] = call half @llvm.aarch64.neon.fmaxv.f16.v4f16(<4 x half> [[VMAXV]])
-// CHECK-NEXT:    ret half [[VMAXV1]]
+// CHECK-NEXT:    [[VMAXV1_I:%.*]] = call half @llvm.aarch64.neon.fmaxv.f16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT:    ret half [[VMAXV1_I]]
 //
 float16_t test_vmaxv_f16(float16x4_t a) {
   return vmaxv_f16(a);
@@ -1929,9 +1928,8 @@ float16_t test_vmaxv_f16(float16x4_t a) {
 // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VMAXV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT:    [[VMAXV1:%.*]] = call half @llvm.aarch64.neon.fmaxv.f16.v8f16(<8 x half> [[VMAXV]])
-// CHECK-NEXT:    ret half [[VMAXV1]]
+// CHECK-NEXT:    [[VMAXV1_I:%.*]] = call half @llvm.aarch64.neon.fmaxv.f16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT:    ret half [[VMAXV1_I]]
 //
 float16_t test_vmaxvq_f16(float16x8_t a) {
   return vmaxvq_f16(a);
@@ -1941,9 +1939,8 @@ float16_t test_vmaxvq_f16(float16x8_t a) {
 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[VMINV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT:    [[VMINV1:%.*]] = call half @llvm.aarch64.neon.fminv.f16.v4f16(<4 x half> [[VMINV]])
-// CHECK-NEXT:    ret half [[VMINV1]]
+// CHECK-NEXT:    [[VMINV1_I:%.*]] = call half @llvm.aarch64.neon.fminv.f16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT:    ret half [[VMINV1_I]]
 //
 float16_t test_vminv_f16(float16x4_t a) {
   return vminv_f16(a);
@@ -1953,9 +1950,8 @@ float16_t test_vminv_f16(float16x4_t a) {
 // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VMINV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT:    [[VMINV1:%.*]] = call half @llvm.aarch64.neon.fminv.f16.v8f16(<8 x half> [[VMINV]])
-// CHECK-NEXT:    ret half [[VMINV1]]
+// CHECK-NEXT:    [[VMINV1_I:%.*]] = call half @llvm.aarch64.neon.fminv.f16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT:    ret half [[VMINV1_I]]
 //
 float16_t test_vminvq_f16(float16x8_t a) {
   return vminvq_f16(a);
@@ -1965,9 +1961,8 @@ float16_t test_vminvq_f16(float16x8_t a) {
 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[VMAXNMV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT:    [[VMAXNMV1:%.*]] = call half @llvm.aarch64.neon.fmaxnmv.f16.v4f16(<4 x half> [[VMAXNMV]])
-// CHECK-NEXT:    ret half [[VMAXNMV1]]
+// CHECK-NEXT:    [[VMAXNMV1_I:%.*]] = call half @llvm.aarch64.neon.fmaxnmv.f16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT:    ret half [[VMAXNMV1_I]]
 //
 float16_t test_vmaxnmv_f16(float16x4_t a) {
   return vmaxnmv_f16(a);
@@ -1977,9 +1972,8 @@ float16_t test_vmaxnmv_f16(float16x4_t a) {
 // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VMAXNMV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT:    [[VMAXNMV1:%.*]] = call half @llvm.aarch64.neon.fmaxnmv.f16.v8f16(<8 x half> [[VMAXNMV]])
-// CHECK-NEXT:    ret half [[VMAXNMV1]]
+// CHECK-NEXT:    [[VMAXNMV1_I:%.*]] = call half @llvm.aarch64.neon.fmaxnmv.f16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT:    ret half [[VMAXNMV1_I]]
 //
 float16_t test_vmaxnmvq_f16(float16x8_t a) {
   return vmaxnmvq_f16(a);
@@ -1989,9 +1983,8 @@ float16_t test_vmaxnmvq_f16(float16x8_t a) {
 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[VMINNMV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT:    [[VMINNMV1:%.*]] = call half @llvm.aarch64.neon.fminnmv.f16.v4f16(<4 x half> [[VMINNMV]])
-// CHECK-NEXT:    ret half [[VMINNMV1]]
+// CHECK-NEXT:    [[VMINNMV1_I:%.*]] = call half @llvm.aarch64.neon.fminnmv.f16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT:    ret half [[VMINNMV1_I]]
 //
 float16_t test_vminnmv_f16(float16x4_t a) {
   return vminnmv_f16(a);
@@ -2001,9 +1994,8 @@ float16_t test_vminnmv_f16(float16x4_t a) {
 // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VMINNMV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT:    [[VMINNMV1:%.*]] = call half @llvm.aarch64.neon.fminnmv.f16.v8f16(<8 x half> [[VMINNMV]])
-// CHECK-NEXT:    ret half [[VMINNMV1]]
+// CHECK-NEXT:    [[VMINNMV1_I:%.*]] = call half @llvm.aarch64.neon.fminnmv.f16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT:    ret half [[VMINNMV1_I]]
 //
 float16_t test_vminnmvq_f16(float16x8_t a) {
   return vminnmvq_f16(a);
@@ -2200,11 +2192,11 @@ float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmov_n_f16
 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[A]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
-// CHECK-NEXT:    ret <4 x half> [[VECINIT3]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x half> undef, half [[A]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x half> [[VECINIT_I]], half [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x half> [[VECINIT1_I]], half [[A]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x half> [[VECINIT2_I]], half [[A]], i32 3
+// CHECK-NEXT:    ret <4 x half> [[VECINIT3_I]]
 //
 float16x4_t test_vmov_n_f16(float16_t a) {
   return vmov_n_f16(a);
@@ -2213,15 +2205,15 @@ float16x4_t test_vmov_n_f16(float16_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmovq_n_f16
 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[A]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
-// CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
-// CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
-// CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
-// CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
-// CHECK-NEXT:    ret <8 x half> [[VECINIT7]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[A]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[A]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[A]], i32 3
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[A]], i32 4
+// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[A]], i32 5
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[A]], i32 6
+// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[A]], i32 7
+// CHECK-NEXT:    ret <8 x half> [[VECINIT7_I]]
 //
 float16x8_t test_vmovq_n_f16(float16_t a) {
   return vmovq_n_f16(a);
@@ -2230,11 +2222,11 @@ float16x8_t test_vmovq_n_f16(float16_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vdup_n_f16
 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[A]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
-// CHECK-NEXT:    ret <4 x half> [[VECINIT3]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x half> undef, half [[A]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x half> [[VECINIT_I]], half [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x half> [[VECINIT1_I]], half [[A]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x half> [[VECINIT2_I]], half [[A]], i32 3
+// CHECK-NEXT:    ret <4 x half> [[VECINIT3_I]]
 //
 float16x4_t test_vdup_n_f16(float16_t a) {
   return vdup_n_f16(a);
@@ -2243,15 +2235,15 @@ float16x4_t test_vdup_n_f16(float16_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vdupq_n_f16
 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[A]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
-// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
-// CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
-// CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
-// CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
-// CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
-// CHECK-NEXT:    ret <8 x half> [[VECINIT7]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[A]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[A]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[A]], i32 3
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[A]], i32 4
+// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[A]], i32 5
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[A]], i32 6
+// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[A]], i32 7
+// CHECK-NEXT:    ret <8 x half> [[VECINIT7_I]]
 //
 float16x8_t test_vdupq_n_f16(float16_t a) {
   return vdupq_n_f16(a);

diff  --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index d641a41ddb841..dbef48b24288a 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -395,11 +395,7 @@ class Intrinsic {
 
       // Pointer arguments need to use macros to avoid hiding aligned attributes
       // from the pointer type.
-
-      // It is not permitted to pass or return an __fp16 by value, so intrinsics
-      // taking a scalar float16_t must be implemented as macros.
-      if (Type.isImmediate() || Type.isPointer() ||
-          (Type.isScalar() && Type.isHalf()))
+      if (Type.isImmediate() || Type.isPointer())
         UseMacro = true;
     }
   }