[clang] 2aaa646 - [X86] Support *_set1_pch(_Float16 _Complex h)

Sat Sep 11 03:20:37 PDT 2021

Author: Wang, Pengfei
Date: 2021-09-11T17:47:31+08:00
New Revision: 2aaa6466fe00a1720a1c62c6d35b3d3efaf13160

URL: https://github.com/llvm/llvm-project/commit/2aaa6466fe00a1720a1c62c6d35b3d3efaf13160
DIFF: https://github.com/llvm/llvm-project/commit/2aaa6466fe00a1720a1c62c6d35b3d3efaf13160.diff

LOG: [X86] Support *_set1_pch(_Float16 _Complex h)

Reviewed By: LuoYuanke

Differential Revision: https://reviews.llvm.org/D109487

Added: 
    

Modified: 
    clang/lib/Headers/avx512fp16intrin.h
    clang/lib/Headers/avx512vlfp16intrin.h
    clang/test/CodeGen/X86/avx512fp16-builtins.c
    clang/test/CodeGen/X86/avx512vlfp16-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 8911b4e2ff42f..80f22ab997243 100644

--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -97,6 +97,11 @@ _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
                 (h14), (h13), (h12), (h11), (h10), (h9), (h8), (h7), (h6),     \
                 (h5), (h4), (h3), (h2), (h1))
 
+static __inline __m512h __DEFAULT_FN_ATTRS512 // __h: reserved, macro-safe
+_mm512_set1_pch(_Float16 _Complex __h) { // Splat __h, bit-cast to float.
+  return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, __h));
+}
+
 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castph_ps(__m128h __a) {
   return (__m128)__a;
 }

diff  --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h
index b8cd554717a03..69f81af9217f7 100644
--- a/clang/lib/Headers/avx512vlfp16intrin.h
+++ b/clang/lib/Headers/avx512vlfp16intrin.h
@@ -51,6 +51,16 @@ _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
   return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
 }
 
+static __inline __m256h __DEFAULT_FN_ATTRS256 // __h: reserved, macro-safe
+_mm256_set1_pch(_Float16 _Complex __h) { // Splat __h, bit-cast to float.
+  return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, __h));
+}
+
+static __inline __m128h __DEFAULT_FN_ATTRS128 // __h: reserved, macro-safe
+_mm_set1_pch(_Float16 _Complex __h) { // Splat __h, bit-cast to float.
+  return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, __h));
+}
+
 static __inline __m256h __DEFAULT_FN_ATTRS256
 _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
               _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,

diff  --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index ee4bed552f9ac..7fccb216a9d30 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -81,6 +81,29 @@ __m512h test_mm512_set1_ph(_Float16 h) {
   return _mm512_set1_ph(h);
 }
 
+__m512h test_mm512_set1_pch(_Float16 _Complex h) { // IR test for the splat.
+  // CHECK-LABEL: @test_mm512_set1_pch
+  // CHECK: bitcast { half, half }{{.*}} to float
+  // CHECK: insertelement <16 x float> {{.*}}, i32 0
+  // CHECK: insertelement <16 x float> {{.*}}, i32 1
+  // CHECK: insertelement <16 x float> {{.*}}, i32 2
+  // CHECK: insertelement <16 x float> {{.*}}, i32 3
+  // CHECK: insertelement <16 x float> {{.*}}, i32 4
+  // CHECK: insertelement <16 x float> {{.*}}, i32 5
+  // CHECK: insertelement <16 x float> {{.*}}, i32 6
+  // CHECK: insertelement <16 x float> {{.*}}, i32 7
+  // CHECK: insertelement <16 x float> {{.*}}, i32 8
+  // CHECK: insertelement <16 x float> {{.*}}, i32 9
+  // CHECK: insertelement <16 x float> {{.*}}, i32 10
+  // CHECK: insertelement <16 x float> {{.*}}, i32 11
+  // CHECK: insertelement <16 x float> {{.*}}, i32 12
+  // CHECK: insertelement <16 x float> {{.*}}, i32 13
+  // CHECK: insertelement <16 x float> {{.*}}, i32 14
+  // CHECK: insertelement <16 x float> {{.*}}, i32 15
+  // CHECK: bitcast <16 x float>{{.*}} to <32 x half>
+  return _mm512_set1_pch(h); // expect: bitcast, 16 inserts, bitcast back
+}
+
 __m512h test_mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
                           _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
                           _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,

diff  --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
index 948e6ad13567c..2864157e1bdd0 100644
--- a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
@@ -61,6 +61,32 @@ __m256h test_mm256_set1_ph(_Float16 h) {
   return _mm256_set1_ph(h);
 }
 
+__m128h test_mm_set1_pch(_Float16 _Complex h) { // IR test for the splat.
+  // CHECK-LABEL: @test_mm_set1_pch
+  // CHECK: bitcast { half, half }{{.*}} to float
+  // CHECK: insertelement <4 x float> {{.*}}, i32 0
+  // CHECK: insertelement <4 x float> {{.*}}, i32 1
+  // CHECK: insertelement <4 x float> {{.*}}, i32 2
+  // CHECK: insertelement <4 x float> {{.*}}, i32 3
+  // CHECK: bitcast <4 x float>{{.*}} to <8 x half>
+  return _mm_set1_pch(h); // expect: bitcast, 4 inserts, bitcast back
+}
+
+__m256h test_mm256_set1_pch(_Float16 _Complex h) { // IR test for the splat.
+  // CHECK-LABEL: @test_mm256_set1_pch
+  // CHECK: bitcast { half, half }{{.*}} to float
+  // CHECK: insertelement <8 x float> {{.*}}, i32 0
+  // CHECK: insertelement <8 x float> {{.*}}, i32 1
+  // CHECK: insertelement <8 x float> {{.*}}, i32 2
+  // CHECK: insertelement <8 x float> {{.*}}, i32 3
+  // CHECK: insertelement <8 x float> {{.*}}, i32 4
+  // CHECK: insertelement <8 x float> {{.*}}, i32 5
+  // CHECK: insertelement <8 x float> {{.*}}, i32 6
+  // CHECK: insertelement <8 x float> {{.*}}, i32 7
+  // CHECK: bitcast <8 x float>{{.*}} to <16 x half>
+  return _mm256_set1_pch(h); // expect: bitcast, 8 inserts, bitcast back
+}
+
 __m128h test_mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
                        _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
   // CHECK-LABEL: @test_mm_set_ph