r245605 - [Headers][X86] Use __builtin_shufflevector in AVX2 broadcasts.

Ahmed Bougacha via cfe-commits cfe-commits at lists.llvm.org
Thu Aug 20 13:27:22 PDT 2015


Author: ab
Date: Thu Aug 20 15:27:21 2015
New Revision: 245605

URL: http://llvm.org/viewvc/llvm-project?rev=245605&view=rev
Log:
[Headers][X86] Use __builtin_shufflevector in AVX2 broadcasts.

This lets us optimize them better. We agreed to remove the intrinsics,
instead of combining them later, as, at -O0, we generate the expected
instructions. Plus, it's a nice cleanup.

Differential Revision: http://reviews.llvm.org/D10556

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/avx2intrin.h
    cfe/trunk/test/CodeGen/avx2-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=245605&r1=245604&r2=245605&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Aug 20 15:27:21 2015
@@ -590,17 +590,6 @@ TARGET_BUILTIN(__builtin_ia32_psrld256,
 TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_movntdqa256, "V4LLiV4LLi*", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_vbroadcastss_ps, "V4fV4f", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_vbroadcastss_ps256, "V8fV4f", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_vbroadcastsd_pd256, "V4dV2d", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastb256, "V32cV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastw256, "V16sV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastd256, "V8iV4i", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq256, "V4LLiV2LLi", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastb128, "V16cV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastw128, "V8sV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastd128, "V4iV4i", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq128, "V2LLiV2LLi", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8f", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "", "avx2")

Modified: cfe/trunk/lib/Headers/avx2intrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=245605&r1=245604&r2=245605&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx2intrin.h (original)
+++ cfe/trunk/lib/Headers/avx2intrin.h Thu Aug 20 15:27:21 2015
@@ -760,7 +760,7 @@ _mm256_stream_load_si256(__m256i *__V)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_broadcastss_ps(__m128 __X)
 {
-  return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X);
+  return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -772,13 +772,13 @@ _mm_broadcastsd_pd(__m128d __a)
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_broadcastss_ps(__m128 __X)
 {
-  return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X);
+  return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_broadcastsd_pd(__m128d __X)
 {
-  return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X);
+  return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -812,50 +812,50 @@ _mm256_broadcastsi128_si256(__m128i __X)
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcastb_epi8(__m128i __X)
 {
-  return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X);
+  return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcastw_epi16(__m128i __X)
 {
-  return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X);
+  return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcastd_epi32(__m128i __X)
 {
-  return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X);
+  return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcastq_epi64(__m128i __X)
 {
-  return (__m256i)__builtin_ia32_pbroadcastq256(__X);
+  return (__m256i)__builtin_shufflevector(__X, __X, 0, 0, 0, 0);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_broadcastb_epi8(__m128i __X)
 {
-  return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X);
+  return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_broadcastw_epi16(__m128i __X)
 {
-  return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X);
+  return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_broadcastd_epi32(__m128i __X)
 {
-  return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X);
+  return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_broadcastq_epi64(__m128i __X)
 {
-  return (__m128i)__builtin_ia32_pbroadcastq128(__X);
+  return (__m128i)__builtin_shufflevector(__X, __X, 0, 0);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=245605&r1=245604&r2=245605&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx2-builtins.c Thu Aug 20 15:27:21 2015
@@ -607,7 +607,9 @@ __m256i test_mm256_stream_load_si256(__m
 }
 
 __m128 test_mm_broadcastss_ps(__m128 a) {
-  // CHECK: @llvm.x86.avx2.vbroadcast.ss.ps
+  // CHECK-LABEL: test_mm_broadcastss_ps
+  // CHECK-NOT: @llvm.x86.avx2.vbroadcast.ss.ps
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
   return _mm_broadcastss_ps(a);
 }
 
@@ -617,12 +619,16 @@ __m128d test_mm_broadcastsd_pd(__m128d a
 }
 
 __m256 test_mm256_broadcastss_ps(__m128 a) {
-  // CHECK: @llvm.x86.avx2.vbroadcast.ss.ps.256
+  // CHECK-LABEL: test_mm256_broadcastss_ps
+  // CHECK-NOT: @llvm.x86.avx2.vbroadcast.ss.ps.256
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> zeroinitializer
   return _mm256_broadcastss_ps(a);
 }
 
 __m256d test_mm256_broadcastsd_pd(__m128d a) {
-  // check: @llvm.x86.avx2.vbroadcast.sd.pd.256
+  // CHECK-LABEL: test_mm256_broadcastsd_pd
+  // CHECK-NOT: @llvm.x86.avx2.vbroadcast.sd.pd.256
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> zeroinitializer
   return _mm256_broadcastsd_pd(a);
 }
 
@@ -646,42 +652,58 @@ __m256i test_mm256_blend_epi32(__m256i a
 }
 
 __m256i test_mm256_broadcastb_epi8(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastb.256
+  // CHECK-LABEL: test_mm256_broadcastb_epi8
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastb.256
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <32 x i32> zeroinitializer
   return _mm256_broadcastb_epi8(a);
 }
 
 __m256i test_mm256_broadcastw_epi16(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastw.256
+  // CHECK-LABEL: test_mm256_broadcastw_epi16
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastw.256
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> zeroinitializer
   return _mm256_broadcastw_epi16(a);
 }
 
 __m256i test_mm256_broadcastd_epi32(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastd.256
+  // CHECK-LABEL: test_mm256_broadcastd_epi32
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastd.256
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> zeroinitializer
   return _mm256_broadcastd_epi32(a);
 }
 
 __m256i test_mm256_broadcastq_epi64(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastq.256
+  // CHECK-LABEL: test_mm256_broadcastq_epi64
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastq.256
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> zeroinitializer
   return _mm256_broadcastq_epi64(a);
 }
 
 __m128i test_mm_broadcastb_epi8(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastb.128
+  // CHECK-LABEL: test_mm_broadcastb_epi8
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastb.128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> zeroinitializer
   return _mm_broadcastb_epi8(a);
 }
 
 __m128i test_mm_broadcastw_epi16(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastw.128
+  // CHECK-LABEL: test_mm_broadcastw_epi16
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastw.128
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> zeroinitializer
   return _mm_broadcastw_epi16(a);
 }
 
 __m128i test_mm_broadcastd_epi32(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastd.128
+  // CHECK-LABEL: test_mm_broadcastd_epi32
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastd.128
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
   return _mm_broadcastd_epi32(a);
 }
 
 __m128i test_mm_broadcastq_epi64(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastq.128
+  // CHECK-LABEL: test_mm_broadcastq_epi64
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastq.128
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> zeroinitializer
   return _mm_broadcastq_epi64(a);
 }
 




More information about the cfe-commits mailing list