r208664 - Patched clang to emit x86 blends as shufflevectors.

Mon May 12 19:37:02 PDT 2014

Author: filcab
Date: Mon May 12 21:37:02 2014
New Revision: 208664

URL: http://llvm.org/viewvc/llvm-project?rev=208664&view=rev
Log:
Patched clang to emit x86 blends as shufflevectors.

Summary:
Most of the clang header patch by Simon Pilgrim @ SCEE.
Also fixed (or added) clang tests for these intrinsics.

LLVM tests to make sure we get the blend instruction out of these
shufflevectors are at http://reviews.llvm.org/D3600

Reviewers: eli.friedman, craig.topper, rafael

Subscribers: cfe-commits

Differential Revision: http://reviews.llvm.org/D3601

Modified:
    cfe/trunk/lib/Headers/avx2intrin.h
    cfe/trunk/lib/Headers/avxintrin.h
    cfe/trunk/lib/Headers/smmintrin.h
    cfe/trunk/test/CodeGen/avx-builtins.c
    cfe/trunk/test/CodeGen/avx2-builtins.c
    cfe/trunk/test/CodeGen/sse-builtins.c

Modified: cfe/trunk/lib/Headers/avx2intrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=208664&r1=208663&r2=208664&view=diff
==============================================================================

--- cfe/trunk/lib/Headers/avx2intrin.h (original)
+++ cfe/trunk/lib/Headers/avx2intrin.h Mon May 12 21:37:02 2014
@@ -160,7 +160,23 @@ _mm256_blendv_epi8(__m256i __V1, __m256i
 #define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
   __m256i __V1 = (V1); \
   __m256i __V2 = (V2); \
-  (__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, (M)); })
+  (__m256d)__builtin_shufflevector((__v16hi)__V1, (__v16hi)__V2, \
+                                   (((M) & 0x01) ? 16 : 0), \
+                                   (((M) & 0x02) ? 17 : 1), \
+                                   (((M) & 0x04) ? 18 : 2), \
+                                   (((M) & 0x08) ? 19 : 3), \
+                                   (((M) & 0x10) ? 20 : 4), \
+                                   (((M) & 0x20) ? 21 : 5), \
+                                   (((M) & 0x40) ? 22 : 6), \
+                                   (((M) & 0x80) ? 23 : 7), \
+                                   (((M) & 0x01) ? 24 : 8), \
+                                   (((M) & 0x02) ? 25 : 9), \
+                                   (((M) & 0x04) ? 26 : 10), \
+                                   (((M) & 0x08) ? 27 : 11), \
+                                   (((M) & 0x10) ? 28 : 12), \
+                                   (((M) & 0x20) ? 29 : 13), \
+                                   (((M) & 0x40) ? 30 : 14), \
+                                   (((M) & 0x80) ? 31 : 15)); })
 
 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_cmpeq_epi8(__m256i __a, __m256i __b)
@@ -761,12 +777,24 @@ _mm256_broadcastsi128_si256(__m128i __X)
 #define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
   __m128i __V1 = (V1); \
   __m128i __V2 = (V2); \
-  (__m128i)__builtin_ia32_pblendd128((__v4si)__V1, (__v4si)__V2, (M)); })
+  (__m128i)__builtin_shufflevector((__v4si)__V1, (__v4si)__V2, \
+                                   (((M) & 0x01) ? 4 : 0), \
+                                   (((M) & 0x02) ? 5 : 1), \
+                                   (((M) & 0x04) ? 6 : 2), \
+                                   (((M) & 0x08) ? 7 : 3)); })
 
 #define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \
   __m256i __V1 = (V1); \
   __m256i __V2 = (V2); \
-  (__m256i)__builtin_ia32_pblendd256((__v8si)__V1, (__v8si)__V2, (M)); })
+  (__m256i)__builtin_shufflevector((__v8si)__V1, (__v8si)__V2, \
+                                   (((M) & 0x01) ?  8 : 0), \
+                                   (((M) & 0x02) ?  9 : 1), \
+                                   (((M) & 0x04) ? 10 : 2), \
+                                   (((M) & 0x08) ? 11 : 3), \
+                                   (((M) & 0x10) ? 12 : 4), \
+                                   (((M) & 0x20) ? 13 : 5), \
+                                   (((M) & 0x40) ? 14 : 6), \
+                                   (((M) & 0x80) ? 15 : 7)); })
 
 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_broadcastb_epi8(__m128i __X)

Modified: cfe/trunk/lib/Headers/avxintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=208664&r1=208663&r2=208664&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avxintrin.h (original)
+++ cfe/trunk/lib/Headers/avxintrin.h Mon May 12 21:37:02 2014
@@ -308,12 +308,24 @@ _mm256_permutevar_ps(__m256 __a, __m256i
 #define _mm256_blend_pd(V1, V2, M) __extension__ ({ \
   __m256d __V1 = (V1); \
   __m256d __V2 = (V2); \
-  (__m256d)__builtin_ia32_blendpd256((__v4df)__V1, (__v4df)__V2, (M)); })
+  (__m256d)__builtin_shufflevector((__v4df)__V1, (__v4df)__V2, \
+                                   (((M) & 0x01) ? 4 : 0), \
+                                   (((M) & 0x02) ? 5 : 1), \
+                                   (((M) & 0x04) ? 6 : 2), \
+                                   (((M) & 0x08) ? 7 : 3)); })
 
 #define _mm256_blend_ps(V1, V2, M) __extension__ ({ \
   __m256 __V1 = (V1); \
   __m256 __V2 = (V2); \
-  (__m256)__builtin_ia32_blendps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
+  (__m256)__builtin_shufflevector((__v8sf)__V1, (__v8sf)__V2, \
+                                  (((M) & 0x01) ?  8 : 0), \
+                                  (((M) & 0x02) ?  9 : 1), \
+                                  (((M) & 0x04) ? 10 : 2), \
+                                  (((M) & 0x08) ? 11 : 3), \
+                                  (((M) & 0x10) ? 12 : 4), \
+                                  (((M) & 0x20) ? 13 : 5), \
+                                  (((M) & 0x40) ? 14 : 6), \
+                                  (((M) & 0x80) ? 15 : 7)); })
 
 static __inline __m256d __attribute__((__always_inline__, __nodebug__))
 _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)

Modified: cfe/trunk/lib/Headers/smmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/smmintrin.h?rev=208664&r1=208663&r2=208664&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/smmintrin.h (original)
+++ cfe/trunk/lib/Headers/smmintrin.h Mon May 12 21:37:02 2014
@@ -79,12 +79,18 @@
 #define _mm_blend_pd(V1, V2, M) __extension__ ({ \
   __m128d __V1 = (V1); \
   __m128d __V2 = (V2); \
-  (__m128d) __builtin_ia32_blendpd ((__v2df)__V1, (__v2df)__V2, (M)); })
+  (__m128d)__builtin_shufflevector((__v2df)__V1, (__v2df)__V2, \
+                                   (((M) & 0x01) ? 2 : 0), \
+                                   (((M) & 0x02) ? 3 : 1)); })
 
 #define _mm_blend_ps(V1, V2, M) __extension__ ({ \
   __m128 __V1 = (V1); \
   __m128 __V2 = (V2); \
-  (__m128) __builtin_ia32_blendps ((__v4sf)__V1, (__v4sf)__V2, (M)); })
+  (__m128)__builtin_shufflevector((__v4sf)__V1, (__v4sf)__V2, \
+                                  (((M) & 0x01) ? 4 : 0), \
+                                  (((M) & 0x02) ? 5 : 1), \
+                                  (((M) & 0x04) ? 6 : 2), \
+                                  (((M) & 0x08) ? 7 : 3)); })
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)
@@ -110,7 +116,15 @@ _mm_blendv_epi8 (__m128i __V1, __m128i _
 #define _mm_blend_epi16(V1, V2, M) __extension__ ({ \
   __m128i __V1 = (V1); \
   __m128i __V2 = (V2); \
-  (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__V1, (__v8hi)__V2, (M)); })
+  (__m128i)__builtin_shufflevector((__v8hi)__V1, (__v8hi)__V2, \
+                                   (((M) & 0x01) ?  8 : 0), \
+                                   (((M) & 0x02) ?  9 : 1), \
+                                   (((M) & 0x04) ? 10 : 2), \
+                                   (((M) & 0x08) ? 11 : 3), \
+                                   (((M) & 0x10) ? 12 : 4), \
+                                   (((M) & 0x20) ? 13 : 5), \
+                                   (((M) & 0x40) ? 14 : 6), \
+                                   (((M) & 0x80) ? 15 : 7)); })
 
 /* SSE4 Dword Multiply Instructions.  */
 static __inline__  __m128i __attribute__((__always_inline__, __nodebug__))

Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=208664&r1=208663&r2=208664&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Mon May 12 21:37:02 2014
@@ -111,3 +111,15 @@ int test_extract_epi8(__m256i __a) {
   // CHECK: extractelement <32 x i8> %{{.*}}, i32 0
   return _mm256_extract_epi8(__a, 32);
 }
+
+__m256d test_256_blend_pd(__m256d __a, __m256d __b) {
+  // CHECK-LABEL: @test_256_blend_pd
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  return _mm256_blend_pd(__a, __b, 0x35);
+}
+
+__m256 test_256_blend_ps(__m256 __a, __m256 __b) {
+  // CHECK-LABEL: @test_256_blend_ps
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+  return _mm256_blend_ps(__a, __b, 0x35);
+}

Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=208664&r1=208663&r2=208664&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx2-builtins.c Mon May 12 21:37:02 2014
@@ -5,6 +5,11 @@
 
 #include <immintrin.h>
 
+// FIXME: We should lower as many of these as possible to LLVM IR without
+// intrinsics which will allow us to remove redundant intrinsics in the
+// future. Taking care that we manage to optimize them afterwards and emit
+// the actual instruction (or better code).
+
 __m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) {
   // CHECK: @llvm.x86.avx2.mpsadbw({{.*}}, {{.*}}, i32 3)
   return _mm256_mpsadbw_epu8(x, y, 3);
@@ -177,7 +182,8 @@ __m256i test_mm256_blendv_epi8(__m256i a
 }
 
 __m256i test_mm256_blend_epi16(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.pblendw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, i32 2)
+  // CHECK-LABEL: test_mm256_blend_epi16
+  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm256_blend_epi16(a, b, 2);
 }
 
@@ -612,13 +618,13 @@ __m256i test_mm256_broadcastsi128_si256(
 }
 
 __m128i test_mm_blend_epi32(__m128i a, __m128i b) {
-  // CHECK: @llvm.x86.avx2.pblendd.128
-  return _mm_blend_epi32(a, b, 57);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  return _mm_blend_epi32(a, b, 0x35);
 }
 
 __m256i test_mm256_blend_epi32(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.pblendd.256
-  return _mm256_blend_epi32(a, b, 57);
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+  return _mm256_blend_epi32(a, b, 0x35);
 }
 
 __m256i test_mm256_broadcastb_epi8(__m128i a) {

Modified: cfe/trunk/test/CodeGen/sse-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse-builtins.c?rev=208664&r1=208663&r2=208664&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/sse-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse-builtins.c Mon May 12 21:37:02 2014
@@ -237,3 +237,21 @@ void test_insert_epi32(__m128i __a, int
   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0
    _mm_insert_epi32(__a, b, 4);
 }
+
+__m128d test_blend_pd(__m128d V1, __m128d V2) {
+  // CHECK-LABEL: @test_blend_pd
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 2, i32 1>
+  return _mm_blend_pd(V1, V2, 1);
+}
+
+__m128 test_blend_ps(__m128 V1, __m128 V2) {
+  // CHECK-LABEL: @test_blend_ps
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  return _mm_blend_ps(V1, V2, 5);
+}
+
+__m128i test_blend_epi16(__m128i V1, __m128i V2) {
+  // CHECK-LABEL: @test_blend_epi16
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
+  return _mm_blend_epi16(V1, V2, 42);
+}