[cfe-commits] r146969 - in /cfe/trunk: include/clang/Basic/BuiltinsX86.def lib/Headers/avx2intrin.h lib/Headers/smmintrin.h test/CodeGen/avx2-builtins.c test/CodeGen/builtins-x86.c

Craig Topper craig.topper at gmail.com
Tue Dec 20 01:55:26 PST 2011


Author: ctopper
Date: Tue Dec 20 03:55:26 2011
New Revision: 146969

URL: http://llvm.org/viewvc/llvm-project?rev=146969&view=rev
Log:
Add AVX2 intrinsics for pavg, pblend, and pcmp instructions. Also remove unneeded builtins for SSE pcmp. Change SSE pcmpeqq and pcmpgtq to not use builtins and just use vector == and >.

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/avx2intrin.h
    cfe/trunk/lib/Headers/smmintrin.h
    cfe/trunk/test/CodeGen/avx2-builtins.c
    cfe/trunk/test/CodeGen/builtins-x86.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=146969&r1=146968&r2=146969&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Dec 20 03:55:26 2011
@@ -210,12 +210,6 @@
 BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "")
 BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "")
 BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "")
-BUILTIN(__builtin_ia32_pcmpeqb128, "V16cV16cV16c", "")
-BUILTIN(__builtin_ia32_pcmpeqw128, "V8sV8sV8s", "")
-BUILTIN(__builtin_ia32_pcmpeqd128, "V4iV4iV4i", "")
-BUILTIN(__builtin_ia32_pcmpgtb128, "V16cV16cV16c", "")
-BUILTIN(__builtin_ia32_pcmpgtw128, "V8sV8sV8s", "")
-BUILTIN(__builtin_ia32_pcmpgtd128, "V4iV4iV4i", "")
 BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "")
 BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "")
 BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "")
@@ -353,7 +347,6 @@
 BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "")
 BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "")
 BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "")
-BUILTIN(__builtin_ia32_pcmpeqq, "V2LLiV2LLiV2LLi", "")
 BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16ci", "")
 
 // SSE 4.2
@@ -374,8 +367,6 @@
 //BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","")
 //BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","")
 
-BUILTIN(__builtin_ia32_pcmpgtq, "V2LLiV2LLiV2LLi", "")
-
 BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "")
 BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "")
 BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "")
@@ -499,5 +490,9 @@
 BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "")
 BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "")
 BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIc", "")
+BUILTIN(__builtin_ia32_pavgb256, "V32cV32cV32c", "")
+BUILTIN(__builtin_ia32_pavgw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "")
+BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIi", "")
 
 #undef BUILTIN

Modified: cfe/trunk/lib/Headers/avx2intrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=146969&r1=146968&r2=146969&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx2intrin.h (original)
+++ cfe/trunk/lib/Headers/avx2intrin.h Tue Dec 20 03:55:26 2011
@@ -136,6 +136,78 @@
 }
 
 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_avg_epu8(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_pavgb256((__v32qi)a, (__v32qi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_avg_epu16(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_pavgw256((__v16hi)a, (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
+{
+  return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
+                                              (__v32qi)__M);
+}
+
+#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
+  __m256i __V1 = (V1); \
+  __m256i __V2 = (V2); \
+  (__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, M); })
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi8(__m256i a, __m256i b)
+{
+  return (__m256i)((__v32qi)a == (__v32qi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)((__v16hi)a == (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi32(__m256i a, __m256i b)
+{
+  return (__m256i)((__v8si)a == (__v8si)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi64(__m256i a, __m256i b)
+{
+  return (__m256i)((__v4di)a == (__v4di)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi8(__m256i a, __m256i b)
+{
+  return (__m256i)((__v32qi)a > (__v32qi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)((__v16hi)a > (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi32(__m256i a, __m256i b)
+{
+  return (__m256i)((__v8si)a > (__v8si)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi64(__m256i a, __m256i b)
+{
+  return (__m256i)((__v4di)a > (__v4di)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_or_si256(__m256i a, __m256i b)
 {
   return a | b;

Modified: cfe/trunk/lib/Headers/smmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/smmintrin.h?rev=146969&r1=146968&r2=146969&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/smmintrin.h (original)
+++ cfe/trunk/lib/Headers/smmintrin.h Tue Dec 20 03:55:26 2011
@@ -245,7 +245,7 @@
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pcmpeqq((__v2di)__V1, (__v2di)__V2);
+  return (__m128i)((__v2di)__V1 == (__v2di)__V2);
 }
 
 /* SSE4 Packed Integer Sign-Extension.  */
@@ -398,7 +398,7 @@
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
 {
-  return __builtin_ia32_pcmpgtq((__v2di)__V1, (__v2di)__V2);
+  return (__m128i)((__v2di)__V1 > (__v2di)__V2);
 }
 
 /* SSE4.2 Accumulate CRC32.  */

Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=146969&r1=146968&r2=146969&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx2-builtins.c Tue Dec 20 03:55:26 2011
@@ -155,3 +155,63 @@
   // CHECK: xor <4 x i64>
   return _mm256_xor_si256(a, b);
 }
+
+__m256 test_mm256_avg_epu8(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.pavg.b
+  return _mm256_avg_epu8(a, b);
+}
+
+__m256 test_mm256_avg_epu16(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.pavg.w
+  return _mm256_avg_epu16(a, b);
+}
+
+__m256 test_mm256_blendv_epi8(__m256 a, __m256 b, __m256 m) {
+  // CHECK: @llvm.x86.avx2.pblendvb
+  return _mm256_blendv_epi8(a, b, m);
+}
+
+__m256 test_mm256_blend_epi16(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.pblendw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, i32 2)
+  return _mm256_blend_epi16(a, b, 2);
+}
+
+__m256 test_mm256_cmpeq_epi8(__m256 a, __m256 b) {
+  // CHECK: icmp eq <32 x i8>
+  return _mm256_cmpeq_epi8(a, b);
+}
+
+__m256 test_mm256_cmpeq_epi16(__m256 a, __m256 b) {
+  // CHECK: icmp eq <16 x i16>
+  return _mm256_cmpeq_epi16(a, b);
+}
+
+__m256 test_mm256_cmpeq_epi32(__m256 a, __m256 b) {
+  // CHECK: icmp eq <8 x i32>
+  return _mm256_cmpeq_epi32(a, b);
+}
+
+__m256 test_mm256_cmpeq_epi64(__m256 a, __m256 b) {
+  // CHECK: icmp eq <4 x i64>
+  return _mm256_cmpeq_epi64(a, b);
+}
+
+__m256 test_mm256_cmpgt_epi8(__m256 a, __m256 b) {
+  // CHECK: icmp sgt <32 x i8>
+  return _mm256_cmpgt_epi8(a, b);
+}
+
+__m256 test_mm256_cmpgt_epi16(__m256 a, __m256 b) {
+  // CHECK: icmp sgt <16 x i16>
+  return _mm256_cmpgt_epi16(a, b);
+}
+
+__m256 test_mm256_cmpgt_epi32(__m256 a, __m256 b) {
+  // CHECK: icmp sgt <8 x i32>
+  return _mm256_cmpgt_epi32(a, b);
+}
+
+__m256 test_mm256_cmpgt_epi64(__m256 a, __m256 b) {
+  // CHECK: icmp sgt <4 x i64>
+  return _mm256_cmpgt_epi64(a, b);
+}

Modified: cfe/trunk/test/CodeGen/builtins-x86.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-x86.c?rev=146969&r1=146968&r2=146969&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-x86.c (original)
+++ cfe/trunk/test/CodeGen/builtins-x86.c Tue Dec 20 03:55:26 2011
@@ -199,12 +199,6 @@
   tmp_V8s = __builtin_ia32_pmulhw128(tmp_V8s, tmp_V8s);
   tmp_V16c = __builtin_ia32_pavgb128(tmp_V16c, tmp_V16c);
   tmp_V8s = __builtin_ia32_pavgw128(tmp_V8s, tmp_V8s);
-  tmp_V16c = __builtin_ia32_pcmpeqb128(tmp_V16c, tmp_V16c);
-  tmp_V8s = __builtin_ia32_pcmpeqw128(tmp_V8s, tmp_V8s);
-  tmp_V4i = __builtin_ia32_pcmpeqd128(tmp_V4i, tmp_V4i);
-  tmp_V16c = __builtin_ia32_pcmpgtb128(tmp_V16c, tmp_V16c);
-  tmp_V8s = __builtin_ia32_pcmpgtw128(tmp_V8s, tmp_V8s);
-  tmp_V4i = __builtin_ia32_pcmpgtd128(tmp_V4i, tmp_V4i);
   tmp_V16c = __builtin_ia32_pmaxub128(tmp_V16c, tmp_V16c);
   tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s);
   tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c);





More information about the cfe-commits mailing list