r271106 - [X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)

Simon Pilgrim via cfe-commits cfe-commits at lists.llvm.org
Sat May 28 01:12:48 PDT 2016


Author: rksimon
Date: Sat May 28 03:12:45 2016
New Revision: 271106

URL: http://llvm.org/viewvc/llvm-project?rev=271106&view=rev
Log:
[X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)

The VPMOVSX and (V)PMOVZX sign/zero extension intrinsics can be safely represented as generic __builtin_convertvector calls instead of x86 intrinsics.

This patch removes the clang builtins and their use in the sse2/avx headers - a companion patch will remove/auto-upgrade the llvm intrinsics.

Note: We already did this for SSE41 PMOVSX sometime ago.

Differential Revision: http://reviews.llvm.org/D20684

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/avx2intrin.h
    cfe/trunk/lib/Headers/smmintrin.h
    cfe/trunk/test/CodeGen/avx2-builtins.c
    cfe/trunk/test/CodeGen/builtins-x86.c
    cfe/trunk/test/CodeGen/sse41-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=271106&r1=271105&r2=271106&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat May 28 03:12:45 2016
@@ -382,12 +382,6 @@ TARGET_BUILTIN(__builtin_ia32_pminsb128,
 TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbd128, "V4iV16c", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbq128, "V2LLiV16c", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbw128, "V8sV16c", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxdq128, "V2LLiV4i", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwd128, "V4iV8s", "", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwq128, "V2LLiV8s", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pmulld128, "V4iV4iV4i", "", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "", "sse4.1")
@@ -558,18 +552,6 @@ TARGET_BUILTIN(__builtin_ia32_pminsb256,
 TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxbw256, "V16sV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxbd256, "V8iV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxbq256, "V4LLiV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxwd256, "V8iV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxwq256, "V4LLiV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovsxdq256, "V4LLiV4i", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbw256, "V16sV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbd256, "V8iV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxbq256, "V4LLiV16c", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwd256, "V8iV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxwq256, "V4LLiV8s", "", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovzxdq256, "V4LLiV4i", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "", "avx2")

Modified: cfe/trunk/lib/Headers/avx2intrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=271106&r1=271105&r2=271106&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx2intrin.h (original)
+++ cfe/trunk/lib/Headers/avx2intrin.h Sat May 28 03:12:45 2016
@@ -360,73 +360,85 @@ _mm256_movemask_epi8(__m256i __a)
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi16(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V);
+  /* This function always performs a signed extension, but __v16qi is a char
+     which may be signed or unsigned, so use __v16qs. */
+  return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi32(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V);
+  /* This function always performs a signed extension, but __v16qi is a char
+     which may be signed or unsigned, so use __v16qs. */
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V);
+  /* This function always performs a signed extension, but __v16qi is a char
+     which may be signed or unsigned, so use __v16qs. */
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi16_epi32(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V);
+  return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi16_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V);
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi32_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V);
+  return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi16(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi32(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu16_epi32(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V);
+  typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu16_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V);
+  typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu32_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V);
+  typedef unsigned int __v4su __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);
 }
 
 static __inline__  __m256i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/smmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/smmintrin.h?rev=271106&r1=271105&r2=271106&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/smmintrin.h (original)
+++ cfe/trunk/lib/Headers/smmintrin.h Sat May 28 03:12:45 2016
@@ -324,37 +324,43 @@ _mm_cvtepi32_epi64(__m128i __V)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi16(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi32(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi64(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu16_epi32(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V);
+  typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu16_epi64(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V);
+  typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu32_epi64(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V);
+  typedef unsigned int __v4su __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di);
 }
 
 /* SSE4 Pack with Unsigned Saturation.  */

Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=271106&r1=271105&r2=271106&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx2-builtins.c Sat May 28 03:12:45 2016
@@ -292,73 +292,79 @@ __m256i test_mm256_cmpgt_epi64(__m256i a
 
 __m256i test_mm256_cvtepi8_epi16(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepi8_epi16
-  // CHECK: call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %{{.*}})
+  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
   return _mm256_cvtepi8_epi16(a);
 }
 
 __m256i test_mm256_cvtepi8_epi32(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepi8_epi32
-  // CHECK: call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %{{.*}})
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: sext <8 x i8> %{{.*}} to <8 x i32>
   return _mm256_cvtepi8_epi32(a);
 }
 
 __m256i test_mm256_cvtepi8_epi64(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepi8_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %{{.*}})
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: sext <4 x i8> %{{.*}} to <4 x i64>
   return _mm256_cvtepi8_epi64(a);
 }
 
 __m256i test_mm256_cvtepi16_epi32(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepi16_epi32
-  // CHECK: call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %{{.*}})
+  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
   return _mm256_cvtepi16_epi32(a);
 }
 
 __m256i test_mm256_cvtepi16_epi64(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepi16_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %{{.*}})
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i64>
   return _mm256_cvtepi16_epi64(a);
 }
 
 __m256i test_mm256_cvtepi32_epi64(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepi32_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %{{.*}})
+  // CHECK: sext <4 x i32> %{{.*}} to <4 x i64>
   return _mm256_cvtepi32_epi64(a);
 }
 
 __m256i test_mm256_cvtepu8_epi16(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepu8_epi16
-  // CHECK: call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %{{.*}})
+  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
   return _mm256_cvtepu8_epi16(a);
 }
 
 __m256i test_mm256_cvtepu8_epi32(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepu8_epi32
-  // CHECK: call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %{{.*}})
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: zext <8 x i8> %{{.*}} to <8 x i32>
   return _mm256_cvtepu8_epi32(a);
 }
 
 __m256i test_mm256_cvtepu8_epi64(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepu8_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %{{.*}})
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: zext <4 x i8> %{{.*}} to <4 x i64>
   return _mm256_cvtepu8_epi64(a);
 }
 
 __m256i test_mm256_cvtepu16_epi32(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepu16_epi32
-  // CHECK: call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %{{.*}})
+  // CHECK: zext <8 x i16> {{.*}} to <8 x i32>
   return _mm256_cvtepu16_epi32(a);
 }
 
 __m256i test_mm256_cvtepu16_epi64(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepu16_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %{{.*}})
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: zext <4 x i16> %{{.*}} to <4 x i64>
   return _mm256_cvtepu16_epi64(a);
 }
 
 __m256i test_mm256_cvtepu32_epi64(__m128i a) {
   // CHECK-LABEL: test_mm256_cvtepu32_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %{{.*}})
+  // CHECK: zext <4 x i32> %{{.*}} to <4 x i64>
   return _mm256_cvtepu32_epi64(a);
 }
 

Modified: cfe/trunk/test/CodeGen/builtins-x86.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-x86.c?rev=271106&r1=271105&r2=271106&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-x86.c (original)
+++ cfe/trunk/test/CodeGen/builtins-x86.c Sat May 28 03:12:45 2016
@@ -387,12 +387,6 @@ void f0() {
   tmp_V4i = __builtin_ia32_pminsd128(tmp_V4i, tmp_V4i);
   tmp_V4i = __builtin_ia32_pminud128(tmp_V4i, tmp_V4i);
   tmp_V8s = __builtin_ia32_pminuw128(tmp_V8s, tmp_V8s);
-  tmp_V4i = __builtin_ia32_pmovzxbd128(tmp_V16c);
-  tmp_V2LLi = __builtin_ia32_pmovzxbq128(tmp_V16c);
-  tmp_V8s = __builtin_ia32_pmovzxbw128(tmp_V16c);
-  tmp_V2LLi = __builtin_ia32_pmovzxdq128(tmp_V4i);
-  tmp_V4i = __builtin_ia32_pmovzxwd128(tmp_V8s);
-  tmp_V2LLi = __builtin_ia32_pmovzxwq128(tmp_V8s);
   tmp_V2LLi = __builtin_ia32_pmuldq128(tmp_V4i, tmp_V4i);
   tmp_V4i = __builtin_ia32_pmulld128(tmp_V4i, tmp_V4i);
   tmp_V4f = __builtin_ia32_roundps(tmp_V4f, imm_i_0_16);

Modified: cfe/trunk/test/CodeGen/sse41-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse41-builtins.c?rev=271106&r1=271105&r2=271106&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/sse41-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse41-builtins.c Sat May 28 03:12:45 2016
@@ -119,37 +119,43 @@ __m128i test_mm_cvtepi32_epi64(__m128i a
 
 __m128i test_mm_cvtepu8_epi16(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu8_epi16
-  // CHECK: call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> {{.*}})
+  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: zext <8 x i8> {{.*}} to <8 x i16>
   return _mm_cvtepu8_epi16(a);
 }
 
 __m128i test_mm_cvtepu8_epi32(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu8_epi32
-  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> {{.*}})
+  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: zext <4 x i8> {{.*}} to <4 x i32>
   return _mm_cvtepu8_epi32(a);
 }
 
 __m128i test_mm_cvtepu8_epi64(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu8_epi64
-  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> {{.*}})
+  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> <i32 0, i32 1>
+  // CHECK: zext <2 x i8> {{.*}} to <2 x i64>
   return _mm_cvtepu8_epi64(a);
 }
 
 __m128i test_mm_cvtepu16_epi32(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu16_epi32
-  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> {{.*}})
+  // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: zext <4 x i16> {{.*}} to <4 x i32>
   return _mm_cvtepu16_epi32(a);
 }
 
 __m128i test_mm_cvtepu16_epi64(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu16_epi64
-  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> {{.*}})
+  // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> <i32 0, i32 1>
+  // CHECK: zext <2 x i16> {{.*}} to <2 x i64>
   return _mm_cvtepu16_epi64(a);
 }
 
 __m128i test_mm_cvtepu32_epi64(__m128i a) {
   // CHECK-LABEL: test_mm_cvtepu32_epi64
-  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}})
+  // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> <i32 0, i32 1>
+  // CHECK: zext <2 x i32> {{.*}} to <2 x i64>
   return _mm_cvtepu32_epi64(a);
 }
 




More information about the cfe-commits mailing list