r280197 - [AVX-512] Implement masked floating point logical operations with native IR and remove the builtins.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Tue Aug 30 22:38:59 PDT 2016


Author: ctopper
Date: Wed Aug 31 00:38:58 2016
New Revision: 280197

URL: http://llvm.org/viewvc/llvm-project?rev=280197&view=rev
Log:
[AVX-512] Implement masked floating point logical operations with native IR and remove the builtins.
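
For reference, the rewrite follows a single pattern throughout: each masked
floating point logical operation is now expressed as the existing unmasked
intrinsic (which Clang already emits as native IR) wrapped in the generic
select builtin, instead of a dedicated *_mask builtin per operation, type,
and width. A minimal before/after sketch of the pattern, taken from the
512-bit xor case in the diff below:

  /* Before: one target-specific builtin per operation, type, and width. */
  static __inline__ __m512d __DEFAULT_FN_ATTRS
  _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
    return (__m512d)__builtin_ia32_xorpd512_mask((__v8df)__A, (__v8df)__B,
                                                 (__v8df)__W, (__mmask8)__U);
  }

  /* After: native IR xor via the unmasked intrinsic, then a generic select. */
  static __inline__ __m512d __DEFAULT_FN_ATTRS
  _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
    return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                                (__v8df)_mm512_xor_pd(__A, __B),
                                                (__v8df)__W);
  }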

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/avx512dqintrin.h
    cfe/trunk/lib/Headers/avx512vldqintrin.h
    cfe/trunk/test/CodeGen/avx512dq-builtins.c
    cfe/trunk/test/CodeGen/avx512vldq-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=280197&r1=280196&r2=280197&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Aug 31 00:38:58 2016
@@ -1131,32 +1131,8 @@ TARGET_BUILTIN(__builtin_ia32_psubd512_m
 
 TARGET_BUILTIN(__builtin_ia32_pmulld512_mask, "V16iV16iV16iV16iUs", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmullq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_pmullq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_pmullq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")
 
 TARGET_BUILTIN(__builtin_ia32_pabsb512_mask, "V64cV64cV64cULLi", "", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pabsw512_mask, "V32sV32sV32sUi", "", "avx512bw")

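Note that with these entries deleted, the __builtin_ia32_{and,andn,or,xor}{pd,ps}{128,256,512}_mask names no longer exist at all, so the header changes below must land in the same commit; any code that called one of the removed builtins directly will now fail to compile.
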
Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=280197&r1=280196&r2=280197&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Wed Aug 31 00:38:58 2016
@@ -54,179 +54,155 @@ _mm512_maskz_mullo_epi64 (__mmask8 __U,
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_xor_pd (__m512d __A, __m512d __B) {
-  return (__m512d) ((__v8du) __A ^ (__v8du) __B);
+_mm512_xor_pd(__m512d __A, __m512d __B) {
+  return (__m512d)((__v8du)__A ^ (__v8du)__B);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df) __W,
-             (__mmask8) __U);
+_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_xor_pd(__A, __B),
+                                              (__v8df)__W);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df)
-             _mm512_setzero_pd (),
-             (__mmask8) __U);
+_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_xor_pd(__A, __B),
+                                              (__v8df)_mm512_setzero_pd());
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_xor_ps (__m512 __A, __m512 __B) {
-  return (__m512) ((__v16su) __A ^ (__v16su) __B);
+  return (__m512)((__v16su)__A ^ (__v16su)__B);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
-            (__v16sf) __B,
-            (__v16sf) __W,
-            (__mmask16) __U);
+_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_xor_ps(__A, __B),
+                                             (__v16sf)__W);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
-            (__v16sf) __B,
-            (__v16sf)
-            _mm512_setzero_ps (),
-            (__mmask16) __U);
+_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_xor_ps(__A, __B),
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_or_pd (__m512d __A, __m512d __B) {
-  return (__m512d) ((__v8du) __A | (__v8du) __B);
+_mm512_or_pd(__m512d __A, __m512d __B) {
+  return (__m512d)((__v8du)__A | (__v8du)__B);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
-            (__v8df) __B,
-            (__v8df) __W,
-            (__mmask8) __U);
+_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_or_pd(__A, __B),
+                                              (__v8df)__W);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
-            (__v8df) __B,
-            (__v8df)
-            _mm512_setzero_pd (),
-            (__mmask8) __U);
+_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_or_pd(__A, __B),
+                                              (__v8df)_mm512_setzero_pd());
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_or_ps (__m512 __A, __m512 __B) {
-  return (__m512) ((__v16su) __A | (__v16su) __B);
+_mm512_or_ps(__m512 __A, __m512 __B) {
+  return (__m512)((__v16su)__A | (__v16su)__B);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
-                 (__v16sf) __B,
-                 (__v16sf) __W,
-                 (__mmask16) __U);
+_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_or_ps(__A, __B),
+                                             (__v16sf)__W);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
-                 (__v16sf) __B,
-                 (__v16sf)
-                 _mm512_setzero_ps (),
-                 (__mmask16) __U);
+_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_or_ps(__A, __B),
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_and_pd (__m512d __A, __m512d __B) {
-  return (__m512d) ((__v8du) __A & (__v8du) __B);
+_mm512_and_pd(__m512d __A, __m512d __B) {
+  return (__m512d)((__v8du)__A & (__v8du)__B);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df) __W,
-             (__mmask8) __U);
+_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_and_pd(__A, __B),
+                                              (__v8df)__W);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df)
-             _mm512_setzero_pd (),
-             (__mmask8) __U);
+_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_and_pd(__A, __B),
+                                              (__v8df)_mm512_setzero_pd());
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_and_ps (__m512 __A, __m512 __B) {
-  return (__m512) ((__v16su) __A & (__v16su) __B);
+_mm512_and_ps(__m512 __A, __m512 __B) {
+  return (__m512)((__v16su)__A & (__v16su)__B);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
-            (__v16sf) __B,
-            (__v16sf) __W,
-            (__mmask16) __U);
+_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_and_ps(__A, __B),
+                                             (__v16sf)__W);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
-            (__v16sf) __B,
-            (__v16sf)
-            _mm512_setzero_ps (),
-            (__mmask16) __U);
+_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_and_ps(__A, __B),
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_andnot_pd (__m512d __A, __m512d __B) {
+_mm512_andnot_pd(__m512d __A, __m512d __B) {
   return (__m512d)(~(__v8du)__A & (__v8du)__B);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
-              (__v8df) __B,
-              (__v8df) __W,
-              (__mmask8) __U);
+_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_andnot_pd(__A, __B),
+                                              (__v8df)__W);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
-              (__v8df) __B,
-              (__v8df)
-              _mm512_setzero_pd (),
-              (__mmask8) __U);
+_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_andnot_pd(__A, __B),
+                                              (__v8df)_mm512_setzero_pd());
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_andnot_ps (__m512 __A, __m512 __B) {
+_mm512_andnot_ps(__m512 __A, __m512 __B) {
   return (__m512)(~(__v16su)__A & (__v16su)__B);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
-             (__v16sf) __B,
-             (__v16sf) __W,
-             (__mmask16) __U);
+_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_andnot_ps(__A, __B),
+                                             (__v16sf)__W);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
-             (__v16sf) __B,
-             (__v16sf)
-             _mm512_setzero_ps (),
-             (__mmask16) __U);
+_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_andnot_ps(__A, __B),
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS

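A small, self-contained usage sketch of the rewritten 512-bit entry points (the demo functions and the 0xA5 mask are hypothetical, for illustration only). Built with -mavx512dq, the masked forms now emit the plain IR xor/bitcast/select sequence that the updated tests below check for:

  #include <immintrin.h>

  /* Merge-masking: lanes whose mask bit is 0 keep the value from w. */
  __m512d demo_mask_xor(__m512d w, __m512d a, __m512d b) {
    return _mm512_mask_xor_pd(w, (__mmask8)0xA5, a, b);
  }

  /* Zero-masking: lanes whose mask bit is 0 are set to +0.0. */
  __m512d demo_maskz_xor(__m512d a, __m512d b) {
    return _mm512_maskz_xor_pd((__mmask8)0xA5, a, b);
  }
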
Modified: cfe/trunk/lib/Headers/avx512vldqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vldqintrin.h?rev=280197&r1=280196&r2=280197&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vldqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vldqintrin.h Wed Aug 31 00:38:58 2016
@@ -76,276 +76,227 @@ _mm_maskz_mullo_epi64 (__mmask8 __U, __m
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
-              (__v4df) __B,
-              (__v4df) __W,
-              (__mmask8) __U);
+_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_andnot_pd(__A, __B),
+                                              (__v4df)__W);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
-              (__v4df) __B,
-              (__v4df)
-              _mm256_setzero_pd (),
-              (__mmask8) __U);
+_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_andnot_pd(__A, __B),
+                                              (__v4df)_mm256_setzero_pd());
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
-              (__v2df) __B,
-              (__v2df) __W,
-              (__mmask8) __U);
+_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_andnot_pd(__A, __B),
+                                              (__v2df)__W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
-              (__v2df) __B,
-              (__v2df)
-              _mm_setzero_pd (),
-              (__mmask8) __U);
+_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_andnot_pd(__A, __B),
+                                              (__v2df)_mm_setzero_pd());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
-             (__v8sf) __B,
-             (__v8sf) __W,
-             (__mmask8) __U);
+_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_andnot_ps(__A, __B),
+                                             (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
-             (__v8sf) __B,
-             (__v8sf)
-             _mm256_setzero_ps (),
-             (__mmask8) __U);
+_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_andnot_ps(__A, __B),
+                                             (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf) __W,
-             (__mmask8) __U);
+_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_andnot_ps(__A, __B),
+                                             (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf)
-             _mm_setzero_ps (),
-             (__mmask8) __U);
+_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_andnot_ps(__A, __B),
+                                             (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df) __W,
-             (__mmask8) __U);
+_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_and_pd(__A, __B),
+                                              (__v4df)__W);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df)
-             _mm256_setzero_pd (),
-             (__mmask8) __U);
+_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_and_pd(__A, __B),
+                                              (__v4df)_mm256_setzero_pd());
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df) __W,
-             (__mmask8) __U);
+_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_and_pd(__A, __B),
+                                              (__v2df)__W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df)
-             _mm_setzero_pd (),
-             (__mmask8) __U);
+_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_and_pd(__A, __B),
+                                              (__v2df)_mm_setzero_pd());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
-            (__v8sf) __B,
-            (__v8sf) __W,
-            (__mmask8) __U);
+_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_and_ps(__A, __B),
+                                             (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
-            (__v8sf) __B,
-            (__v8sf)
-            _mm256_setzero_ps (),
-            (__mmask8) __U);
+_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_and_ps(__A, __B),
+                                             (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
-            (__v4sf) __B,
-            (__v4sf) __W,
-            (__mmask8) __U);
+_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_and_ps(__A, __B),
+                                             (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
-            (__v4sf) __B,
-            (__v4sf)
-            _mm_setzero_ps (),
-            (__mmask8) __U);
+_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_and_ps(__A, __B),
+                                             (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
-        __m256d __B) {
-  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df) __W,
-             (__mmask8) __U);
+_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_xor_pd(__A, __B),
+                                              (__v4df)__W);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df)
-             _mm256_setzero_pd (),
-             (__mmask8) __U);
+_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_xor_pd(__A, __B),
+                                              (__v4df)_mm256_setzero_pd());
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df) __W,
-             (__mmask8) __U);
+_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_xor_pd(__A, __B),
+                                              (__v2df)__W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df)
-             _mm_setzero_pd (),
-             (__mmask8) __U);
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_xor_pd(__A, __B),
+                                              (__v2df)_mm_setzero_pd());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
-            (__v8sf) __B,
-            (__v8sf) __W,
-            (__mmask8) __U);
+_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_xor_ps(__A, __B),
+                                             (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
-            (__v8sf) __B,
-            (__v8sf)
-            _mm256_setzero_ps (),
-            (__mmask8) __U);
+_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_xor_ps(__A, __B),
+                                             (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
-            (__v4sf) __B,
-            (__v4sf) __W,
-            (__mmask8) __U);
+_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_xor_ps(__A, __B),
+                                             (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
-            (__v4sf) __B,
-            (__v4sf)
-            _mm_setzero_ps (),
-            (__mmask8) __U);
+_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_xor_ps(__A, __B),
+                                             (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
-            (__v4df) __B,
-            (__v4df) __W,
-            (__mmask8) __U);
+_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_or_pd(__A, __B),
+                                              (__v4df)__W);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
-            (__v4df) __B,
-            (__v4df)
-            _mm256_setzero_pd (),
-            (__mmask8) __U);
+_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_or_pd(__A, __B),
+                                              (__v4df)_mm256_setzero_pd());
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
-            (__v2df) __B,
-            (__v2df) __W,
-            (__mmask8) __U);
+_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_or_pd(__A, __B),
+                                              (__v2df)__W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
-            (__v2df) __B,
-            (__v2df)
-            _mm_setzero_pd (),
-            (__mmask8) __U);
+_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_or_pd(__A, __B),
+                                              (__v2df)_mm_setzero_pd());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
-                 (__v8sf) __B,
-                 (__v8sf) __W,
-                 (__mmask8) __U);
+_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_or_ps(__A, __B),
+                                             (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
-                 (__v8sf) __B,
-                 (__v8sf)
-                 _mm256_setzero_ps (),
-                 (__mmask8) __U);
+_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_or_ps(__A, __B),
+                                             (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
-                 (__v4sf) __B,
-                 (__v4sf) __W,
-                 (__mmask8) __U);
+_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_or_ps(__A, __B),
+                                             (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
-                 (__v4sf) __B,
-                 (__v4sf)
-                 _mm_setzero_ps (),
-                 (__mmask8) __U);
+_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_or_ps(__A, __B),
+                                             (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=280197&r1=280196&r2=280197&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Wed Aug 31 00:38:58 2016
@@ -31,13 +31,17 @@ __m512d test_mm512_xor_pd (__m512d __A,
 
 __m512d test_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.512
+  // CHECK: xor <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_mask_xor_pd(__W, __U, __A, __B);
 }
 
 __m512d test_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.512
+  // CHECK: xor <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_maskz_xor_pd(__U, __A, __B);
 }
 
@@ -49,13 +53,17 @@ __m512 test_mm512_xor_ps (__m512 __A, __
 
 __m512 test_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.512
+  // CHECK: xor <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_mask_xor_ps(__W, __U, __A, __B);
 }
 
 __m512 test_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.512
+  // CHECK: xor <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_maskz_xor_ps(__U, __A, __B);
 }
 
@@ -67,13 +75,17 @@ __m512d test_mm512_or_pd (__m512d __A, _
 
 __m512d test_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.512
+  // CHECK: or <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_mask_or_pd(__W, __U, __A, __B);
 }
 
 __m512d test_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.512
+  // CHECK: or <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_maskz_or_pd(__U, __A, __B);
 }
 
@@ -85,13 +97,17 @@ __m512 test_mm512_or_ps (__m512 __A, __m
 
 __m512 test_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.512
+  // CHECK: or <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_mask_or_ps(__W, __U, __A, __B);
 }
 
 __m512 test_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.512
+  // CHECK: or <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_maskz_or_ps(__U, __A, __B);
 }
 
@@ -103,13 +119,17 @@ __m512d test_mm512_and_pd (__m512d __A,
 
 __m512d test_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.512
+  // CHECK: and <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_mask_and_pd(__W, __U, __A, __B);
 }
 
 __m512d test_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.512
+  // CHECK: and <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_maskz_and_pd(__U, __A, __B);
 }
 
@@ -121,13 +141,17 @@ __m512 test_mm512_and_ps (__m512 __A, __
 
 __m512 test_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.512
+  // CHECK: and <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_mask_and_ps(__W, __U, __A, __B);
 }
 
 __m512 test_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.512
+  // CHECK: and <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_maskz_and_ps(__U, __A, __B);
 }
 
@@ -140,13 +164,17 @@ __m512d test_mm512_andnot_pd (__m512d __
 
 __m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.512
+  // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: and <8 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_mask_andnot_pd(__W, __U, __A, __B);
 }
 
 __m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.512
+  // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: and <8 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_maskz_andnot_pd(__U, __A, __B);
 }
 
@@ -159,13 +187,17 @@ __m512 test_mm512_andnot_ps (__m512 __A,
 
 __m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.512
+  // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_mask_andnot_ps(__W, __U, __A, __B);
 }
 
 __m512 test_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.512
+  // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_maskz_andnot_ps(__U, __A, __B);
 }
 

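One detail worth keeping in mind when reading the andnot checks above: the operation complements its first operand, not its second, which is why the expected IR is an xor with all-ones followed by an and. A minimal scalar model of a single lane of the masked form (hypothetical helper name, for illustration only):

  #include <stdint.h>

  /* One 64-bit lane of _mm512_mask_andnot_pd, viewing the double as its
     bit pattern; 'keep' is the lane's bit from the __U mask. */
  static uint64_t mask_andnot_lane(uint64_t w, int keep,
                                   uint64_t a, uint64_t b) {
    uint64_t r = ~a & b;   /* andnot: NOT of the first operand, AND second */
    return keep ? r : w;   /* unselected lanes fall back to __W */
  }
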
Modified: cfe/trunk/test/CodeGen/avx512vldq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vldq-builtins.c?rev=280197&r1=280196&r2=280197&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vldq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vldq-builtins.c Wed Aug 31 00:38:58 2016
@@ -43,193 +43,233 @@ __m128i test_mm_maskz_mullo_epi64 (__mma
 
 __m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.256
+  // CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_mask_andnot_pd ( __W, __U, __A, __B);
 }
 
 __m256d test_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_maskz_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.256
+  // CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_maskz_andnot_pd (__U, __A, __B);
 }
 
 __m128d test_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.128
+  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_mask_andnot_pd ( __W, __U, __A, __B);
 }
 
 __m128d test_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_maskz_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.128
+  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_maskz_andnot_pd (__U, __A, __B);
 }
 
 __m256 test_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.256
+  // CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_mask_andnot_ps ( __W, __U, __A, __B);
 }
 
 __m256 test_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_maskz_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.256
+  // CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_maskz_andnot_ps (__U, __A, __B);
 }
 
 __m128 test_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.128
+  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_mask_andnot_ps ( __W, __U, __A, __B);
 }
 
 __m128 test_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_maskz_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.128
+  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_maskz_andnot_ps (__U, __A, __B);
 }
 
 __m256d test_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_mask_and_pd ( __W, __U, __A, __B);
 }
 
 __m256d test_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_maskz_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_maskz_and_pd (__U, __A, __B);
 }
 
 __m128d test_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_mask_and_pd ( __W, __U, __A, __B);
 }
 
 __m128d test_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_maskz_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_maskz_and_pd (__U, __A, __B);
 }
 
 __m256 test_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.256
+  // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_mask_and_ps ( __W, __U, __A, __B);
 }
 
 __m256 test_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_maskz_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.256
+  // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_maskz_and_ps (__U, __A, __B);
 }
 
 __m128 test_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.128
+  // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_mask_and_ps ( __W, __U, __A, __B);
 }
 
 __m128 test_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_maskz_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.128
+  // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_maskz_and_ps (__U, __A, __B);
 }
 
 __m256d test_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.256
+  // CHECK: xor <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_mask_xor_pd ( __W, __U, __A, __B);
 }
 
 __m256d test_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_maskz_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.256
+  // CHECK: xor <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_maskz_xor_pd (__U, __A, __B);
 }
 
 __m128d test_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.128
+  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_mask_xor_pd ( __W, __U, __A, __B);
 }
 
 __m128d test_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_maskz_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.128
+  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_maskz_xor_pd (__U, __A, __B);
 }
 
 __m256 test_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.256
+  // CHECK: xor <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_mask_xor_ps ( __W, __U, __A, __B);
 }
 
 __m256 test_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_maskz_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.256
+  // CHECK: xor <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_maskz_xor_ps (__U, __A, __B);
 }
 
 __m128 test_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.128
-    return (__m128) _mm_mask_xor_ps ( __W, __U, __A, __B);
+  // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  return (__m128) _mm_mask_xor_ps ( __W, __U, __A, __B);
 }
 
 __m128 test_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_maskz_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.128
+  // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_maskz_xor_ps (__U, __A, __B);
 }
 
 __m256d test_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.256
+  // CHECK: or <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_mask_or_pd ( __W, __U, __A, __B);
 }
 
 __m256d test_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_maskz_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.256
+  // CHECK: or <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_maskz_or_pd (__U, __A, __B);
 }
 
 __m128d test_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.128
+  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_mask_or_pd ( __W, __U, __A, __B);
 }
 
 __m128d test_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_maskz_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.128
+  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_maskz_or_pd (__U, __A, __B);
 }
 
 __m256 test_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.256
+  // CHECK: or <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_mask_or_ps ( __W, __U, __A, __B);
 }
 
 __m256 test_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_maskz_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.256
+  // CHECK: or <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_maskz_or_ps (__U, __A, __B);
 }
 
 __m128 test_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.128
+  // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_mask_or_ps ( __W, __U, __A, __B);
 }
 
 __m128 test_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_maskz_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.128
+  // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_maskz_or_ps(__U, __A, __B);
 }
 
