r273533 - [AVX512] Replace masked unpack builtins with shufflevector and selects.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 22 23:36:43 PDT 2016


Author: ctopper
Date: Thu Jun 23 01:36:42 2016
New Revision: 273533

URL: http://llvm.org/viewvc/llvm-project?rev=273533&view=rev
Log:
[AVX512] Replace masked unpack builtins with shufflevector and selects.

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/avx512bwintrin.h
    cfe/trunk/lib/Headers/avx512fintrin.h
    cfe/trunk/lib/Headers/avx512vlbwintrin.h
    cfe/trunk/lib/Headers/avx512vlintrin.h
    cfe/trunk/test/CodeGen/avx512bw-builtins.c
    cfe/trunk/test/CodeGen/avx512f-builtins.c
    cfe/trunk/test/CodeGen/avx512vl-builtins.c
    cfe/trunk/test/CodeGen/avx512vlbw-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=273533&r1=273532&r2=273533&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Jun 23 01:36:42 2016
@@ -1469,10 +1469,6 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2var
 TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpckhbw512_mask, "V64cV64cV64cV64cULLi", "", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpckhwd512_mask, "V32sV32sV32sV32sUi", "", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpcklbw512_mask, "V64cV64cV64cV64cULLi", "", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpcklwd512_mask, "V32sV32sV32sV32sUi", "", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "", "avx512vl,avx512dq")
@@ -1525,14 +1521,6 @@ TARGET_BUILTIN(__builtin_ia32_pmulhuw128
 TARGET_BUILTIN(__builtin_ia32_pmulhuw256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmulhw128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmulhw256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpckhbw128_mask, "V16cV16cV16cV16cUs", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpckhbw256_mask, "V32cV32cV32cV32cUi", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpckhwd128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpckhwd256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpcklbw128_mask, "V16cV16cV16cV16cUs", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpcklbw256_mask, "V32cV32cV32cV32cUi", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpcklwd128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_punpcklwd256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8LLiV8fV8LLiUcIi", "", "avx512dq")
@@ -1776,18 +1764,6 @@ TARGET_BUILTIN(__builtin_ia32_storeupd12
 TARGET_BUILTIN(__builtin_ia32_storeupd256_mask, "vV4d*V4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storeups128_mask, "vV4f*V4fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storeups256_mask, "vV8f*V8fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_unpckhpd512_mask, "V8dV8dV8dV8dUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_unpckhps512_mask, "V16fV16fV16fV16fUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_unpcklpd512_mask, "V8dV8dV8dV8dUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_unpcklps512_mask, "V16fV16fV16fV16fUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_unpckhpd128_mask, "V2dV2dV2dV2dUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_unpckhpd256_mask, "V4dV4dV4dV4dUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_unpckhps128_mask, "V4fV4fV4fV4fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_unpckhps256_mask, "V8fV8fV8fV8fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_unpcklpd128_mask, "V2dV2dV2dV2dUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_unpcklpd256_mask, "V4dV4dV4dV4dUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_unpcklps128_mask, "V4fV4fV4fV4fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_unpcklps256_mask, "V8fV8fV8fV8fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rcp14pd128_mask, "V2dV2dV2dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rcp14pd256_mask, "V4dV4dV4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rcp14ps128_mask, "V4fV4fV4fUc","","avx512vl")
@@ -1854,18 +1830,6 @@ TARGET_BUILTIN(__builtin_ia32_ptestnmq12
 TARGET_BUILTIN(__builtin_ia32_ptestnmq256, "UcV4LLiV4LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_ptestnmd512, "UsV16iV16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_ptestnmq512, "UcV8LLiV8LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_punpckhdq512_mask, "V16iV16iV16iV16iUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_punpckhqdq512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_punpckldq512_mask, "V16iV16iV16iV16iUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_punpcklqdq512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_punpckhdq128_mask, "V4iV4iV4iV4iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_punpckhdq256_mask, "V8iV8iV8iV8iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_punpckhqdq128_mask, "V2LLiV2LLiV2LLiV2LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_punpckhqdq256_mask, "V4LLiV4LLiV4LLiV4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_punpckldq128_mask, "V4iV4iV4iV4iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_punpckldq256_mask, "V8iV8iV8iV8iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_punpcklqdq128_mask, "V2LLiV2LLiV2LLiV2LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_punpcklqdq256_mask, "V4LLiV4LLiV4LLiV4LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, "V2dV2dV2dV2dUcIiIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi","","avx512f")

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=273533&r1=273532&r2=273533&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Thu Jun 23 01:36:42 2016
@@ -1395,103 +1395,127 @@ _mm512_maskz_cvtepi16_epi8 (__mmask32 __
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpackhi_epi8 (__m512i __A, __m512i __B) {
-  return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
-                 (__v64qi) __B,
-                 (__v64qi) _mm512_setzero_qi(),
-                 (__mmask64) -1);
+_mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
+                                          8,  64+8,   9, 64+9,
+                                          10, 64+10, 11, 64+11,
+                                          12, 64+12, 13, 64+13,
+                                          14, 64+14, 15, 64+15,
+                                          24, 64+24, 25, 64+25,
+                                          26, 64+26, 27, 64+27,
+                                          28, 64+28, 29, 64+29,
+                                          30, 64+30, 31, 64+31,
+                                          40, 64+40, 41, 64+41,
+                                          42, 64+42, 43, 64+43,
+                                          44, 64+44, 45, 64+45,
+                                          46, 64+46, 47, 64+47,
+                                          56, 64+56, 57, 64+57,
+                                          58, 64+58, 59, 64+59,
+                                          60, 64+60, 61, 64+61,
+                                          62, 64+62, 63, 64+63);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
-         __m512i __B) {
-  return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
-                 (__v64qi) __B,
-                 (__v64qi) __W,
-                 (__mmask64) __U);
+_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+                                        (__v64qi)_mm512_unpackhi_epi8(__A, __B),
+                                        (__v64qi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
-  return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
-                 (__v64qi) __B,
-                 (__v64qi) _mm512_setzero_qi(),
-                 (__mmask64) __U);
+_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+                                        (__v64qi)_mm512_unpackhi_epi8(__A, __B),
+                                        (__v64qi)_mm512_setzero_qi());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpackhi_epi16 (__m512i __A, __m512i __B) {
-  return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
-                 (__v32hi) __B,
-                 (__v32hi) _mm512_setzero_hi(),
-                 (__mmask32) -1);
+_mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
+                                          4,  32+4,   5, 32+5,
+                                          6,  32+6,   7, 32+7,
+                                          12, 32+12, 13, 32+13,
+                                          14, 32+14, 15, 32+15,
+                                          20, 32+20, 21, 32+21,
+                                          22, 32+22, 23, 32+23,
+                                          28, 32+28, 29, 32+29,
+                                          30, 32+30, 31, 32+31);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-          __m512i __B) {
-  return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
-                 (__v32hi) __B,
-                 (__v32hi) __W,
-                 (__mmask32) __U);
+_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                       (__v32hi)_mm512_unpackhi_epi16(__A, __B),
+                                       (__v32hi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
-  return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
-                 (__v32hi) __B,
-                 (__v32hi) _mm512_setzero_hi(),
-                 (__mmask32) __U);
+_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                       (__v32hi)_mm512_unpackhi_epi16(__A, __B),
+                                       (__v32hi)_mm512_setzero_hi());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpacklo_epi8 (__m512i __A, __m512i __B) {
-  return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
-                 (__v64qi) __B,
-                 (__v64qi) _mm512_setzero_qi(),
-                 (__mmask64) -1);
+_mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
+                                          0,  64+0,   1, 64+1,
+                                          2,  64+2,   3, 64+3,
+                                          4,  64+4,   5, 64+5,
+                                          6,  64+6,   7, 64+7,
+                                          16, 64+16, 17, 64+17,
+                                          18, 64+18, 19, 64+19,
+                                          20, 64+20, 21, 64+21,
+                                          22, 64+22, 23, 64+23,
+                                          32, 64+32, 33, 64+33,
+                                          34, 64+34, 35, 64+35,
+                                          36, 64+36, 37, 64+37,
+                                          38, 64+38, 39, 64+39,
+                                          48, 64+48, 49, 64+49,
+                                          50, 64+50, 51, 64+51,
+                                          52, 64+52, 53, 64+53,
+                                          54, 64+54, 55, 64+55);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
-         __m512i __B) {
-  return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
-                 (__v64qi) __B,
-                 (__v64qi) __W,
-                 (__mmask64) __U);
+_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+                                        (__v64qi)_mm512_unpacklo_epi8(__A, __B),
+                                        (__v64qi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
-  return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
-                 (__v64qi) __B,
-                 (__v64qi) _mm512_setzero_qi(),
-                 (__mmask64) __U);
+_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+                                        (__v64qi)_mm512_unpacklo_epi8(__A, __B),
+                                        (__v64qi)_mm512_setzero_qi());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpacklo_epi16 (__m512i __A, __m512i __B) {
-  return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
-                 (__v32hi) __B,
-                 (__v32hi) _mm512_setzero_hi(),
-                 (__mmask32) -1);
+_mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
+                                          0,  32+0,   1, 32+1,
+                                          2,  32+2,   3, 32+3,
+                                          8,  32+8,   9, 32+9,
+                                          10, 32+10, 11, 32+11,
+                                          16, 32+16, 17, 32+17,
+                                          18, 32+18, 19, 32+19,
+                                          24, 32+24, 25, 32+25,
+                                          26, 32+26, 27, 32+27);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-          __m512i __B) {
-  return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
-                 (__v32hi) __B,
-                 (__v32hi) __W,
-                 (__mmask32) __U);
+_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                       (__v32hi)_mm512_unpacklo_epi16(__A, __B),
+                                       (__v32hi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
-  return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
-                 (__v32hi) __B,
-                 (__v32hi) _mm512_setzero_hi(),
-                 (__mmask32) __U);
+_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                       (__v32hi)_mm512_unpacklo_epi16(__A, __B),
+                                       (__v32hi)_mm512_setzero_hi());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -1613,12 +1637,14 @@ _mm512_maskz_cvtepu8_epi16 (__mmask32 __
 
 #define _mm512_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \
   (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
-                                      _mm512_shufflehi_epi16((A), (imm)), \
+                                      (__v32hi)_mm512_shufflehi_epi16((A), \
+                                                                      (imm)), \
                                       (__v32hi)(__m512i)(W)); })
 
 #define _mm512_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \
   (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
-                                      _mm512_shufflehi_epi16((A), (imm)), \
+                                      (__v32hi)_mm512_shufflehi_epi16((A), \
+                                                                      (imm)), \
                                       (__v32hi)_mm512_setzero_hi()); })
 
 #define _mm512_shufflelo_epi16(A, imm) __extension__ ({ \
@@ -1648,13 +1674,15 @@ _mm512_maskz_cvtepu8_epi16 (__mmask32 __
 
 #define _mm512_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \
   (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
-                                      _mm512_shufflelo_epi16((A), (imm)), \
+                                      (__v32hi)_mm512_shufflelo_epi16((A), \
+                                                                      (imm)), \
                                       (__v32hi)(__m512i)(W)); })
 
 
 #define _mm512_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \
   (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
-                                      _mm512_shufflelo_epi16((A), (imm)), \
+                                      (__v32hi)_mm512_shufflelo_epi16((A), \
+                                                                      (imm)), \
                                       (__v32hi)_mm512_setzero_hi()); })
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=273533&r1=273532&r2=273533&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Thu Jun 23 01:36:42 2016
@@ -4167,37 +4167,203 @@ _mm512_maskz_cvtpd_epu32 (__mmask8 __U,
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
 }
+
 /* Unpack and Interleave */
+
 static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
 {
-  return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
+  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
+                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
+                                           (__v8df)__W);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
+{
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
+                                           (__v8df)_mm512_setzero_pd());
 }
 
 static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
 {
-  return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
+  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
+                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
+                                           (__v8df)__W);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
+                                           (__v8df)_mm512_setzero_pd());
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
 {
-  return __builtin_shufflevector(__a, __b,
-                                 2,    18,    3,    19,
-                                 2+4,  18+4,  3+4,  19+4,
-                                 2+8,  18+8,  3+8,  19+8,
-                                 2+12, 18+12, 3+12, 19+12);
+  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
+                                         2,    18,    3,    19,
+                                         2+4,  18+4,  3+4,  19+4,
+                                         2+8,  18+8,  3+8,  19+8,
+                                         2+12, 18+12, 3+12, 19+12);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
+                                          (__v16sf)__W);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
+                                          (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
 {
-  return __builtin_shufflevector(__a, __b,
-                                 0,    16,    1,    17,
-                                 0+4,  16+4,  1+4,  17+4,
-                                 0+8,  16+8,  1+8,  17+8,
-                                 0+12, 16+12, 1+12, 17+12);
+  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
+                                         0,    16,    1,    17,
+                                         0+4,  16+4,  1+4,  17+4,
+                                         0+8,  16+8,  1+8,  17+8,
+                                         0+12, 16+12, 1+12, 17+12);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
+                                          (__v16sf)__W);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
+                                          (__v16sf)_mm512_setzero_ps());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
+                                          2,    18,    3,    19,
+                                          2+4,  18+4,  3+4,  19+4,
+                                          2+8,  18+8,  3+8,  19+8,
+                                          2+12, 18+12, 3+12, 19+12);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
+                                       (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
+                                       (__v16si)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
+                                          0,    16,    1,    17,
+                                          0+4,  16+4,  1+4,  17+4,
+                                          0+8,  16+8,  1+8,  17+8,
+                                          0+12, 16+12, 1+12, 17+12);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
+                                       (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
+                                       (__v16si)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
+                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
+                                        (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
+                                        (__v8di)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
+                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
+                                        (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
+                                        (__v8di)_mm512_setzero_si512());
 }
 
 /* Bit Test */
@@ -5787,14 +5953,6 @@ _mm512_kmov (__mmask16 __A)
   (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)); })
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
-                (__v8df) __B,
-                (__v8df) __W,
-                (__mmask8) __U);
-}
 #define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
   (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -5807,15 +5965,6 @@ _mm512_mask2_permutex2var_epi32 (__m512i
                    (__v16si) __B,
                    (__mmask16) __U);
 }
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
-                 (__v16si) __B,
-                 (__v16si)
-                 _mm512_setzero_si512 (),
-                 (__mmask16) -1);
-}
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_sll_epi32 (__m512i __A, __m128i __B)
@@ -6201,73 +6350,6 @@ _mm512_maskz_srlv_epi64 (__mmask8 __U, _
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U)); })
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
-                (__v8df) __B,
-                (__v8df)
-                _mm512_setzero_pd (),
-                (__mmask8) __U);
-}
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
-               (__v16sf) __B,
-               (__v16sf) __W,
-               (__mmask16) __U);
-}
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
-               (__v16sf) __B,
-               (__v16sf)
-               _mm512_setzero_ps (),
-               (__mmask16) __U);
-}
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
-                (__v8df) __B,
-                (__v8df) __W,
-                (__mmask8) __U);
-}
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
-                (__v8df) __B,
-                (__v8df)
-                _mm512_setzero_pd (),
-                (__mmask8) __U);
-}
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
-               (__v16sf) __B,
-               (__v16sf) __W,
-               (__mmask16) __U);
-}
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
-               (__v16sf) __B,
-               (__v16sf)
-               _mm512_setzero_ps (),
-               (__mmask16) __U);
-}
-
 #define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
   (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
 
@@ -6686,114 +6768,6 @@ _mm512_maskz_cvttpd_epu32 (__mmask8 __U,
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
-          __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
-                 (__v16si) __B,
-                 (__v16si) __W,
-                 (__mmask16) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
-                 (__v16si) __B,
-                 (__v16si)
-                 _mm512_setzero_si512 (),
-                 (__mmask16) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
-                  (__v8di) __B,
-                  (__v8di)
-                  _mm512_setzero_si512 (),
-                  (__mmask8) -1);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
-                  (__v8di) __B,
-                  (__v8di) __W,
-                  (__mmask8) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
-                  (__v8di) __B,
-                  (__v8di)
-                  _mm512_setzero_si512 (),
-                  (__mmask8) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
-                 (__v16si) __B,
-                 (__v16si)
-                 _mm512_setzero_si512 (),
-                 (__mmask16) -1);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
-          __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
-                 (__v16si) __B,
-                 (__v16si) __W,
-                 (__mmask16) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
-                 (__v16si) __B,
-                 (__v16si)
-                 _mm512_setzero_si512 (),
-                 (__mmask16) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
-                  (__v8di) __B,
-                  (__v8di)
-                  _mm512_setzero_si512 (),
-                  (__mmask8) -1);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
-                  (__v8di) __B,
-                  (__v8di) __W,
-                  (__mmask8) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
-                  (__v8di) __B,
-                  (__v8di)
-                  _mm512_setzero_si512 (),
-                  (__mmask8) __U);
-}
-
 #define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \

Modified: cfe/trunk/lib/Headers/avx512vlbwintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlbwintrin.h?rev=273533&r1=273532&r2=273533&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vlbwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlbwintrin.h Thu Jun 23 01:36:42 2016
@@ -2121,139 +2121,115 @@ _mm256_maskz_mulhi_epi16 (__mmask16 __U,
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpackhi_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
-      __m128i __B) {
-  return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
-               (__v16qi) __B,
-               (__v16qi) __W,
-               (__mmask16) __U);
+_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+                                           (__v16qi)_mm_unpackhi_epi8(__A, __B),
+                                           (__v16qi)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
-  return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
-               (__v16qi) __B,
-               (__v16qi) _mm_setzero_si128(),
-               (__mmask16) __U);
+_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+                                           (__v16qi)_mm_unpackhi_epi8(__A, __B),
+                                           (__v16qi)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpackhi_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
-         __m256i __B) {
-  return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
-               (__v32qi) __B,
-               (__v32qi) __W,
-               (__mmask32) __U);
+_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+                                        (__v32qi)_mm256_unpackhi_epi8(__A, __B),
+                                        (__v32qi)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
-  return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
-               (__v32qi) __B,
-               (__v32qi) _mm256_setzero_si256(),
-               (__mmask32) __U);
+_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+                                        (__v32qi)_mm256_unpackhi_epi8(__A, __B),
+                                        (__v32qi)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpackhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-       __m128i __B) {
-  return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
-               (__v8hi) __B,
-               (__v8hi) __W,
-               (__mmask8) __U);
+_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+                                           (__v8hi)_mm_unpackhi_epi16(__A, __B),
+                                           (__v8hi)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
-  return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
-               (__v8hi) __B,
-               (__v8hi) _mm_setzero_si128(),
-               (__mmask8) __U);
+_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+                                           (__v8hi)_mm_unpackhi_epi16(__A, __B),
+                                           (__v8hi) _mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpackhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-          __m256i __B) {
-  return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
-               (__v16hi) __B,
-               (__v16hi) __W,
-               (__mmask16) __U);
+_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+                                       (__v16hi)_mm256_unpackhi_epi16(__A, __B),
+                                       (__v16hi)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
-  return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
-               (__v16hi) __B,
-               (__v16hi) _mm256_setzero_si256(),
-               (__mmask16) __U);
+_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+                                       (__v16hi)_mm256_unpackhi_epi16(__A, __B),
+                                       (__v16hi)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpacklo_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
-      __m128i __B) {
-  return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
-               (__v16qi) __B,
-               (__v16qi) __W,
-               (__mmask16) __U);
+_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+                                           (__v16qi)_mm_unpacklo_epi8(__A, __B),
+                                           (__v16qi)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
-  return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
-               (__v16qi) __B,
-               (__v16qi) _mm_setzero_si128(),
-               (__mmask16) __U);
+_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+                                           (__v16qi)_mm_unpacklo_epi8(__A, __B),
+                                           (__v16qi)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpacklo_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
-         __m256i __B) {
-  return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
-               (__v32qi) __B,
-               (__v32qi) __W,
-               (__mmask32) __U);
+_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+                                        (__v32qi)_mm256_unpacklo_epi8(__A, __B),
+                                        (__v32qi)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
-  return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
-               (__v32qi) __B,
-               (__v32qi) _mm256_setzero_si256(),
-               (__mmask32) __U);
+_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+                                        (__v32qi)_mm256_unpacklo_epi8(__A, __B),
+                                        (__v32qi)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpacklo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-       __m128i __B) {
-  return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
-               (__v8hi) __B,
-               (__v8hi) __W,
-               (__mmask8) __U);
+_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+                                           (__v8hi)_mm_unpacklo_epi16(__A, __B),
+                                           (__v8hi)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
-  return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
-               (__v8hi) __B,
-               (__v8hi) _mm_setzero_si128(),
-               (__mmask8) __U);
+_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+                                           (__v8hi)_mm_unpacklo_epi16(__A, __B),
+                                           (__v8hi) _mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpacklo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-          __m256i __B) {
-  return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
-               (__v16hi) __B,
-               (__v16hi) __W,
-               (__mmask16) __U);
+_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+                                       (__v16hi)_mm256_unpacklo_epi16(__A, __B),
+                                       (__v16hi)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
-  return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
-               (__v16hi) __B,
-               (__v16hi) _mm256_setzero_si256(),
-               (__mmask16) __U);
+_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+                                       (__v16hi)_mm256_unpacklo_epi16(__A, __B),
+                                       (__v16hi)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/avx512vlintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=273533&r1=273532&r2=273533&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vlintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlintrin.h Thu Jun 23 01:36:42 2016
@@ -6468,161 +6468,131 @@ _mm256_mask_storeu_ps (void *__P, __mmas
 
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
-          __m128d __B)
+_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
-  return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
-                (__v2df) __B,
-                (__v2df) __W,
-                (__mmask8) __U);
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_unpackhi_pd(__A, __B),
+                                              (__v2df)__W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
+_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
 {
-  return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
-                (__v2df) __B,
-                (__v2df)
-                _mm_setzero_pd (),
-                (__mmask8) __U);
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_unpackhi_pd(__A, __B),
+                                              (__v2df)_mm_setzero_pd());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
-       __m256d __B)
+_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
 {
-  return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
-                (__v4df) __B,
-                (__v4df) __W,
-                (__mmask8) __U);
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
+                                           (__v4df)__W);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
+_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
 {
-  return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
-                (__v4df) __B,
-                (__v4df)
-                _mm256_setzero_pd (),
-                (__mmask8) __U);
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
+                                           (__v4df)_mm256_setzero_pd());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
-               (__v4sf) __B,
-               (__v4sf) __W,
-               (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
+                                             (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
+_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
-               (__v4sf) __B,
-               (__v4sf)
-               _mm_setzero_ps (),
-               (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
+                                             (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
-       __m256 __B)
+_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
-  return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
-               (__v8sf) __B,
-               (__v8sf) __W,
-               (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
+                                           (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
+_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
 {
-  return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
-               (__v8sf) __B,
-               (__v8sf)
-               _mm256_setzero_ps (),
-               (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
+                                           (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
-          __m128d __B)
+_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
-  return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
-                (__v2df) __B,
-                (__v2df) __W,
-                (__mmask8) __U);
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_unpacklo_pd(__A, __B),
+                                              (__v2df)__W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
+_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
 {
-  return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
-                (__v2df) __B,
-                (__v2df)
-                _mm_setzero_pd (),
-                (__mmask8) __U);
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_unpacklo_pd(__A, __B),
+                                              (__v2df)_mm_setzero_pd());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
-       __m256d __B)
+_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
 {
-  return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
-                (__v4df) __B,
-                (__v4df) __W,
-                (__mmask8) __U);
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
+                                           (__v4df)__W);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
+_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
 {
-  return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
-                (__v4df) __B,
-                (__v4df)
-                _mm256_setzero_pd (),
-                (__mmask8) __U);
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
+                                           (__v4df)_mm256_setzero_pd());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
-               (__v4sf) __B,
-               (__v4sf) __W,
-               (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
+                                             (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
+_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
-               (__v4sf) __B,
-               (__v4sf)
-               _mm_setzero_ps (),
-               (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
+                                             (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
-       __m256 __B)
+_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
-  return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
-               (__v8sf) __B,
-               (__v8sf) __W,
-               (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
+                                           (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
+_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
 {
-  return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
-               (__v8sf) __B,
-               (__v8sf)
-               _mm256_setzero_ps (),
-               (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
+                                           (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -6973,163 +6943,131 @@ _mm256_mask_testn_epi64_mask (__mmask8 _
 
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-       __m128i __B)
+_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
-                 (__v4si) __B,
-                 (__v4si) __W,
-                 (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
+                                           (__v4si)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
-                 (__v4si) __B,
-                 (__v4si)
-                 _mm_setzero_si128 (),
-                 (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
+                                           (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-          __m256i __B)
+_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
-                 (__v8si) __B,
-                 (__v8si) __W,
-                 (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
+                                        (__v8si)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
-                 (__v8si) __B,
-                 (__v8si)
-                 _mm256_setzero_si256 (),
-                 (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
+                                        (__v8si)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-       __m128i __B)
+_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
-                  (__v2di) __B,
-                  (__v2di) __W,
-                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
+                                           (__v2di)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
-                  (__v2di) __B,
-                  (__v2di)
-                  _mm_setzero_di (),
-                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
+                                           (__v2di)_mm_setzero_di());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-          __m256i __B)
+_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
-                  (__v4di) __B,
-                  (__v4di) __W,
-                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
+                                        (__v4di)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
-                  (__v4di) __B,
-                  (__v4di)
-                  _mm256_setzero_si256 (),
-                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
+                                        (__v4di)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-       __m128i __B)
+_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
-                 (__v4si) __B,
-                 (__v4si) __W,
-                 (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
+                                           (__v4si)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
-                 (__v4si) __B,
-                 (__v4si)
-                 _mm_setzero_si128 (),
-                 (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
+                                           (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-          __m256i __B)
+_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
-                 (__v8si) __B,
-                 (__v8si) __W,
-                 (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
+                                        (__v8si)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
-                 (__v8si) __B,
-                 (__v8si)
-                 _mm256_setzero_si256 (),
-                 (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
+                                        (__v8si)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-       __m128i __B)
+_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
-                  (__v2di) __B,
-                  (__v2di) __W,
-                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
+                                           (__v2di)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
-  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
-                  (__v2di) __B,
-                  (__v2di)
-                  _mm_setzero_di (),
-                  (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
+                                           (__v2di)_mm_setzero_di());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-          __m256i __B)
+_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
-                  (__v4di) __B,
-                  (__v4di) __W,
-                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
+                                        (__v4di)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
-                  (__v4di) __B,
-                  (__v4di)
-                  _mm256_setzero_si256 (),
-                  (__mmask8) __U);
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
+                                        (__v4di)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512bw-builtins.c?rev=273533&r1=273532&r2=273533&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Thu Jun 23 01:36:42 2016
@@ -999,73 +999,81 @@ __m256i test_mm512_maskz_cvtepi16_epi8(_
 
 __m512i test_mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_unpackhi_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.512
+  // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
   return _mm512_unpackhi_epi8(__A, __B); 
 }
 
 __m512i test_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_unpackhi_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.512
+  // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
+  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
   return _mm512_mask_unpackhi_epi8(__W, __U, __A, __B); 
 }
 
 __m512i test_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpackhi_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.512
+  // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
+  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
   return _mm512_maskz_unpackhi_epi8(__U, __A, __B); 
 }
 
 __m512i test_mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_unpackhi_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.512
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
   return _mm512_unpackhi_epi16(__A, __B); 
 }
 
 __m512i test_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_unpackhi_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.512
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_mask_unpackhi_epi16(__W, __U, __A, __B); 
 }
 
 __m512i test_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpackhi_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.512
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_maskz_unpackhi_epi16(__U, __A, __B); 
 }
 
 __m512i test_mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_unpacklo_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.512
+  // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
   return _mm512_unpacklo_epi8(__A, __B); 
 }
 
 __m512i test_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_unpacklo_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.512
+  // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
+  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
   return _mm512_mask_unpacklo_epi8(__W, __U, __A, __B); 
 }
 
 __m512i test_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpacklo_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.512
+  // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
+  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
   return _mm512_maskz_unpacklo_epi8(__U, __A, __B); 
 }
 
 __m512i test_mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_unpacklo_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.512
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
   return _mm512_unpacklo_epi16(__A, __B); 
 }
 
 __m512i test_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_unpacklo_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.512
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_mask_unpacklo_epi16(__W, __U, __A, __B); 
 }
 
 __m512i test_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpacklo_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.512
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_maskz_unpacklo_epi16(__U, __A, __B); 
 }
 

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=273533&r1=273532&r2=273533&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Thu Jun 23 01:36:42 2016
@@ -2908,7 +2908,8 @@ __mmask16 test_mm512_kmov(__mmask16 __A)
 
 __m512d test_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_unpackhi_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckh.pd.512
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_unpackhi_pd(__W, __U, __A, __B); 
 }
 unsigned long long test_mm_cvt_roundsd_si64(__m128d __A) {
@@ -2923,13 +2924,14 @@ __m512i test_mm512_mask2_permutex2var_ep
 }
 __m512i test_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_unpackhi_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckhd.q.512
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
   return _mm512_unpackhi_epi32(__A, __B); 
 }
 
 __m512d test_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpackhi_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckh.pd.512
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_unpackhi_pd(__U, __A, __B); 
 }
 long long test_mm_cvt_roundsd_i64(__m128d __A) {
@@ -2944,43 +2946,50 @@ __m512d test_mm512_mask2_permutex2var_pd
 }
 __m512i test_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_unpackhi_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckhd.q.512
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_unpackhi_epi32(__W, __U, __A, __B); 
 }
 
 __m512 test_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_unpackhi_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckh.ps.512
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_unpackhi_ps(__W, __U, __A, __B); 
 }
 
 __m512 test_mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpackhi_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckh.ps.512
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_unpackhi_ps(__U, __A, __B); 
 }
 
 __m512d test_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_unpacklo_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckl.pd.512
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_unpacklo_pd(__W, __U, __A, __B); 
 }
 
 __m512d test_mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpacklo_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckl.pd.512
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_unpacklo_pd(__U, __A, __B); 
 }
 
 __m512 test_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_unpacklo_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckl.ps.512
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_unpacklo_ps(__W, __U, __A, __B); 
 }
 
 __m512 test_mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpacklo_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckl.ps.512
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_unpacklo_ps(__U, __A, __B); 
 }
 int test_mm_cvt_roundsd_si32(__m128d __A) {
@@ -3572,61 +3581,68 @@ __mmask8 test_mm512_mask_test_epi64_mask
 
 __m512i test_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpackhi_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckhd.q.512
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_unpackhi_epi32(__U, __A, __B); 
 }
 
 __m512i test_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_unpackhi_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpckhqd.q.512
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   return _mm512_unpackhi_epi64(__A, __B); 
 }
 
 __m512i test_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_unpackhi_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpckhqd.q.512
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_unpackhi_epi64(__W, __U, __A, __B); 
 }
 
 __m512i test_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpackhi_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpckhqd.q.512
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_unpackhi_epi64(__U, __A, __B); 
 }
 
 __m512i test_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_unpacklo_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckld.q.512
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
   return _mm512_unpacklo_epi32(__A, __B); 
 }
 
 __m512i test_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_unpacklo_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckld.q.512
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_unpacklo_epi32(__W, __U, __A, __B); 
 }
 
 __m512i test_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpacklo_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckld.q.512
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_unpacklo_epi32(__U, __A, __B); 
 }
 
 __m512i test_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_unpacklo_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpcklqd.q.512
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   return _mm512_unpacklo_epi64(__A, __B); 
 }
 
 __m512i test_mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_unpacklo_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpcklqd.q.512
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_unpacklo_epi64(__W, __U, __A, __B); 
 }
 
 __m512i test_mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_unpacklo_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpcklqd.q.512
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_unpacklo_epi64(__U, __A, __B); 
 }
 

Modified: cfe/trunk/test/CodeGen/avx512vl-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vl-builtins.c?rev=273533&r1=273532&r2=273533&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vl-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vl-builtins.c Thu Jun 23 01:36:42 2016
@@ -4425,97 +4425,113 @@ void test_mm256_mask_storeu_ps(void *__P
 
 __m128d test_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_unpackhi_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckh.pd.128
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_mask_unpackhi_pd(__W, __U, __A, __B); 
 }
 
 __m128d test_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_maskz_unpackhi_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckh.pd.128
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_maskz_unpackhi_pd(__U, __A, __B); 
 }
 
 __m256d test_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_unpackhi_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckh.pd.256
+  // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <4 x i1> %{{.*}} <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_mask_unpackhi_pd(__W, __U, __A, __B); 
 }
 
 __m256d test_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpackhi_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckh.pd.256
+  // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <4 x i1> %{{.*}} <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_maskz_unpackhi_pd(__U, __A, __B); 
 }
 
 __m128 test_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_unpackhi_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckh.ps.128
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}} <4 x float> %{{.*}}
   return _mm_mask_unpackhi_ps(__W, __U, __A, __B); 
 }
 
 __m128 test_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_maskz_unpackhi_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckh.ps.128
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}} <4 x float> %{{.*}}
   return _mm_maskz_unpackhi_ps(__U, __A, __B); 
 }
 
 __m256 test_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_unpackhi_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckh.ps.256
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_unpackhi_ps(__W, __U, __A, __B); 
 }
 
 __m256 test_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpackhi_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckh.ps.256
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_unpackhi_ps(__U, __A, __B); 
 }
 
 __m128d test_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_unpacklo_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckl.pd.128
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_mask_unpacklo_pd(__W, __U, __A, __B); 
 }
 
 __m128d test_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_maskz_unpacklo_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckl.pd.128
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_maskz_unpacklo_pd(__U, __A, __B); 
 }
 
 __m256d test_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_unpacklo_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckl.pd.256
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  // CHECK: select <4 x i1> %{{.*}} <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_mask_unpacklo_pd(__W, __U, __A, __B); 
 }
 
 __m256d test_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpacklo_pd
-  // CHECK: @llvm.x86.avx512.mask.unpckl.pd.256
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  // CHECK: select <4 x i1> %{{.*}} <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_maskz_unpacklo_pd(__U, __A, __B); 
 }
 
 __m128 test_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_unpacklo_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckl.ps.128
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_mask_unpacklo_ps(__W, __U, __A, __B); 
 }
 
 __m128 test_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_maskz_unpacklo_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckl.ps.128
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_maskz_unpacklo_ps(__U, __A, __B); 
 }
 
 __m256 test_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_unpacklo_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckl.ps.256
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_unpacklo_ps(__W, __U, __A, __B); 
 }
 
 __m256 test_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpacklo_ps
-  // CHECK: @llvm.x86.avx512.mask.unpckl.ps.256
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_unpacklo_ps(__U, __A, __B); 
 }
 
@@ -4784,97 +4800,113 @@ __mmask8 test_mm256_mask_testn_epi64_mas
 }
 __m128i test_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_unpackhi_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckhd.q.128
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_unpackhi_epi32(__W, __U, __A, __B); 
 }
 
 __m128i test_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_unpackhi_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckhd.q.128
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_unpackhi_epi32(__U, __A, __B); 
 }
 
 __m256i test_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_unpackhi_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckhd.q.256
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_unpackhi_epi32(__W, __U, __A, __B); 
 }
 
 __m256i test_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpackhi_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckhd.q.256
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_unpackhi_epi32(__U, __A, __B); 
 }
 
 __m128i test_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_unpackhi_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpckhqd.q.128
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_mask_unpackhi_epi64(__W, __U, __A, __B); 
 }
 
 __m128i test_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_unpackhi_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpckhqd.q.128
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_maskz_unpackhi_epi64(__U, __A, __B); 
 }
 
 __m256i test_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_unpackhi_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpckhqd.q.256
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_mask_unpackhi_epi64(__W, __U, __A, __B); 
 }
 
 __m256i test_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpackhi_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpckhqd.q.256
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_maskz_unpackhi_epi64(__U, __A, __B); 
 }
 
 __m128i test_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_unpacklo_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckld.q.128
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_unpacklo_epi32(__W, __U, __A, __B); 
 }
 
 __m128i test_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_unpacklo_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckld.q.128
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_unpacklo_epi32(__U, __A, __B); 
 }
 
 __m256i test_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_unpacklo_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckld.q.256
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_unpacklo_epi32(__W, __U, __A, __B); 
 }
 
 __m256i test_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpacklo_epi32
-  // CHECK: @llvm.x86.avx512.mask.punpckld.q.256
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_unpacklo_epi32(__U, __A, __B); 
 }
 
 __m128i test_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_unpacklo_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpcklqd.q.128
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_mask_unpacklo_epi64(__W, __U, __A, __B); 
 }
 
 __m128i test_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_unpacklo_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpcklqd.q.128
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_maskz_unpacklo_epi64(__U, __A, __B); 
 }
 
 __m256i test_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_unpacklo_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpcklqd.q.256
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_mask_unpacklo_epi64(__W, __U, __A, __B); 
 }
 
 __m256i test_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpacklo_epi64
-  // CHECK: @llvm.x86.avx512.mask.punpcklqd.q.256
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_maskz_unpacklo_epi64(__U, __A, __B); 
 }
 

Modified: cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vlbw-builtins.c?rev=273533&r1=273532&r2=273533&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vlbw-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Thu Jun 23 01:36:42 2016
@@ -1667,97 +1667,113 @@ __m256i test_mm256_maskz_mulhi_epi16(__m
 
 __m128i test_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_unpackhi_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_unpackhi_epi8(__W, __U, __A, __B); 
 }
 
 __m128i test_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_unpackhi_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_unpackhi_epi8(__U, __A, __B); 
 }
 
 __m256i test_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_unpackhi_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.256
+  // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_unpackhi_epi8(__W, __U, __A, __B); 
 }
 
 __m256i test_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpackhi_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.256
+  // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_maskz_unpackhi_epi8(__U, __A, __B); 
 }
 
 __m128i test_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_unpackhi_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.128
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_unpackhi_epi16(__W, __U, __A, __B); 
 }
 
 __m128i test_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_unpackhi_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.128
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_unpackhi_epi16(__U, __A, __B); 
 }
 
 __m256i test_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_unpackhi_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.256
+  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_unpackhi_epi16(__W, __U, __A, __B); 
 }
 
 __m256i test_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpackhi_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.256
+  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_unpackhi_epi16(__U, __A, __B); 
 }
 
 __m128i test_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_unpacklo_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_unpacklo_epi8(__W, __U, __A, __B); 
 }
 
 __m128i test_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_unpacklo_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_unpacklo_epi8(__U, __A, __B); 
 }
 
 __m256i test_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_unpacklo_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.256
+  // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_unpacklo_epi8(__W, __U, __A, __B); 
 }
 
 __m256i test_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpacklo_epi8
-  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.256
+  // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_maskz_unpacklo_epi8(__U, __A, __B); 
 }
 
 __m128i test_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_unpacklo_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.128
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_unpacklo_epi16(__W, __U, __A, __B); 
 }
 
 __m128i test_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_unpacklo_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.128
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_unpacklo_epi16(__U, __A, __B); 
 }
 
 __m256i test_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_unpacklo_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.256
+  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_unpacklo_epi16(__W, __U, __A, __B); 
 }
 
 __m256i test_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_unpacklo_epi16
-  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.256
+  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_unpacklo_epi16(__U, __A, __B); 
 }
 




More information about the cfe-commits mailing list