r292329 - [AVX-512] Replace subvector broadcast builtins with shufflevectors and selects.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 17 18:17:11 PST 2017


Author: ctopper
Date: Tue Jan 17 20:17:10 2017
New Revision: 292329

URL: http://llvm.org/viewvc/llvm-project?rev=292329&view=rev
Log:
[AVX-512] Replace subvector broadcast builtins with shufflevectors and selects.

Verified that the backend codegens this equally well.

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/avx512dqintrin.h
    cfe/trunk/lib/Headers/avx512fintrin.h
    cfe/trunk/lib/Headers/avx512vldqintrin.h
    cfe/trunk/lib/Headers/avx512vlintrin.h
    cfe/trunk/test/CodeGen/avx512dq-builtins.c
    cfe/trunk/test/CodeGen/avx512f-builtins.c
    cfe/trunk/test/CodeGen/avx512vl-builtins.c
    cfe/trunk/test/CodeGen/avx512vldq-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=292329&r1=292328&r2=292329&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Jan 17 20:17:10 2017
@@ -1590,27 +1590,15 @@ TARGET_BUILTIN(__builtin_ia32_cvtq2mask1
 TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4LLi","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastmb512, "V8LLiUc","","avx512cd")
 TARGET_BUILTIN(__builtin_ia32_broadcastmw512, "V16iUs","","avx512cd")
-TARGET_BUILTIN(__builtin_ia32_broadcastf32x4_512, "V16fV4fV16fUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_broadcastf64x4_512, "V8dV4dV8dUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x4_512, "V16iV4iV16iUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_broadcasti64x4_512, "V8LLiV4LLiV8LLiUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_broadcastmb128, "V2LLiUc","","avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_512_mask, "V16fV4fV16fUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_broadcastf32x8_512_mask, "V16fV8fV16fUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_broadcastf64x2_512_mask, "V8dV2dV8dUc","","avx512dq")
 TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_512_mask, "V16iV4iV16iUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x8_512_mask, "V16iV8iV16iUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_broadcasti64x2_512_mask, "V8LLiV2LLiV8LLiUc","","avx512dq")
 TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_256_mask, "V8fV4fV8fUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastf64x2_256_mask, "V4dV2dV4dUc","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_128_mask, "V4iV4iV4iUc","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_256_mask, "V8iV4iV8iUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcasti64x2_256_mask, "V4LLiV2LLiV4LLiUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastf32x4_256_mask, "V8fV4fV8fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x4_256_mask, "V8iV4iV8iUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl")

Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=292329&r1=292328&r2=292329&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Tue Jan 17 20:17:10 2017
@@ -995,51 +995,50 @@ _mm512_maskz_broadcast_f32x2 (__mmask16
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_broadcast_f32x8 (__m256 __A)
+_mm512_broadcast_f32x8(__m256 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
-                _mm512_undefined_ps(),
-                (__mmask16) -1);
+  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
+                                         0, 1, 2, 3, 4, 5, 6, 7,
+                                         0, 1, 2, 3, 4, 5, 6, 7);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
+_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
-                (__v16sf)__O,
-                __M);
+  return (__m512)__builtin_ia32_selectps_512((__mmask8)__M,
+                                           (__v16sf)_mm512_broadcast_f32x8(__A),
+                                           (__v16sf)__O);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
+_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
-                (__v16sf)_mm512_setzero_ps (),
-                __M);
+  return (__m512)__builtin_ia32_selectps_512((__mmask8)__M,
+                                           (__v16sf)_mm512_broadcast_f32x8(__A),
+                                           (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_broadcast_f64x2 (__m128d __A)
+_mm512_broadcast_f64x2(__m128d __A)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
-                 (__v8df)_mm512_undefined_pd(),
-                 (__mmask8) -1);
+  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
+                                          0, 1, 0, 1, 0, 1, 0, 1);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
+_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
-                 (__v8df)
-                 __O, __M);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
+                                            (__v8df)_mm512_broadcast_f64x2(__A),
+                                            (__v8df)__O);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
+_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
-                 (__v8df)_mm512_setzero_ps (),
-                 __M);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
+                                            (__v8df)_mm512_broadcast_f64x2(__A),
+                                            (__v8df)_mm512_setzero_pd());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -1067,52 +1066,50 @@ _mm512_maskz_broadcast_i32x2 (__mmask16
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcast_i32x8 (__m256i __A)
+_mm512_broadcast_i32x8(__m256i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
-                 (__v16si)_mm512_setzero_si512(),
-                 (__mmask16) -1);
+  return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
+                                          0, 1, 2, 3, 4, 5, 6, 7,
+                                          0, 1, 2, 3, 4, 5, 6, 7);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
+_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
-                 (__v16si)__O,
-                 __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M,
+                                           (__v16si)_mm512_broadcast_i32x8(__A),
+                                           (__v16si)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
+_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
-                 (__v16si)
-                 _mm512_setzero_si512 (),
-                 __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M,
+                                           (__v16si)_mm512_broadcast_i32x8(__A),
+                                           (__v16si)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcast_i64x2 (__m128i __A)
+_mm512_broadcast_i64x2(__m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
-                 (__v8di)_mm512_setzero_si512(),
-                 (__mmask8) -1);
+  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
+                                          0, 1, 0, 1, 0, 1, 0, 1);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
+_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
-                 (__v8di)
-                 __O, __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
+                                            (__v8di)_mm512_broadcast_i64x2(__A),
+                                            (__v8di)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
+_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
-                 (__v8di)_mm512_setzero_si512 (),
-                 __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
+                                            (__v8di)_mm512_broadcast_i64x2(__A),
+                                            (__v8di)_mm512_setzero_si512());
 }
 
 #define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=292329&r1=292328&r2=292329&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Tue Jan 17 20:17:10 2017
@@ -7278,107 +7278,97 @@ _mm_maskz_sqrt_ss (__mmask8 __U, __m128
                                            (__mmask8)(U), (int)(R)); })
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_broadcast_f32x4 (__m128 __A)
+_mm512_broadcast_f32x4(__m128 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
-                 (__v16sf)
-                 _mm512_undefined_ps (),
-                 (__mmask16) -1);
+  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
+                                         0, 1, 2, 3, 0, 1, 2, 3,
+                                         0, 1, 2, 3, 0, 1, 2, 3);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
+_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
-                 (__v16sf) __O,
-                 __M);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
+                                           (__v16sf)_mm512_broadcast_f32x4(__A),
+                                           (__v16sf)__O);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
+_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
-                 (__v16sf)
-                 _mm512_setzero_ps (),
-                 __M);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
+                                           (__v16sf)_mm512_broadcast_f32x4(__A),
+                                           (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_broadcast_f64x4 (__m256d __A)
+_mm512_broadcast_f64x4(__m256d __A)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
-                  (__v8df)
-                  _mm512_undefined_pd (),
-                  (__mmask8) -1);
+  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
+                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
+_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
-                  (__v8df) __O,
-                  __M);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
+                                            (__v8df)_mm512_broadcast_f64x4(__A),
+                                            (__v8df)__O);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
+_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
-                  (__v8df)
-                  _mm512_setzero_pd (),
-                  __M);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
+                                            (__v8df)_mm512_broadcast_f64x4(__A),
+                                            (__v8df)_mm512_setzero_pd());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcast_i32x4 (__m128i __A)
+_mm512_broadcast_i32x4(__m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
-                  (__v16si)
-                  _mm512_undefined_epi32 (),
-                  (__mmask16) -1);
+  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
+                                          0, 1, 2, 3, 0, 1, 2, 3,
+                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
+_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
-                  (__v16si) __O,
-                  __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                           (__v16si)_mm512_broadcast_i32x4(__A),
+                                           (__v16si)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
+_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
-                  (__v16si)
-                  _mm512_setzero_si512 (),
-                  __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                           (__v16si)_mm512_broadcast_i32x4(__A),
+                                           (__v16si)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcast_i64x4 (__m256i __A)
+_mm512_broadcast_i64x4(__m256i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
-                  (__v8di)
-                  _mm512_undefined_epi32 (),
-                  (__mmask8) -1);
+  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
+                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
+_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
-                  (__v8di) __O,
-                  __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
+                                            (__v8di)_mm512_broadcast_i64x4(__A),
+                                            (__v8di)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
+_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
-                  (__v8di)
-                  _mm512_setzero_si512 (),
-                  __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
+                                            (__v8di)_mm512_broadcast_i64x4(__A),
+                                            (__v8di)_mm512_setzero_si512());
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/avx512vldqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vldqintrin.h?rev=292329&r1=292328&r2=292329&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vldqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vldqintrin.h Tue Jan 17 20:17:10 2017
@@ -1000,27 +1000,26 @@ _mm256_maskz_broadcast_f32x2 (__mmask8 _
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_broadcast_f64x2 (__m128d __A)
+_mm256_broadcast_f64x2(__m128d __A)
 {
-  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
-                 (__v4df)_mm256_undefined_pd(),
-                 (__mmask8) -1);
+  return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
+                                          0, 1, 0, 1);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
+_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
 {
-  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
-                 (__v4df) __O,
-                 __M);
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
+                                            (__v4df)_mm256_broadcast_f64x2(__A),
+                                            (__v4df)__O);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
 {
-  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
-                 (__v4df) _mm256_setzero_ps (),
-                 __M);
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
+                                            (__v4df)_mm256_broadcast_f64x2(__A),
+                                            (__v4df)_mm256_setzero_pd());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -1072,27 +1071,26 @@ _mm256_maskz_broadcast_i32x2 (__mmask8 _
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_broadcast_i64x2 (__m128i __A)
+_mm256_broadcast_i64x2(__m128i __A)
 {
-  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
-                 (__v4di)_mm256_undefined_si256(),
-                 (__mmask8) -1);
+  return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
+                                          0, 1, 0, 1);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
+_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
 {
-  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
-                 (__v4di) __O,
-                 __M);
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
+                                            (__v4di)_mm256_broadcast_i64x2(__A),
+                                            (__v4di)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 {
-  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
-                 (__v4di) _mm256_setzero_si256 (),
-                 __M);
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
+                                            (__v4di)_mm256_broadcast_i64x2(__A),
+                                            (__v4di)_mm256_setzero_si256());
 }
 
 #define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \

Modified: cfe/trunk/lib/Headers/avx512vlintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=292329&r1=292328&r2=292329&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vlintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlintrin.h Tue Jan 17 20:17:10 2017
@@ -7189,52 +7189,49 @@ _mm256_maskz_rsqrt14_ps (__mmask8 __U, _
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_broadcast_f32x4 (__m128 __A)
+_mm256_broadcast_f32x4(__m128 __A)
 {
-  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
-                (__v8sf)_mm256_undefined_pd (),
-                (__mmask8) -1);
+  return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
+                                         0, 1, 2, 3, 0, 1, 2, 3);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
+_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
 {
-  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
-                (__v8sf) __O,
-                __M);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
+                                            (__v8sf)_mm256_broadcast_f32x4(__A),
+                                            (__v8sf)__O);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
 {
-  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
-                (__v8sf) _mm256_setzero_ps (),
-                __M);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
+                                            (__v8sf)_mm256_broadcast_f32x4(__A),
+                                            (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_broadcast_i32x4 (__m128i __A)
+_mm256_broadcast_i32x4(__m128i __A)
 {
-  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
-                 (__v8si)_mm256_undefined_si256 (),
-                 (__mmask8) -1);
+  return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
+                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
+_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
 {
-  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
-                 (__v8si)
-                 __O, __M);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+                                            (__v8si)_mm256_broadcast_i32x4(__A),
+                                            (__v8si)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
+_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
 {
-  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
-                 __A,
-                 (__v8si) _mm256_setzero_si256 (),
-                 __M);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+                                            (__v8si)_mm256_broadcast_i32x4(__A),
+                                            (__v8si)_mm256_setzero_si256());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=292329&r1=292328&r2=292329&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Tue Jan 17 20:17:10 2017
@@ -963,40 +963,44 @@ __m512 test_mm512_maskz_broadcast_f32x2(
   return _mm512_maskz_broadcast_f32x2(__M, __A); 
 }
 
-__m512 test_mm512_broadcast_f32x8(__m256 __A) {
+__m512 test_mm512_broadcast_f32x8(float const* __A) {
   // CHECK-LABEL: @test_mm512_broadcast_f32x8
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x8
-  return _mm512_broadcast_f32x8(__A); 
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return _mm512_broadcast_f32x8(_mm256_loadu_ps(__A)); 
 }
 
-__m512 test_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) {
+__m512 test_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, float const* __A) {
   // CHECK-LABEL: @test_mm512_mask_broadcast_f32x8
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x8
-  return _mm512_mask_broadcast_f32x8(__O, __M, __A); 
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  return _mm512_mask_broadcast_f32x8(__O, __M, _mm256_loadu_ps(__A)); 
 }
 
-__m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) {
+__m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, float const* __A) {
   // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x8
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x8
-  return _mm512_maskz_broadcast_f32x8(__M, __A); 
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  return _mm512_maskz_broadcast_f32x8(__M, _mm256_loadu_ps(__A)); 
 }
 
-__m512d test_mm512_broadcast_f64x2(__m128d __A) {
+__m512d test_mm512_broadcast_f64x2(double const* __A) {
   // CHECK-LABEL: @test_mm512_broadcast_f64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf64x2
-  return _mm512_broadcast_f64x2(__A); 
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  return _mm512_broadcast_f64x2(_mm_loadu_pd(__A)); 
 }
 
-__m512d test_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) {
+__m512d test_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, double const* __A) {
   // CHECK-LABEL: @test_mm512_mask_broadcast_f64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf64x2
-  return _mm512_mask_broadcast_f64x2(__O, __M, __A); 
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  return _mm512_mask_broadcast_f64x2(__O, __M, _mm_loadu_pd(__A)); 
 }
 
-__m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) {
+__m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) {
   // CHECK-LABEL: @test_mm512_maskz_broadcast_f64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf64x2
-  return _mm512_maskz_broadcast_f64x2(__M, __A); 
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  return _mm512_maskz_broadcast_f64x2(__M, _mm_loadu_pd(__A)); 
 }
 
 __m512i test_mm512_broadcast_i32x2(__m128i __A) {
@@ -1017,41 +1021,46 @@ __m512i test_mm512_maskz_broadcast_i32x2
   return _mm512_maskz_broadcast_i32x2(__M, __A); 
 }
 
-__m512i test_mm512_broadcast_i32x8(__m256i __A) {
+__m512i test_mm512_broadcast_i32x8(__m256i const* __A) {
   // CHECK-LABEL: @test_mm512_broadcast_i32x8
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x8
-  return _mm512_broadcast_i32x8(__A); 
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return _mm512_broadcast_i32x8(_mm256_loadu_si256(__A)); 
 }
 
-__m512i test_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) {
+__m512i test_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i const* __A) {
   // CHECK-LABEL: @test_mm512_mask_broadcast_i32x8
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x8
-  return _mm512_mask_broadcast_i32x8(__O, __M, __A); 
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_mask_broadcast_i32x8(__O, __M, _mm256_loadu_si256(__A)); 
 }
 
-__m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) {
+__m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i const* __A) {
   // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x8
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x8
-  return _mm512_maskz_broadcast_i32x8(__M, __A); 
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_maskz_broadcast_i32x8(__M, _mm256_loadu_si256(__A)); 
 }
 
-__m512i test_mm512_broadcast_i64x2(__m128i __A) {
+__m512i test_mm512_broadcast_i64x2(__m128i const* __A) {
   // CHECK-LABEL: @test_mm512_broadcast_i64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti64x2
-  return _mm512_broadcast_i64x2(__A); 
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  return _mm512_broadcast_i64x2(_mm_loadu_si128(__A)); 
 }
 
-__m512i test_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) {
+__m512i test_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i const* __A) {
   // CHECK-LABEL: @test_mm512_mask_broadcast_i64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti64x2
-  return _mm512_mask_broadcast_i64x2(__O, __M, __A); 
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_mask_broadcast_i64x2(__O, __M, _mm_loadu_si128(__A)); 
 }
 
-__m512i test_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
+__m512i test_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i const* __A) {
   // CHECK-LABEL: @test_mm512_maskz_broadcast_i64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti64x2
-  return _mm512_maskz_broadcast_i64x2(__M, __A); 
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_maskz_broadcast_i64x2(__M, _mm_loadu_si128(__A)); 
 }
+
 __m256 test_mm512_extractf32x8_ps(__m512 __A) {
   // CHECK-LABEL: @test_mm512_extractf32x8_ps
   // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=292329&r1=292328&r2=292329&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Tue Jan 17 20:17:10 2017
@@ -4647,76 +4647,84 @@ __m128 test_mm_maskz_sqrt_round_ss(__mma
     return _mm_maskz_sqrt_round_ss(__U,__A,__B,_MM_FROUND_CUR_DIRECTION);
 }
 
-__m512 test_mm512_broadcast_f32x4(__m128 __A) {
+__m512 test_mm512_broadcast_f32x4(float const* __A) {
   // CHECK-LABEL: @test_mm512_broadcast_f32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x4
-  return _mm512_broadcast_f32x4(__A); 
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  return _mm512_broadcast_f32x4(_mm_loadu_ps(__A)); 
 }
 
-__m512 test_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) {
+__m512 test_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, float const* __A) {
   // CHECK-LABEL: @test_mm512_mask_broadcast_f32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x4
-  return _mm512_mask_broadcast_f32x4(__O, __M, __A); 
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  return _mm512_mask_broadcast_f32x4(__O, __M, _mm_loadu_ps(__A)); 
 }
 
-__m512 test_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) {
+__m512 test_mm512_maskz_broadcast_f32x4(__mmask16 __M, float const* __A) {
   // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x4
-  return _mm512_maskz_broadcast_f32x4(__M, __A); 
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  return _mm512_maskz_broadcast_f32x4(__M, _mm_loadu_ps(__A)); 
 }
 
-__m512d test_mm512_broadcast_f64x4(__m256d __A) {
+__m512d test_mm512_broadcast_f64x4(float const* __A) {
   // CHECK-LABEL: @test_mm512_broadcast_f64x4
-  // CHECK: @llvm.x86.avx512.mask.broadcastf64x4
-  return _mm512_broadcast_f64x4(__A); 
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  return _mm512_broadcast_f64x4(_mm256_loadu_ps(__A)); 
 }
 
-__m512d test_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) {
+__m512d test_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, float const* __A) {
   // CHECK-LABEL: @test_mm512_mask_broadcast_f64x4
-  // CHECK: @llvm.x86.avx512.mask.broadcastf64x4
-  return _mm512_mask_broadcast_f64x4(__O, __M, __A); 
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  return _mm512_mask_broadcast_f64x4(__O, __M, _mm256_loadu_ps(__A)); 
 }
 
-__m512d test_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) {
+__m512d test_mm512_maskz_broadcast_f64x4(__mmask8 __M, float const* __A) {
   // CHECK-LABEL: @test_mm512_maskz_broadcast_f64x4
-  // CHECK: @llvm.x86.avx512.mask.broadcastf64x4
-  return _mm512_maskz_broadcast_f64x4(__M, __A); 
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  return _mm512_maskz_broadcast_f64x4(__M, _mm256_loadu_ps(__A)); 
 }
 
-__m512i test_mm512_broadcast_i32x4(__m128i __A) {
+__m512i test_mm512_broadcast_i32x4(__m128i const* __A) {
   // CHECK-LABEL: @test_mm512_broadcast_i32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x4
-  return _mm512_broadcast_i32x4(__A); 
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  return _mm512_broadcast_i32x4(_mm_loadu_si128(__A)); 
 }
 
-__m512i test_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) {
+__m512i test_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i const* __A) {
   // CHECK-LABEL: @test_mm512_mask_broadcast_i32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x4
-  return _mm512_mask_broadcast_i32x4(__O, __M, __A); 
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_mask_broadcast_i32x4(__O, __M, _mm_loadu_si128(__A)); 
 }
 
-__m512i test_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) {
+__m512i test_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i const* __A) {
   // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x4
-  return _mm512_maskz_broadcast_i32x4(__M, __A); 
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); 
 }
 
-__m512i test_mm512_broadcast_i64x4(__m256i __A) {
+__m512i test_mm512_broadcast_i64x4(__m256i const* __A) {
   // CHECK-LABEL: @test_mm512_broadcast_i64x4
-  // CHECK: @llvm.x86.avx512.mask.broadcasti64x4
-  return _mm512_broadcast_i64x4(__A); 
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  return _mm512_broadcast_i64x4(_mm256_loadu_si256(__A)); 
 }
 
-__m512i test_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A) {
+__m512i test_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i const* __A) {
   // CHECK-LABEL: @test_mm512_mask_broadcast_i64x4
-  // CHECK: @llvm.x86.avx512.mask.broadcasti64x4
-  return _mm512_mask_broadcast_i64x4(__O, __M, __A); 
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_mask_broadcast_i64x4(__O, __M, _mm256_loadu_si256(__A)); 
 }
 
-__m512i test_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) {
+__m512i test_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i const* __A) {
   // CHECK-LABEL: @test_mm512_maskz_broadcast_i64x4
-  // CHECK: @llvm.x86.avx512.mask.broadcasti64x4
-  return _mm512_maskz_broadcast_i64x4(__M, __A); 
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_maskz_broadcast_i64x4(__M, _mm256_loadu_si256(__A)); 
 }
 
 __m512d test_mm512_broadcastsd_pd(__m128d __A) {

Modified: cfe/trunk/test/CodeGen/avx512vl-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vl-builtins.c?rev=292329&r1=292328&r2=292329&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vl-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vl-builtins.c Tue Jan 17 20:17:10 2017
@@ -5734,38 +5734,42 @@ __m256 test_mm256_maskz_rsqrt14_ps(__mma
 
 __m256 test_mm256_broadcast_f32x4(__m128 __A) {
   // CHECK-LABEL: @test_mm256_broadcast_f32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x4
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   return _mm256_broadcast_f32x4(__A); 
 }
 
 __m256 test_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) {
   // CHECK-LABEL: @test_mm256_mask_broadcast_f32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x4
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_broadcast_f32x4(__O, __M, __A); 
 }
 
 __m256 test_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) {
   // CHECK-LABEL: @test_mm256_maskz_broadcast_f32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x4
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_broadcast_f32x4(__M, __A); 
 }
 
-__m256i test_mm256_broadcast_i32x4(__m128i __A) {
+__m256i test_mm256_broadcast_i32x4(__m128i const* __A) {
   // CHECK-LABEL: @test_mm256_broadcast_i32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x4
-  return _mm256_broadcast_i32x4(__A); 
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  return _mm256_broadcast_i32x4(_mm_loadu_si128(__A)); 
 }
 
-__m256i test_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) {
+__m256i test_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i const* __A) {
   // CHECK-LABEL: @test_mm256_mask_broadcast_i32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x4
-  return _mm256_mask_broadcast_i32x4(__O, __M, __A); 
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+  return _mm256_mask_broadcast_i32x4(__O, __M, _mm_loadu_si128(__A)); 
 }
 
-__m256i test_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) {
+__m256i test_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i const* __A) {
   // CHECK-LABEL: @test_mm256_maskz_broadcast_i32x4
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x4
-  return _mm256_maskz_broadcast_i32x4(__M, __A); 
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+  return _mm256_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); 
 }
 
 __m256d test_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) {

Modified: cfe/trunk/test/CodeGen/avx512vldq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vldq-builtins.c?rev=292329&r1=292328&r2=292329&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vldq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vldq-builtins.c Tue Jan 17 20:17:10 2017
@@ -918,22 +918,24 @@ __m256 test_mm256_maskz_broadcast_f32x2(
   return _mm256_maskz_broadcast_f32x2(__M, __A); 
 }
 
-__m256d test_mm256_broadcast_f64x2(__m128d __A) {
+__m256d test_mm256_broadcast_f64x2(double const* __A) {
   // CHECK-LABEL: @test_mm256_broadcast_f64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf64x2
-  return _mm256_broadcast_f64x2(__A); 
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  return _mm256_broadcast_f64x2(_mm_loadu_pd(__A)); 
 }
 
-__m256d test_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) {
+__m256d test_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, double const* __A) {
   // CHECK-LABEL: @test_mm256_mask_broadcast_f64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf64x2
-  return _mm256_mask_broadcast_f64x2(__O, __M, __A); 
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_broadcast_f64x2(__O, __M, _mm_loadu_pd(__A)); 
 }
 
-__m256d test_mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) {
+__m256d test_mm256_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) {
   // CHECK-LABEL: @test_mm256_maskz_broadcast_f64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf64x2
-  return _mm256_maskz_broadcast_f64x2(__M, __A); 
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_maskz_broadcast_f64x2(__M, _mm_loadu_pd(__A)); 
 }
 
 __m128i test_mm_broadcast_i32x2(__m128i __A) {
@@ -972,22 +974,24 @@ __m256i test_mm256_maskz_broadcast_i32x2
   return _mm256_maskz_broadcast_i32x2(__M, __A); 
 }
 
-__m256i test_mm256_broadcast_i64x2(__m128i __A) {
+__m256i test_mm256_broadcast_i64x2(__m128i const* __A) {
   // CHECK-LABEL: @test_mm256_broadcast_i64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti64x2
-  return _mm256_broadcast_i64x2(__A); 
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  return _mm256_broadcast_i64x2(_mm_loadu_si128(__A)); 
 }
 
-__m256i test_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) {
+__m256i test_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i const* __A) {
   // CHECK-LABEL: @test_mm256_mask_broadcast_i64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti64x2
-  return _mm256_mask_broadcast_i64x2(__O, __M, __A); 
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
+  return _mm256_mask_broadcast_i64x2(__O, __M, _mm_loadu_si128(__A)); 
 }
 
-__m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
+__m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i const* __A) {
   // CHECK-LABEL: @test_mm256_maskz_broadcast_i64x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti64x2
-  return _mm256_maskz_broadcast_i64x2(__M, __A); 
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
+  return _mm256_maskz_broadcast_i64x2(__M, _mm_loadu_si128(__A)); 
 }
 
 __m128d test_mm256_extractf64x2_pd(__m256d __A) {




More information about the cfe-commits mailing list